@phnx-labs/agents-cli 1.20.17 → 1.20.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +1 -1
- package/dist/commands/budget.d.ts +14 -0
- package/dist/commands/budget.js +137 -0
- package/dist/commands/cost.d.ts +12 -0
- package/dist/commands/cost.js +139 -0
- package/dist/commands/exec.d.ts +20 -0
- package/dist/commands/exec.js +382 -5
- package/dist/commands/secrets.d.ts +15 -0
- package/dist/commands/secrets.js +250 -4
- package/dist/commands/sessions.js +4 -0
- package/dist/index.js +4 -0
- package/dist/lib/budget/config.d.ts +9 -0
- package/dist/lib/budget/config.js +115 -0
- package/dist/lib/budget/enforce.d.ts +94 -0
- package/dist/lib/budget/enforce.js +151 -0
- package/dist/lib/budget/ledger.d.ts +61 -0
- package/dist/lib/budget/ledger.js +107 -0
- package/dist/lib/budget/preflight.d.ts +110 -0
- package/dist/lib/budget/preflight.js +200 -0
- package/dist/lib/checkpoint.d.ts +54 -0
- package/dist/lib/checkpoint.js +56 -0
- package/dist/lib/cloud/rush.js +18 -0
- package/dist/lib/exec.d.ts +36 -0
- package/dist/lib/exec.js +192 -4
- package/dist/lib/git.d.ts +18 -0
- package/dist/lib/git.js +67 -4
- package/dist/lib/loop.d.ts +145 -0
- package/dist/lib/loop.js +330 -0
- package/dist/lib/mcp.d.ts +7 -0
- package/dist/lib/mcp.js +24 -0
- package/dist/lib/models.d.ts +11 -0
- package/dist/lib/models.js +21 -0
- package/dist/lib/plugins.js +5 -2
- package/dist/lib/pricing/cost.d.ts +46 -0
- package/dist/lib/pricing/cost.js +71 -0
- package/dist/lib/pricing/index.d.ts +8 -0
- package/dist/lib/pricing/index.js +8 -0
- package/dist/lib/pricing/prices.json +138 -0
- package/dist/lib/pricing/table.d.ts +17 -0
- package/dist/lib/pricing/table.js +73 -0
- package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
- package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
- package/dist/lib/secrets/agent.d.ts +134 -0
- package/dist/lib/secrets/agent.js +501 -0
- package/dist/lib/secrets/bundles.d.ts +21 -0
- package/dist/lib/secrets/bundles.js +43 -0
- package/dist/lib/session/db.d.ts +40 -0
- package/dist/lib/session/db.js +84 -2
- package/dist/lib/session/discover.d.ts +2 -0
- package/dist/lib/session/discover.js +126 -2
- package/dist/lib/session/render.d.ts +2 -0
- package/dist/lib/session/render.js +1 -1
- package/dist/lib/session/types.d.ts +4 -0
- package/dist/lib/teams/agents.d.ts +32 -0
- package/dist/lib/teams/agents.js +66 -3
- package/dist/lib/teams/api.js +20 -0
- package/dist/lib/teams/parsers.js +16 -4
- package/dist/lib/types.d.ts +48 -0
- package/dist/lib/workflows.d.ts +56 -0
- package/dist/lib/workflows.js +72 -5
- package/package.json +2 -1
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import { actualCost } from '../pricing/index.js';
|
|
2
|
+
/**
 * Build the cap set the live spend watcher consumes.
 *
 * The four limits are copied off the resolved budget config and paired with the
 * spend already recorded before this run. Any prior figure that is missing
 * falls back to zero (or to an empty per-agent map).
 *
 * @param cfg   Resolved budget config (`per_run`, `per_day`, `per_project`, `per_agent`).
 * @param prior Optional pre-run spend snapshot (`daySpend`, `projectSpend`, `agentDaySpend`).
 * @returns The caps object accepted by `makeLiveSpendWatcher`.
 */
export function capsFromConfig(cfg, prior) {
    const { per_run: perRun, per_day: perDay, per_project: perProject, per_agent: perAgent } = cfg;
    const priorDaySpend = prior?.daySpend ?? 0;
    const priorProjectSpend = prior?.projectSpend ?? 0;
    const priorAgentDaySpend = prior?.agentDaySpend ?? {};
    return {
        perRun,
        perDay,
        perProject,
        perAgent,
        priorDaySpend,
        priorProjectSpend,
        priorAgentDaySpend,
    };
}
|
|
14
|
+
/**
|
|
15
|
+
* Create a live spend watcher. `onBreach` fires at most once, on the first
|
|
16
|
+
* event that pushes any active cap from at-or-under to over. After it fires the
|
|
17
|
+
* watcher keeps accumulating (so `runSpend()` stays accurate for the final
|
|
18
|
+
* ledger record) but never calls `onBreach` again.
|
|
19
|
+
*/
|
|
20
|
+
export function makeLiveSpendWatcher(args) {
|
|
21
|
+
const { caps, onBreach } = args;
|
|
22
|
+
let run = 0;
|
|
23
|
+
// Cross-vendor accumulators, seeded with pre-run ledger spend.
|
|
24
|
+
let day = caps.priorDaySpend ?? 0;
|
|
25
|
+
let project = caps.priorProjectSpend ?? 0;
|
|
26
|
+
const agentDay = {};
|
|
27
|
+
for (const [k, v] of Object.entries(caps.priorAgentDaySpend ?? {})) {
|
|
28
|
+
if (typeof v === 'number')
|
|
29
|
+
agentDay[k] = v;
|
|
30
|
+
}
|
|
31
|
+
let didBreach = false;
|
|
32
|
+
let disposed = false;
|
|
33
|
+
function checkBreach(agent) {
|
|
34
|
+
if (caps.perRun !== undefined && run > caps.perRun) {
|
|
35
|
+
return { cap: 'per_run', limit: caps.perRun, spend: run, runSpend: run };
|
|
36
|
+
}
|
|
37
|
+
if (caps.perDay !== undefined && day > caps.perDay) {
|
|
38
|
+
return { cap: 'per_day', limit: caps.perDay, spend: day, runSpend: run };
|
|
39
|
+
}
|
|
40
|
+
if (caps.perProject !== undefined && project > caps.perProject) {
|
|
41
|
+
return { cap: 'per_project', limit: caps.perProject, spend: project, runSpend: run };
|
|
42
|
+
}
|
|
43
|
+
if (agent && caps.perAgent && caps.perAgent[agent] !== undefined) {
|
|
44
|
+
const limit = caps.perAgent[agent];
|
|
45
|
+
if ((agentDay[agent] ?? 0) > limit) {
|
|
46
|
+
return { cap: 'per_agent', limit, spend: agentDay[agent], agent, runSpend: run };
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
return null;
|
|
50
|
+
}
|
|
51
|
+
return {
|
|
52
|
+
feedUsage(event) {
|
|
53
|
+
if (disposed)
|
|
54
|
+
return;
|
|
55
|
+
const { usd } = actualCost(event.model ?? '', {
|
|
56
|
+
inputTokens: event.inputTokens ?? 0,
|
|
57
|
+
outputTokens: event.outputTokens ?? 0,
|
|
58
|
+
cacheReadTokens: event.cacheReadTokens,
|
|
59
|
+
cacheCreationTokens: event.cacheCreationTokens,
|
|
60
|
+
});
|
|
61
|
+
if (usd <= 0)
|
|
62
|
+
return;
|
|
63
|
+
const agent = event.agent ? String(event.agent) : undefined;
|
|
64
|
+
run += usd;
|
|
65
|
+
day += usd;
|
|
66
|
+
project += usd;
|
|
67
|
+
if (agent)
|
|
68
|
+
agentDay[agent] = (agentDay[agent] ?? 0) + usd;
|
|
69
|
+
if (didBreach)
|
|
70
|
+
return;
|
|
71
|
+
const breach = checkBreach(agent);
|
|
72
|
+
if (breach) {
|
|
73
|
+
didBreach = true;
|
|
74
|
+
onBreach(breach);
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
runSpend: () => run,
|
|
78
|
+
breached: () => didBreach,
|
|
79
|
+
dispose() {
|
|
80
|
+
disposed = true;
|
|
81
|
+
},
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Incrementally extract usage events from a stream-json chunk. Buffers a partial
|
|
86
|
+
* trailing line across calls (returned in `rest`), parses each complete line,
|
|
87
|
+
* and yields one UsageEvent per line that carries token counts. Provider shapes
|
|
88
|
+
* handled: Claude/`--json` assistant turns (`message.usage` with
|
|
89
|
+
* `input_tokens`/`output_tokens`/`cache_*_input_tokens`) and the flatter
|
|
90
|
+
* `usage.record` shape (`usage.input_tokens`/`output`). Lines that aren't JSON
|
|
91
|
+
* or carry no usage are skipped — this never throws on agent output.
|
|
92
|
+
*/
|
|
93
|
+
export function extractUsageEvents(chunk, pending, fallbackModel, fallbackAgent) {
|
|
94
|
+
const combined = pending + chunk;
|
|
95
|
+
const lines = combined.split('\n');
|
|
96
|
+
const rest = lines.pop() ?? '';
|
|
97
|
+
const events = [];
|
|
98
|
+
for (const line of lines) {
|
|
99
|
+
const trimmed = line.trim();
|
|
100
|
+
if (!trimmed || trimmed[0] !== '{')
|
|
101
|
+
continue;
|
|
102
|
+
let obj;
|
|
103
|
+
try {
|
|
104
|
+
obj = JSON.parse(trimmed);
|
|
105
|
+
}
|
|
106
|
+
catch {
|
|
107
|
+
continue;
|
|
108
|
+
}
|
|
109
|
+
const ev = usageFromObject(obj, fallbackModel, fallbackAgent);
|
|
110
|
+
if (ev)
|
|
111
|
+
events.push(ev);
|
|
112
|
+
}
|
|
113
|
+
return { events, rest };
|
|
114
|
+
}
|
|
115
|
+
/**
 * Turn one parsed stream-json object into a usage event, or null when it
 * carries no per-event token usage.
 *
 * Two shapes are recognised, tried in this order:
 *  1. an assistant turn with `message.usage`;
 *  2. a flatter top-level `usage` record.
 */
function usageFromObject(obj, fallbackModel, fallbackAgent) {
    // A `type:"result"` line carries a top-level cumulative `usage` that already
    // sums every per-turn `message.usage`. Counting it as well as the turns
    // would roughly double a multi-turn run, so result lines are ignored.
    if (obj?.type === 'result')
        return null;
    const isNumber = (value) => typeof value === 'number';
    // Shape 1: assistant turn.
    const turn = obj?.message?.usage;
    if (turn && (isNumber(turn.input_tokens) || isNumber(turn.output_tokens))) {
        const model = obj.message.model ?? fallbackModel;
        return {
            agent: fallbackAgent,
            model,
            inputTokens: turn.input_tokens ?? 0,
            outputTokens: turn.output_tokens ?? 0,
            cacheReadTokens: turn.cache_read_input_tokens,
            cacheCreationTokens: turn.cache_creation_input_tokens,
        };
    }
    // Shape 2: flat usage record. Result lines were excluded above, so whatever
    // matches here is treated as a per-event record.
    const flat = obj?.usage;
    const flatHasTokens = flat
        && (isNumber(flat.input_tokens) || isNumber(flat.output) || isNumber(flat.output_tokens));
    if (!flatHasTokens)
        return null;
    return {
        agent: fallbackAgent,
        model: obj.model ?? flat.model ?? fallbackModel,
        inputTokens: flat.input_tokens ?? flat.inputOther ?? 0,
        outputTokens: flat.output_tokens ?? flat.output ?? 0,
        cacheReadTokens: flat.cache_read_input_tokens,
        cacheCreationTokens: flat.cache_creation_input_tokens,
    };
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
 * One line of the spend ledger: a single usage observation attributed to a run.
 * Entries are appended and never rewritten.
 */
export interface SpendEntry {
    /** Id of the run; several observations from one dispatch share it. */
    runId: string;
    /** Agent id (claude, codex, ...), the key used for cross-vendor attribution. */
    agent: string;
    /** Project key (absolute path or repo slug); the empty string when unknown. */
    project: string;
    /** Local (not UTC) calendar day in YYYY-MM-DD form. */
    day: string;
    /** Model id as the stream reported it; may include a vendor prefix or date suffix. */
    model: string;
    /** Input tokens of this observation. */
    inputTok: number;
    /** Output tokens of this observation. */
    outputTok: number;
    /** Cache-read plus cache-creation tokens, stored as one combined figure. */
    cacheTok: number;
    /** USD cost of this observation alone, computed by actualCost() when written. */
    costUsd: number;
    /** Origin of the spend: a local run, a teams teammate, or a cloud dispatch. */
    source: 'run' | 'teams' | 'cloud';
    /** ISO timestamp of the observation. */
    ts: string;
}
|
|
24
|
+
/**
 * Token counts for one observation (same field names as the session/parse
 * usage fields). Every field is optional.
 */
export interface UsageObservation {
    /** Model id, when the observation names one. */
    model?: string;
    /** Input tokens. */
    inputTokens?: number;
    /** Output tokens. */
    outputTokens?: number;
    /** Tokens read from cache. */
    cacheReadTokens?: number;
    /** Tokens written to cache. */
    cacheCreationTokens?: number;
}
|
|
32
|
+
/** Location of the ledger when no path is given: `<history>/spend/ledger.jsonl`. */
export declare function defaultLedgerPath(): string;

/**
 * Format a Date (default: now) as YYYY-MM-DD in LOCAL time. Local rather than
 * UTC because a daily cap is a human-day notion.
 */
export declare function localDay(d?: Date): string;

/**
 * Append one spend observation to the ledger and return the entry written.
 * `costUsd` is derived from `usage` through the canonical pricing module (an
 * unpriced model contributes $0). The spend directory is created on first write.
 *
 * @param input      Run/agent/model attribution plus the token usage to record.
 * @param ledgerPath Ledger file to append to; defaults to `defaultLedgerPath()`.
 */
export declare function recordSpend(
    input: {
        runId: string;
        agent: string;
        project?: string;
        model: string;
        usage: UsageObservation;
        source: SpendEntry['source'];
        ts?: Date;
    },
    ledgerPath?: string,
): SpendEntry;

/**
 * Read every ledger entry. Malformed lines are skipped, so a half-written
 * final line never breaks a rollup.
 */
export declare function loadLedger(ledgerPath?: string): SpendEntry[];

/** USD spent on one local day, summed over ALL agents (cross-vendor). */
export declare function spendForDay(day: string, ledger?: SpendEntry[]): number;

/** USD spent on one local day by ONE agent (used for per-agent cap accounting). */
export declare function spendForAgentDay(agent: string, day: string, ledger?: SpendEntry[]): number;

/** USD attributed to an agent over all time. */
export declare function spendForAgent(agent: string, ledger?: SpendEntry[]): number;

/** USD attributed to a project over all time (cross-vendor). */
export declare function spendForProject(project: string, ledger?: SpendEntry[]): number;

/** USD for one run id, i.e. the sum of all of its usage observations. */
export declare function spendForRun(runId: string, ledger?: SpendEntry[]): number;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Append-only spend ledger (issue #346).
|
|
3
|
+
*
|
|
4
|
+
* Every dispatched run that produces token usage records one JSONL line under
|
|
5
|
+
* `<history>/spend/ledger.jsonl`. The ledger is the shared artifact #323's
|
|
6
|
+
* `agents cost` can later read for $ rollups, so the entry shape stays clean
|
|
7
|
+
* and stable: one record = one usage observation attributed to a run.
|
|
8
|
+
*
|
|
9
|
+
* `costUsd` is computed at write time via the canonical pricing module
|
|
10
|
+
* (lib/pricing) so the ledger is self-contained — a reader never needs the
|
|
11
|
+
* pricing table to sum spend. Rollups (`spendForDay`/`spendForAgent`/...) are
|
|
12
|
+
* pure folds over the file; for the modest line counts a developer accrues this
|
|
13
|
+
* is plenty fast, and there's no index to corrupt.
|
|
14
|
+
*/
|
|
15
|
+
import * as fs from 'fs';
|
|
16
|
+
import * as path from 'path';
|
|
17
|
+
import { getHistoryDir } from '../state.js';
|
|
18
|
+
import { actualCost } from '../pricing/index.js';
|
|
19
|
+
/** Default ledger path: <history>/spend/ledger.jsonl. */
|
|
20
|
+
export function defaultLedgerPath() {
|
|
21
|
+
return path.join(getHistoryDir(), 'spend', 'ledger.jsonl');
|
|
22
|
+
}
|
|
23
|
+
/**
 * Format a Date (default: now) as YYYY-MM-DD using LOCAL time. Local rather
 * than UTC because a daily cap is a human-day notion.
 */
export function localDay(d = new Date()) {
    const twoDigits = (n) => String(n).padStart(2, '0');
    // getMonth() is zero-based, hence the +1.
    return [d.getFullYear(), twoDigits(d.getMonth() + 1), twoDigits(d.getDate())].join('-');
}
|
|
30
|
+
/**
|
|
31
|
+
* Append one spend observation. Computes `costUsd` from the usage via the
|
|
32
|
+
* canonical pricing module (unpriced models contribute $0). Returns the written
|
|
33
|
+
* entry. Creates the spend dir on first write.
|
|
34
|
+
*/
|
|
35
|
+
export function recordSpend(input, ledgerPath = defaultLedgerPath()) {
|
|
36
|
+
const ts = input.ts ?? new Date();
|
|
37
|
+
const cacheTok = (input.usage.cacheReadTokens ?? 0) + (input.usage.cacheCreationTokens ?? 0);
|
|
38
|
+
const { usd } = actualCost(input.model, {
|
|
39
|
+
inputTokens: input.usage.inputTokens ?? 0,
|
|
40
|
+
outputTokens: input.usage.outputTokens ?? 0,
|
|
41
|
+
cacheReadTokens: input.usage.cacheReadTokens,
|
|
42
|
+
cacheCreationTokens: input.usage.cacheCreationTokens,
|
|
43
|
+
});
|
|
44
|
+
const entry = {
|
|
45
|
+
runId: input.runId,
|
|
46
|
+
agent: input.agent,
|
|
47
|
+
project: input.project ?? '',
|
|
48
|
+
day: localDay(ts),
|
|
49
|
+
model: input.model,
|
|
50
|
+
inputTok: input.usage.inputTokens ?? 0,
|
|
51
|
+
outputTok: input.usage.outputTokens ?? 0,
|
|
52
|
+
cacheTok,
|
|
53
|
+
costUsd: usd,
|
|
54
|
+
source: input.source,
|
|
55
|
+
ts: ts.toISOString(),
|
|
56
|
+
};
|
|
57
|
+
fs.mkdirSync(path.dirname(ledgerPath), { recursive: true });
|
|
58
|
+
fs.appendFileSync(ledgerPath, JSON.stringify(entry) + '\n');
|
|
59
|
+
return entry;
|
|
60
|
+
}
|
|
61
|
+
/**
 * Read every usable entry from the ledger file.
 *
 * A missing file gives an empty list. Blank lines, lines that fail to parse,
 * and records without a numeric `costUsd` are skipped, so a torn final line
 * never breaks a rollup.
 *
 * @param ledgerPath File to read; defaults to `defaultLedgerPath()`.
 */
export function loadLedger(ledgerPath = defaultLedgerPath()) {
    if (!fs.existsSync(ledgerPath))
        return [];
    /** Parse one line; null when it is blank, malformed, or lacks a numeric cost. */
    const parseRow = (row) => {
        const text = row.trim();
        if (text.length === 0)
            return null;
        try {
            const record = JSON.parse(text);
            // The property read stays inside the try: a line such as `null` parses
            // fine but throws here, and must be tolerated like any torn line.
            return typeof record.costUsd === 'number' ? record : null;
        }
        catch {
            return null;
        }
    };
    const entries = [];
    const rows = fs.readFileSync(ledgerPath, 'utf-8').split('\n');
    rows.forEach((row) => {
        const record = parseRow(row);
        if (record !== null)
            entries.push(record);
    });
    return entries;
}
|
|
81
|
+
function sum(entries, pred) {
|
|
82
|
+
let total = 0;
|
|
83
|
+
for (const e of entries)
|
|
84
|
+
if (pred(e))
|
|
85
|
+
total += e.costUsd;
|
|
86
|
+
return total;
|
|
87
|
+
}
|
|
88
|
+
/** Total USD spend on a given local day across ALL agents (cross-vendor). */
|
|
89
|
+
export function spendForDay(day, ledger = loadLedger()) {
|
|
90
|
+
return sum(ledger, (e) => e.day === day);
|
|
91
|
+
}
|
|
92
|
+
/** Total USD spend on a given day for ONE agent (per-agent cap accounting). */
|
|
93
|
+
export function spendForAgentDay(agent, day, ledger = loadLedger()) {
|
|
94
|
+
return sum(ledger, (e) => e.agent === agent && e.day === day);
|
|
95
|
+
}
|
|
96
|
+
/** Total USD spend attributed to an agent across all time. */
|
|
97
|
+
export function spendForAgent(agent, ledger = loadLedger()) {
|
|
98
|
+
return sum(ledger, (e) => e.agent === agent);
|
|
99
|
+
}
|
|
100
|
+
/** Total USD spend attributed to a project across all time (cross-vendor). */
|
|
101
|
+
export function spendForProject(project, ledger = loadLedger()) {
|
|
102
|
+
return sum(ledger, (e) => e.project === project);
|
|
103
|
+
}
|
|
104
|
+
/** Total USD spend for a single run id (all of its usage observations). */
|
|
105
|
+
export function spendForRun(runId, ledger = loadLedger()) {
|
|
106
|
+
return sum(ledger, (e) => e.runId === runId);
|
|
107
|
+
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-flight cost estimate + gate (issue #346).
|
|
3
|
+
*
|
|
4
|
+
* Before a run spawns we estimate its cost and decide whether to allow it. The
|
|
5
|
+
* estimate's token basis comes from recent ledger averages for the same agent
|
|
6
|
+
* (the most accurate signal we have), falling back to a prompt-character
|
|
7
|
+
* heuristic when there's no history. Cost is computed via the canonical pricing
|
|
8
|
+
* module — never reimplemented here.
|
|
9
|
+
*
|
|
10
|
+
* `enforcePreflight` is the decision: with `on_exceed: block`, if launching
|
|
11
|
+
* this run would push any cap (per_run / per_day / per_agent / per_project)
|
|
12
|
+
* over the line, it denies. With `on_exceed: warn` it always allows but reports
|
|
13
|
+
* the projected overrun.
|
|
14
|
+
*/
|
|
15
|
+
import type { BudgetConfig } from '../types.js';
|
|
16
|
+
import type { SpendEntry } from './ledger.js';
|
|
17
|
+
/** Cost estimate produced for a single run before it is spawned. */
export interface RunEstimate {
    /** Estimated USD for the run; 0 when the model has no price. */
    estUsd: number;
    /** Where the token estimate came from ('none' when there was nothing to go on). */
    basis: 'ledger-average' | 'prompt-heuristic' | 'none';
    /** True when the model resolved to a priced entry. */
    priced: boolean;
    /** Estimated input tokens the cost was computed from. */
    estInputTokens: number;
    /** Estimated output tokens the cost was computed from. */
    estOutputTokens: number;
}
|
|
28
|
+
/**
 * Estimate what a run will cost.
 *
 * If the ledger holds earlier runs of this agent, their average input/output
 * tokens are the basis; otherwise a prompt-character heuristic is used.
 * `recentAvgTokens` lets a caller inject a precomputed average (for example
 * from a scoped ledger), which is mainly useful in tests.
 */
export declare function estimateRunCost(args: {
    agent: string;
    model: string;
    mode?: string;
    promptChars?: number;
    recentAvgTokens?: {
        input: number;
        output: number;
    };
    ledger?: SpendEntry[];
}): RunEstimate;

/**
 * Average input/output tokens per RUN (not per observation) for one agent,
 * taken from the ledger. Null when the agent has no history.
 */
export declare function ledgerAverageTokens(
    agent: string,
    ledger: SpendEntry[],
): {
    input: number;
    output: number;
} | null;
|
|
50
|
+
/** Verdict of the pre-flight gate for one run. */
export interface PreflightDecision {
    /** Whether the run may proceed. False only for a hard block. */
    allow: boolean;
    /**
     * Whether the caller must ask for interactive confirmation. Set when the
     * estimate is at or above `require_confirm_over`, and also when caps are
     * configured but the model is unpriced (the caps cannot be enforced).
     */
    needsConfirm: boolean;
    /** Human-readable explanation when blocking, warning, or asking to confirm. */
    reason?: string;
    /**
     * The first cap the projection exceeds, if any. Also set in `on_exceed: warn`
     * mode, where the run is still allowed.
     */
    blockedCap?: 'per_run' | 'per_day' | 'per_agent' | 'per_project';
    /** Day spend if this run lands exactly at its estimate. */
    projectedDaySpend: number;
    /** Project spend if this run lands exactly at its estimate. */
    projectedProjectSpend: number;
}
|
|
65
|
+
/** Snapshot of current spend that the gate adds the estimate on top of. */
export interface LedgerState {
    /** Agent the snapshot belongs to; selects the matching per_agent cap. */
    agent: string;
    /** USD already spent today across all agents. */
    daySpend: number;
    /** USD already attributed to the project. */
    projectSpend: number;
    /** USD already spent today by this agent. */
    agentDaySpend: number;
}
|
|
73
|
+
/** Build the spend snapshot the gate needs for `agent` / `project` as of today. */
export declare function ledgerStateFor(agent: string, project: string, ledger?: SpendEntry[]): LedgerState;

/**
 * The pre-flight gate itself: add the run's estimate to current spend and
 * decide allow or deny.
 *
 * With `on_exceed: warn` it never blocks (`allow: true`) but still reports the
 * projected overrun in `reason`. A hard block returns `allow: false`; `--yes`
 * MUST NOT override it. Enforcing that is the caller's job; this function only
 * reports the truth.
 */
export declare function enforcePreflight(cfg: BudgetConfig, state: LedgerState, est: RunEstimate): PreflightDecision;

/** One-line, human-readable estimate banner for the `agents run` preamble. */
export declare function formatEstimateBanner(agent: string, model: string, est: RunEstimate): string;
|
|
85
|
+
/** What the high-level run gate hands to `agents run` / teams / cloud. */
export interface PreflightGateResult {
    /** True when no caps are configured, i.e. the budget feature is dormant. */
    dormant: boolean;
    /** The budget config that was resolved for the run. */
    cfg: BudgetConfig;
    /** The cost estimate for the run. */
    estimate: RunEstimate;
    /** The gate's verdict; always an unconditional allow when dormant. */
    decision: PreflightDecision;
    /** One-line estimate banner ready for printing. */
    banner: string;
}
|
|
94
|
+
/**
 * High-level pre-flight gate: resolve the effective budget for `cwd`, estimate
 * the run, and evaluate every cap.
 *
 * When no caps are set the result has `dormant: true` with an unconditional
 * allow, and no cap is evaluated. The estimate and banner are still produced,
 * so users without a budget still incur the estimate but never a cap check.
 * The CLI layer decides how to act on `decision`: print the banner, confirm,
 * or block and exit non-zero.
 */
export declare function runPreflightGate(args: {
    agent: string;
    model: string;
    mode?: string;
    prompt?: string;
    project: string;
    cwd?: string;
    ledger?: SpendEntry[];
}): PreflightGateResult;
|
|
110
|
+
export type { SpendEntry };
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import { estimateCost, formatUsd } from '../pricing/index.js';
|
|
2
|
+
import { loadLedger, spendForDay, spendForAgentDay, spendForProject, localDay } from './ledger.js';
|
|
3
|
+
import { resolveBudgetConfig, hasAnyCap } from './config.js';
|
|
4
|
+
/** Roughly 4 characters per token — the standard coarse heuristic for English text. */
|
|
5
|
+
const CHARS_PER_TOKEN = 4;
|
|
6
|
+
/**
|
|
7
|
+
* Output is typically a multiple of the visible prompt for an agentic run
|
|
8
|
+
* (tool calls, file reads, reasoning). 6x is a deliberately conservative
|
|
9
|
+
* lower bound so the estimate doesn't wildly under-report and wave through a
|
|
10
|
+
* run that then blows the cap on its first turn.
|
|
11
|
+
*/
|
|
12
|
+
const HEURISTIC_OUTPUT_MULTIPLIER = 6;
|
|
13
|
+
/**
|
|
14
|
+
* Estimate the cost of a run. When the ledger has prior runs for this agent we
|
|
15
|
+
* use their average input/output tokens; otherwise we fall back to a
|
|
16
|
+
* prompt-character heuristic. `recentAvgTokens` lets callers inject a
|
|
17
|
+
* precomputed average (e.g. from a scoped ledger) for testability.
|
|
18
|
+
*/
|
|
19
|
+
export function estimateRunCost(args) {
|
|
20
|
+
const ledger = args.ledger ?? loadLedger();
|
|
21
|
+
let estInputTokens = 0;
|
|
22
|
+
let estOutputTokens = 0;
|
|
23
|
+
let basis = 'none';
|
|
24
|
+
const avg = args.recentAvgTokens ?? ledgerAverageTokens(args.agent, ledger);
|
|
25
|
+
if (avg && (avg.input > 0 || avg.output > 0)) {
|
|
26
|
+
estInputTokens = avg.input;
|
|
27
|
+
estOutputTokens = avg.output;
|
|
28
|
+
basis = 'ledger-average';
|
|
29
|
+
}
|
|
30
|
+
else if (args.promptChars && args.promptChars > 0) {
|
|
31
|
+
estInputTokens = Math.ceil(args.promptChars / CHARS_PER_TOKEN);
|
|
32
|
+
estOutputTokens = estInputTokens * HEURISTIC_OUTPUT_MULTIPLIER;
|
|
33
|
+
basis = 'prompt-heuristic';
|
|
34
|
+
}
|
|
35
|
+
const { usd, modelMatched } = estimateCost(args.model, {
|
|
36
|
+
inputTokens: estInputTokens,
|
|
37
|
+
outputTokens: estOutputTokens,
|
|
38
|
+
});
|
|
39
|
+
return {
|
|
40
|
+
estUsd: usd,
|
|
41
|
+
basis: estInputTokens === 0 && estOutputTokens === 0 ? 'none' : basis,
|
|
42
|
+
priced: modelMatched !== null,
|
|
43
|
+
estInputTokens,
|
|
44
|
+
estOutputTokens,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
/**
 * Average input/output tokens per RUN for an agent, from the ledger.
 *
 * Observations are first summed per `runId`, then the per-run totals are
 * averaged and rounded to whole tokens. A token field that is missing or not a
 * finite number counts as 0, so one malformed entry cannot turn the average
 * (and every estimate built on it) into NaN.
 *
 * @param agent  Agent id whose runs are averaged.
 * @param ledger Ledger entries to scan.
 * @returns `{ input, output }`, or null when the agent has no entries.
 */
export function ledgerAverageTokens(agent, ledger) {
    const tokenCount = (value) => (typeof value === 'number' && Number.isFinite(value) ? value : 0);
    const runs = new Map();
    for (const e of ledger) {
        if (e.agent !== agent)
            continue;
        const acc = runs.get(e.runId) ?? { input: 0, output: 0 };
        acc.input += tokenCount(e.inputTok);
        acc.output += tokenCount(e.outputTok);
        runs.set(e.runId, acc);
    }
    if (runs.size === 0)
        return null;
    let input = 0;
    let output = 0;
    for (const r of runs.values()) {
        input += r.input;
        output += r.output;
    }
    return { input: Math.round(input / runs.size), output: Math.round(output / runs.size) };
}
|
|
68
|
+
/** Read the ledger snapshot the gate needs for `agent` / `project` / today. */
|
|
69
|
+
export function ledgerStateFor(agent, project, ledger) {
|
|
70
|
+
const entries = ledger ?? loadLedger();
|
|
71
|
+
const today = localDay();
|
|
72
|
+
return {
|
|
73
|
+
agent,
|
|
74
|
+
daySpend: spendForDay(today, entries),
|
|
75
|
+
projectSpend: spendForProject(project, entries),
|
|
76
|
+
agentDaySpend: spendForAgentDay(agent, today, entries),
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* The pre-flight gate. Projects this run's estimate on top of current spend and
|
|
81
|
+
* decides allow/deny. `on_exceed: warn` never blocks (allow:true) but still
|
|
82
|
+
* reports the projected overrun via `reason`. A hard block sets allow:false —
|
|
83
|
+
* `--yes` MUST NOT override it (the caller enforces that; this function only
|
|
84
|
+
* reports the truth).
|
|
85
|
+
*/
|
|
86
|
+
export function enforcePreflight(cfg, state, est) {
|
|
87
|
+
const projectedDaySpend = state.daySpend + est.estUsd;
|
|
88
|
+
const projectedProjectSpend = state.projectSpend + est.estUsd;
|
|
89
|
+
const projectedAgentDaySpend = state.agentDaySpend + est.estUsd;
|
|
90
|
+
const warnOnly = cfg.on_exceed === 'warn';
|
|
91
|
+
const breaches = [];
|
|
92
|
+
if (cfg.per_run !== undefined && est.estUsd > cfg.per_run) {
|
|
93
|
+
breaches.push({
|
|
94
|
+
cap: 'per_run',
|
|
95
|
+
reason: `estimated ${formatUsd(est.estUsd)} exceeds per_run cap ${formatUsd(cfg.per_run)}`,
|
|
96
|
+
});
|
|
97
|
+
}
|
|
98
|
+
if (cfg.per_day !== undefined && projectedDaySpend > cfg.per_day) {
|
|
99
|
+
breaches.push({
|
|
100
|
+
cap: 'per_day',
|
|
101
|
+
reason: `projected day spend ${formatUsd(projectedDaySpend)} exceeds per_day cap ${formatUsd(cfg.per_day)}`,
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
if (cfg.per_project !== undefined && projectedProjectSpend > cfg.per_project) {
|
|
105
|
+
breaches.push({
|
|
106
|
+
cap: 'per_project',
|
|
107
|
+
reason: `projected project spend ${formatUsd(projectedProjectSpend)} exceeds per_project cap ${formatUsd(cfg.per_project)}`,
|
|
108
|
+
});
|
|
109
|
+
}
|
|
110
|
+
const agentCap = cfg.per_agent?.[state.agent];
|
|
111
|
+
if (agentCap !== undefined && projectedAgentDaySpend > agentCap) {
|
|
112
|
+
breaches.push({
|
|
113
|
+
cap: 'per_agent',
|
|
114
|
+
reason: `projected agent day spend ${formatUsd(projectedAgentDaySpend)} exceeds per_agent cap ${formatUsd(agentCap)}`,
|
|
115
|
+
});
|
|
116
|
+
}
|
|
117
|
+
// require_confirm_over only governs interactive confirm, not a hard block.
|
|
118
|
+
let needsConfirm = cfg.require_confirm_over !== undefined && est.estUsd >= cfg.require_confirm_over;
|
|
119
|
+
// Unpriced model + active caps: the estimate is $0 because we have no price
|
|
120
|
+
// for this model, so NONE of the per_run/per_day caps above can ever trip and
|
|
121
|
+
// we'd silently wave the run through. Never $0-wave-through (#346): when caps
|
|
122
|
+
// are set but the model is unpriced, require confirmation so the user is told
|
|
123
|
+
// the cap cannot be enforced for this model rather than getting a false pass.
|
|
124
|
+
if (!est.priced && hasAnyCap(cfg) && breaches.length === 0) {
|
|
125
|
+
needsConfirm = true;
|
|
126
|
+
return {
|
|
127
|
+
allow: true,
|
|
128
|
+
needsConfirm: true,
|
|
129
|
+
reason: `model is unpriced — budget caps cannot be enforced for this run (estimate is $0); confirm to proceed`,
|
|
130
|
+
projectedDaySpend,
|
|
131
|
+
projectedProjectSpend,
|
|
132
|
+
};
|
|
133
|
+
}
|
|
134
|
+
if (breaches.length > 0) {
|
|
135
|
+
const first = breaches[0];
|
|
136
|
+
return {
|
|
137
|
+
allow: warnOnly,
|
|
138
|
+
needsConfirm: warnOnly ? needsConfirm : false,
|
|
139
|
+
reason: first.reason,
|
|
140
|
+
blockedCap: first.cap,
|
|
141
|
+
projectedDaySpend,
|
|
142
|
+
projectedProjectSpend,
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
return {
|
|
146
|
+
allow: true,
|
|
147
|
+
needsConfirm,
|
|
148
|
+
reason: needsConfirm
|
|
149
|
+
? `estimated ${formatUsd(est.estUsd)} is at or above confirm threshold ${formatUsd(cfg.require_confirm_over)}`
|
|
150
|
+
: undefined,
|
|
151
|
+
projectedDaySpend,
|
|
152
|
+
projectedProjectSpend,
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
/**
 * Render the single-line cost estimate shown in the `agents run` preamble.
 *
 * @param agent - Agent name interpolated into the banner.
 * @param model - Model name interpolated into the banner.
 * @param est - Estimate object; `priced`, `estUsd` and `basis` are read.
 * @returns `[budget] est. <cost> for this <agent> run (<model>, <basis>)`,
 *   where `<cost>` is `formatUsd(est.estUsd)` for a priced estimate and the
 *   literal `unpriced` otherwise.
 */
export function formatEstimateBanner(agent, model, est) {
    // Only a priced estimate carries a meaningful dollar figure.
    let costText = 'unpriced';
    if (est.priced) {
        costText = formatUsd(est.estUsd);
    }
    // Translate the internal basis tag into wording for humans; any tag other
    // than the two known ones falls through to "no basis".
    let basisText;
    switch (est.basis) {
        case 'ledger-average':
            basisText = 'recent average';
            break;
        case 'prompt-heuristic':
            basisText = 'prompt size';
            break;
        default:
            basisText = 'no basis';
    }
    return `[budget] est. ${costText} for this ${agent} run (${model}, ${basisText})`;
}
|
|
165
|
+
/**
 * High-level pre-flight budget gate for a single run.
 *
 * Always performed, in this order: resolve the budget config for `args.cwd`,
 * take the ledger from `args.ledger` (falling back to `loadLedger()`),
 * estimate the run cost, and build the estimate banner.
 *
 * Only when at least one cap is configured does it additionally derive the
 * ledger state for `args.agent` / `args.project` and evaluate the caps via
 * `enforcePreflight`. With no caps configured the result is flagged
 * `dormant: true` and carries a pass-through decision (allowed, no
 * confirmation, zero projected spend); the estimate and banner are still
 * returned in that case.
 *
 * This function only reports; acting on `decision` (print banner, confirm, or
 * block) is left to the caller.
 *
 * @param args - Reads `cwd`, `ledger`, `agent`, `model`, `mode`, `prompt` and `project`.
 * @returns `{ dormant, cfg, estimate, decision, banner }`.
 */
export function runPreflightGate(args) {
    const budget = resolveBudgetConfig(args.cwd);
    const entries = args.ledger ?? loadLedger();
    const projected = estimateRunCost({
        agent: args.agent,
        model: args.model,
        mode: args.mode,
        promptChars: args.prompt?.length,
        ledger: entries,
    });
    const line = formatEstimateBanner(args.agent, args.model, projected);
    if (hasAnyCap(budget)) {
        // Active budget: fold the ledger into spend state and evaluate every cap.
        const spendState = ledgerStateFor(args.agent, args.project, entries);
        const verdict = enforcePreflight(budget, spendState, projected);
        return {
            dormant: false,
            cfg: budget,
            estimate: projected,
            decision: verdict,
            banner: line,
        };
    }
    // No caps configured: report a dormant gate with an unconditional pass.
    const passThrough = {
        allow: true,
        needsConfirm: false,
        projectedDaySpend: 0,
        projectedProjectSpend: 0,
    };
    return {
        dormant: true,
        cfg: budget,
        estimate: projected,
        decision: passThrough,
        banner: line,
    };
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Harness-level loop checkpoint (issue #332).
|
|
3
|
+
*
|
|
4
|
+
* A checkpoint is the durable harness state for a `--loop` run: it records the
|
|
5
|
+
* iteration count, the pinned session id, the prompt being re-injected, and the
|
|
6
|
+
* loop config — everything `--resume-checkpoint` needs to continue a run that a
|
|
7
|
+
* SIGTERM, timeout, or machine sleep killed mid-flight.
|
|
8
|
+
*
|
|
9
|
+
* This is NOT provider-side state. `--session-id` resumes Claude's *conversation*
|
|
10
|
+
* (server-side); a checkpoint resumes the *harness* (iteration count, loop
|
|
11
|
+
* variables, prompt chain) — the part Claude's own resume cannot recover.
|
|
12
|
+
*
|
|
13
|
+
* Atomic write (temp + rename) mirrors `writeRunMeta` in routines.ts so a crash
|
|
14
|
+
* mid-write never leaves a half-written checkpoint that `readCheckpoint` would
|
|
15
|
+
* choke on. `readCheckpoint` returns null on a missing or corrupt file (mirrors
|
|
16
|
+
* `readRunMeta`) — a corrupt checkpoint is a "start fresh", never a throw.
|
|
17
|
+
*/
|
|
18
|
+
import type { AgentId } from './types.js';
|
|
19
|
+
import type { LoopConfig, LoopSignal } from './loop.js';
|
|
20
|
+
/** Durable harness state for a looped run, serialized to checkpoint.json. Required members: id, agent, iteration, loop, createdAt, updatedAt; the rest are optional. */
|
|
21
|
+
export interface Checkpoint {
|
|
22
|
+
/** Run identifier: runId == the run directory name under getRunsDir(). */
|
|
23
|
+
id: string;
|
|
24
|
+
agent: AgentId; // presumably the agent the looped run targets — TODO confirm against the writer
|
|
25
|
+
version?: string; // NOTE(review): meaning not shown here (agent version vs. schema version) — confirm
|
|
26
|
+
/** The prompt re-injected each iteration. */
|
|
27
|
+
prompt?: string;
|
|
28
|
+
/** Pinned Claude session id so a resume continues the same conversation. */
|
|
29
|
+
sessionId?: string;
|
|
30
|
+
/** Iterations COMPLETED so far (not the one in flight). A resume starts at iteration + 1. */
|
|
31
|
+
iteration: number;
|
|
32
|
+
/** The loop config governing termination. */
|
|
33
|
+
loop: LoopConfig;
|
|
34
|
+
/** Last loop-signal read, if any (for audit / resume context). */
|
|
35
|
+
loopSignal?: LoopSignal;
|
|
36
|
+
/** Cumulative tokens consumed across all iterations so far. */
|
|
37
|
+
cumulativeTokens?: number;
|
|
38
|
+
createdAt: string; // presumably an ISO-8601 timestamp string — TODO confirm
|
|
39
|
+
updatedAt: string; // presumably an ISO-8601 timestamp string, refreshed on each write — TODO confirm
|
|
40
|
+
}
|
|
41
|
+
/**
 * Path to a run's checkpoint file: <runsDir>/<runId>/checkpoint.json.
 *
 * @param runId - Run identifier, i.e. the name of the run's directory.
 * @returns The checkpoint file path as a string.
 *
 * NOTE(review): only the declaration is visible here; whether the returned
 * path is absolute, and whether `runId` is validated, is not shown — confirm
 * in the implementation.
 */
|
|
42
|
+
export declare function checkpointPath(runId: string): string;
|
|
43
|
+
/**
 * Write a checkpoint atomically (temp file + rename). The rename is atomic on a
 * single filesystem, so a reader never observes a partially written file.
 * Mirrors the durable-write contract of `writeRunMeta`.
 *
 * @param c - The checkpoint to persist.
 * @param file - Optional destination path. NOTE(review): the default used when
 *   this is omitted is not visible in the declaration — presumably
 *   `checkpointPath(c.id)`; confirm in the implementation.
 */
|
|
48
|
+
export declare function writeCheckpoint(c: Checkpoint, file?: string): void;
|
|
49
|
+
/**
 * Read a checkpoint from disk. Returns null if the file is missing or its
 * contents are not valid JSON — corruption means "no resumable state", which
 * the caller treats as a fresh start. Mirrors `readRunMeta`.
 *
 * @param file - Path of the checkpoint file to read.
 * @returns The parsed checkpoint, or `null` when there is nothing resumable.
 *
 * NOTE(review): whether syntactically valid JSON with the wrong shape is
 * rejected is not visible from this declaration — confirm before relying on
 * individual fields of the result.
 */
|
|
54
|
+
export declare function readCheckpoint(file: string): Checkpoint | null;
|