npm - @phnx-labs/agents-cli - Versions diffs - 1.20.17 → 1.20.19 - Mend

@phnx-labs/agents-cli 1.20.17 → 1.20.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

package/CHANGELOG.md +19 -0
package/README.md +1 -1
package/dist/commands/budget.d.ts +14 -0
package/dist/commands/budget.js +137 -0
package/dist/commands/cost.d.ts +12 -0
package/dist/commands/cost.js +139 -0
package/dist/commands/exec.d.ts +20 -0
package/dist/commands/exec.js +382 -5
package/dist/commands/secrets.d.ts +15 -0
package/dist/commands/secrets.js +343 -16
package/dist/commands/sessions.js +4 -0
package/dist/index.js +4 -0
package/dist/lib/budget/config.d.ts +9 -0
package/dist/lib/budget/config.js +115 -0
package/dist/lib/budget/enforce.d.ts +94 -0
package/dist/lib/budget/enforce.js +151 -0
package/dist/lib/budget/ledger.d.ts +61 -0
package/dist/lib/budget/ledger.js +107 -0
package/dist/lib/budget/preflight.d.ts +110 -0
package/dist/lib/budget/preflight.js +200 -0
package/dist/lib/checkpoint.d.ts +54 -0
package/dist/lib/checkpoint.js +56 -0
package/dist/lib/cloud/rush.js +18 -0
package/dist/lib/exec.d.ts +36 -0
package/dist/lib/exec.js +192 -4
package/dist/lib/git.d.ts +18 -0
package/dist/lib/git.js +67 -4
package/dist/lib/loop.d.ts +145 -0
package/dist/lib/loop.js +330 -0
package/dist/lib/mcp.d.ts +7 -0
package/dist/lib/mcp.js +24 -0
package/dist/lib/models.d.ts +11 -0
package/dist/lib/models.js +21 -0
package/dist/lib/plugins.js +5 -2
package/dist/lib/pricing/cost.d.ts +46 -0
package/dist/lib/pricing/cost.js +71 -0
package/dist/lib/pricing/index.d.ts +8 -0
package/dist/lib/pricing/index.js +8 -0
package/dist/lib/pricing/prices.json +138 -0
package/dist/lib/pricing/table.d.ts +17 -0
package/dist/lib/pricing/table.js +73 -0
package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
package/dist/lib/secrets/agent.d.ts +147 -0
package/dist/lib/secrets/agent.js +500 -0
package/dist/lib/secrets/bundles.d.ts +58 -7
package/dist/lib/secrets/bundles.js +264 -75
package/dist/lib/secrets/filestore.d.ts +82 -0
package/dist/lib/secrets/filestore.js +295 -0
package/dist/lib/secrets/linux.d.ts +6 -24
package/dist/lib/secrets/linux.js +22 -265
package/dist/lib/session/db.d.ts +40 -0
package/dist/lib/session/db.js +84 -2
package/dist/lib/session/discover.d.ts +2 -0
package/dist/lib/session/discover.js +126 -2
package/dist/lib/session/render.d.ts +2 -0
package/dist/lib/session/render.js +1 -1
package/dist/lib/session/types.d.ts +4 -0
package/dist/lib/teams/agents.d.ts +32 -0
package/dist/lib/teams/agents.js +66 -3
package/dist/lib/teams/api.js +20 -0
package/dist/lib/teams/parsers.js +16 -4
package/dist/lib/types.d.ts +48 -0
package/dist/lib/workflows.d.ts +56 -0
package/dist/lib/workflows.js +72 -5
package/package.json +2 -1

package/dist/lib/budget/config.js ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Budget config resolution (issue #346).
+ *
+ * The `budget:` block can live in the user/global agents.yaml (`readMeta().budget`)
+ * and in any project-local agents.yaml walked from cwd upward. Precedence is
+ * project > user, matching `run:` resolution (lib/run-config.ts). Caps merge
+ * field-by-field — a project that sets only `per_run` inherits the user's
+ * `per_day`/`per_project`/`per_agent` rather than wiping them.
+ *
+ * This is the single resolver the pre-flight gate, the live watcher, and the
+ * `agents budget` command all route through, so the effective cap set is
+ * computed in exactly one place.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import * as yaml from 'yaml';
+import { getUserAgentsDir, readMeta } from '../state.js';
+function isRecord(value) {
+    return typeof value === 'object' && value !== null && !Array.isArray(value);
+}
+/**
+ * Coerce a raw parsed `budget:` block into a typed BudgetConfig, dropping any
+ * field whose value is the wrong shape. Malformed entries are ignored, not
+ * thrown — a typo in one cap must never crash a run (no-fallbacks applies to
+ * the data path, not to user-typed config we choose to be lenient about).
+ */
+function coerceBudget(raw) {
+    if (!isRecord(raw))
+        return {};
+    const out = {};
+    if (typeof raw.currency === 'string')
+        out.currency = raw.currency;
+    if (typeof raw.per_run === 'number' && raw.per_run >= 0)
+        out.per_run = raw.per_run;
+    if (typeof raw.per_day === 'number' && raw.per_day >= 0)
+        out.per_day = raw.per_day;
+    if (typeof raw.per_project === 'number' && raw.per_project >= 0)
+        out.per_project = raw.per_project;
+    if (raw.on_exceed === 'block' || raw.on_exceed === 'warn')
+        out.on_exceed = raw.on_exceed;
+    if (typeof raw.require_confirm_over === 'number' && raw.require_confirm_over >= 0) {
+        out.require_confirm_over = raw.require_confirm_over;
+    }
+    if (isRecord(raw.per_agent)) {
+        const perAgent = {};
+        for (const [k, v] of Object.entries(raw.per_agent)) {
+            if (typeof v === 'number' && v >= 0)
+                perAgent[k] = v;
+        }
+        if (Object.keys(perAgent).length > 0)
+            out.per_agent = perAgent;
+    }
+    return out;
+}
+/** Merge a higher-precedence budget over a base. Set fields win; per_agent merges key-by-key. */
+function mergeBudget(base, over) {
+    const merged = { ...base, ...stripUndefined(over) };
+    if (base.per_agent || over.per_agent) {
+        merged.per_agent = { ...(base.per_agent ?? {}), ...(over.per_agent ?? {}) };
+    }
+    return merged;
+}
+function stripUndefined(cfg) {
+    const out = {};
+    for (const [k, v] of Object.entries(cfg)) {
+        if (v !== undefined)
+            out[k] = v;
+    }
+    return out;
+}
+/** Read project-local `budget:` blocks from nearest dir upward, nearest LAST (highest precedence). */
+function getProjectBudgets(startPath) {
+    const configs = [];
+    let dir = path.resolve(startPath);
+    const userAgentsYaml = path.join(getUserAgentsDir(), 'agents.yaml');
+    while (dir !== path.dirname(dir)) {
+        const manifestPath = path.join(dir, 'agents.yaml');
+        if (manifestPath !== userAgentsYaml && fs.existsSync(manifestPath)) {
+            try {
+                const parsed = yaml.parse(fs.readFileSync(manifestPath, 'utf-8'));
+                if (isRecord(parsed) && parsed.budget !== undefined) {
+                    configs.push(coerceBudget(parsed.budget));
+                }
+            }
+            catch {
+                // Malformed project config — ignore and keep walking.
+            }
+        }
+        dir = path.dirname(dir);
+    }
+    // configs[0] is the nearest dir. Reverse so the nearest applies LAST (wins).
+    return configs.reverse();
+}
+/**
+ * Effective budget for `cwd`: user/global base, then each project-local block
+ * from farthest ancestor to nearest, nearest winning. `on_exceed` defaults to
+ * `block` when nothing sets it (fail-closed: the safe default is to enforce).
+ */
+export function resolveBudgetConfig(cwd = process.cwd()) {
+    const userBudget = coerceBudget(readMeta().budget);
+    let merged = userBudget;
+    for (const projectBudget of getProjectBudgets(cwd)) {
+        merged = mergeBudget(merged, projectBudget);
+    }
+    if (merged.on_exceed === undefined)
+        merged.on_exceed = 'block';
+    return merged;
+}
+/** True when at least one enforceable cap is set. No caps => budget feature is dormant. */
+export function hasAnyCap(cfg) {
+    return (cfg.per_run !== undefined ||
+        cfg.per_day !== undefined ||
+        cfg.per_project !== undefined ||
+        (cfg.per_agent !== undefined && Object.keys(cfg.per_agent).length > 0));
+}

package/dist/lib/budget/enforce.d.ts ADDED Viewed

@@ -0,0 +1,94 @@
+/**
+ * Live spend watcher + cap math (issue #346).
+ *
+ * This is the provider-agnostic shared surface the loop driver (#332) will
+ * reuse for its budget guard. It knows nothing about child processes, agents,
+ * or the ledger — it accepts parsed usage events and a caps object, accumulates
+ * cost via the canonical pricing module, and fires `onBreach` exactly once when
+ * any active cap is crossed.
+ *
+ * The accumulation is the cross-vendor primitive: feed Claude usage and Codex
+ * usage to the same watcher under one `per_project` / `per_run` cap and the
+ * spend aggregates across both — no single-vendor control can do that.
+ */
+import type { AgentId, BudgetConfig } from '../types.js';
+/**
+ * A parsed usage event from any agent's stream (fields match session/parse).
+ *
+ * Every field is optional. When fed to a watcher from `makeLiveSpendWatcher`,
+ * missing `inputTokens`/`outputTokens` are priced as 0, a missing `model` is
+ * priced as the empty string, and an event without `agent` still counts toward
+ * the run/day/project totals but not toward any per-agent total.
+ */
+export interface UsageEvent {
+    agent?: AgentId | string;
+    model?: string;
+    inputTokens?: number;
+    outputTokens?: number;
+    cacheReadTokens?: number;
+    cacheCreationTokens?: number;
+}
+/**
+ * Caps the watcher enforces. `priorDaySpend` / `priorProjectSpend` seed the
+ * accumulators with spend already on the ledger BEFORE this run started, so a
+ * per_day cap counts today's earlier runs too — not just this process. Per-cap
+ * fields are USD; undefined means "not enforced".
+ *
+ * The watcher compares with a strict `>`: spend exactly equal to a cap is not
+ * a breach. A `perAgent` cap is only consulted for the agent named on the
+ * event currently being fed.
+ */
+export interface LiveCaps {
+    perRun?: number;
+    perDay?: number;
+    perProject?: number;
+    /** Per-agent daily caps. Each agent's running spend is checked against its own cap. */
+    perAgent?: Partial<Record<string, number>>;
+    /** Day spend already on the ledger before this run (cross-vendor). */
+    priorDaySpend?: number;
+    /** Project spend already on the ledger before this run (cross-vendor). */
+    priorProjectSpend?: number;
+    /** Per-agent day spend already on the ledger before this run, keyed by agent. */
+    priorAgentDaySpend?: Partial<Record<string, number>>;
+}
+/**
+ * Which cap tripped, and the spend figures at the moment of the breach.
+ *
+ * When several caps are over at once the watcher reports only the first one in
+ * the order per_run, per_day, per_project, per_agent.
+ */
+export interface BreachInfo {
+    cap: 'per_run' | 'per_day' | 'per_project' | 'per_agent';
+    /** The configured limit that was crossed (USD). */
+    limit: number;
+    /** The spend that crossed it (USD). */
+    spend: number;
+    /** Agent attributed to the breach (only meaningful for per_agent). */
+    agent?: string;
+    /** This run's accumulated spend so far (USD). */
+    runSpend: number;
+}
+/**
+ * Public watcher surface. After a breach `feedUsage` keeps accruing spend but
+ * never fires `onBreach` a second time (no double-fire).
+ */
+export interface LiveSpendWatcher {
+    /** Feed one parsed usage event; accrues cost and may fire onBreach. Events priced at or below 0 are ignored. */
+    feedUsage(event: UsageEvent): void;
+    /** Total USD this run has accumulated across all fed events. */
+    runSpend(): number;
+    /** True once a cap has been breached. */
+    breached(): boolean;
+    /** Stop accepting events; later `feedUsage` calls are ignored. Idempotent. */
+    dispose(): void;
+}
+/**
+ * Convert a resolved BudgetConfig + prior ledger spend into the caps the watcher needs.
+ *
+ * @param cfg Resolved budget; `per_run`/`per_day`/`per_project`/`per_agent` are copied across as-is.
+ * @param prior Spend already on the ledger. Omitted figures default to 0, and
+ *   `agentDaySpend` defaults to an empty map.
+ * @returns A LiveCaps object for `makeLiveSpendWatcher`.
+ */
+export declare function capsFromConfig(cfg: BudgetConfig, prior?: {
+    daySpend?: number;
+    projectSpend?: number;
+    agentDaySpend?: Partial<Record<string, number>>;
+}): LiveCaps;
+/**
+ * Create a live spend watcher. `onBreach` fires at most once, on the first
+ * event that pushes any active cap from at-or-under to over. After it fires the
+ * watcher keeps accumulating (so `runSpend()` stays accurate for the final
+ * ledger record) but never calls `onBreach` again.
+ *
+ * @param args.caps Limits plus the prior-spend seeds for the day/project/agent accumulators.
+ * @param args.onBreach Called synchronously from inside `feedUsage` with the cap that tripped.
+ * @returns The watcher handle.
+ */
+export declare function makeLiveSpendWatcher(args: {
+    caps: LiveCaps;
+    onBreach: (breach: BreachInfo) => void;
+}): LiveSpendWatcher;
+/**
+ * Incrementally extract usage events from a stream-json chunk. Buffers a partial
+ * trailing line across calls (returned in `rest`), parses each complete line,
+ * and yields one UsageEvent per line that carries token counts. Provider shapes
+ * handled: Claude/`--json` assistant turns (`message.usage` with
+ * `input_tokens`/`output_tokens`/`cache_*_input_tokens`) and the flatter
+ * `usage.record` shape (`usage.input_tokens`/`output`). Lines that aren't JSON
+ * or carry no usage are skipped — this never throws on agent output.
+ * Lines whose top-level `type` is `"result"` are skipped as well, so a
+ * cumulative total is not counted on top of the per-turn usage.
+ *
+ * @param chunk Newly received stream text.
+ * @param pending The `rest` returned by the previous call (`''` on the first call).
+ * @param fallbackModel Model used when a line does not name one.
+ * @param fallbackAgent Agent stamped on every returned event.
+ * @returns The parsed events plus the unterminated tail to pass back as `pending`.
+ */
+export declare function extractUsageEvents(chunk: string, pending: string, fallbackModel?: string, fallbackAgent?: string): {
+    events: UsageEvent[];
+    rest: string;
+};

package/dist/lib/budget/enforce.js ADDED Viewed

@@ -0,0 +1,151 @@
+import { actualCost } from '../pricing/index.js';
+/** Convert a resolved BudgetConfig + prior ledger spend into the caps the watcher needs. */
+export function capsFromConfig(cfg, prior) {
+    return {
+        perRun: cfg.per_run,
+        perDay: cfg.per_day,
+        perProject: cfg.per_project,
+        perAgent: cfg.per_agent,
+        priorDaySpend: prior?.daySpend ?? 0,
+        priorProjectSpend: prior?.projectSpend ?? 0,
+        priorAgentDaySpend: prior?.agentDaySpend ?? {},
+    };
+}
+/**
+ * Create a live spend watcher. `onBreach` fires at most once, on the first
+ * event that pushes any active cap from at-or-under to over. After it fires the
+ * watcher keeps accumulating (so `runSpend()` stays accurate for the final
+ * ledger record) but never calls `onBreach` again.
+ */
+export function makeLiveSpendWatcher(args) {
+    const { caps, onBreach } = args;
+    let run = 0;
+    // Cross-vendor accumulators, seeded with pre-run ledger spend.
+    let day = caps.priorDaySpend ?? 0;
+    let project = caps.priorProjectSpend ?? 0;
+    const agentDay = {};
+    for (const [k, v] of Object.entries(caps.priorAgentDaySpend ?? {})) {
+        if (typeof v === 'number')
+            agentDay[k] = v;
+    }
+    let didBreach = false;
+    let disposed = false;
+    function checkBreach(agent) {
+        if (caps.perRun !== undefined && run > caps.perRun) {
+            return { cap: 'per_run', limit: caps.perRun, spend: run, runSpend: run };
+        }
+        if (caps.perDay !== undefined && day > caps.perDay) {
+            return { cap: 'per_day', limit: caps.perDay, spend: day, runSpend: run };
+        }
+        if (caps.perProject !== undefined && project > caps.perProject) {
+            return { cap: 'per_project', limit: caps.perProject, spend: project, runSpend: run };
+        }
+        if (agent && caps.perAgent && caps.perAgent[agent] !== undefined) {
+            const limit = caps.perAgent[agent];
+            if ((agentDay[agent] ?? 0) > limit) {
+                return { cap: 'per_agent', limit, spend: agentDay[agent], agent, runSpend: run };
+            }
+        }
+        return null;
+    }
+    return {
+        feedUsage(event) {
+            if (disposed)
+                return;
+            const { usd } = actualCost(event.model ?? '', {
+                inputTokens: event.inputTokens ?? 0,
+                outputTokens: event.outputTokens ?? 0,
+                cacheReadTokens: event.cacheReadTokens,
+                cacheCreationTokens: event.cacheCreationTokens,
+            });
+            if (usd <= 0)
+                return;
+            const agent = event.agent ? String(event.agent) : undefined;
+            run += usd;
+            day += usd;
+            project += usd;
+            if (agent)
+                agentDay[agent] = (agentDay[agent] ?? 0) + usd;
+            if (didBreach)
+                return;
+            const breach = checkBreach(agent);
+            if (breach) {
+                didBreach = true;
+                onBreach(breach);
+            }
+        },
+        runSpend: () => run,
+        breached: () => didBreach,
+        dispose() {
+            disposed = true;
+        },
+    };
+}
+/**
+ * Incrementally extract usage events from a stream-json chunk. Buffers a partial
+ * trailing line across calls (returned in `rest`), parses each complete line,
+ * and yields one UsageEvent per line that carries token counts. Provider shapes
+ * handled: Claude/`--json` assistant turns (`message.usage` with
+ * `input_tokens`/`output_tokens`/`cache_*_input_tokens`) and the flatter
+ * `usage.record` shape (`usage.input_tokens`/`output`). Lines that aren't JSON
+ * or carry no usage are skipped — this never throws on agent output.
+ */
+export function extractUsageEvents(chunk, pending, fallbackModel, fallbackAgent) {
+    const combined = pending + chunk;
+    const lines = combined.split('\n');
+    const rest = lines.pop() ?? '';
+    const events = [];
+    for (const line of lines) {
+        const trimmed = line.trim();
+        if (!trimmed || trimmed[0] !== '{')
+            continue;
+        let obj;
+        try {
+            obj = JSON.parse(trimmed);
+        }
+        catch {
+            continue;
+        }
+        const ev = usageFromObject(obj, fallbackModel, fallbackAgent);
+        if (ev)
+            events.push(ev);
+    }
+    return { events, rest };
+}
+function usageFromObject(obj, fallbackModel, fallbackAgent) {
+    // Claude emits a final `type:"result"` event carrying a TOP-LEVEL cumulative
+    // `usage` that already sums every per-turn `message.usage`. Counting both the
+    // per-turn turns AND this cumulative total double-counts a multi-turn run
+    // (~2x). The canonical session parser (src/lib/session/parse.ts) reads usage
+    // ONLY from `message.usage` and extracts nothing from the result line — mirror
+    // that here: skip result lines entirely for usage.
+    if (obj?.type === 'result')
+        return null;
+    // Claude stream-json assistant turn.
+    const mu = obj?.message?.usage;
+    if (mu && (typeof mu.input_tokens === 'number' || typeof mu.output_tokens === 'number')) {
+        return {
+            agent: fallbackAgent,
+            model: obj.message.model ?? fallbackModel,
+            inputTokens: mu.input_tokens ?? 0,
+            outputTokens: mu.output_tokens ?? 0,
+            cacheReadTokens: mu.cache_read_input_tokens,
+            cacheCreationTokens: mu.cache_creation_input_tokens,
+        };
+    }
+    // Flatter usage.record / usage shape (Codex / `usage.record`). The result-line
+    // guard above already excludes Claude's cumulative result usage, so this only
+    // matches genuine per-event usage records.
+    const u = obj?.usage;
+    if (u && (typeof u.input_tokens === 'number' || typeof u.output === 'number' || typeof u.output_tokens === 'number')) {
+        return {
+            agent: fallbackAgent,
+            model: obj.model ?? u.model ?? fallbackModel,
+            inputTokens: u.input_tokens ?? u.inputOther ?? 0,
+            outputTokens: u.output_tokens ?? u.output ?? 0,
+            cacheReadTokens: u.cache_read_input_tokens,
+            cacheCreationTokens: u.cache_creation_input_tokens,
+        };
+    }
+    return null;
+}

package/dist/lib/budget/ledger.d.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * A single spend observation. Append-only; never mutated in place.
+ *
+ * `recordSpend` writes each entry as one JSON line. `loadLedger` keeps a line
+ * only when it parses as JSON and its `costUsd` is a number.
+ */
+export interface SpendEntry {
+    /** Run identifier — groups multiple usage observations from one dispatch. */
+    runId: string;
+    /** Agent id (claude, codex, ...). The cross-vendor attribution key. */
+    agent: string;
+    /** Project key (absolute path or repo slug). Empty string when unknown. */
+    project: string;
+    /** Local calendar day, YYYY-MM-DD. */
+    day: string;
+    /** Model id as reported by the stream (may carry vendor prefix / date suffix). */
+    model: string;
+    inputTok: number;
+    outputTok: number;
+    /** Combined cache read + creation tokens (kept as one field for the ledger). */
+    cacheTok: number;
+    /** USD cost of THIS observation, via actualCost() at write time. */
+    costUsd: number;
+    /** Where the spend came from: local run, teams teammate, or cloud dispatch. */
+    source: 'run' | 'teams' | 'cloud';
+    /** ISO timestamp of the observation. */
+    ts: string;
+}
+/**
+ * Token bundle for a single observation (matches session/parse usage fields).
+ *
+ * All counts are optional; `recordSpend` records a missing count as 0.
+ * NOTE(review): `recordSpend` takes the model from its own `model` input and
+ * does not read `model` here — confirm whether this field is used elsewhere.
+ */
+export interface UsageObservation {
+    model?: string;
+    inputTokens?: number;
+    outputTokens?: number;
+    cacheReadTokens?: number;
+    cacheCreationTokens?: number;
+}
+/** Default ledger path: <history>/spend/ledger.jsonl, where <history> is whatever `getHistoryDir()` returns. */
+export declare function defaultLedgerPath(): string;
+/**
+ * Local YYYY-MM-DD for a Date (defaults to now). Local, not UTC — caps are a human-day notion.
+ * Month and day are zero-padded to two digits.
+ */
+export declare function localDay(d?: Date): string;
+/**
+ * Append one spend observation. Computes `costUsd` from the usage via the
+ * canonical pricing module (unpriced models contribute $0). Returns the written
+ * entry. Creates the spend dir on first write.
+ *
+ * @param input Observation to record. `project` defaults to `''` and `ts`
+ *   defaults to the current time; `day` is derived from `ts` in local time.
+ * @param ledgerPath File to append to; defaults to `defaultLedgerPath()`.
+ * @returns The entry exactly as it was serialized to the ledger.
+ */
+export declare function recordSpend(input: {
+    runId: string;
+    agent: string;
+    project?: string;
+    model: string;
+    usage: UsageObservation;
+    source: SpendEntry['source'];
+    ts?: Date;
+}, ledgerPath?: string): SpendEntry;
+/**
+ * Load every entry. Skips malformed lines (a half-written final line never breaks a rollup).
+ * Returns an empty array when the ledger file does not exist. `ledgerPath`
+ * defaults to `defaultLedgerPath()`.
+ */
+export declare function loadLedger(ledgerPath?: string): SpendEntry[];
+/**
+ * Total USD spend on a given local day across ALL agents (cross-vendor).
+ * Like every rollup below, `ledger` defaults to a fresh `loadLedger()` read of
+ * the default ledger; pass an already-loaded array to avoid re-reading the file.
+ */
+export declare function spendForDay(day: string, ledger?: SpendEntry[]): number;
+/** Total USD spend on a given day for ONE agent (per-agent cap accounting). */
+export declare function spendForAgentDay(agent: string, day: string, ledger?: SpendEntry[]): number;
+/** Total USD spend attributed to an agent across all time. */
+export declare function spendForAgent(agent: string, ledger?: SpendEntry[]): number;
+/** Total USD spend attributed to a project across all time (cross-vendor). */
+export declare function spendForProject(project: string, ledger?: SpendEntry[]): number;
+/** Total USD spend for a single run id (all of its usage observations). */
+export declare function spendForRun(runId: string, ledger?: SpendEntry[]): number;

package/dist/lib/budget/ledger.js ADDED Viewed

@@ -0,0 +1,107 @@
+/**
+ * Append-only spend ledger (issue #346).
+ *
+ * Every dispatched run that produces token usage records one JSONL line under
+ * `<history>/spend/ledger.jsonl`. The ledger is the shared artifact #323's
+ * `agents cost` can later read for $ rollups, so the entry shape stays clean
+ * and stable: one record = one usage observation attributed to a run.
+ *
+ * `costUsd` is computed at write time via the canonical pricing module
+ * (lib/pricing) so the ledger is self-contained — a reader never needs the
+ * pricing table to sum spend. Rollups (`spendForDay`/`spendForAgent`/...) are
+ * pure folds over the file; for the modest line counts a developer accrues this
+ * is plenty fast, and there's no index to corrupt.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { getHistoryDir } from '../state.js';
+import { actualCost } from '../pricing/index.js';
+/** Default ledger path: <history>/spend/ledger.jsonl. */
+export function defaultLedgerPath() {
+    return path.join(getHistoryDir(), 'spend', 'ledger.jsonl');
+}
+/** Local YYYY-MM-DD for a Date (defaults to now). Local, not UTC — caps are a human-day notion. */
+export function localDay(d = new Date()) {
+    const y = d.getFullYear();
+    const m = String(d.getMonth() + 1).padStart(2, '0');
+    const day = String(d.getDate()).padStart(2, '0');
+    return `${y}-${m}-${day}`;
+}
+/**
+ * Append one spend observation. Computes `costUsd` from the usage via the
+ * canonical pricing module (unpriced models contribute $0). Returns the written
+ * entry. Creates the spend dir on first write.
+ */
+export function recordSpend(input, ledgerPath = defaultLedgerPath()) {
+    const ts = input.ts ?? new Date();
+    const cacheTok = (input.usage.cacheReadTokens ?? 0) + (input.usage.cacheCreationTokens ?? 0);
+    const { usd } = actualCost(input.model, {
+        inputTokens: input.usage.inputTokens ?? 0,
+        outputTokens: input.usage.outputTokens ?? 0,
+        cacheReadTokens: input.usage.cacheReadTokens,
+        cacheCreationTokens: input.usage.cacheCreationTokens,
+    });
+    const entry = {
+        runId: input.runId,
+        agent: input.agent,
+        project: input.project ?? '',
+        day: localDay(ts),
+        model: input.model,
+        inputTok: input.usage.inputTokens ?? 0,
+        outputTok: input.usage.outputTokens ?? 0,
+        cacheTok,
+        costUsd: usd,
+        source: input.source,
+        ts: ts.toISOString(),
+    };
+    fs.mkdirSync(path.dirname(ledgerPath), { recursive: true });
+    fs.appendFileSync(ledgerPath, JSON.stringify(entry) + '\n');
+    return entry;
+}
+/** Load every entry. Skips malformed lines (a half-written final line never breaks a rollup). */
+export function loadLedger(ledgerPath = defaultLedgerPath()) {
+    if (!fs.existsSync(ledgerPath))
+        return [];
+    const out = [];
+    for (const line of fs.readFileSync(ledgerPath, 'utf-8').split('\n')) {
+        const trimmed = line.trim();
+        if (!trimmed)
+            continue;
+        try {
+            const parsed = JSON.parse(trimmed);
+            if (typeof parsed.costUsd === 'number')
+                out.push(parsed);
+        }
+        catch {
+            // Tolerate a torn final line; everything before it is intact.
+        }
+    }
+    return out;
+}
+function sum(entries, pred) {
+    let total = 0;
+    for (const e of entries)
+        if (pred(e))
+            total += e.costUsd;
+    return total;
+}
+/** Total USD spend on a given local day across ALL agents (cross-vendor). */
+export function spendForDay(day, ledger = loadLedger()) {
+    return sum(ledger, (e) => e.day === day);
+}
+/** Total USD spend on a given day for ONE agent (per-agent cap accounting). */
+export function spendForAgentDay(agent, day, ledger = loadLedger()) {
+    return sum(ledger, (e) => e.agent === agent && e.day === day);
+}
+/** Total USD spend attributed to an agent across all time. */
+export function spendForAgent(agent, ledger = loadLedger()) {
+    return sum(ledger, (e) => e.agent === agent);
+}
+/** Total USD spend attributed to a project across all time (cross-vendor). */
+export function spendForProject(project, ledger = loadLedger()) {
+    return sum(ledger, (e) => e.project === project);
+}
+/** Total USD spend for a single run id (all of its usage observations). */
+export function spendForRun(runId, ledger = loadLedger()) {
+    return sum(ledger, (e) => e.runId === runId);
+}

package/dist/lib/budget/preflight.d.ts ADDED Viewed

@@ -0,0 +1,110 @@
+/**
+ * Pre-flight cost estimate + gate (issue #346).
+ *
+ * Before a run spawns we estimate its cost and decide whether to allow it. The
+ * estimate's token basis comes from recent ledger averages for the same agent
+ * (the most accurate signal we have), falling back to a prompt-character
+ * heuristic when there's no history. Cost is computed via the canonical pricing
+ * module — never reimplemented here.
+ *
+ * `enforcePreflight` is the decision: with `on_exceed: block`, if launching
+ * this run would push any cap (per_run / per_day / per_agent / per_project)
+ * over the line, it denies. With `on_exceed: warn` it always allows but reports
+ * the projected overrun.
+ */
+import type { BudgetConfig } from '../types.js';
+import type { SpendEntry } from './ledger.js';
+/**
+ * A pre-flight cost estimate for one run.
+ *
+ * `estInputTokens` / `estOutputTokens` are presumably the token counts the USD
+ * figure was priced from — TODO confirm against the implementation.
+ */
+export interface RunEstimate {
+    /** Estimated USD for this run. 0 when the model is unpriced. */
+    estUsd: number;
+    /** How the token count was derived. */
+    basis: 'ledger-average' | 'prompt-heuristic' | 'none';
+    /** True when the model resolved to a priced entry. */
+    priced: boolean;
+    estInputTokens: number;
+    estOutputTokens: number;
+}
+/**
+ * Estimate the cost of a run. When the ledger has prior runs for this agent we
+ * use their average input/output tokens; otherwise we fall back to a
+ * prompt-character heuristic. `recentAvgTokens` lets callers inject a
+ * precomputed average (e.g. from a scoped ledger) for testability.
+ *
+ * @param args.agent Agent whose ledger history is consulted.
+ * @param args.model Model the estimate is priced for.
+ * @param args.mode NOTE(review): not described by this declaration — confirm how it affects the estimate.
+ * @param args.promptChars Prompt length in characters, for the heuristic fallback.
+ * @param args.ledger Entries to average over; behaviour when omitted is not visible here — confirm.
+ * @returns The estimate, including which basis produced the token counts.
+ */
+export declare function estimateRunCost(args: {
+    agent: string;
+    model: string;
+    mode?: string;
+    promptChars?: number;
+    recentAvgTokens?: {
+        input: number;
+        output: number;
+    };
+    ledger?: SpendEntry[];
+}): RunEstimate;
+/**
+ * Average input/output tokens per RUN for an agent, from the ledger. Null when no history.
+ * Unlike the spend rollups, `ledger` is a required argument here.
+ */
+export declare function ledgerAverageTokens(agent: string, ledger: SpendEntry[]): {
+    input: number;
+    output: number;
+} | null;
+/**
+ * Decision returned by the pre-flight gate.
+ *
+ * NOTE(review): `needsConfirm` is documented below as `>=` while the config key
+ * is named `require_confirm_over` — confirm whether an estimate exactly equal
+ * to the threshold is meant to prompt.
+ */
+export interface PreflightDecision {
+    /** Whether the run may proceed. */
+    allow: boolean;
+    /** Whether the caller must interactively confirm (estimate >= require_confirm_over). */
+    needsConfirm: boolean;
+    /** Human reason when blocked or confirming. */
+    reason?: string;
+    /** Which cap blocked, if any. */
+    blockedCap?: 'per_run' | 'per_day' | 'per_agent' | 'per_project';
+    /** Projected day spend if this run lands at its estimate. */
+    projectedDaySpend: number;
+    /** Projected project spend if this run lands at its estimate. */
+    projectedProjectSpend: number;
+}
+/**
+ * Current spend snapshot the gate compares the estimate against.
+ *
+ * `daySpend`, `projectSpend` and `agentDaySpend` are presumably USD totals
+ * taken from the ledger for today, the project, and this agent today — TODO
+ * confirm against `ledgerStateFor`.
+ */
+export interface LedgerState {
+    /** Agent this snapshot is for (used to pick the matching per_agent cap). */
+    agent: string;
+    daySpend: number;
+    projectSpend: number;
+    agentDaySpend: number;
+}
+/**
+ * Read the ledger snapshot the gate needs for `agent` / `project` / today.
+ * `ledger` is optional; presumably the default ledger is loaded when it is
+ * omitted — TODO confirm.
+ */
+export declare function ledgerStateFor(agent: string, project: string, ledger?: SpendEntry[]): LedgerState;
+/**
+ * The pre-flight gate. Projects this run's estimate on top of current spend and
+ * decides allow/deny. `on_exceed: warn` never blocks (allow:true) but still
+ * reports the projected overrun via `reason`. A hard block sets allow:false —
+ * `--yes` MUST NOT override it (the caller enforces that; this function only
+ * reports the truth).
+ *
+ * @param cfg Resolved budget config holding the caps and `on_exceed` policy.
+ * @param state Spend already on the ledger for this agent/project/day.
+ * @param est Estimate for the run about to launch.
+ * @returns The allow/confirm decision together with the projected spend figures.
+ */
+export declare function enforcePreflight(cfg: BudgetConfig, state: LedgerState, est: RunEstimate): PreflightDecision;
+/**
+ * Build a one-line human estimate banner for `agents run` preamble.
+ * Takes the agent, the model, and the estimate to describe, and returns the banner text.
+ */
+export declare function formatEstimateBanner(agent: string, model: string, est: RunEstimate): string;
+/**
+ * Result of the high-level run gate consumed by `agents run` / teams / cloud.
+ *
+ * `cfg`, `estimate`, `decision` and `banner` are presumably the resolved
+ * budget, the run estimate, the gate verdict and the formatted banner line.
+ * What they hold when `dormant` is true is not visible here — TODO confirm.
+ */
+export interface PreflightGateResult {
+    /** True when no caps are configured — budget feature dormant, nothing to do. */
+    dormant: boolean;
+    cfg: BudgetConfig;
+    estimate: RunEstimate;
+    decision: PreflightDecision;
+    banner: string;
+}
+/**
+ * High-level pre-flight gate: resolve the effective budget for `cwd`, estimate
+ * the run, and evaluate every cap. Returns `dormant:true` (and skips all work)
+ * when no caps are set, so the gate is zero-cost for users who never configure
+ * a budget. The CLI layer decides how to act on `decision` (print banner,
+ * confirm, or block + exit non-zero).
+ *
+ * @param args.agent Agent about to be dispatched.
+ * @param args.model Model the run will use.
+ * @param args.mode NOTE(review): purpose not described by this declaration — confirm.
+ * @param args.prompt Prompt text; presumably feeds the prompt-character heuristic — confirm.
+ * @param args.project Project key used for per-project accounting.
+ * @param args.cwd Directory the budget config is resolved for.
+ * @param args.ledger Optional preloaded ledger entries.
+ * @returns The gate result; callers should check `dormant` first.
+ */
+export declare function runPreflightGate(args: {
+    agent: string;
+    model: string;
+    mode?: string;
+    prompt?: string;
+    project: string;
+    cwd?: string;
+    ledger?: SpendEntry[];
+}): PreflightGateResult;
+export type { SpendEntry };