@phnx-labs/agents-cli 1.20.16 → 1.20.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +1 -1
  3. package/dist/commands/budget.d.ts +14 -0
  4. package/dist/commands/budget.js +137 -0
  5. package/dist/commands/cost.d.ts +12 -0
  6. package/dist/commands/cost.js +139 -0
  7. package/dist/commands/exec.d.ts +20 -0
  8. package/dist/commands/exec.js +382 -5
  9. package/dist/commands/secrets.d.ts +15 -0
  10. package/dist/commands/secrets.js +250 -4
  11. package/dist/commands/sessions.js +4 -0
  12. package/dist/commands/sync.d.ts +10 -3
  13. package/dist/commands/sync.js +72 -9
  14. package/dist/index.js +4 -0
  15. package/dist/lib/budget/config.d.ts +9 -0
  16. package/dist/lib/budget/config.js +115 -0
  17. package/dist/lib/budget/enforce.d.ts +94 -0
  18. package/dist/lib/budget/enforce.js +151 -0
  19. package/dist/lib/budget/ledger.d.ts +61 -0
  20. package/dist/lib/budget/ledger.js +107 -0
  21. package/dist/lib/budget/preflight.d.ts +110 -0
  22. package/dist/lib/budget/preflight.js +200 -0
  23. package/dist/lib/checkpoint.d.ts +54 -0
  24. package/dist/lib/checkpoint.js +56 -0
  25. package/dist/lib/cloud/rush.js +18 -0
  26. package/dist/lib/exec.d.ts +36 -0
  27. package/dist/lib/exec.js +192 -4
  28. package/dist/lib/git.d.ts +18 -0
  29. package/dist/lib/git.js +67 -4
  30. package/dist/lib/hooks.js +12 -0
  31. package/dist/lib/loop.d.ts +145 -0
  32. package/dist/lib/loop.js +330 -0
  33. package/dist/lib/mcp.d.ts +7 -0
  34. package/dist/lib/mcp.js +24 -0
  35. package/dist/lib/models.d.ts +11 -0
  36. package/dist/lib/models.js +21 -0
  37. package/dist/lib/plugin-marketplace.js +16 -6
  38. package/dist/lib/plugins.js +5 -2
  39. package/dist/lib/pricing/cost.d.ts +46 -0
  40. package/dist/lib/pricing/cost.js +71 -0
  41. package/dist/lib/pricing/index.d.ts +8 -0
  42. package/dist/lib/pricing/index.js +8 -0
  43. package/dist/lib/pricing/prices.json +138 -0
  44. package/dist/lib/pricing/table.d.ts +17 -0
  45. package/dist/lib/pricing/table.js +73 -0
  46. package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
  47. package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
  48. package/dist/lib/secrets/agent.d.ts +134 -0
  49. package/dist/lib/secrets/agent.js +501 -0
  50. package/dist/lib/secrets/bundles.d.ts +21 -0
  51. package/dist/lib/secrets/bundles.js +43 -0
  52. package/dist/lib/secrets/drivers/rush.d.ts +14 -0
  53. package/dist/lib/secrets/drivers/rush.js +84 -0
  54. package/dist/lib/secrets/linux.js +88 -10
  55. package/dist/lib/secrets/sync-backend.d.ts +48 -0
  56. package/dist/lib/secrets/sync-backend.js +13 -0
  57. package/dist/lib/secrets/sync.d.ts +15 -23
  58. package/dist/lib/secrets/sync.js +31 -66
  59. package/dist/lib/session/db.d.ts +40 -0
  60. package/dist/lib/session/db.js +84 -2
  61. package/dist/lib/session/discover.d.ts +2 -0
  62. package/dist/lib/session/discover.js +126 -2
  63. package/dist/lib/session/render.d.ts +2 -0
  64. package/dist/lib/session/render.js +1 -1
  65. package/dist/lib/session/types.d.ts +4 -0
  66. package/dist/lib/sync-umbrella.d.ts +76 -0
  67. package/dist/lib/sync-umbrella.js +125 -0
  68. package/dist/lib/teams/agents.d.ts +32 -0
  69. package/dist/lib/teams/agents.js +66 -3
  70. package/dist/lib/teams/api.js +20 -0
  71. package/dist/lib/teams/parsers.js +16 -4
  72. package/dist/lib/types.d.ts +48 -0
  73. package/dist/lib/workflows.d.ts +56 -0
  74. package/dist/lib/workflows.js +72 -5
  75. package/package.json +2 -1
@@ -0,0 +1,200 @@
1
+ import { estimateCost, formatUsd } from '../pricing/index.js';
2
+ import { loadLedger, spendForDay, spendForAgentDay, spendForProject, localDay } from './ledger.js';
3
+ import { resolveBudgetConfig, hasAnyCap } from './config.js';
4
+ /** Roughly 4 characters per token — the standard coarse heuristic for English text. Only the prompt-size fallback in estimateRunCost divides by this. */
5
+ const CHARS_PER_TOKEN = 4;
6
+ /**
7
+ * Output is typically a multiple of the visible prompt for an agentic run
8
+ * (tool calls, file reads, reasoning). 6x is a deliberately conservative
9
+ * lower bound so the estimate doesn't wildly under-report and wave through a
10
+ * run that then blows the cap on its first turn. It is applied to the
+ * estimated input tokens by the prompt-size fallback in estimateRunCost and
+ * is not used when a ledger average is available.
11
+ */
12
+ const HEURISTIC_OUTPUT_MULTIPLIER = 6;
13
/**
 * Estimate the cost of a single run before it starts.
 *
 * Token counts are chosen as follows:
 *   - If `args.recentAvgTokens` is supplied it is used as the per-run average
 *     and the ledger is never consulted (so injecting an average in tests does
 *     not trigger a ledger load).
 *   - Otherwise the per-run average for `args.agent` is computed from
 *     `args.ledger`, or from `loadLedger()` when no ledger is passed.
 *   - If the chosen average is missing or all-zero, fall back to the prompt
 *     heuristic: `promptChars / CHARS_PER_TOKEN` input tokens and
 *     `HEURISTIC_OUTPUT_MULTIPLIER` times that many output tokens.
 *   - If none of the above yields tokens, both counts stay 0 and `basis` is
 *     `'none'`.
 *
 * @param args `agent`, `model`, and optionally `promptChars`, `ledger`,
 *   `recentAvgTokens`.
 * @returns `estUsd` (from `estimateCost`), `basis`, `priced` (false when
 *   `estimateCost` matched no model), and the two token estimates.
 */
export function estimateRunCost(args) {
    // Resolve the ledger lazily: it is only needed when no average was injected.
    const avg = args.recentAvgTokens
        ?? ledgerAverageTokens(args.agent, args.ledger ?? loadLedger());
    let estInputTokens = 0;
    let estOutputTokens = 0;
    let basis = 'none';
    if (avg && (avg.input > 0 || avg.output > 0)) {
        estInputTokens = avg.input;
        estOutputTokens = avg.output;
        basis = 'ledger-average';
    }
    else if (args.promptChars && args.promptChars > 0) {
        estInputTokens = Math.ceil(args.promptChars / CHARS_PER_TOKEN);
        estOutputTokens = estInputTokens * HEURISTIC_OUTPUT_MULTIPLIER;
        basis = 'prompt-heuristic';
    }
    const { usd, modelMatched } = estimateCost(args.model, {
        inputTokens: estInputTokens,
        outputTokens: estOutputTokens,
    });
    return {
        estUsd: usd,
        // Never report a basis for an estimate that ended up with no tokens.
        basis: estInputTokens === 0 && estOutputTokens === 0 ? 'none' : basis,
        priced: modelMatched !== null,
        estInputTokens,
        estOutputTokens,
    };
}
47
+ /** Average input/output tokens per RUN for an agent, from the ledger. Null when no history. */
48
+ export function ledgerAverageTokens(agent, ledger) {
49
+ const runs = new Map();
50
+ for (const e of ledger) {
51
+ if (e.agent !== agent)
52
+ continue;
53
+ const acc = runs.get(e.runId) ?? { input: 0, output: 0 };
54
+ acc.input += e.inputTok;
55
+ acc.output += e.outputTok;
56
+ runs.set(e.runId, acc);
57
+ }
58
+ if (runs.size === 0)
59
+ return null;
60
+ let input = 0;
61
+ let output = 0;
62
+ for (const r of runs.values()) {
63
+ input += r.input;
64
+ output += r.output;
65
+ }
66
+ return { input: Math.round(input / runs.size), output: Math.round(output / runs.size) };
67
+ }
68
/**
 * Snapshot the spend figures the pre-flight gate compares against the caps.
 *
 * Uses the supplied `ledger`, or loads one via `loadLedger()` when omitted,
 * and reads "today" from `localDay()`.
 *
 * @returns `agent` plus day-wide, project-wide and agent-per-day spend.
 */
export function ledgerStateFor(agent, project, ledger) {
    const rows = ledger ?? loadLedger();
    const day = localDay();
    const daySpend = spendForDay(day, rows);
    const projectSpend = spendForProject(project, rows);
    const agentDaySpend = spendForAgentDay(agent, day, rows);
    return { agent, daySpend, projectSpend, agentDaySpend };
}
79
/**
 * The pre-flight gate. Projects this run's estimate on top of current spend and
 * decides allow/deny.
 *
 * - Caps are checked in the order per_run, per_day, per_project, per_agent;
 *   the FIRST breach supplies `reason` and `blockedCap`.
 * - `on_exceed: warn` never blocks (allow:true) but still reports the projected
 *   overrun via `reason`. Any other setting turns a breach into a hard block
 *   (allow:false) — `--yes` MUST NOT override it (the caller enforces that;
 *   this function only reports the truth).
 * - `require_confirm_over` only governs interactive confirmation, never a block.
 * - Unpriced model + active caps: the estimate is $0 because there is no price
 *   for the model, so the estimate-driven checks cannot trip. Never
 *   $0-wave-through (#346): confirmation is required on every path that lets
 *   the run proceed, including `warn` mode with a cap already breached by
 *   existing spend.
 *
 * @param cfg Resolved budget config (caps, `on_exceed`, `require_confirm_over`).
 * @param state Current spend snapshot (`agent`, `daySpend`, `projectSpend`, `agentDaySpend`).
 * @param est Run estimate (`estUsd`, `priced`).
 * @returns Decision with `allow`, `needsConfirm`, optional `reason` / `blockedCap`,
 *   and the projected day and project spend.
 */
export function enforcePreflight(cfg, state, est) {
    const projectedDaySpend = state.daySpend + est.estUsd;
    const projectedProjectSpend = state.projectSpend + est.estUsd;
    const projectedAgentDaySpend = state.agentDaySpend + est.estUsd;
    const warnOnly = cfg.on_exceed === 'warn';
    const breaches = [];
    if (cfg.per_run !== undefined && est.estUsd > cfg.per_run) {
        breaches.push({
            cap: 'per_run',
            reason: `estimated ${formatUsd(est.estUsd)} exceeds per_run cap ${formatUsd(cfg.per_run)}`,
        });
    }
    if (cfg.per_day !== undefined && projectedDaySpend > cfg.per_day) {
        breaches.push({
            cap: 'per_day',
            reason: `projected day spend ${formatUsd(projectedDaySpend)} exceeds per_day cap ${formatUsd(cfg.per_day)}`,
        });
    }
    if (cfg.per_project !== undefined && projectedProjectSpend > cfg.per_project) {
        breaches.push({
            cap: 'per_project',
            reason: `projected project spend ${formatUsd(projectedProjectSpend)} exceeds per_project cap ${formatUsd(cfg.per_project)}`,
        });
    }
    const agentCap = cfg.per_agent?.[state.agent];
    if (agentCap !== undefined && projectedAgentDaySpend > agentCap) {
        breaches.push({
            cap: 'per_agent',
            reason: `projected agent day spend ${formatUsd(projectedAgentDaySpend)} exceeds per_agent cap ${formatUsd(agentCap)}`,
        });
    }
    // require_confirm_over only governs interactive confirm, not a hard block.
    const overConfirmThreshold = cfg.require_confirm_over !== undefined && est.estUsd >= cfg.require_confirm_over;
    // Caps are configured but the $0 estimate makes them unenforceable for this model.
    const capsUnenforceable = !est.priced && hasAnyCap(cfg);
    if (breaches.length > 0) {
        const first = breaches[0];
        return {
            allow: warnOnly,
            // A hard block needs no confirmation. In warn mode the run proceeds, so
            // the unpriced-model confirmation must apply here as well.
            needsConfirm: warnOnly ? (overConfirmThreshold || capsUnenforceable) : false,
            reason: first.reason,
            blockedCap: first.cap,
            projectedDaySpend,
            projectedProjectSpend,
        };
    }
    if (capsUnenforceable) {
        return {
            allow: true,
            needsConfirm: true,
            reason: `model is unpriced — budget caps cannot be enforced for this run (estimate is $0); confirm to proceed`,
            projectedDaySpend,
            projectedProjectSpend,
        };
    }
    return {
        allow: true,
        needsConfirm: overConfirmThreshold,
        reason: overConfirmThreshold
            ? `estimated ${formatUsd(est.estUsd)} is at or above confirm threshold ${formatUsd(cfg.require_confirm_over)}`
            : undefined,
        projectedDaySpend,
        projectedProjectSpend,
    };
}
155
/**
 * Render the single-line estimate banner shown in the `agents run` preamble.
 *
 * The cost reads `unpriced` when the estimate has no price for the model; the
 * basis is labelled `recent average`, `prompt size`, or `no basis`.
 */
export function formatEstimateBanner(agent, model, est) {
    const cost = est.priced ? formatUsd(est.estUsd) : 'unpriced';
    let basisLabel;
    switch (est.basis) {
        case 'ledger-average':
            basisLabel = 'recent average';
            break;
        case 'prompt-heuristic':
            basisLabel = 'prompt size';
            break;
        default:
            basisLabel = 'no basis';
    }
    return `[budget] est. ${cost} for this ${agent} run (${model}, ${basisLabel})`;
}
165
/**
 * High-level pre-flight gate: resolve the effective budget for `args.cwd`,
 * estimate the run, and — when any cap is configured — evaluate every cap.
 *
 * The config, ledger (`args.ledger` or `loadLedger()`), estimate and banner are
 * always produced. When no caps are set the result is `dormant: true` with an
 * allow-everything decision and zeroed projections, and cap evaluation is
 * skipped. The CLI layer decides how to act on `decision` (print banner,
 * confirm, or block + exit non-zero).
 *
 * @param args `cwd`, `agent`, `model`, `mode`, `project`, and optionally
 *   `prompt` and `ledger`.
 * @returns `{ dormant, cfg, estimate, decision, banner }`.
 */
export function runPreflightGate(args) {
    const cfg = resolveBudgetConfig(args.cwd);
    const ledger = args.ledger ?? loadLedger();
    const estimate = estimateRunCost({
        agent: args.agent,
        model: args.model,
        mode: args.mode,
        promptChars: args.prompt?.length,
        ledger,
    });
    const banner = formatEstimateBanner(args.agent, args.model, estimate);
    if (hasAnyCap(cfg)) {
        const spendState = ledgerStateFor(args.agent, args.project, ledger);
        const decision = enforcePreflight(cfg, spendState, estimate);
        return { dormant: false, cfg, estimate, decision, banner };
    }
    // No caps configured: nothing to enforce, so report a pass-through decision.
    const passThrough = {
        allow: true,
        needsConfirm: false,
        projectedDaySpend: 0,
        projectedProjectSpend: 0,
    };
    return { dormant: true, cfg, estimate, decision: passThrough, banner };
}
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Harness-level loop checkpoint (issue #332).
3
+ *
4
+ * A checkpoint is the durable harness state for a `--loop` run: it records the
5
+ * iteration count, the pinned session id, the prompt being re-injected, and the
6
+ * loop config — everything `--resume-checkpoint` needs to continue a run that a
7
+ * SIGTERM, timeout, or machine sleep killed mid-flight.
8
+ *
9
+ * This is NOT provider-side state. `--session-id` resumes Claude's *conversation*
10
+ * (server-side); a checkpoint resumes the *harness* (iteration count, loop
11
+ * variables, prompt chain) — the part Claude's own resume cannot recover.
12
+ *
13
+ * Atomic write (temp + rename) mirrors `writeRunMeta` in routines.ts so a crash
14
+ * mid-write never leaves a half-written checkpoint that `readCheckpoint` would
15
+ * choke on. `readCheckpoint` returns null on a missing or corrupt file (mirrors
16
+ * `readRunMeta`) — a corrupt checkpoint is a "start fresh", never a throw.
17
+ */
18
+ import type { AgentId } from './types.js';
19
+ import type { LoopConfig, LoopSignal } from './loop.js';
20
+ /** Durable harness state for a looped run, serialized to checkpoint.json. On read, readCheckpoint checks only `id` and `iteration`; every other field is returned as found on disk. `createdAt` / `updatedAt` are stored as strings (presumably ISO-8601 timestamps — confirm against the writer). */
21
+ export interface Checkpoint {
22
+ /** runId == the run directory name under getRunsDir(). */
23
+ id: string;
24
+ agent: AgentId;
25
+ version?: string;
26
+ /** The prompt re-injected each iteration. */
27
+ prompt?: string;
28
+ /** Pinned Claude session id so a resume continues the same conversation. */
29
+ sessionId?: string;
30
+ /** Iterations COMPLETED so far. A resume starts at iteration + 1. */
31
+ iteration: number;
32
+ /** The loop config governing termination. */
33
+ loop: LoopConfig;
34
+ /** Last loop-signal read, if any (for audit / resume context). */
35
+ loopSignal?: LoopSignal;
36
+ /** Cumulative tokens consumed across all iterations so far. */
37
+ cumulativeTokens?: number;
38
+ createdAt: string;
39
+ updatedAt: string;
40
+ }
41
+ /** Path to a run's checkpoint file: <runsDir>/<runId>/checkpoint.json. Only builds the path; it does not itself check that the file exists. */
42
+ export declare function checkpointPath(runId: string): string;
43
+ /**
44
+ * Write a checkpoint atomically (temp file + rename). The rename is atomic on a
45
+ * single filesystem, so a reader never observes a partially written file.
46
+ * Mirrors the durable-write contract of `writeRunMeta`.
+ *
+ * @param c Checkpoint to serialize (written as indented JSON).
+ * @param file Destination path; defaults to `checkpointPath(c.id)`. Missing
+ * parent directories are created.
47
+ */
48
+ export declare function writeCheckpoint(c: Checkpoint, file?: string): void;
49
+ /**
50
+ * Read a checkpoint from disk. Returns null if the file is missing or its
51
+ * contents are not valid JSON — corruption means "no resumable state", which
52
+ * the caller treats as a fresh start. Mirrors `readRunMeta`.
+ *
+ * @param file Path of the checkpoint file to read.
+ * @returns The parsed checkpoint, or null when the file is missing, unreadable,
+ * not valid JSON, or fails the `id` / `iteration` shape check.
53
+ */
54
+ export declare function readCheckpoint(file: string): Checkpoint | null;
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Harness-level loop checkpoint (issue #332).
3
+ *
4
+ * A checkpoint is the durable harness state for a `--loop` run: it records the
5
+ * iteration count, the pinned session id, the prompt being re-injected, and the
6
+ * loop config — everything `--resume-checkpoint` needs to continue a run that a
7
+ * SIGTERM, timeout, or machine sleep killed mid-flight.
8
+ *
9
+ * This is NOT provider-side state. `--session-id` resumes Claude's *conversation*
10
+ * (server-side); a checkpoint resumes the *harness* (iteration count, loop
11
+ * variables, prompt chain) — the part Claude's own resume cannot recover.
12
+ *
13
+ * Atomic write (temp + rename) mirrors `writeRunMeta` in routines.ts so a crash
14
+ * mid-write never leaves a half-written checkpoint that `readCheckpoint` would
15
+ * choke on. `readCheckpoint` returns null on a missing or corrupt file (mirrors
16
+ * `readRunMeta`) — a corrupt checkpoint is a "start fresh", never a throw.
17
+ */
18
+ import * as fs from 'fs';
19
+ import * as path from 'path';
20
+ import { getRunsDir } from './state.js';
21
/** Resolve the checkpoint file for a run: `<runsDir>/<runId>/checkpoint.json`. */
export function checkpointPath(runId) {
    const runDir = path.join(getRunsDir(), runId);
    return path.join(runDir, 'checkpoint.json');
}
25
/**
 * Write a checkpoint atomically (temp file + rename). The rename is atomic on a
 * single filesystem, so a reader never observes a partially written file.
 * Mirrors the durable-write contract of `writeRunMeta`.
 *
 * If serialization, the write, or the rename fails, the temp file is removed
 * (best effort) before the original error is rethrown, so failed writes do not
 * accumulate `*.tmp` files in the run directory.
 *
 * @param c Checkpoint to serialize (written as 2-space-indented JSON).
 * @param file Destination path; defaults to `checkpointPath(c.id)`. Missing
 *   parent directories are created.
 * @throws Whatever the underlying `fs` call throws.
 */
export function writeCheckpoint(c, file) {
    const target = file ?? checkpointPath(c.id);
    fs.mkdirSync(path.dirname(target), { recursive: true });
    // The pid suffix keeps concurrent writers from sharing a temp file.
    const tmp = `${target}.${process.pid}.tmp`;
    try {
        fs.writeFileSync(tmp, JSON.stringify(c, null, 2), 'utf-8');
        fs.renameSync(tmp, target);
    }
    catch (err) {
        try {
            fs.unlinkSync(tmp);
        }
        catch { /* temp file was never created or is already gone */ }
        throw err;
    }
}
37
+ /**
38
+ * Read a checkpoint from disk. Returns null if the file is missing or its
39
+ * contents are not valid JSON — corruption means "no resumable state", which
40
+ * the caller treats as a fresh start. Mirrors `readRunMeta`.
41
+ */
42
+ export function readCheckpoint(file) {
43
+ if (!fs.existsSync(file))
44
+ return null;
45
+ try {
46
+ const parsed = JSON.parse(fs.readFileSync(file, 'utf-8'));
47
+ if (!parsed || typeof parsed !== 'object')
48
+ return null;
49
+ if (typeof parsed.id !== 'string' || typeof parsed.iteration !== 'number')
50
+ return null;
51
+ return parsed;
52
+ }
53
+ catch {
54
+ return null;
55
+ }
56
+ }
@@ -341,6 +341,24 @@ export class RushCloudProvider {
341
341
  if (repos.length === 0) {
342
342
  throw new Error('Rush Cloud requires --repo <owner/repo> (or --repo repeated for multi-repo).');
343
343
  }
344
+ // Budget pre-flight gate (issue #346). Cloud dispatches inherit the local
345
+ // project's caps; we refuse to POST a run that would breach an on_exceed:block
346
+ // cap. The repo slug is the project attribution key. Server-side spend is
347
+ // authoritative for live enforcement; this pre-flight is the deterministic
348
+ // "don't even start it" guard. Dormant when no caps are configured.
349
+ {
350
+ const { runPreflightGate } = await import('../budget/preflight.js');
351
+ const projectKey = repos[0] ?? process.cwd();
352
+ const gate = runPreflightGate({
353
+ agent: options.agent ?? 'cloud',
354
+ model: options.model ?? `${options.agent ?? 'cloud'}-default`,
355
+ prompt: options.prompt,
356
+ project: projectKey,
357
+ });
358
+ if (!gate.dormant && !gate.decision.allow) {
359
+ throw new Error(`[budget] BLOCKED cloud dispatch (${projectKey}): ${gate.decision.reason}`);
360
+ }
361
+ }
344
362
  // Validate each repo's shape and resolve its installation_id up front.
345
363
  // Any bad entry fails the whole dispatch — we never want a half-started
346
364
  // multi-repo run that only found installations for some of the repos.
@@ -82,6 +82,23 @@ export interface ExecOptions {
82
82
  sessionId?: string;
83
83
  verbose?: boolean;
84
84
  env?: Record<string, string>;
85
+ /**
86
+ * Workflow capability scoping (Claude only). Sourced from WORKFLOW.md
87
+ * frontmatter `tools:` / `mcpServers:` and translated to Claude headless
88
+ * flags in buildExecCommand. Other agents ignore these.
89
+ *
90
+ * `toolsRestrict` is the AVAILABLE-tool allowlist: it maps to `--tools`, which
91
+ * restricts the built-in tool set the run can use at all (NOT `--allowedTools`,
92
+ * which only auto-approves without restricting availability). Declaring
93
+ * `[Read, Grep]` makes Write/Bash/Edit unavailable for the whole run.
94
+ */
95
+ toolsRestrict?: string[];
96
+ /**
97
+ * Path to an ephemeral mcp-config JSON. Emitted as `--mcp-config <path>`
98
+ * together with `--strict-mcp-config` so ONLY the named servers load (the
99
+ * flag alone merely ADDS to the existing server set).
100
+ */
101
+ mcpConfigPath?: string;
85
102
  }
86
103
  /**
87
104
  * Resolve interactive vs headless. Explicit flags are definitive and win over
@@ -90,6 +107,23 @@ export interface ExecOptions {
90
107
  * `--interactive` takes precedence over `--headless`; the CLI layer rejects passing both.
91
108
  */
92
109
  export declare function resolveInteractive(options: Pick<ExecOptions, 'interactive' | 'headless' | 'prompt'>): boolean;
110
+ /**
111
+ * Decide whether spawnAgent must capture (PIPE + tee) the child's stdout so the
112
+ * live budget watcher can parse it (issue #346, FIX 3).
113
+ *
114
+ * The bug this fixes: stdout used to be PIPED only when downstream output was
115
+ * piped (`piped = !isTTY`). For a normal headless run AT A TERMINAL, stdout was
116
+ * 'inherit', so `child.stdout` was null and the watcher — hence the mid-run
117
+ * hard-cap kill — was silently skipped. We now tap stdout for ALL
118
+ * non-interactive runs when caps are active, regardless of TTY, and tee it back
119
+ * so the user still sees output. Interactive REPLs are never tapped (the human
120
+ * owns the TTY; they rely on the pre-flight gate).
121
+ *
122
+ * @param interactive resolveInteractive() result for the run
123
+ * @param piped true when the parent's stdout is NOT a TTY (output piped)
124
+ * @param capsActive true when a budget watcher is attached (caps configured)
+ * @returns false for interactive runs; otherwise true when either `piped` or
+ * `capsActive` is set
125
+ */
126
+ export declare function shouldTapStdout(interactive: boolean, piped: boolean, capsActive: boolean): boolean;
93
127
  /** Parse an array of KEY=VALUE strings into an env record. Returns undefined for empty input. */
94
128
  export declare function parseExecEnv(entries: string[]): Record<string, string> | undefined;
95
129
  /**
@@ -135,6 +169,8 @@ export declare function execAgent(options: ExecOptions): Promise<number>;
135
169
  * keeping version resolution in one place instead of reimplementing it in batch.
136
170
  */
137
171
  export declare function execShimPassthrough(agent: AgentId, rawArgs: string[], cwd: string, pinnedVersion?: string): Promise<number>;
172
+ /** Exit code spawnAgent resolves with when a run is killed for crossing a budget cap. It replaces whatever exit code the child itself reported. */
173
+ export declare const BUDGET_KILL_EXIT_CODE = 7;
138
174
  /**
139
175
  * Patterns that indicate a rate/usage limit. Matching is intentionally broad
140
176
  * because providers phrase these differently -- Anthropic uses "5-hour limit"
package/dist/lib/exec.js CHANGED
@@ -114,6 +114,29 @@ export function resolveInteractive(options) {
114
114
  return false;
115
115
  return options.prompt === undefined;
116
116
  }
117
/**
 * Tell spawnAgent whether the child's stdout has to be piped (and teed back to
 * the parent) rather than inherited (issue #346, FIX 3).
 *
 * Background: stdout was previously piped only when our own stdout was not a
 * TTY. A headless run started at a terminal therefore inherited stdout,
 * `child.stdout` was null, and the live budget watcher — and with it the
 * mid-run hard-cap kill — never ran. Stdout is now tapped for every
 * non-interactive run with active caps, TTY or not, and teed so output stays
 * visible. Interactive REPLs are never tapped: the human owns the TTY and the
 * pre-flight gate is their only budget check.
 *
 * @param interactive resolveInteractive() result for the run
 * @param piped true when the parent's stdout is NOT a TTY (output piped)
 * @param capsActive true when a budget watcher is attached (caps configured)
 */
export function shouldTapStdout(interactive, piped, capsActive) {
    // Pipe when the caller pipes us downstream (keeps composability) or when a
    // watcher needs to read the stream — but never for an interactive session.
    return !interactive && (piped || capsActive);
}
117
140
  /** Pattern for valid environment variable names (C identifier rules). */
118
141
  const EXEC_ENV_KEY_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;
119
142
  /** Parse a single KEY=VALUE string into a tuple, validating the key name. */
@@ -540,6 +563,39 @@ export function buildExecCommand(options) {
540
563
  cmd.push('--add-dir', dir);
541
564
  }
542
565
  }
566
+ // Claude-specific: workflow capability scoping. WORKFLOW.md frontmatter
567
+ // `tools:` / `mcpServers:` is translated to the headless flags that ACTUALLY
568
+ // restrict the run (verified against `claude --help` on the installed CLI):
569
+ //
570
+ // tools: -> `--tools <names...>` — restricts the AVAILABLE built-in
571
+ // tool set. This is the security boundary: tools NOT named
572
+ // here (e.g. Write, Bash, Edit) are unavailable for the whole
573
+ // run. `--allowedTools` would only auto-approve without
574
+ // restricting, so it is the WRONG flag for sandboxing.
575
+ // We also emit `--allowedTools <names...>` for the same set so
576
+ // the permitted tools don't prompt in headless `-p` mode.
577
+ // mcpServers: -> `--mcp-config <path>` PLUS `--strict-mcp-config`. The
578
+ // config flag alone ADDS servers to the existing set; only
579
+ // `--strict-mcp-config` makes the run use *only* the named
580
+ // servers, which is what scoping means.
581
+ //
582
+ // The command layer gates this behind the `allowlist` capability and assembles
583
+ // the mcp-config file; buildExecCommand stays a pure string-builder.
584
+ //
585
+ // `<tools...>` is variadic. Emit the names as separate argv tokens. The flags
586
+ // here are appended AFTER the positional prompt (added above), so the variadic
587
+ // never swallows the prompt; the trailing `--allowedTools` / `--strict-mcp-config`
588
+ // tokens also terminate the `--tools` variadic cleanly.
589
+ if (options.agent === 'claude') {
590
+ if (options.toolsRestrict && options.toolsRestrict.length > 0) {
591
+ cmd.push('--tools', ...options.toolsRestrict);
592
+ cmd.push('--allowedTools', ...options.toolsRestrict);
593
+ }
594
+ if (options.mcpConfigPath) {
595
+ cmd.push('--mcp-config', options.mcpConfigPath);
596
+ cmd.push('--strict-mcp-config');
597
+ }
598
+ }
543
599
  return cmd;
544
600
  }
545
601
  /** Spawn an agent and return its exit code. Convenience wrapper over spawnAgent. */
@@ -599,6 +655,15 @@ async function spawnAgent(options) {
599
655
  const timeoutMs = options.timeout ? parseTimeout(options.timeout) : undefined;
600
656
  const piped = !process.stdout.isTTY;
601
657
  const interactive = resolveInteractive(options);
658
+ // Budget live kill-switch (issue #346). For headless runs we incrementally
659
+ // parse stream-json usage off stdout, accumulate cost, and kill the child the
660
+ // moment a configured cap is crossed — exactly like the --timeout path, but
661
+ // resolving with a DISTINCT exit code so CI/headless can tell budget-kill from
662
+ // timeout. Spend is recorded to the shared ledger in the close handler. The
663
+ // watcher is dormant (and zero-cost) when no caps are configured.
664
+ const cwd = options.cwd || process.cwd();
665
+ const runId = randomUUID();
666
+ const watcherState = await setupBudgetWatcher(options, cwd, runId);
602
667
  maybeRotate();
603
668
  const timer = createTimer('agent.run', {
604
669
  agent: options.agent,
@@ -617,9 +682,13 @@ async function spawnAgent(options) {
617
682
  // rendering, raw-mode keystrokes, colored output). Headless mode pipes
618
683
  // stderr so we can scan for rate limits and feed fallback. stdout stays
619
684
  // inherited for TTY, piped when the caller pipes us downstream.
685
+ // PIPE (and later tee) stdout whenever the live budget watcher must read it
686
+ // — for ALL non-interactive runs when caps are active, regardless of TTY.
687
+ // See shouldTapStdout() for the rationale (FIX 3, issue #346).
688
+ const tapStdout = shouldTapStdout(interactive, piped, watcherState !== null);
620
689
  const stdio = interactive
621
690
  ? ['inherit', 'inherit', 'inherit']
622
- : ['inherit', piped ? 'pipe' : 'inherit', 'pipe'];
691
+ : ['inherit', tapStdout ? 'pipe' : 'inherit', 'pipe'];
623
692
  // On Windows, .cmd batch wrappers (npm-installed CLIs) require shell:true
624
693
  // whether addressed by name or absolute path.
625
694
  const useShell = process.platform === 'win32' && (!path.isAbsolute(executable) || executable.endsWith('.cmd'));
@@ -631,8 +700,29 @@ async function spawnAgent(options) {
631
700
  });
632
701
  // Mark startup time (time from function call to process spawn)
633
702
  timer.mark('startup');
634
- if (!interactive && piped && child.stdout) {
703
+ let budgetKilled = false;
704
+ let budgetKillTimer;
705
+ if (!interactive && tapStdout && child.stdout) {
706
+ // TEE the child's stdout back to the parent's so the user still sees
707
+ // output (mirrors stdio:'inherit') while we tap the same stream for usage.
635
708
  child.stdout.pipe(process.stdout);
709
+ // Tap the same stream for budget usage events without consuming the pipe
710
+ // (a 'data' listener and .pipe() both receive every chunk). Kill on breach.
711
+ if (watcherState) {
712
+ let pendingLine = '';
713
+ child.stdout.on('data', (chunk) => {
714
+ const { events, rest } = watcherState.extract(chunk.toString('utf-8'), pendingLine);
715
+ pendingLine = rest;
716
+ for (const ev of events)
717
+ watcherState.watcher.feedUsage(ev);
718
+ if (watcherState.watcher.breached() && !budgetKilled) {
719
+ budgetKilled = true;
720
+ process.stderr.write(`[budget] hard cap exceeded — terminating ${options.agent} run\n`);
721
+ child.kill('SIGTERM');
722
+ budgetKillTimer = setTimeout(() => child.kill('SIGKILL'), 5000);
723
+ }
724
+ });
725
+ }
636
726
  }
637
727
  let stderrBuffer = '';
638
728
  const STDERR_BUFFER_CAP = 64 * 1024;
@@ -663,11 +753,94 @@ async function spawnAgent(options) {
663
753
  child.on('close', (code) => {
664
754
  if (timeoutTimer)
665
755
  clearTimeout(timeoutTimer);
666
- timer.end({ exitCode: code ?? 0, status: code === 0 ? 'success' : 'failed' });
667
- resolve({ exitCode: code ?? 0, stderr: stderrBuffer });
756
+ // Clear the budget-kill SIGKILL escalation timer (mirror the --timeout
757
+ // timer cleanup) so a programmatic caller reusing execAgent (the #332 loop
758
+ // driver) never sees a stray 5s kill event fire after the child has exited.
759
+ if (budgetKillTimer)
760
+ clearTimeout(budgetKillTimer);
761
+ // Record final spend to the shared ledger (issue #346). Best-effort: a
762
+ // ledger write must never mask the run's own outcome.
763
+ if (watcherState) {
764
+ try {
765
+ watcherState.finalize();
766
+ }
767
+ catch { /* ledger write is non-critical */ }
768
+ // Release the watcher's references / stop accepting events (symmetry).
769
+ try {
770
+ watcherState.watcher.dispose();
771
+ }
772
+ catch { /* dispose is best-effort */ }
773
+ }
774
+ // Budget kill resolves with a DISTINCT non-zero exit so CI/headless and
775
+ // teams/cloud can tell a budget termination apart from a normal failure.
776
+ const exitCode = budgetKilled ? BUDGET_KILL_EXIT_CODE : (code ?? 0);
777
+ timer.end({ exitCode, status: budgetKilled ? 'budget_killed' : code === 0 ? 'success' : 'failed' });
778
+ resolve({ exitCode, stderr: stderrBuffer });
668
779
  });
669
780
  });
670
781
  }
782
+ /** Exit code spawnAgent resolves with when a run is killed for crossing a budget cap. It replaces whatever exit code the child itself reported. */
783
+ export const BUDGET_KILL_EXIT_CODE = 7;
784
/**
 * Build the live budget watcher for a run, or return null when none applies.
 *
 * Null is returned for interactive runs (before any budget module is loaded)
 * and when the budget config resolved for `cwd` has no caps, so users without
 * a budget pay nothing beyond the module imports. Otherwise the watcher is
 * seeded with the day, project (keyed by `cwd`) and agent-per-day spend already
 * on the ledger.
 *
 * @returns null, or `{ watcher, extract, finalize }`:
 *   - `extract(chunk, pending)` parses usage events from a stdout chunk,
 *     remembers them, and returns the parser's result unchanged;
 *   - `finalize()` appends one ledger record per remembered event, tagged with
 *     `runId`.
 */
async function setupBudgetWatcher(options, cwd, runId) {
    if (resolveInteractive(options)) {
        return null;
    }
    const [configModule, enforceModule, ledger] = await Promise.all([
        import('./budget/config.js'),
        import('./budget/enforce.js'),
        import('./budget/ledger.js'),
    ]);
    const { resolveBudgetConfig, hasAnyCap } = configModule;
    const { makeLiveSpendWatcher, capsFromConfig, extractUsageEvents } = enforceModule;
    const cfg = resolveBudgetConfig(cwd);
    if (!hasAnyCap(cfg)) {
        return null;
    }
    const today = ledger.localDay();
    const entries = ledger.loadLedger();
    const priorSpend = {
        daySpend: ledger.spendForDay(today, entries),
        projectSpend: ledger.spendForProject(cwd, entries),
        agentDaySpend: { [options.agent]: ledger.spendForAgentDay(options.agent, today, entries) },
    };
    const caps = capsFromConfig(cfg, priorSpend);
    const watcher = makeLiveSpendWatcher({ caps, onBreach: () => { } });
    // Usage events observed so far; each becomes one ledger record in finalize().
    const observed = [];
    const model = options.model ?? `${options.agent}-default`;
    const extract = (chunk, pending) => {
        const parsed = extractUsageEvents(chunk, pending, model, options.agent);
        parsed.events.forEach((ev) => {
            observed.push({
                model: ev.model ?? model,
                usage: {
                    inputTokens: ev.inputTokens,
                    outputTokens: ev.outputTokens,
                    cacheReadTokens: ev.cacheReadTokens,
                    cacheCreationTokens: ev.cacheCreationTokens,
                },
            });
        });
        return parsed;
    };
    const finalize = () => {
        observed.forEach((record) => {
            ledger.recordSpend({
                runId,
                agent: options.agent,
                project: cwd,
                model: record.model,
                usage: record.usage,
                source: 'run',
            });
        });
    };
    return { watcher, extract, finalize };
}
671
844
  /**
672
845
  * Patterns that indicate a rate/usage limit. Matching is intentionally broad
673
846
  * because providers phrase these differently -- Anthropic uses "5-hour limit"
@@ -733,6 +906,21 @@ export async function runWithFallback(options) {
733
906
  ];
734
907
  let prevAgent;
735
908
  let prevSessionId;
909
+ // Workflow capability scoping only takes effect on claude (buildExecCommand
910
+ // guards `--tools` / `--mcp-config` / `--strict-mcp-config` on agent==='claude').
911
+ // A fallback to any non-claude agent would run with NONE of that scoping — the
912
+ // declared sandbox silently evaporates. Warn loudly so a rate-limit handoff to
913
+ // an unscoped agent is never silent (issue #324 fail-open).
914
+ const scopingActive = (options.toolsRestrict && options.toolsRestrict.length > 0)
915
+ || !!options.mcpConfigPath;
916
+ if (scopingActive) {
917
+ const unscoped = options.fallback.filter(f => f.agent !== 'claude').map(f => f.agent);
918
+ if (unscoped.length > 0) {
919
+ process.stderr.write(`[agents] WARNING: workflow tool/MCP scoping is enforced on claude only. ` +
920
+ `Fallback agent(s) ${[...new Set(unscoped)].join(', ')} would run UNSCOPED ` +
921
+ `(no --tools / --strict-mcp-config restriction) if claude hits a rate limit.\n`);
922
+ }
923
+ }
736
924
  for (let i = 0; i < chain.length; i++) {
737
925
  const { agent, version } = chain[i];
738
926
  const pinnedSessionId = agent === 'claude' ? randomUUID() : undefined;
package/dist/lib/git.d.ts CHANGED
@@ -1,3 +1,21 @@
1
+ /**
2
+ * Validate that a clone/pull source uses a safe git transport before it is
3
+ * handed to `git`.
4
+ *
5
+ * Git's remote-helper transports (`ext::`, `fd::`, …) execute arbitrary
6
+ * commands at clone time, `file://`/`git://` are unauthenticated, and a source
7
+ * beginning with `-` is parsed by `git` as a command-line flag (option
8
+ * injection). We therefore allow only:
9
+ * - `https://` (encrypted + authenticated)
10
+ * - `ssh://` and SCP-style `git@host:path` / `host:path`
11
+ * - local filesystem paths (callers handle these before reaching `git clone`)
12
+ *
13
+ * Pure string inspection — no filesystem or platform calls — so it behaves
14
+ * identically on Linux, macOS, and Windows.
15
+ *
16
+ * @param source The clone/pull source string to check.
+ * @returns Nothing; returning normally means the source was accepted.
+ * @throws Error if the source uses a disallowed transport.
17
+ */
18
+ export declare function assertSafeGitTransport(source: string): void;
1
19
  /** Parsed representation of a git source string (GitHub, generic URL, or local path). */
2
20
  export interface GitSource {
3
21
  type: 'github' | 'url' | 'local';