@phnx-labs/agents-cli 1.20.16 → 1.20.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +1 -1
  3. package/dist/commands/budget.d.ts +14 -0
  4. package/dist/commands/budget.js +137 -0
  5. package/dist/commands/cost.d.ts +12 -0
  6. package/dist/commands/cost.js +139 -0
  7. package/dist/commands/exec.d.ts +20 -0
  8. package/dist/commands/exec.js +382 -5
  9. package/dist/commands/secrets.d.ts +15 -0
  10. package/dist/commands/secrets.js +250 -4
  11. package/dist/commands/sessions.js +4 -0
  12. package/dist/commands/sync.d.ts +10 -3
  13. package/dist/commands/sync.js +72 -9
  14. package/dist/index.js +4 -0
  15. package/dist/lib/budget/config.d.ts +9 -0
  16. package/dist/lib/budget/config.js +115 -0
  17. package/dist/lib/budget/enforce.d.ts +94 -0
  18. package/dist/lib/budget/enforce.js +151 -0
  19. package/dist/lib/budget/ledger.d.ts +61 -0
  20. package/dist/lib/budget/ledger.js +107 -0
  21. package/dist/lib/budget/preflight.d.ts +110 -0
  22. package/dist/lib/budget/preflight.js +200 -0
  23. package/dist/lib/checkpoint.d.ts +54 -0
  24. package/dist/lib/checkpoint.js +56 -0
  25. package/dist/lib/cloud/rush.js +18 -0
  26. package/dist/lib/exec.d.ts +36 -0
  27. package/dist/lib/exec.js +192 -4
  28. package/dist/lib/git.d.ts +18 -0
  29. package/dist/lib/git.js +67 -4
  30. package/dist/lib/hooks.js +12 -0
  31. package/dist/lib/loop.d.ts +145 -0
  32. package/dist/lib/loop.js +330 -0
  33. package/dist/lib/mcp.d.ts +7 -0
  34. package/dist/lib/mcp.js +24 -0
  35. package/dist/lib/models.d.ts +11 -0
  36. package/dist/lib/models.js +21 -0
  37. package/dist/lib/plugin-marketplace.js +16 -6
  38. package/dist/lib/plugins.js +5 -2
  39. package/dist/lib/pricing/cost.d.ts +46 -0
  40. package/dist/lib/pricing/cost.js +71 -0
  41. package/dist/lib/pricing/index.d.ts +8 -0
  42. package/dist/lib/pricing/index.js +8 -0
  43. package/dist/lib/pricing/prices.json +138 -0
  44. package/dist/lib/pricing/table.d.ts +17 -0
  45. package/dist/lib/pricing/table.js +73 -0
  46. package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
  47. package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
  48. package/dist/lib/secrets/agent.d.ts +134 -0
  49. package/dist/lib/secrets/agent.js +501 -0
  50. package/dist/lib/secrets/bundles.d.ts +21 -0
  51. package/dist/lib/secrets/bundles.js +43 -0
  52. package/dist/lib/secrets/drivers/rush.d.ts +14 -0
  53. package/dist/lib/secrets/drivers/rush.js +84 -0
  54. package/dist/lib/secrets/linux.js +88 -10
  55. package/dist/lib/secrets/sync-backend.d.ts +48 -0
  56. package/dist/lib/secrets/sync-backend.js +13 -0
  57. package/dist/lib/secrets/sync.d.ts +15 -23
  58. package/dist/lib/secrets/sync.js +31 -66
  59. package/dist/lib/session/db.d.ts +40 -0
  60. package/dist/lib/session/db.js +84 -2
  61. package/dist/lib/session/discover.d.ts +2 -0
  62. package/dist/lib/session/discover.js +126 -2
  63. package/dist/lib/session/render.d.ts +2 -0
  64. package/dist/lib/session/render.js +1 -1
  65. package/dist/lib/session/types.d.ts +4 -0
  66. package/dist/lib/sync-umbrella.d.ts +76 -0
  67. package/dist/lib/sync-umbrella.js +125 -0
  68. package/dist/lib/teams/agents.d.ts +32 -0
  69. package/dist/lib/teams/agents.js +66 -3
  70. package/dist/lib/teams/api.js +20 -0
  71. package/dist/lib/teams/parsers.js +16 -4
  72. package/dist/lib/types.d.ts +48 -0
  73. package/dist/lib/workflows.d.ts +56 -0
  74. package/dist/lib/workflows.js +72 -5
  75. package/package.json +2 -1
package/dist/lib/git.js CHANGED
@@ -11,17 +11,76 @@ import * as path from 'path';
11
11
  import { IS_WINDOWS, isWindowsAbsolutePath } from './platform/index.js';
12
12
  import { getPackageLocalPath } from './state.js';
13
13
  import { DEFAULT_SYSTEM_REPO, systemRepoSlug } from './types.js';
/**
 * Validate that a clone/pull source uses a safe git transport before it is
 * handed to `git`.
 *
 * Git's remote-helper transports (`ext::`, `fd::`, …) execute arbitrary
 * commands at clone time, `file://`/`git://` are unauthenticated, and a source
 * beginning with `-` is parsed by `git` as a command-line flag (option
 * injection). We therefore allow only:
 *   - `https://` (encrypted + authenticated)
 *   - `ssh://` and SCP-style `git@host:path` / `host:path`
 *   - local filesystem paths (callers handle these before reaching `git clone`)
 *
 * Pure string inspection — no filesystem or platform calls — so it behaves
 * identically on Linux, macOS, and Windows.
 *
 * @param source Clone/pull source exactly as supplied by the caller. It is
 *   trimmed for inspection only; the original text is echoed in error messages.
 * @throws Error if the source uses a disallowed transport.
 */
export function assertSafeGitTransport(source) {
    const s = source.trim();
    // A leading dash is interpreted by git as an option, not a source.
    if (s.startsWith('-')) {
        throw new Error(`Refusing to use git source "${source}": a source starting with "-" is interpreted as a git option.`);
    }
    // Remote-helper transports look like "<name>::…" (ext::, fd::, …). SCP-style
    // "git@host:path" uses a single ":" and is intentionally not matched here.
    // The name pattern accepts a leading digit as well as a letter: git's own
    // scheme scanner is looser than RFC 3986 on the first character, so a
    // letter-only pattern would let a digit-led helper name slip through as a
    // "local path".
    if (/^[a-zA-Z0-9][a-zA-Z0-9+.-]*::/.test(s)) {
        throw new Error(`Refusing to use git source "${source}": git remote-helper transports (ext::, fd::, …) are not allowed.`);
    }
    // Explicit "<scheme>://" URLs: permit only https and ssh. Same first-character
    // rule as above, so every "<something>://" form goes through the allow-list.
    const scheme = /^([a-zA-Z0-9][a-zA-Z0-9+.-]*):\/\//.exec(s);
    if (scheme) {
        const name = scheme[1].toLowerCase();
        if (name !== 'https' && name !== 'ssh') {
            throw new Error(`Refusing to use git source "${source}": "${name}://" is not an allowed transport (use https:// or ssh://).`);
        }
    }
    // No scheme -> SCP-style SSH ("git@host:path") or a local path; both safe.
}
/**
 * Report whether hooks from a cloned/pulled repo's `.githooks/` may be installed.
 *
 * Installing hooks wires those scripts into `.git/hooks/`, so `git` EXECUTES
 * them on the next commit/checkout/merge. A repo added via `agents repo add
 * <source>` is untrusted, so auto-installing its hooks is remote code
 * execution. Installation is therefore opt-in: the environment variable
 * `AGENTS_ENABLE_GITHOOKS` must be exactly `1` or `true`.
 *
 * @returns `true` only when the opt-in variable holds one of the accepted values.
 */
function githooksEnabled() {
    const optIn = process.env.AGENTS_ENABLE_GITHOOKS;
    // Exact, case-sensitive match; unset or any other value means "disabled".
    return ['1', 'true'].includes(optIn);
}
14
65
  /**
15
66
  * Install hooks from `.githooks/` by symlinking each entry into `.git/hooks/`.
16
67
  *
17
- * Why: `git config core.hooksPath` is a known sandbox-escape vector and is
18
- * blocked by some sandboxed environments (e.g. Claude Code). Symlinks inside
19
- * `.git/hooks/` sidestep that restriction entirely -- Git runs them the same way.
68
+ * Gated behind `AGENTS_ENABLE_GITHOOKS=1` (see {@link githooksEnabled}) because
69
+ * the hooks run code on git operations and the source repo may be untrusted.
70
+ *
71
+ * Why symlinks rather than `git config core.hooksPath`: `core.hooksPath` is a
72
+ * known sandbox-escape vector and is blocked by some sandboxed environments
73
+ * (e.g. Claude Code). Symlinks inside `.git/hooks/` run the same way.
20
74
  */
21
75
  function installGithooksSymlinks(repoDir) {
22
76
  const githooksDir = path.join(repoDir, '.githooks');
23
77
  if (!fs.existsSync(githooksDir))
24
78
  return;
79
+ if (!githooksEnabled()) {
80
+ console.error(`Skipped installing git hooks from ${githooksDir} (they run code on git operations).\n` +
81
+ ` Set AGENTS_ENABLE_GITHOOKS=1 to enable hooks for repos you trust.`);
82
+ return;
83
+ }
25
84
  const hooksDir = path.join(repoDir, '.git', 'hooks');
26
85
  fs.mkdirSync(hooksDir, { recursive: true });
27
86
  for (const name of fs.readdirSync(githooksDir)) {
@@ -121,7 +180,9 @@ export function parseSource(source) {
121
180
  ref: ref || 'main',
122
181
  };
123
182
  }
124
- // Generic URL
183
+ // Generic URL -- must be an encrypted, authenticated transport
184
+ // (rejects http://, file://, git://, ext::, and leading "-").
185
+ assertSafeGitTransport(cleanSource);
125
186
  return {
126
187
  type: 'url',
127
188
  url: cleanSource.endsWith('.git') ? cleanSource : `${cleanSource}.git`,
@@ -183,6 +244,7 @@ export async function cloneOrPull(source, targetDir) {
183
244
  const log = await repoGit.log({ maxCount: 1 });
184
245
  return { isNew: false, commit: log.latest?.hash.slice(0, 8) || 'unknown' };
185
246
  }
247
+ assertSafeGitTransport(source.url);
186
248
  fs.mkdirSync(targetDir, { recursive: true });
187
249
  await git.clone(source.url, targetDir);
188
250
  const repoGit = simpleGit(targetDir);
@@ -364,6 +426,7 @@ export async function cloneIntoExisting(source, targetDir) {
364
426
  const git = simpleGit();
365
427
  const tempDir = path.join(targetDir, '.git-clone-temp');
366
428
  try {
429
+ assertSafeGitTransport(parsed.url);
367
430
  // Clone to temp directory
368
431
  fs.mkdirSync(tempDir, { recursive: true });
369
432
  await git.clone(parsed.url, tempDir);
package/dist/lib/hooks.js CHANGED
@@ -90,6 +90,18 @@ function isManagedHookCommand(command, prefixes) {
90
90
  for (const prefix of prefixes) {
91
91
  if (resolved.startsWith(prefix))
92
92
  return true;
93
+ // The command dir above is realpath-resolved, but a raw prefix may still
94
+ // point through a symlink (macOS TMPDIR /var -> /private/var, or a
95
+ // symlinked ~/.agents). Compare against a realpath-normalized prefix too
96
+ // so the two sides match. Strip the trailing sep, resolve the dir, re-add.
97
+ const rawPrefixDir = prefix.endsWith(path.sep) ? prefix.slice(0, -path.sep.length) : prefix;
98
+ let resolvedPrefix = prefix;
99
+ try {
100
+ resolvedPrefix = fs.realpathSync(rawPrefixDir) + path.sep;
101
+ }
102
+ catch { /* absent or broken link */ }
103
+ if (resolvedPrefix !== prefix && resolved.startsWith(resolvedPrefix))
104
+ return true;
93
105
  }
94
106
  return false;
95
107
  }
@@ -0,0 +1,145 @@
1
+ /**
2
+ * Autonomous loop driver (issue #332).
3
+ *
4
+ * Re-injects an entrypoint each iteration until a stop condition is met. The
5
+ * driver is the deterministic skeleton; the entrypoint inside stays dynamic (it
6
+ * can spawn subagents freely). Every guard — `max_iterations`, `budget`, the
7
+ * `until: signal` condition, SIGINT/SIGTERM — lives OUTSIDE the agent, so the
8
+ * agent cannot vote past a kill-switch (the standard answer to runaway-loop and
9
+ * runaway-cost failure modes; see docs/07-entrypoints-and-loops.md).
10
+ *
11
+ * Structure mirrors the teams supervisor (`runSupervisor` in teams/supervisor.ts):
12
+ * a bounded for-loop with a hard cap, a SIGINT/SIGTERM trap that flips a stop
13
+ * flag, a per-iteration guard check, an interval sleep, and a typed `stoppedBy`
14
+ * union for the exit reason.
15
+ *
16
+ * Token accounting: the budget cap is a TOKEN hard-cap, enforced after each
17
+ * turn from the usage events parsed off the agent's stream-json output. Token
18
+ * extraction reuses `extractUsageEvents` from budget/enforce.ts (read-only
19
+ * import) rather than re-implementing the per-provider parsing.
20
+ */
21
+ import type { AgentId } from './types.js';
22
+ import type { ExecOptions } from './exec.js';
23
+ import { type Checkpoint } from './checkpoint.js';
24
+ /** Loop block config (docs/07-entrypoints-and-loops.md → "The loop block"). */
25
+ export interface LoopConfig {
26
+ /** Stop condition. `signal` reads loop-signal.json; absence is fail-closed. */
27
+ until?: 'signal';
28
+ /** Hard cap on iterations. */
29
+ maxIterations?: number;
30
+ /** Token hard-cap, enforced outside the agent. */
31
+ budget?: number;
32
+ /** Delay between iterations: "0" back-to-back, "30m" paces. */
33
+ interval?: string;
34
+ }
35
+ /** The loop-signal.json contract the entrypoint writes each iteration. */
36
+ export interface LoopSignal {
37
+ continue: boolean;
38
+ reason?: string;
39
+ }
40
+ /** Why the loop stopped. Mirrors the teams supervisor exit reasons. */
41
+ export type LoopStoppedBy = 'condition-met' | 'budget' | 'stalled' | 'max' | 'signal' | 'error';
42
+ /** Result of a loop run. */
43
+ export interface LoopResult {
44
+ /** Iterations actually executed. */
45
+ iterations: number;
46
+ stoppedBy: LoopStoppedBy;
47
+ elapsedMs: number;
48
+ /** Cumulative tokens consumed across all iterations. */
49
+ tokens: number;
50
+ /** Last loop-signal read, if any. */
51
+ lastSignal?: LoopSignal;
52
+ }
53
+ /** What a single iteration's run function returns. */
54
+ export interface IterationResult {
55
+ exitCode: number;
56
+ /** Tokens consumed this iteration (input + output + cache). */
57
+ tokens: number;
58
+ }
59
+ /** Per-iteration run function — the injectable seam that makes the driver testable. */
60
+ export type RunIteration = (options: ExecOptions) => Promise<IterationResult>;
61
+ /** Context the driver needs that isn't part of ExecOptions. */
62
+ export interface LoopContext {
63
+ runId: string;
64
+ runDir: string;
65
+ agent: AgentId;
66
+ version?: string;
67
+ /** Iteration to start at (1 for a fresh run, checkpoint.iteration+1 for a resume). */
68
+ startIteration?: number;
69
+ /** Tokens already consumed before this driver started (carried across a resume). */
70
+ startTokens?: number;
71
+ /**
72
+ * On a resume, the killed run's LAST iteration session id. The first resumed
73
+ * iteration `/continue`s from it to thread conversation memory forward.
74
+ * Undefined on a fresh run (iteration 1 mints its own id, no prior to continue).
75
+ */
76
+ sessionId?: string;
77
+ }
78
+ /** Dependency seams for testing. */
79
+ export interface LoopDeps {
80
+ /** Per-iteration runner. Defaults to a token-capturing spawn (defaultRunIteration). */
81
+ runIteration?: RunIteration;
82
+ /** Sleep function (ms). Defaults to setTimeout-backed. Injectable so tests don't wait. */
83
+ sleep?: (ms: number) => Promise<void>;
84
+ /** Checkpoint writer. Defaults to writeCheckpoint. */
85
+ writeCheckpoint?: (c: Checkpoint) => void;
86
+ }
87
+ /** Path to a run's loop-signal.json. */
88
+ export declare function loopSignalPath(runDir: string): string;
89
+ /**
90
+ * Build the prompt for iteration >= 2 so the agent CONTINUES the prior
91
+ * iteration's conversation instead of starting fresh.
92
+ *
93
+ * This reuses the repo's established cross-process Claude-continuity mechanism —
94
+ * the `/continue <id>` skill (see `buildFallbackPrompt` in exec.ts, which hands
95
+ * a rate-limit successor `/continue ${prevSessionId}`). The skill loads the
96
+ * prior transcript via `agents sessions <id>`, so continuity does NOT depend on
97
+ * the provider's native session being "active"; it reads the transcript off
98
+ * disk. That is why each loop iteration can safely pin a FRESH session id (the
99
+ * `--session-id` flag CREATES a session — re-passing one errors "Session ID
100
+ * already in use") while still threading the conversation forward via the
101
+ * prior id.
102
+ *
103
+ * The original entrypoint is re-appended after the continue directive so the
104
+ * agent both recalls the prior turn AND knows what to do this iteration.
105
+ */
106
+ export declare function buildLoopContinuePrompt(prevSessionId: string, entrypoint: string): string;
107
+ /**
108
+ * Resolve a loop interval string to milliseconds. `"0"` is an explicit
109
+ * back-to-back run (0ms). Any other string must parse via parseTimeout
110
+ * (e.g. "30m", "1h"); an unparseable value (e.g. "30s", "5", "abc") is a
111
+ * configuration error and must NOT silently coalesce to 0 (which would run the
112
+ * loop full-speed on a typo). Throws on bad input; validate at config build
113
+ * time (validateLoopInterval) so the error surfaces before the loop starts.
114
+ */
115
+ export declare function parseLoopInterval(interval: string | undefined): number;
116
+ /**
117
+ * Read and parse loop-signal.json. Returns null when the file is absent or
118
+ * unparseable — the caller treats null as fail-closed (continue:false).
119
+ */
120
+ export declare function readLoopSignal(runDir: string): LoopSignal | null;
121
+ /** Delete loop-signal.json so a stale signal never carries into the next iteration. */
122
+ export declare function clearLoopSignal(runDir: string): void;
123
+ /**
124
+ * Default per-iteration runner: spawn the agent, tee stdout, and sum token usage
125
+ * off the stream. This is a purpose-built token-capturing spawn for the loop's
126
+ * budget guard, not a re-implementation of exec's fallback/budget machinery —
127
+ * it reuses `buildExecCommand` / `buildExecEnv` (the canonical command/env
128
+ * builders) and `extractUsageEvents` (the canonical stream parser). The agent
129
+ * is forced to JSON/headless so the usage stream is parseable.
130
+ */
131
+ export declare function defaultRunIteration(options: ExecOptions): Promise<IterationResult>;
132
+ /**
133
+ * Run the autonomous loop. Returns when a guard trips, the until-condition is
134
+ * met, the iteration cap is reached, or a signal arrives.
135
+ *
136
+ * stoppedBy semantics:
137
+ * - `condition-met` — until=signal and the signal said stop (continue:false
138
+ * OR the file was absent/corrupt → fail-closed).
139
+ * - `budget` — cumulative tokens crossed the budget cap (checked after
140
+ * each turn, outside the agent).
141
+ * - `max` — ran maxIterations iterations without any earlier stop.
142
+ * - `signal` — SIGINT/SIGTERM arrived; checkpoint is written before exit.
143
+ * - `error` — an iteration threw or exited non-zero.
144
+ */
145
+ export declare function runLoop(execOptions: ExecOptions, loop: LoopConfig, ctx: LoopContext, deps?: LoopDeps): Promise<LoopResult>;
@@ -0,0 +1,330 @@
1
+ /**
2
+ * Autonomous loop driver (issue #332).
3
+ *
4
+ * Re-injects an entrypoint each iteration until a stop condition is met. The
5
+ * driver is the deterministic skeleton; the entrypoint inside stays dynamic (it
6
+ * can spawn subagents freely). Every guard — `max_iterations`, `budget`, the
7
+ * `until: signal` condition, SIGINT/SIGTERM — lives OUTSIDE the agent, so the
8
+ * agent cannot vote past a kill-switch (the standard answer to runaway-loop and
9
+ * runaway-cost failure modes; see docs/07-entrypoints-and-loops.md).
10
+ *
11
+ * Structure mirrors the teams supervisor (`runSupervisor` in teams/supervisor.ts):
12
+ * a bounded for-loop with a hard cap, a SIGINT/SIGTERM trap that flips a stop
13
+ * flag, a per-iteration guard check, an interval sleep, and a typed `stoppedBy`
14
+ * union for the exit reason.
15
+ *
16
+ * Token accounting: the budget cap is a TOKEN hard-cap, enforced after each
17
+ * turn from the usage events parsed off the agent's stream-json output. Token
18
+ * extraction reuses `extractUsageEvents` from budget/enforce.ts (read-only
19
+ * import) rather than re-implementing the per-provider parsing.
20
+ */
21
+ import { spawn } from 'child_process';
22
+ import { randomUUID } from 'crypto';
23
+ import * as fs from 'fs';
24
+ import * as path from 'path';
25
+ import { buildExecCommand, buildExecEnv } from './exec.js';
26
+ import { extractUsageEvents } from './budget/enforce.js';
27
+ import { parseTimeout } from './routines.js';
28
+ import { writeCheckpoint } from './checkpoint.js';
/** Default pacing delay: a promise that resolves after `ms` milliseconds via `setTimeout`. */
const defaultSleep = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});
/**
 * Location of a run's `loop-signal.json`.
 *
 * @param runDir Directory that holds the run's files.
 * @returns `runDir` joined with the fixed file name `loop-signal.json`.
 */
export function loopSignalPath(runDir) {
    const signalFileName = 'loop-signal.json';
    return path.join(runDir, signalFileName);
}
/**
 * Compose the prompt used from the second iteration onward, so the agent picks
 * up the previous iteration's conversation rather than starting cold.
 *
 * The prompt leads with a `/continue <id>` directive — the same cross-process
 * continuity mechanism the source comments attribute to `buildFallbackPrompt`
 * in exec.ts — and then repeats the original entrypoint, so the agent both
 * recalls the prior turn and knows what to do this iteration. Because
 * continuity is carried by the directive, each iteration can pin a fresh
 * session id while still threading the conversation forward via the prior id.
 *
 * @param prevSessionId Session id of the iteration to continue from.
 * @param entrypoint The original entrypoint prompt, appended after a blank line.
 * @returns `/continue <prevSessionId>`, a blank line, then `entrypoint`.
 */
export function buildLoopContinuePrompt(prevSessionId, entrypoint) {
    const directive = '/continue ';
    return directive.concat(prevSessionId, '\n\n', entrypoint);
}
/**
 * Convert a loop interval string into milliseconds.
 *
 * An omitted interval and the literal `"0"` (surrounding whitespace ignored)
 * both mean "run iterations back-to-back" and yield 0. Every other value is
 * delegated to `parseTimeout`; when that returns `null` the value is treated
 * as a configuration error rather than silently becoming 0, which would run
 * the loop full-speed on a typo. The original notes ask callers to validate at
 * config-build time (validateLoopInterval) so the error surfaces before the
 * loop starts.
 *
 * @param interval Interval text such as "0", "30m" or "1h", or `undefined`.
 * @returns The delay between iterations in milliseconds.
 * @throws Error when `parseTimeout` cannot parse the value.
 */
export function parseLoopInterval(interval) {
    const backToBack = interval === undefined || interval.trim() === '0';
    if (backToBack) {
        return 0;
    }
    const parsedMs = parseTimeout(interval);
    if (parsedMs !== null) {
        return parsedMs;
    }
    throw new Error(`Invalid loop interval '${interval}'. Use "0" for back-to-back or a duration like "30m", "1h", "2h30m" (units: w/d/h/m).`);
}
/**
 * Load a run's `loop-signal.json` and normalise it.
 *
 * @param runDir Directory that holds the run's files.
 * @returns `{ continue, reason }`, where `continue` is `true` only when the
 *   file says exactly `true` and `reason` is kept only when it is a string.
 *   Returns `null` when the file is missing, unreadable, not valid JSON, or not
 *   a JSON object — the caller treats `null` as fail-closed (continue:false).
 */
export function readLoopSignal(runDir) {
    const signalFile = loopSignalPath(runDir);
    let payload;
    try {
        // A missing file surfaces as a read error and lands in the catch below.
        payload = JSON.parse(fs.readFileSync(signalFile, 'utf-8'));
    }
    catch {
        return null;
    }
    if (!payload || typeof payload !== 'object') {
        return null;
    }
    const { reason } = payload;
    return {
        continue: payload.continue === true,
        reason: typeof reason === 'string' ? reason : undefined,
    };
}
/**
 * Remove a run's `loop-signal.json` so a stale vote cannot leak into the next
 * iteration. Best-effort: any failure, including the file already being
 * absent, is ignored because "no file" is the desired end state.
 *
 * @param runDir Directory that holds the run's files.
 */
export function clearLoopSignal(runDir) {
    const signalFile = loopSignalPath(runDir);
    try {
        fs.unlinkSync(signalFile);
    }
    catch {
        /* best-effort: a missing file is the desired state anyway. */
    }
}
/**
 * Default per-iteration runner: spawn the agent, mirror its output to this
 * process, and total the token usage reported on its stdout stream.
 *
 * The command and environment come from `buildExecCommand` / `buildExecEnv`,
 * and usage is parsed with `extractUsageEvents`, so this runner adds only the
 * token capture the loop's budget guard needs. The options are overridden to
 * JSON + headless + non-interactive so the usage stream is parseable.
 *
 * @param options Exec options for this iteration.
 * @returns Resolves on child `close` with `{ exitCode, tokens }`. `exitCode` is
 *   the child's code, or 1 when it was terminated by a signal, or 0 when neither
 *   is reported. Rejects if the child emits `error`.
 */
export function defaultRunIteration(options) {
    // A loop iteration is always re-injected programmatically, never an
    // interactive TUI, and the usage parser needs the stream-json output.
    const forced = { ...options, json: true, headless: true, interactive: false };
    const [executable, ...args] = buildExecCommand(forced);
    const env = buildExecEnv(forced);
    const cwd = forced.cwd || process.cwd();
    const model = forced.model ?? `${forced.agent}-default`;
    // Sum of every token category carried by one usage event.
    const eventTokens = (ev) => (ev.inputTokens ?? 0) + (ev.outputTokens ?? 0)
        + (ev.cacheReadTokens ?? 0) + (ev.cacheCreationTokens ?? 0);
    return new Promise((resolve, reject) => {
        // On Windows, bare command names and .cmd shims are launched via a shell.
        const viaShell = process.platform === 'win32' && (!path.isAbsolute(executable) || executable.endsWith('.cmd'));
        const child = spawn(executable, args, {
            cwd,
            stdio: ['inherit', 'pipe', 'pipe'],
            env,
            shell: viaShell,
        });
        let tokens = 0;
        // Unconsumed remainder handed back by the parser, fed into the next chunk.
        let carry = '';
        const { stdout, stderr } = child;
        if (stdout) {
            // Tee: the user still sees the agent's output while usage is tallied.
            stdout.pipe(process.stdout);
            stdout.on('data', (chunk) => {
                const parsed = extractUsageEvents(chunk.toString('utf-8'), carry, model, forced.agent);
                carry = parsed.rest;
                for (const ev of parsed.events) {
                    tokens += eventTokens(ev);
                }
            });
        }
        if (stderr) {
            stderr.pipe(process.stderr);
        }
        child.on('error', (err) => reject(err));
        child.on('close', (code, signal) => {
            const exitCode = code ?? (signal ? 1 : 0);
            resolve({ exitCode, tokens });
        });
    });
}
/**
 * Run the autonomous loop. Returns when a guard trips, the until-condition is
 * met, the iteration cap is reached, or a signal arrives.
 *
 * stoppedBy semantics:
 *   - `condition-met` — until=signal and the signal said stop (continue:false
 *                       OR the file was absent/corrupt → fail-closed), and the
 *                       iteration was not cut short by SIGINT/SIGTERM.
 *   - `budget`        — cumulative tokens reached the budget cap (checked after
 *                       each turn, outside the agent).
 *   - `max`           — the iteration cap was reached without any earlier stop.
 *   - `signal`        — SIGINT/SIGTERM arrived; checkpoint is written before exit.
 *   - `error`         — an iteration threw or exited non-zero.
 * (`stalled` is part of the declared `LoopStoppedBy` union but no path in this
 * function returns it.)
 *
 * After an iteration the checks run in this order: until-signal, budget,
 * non-zero exit (signal vs. error), then the stop flag. An iteration that
 * exited non-zero while the stop flag is set was interrupted, so its missing
 * loop-signal.json is not read as a "stop" vote — it falls through and is
 * reported as `signal`.
 *
 * @param execOptions Base exec options; `prompt` is required and is re-injected
 *   every iteration.
 * @param loop Loop configuration (`until`, `maxIterations`, `budget`, `interval`).
 * @param ctx Run identity plus resume state (`startIteration`, `startTokens`,
 *   `sessionId`).
 * @param deps Optional seams: `runIteration`, `sleep`, `writeCheckpoint`.
 * @returns Iterations executed by THIS call, stop reason, elapsed time,
 *   cumulative tokens (including `ctx.startTokens`), and the last signal read.
 * @throws Error if `execOptions.prompt` is undefined, or if `loop.interval`
 *   cannot be parsed.
 */
export async function runLoop(execOptions, loop, ctx, deps) {
    const runIteration = deps?.runIteration ?? defaultRunIteration;
    const sleep = deps?.sleep ?? defaultSleep;
    const persist = deps?.writeCheckpoint ?? writeCheckpoint;
    const startedAt = Date.now();
    const maxIterations = loop.maxIterations ?? 1000;
    const intervalMs = parseLoopInterval(loop.interval);
    // Per-iteration session pinning (issue #332). `--session-id` CREATES a
    // session, so each iteration must pin a DISTINCT id — re-passing one errors
    // "Session ID already in use". Iteration 1 pins `firstSessionId`; iteration
    // >= 2 mints a fresh id AND injects `/continue <prior id>` so the agent
    // threads the prior conversation forward (see buildLoopContinuePrompt).
    //
    // `prevSessionId` is the id whose transcript the NEXT iteration continues
    // from. On a resume it is ctx.sessionId (the killed run's last session);
    // on a fresh run it starts undefined and is set after iteration 1.
    const firstSessionId = randomUUID();
    let prevSessionId = ctx.sessionId;
    // The session id recorded in the checkpoint is the most recent iteration's id
    // (what a resume must continue from). Seeded to the resume id or iter-1 id.
    let lastIterationSessionId = ctx.sessionId ?? firstSessionId;
    const startIteration = ctx.startIteration ?? 1;
    // The loop re-injects the entrypoint every iteration, so a prompt is required.
    if (execOptions.prompt === undefined) {
        throw new Error('runLoop requires execOptions.prompt — the loop re-injects the entrypoint each iteration.');
    }
    const entrypointPrompt = execOptions.prompt;
    // `/continue` continuity only applies to claude. Other agents run each
    // iteration as an independent fresh conversation — warn so the lost
    // continuity is never silent.
    const continuitySupported = ctx.agent === 'claude';
    if (!continuitySupported && maxIterations !== 1) {
        process.stderr.write(`[loop] WARNING: cross-iteration conversation continuity applies to claude only. ` +
            `Each ${ctx.agent} iteration runs as an independent fresh conversation (no /continue handoff).\n`);
    }
    let tokens = ctx.startTokens ?? 0;
    let lastSignal;
    let stopSignal = false;
    // The trap only flips a flag; the loop decides when it is safe to stop.
    const onSig = () => { stopSignal = true; };
    process.once('SIGINT', onSig);
    process.once('SIGTERM', onSig);
    // Persist resume state as of `iteration` (the last iteration counted as done).
    const checkpoint = (iteration) => {
        const now = new Date().toISOString();
        persist({
            id: ctx.runId,
            agent: ctx.agent,
            version: ctx.version,
            prompt: entrypointPrompt,
            // Resume must continue from the LAST iteration's conversation, so the
            // checkpoint records that iteration's session id, not a single pinned id.
            sessionId: lastIterationSessionId,
            iteration,
            loop,
            loopSignal: lastSignal,
            cumulativeTokens: tokens,
            createdAt: now,
            updatedAt: now,
        });
    };
    const done = (iterations, stoppedBy) => ({
        iterations,
        stoppedBy,
        elapsedMs: Date.now() - startedAt,
        tokens,
        lastSignal,
    });
    try {
        let iteration = startIteration;
        for (; iteration <= maxIterations; iteration++) {
            if (stopSignal) {
                checkpoint(iteration - 1);
                return done(iteration - startIteration, 'signal');
            }
            // Pin a DISTINCT session id every iteration. The first executed
            // iteration of a fresh run reuses firstSessionId; every later iteration
            // (and every iteration of a resume) mints a new id.
            const iterationSessionId = prevSessionId === undefined ? firstSessionId : randomUUID();
            // Continuity: when a prior iteration exists and the agent supports it,
            // thread the conversation forward via `/continue <prior id>`. Otherwise
            // re-inject the bare entrypoint.
            const iterationPrompt = prevSessionId !== undefined && continuitySupported
                ? buildLoopContinuePrompt(prevSessionId, entrypointPrompt)
                : entrypointPrompt;
            // AGENTS_LOOP_SIGNAL / AGENTS_RUN_DIR: tell the entrypoint where to write
            // loop-signal.json so the guard (read OUTSIDE the agent) can see it. The
            // agent never decides whether to continue — it only writes its vote.
            const iterOptions = {
                ...execOptions,
                prompt: iterationPrompt,
                sessionId: iterationSessionId,
                env: {
                    ...execOptions.env,
                    AGENTS_RUN_DIR: ctx.runDir,
                    AGENTS_LOOP_SIGNAL: loopSignalPath(ctx.runDir),
                    AGENTS_LOOP_ITERATION: String(iteration),
                },
            };
            let result;
            try {
                result = await runIteration(iterOptions);
            }
            catch (err) {
                // The failed iteration does not count: checkpoint the previous one.
                checkpoint(iteration - 1);
                // A SIGINT/SIGTERM mid-iteration kills the child; the resulting throw
                // is a signal stop, not an error.
                if (stopSignal) {
                    return done(iteration - startIteration, 'signal');
                }
                const detail = err instanceof Error ? err.message : String(err);
                process.stderr.write(`[loop] iteration ${iteration} failed: ${detail}\n`);
                return done(iteration - startIteration, 'error');
            }
            // This iteration's conversation is now on disk under iterationSessionId.
            // The next iteration continues from it; a checkpoint records it for resume.
            prevSessionId = iterationSessionId;
            lastIterationSessionId = iterationSessionId;
            tokens += result.tokens;
            const completed = iteration - startIteration + 1;
            // Ctrl-C kills the child, which then exits non-zero without having
            // written its vote. That absence must not be read as "condition met".
            const interrupted = stopSignal && result.exitCode !== 0;
            // until=signal: read the signal the entrypoint wrote this iteration.
            // Absent/corrupt OR continue:false => stop (fail-closed). The file is
            // always consumed so a stale vote never reaches the next iteration.
            if (loop.until === 'signal') {
                lastSignal = readLoopSignal(ctx.runDir) ?? { continue: false, reason: 'loop-signal.json absent (fail-closed)' };
                clearLoopSignal(ctx.runDir);
                if (!lastSignal.continue && !interrupted) {
                    checkpoint(iteration);
                    return done(completed, 'condition-met');
                }
            }
            // Budget (token hard-cap), enforced after the turn — outside the agent.
            if (loop.budget !== undefined && tokens >= loop.budget) {
                checkpoint(iteration);
                return done(completed, 'budget');
            }
            // A non-zero exit is a hard error — UNLESS a signal arrived mid-iteration,
            // in which case it is a 'signal' stop.
            if (result.exitCode !== 0) {
                checkpoint(iteration);
                if (stopSignal) {
                    return done(completed, 'signal');
                }
                process.stderr.write(`[loop] iteration ${iteration} exited ${result.exitCode}\n`);
                return done(completed, 'error');
            }
            checkpoint(iteration);
            if (stopSignal) {
                return done(completed, 'signal');
            }
            // Pace between iterations. Skip the sleep after the final iteration.
            if (iteration < maxIterations && intervalMs > 0) {
                await sleep(intervalMs);
                if (stopSignal) {
                    return done(completed, 'signal');
                }
            }
        }
        // `iteration` is one past the last executed iteration, or still
        // `startIteration` when a resume began beyond the cap — so the count
        // can never go negative.
        return done(iteration - startIteration, 'max');
    }
    finally {
        process.off('SIGINT', onSig);
        process.off('SIGTERM', onSig);
    }
}
package/dist/lib/mcp.d.ts CHANGED
@@ -66,6 +66,13 @@ export declare function installMcpConfigCentrally(sourcePath: string): {
66
66
  export declare function getMcpServersByName(names?: string[], options?: {
67
67
  cwd?: string;
68
68
  }): InstalledMcpServer[];
69
+ /**
70
+ * Assemble the JSON payload Claude's `--mcp-config` flag expects from a set of
71
+ * installed MCP servers: `{ "mcpServers": { "<name>": { command, args, env } | { url } } }`.
72
+ * Pure — takes servers, returns a JSON string. The caller writes it to an
73
+ * ephemeral file and passes the path to buildExecCommand.
74
+ */
75
+ export declare function buildWorkflowMcpConfig(servers: InstalledMcpServer[]): string;
69
76
  export declare function registerMcpCommandToTargets(targets: {
70
77
  directAgents: AgentId[];
71
78
  versionSelections: Map<AgentId, string[]>;
package/dist/lib/mcp.js CHANGED
@@ -166,6 +166,30 @@ export function getMcpServersByName(names, options = {}) {
166
166
  }
167
167
  return allServers.filter((server) => names.includes(server.name));
168
168
  }
169
+ /**
170
+ * Assemble the JSON payload Claude's `--mcp-config` flag expects from a set of
171
+ * installed MCP servers: `{ "mcpServers": { "<name>": { command, args, env } | { url } } }`.
172
+ * Pure — takes servers, returns a JSON string. The caller writes it to an
173
+ * ephemeral file and passes the path to buildExecCommand.
174
+ */
175
+ export function buildWorkflowMcpConfig(servers) {
176
+ const mcpServers = {};
177
+ for (const server of servers) {
178
+ const cfg = server.config;
179
+ if (cfg.transport === 'http') {
180
+ mcpServers[server.name] = { url: cfg.url };
181
+ }
182
+ else {
183
+ const entry = { command: cfg.command };
184
+ if (cfg.args && cfg.args.length > 0)
185
+ entry.args = cfg.args;
186
+ if (cfg.env && Object.keys(cfg.env).length > 0)
187
+ entry.env = cfg.env;
188
+ mcpServers[server.name] = entry;
189
+ }
190
+ }
191
+ return JSON.stringify({ mcpServers });
192
+ }
169
193
  /**
170
194
  * Install MCP server using Claude CLI.
171
195
  * Uses: claude mcp add --scope user --transport <type> <name> [--env K=V]... -- <cmd> [args...]
package/dist/lib/models.d.ts CHANGED
@@ -85,6 +85,17 @@ export interface ResolvedModel {
85
85
  * - If `requested` is unknown to our extractor, we forward it and warn.
86
86
  */
87
87
  export declare function resolveModel(agent: AgentId, version: string, requested: string): ResolvedModel;
88
+ /**
89
+ * Resolve the model id an `agents run` will ACTUALLY use, for cost estimation
90
+ * (issue #346). The run path resolves the model in this precedence:
91
+ * 1. explicit `--model` (or profile/workflow/runDefaults value) — `requested`
92
+ * 2. otherwise the agent CLI's own built-in default, which we read from the
93
+ * extracted catalog's `isDefault` model.
94
+ * Returns null only when we have neither — the caller must then treat the
95
+ * estimate as unpriced rather than silently using an unpriced placeholder id
96
+ * like `${agent}-default`.
97
+ */
98
+ export declare function resolveEffectiveModel(agent: AgentId, version: string, requested?: string): string | null;
88
99
  /**
89
100
  * Build the per-agent CLI flags for a unified reasoning effort knob.
90
101
  *