npm - @phnx-labs/agents-cli - Versions diffs - 1.20.17 → 1.20.19 - Mend

@phnx-labs/agents-cli 1.20.17 → 1.20.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

package/CHANGELOG.md +19 -0
package/README.md +1 -1
package/dist/commands/budget.d.ts +14 -0
package/dist/commands/budget.js +137 -0
package/dist/commands/cost.d.ts +12 -0
package/dist/commands/cost.js +139 -0
package/dist/commands/exec.d.ts +20 -0
package/dist/commands/exec.js +382 -5
package/dist/commands/secrets.d.ts +15 -0
package/dist/commands/secrets.js +343 -16
package/dist/commands/sessions.js +4 -0
package/dist/index.js +4 -0
package/dist/lib/budget/config.d.ts +9 -0
package/dist/lib/budget/config.js +115 -0
package/dist/lib/budget/enforce.d.ts +94 -0
package/dist/lib/budget/enforce.js +151 -0
package/dist/lib/budget/ledger.d.ts +61 -0
package/dist/lib/budget/ledger.js +107 -0
package/dist/lib/budget/preflight.d.ts +110 -0
package/dist/lib/budget/preflight.js +200 -0
package/dist/lib/checkpoint.d.ts +54 -0
package/dist/lib/checkpoint.js +56 -0
package/dist/lib/cloud/rush.js +18 -0
package/dist/lib/exec.d.ts +36 -0
package/dist/lib/exec.js +192 -4
package/dist/lib/git.d.ts +18 -0
package/dist/lib/git.js +67 -4
package/dist/lib/loop.d.ts +145 -0
package/dist/lib/loop.js +330 -0
package/dist/lib/mcp.d.ts +7 -0
package/dist/lib/mcp.js +24 -0
package/dist/lib/models.d.ts +11 -0
package/dist/lib/models.js +21 -0
package/dist/lib/plugins.js +5 -2
package/dist/lib/pricing/cost.d.ts +46 -0
package/dist/lib/pricing/cost.js +71 -0
package/dist/lib/pricing/index.d.ts +8 -0
package/dist/lib/pricing/index.js +8 -0
package/dist/lib/pricing/prices.json +138 -0
package/dist/lib/pricing/table.d.ts +17 -0
package/dist/lib/pricing/table.js +73 -0
package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
package/dist/lib/secrets/agent.d.ts +147 -0
package/dist/lib/secrets/agent.js +500 -0
package/dist/lib/secrets/bundles.d.ts +58 -7
package/dist/lib/secrets/bundles.js +264 -75
package/dist/lib/secrets/filestore.d.ts +82 -0
package/dist/lib/secrets/filestore.js +295 -0
package/dist/lib/secrets/linux.d.ts +6 -24
package/dist/lib/secrets/linux.js +22 -265
package/dist/lib/session/db.d.ts +40 -0
package/dist/lib/session/db.js +84 -2
package/dist/lib/session/discover.d.ts +2 -0
package/dist/lib/session/discover.js +126 -2
package/dist/lib/session/render.d.ts +2 -0
package/dist/lib/session/render.js +1 -1
package/dist/lib/session/types.d.ts +4 -0
package/dist/lib/teams/agents.d.ts +32 -0
package/dist/lib/teams/agents.js +66 -3
package/dist/lib/teams/api.js +20 -0
package/dist/lib/teams/parsers.js +16 -4
package/dist/lib/types.d.ts +48 -0
package/dist/lib/workflows.d.ts +56 -0
package/dist/lib/workflows.js +72 -5
package/package.json +2 -1

package/dist/lib/git.js CHANGED Viewed

@@ -11,17 +11,76 @@ import * as path from 'path';
 import { IS_WINDOWS, isWindowsAbsolutePath } from './platform/index.js';
 import { getPackageLocalPath } from './state.js';
 import { DEFAULT_SYSTEM_REPO, systemRepoSlug } from './types.js';
+/**
+ * Validate that a clone/pull source uses a safe git transport before it is
+ * handed to `git`.
+ *
+ * Git's remote-helper transports (`ext::`, `fd::`, …) execute arbitrary
+ * commands at clone time, `file://`/`git://` are unauthenticated, and a source
+ * beginning with `-` is parsed by `git` as a command-line flag (option
+ * injection). We therefore allow only:
+ *   - `https://`                         (encrypted + authenticated)
+ *   - `ssh://` and SCP-style `git@host:path` / `host:path`
+ *   - local filesystem paths (callers handle these before reaching `git clone`)
+ *
+ * Pure string inspection — no filesystem or platform calls — so it behaves
+ * identically on Linux, macOS, and Windows.
+ *
+ * All checks run against a whitespace-trimmed copy of `source`, while the
+ * error messages quote the original, untrimmed value. Scheme names are
+ * lower-cased before comparison, so `HTTPS://` and `SSH://` are accepted too.
+ * Any other `<scheme>://` (including `http://`) is rejected.
+ *
+ * @param source Clone/pull source string to validate (URL, SCP-style address, or path).
+ * @returns Nothing; returning normally means the source passed every check.
+ * @throws Error if the source uses a disallowed transport.
+ */
+export function assertSafeGitTransport(source) {
+    const s = source.trim();
+    // A leading dash is interpreted by git as an option, not a source.
+    if (s.startsWith('-')) {
+        throw new Error(`Refusing to use git source "${source}": a source starting with "-" is interpreted as a git option.`);
+    }
+    // Remote-helper transports look like "<name>::…" (ext::, fd::, …). SCP-style
+    // "git@host:path" uses a single ":" and is intentionally not matched here.
+    const helper = s.match(/^[a-zA-Z][a-zA-Z0-9+.-]*::/);
+    if (helper) {
+        throw new Error(`Refusing to use git source "${source}": git remote-helper transports (ext::, fd::, …) are not allowed.`);
+    }
+    // Explicit "<scheme>://" URLs: permit only https and ssh.
+    const scheme = s.match(/^([a-zA-Z][a-zA-Z0-9+.-]*):\/\//);
+    if (scheme) {
+        const name = scheme[1].toLowerCase();
+        if (name !== 'https' && name !== 'ssh') {
+            throw new Error(`Refusing to use git source "${source}": "${name}://" is not an allowed transport (use https:// or ssh://).`);
+        }
+    }
+    // No scheme -> SCP-style SSH ("git@host:path") or a local path; both safe.
+}
+/**
+ * Whether installing a cloned/pulled repo's `.githooks/` is enabled.
+ *
+ * Installing hooks wires those scripts into `.git/hooks/`, so `git` EXECUTES
+ * them on the next commit/checkout/merge. A repo added via `agents repo add
+ * <source>` is untrusted, so auto-installing its hooks is remote code
+ * execution. We require explicit opt-in via `AGENTS_ENABLE_GITHOOKS=1`.
+ *
+ * The environment variable is compared exactly: only the strings `1` and
+ * `true` enable hooks. Any other value (or an unset variable) leaves them
+ * disabled.
+ *
+ * @returns `true` when `AGENTS_ENABLE_GITHOOKS` is `1` or `true`, else `false`.
+ */
+function githooksEnabled() {
+    const v = process.env.AGENTS_ENABLE_GITHOOKS;
+    return v === '1' || v === 'true';
+}
 /**
  * Install hooks from `.githooks/` by symlinking each entry into `.git/hooks/`.
  *
- * Why: `git config core.hooksPath` is a known sandbox-escape vector and is
- * blocked by some sandboxed environments (e.g. Claude Code). Symlinks inside
- * `.git/hooks/` sidestep that restriction entirely -- Git runs them the same way.
+ * Gated behind `AGENTS_ENABLE_GITHOOKS=1` (see {@link githooksEnabled}) because
+ * the hooks run code on git operations and the source repo may be untrusted.
+ *
+ * Why symlinks rather than `git config core.hooksPath`: `core.hooksPath` is a
+ * known sandbox-escape vector and is blocked by some sandboxed environments
+ * (e.g. Claude Code). Symlinks inside `.git/hooks/` run the same way.
  */
 function installGithooksSymlinks(repoDir) {
     const githooksDir = path.join(repoDir, '.githooks');
     if (!fs.existsSync(githooksDir))
         return;
+    if (!githooksEnabled()) {
+        console.error(`Skipped installing git hooks from ${githooksDir} (they run code on git operations).\n` +
+            `  Set AGENTS_ENABLE_GITHOOKS=1 to enable hooks for repos you trust.`);
+        return;
+    }
     const hooksDir = path.join(repoDir, '.git', 'hooks');
     fs.mkdirSync(hooksDir, { recursive: true });
     for (const name of fs.readdirSync(githooksDir)) {
@@ -121,7 +180,9 @@ export function parseSource(source) {
                 ref: ref || 'main',
             };
         }
-        // Generic URL
+        // Generic URL -- must be an encrypted, authenticated transport
+        // (rejects http://, file://, git://, ext::, and leading "-").
+        assertSafeGitTransport(cleanSource);
         return {
             type: 'url',
             url: cleanSource.endsWith('.git') ? cleanSource : `${cleanSource}.git`,
@@ -183,6 +244,7 @@ export async function cloneOrPull(source, targetDir) {
         const log = await repoGit.log({ maxCount: 1 });
         return { isNew: false, commit: log.latest?.hash.slice(0, 8) || 'unknown' };
     }
+    assertSafeGitTransport(source.url);
     fs.mkdirSync(targetDir, { recursive: true });
     await git.clone(source.url, targetDir);
     const repoGit = simpleGit(targetDir);
@@ -364,6 +426,7 @@ export async function cloneIntoExisting(source, targetDir) {
     const git = simpleGit();
     const tempDir = path.join(targetDir, '.git-clone-temp');
     try {
+        assertSafeGitTransport(parsed.url);
         // Clone to temp directory
         fs.mkdirSync(tempDir, { recursive: true });
         await git.clone(parsed.url, tempDir);

package/dist/lib/loop.d.ts ADDED Viewed

@@ -0,0 +1,145 @@
+/**
+ * Autonomous loop driver (issue #332).
+ *
+ * Re-injects an entrypoint each iteration until a stop condition is met. The
+ * driver is the deterministic skeleton; the entrypoint inside stays dynamic (it
+ * can spawn subagents freely). Every guard — `max_iterations`, `budget`, the
+ * `until: signal` condition, SIGINT/SIGTERM — lives OUTSIDE the agent, so the
+ * agent cannot vote past a kill-switch (the standard answer to runaway-loop and
+ * runaway-cost failure modes; see docs/07-entrypoints-and-loops.md).
+ *
+ * Structure mirrors the teams supervisor (`runSupervisor` in teams/supervisor.ts):
+ * a bounded for-loop with a hard cap, a SIGINT/SIGTERM trap that flips a stop
+ * flag, a per-iteration guard check, an interval sleep, and a typed `stoppedBy`
+ * union for the exit reason.
+ *
+ * Token accounting: the budget cap is a TOKEN hard-cap, enforced after each
+ * turn from the usage events parsed off the agent's stream-json output. Token
+ * extraction reuses `extractUsageEvents` from budget/enforce.ts (read-only
+ * import) rather than re-implementing the per-provider parsing.
+ */
+import type { AgentId } from './types.js';
+import type { ExecOptions } from './exec.js';
+import { type Checkpoint } from './checkpoint.js';
+/**
+ * Loop block config (docs/07-entrypoints-and-loops.md → "The loop block").
+ * Every field is optional; `runLoop` applies the fallbacks noted per field.
+ */
+export interface LoopConfig {
+    /** Stop condition. `signal` reads loop-signal.json; absence is fail-closed. When omitted, the signal file is not consulted. */
+    until?: 'signal';
+    /** Hard cap on iterations. `runLoop` falls back to 1000 when this is omitted. */
+    maxIterations?: number;
+    /** Token hard-cap, enforced outside the agent. The loop stops once cumulative tokens are >= this value. */
+    budget?: number;
+    /** Delay between iterations: "0" back-to-back, "30m" paces. Omitted means no delay. */
+    interval?: string;
+}
+/**
+ * The loop-signal.json contract the entrypoint writes each iteration.
+ *
+ * `readLoopSignal` normalizes the parsed file into this shape: `continue` is
+ * `true` only when the JSON value is exactly `true`, and `reason` is kept only
+ * when it is a string.
+ */
+export interface LoopSignal {
+    continue: boolean;
+    reason?: string;
+}
+/**
+ * Why the loop stopped. Mirrors the teams supervisor exit reasons.
+ *
+ * NOTE(review): the `runLoop` implementation in loop.js returns
+ * `'condition-met'`, `'budget'`, `'max'`, `'signal'` and `'error'`, but never
+ * `'stalled'` — confirm whether another producer exists or the member is
+ * reserved for future use.
+ */
+export type LoopStoppedBy = 'condition-met' | 'budget' | 'stalled' | 'max' | 'signal' | 'error';
+/** Result of a loop run, as returned by `runLoop`. */
+export interface LoopResult {
+    /** Iterations actually executed by this `runLoop` call (iterations that ran before a resume are not counted). */
+    iterations: number;
+    stoppedBy: LoopStoppedBy;
+    elapsedMs: number;
+    /** Cumulative tokens consumed across all iterations, including any `startTokens` carried in on a resume. */
+    tokens: number;
+    /** Last loop-signal read, if any. Only populated when the loop runs with `until: 'signal'`. */
+    lastSignal?: LoopSignal;
+}
+/**
+ * What a single iteration's run function returns.
+ *
+ * `exitCode` is the iteration's process exit code; any non-zero value ends
+ * the loop. `defaultRunIteration` reports 1 when the child was terminated by
+ * a signal and therefore has no exit code of its own.
+ */
+export interface IterationResult {
+    exitCode: number;
+    /** Tokens consumed this iteration (input + output + cache). */
+    tokens: number;
+}
+/**
+ * Per-iteration run function — the injectable seam that makes the driver testable.
+ *
+ * `runLoop` calls it once per iteration with the caller's options plus that
+ * iteration's `prompt`, `sessionId` and the `AGENTS_RUN_DIR` /
+ * `AGENTS_LOOP_SIGNAL` / `AGENTS_LOOP_ITERATION` environment entries.
+ */
+export type RunIteration = (options: ExecOptions) => Promise<IterationResult>;
+/**
+ * Context the driver needs that isn't part of ExecOptions.
+ *
+ * - `runId`   — recorded as the checkpoint `id`.
+ * - `runDir`  — directory holding loop-signal.json; exported to the agent as
+ *               `AGENTS_RUN_DIR`.
+ * - `agent`   — recorded in the checkpoint; `/continue` continuity is applied
+ *               only when this is `'claude'`.
+ * - `version` — recorded in the checkpoint.
+ */
+export interface LoopContext {
+    runId: string;
+    runDir: string;
+    agent: AgentId;
+    version?: string;
+    /** Iteration to start at (1 for a fresh run, checkpoint.iteration+1 for a resume). Defaults to 1. */
+    startIteration?: number;
+    /** Tokens already consumed before this driver started (carried across a resume). Defaults to 0. */
+    startTokens?: number;
+    /**
+     * On a resume, the killed run's LAST iteration session id. The first resumed
+     * iteration `/continue`s from it to thread conversation memory forward.
+     * Undefined on a fresh run (iteration 1 mints its own id, no prior to continue).
+     */
+    sessionId?: string;
+}
+/** Dependency seams for testing. Each omitted seam falls back to its production default. */
+export interface LoopDeps {
+    /** Per-iteration runner. Defaults to a token-capturing spawn (defaultRunIteration). */
+    runIteration?: RunIteration;
+    /** Sleep function (ms). Defaults to setTimeout-backed. Injectable so tests don't wait. Only called when the interval is greater than 0. */
+    sleep?: (ms: number) => Promise<void>;
+    /** Checkpoint writer. Defaults to writeCheckpoint. */
+    writeCheckpoint?: (c: Checkpoint) => void;
+}
+/**
+ * Path to a run's loop-signal.json.
+ *
+ * @param runDir Run directory that holds the signal file.
+ * @returns `runDir` joined with `loop-signal.json`.
+ */
+export declare function loopSignalPath(runDir: string): string;
+/**
+ * Build the prompt for iteration >= 2 so the agent CONTINUES the prior
+ * iteration's conversation instead of starting fresh.
+ *
+ * This reuses the repo's established cross-process Claude-continuity mechanism —
+ * the `/continue <id>` skill (see `buildFallbackPrompt` in exec.ts, which hands
+ * a rate-limit successor `/continue ${prevSessionId}`). The skill loads the
+ * prior transcript via `agents sessions <id>`, so continuity does NOT depend on
+ * the provider's native session being "active"; it reads the transcript off
+ * disk. That is why each loop iteration can safely pin a FRESH session id (the
+ * `--session-id` flag CREATES a session — re-passing one errors "Session ID
+ * already in use") while still threading the conversation forward via the
+ * prior id.
+ *
+ * The original entrypoint is re-appended after the continue directive so the
+ * agent both recalls the prior turn AND knows what to do this iteration.
+ *
+ * @param prevSessionId Session id of the iteration whose conversation is continued.
+ * @param entrypoint The original entrypoint prompt, re-appended unchanged.
+ * @returns `/continue <prevSessionId>`, a blank line, then `entrypoint`.
+ */
+export declare function buildLoopContinuePrompt(prevSessionId: string, entrypoint: string): string;
+/**
+ * Resolve a loop interval string to milliseconds. `"0"` is an explicit
+ * back-to-back run (0ms). Any other string must parse via parseTimeout
+ * (e.g. "30m", "1h"); an unparseable value (e.g. "30s", "5", "abc") is a
+ * configuration error and must NOT silently coalesce to 0 (which would run the
+ * loop full-speed on a typo). Throws on bad input; validate at config build
+ * time (validateLoopInterval) so the error surfaces before the loop starts.
+ *
+ * @param interval Interval string, or `undefined` for "no delay" (returns 0).
+ * @returns The delay in milliseconds.
+ * @throws Error when `interval` is neither `"0"` nor parseable by parseTimeout.
+ */
+export declare function parseLoopInterval(interval: string | undefined): number;
+/**
+ * Read and parse loop-signal.json. Returns null when the file is absent or
+ * unparseable — the caller treats null as fail-closed (continue:false).
+ *
+ * @param runDir Run directory that holds loop-signal.json.
+ * @returns The normalized signal, or `null` when there is nothing usable to read.
+ */
+export declare function readLoopSignal(runDir: string): LoopSignal | null;
+/**
+ * Delete loop-signal.json so a stale signal never carries into the next iteration.
+ * Best-effort: filesystem errors are swallowed rather than thrown.
+ *
+ * @param runDir Run directory that holds loop-signal.json.
+ */
+export declare function clearLoopSignal(runDir: string): void;
+/**
+ * Default per-iteration runner: spawn the agent, tee stdout, and sum token usage
+ * off the stream. This is a purpose-built token-capturing spawn for the loop's
+ * budget guard, not a re-implementation of exec's fallback/budget machinery —
+ * it reuses `buildExecCommand` / `buildExecEnv` (the canonical command/env
+ * builders) and `extractUsageEvents` (the canonical stream parser). The agent
+ * is forced to JSON/headless so the usage stream is parseable.
+ *
+ * @param options Exec options for this iteration; `json`, `headless` and
+ *   `interactive` are overridden by the implementation.
+ * @returns Resolves with the child's exit code and the tokens summed from its
+ *   output once the child closes; rejects if the child fails to spawn.
+ */
+export declare function defaultRunIteration(options: ExecOptions): Promise<IterationResult>;
+/**
+ * Run the autonomous loop. Returns when a guard trips, the until-condition is
+ * met, the iteration cap is reached, or a signal arrives.
+ *
+ * stoppedBy semantics:
+ *   - `condition-met` — until=signal and the signal said stop (continue:false
+ *     OR the file was absent/corrupt → fail-closed).
+ *   - `budget`        — cumulative tokens crossed the budget cap (checked after
+ *     each turn, outside the agent).
+ *   - `max`           — ran maxIterations iterations without any earlier stop.
+ *   - `signal`        — SIGINT/SIGTERM arrived; checkpoint is written before exit.
+ *   - `error`         — an iteration threw or exited non-zero.
+ *
+ * @param execOptions Base exec options; `prompt` is required and is re-injected every iteration.
+ * @param loop Loop block config (stop condition, caps, pacing).
+ * @param ctx Run identity, run directory and optional resume state.
+ * @param deps Optional test seams (runner, sleep, checkpoint writer).
+ * @returns Iteration count, stop reason, elapsed time, cumulative tokens and the last signal read.
+ */
+export declare function runLoop(execOptions: ExecOptions, loop: LoopConfig, ctx: LoopContext, deps?: LoopDeps): Promise<LoopResult>;

package/dist/lib/loop.js ADDED Viewed

@@ -0,0 +1,330 @@
+/**
+ * Autonomous loop driver (issue #332).
+ *
+ * Re-injects an entrypoint each iteration until a stop condition is met. The
+ * driver is the deterministic skeleton; the entrypoint inside stays dynamic (it
+ * can spawn subagents freely). Every guard — `max_iterations`, `budget`, the
+ * `until: signal` condition, SIGINT/SIGTERM — lives OUTSIDE the agent, so the
+ * agent cannot vote past a kill-switch (the standard answer to runaway-loop and
+ * runaway-cost failure modes; see docs/07-entrypoints-and-loops.md).
+ *
+ * Structure mirrors the teams supervisor (`runSupervisor` in teams/supervisor.ts):
+ * a bounded for-loop with a hard cap, a SIGINT/SIGTERM trap that flips a stop
+ * flag, a per-iteration guard check, an interval sleep, and a typed `stoppedBy`
+ * union for the exit reason.
+ *
+ * Token accounting: the budget cap is a TOKEN hard-cap, enforced after each
+ * turn from the usage events parsed off the agent's stream-json output. Token
+ * extraction reuses `extractUsageEvents` from budget/enforce.ts (read-only
+ * import) rather than re-implementing the per-provider parsing.
+ */
+import { spawn } from 'child_process';
+import { randomUUID } from 'crypto';
+import * as fs from 'fs';
+import * as path from 'path';
+import { buildExecCommand, buildExecEnv } from './exec.js';
+import { extractUsageEvents } from './budget/enforce.js';
+import { parseTimeout } from './routines.js';
+import { writeCheckpoint } from './checkpoint.js';
+const defaultSleep = (ms) => new Promise((r) => setTimeout(r, ms)); // setTimeout-backed pause used by runLoop when no `deps.sleep` is injected
+/**
+ * Path to a run's loop-signal.json.
+ *
+ * @param runDir Run directory that holds the signal file.
+ * @returns `runDir` joined with `loop-signal.json`.
+ */
+export function loopSignalPath(runDir) {
+    return path.join(runDir, 'loop-signal.json');
+}
+/**
+ * Build the prompt for iteration >= 2 so the agent CONTINUES the prior
+ * iteration's conversation instead of starting fresh.
+ *
+ * This reuses the repo's established cross-process Claude-continuity mechanism —
+ * the `/continue <id>` skill (see `buildFallbackPrompt` in exec.ts, which hands
+ * a rate-limit successor `/continue ${prevSessionId}`). The skill loads the
+ * prior transcript via `agents sessions <id>`, so continuity does NOT depend on
+ * the provider's native session being "active"; it reads the transcript off
+ * disk. That is why each loop iteration can safely pin a FRESH session id (the
+ * `--session-id` flag CREATES a session — re-passing one errors "Session ID
+ * already in use") while still threading the conversation forward via the
+ * prior id.
+ *
+ * The original entrypoint is re-appended after the continue directive so the
+ * agent both recalls the prior turn AND knows what to do this iteration.
+ *
+ * @param prevSessionId Session id of the iteration whose conversation is continued.
+ * @param entrypoint The original entrypoint prompt, re-appended unchanged.
+ * @returns `/continue <prevSessionId>`, a blank line, then `entrypoint`.
+ */
+export function buildLoopContinuePrompt(prevSessionId, entrypoint) {
+    return `/continue ${prevSessionId}\n\n${entrypoint}`;
+}
+/**
+ * Resolve a loop interval string to milliseconds. `"0"` is an explicit
+ * back-to-back run (0ms). Any other string must parse via parseTimeout
+ * (e.g. "30m", "1h"); an unparseable value (e.g. "30s", "5", "abc") is a
+ * configuration error and must NOT silently coalesce to 0 (which would run the
+ * loop full-speed on a typo). Throws on bad input; validate at config build
+ * time (validateLoopInterval) so the error surfaces before the loop starts.
+ *
+ * `undefined` yields 0. Only the `"0"` comparison is done on a trimmed copy;
+ * the raw, untrimmed string is what gets handed to parseTimeout.
+ *
+ * @param interval Interval string, or `undefined` for "no delay".
+ * @returns The delay in milliseconds.
+ * @throws Error when parseTimeout returns `null` for `interval`.
+ */
+export function parseLoopInterval(interval) {
+    if (interval === undefined)
+        return 0;
+    if (interval.trim() === '0')
+        return 0;
+    const ms = parseTimeout(interval);
+    if (ms === null) {
+        throw new Error(`Invalid loop interval '${interval}'. Use "0" for back-to-back or a duration like "30m", "1h", "2h30m" (units: w/d/h/m).`);
+    }
+    return ms;
+}
+/**
+ * Read and parse loop-signal.json. Returns null when the file is absent or
+ * unparseable — the caller treats null as fail-closed (continue:false).
+ *
+ * JSON that parses to a falsy value or a non-object also yields null. For an
+ * object, `continue` is `true` only when the stored value is exactly `true`
+ * (anything else, including a missing key, becomes `false`), and `reason` is
+ * kept only when it is a string. Read and parse errors are caught and
+ * reported as null rather than thrown.
+ *
+ * @param runDir Run directory that holds loop-signal.json.
+ * @returns The normalized signal, or `null` when there is nothing usable to read.
+ */
+export function readLoopSignal(runDir) {
+    const file = loopSignalPath(runDir);
+    if (!fs.existsSync(file))
+        return null;
+    try {
+        const parsed = JSON.parse(fs.readFileSync(file, 'utf-8'));
+        if (!parsed || typeof parsed !== 'object')
+            return null;
+        return { continue: parsed.continue === true, reason: typeof parsed.reason === 'string' ? parsed.reason : undefined };
+    }
+    catch {
+        return null;
+    }
+}
+/**
+ * Delete loop-signal.json so a stale signal never carries into the next iteration.
+ * Best-effort: any filesystem error raised while checking for or unlinking the
+ * file is swallowed, so this function never throws.
+ *
+ * @param runDir Run directory that holds loop-signal.json.
+ */
+export function clearLoopSignal(runDir) {
+    const file = loopSignalPath(runDir);
+    try {
+        if (fs.existsSync(file))
+            fs.unlinkSync(file);
+    }
+    catch {
+        /* best-effort: a missing file is the desired state anyway. */
+    }
+}
+/**
+ * Default per-iteration runner: spawn the agent, tee stdout, and sum token usage
+ * off the stream. This is a purpose-built token-capturing spawn for the loop's
+ * budget guard, not a re-implementation of exec's fallback/budget machinery —
+ * it reuses `buildExecCommand` / `buildExecEnv` (the canonical command/env
+ * builders) and `extractUsageEvents` (the canonical stream parser). The agent
+ * is forced to JSON/headless so the usage stream is parseable.
+ *
+ * Process wiring: stdin is inherited; stdout and stderr are piped and
+ * forwarded to this process's stdout/stderr. The working directory is
+ * `options.cwd`, falling back to `process.cwd()`. When `options.model` is
+ * unset, the label `<agent>-default` is passed to the usage parser.
+ *
+ * Token total: for every usage event, input + output + cache-read +
+ * cache-creation tokens are added, with missing fields counted as 0.
+ *
+ * Exit code: the child's own code when it has one; otherwise 1 if it was
+ * terminated by a signal, else 0.
+ *
+ * NOTE(review): on Windows the child is spawned through a shell when the
+ * executable is not an absolute path or ends in `.cmd`; confirm that
+ * `buildExecCommand` produces arguments (notably the prompt) that are safe to
+ * pass through a shell.
+ * NOTE(review): whatever remains in `pending` when the stream closes is never
+ * handed back to `extractUsageEvents` — confirm that a final usage line
+ * without a trailing newline cannot be lost.
+ *
+ * @param options Exec options for this iteration; `json`, `headless` and
+ *   `interactive` are overridden here.
+ * @returns Resolves with `{ exitCode, tokens }` on the child's `close` event;
+ *   rejects with the spawn error on the child's `error` event.
+ */
+export function defaultRunIteration(options) {
+    // Force the stream-json output the usage parser needs; a loop iteration is
+    // always headless (re-injected programmatically, never an interactive TUI).
+    const execOptions = { ...options, json: true, headless: true, interactive: false };
+    const cmd = buildExecCommand(execOptions);
+    const [executable, ...args] = cmd;
+    const env = buildExecEnv(execOptions);
+    const cwd = execOptions.cwd || process.cwd();
+    const model = execOptions.model ?? `${execOptions.agent}-default`;
+    return new Promise((resolve, reject) => {
+        const useShell = process.platform === 'win32' && (!path.isAbsolute(executable) || executable.endsWith('.cmd'));
+        const child = spawn(executable, args, {
+            cwd,
+            stdio: ['inherit', 'pipe', 'pipe'],
+            env,
+            shell: useShell,
+        });
+        let tokens = 0;
+        let pending = '';
+        if (child.stdout) {
+            child.stdout.pipe(process.stdout);
+            child.stdout.on('data', (chunk) => {
+                const { events, rest } = extractUsageEvents(chunk.toString('utf-8'), pending, model, execOptions.agent);
+                pending = rest;
+                for (const ev of events) {
+                    tokens += (ev.inputTokens ?? 0) + (ev.outputTokens ?? 0)
+                        + (ev.cacheReadTokens ?? 0) + (ev.cacheCreationTokens ?? 0);
+                }
+            });
+        }
+        if (child.stderr)
+            child.stderr.pipe(process.stderr);
+        child.on('error', (err) => reject(err));
+        child.on('close', (code, signal) => {
+            resolve({ exitCode: code ?? (signal ? 1 : 0), tokens });
+        });
+    });
+}
+/**
+ * Run the autonomous loop. Returns when a guard trips, the until-condition is
+ * met, the iteration cap is reached, or a signal arrives.
+ *
+ * stoppedBy semantics:
+ *   - `condition-met` — until=signal and the signal said stop (continue:false
+ *     OR the file was absent/corrupt → fail-closed).
+ *   - `budget`        — cumulative tokens crossed the budget cap (checked after
+ *     each turn, outside the agent).
+ *   - `max`           — ran maxIterations iterations without any earlier stop.
+ *   - `signal`        — SIGINT/SIGTERM arrived; checkpoint is written before exit.
+ *   - `error`         — an iteration threw or exited non-zero.
+ */
+export async function runLoop(execOptions, loop, ctx, deps) {
+    const runIteration = deps?.runIteration ?? defaultRunIteration;
+    const sleep = deps?.sleep ?? defaultSleep;
+    const persist = deps?.writeCheckpoint ?? writeCheckpoint;
+    const startedAt = Date.now();
+    const maxIterations = loop.maxIterations ?? 1000;
+    const intervalMs = parseLoopInterval(loop.interval);
+    // Per-iteration session pinning (issue #332). `--session-id` CREATES a
+    // session, so each iteration must pin a DISTINCT id — re-passing one errors
+    // "Session ID already in use". Iteration 1 pins `firstSessionId`; iteration
+    // >= 2 mints a fresh id AND injects `/continue <prior id>` so the agent
+    // threads the prior conversation forward (see buildLoopContinuePrompt).
+    //
+    // `prevSessionId` is the id whose transcript the NEXT iteration continues
+    // from. On a resume it is ctx.sessionId (the killed run's last session);
+    // on a fresh run it starts undefined and is set after iteration 1.
+    const firstSessionId = randomUUID();
+    let prevSessionId = ctx.sessionId;
+    // The session id recorded in the checkpoint is the most recent iteration's id
+    // (what a resume must continue from). Seeded to the resume id or iter-1 id.
+    let lastIterationSessionId = ctx.sessionId ?? firstSessionId;
+    const startIteration = ctx.startIteration ?? 1;
+    // The loop re-injects the entrypoint every iteration, so a prompt is required.
+    // The command layer enforces this before dispatch; assert it here so the
+    // continuity prompt-builder has a defined entrypoint to thread.
+    if (execOptions.prompt === undefined) {
+        throw new Error('runLoop requires execOptions.prompt — the loop re-injects the entrypoint each iteration.');
+    }
+    const entrypointPrompt = execOptions.prompt;
+    // `/continue` continuity only applies to claude (the skill + native resume
+    // surface). Other agents run each iteration as an independent fresh
+    // conversation — warn so the lost continuity is never silent.
+    const continuitySupported = ctx.agent === 'claude';
+    if (!continuitySupported && maxIterations !== 1) {
+        process.stderr.write(`[loop] WARNING: cross-iteration conversation continuity applies to claude only. ` +
+            `Each ${ctx.agent} iteration runs as an independent fresh conversation (no /continue handoff).\n`);
+    }
+    let tokens = ctx.startTokens ?? 0;
+    let lastSignal;
+    let stopSignal = false;
+    const onSig = () => { stopSignal = true; };
+    process.once('SIGINT', onSig);
+    process.once('SIGTERM', onSig);
+    const checkpoint = (iteration) => {
+        const now = new Date().toISOString();
+        persist({
+            id: ctx.runId,
+            agent: ctx.agent,
+            version: ctx.version,
+            prompt: entrypointPrompt,
+            // Resume must continue from the LAST iteration's conversation, so the
+            // checkpoint records that iteration's session id (the one a future
+            // `/continue` should thread from), not a single pinned id.
+            sessionId: lastIterationSessionId,
+            iteration,
+            loop,
+            loopSignal: lastSignal,
+            cumulativeTokens: tokens,
+            createdAt: now,
+            updatedAt: now,
+        });
+    };
+    const done = (iterations, stoppedBy) => ({
+        iterations,
+        stoppedBy,
+        elapsedMs: Date.now() - startedAt,
+        tokens,
+        lastSignal,
+    });
+    try {
+        let iteration = startIteration;
+        for (; iteration <= maxIterations; iteration++) {
+            if (stopSignal) {
+                checkpoint(iteration - 1);
+                return done(iteration - startIteration, 'signal');
+            }
+            // Pin a DISTINCT session id every iteration (`--session-id` CREATES a
+            // session; re-passing one errors "Session ID already in use"). The first
+            // executed iteration of a fresh run reuses firstSessionId; every later
+            // iteration mints a new id.
+            const iterationSessionId = prevSessionId === undefined ? firstSessionId : randomUUID();
+            // Continuity: when a prior iteration exists (prevSessionId set) and the
+            // agent supports it, thread the conversation forward via the established
+            // `/continue <prior id>` prompt-injection. Otherwise re-inject the bare
+            // entrypoint. prevSessionId is set after iteration 1 of a fresh run, or
+            // carried in from ctx.sessionId on a resume.
+            const iterationPrompt = prevSessionId !== undefined && continuitySupported
+                ? buildLoopContinuePrompt(prevSessionId, entrypointPrompt)
+                : entrypointPrompt;
+            // AGENTS_LOOP_SIGNAL / AGENTS_RUN_DIR: tell the entrypoint where to write
+            // loop-signal.json so the guard (read OUTSIDE the agent) can see it. The
+            // agent never decides whether to continue — it only writes its vote.
+            const iterOptions = {
+                ...execOptions,
+                prompt: iterationPrompt,
+                sessionId: iterationSessionId,
+                env: {
+                    ...execOptions.env,
+                    AGENTS_RUN_DIR: ctx.runDir,
+                    AGENTS_LOOP_SIGNAL: loopSignalPath(ctx.runDir),
+                    AGENTS_LOOP_ITERATION: String(iteration),
+                },
+            };
+            let result;
+            try {
+                result = await runIteration(iterOptions);
+            }
+            catch (err) {
+                // A SIGINT/SIGTERM mid-iteration kills the child; the resulting throw
+                // is a signal stop, not an error. Check the stop flag first.
+                if (stopSignal) {
+                    checkpoint(iteration - 1);
+                    return done(iteration - startIteration, 'signal');
+                }
+                checkpoint(iteration - 1);
+                process.stderr.write(`[loop] iteration ${iteration} failed: ${err.message}\n`);
+                return done(iteration - startIteration, 'error');
+            }
+            // This iteration's conversation is now on disk under iterationSessionId.
+            // The next iteration continues from it; a checkpoint records it for resume.
+            prevSessionId = iterationSessionId;
+            lastIterationSessionId = iterationSessionId;
+            tokens += result.tokens;
+            const completed = iteration - startIteration + 1;
+            // until=signal: read the signal the entrypoint wrote this iteration.
+            // Absent/corrupt OR continue:false => stop (fail-closed).
+            if (loop.until === 'signal') {
+                lastSignal = readLoopSignal(ctx.runDir) ?? { continue: false, reason: 'loop-signal.json absent (fail-closed)' };
+                clearLoopSignal(ctx.runDir);
+                if (!lastSignal.continue) {
+                    checkpoint(iteration);
+                    return done(completed, 'condition-met');
+                }
+            }
+            // Budget (token hard-cap), enforced after the turn — outside the agent.
+            if (loop.budget !== undefined && tokens >= loop.budget) {
+                checkpoint(iteration);
+                return done(completed, 'budget');
+            }
+            // A non-zero exit is a hard error — UNLESS a signal arrived mid-iteration.
+            // Ctrl-C kills the child (non-zero exit / SIGINT exit code); that is a
+            // 'signal' stop (exit 130), not an 'error'. Check the stop flag first.
+            if (result.exitCode !== 0) {
+                if (stopSignal) {
+                    checkpoint(iteration);
+                    return done(completed, 'signal');
+                }
+                checkpoint(iteration);
+                process.stderr.write(`[loop] iteration ${iteration} exited ${result.exitCode}\n`);
+                return done(completed, 'error');
+            }
+            checkpoint(iteration);
+            if (stopSignal) {
+                return done(completed, 'signal');
+            }
+            // Pace between iterations. Skip the sleep after the final iteration.
+            if (iteration < maxIterations && intervalMs > 0) {
+                await sleep(intervalMs);
+                if (stopSignal) {
+                    return done(completed, 'signal');
+                }
+            }
+        }
+        return done(maxIterations - startIteration + 1, 'max');
+    }
+    finally {
+        process.off('SIGINT', onSig);
+        process.off('SIGTERM', onSig);
+    }
+}

package/dist/lib/mcp.d.ts CHANGED Viewed

@@ -66,6 +66,13 @@ export declare function installMcpConfigCentrally(sourcePath: string): {
 export declare function getMcpServersByName(names?: string[], options?: {
     cwd?: string;
 }): InstalledMcpServer[];
+/**
+ * Assemble the JSON payload Claude's `--mcp-config` flag expects from a set of
+ * installed MCP servers: `{ "mcpServers": { "<name>": { command, args, env } | { url } } }`.
+ * Pure — takes servers, returns a JSON string. The caller writes it to an
+ * ephemeral file and passes the path to buildExecCommand.
+ *
+ * @param servers Installed MCP servers to include, keyed in the output by `name`.
+ * @returns The serialized `{ mcpServers }` object.
+ */
+export declare function buildWorkflowMcpConfig(servers: InstalledMcpServer[]): string;
 export declare function registerMcpCommandToTargets(targets: {
     directAgents: AgentId[];
     versionSelections: Map<AgentId, string[]>;

package/dist/lib/mcp.js CHANGED Viewed

@@ -166,6 +166,30 @@ export function getMcpServersByName(names, options = {}) {
     }
     return allServers.filter((server) => names.includes(server.name));
 }
+/**
+ * Assemble the JSON payload Claude's `--mcp-config` flag expects from a set of
+ * installed MCP servers: `{ "mcpServers": { "<name>": { command, args, env } | { url } } }`.
+ * Pure — takes servers, returns a JSON string. The caller writes it to an
+ * ephemeral file and passes the path to buildExecCommand.
+ *
+ * Servers whose `config.transport` is `'http'` become `{ url }`. Every other
+ * server becomes `{ command }`, with `args` and `env` included only when they
+ * are non-empty. If two servers share a name, the later one wins.
+ *
+ * NOTE(review): the http entry carries no `type` field and drops any other
+ * http settings (e.g. headers) — confirm Claude's `--mcp-config` accepts a
+ * bare `{ url }` entry.
+ * NOTE(review): any transport other than `'http'` is treated as a command
+ * server — confirm no other URL-based transport (e.g. SSE) can reach here.
+ *
+ * @param servers Installed MCP servers to include, keyed in the output by `name`.
+ * @returns The serialized `{ mcpServers }` object.
+ */
+export function buildWorkflowMcpConfig(servers) {
+    const mcpServers = {};
+    for (const server of servers) {
+        const cfg = server.config;
+        if (cfg.transport === 'http') {
+            mcpServers[server.name] = { url: cfg.url };
+        }
+        else {
+            const entry = { command: cfg.command };
+            if (cfg.args && cfg.args.length > 0)
+                entry.args = cfg.args;
+            if (cfg.env && Object.keys(cfg.env).length > 0)
+                entry.env = cfg.env;
+            mcpServers[server.name] = entry;
+        }
+    }
+    return JSON.stringify({ mcpServers });
+}
 /**
  * Install MCP server using Claude CLI.
  * Uses: claude mcp add --scope user --transport <type> <name> [--env K=V]... -- <cmd> [args...]

package/dist/lib/models.d.ts CHANGED Viewed

@@ -85,6 +85,17 @@ export interface ResolvedModel {
  * - If `requested` is unknown to our extractor, we forward it and warn.
  */
 export declare function resolveModel(agent: AgentId, version: string, requested: string): ResolvedModel;
+/**
+ * Resolve the model id an `agents run` will ACTUALLY use, for cost estimation
+ * (issue #346). The run path resolves the model in this precedence:
+ *   1. explicit `--model` (or profile/workflow/runDefaults value) — `requested`
+ *   2. otherwise the agent CLI's own built-in default, which we read from the
+ *      extracted catalog's `isDefault` model.
+ * Returns null only when we have neither — the caller must then treat the
+ * estimate as unpriced rather than silently using an unpriced placeholder id
+ * like `${agent}-default`.
+ *
+ * @param agent Agent whose model is being resolved.
+ * @param version Agent CLI version whose catalog supplies the default model.
+ * @param requested Explicitly requested model, if any; takes precedence when given.
+ * @returns The effective model id, or `null` when none can be determined.
+ */
+export declare function resolveEffectiveModel(agent: AgentId, version: string, requested?: string): string | null;
 /**
  * Build the per-agent CLI flags for a unified reasoning effort knob.
  *