npm - @phnx-labs/agents-cli - Versions diffs - 1.20.17 → 1.20.18 - Mend

@phnx-labs/agents-cli 1.20.17 → 1.20.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

package/CHANGELOG.md +15 -0
package/README.md +1 -1
package/dist/commands/budget.d.ts +14 -0
package/dist/commands/budget.js +137 -0
package/dist/commands/cost.d.ts +12 -0
package/dist/commands/cost.js +139 -0
package/dist/commands/exec.d.ts +20 -0
package/dist/commands/exec.js +382 -5
package/dist/commands/secrets.d.ts +15 -0
package/dist/commands/secrets.js +250 -4
package/dist/commands/sessions.js +4 -0
package/dist/index.js +4 -0
package/dist/lib/budget/config.d.ts +9 -0
package/dist/lib/budget/config.js +115 -0
package/dist/lib/budget/enforce.d.ts +94 -0
package/dist/lib/budget/enforce.js +151 -0
package/dist/lib/budget/ledger.d.ts +61 -0
package/dist/lib/budget/ledger.js +107 -0
package/dist/lib/budget/preflight.d.ts +110 -0
package/dist/lib/budget/preflight.js +200 -0
package/dist/lib/checkpoint.d.ts +54 -0
package/dist/lib/checkpoint.js +56 -0
package/dist/lib/cloud/rush.js +18 -0
package/dist/lib/exec.d.ts +36 -0
package/dist/lib/exec.js +192 -4
package/dist/lib/git.d.ts +18 -0
package/dist/lib/git.js +67 -4
package/dist/lib/loop.d.ts +145 -0
package/dist/lib/loop.js +330 -0
package/dist/lib/mcp.d.ts +7 -0
package/dist/lib/mcp.js +24 -0
package/dist/lib/models.d.ts +11 -0
package/dist/lib/models.js +21 -0
package/dist/lib/plugins.js +5 -2
package/dist/lib/pricing/cost.d.ts +46 -0
package/dist/lib/pricing/cost.js +71 -0
package/dist/lib/pricing/index.d.ts +8 -0
package/dist/lib/pricing/index.js +8 -0
package/dist/lib/pricing/prices.json +138 -0
package/dist/lib/pricing/table.d.ts +17 -0
package/dist/lib/pricing/table.js +73 -0
package/dist/lib/secrets/Agents CLI.app/Contents/CodeResources +0 -0
package/dist/lib/secrets/Agents CLI.app/Contents/MacOS/Agents CLI +0 -0
package/dist/lib/secrets/agent.d.ts +134 -0
package/dist/lib/secrets/agent.js +501 -0
package/dist/lib/secrets/bundles.d.ts +21 -0
package/dist/lib/secrets/bundles.js +43 -0
package/dist/lib/session/db.d.ts +40 -0
package/dist/lib/session/db.js +84 -2
package/dist/lib/session/discover.d.ts +2 -0
package/dist/lib/session/discover.js +126 -2
package/dist/lib/session/render.d.ts +2 -0
package/dist/lib/session/render.js +1 -1
package/dist/lib/session/types.d.ts +4 -0
package/dist/lib/teams/agents.d.ts +32 -0
package/dist/lib/teams/agents.js +66 -3
package/dist/lib/teams/api.js +20 -0
package/dist/lib/teams/parsers.js +16 -4
package/dist/lib/types.d.ts +48 -0
package/dist/lib/workflows.d.ts +56 -0
package/dist/lib/workflows.js +72 -5
package/package.json +2 -1

package/dist/lib/checkpoint.js ADDED Viewed

@@ -0,0 +1,56 @@
+/**
+ * Harness-level loop checkpoint (issue #332).
+ *
+ * A checkpoint is the durable harness state for a `--loop` run: it records the
+ * iteration count, the pinned session id, the prompt being re-injected, and the
+ * loop config — everything `--resume-checkpoint` needs to continue a run that a
+ * SIGTERM, timeout, or machine sleep killed mid-flight.
+ *
+ * This is NOT provider-side state. `--session-id` resumes Claude's *conversation*
+ * (server-side); a checkpoint resumes the *harness* (iteration count, loop
+ * variables, prompt chain) — the part Claude's own resume cannot recover.
+ *
+ * Atomic write (temp + rename) mirrors `writeRunMeta` in routines.ts so a crash
+ * mid-write never leaves a half-written checkpoint that `readCheckpoint` would
+ * choke on. `readCheckpoint` returns null on a missing or corrupt file (mirrors
+ * `readRunMeta`) — a corrupt checkpoint is a "start fresh", never a throw.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { getRunsDir } from './state.js';
+/**
+ * Path to a run's checkpoint file: <runsDir>/<runId>/checkpoint.json.
+ *
+ * @param runId  Run identifier; used as the directory name under the runs dir.
+ * @returns `getRunsDir()` joined with `runId` and `checkpoint.json`.
+ */
+export function checkpointPath(runId) {
+    return path.join(getRunsDir(), runId, 'checkpoint.json');
+}
+/**
+ * Write a checkpoint atomically (temp file + rename). The rename is atomic on a
+ * single filesystem, so a reader never observes a partially written file.
+ * Mirrors the durable-write contract of `writeRunMeta`.
+ *
+ * The target's parent directory is created recursively if it does not exist.
+ * The temp file is named `<target>.<pid>.tmp`, i.e. it lives in the same
+ * directory as the target and is distinct per writing process.
+ *
+ * NOTE(review): if the write or the rename throws, the error propagates to the
+ * caller and the temp file is not removed here.
+ *
+ * @param c     Checkpoint object; serialized as 2-space-indented JSON. Its
+ *              `id` selects the default target path.
+ * @param file  Optional explicit target path. When null/undefined the target
+ *              is `checkpointPath(c.id)`.
+ */
+export function writeCheckpoint(c, file) {
+    const target = file ?? checkpointPath(c.id);
+    fs.mkdirSync(path.dirname(target), { recursive: true });
+    const tmp = `${target}.${process.pid}.tmp`;
+    fs.writeFileSync(tmp, JSON.stringify(c, null, 2), 'utf-8');
+    fs.renameSync(tmp, target);
+}
+/**
+ * Read a checkpoint from disk. Returns null if the file is missing or its
+ * contents are not valid JSON — corruption means "no resumable state", which
+ * the caller treats as a fresh start. Mirrors `readRunMeta`.
+ *
+ * Null is also returned when the JSON parses but is not a usable checkpoint:
+ * the value is falsy or not an object, `id` is not a string, or `iteration`
+ * is not a number. Only those two fields are validated; every other field is
+ * returned exactly as parsed. Any error thrown while reading or parsing is
+ * swallowed and reported as null, so this function does not throw for I/O or
+ * syntax problems.
+ *
+ * @param file  Path of the checkpoint file to read.
+ * @returns The parsed checkpoint object, or null when there is no valid one.
+ */
+export function readCheckpoint(file) {
+    if (!fs.existsSync(file))
+        return null;
+    try {
+        const parsed = JSON.parse(fs.readFileSync(file, 'utf-8'));
+        if (!parsed || typeof parsed !== 'object')
+            return null;
+        if (typeof parsed.id !== 'string' || typeof parsed.iteration !== 'number')
+            return null;
+        return parsed;
+    }
+    catch {
+        return null;
+    }
+}

package/dist/lib/cloud/rush.js CHANGED Viewed

@@ -341,6 +341,24 @@ export class RushCloudProvider {
         if (repos.length === 0) {
             throw new Error('Rush Cloud requires --repo <owner/repo> (or --repo repeated for multi-repo).');
         }
+        // Budget pre-flight gate (issue #346). Cloud dispatches inherit the local
+        // project's caps; we refuse to POST a run that would breach an on_exceed:block
+        // cap. The repo slug is the project attribution key. Server-side spend is
+        // authoritative for live enforcement; this pre-flight is the deterministic
+        // "don't even start it" guard. Dormant when no caps are configured.
+        {
+            const { runPreflightGate } = await import('../budget/preflight.js');
+            const projectKey = repos[0] ?? process.cwd();
+            const gate = runPreflightGate({
+                agent: options.agent ?? 'cloud',
+                model: options.model ?? `${options.agent ?? 'cloud'}-default`,
+                prompt: options.prompt,
+                project: projectKey,
+            });
+            if (!gate.dormant && !gate.decision.allow) {
+                throw new Error(`[budget] BLOCKED cloud dispatch (${projectKey}): ${gate.decision.reason}`);
+            }
+        }
         // Validate each repo's shape and resolve its installation_id up front.
         // Any bad entry fails the whole dispatch — we never want a half-started
         // multi-repo run that only found installations for some of the repos.

package/dist/lib/exec.d.ts CHANGED Viewed

@@ -82,6 +82,23 @@ export interface ExecOptions {
     sessionId?: string;
     verbose?: boolean;
     env?: Record<string, string>;
+    /**
+     * Workflow capability scoping (Claude only). Sourced from WORKFLOW.md
+     * frontmatter `tools:` / `mcpServers:` and translated to Claude headless
+     * flags in buildExecCommand. Other agents ignore these.
+     *
+     * `toolsRestrict` is the AVAILABLE-tool allowlist: it maps to `--tools`, which
+     * restricts the built-in tool set the run can use at all (NOT `--allowedTools`,
+     * which only auto-approves without restricting availability). Declaring
+     * `[Read, Grep]` makes Write/Bash/Edit unavailable for the whole run.
+     */
+    toolsRestrict?: string[];
+    /**
+     * Path to an ephemeral mcp-config JSON. Emitted as `--mcp-config <path>`
+     * together with `--strict-mcp-config` so ONLY the named servers load (the
+     * flag alone merely ADDS to the existing server set).
+     */
+    mcpConfigPath?: string;
 }
 /**
  * Resolve interactive vs headless. Explicit flags are definitive and win over
@@ -90,6 +107,23 @@ export interface ExecOptions {
  * `--interactive` takes precedence over `--headless`; the CLI layer rejects passing both.
  */
 export declare function resolveInteractive(options: Pick<ExecOptions, 'interactive' | 'headless' | 'prompt'>): boolean;
+/**
+ * Decide whether spawnAgent must capture (PIPE + tee) the child's stdout so the
+ * live budget watcher can parse it (issue #346, FIX 3).
+ *
+ * The bug this fixes: stdout used to be PIPED only when downstream output was
+ * piped (`piped = !isTTY`). For a normal headless run AT A TERMINAL, stdout was
+ * 'inherit', so `child.stdout` was null and the watcher — hence the mid-run
+ * hard-cap kill — was silently skipped. We now tap stdout for ALL
+ * non-interactive runs when caps are active, regardless of TTY, and tee it back
+ * so the user still sees output. Interactive REPLs are never tapped (the human
+ * owns the TTY; they rely on the pre-flight gate).
+ *
+ * @param interactive  resolveInteractive() result for the run
+ * @param piped        true when the parent's stdout is NOT a TTY (output piped)
+ * @param capsActive   true when a budget watcher is attached (caps configured)
+ * @returns false for interactive runs; otherwise true when either `piped` or
+ *          `capsActive` is true.
+ */
+export declare function shouldTapStdout(interactive: boolean, piped: boolean, capsActive: boolean): boolean;
 /** Parse an array of KEY=VALUE strings into an env record. Returns undefined for empty input. */
 export declare function parseExecEnv(entries: string[]): Record<string, string> | undefined;
 /**
@@ -135,6 +169,8 @@ export declare function execAgent(options: ExecOptions): Promise<number>;
  * keeping version resolution in one place instead of reimplementing it in batch.
  */
 export declare function execShimPassthrough(agent: AgentId, rawArgs: string[], cwd: string, pinnedVersion?: string): Promise<number>;
+/** Exit code spawnAgent resolves with when a run is killed for crossing a budget cap. */
+export declare const BUDGET_KILL_EXIT_CODE = 7;
 /**
  * Patterns that indicate a rate/usage limit. Matching is intentionally broad
  * because providers phrase these differently -- Anthropic uses "5-hour limit"

package/dist/lib/exec.js CHANGED Viewed

@@ -114,6 +114,29 @@ export function resolveInteractive(options) {
         return false;
     return options.prompt === undefined;
 }
+/**
+ * Decide whether spawnAgent must capture (PIPE + tee) the child's stdout so the
+ * live budget watcher can parse it (issue #346, FIX 3).
+ *
+ * The bug this fixes: stdout used to be PIPED only when downstream output was
+ * piped (`piped = !isTTY`). For a normal headless run AT A TERMINAL, stdout was
+ * 'inherit', so `child.stdout` was null and the watcher — hence the mid-run
+ * hard-cap kill — was silently skipped. We now tap stdout for ALL
+ * non-interactive runs when caps are active, regardless of TTY, and tee it back
+ * so the user still sees output. Interactive REPLs are never tapped (the human
+ * owns the TTY; they rely on the pre-flight gate).
+ *
+ * @param interactive  resolveInteractive() result for the run
+ * @param piped        true when the parent's stdout is NOT a TTY (output piped)
+ * @param capsActive   true when a budget watcher is attached (caps configured)
+ * @returns false for interactive runs; otherwise true when either `piped` or
+ *          `capsActive` is true.
+ */
+export function shouldTapStdout(interactive, piped, capsActive) {
+    if (interactive)
+        return false;
+    // Always pipe when the caller pipes us downstream (preserve composability),
+    // OR when caps are active so the watcher can read the stream at a TTY.
+    return piped || capsActive;
+}
 /** Pattern for valid environment variable names (C identifier rules). */
 const EXEC_ENV_KEY_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;
 /** Parse a single KEY=VALUE string into a tuple, validating the key name. */
@@ -540,6 +563,39 @@ export function buildExecCommand(options) {
             cmd.push('--add-dir', dir);
         }
     }
+    // Claude-specific: workflow capability scoping. WORKFLOW.md frontmatter
+    // `tools:` / `mcpServers:` is translated to the headless flags that ACTUALLY
+    // restrict the run (verified against `claude --help` on the installed CLI):
+    //
+    //   tools:       -> `--tools <names...>` — restricts the AVAILABLE built-in
+    //                   tool set. This is the security boundary: tools NOT named
+    //                   here (e.g. Write, Bash, Edit) are unavailable for the whole
+    //                   run. `--allowedTools` would only auto-approve without
+    //                   restricting, so it is the WRONG flag for sandboxing.
+    //                   We also emit `--allowedTools <names...>` for the same set so
+    //                   the permitted tools don't prompt in headless `-p` mode.
+    //   mcpServers:  -> `--mcp-config <path>` PLUS `--strict-mcp-config`. The
+    //                   config flag alone ADDS servers to the existing set; only
+    //                   `--strict-mcp-config` makes the run use *only* the named
+    //                   servers, which is what scoping means.
+    //
+    // The command layer gates this behind the `allowlist` capability and assembles
+    // the mcp-config file; buildExecCommand stays a pure string-builder.
+    //
+    // `<tools...>` is variadic. Emit the names as separate argv tokens. The flags
+    // here are appended AFTER the positional prompt (added above), so the variadic
+    // never swallows the prompt; the trailing `--allowedTools` / `--strict-mcp-config`
+    // tokens also terminate the `--tools` variadic cleanly.
+    if (options.agent === 'claude') {
+        if (options.toolsRestrict && options.toolsRestrict.length > 0) {
+            cmd.push('--tools', ...options.toolsRestrict);
+            cmd.push('--allowedTools', ...options.toolsRestrict);
+        }
+        if (options.mcpConfigPath) {
+            cmd.push('--mcp-config', options.mcpConfigPath);
+            cmd.push('--strict-mcp-config');
+        }
+    }
     return cmd;
 }
 /** Spawn an agent and return its exit code. Convenience wrapper over spawnAgent. */
@@ -599,6 +655,15 @@ async function spawnAgent(options) {
     const timeoutMs = options.timeout ? parseTimeout(options.timeout) : undefined;
     const piped = !process.stdout.isTTY;
     const interactive = resolveInteractive(options);
+    // Budget live kill-switch (issue #346). For headless runs we incrementally
+    // parse stream-json usage off stdout, accumulate cost, and kill the child the
+    // moment a configured cap is crossed — exactly like the --timeout path, but
+    // resolving with a DISTINCT exit code so CI/headless can tell budget-kill from
+    // timeout. Spend is recorded to the shared ledger in the close handler. The
+    // watcher is dormant (and zero-cost) when no caps are configured.
+    const cwd = options.cwd || process.cwd();
+    const runId = randomUUID();
+    const watcherState = await setupBudgetWatcher(options, cwd, runId);
     maybeRotate();
     const timer = createTimer('agent.run', {
         agent: options.agent,
@@ -617,9 +682,13 @@ async function spawnAgent(options) {
         // rendering, raw-mode keystrokes, colored output). Headless mode pipes
         // stderr so we can scan for rate limits and feed fallback. stdout stays
         // inherited for TTY, piped when the caller pipes us downstream.
+        // PIPE (and later tee) stdout whenever the live budget watcher must read it
+        // — for ALL non-interactive runs when caps are active, regardless of TTY.
+        // See shouldTapStdout() for the rationale (FIX 3, issue #346).
+        const tapStdout = shouldTapStdout(interactive, piped, watcherState !== null);
         const stdio = interactive
             ? ['inherit', 'inherit', 'inherit']
-            : ['inherit', piped ? 'pipe' : 'inherit', 'pipe'];
+            : ['inherit', tapStdout ? 'pipe' : 'inherit', 'pipe'];
         // On Windows, .cmd batch wrappers (npm-installed CLIs) require shell:true
         // whether addressed by name or absolute path.
         const useShell = process.platform === 'win32' && (!path.isAbsolute(executable) || executable.endsWith('.cmd'));
@@ -631,8 +700,29 @@ async function spawnAgent(options) {
         });
         // Mark startup time (time from function call to process spawn)
         timer.mark('startup');
-        if (!interactive && piped && child.stdout) {
+        let budgetKilled = false;
+        let budgetKillTimer;
+        if (!interactive && tapStdout && child.stdout) {
+            // TEE the child's stdout back to the parent's so the user still sees
+            // output (mirrors stdio:'inherit') while we tap the same stream for usage.
             child.stdout.pipe(process.stdout);
+            // Tap the same stream for budget usage events without consuming the pipe
+            // (a 'data' listener and .pipe() both receive every chunk). Kill on breach.
+            if (watcherState) {
+                let pendingLine = '';
+                child.stdout.on('data', (chunk) => {
+                    const { events, rest } = watcherState.extract(chunk.toString('utf-8'), pendingLine);
+                    pendingLine = rest;
+                    for (const ev of events)
+                        watcherState.watcher.feedUsage(ev);
+                    if (watcherState.watcher.breached() && !budgetKilled) {
+                        budgetKilled = true;
+                        process.stderr.write(`[budget] hard cap exceeded — terminating ${options.agent} run\n`);
+                        child.kill('SIGTERM');
+                        budgetKillTimer = setTimeout(() => child.kill('SIGKILL'), 5000);
+                    }
+                });
+            }
         }
         let stderrBuffer = '';
         const STDERR_BUFFER_CAP = 64 * 1024;
@@ -663,11 +753,94 @@ async function spawnAgent(options) {
         child.on('close', (code) => {
             if (timeoutTimer)
                 clearTimeout(timeoutTimer);
-            timer.end({ exitCode: code ?? 0, status: code === 0 ? 'success' : 'failed' });
-            resolve({ exitCode: code ?? 0, stderr: stderrBuffer });
+            // Clear the budget-kill SIGKILL escalation timer (mirror the --timeout
+            // timer cleanup) so a programmatic caller reusing execAgent (the #332 loop
+            // driver) never sees a stray 5s kill event fire after the child has exited.
+            if (budgetKillTimer)
+                clearTimeout(budgetKillTimer);
+            // Record final spend to the shared ledger (issue #346). Best-effort: a
+            // ledger write must never mask the run's own outcome.
+            if (watcherState) {
+                try {
+                    watcherState.finalize();
+                }
+                catch { /* ledger write is non-critical */ }
+                // Release the watcher's references / stop accepting events (symmetry).
+                try {
+                    watcherState.watcher.dispose();
+                }
+                catch { /* dispose is best-effort */ }
+            }
+            // Budget kill resolves with a DISTINCT non-zero exit so CI/headless and
+            // teams/cloud can tell a budget termination apart from a normal failure.
+            const exitCode = budgetKilled ? BUDGET_KILL_EXIT_CODE : (code ?? 0);
+            timer.end({ exitCode, status: budgetKilled ? 'budget_killed' : code === 0 ? 'success' : 'failed' });
+            resolve({ exitCode, stderr: stderrBuffer });
         });
     });
 }
+/** Exit code spawnAgent resolves with when a run is killed for crossing a budget cap. */
+export const BUDGET_KILL_EXIT_CODE = 7;
+/**
+ * Resolve the budget watcher for a run. Returns null (watcher dormant) when no
+ * caps are configured, so non-budget users pay nothing. When caps exist, builds
+ * a live watcher seeded with the day/project spend already on the ledger, plus
+ * a finalize() that appends this run's accumulated spend.
+ *
+ * Interactive runs also get null, and that check happens before any budget
+ * module is imported. The three budget modules are loaded with dynamic
+ * import() only for non-interactive runs.
+ *
+ * @param options  Exec options for the run. `options.agent` and `options.model`
+ *                 are read here; when no model is given, `<agent>-default` is
+ *                 used as the model name.
+ * @param cwd      Working directory. Used to resolve the budget config and as
+ *                 the project key for both the seeded project spend and the
+ *                 ledger records written by finalize().
+ * @param runId    Identifier stamped on every ledger record finalize() writes.
+ * @returns null, or an object with:
+ *   - `watcher`: the live spend watcher. It is created with a no-op `onBreach`
+ *     callback, so a breach is not signalled through that callback here.
+ *   - `extract(chunk, pending)`: runs `extractUsageEvents` on the chunk,
+ *     remembers each returned event (model + token counts), and returns the
+ *     extraction result unchanged.
+ *   - `finalize()`: calls `ledger.recordSpend` once per remembered event with
+ *     `source: 'run'`.
+ */
+async function setupBudgetWatcher(options, cwd, runId) {
+    const interactive = resolveInteractive(options);
+    if (interactive)
+        return null;
+    const [{ resolveBudgetConfig, hasAnyCap }, { makeLiveSpendWatcher, capsFromConfig, extractUsageEvents }, ledger] = await Promise.all([
+        import('./budget/config.js'),
+        import('./budget/enforce.js'),
+        import('./budget/ledger.js'),
+    ]);
+    const cfg = resolveBudgetConfig(cwd);
+    if (!hasAnyCap(cfg))
+        return null;
+    const today = ledger.localDay();
+    const entries = ledger.loadLedger();
+    const caps = capsFromConfig(cfg, {
+        daySpend: ledger.spendForDay(today, entries),
+        projectSpend: ledger.spendForProject(cwd, entries),
+        agentDaySpend: { [options.agent]: ledger.spendForAgentDay(options.agent, today, entries) },
+    });
+    const watcher = makeLiveSpendWatcher({ caps, onBreach: () => { } });
+    // Remember every usage event (with its model) so finalize() can write one ledger record per event; entries are not merged per model.
+    const seen = [];
+    const model = options.model ?? `${options.agent}-default`;
+    return {
+        watcher,
+        extract: (chunk, pending) => {
+            const res = extractUsageEvents(chunk, pending, model, options.agent);
+            for (const ev of res.events) {
+                seen.push({
+                    model: ev.model ?? model,
+                    usage: {
+                        inputTokens: ev.inputTokens,
+                        outputTokens: ev.outputTokens,
+                        cacheReadTokens: ev.cacheReadTokens,
+                        cacheCreationTokens: ev.cacheCreationTokens,
+                    },
+                });
+            }
+            return res;
+        },
+        finalize: () => {
+            for (const s of seen) {
+                ledger.recordSpend({
+                    runId,
+                    agent: options.agent,
+                    project: cwd,
+                    model: s.model,
+                    usage: s.usage,
+                    source: 'run',
+                });
+            }
+        },
+    };
+}
 /**
  * Patterns that indicate a rate/usage limit. Matching is intentionally broad
  * because providers phrase these differently -- Anthropic uses "5-hour limit"
@@ -733,6 +906,21 @@ export async function runWithFallback(options) {
     ];
     let prevAgent;
     let prevSessionId;
+    // Workflow capability scoping only takes effect on claude (buildExecCommand
+    // guards `--tools` / `--mcp-config` / `--strict-mcp-config` on agent==='claude').
+    // A fallback to any non-claude agent would run with NONE of that scoping — the
+    // declared sandbox silently evaporates. Warn loudly so a rate-limit handoff to
+    // an unscoped agent is never silent (issue #324 fail-open).
+    const scopingActive = (options.toolsRestrict && options.toolsRestrict.length > 0)
+        || !!options.mcpConfigPath;
+    if (scopingActive) {
+        const unscoped = options.fallback.filter(f => f.agent !== 'claude').map(f => f.agent);
+        if (unscoped.length > 0) {
+            process.stderr.write(`[agents] WARNING: workflow tool/MCP scoping is enforced on claude only. ` +
+                `Fallback agent(s) ${[...new Set(unscoped)].join(', ')} would run UNSCOPED ` +
+                `(no --tools / --strict-mcp-config restriction) if claude hits a rate limit.\n`);
+        }
+    }
     for (let i = 0; i < chain.length; i++) {
         const { agent, version } = chain[i];
         const pinnedSessionId = agent === 'claude' ? randomUUID() : undefined;

package/dist/lib/git.d.ts CHANGED Viewed

@@ -1,3 +1,21 @@
+/**
+ * Validate that a clone/pull source uses a safe git transport before it is
+ * handed to `git`.
+ *
+ * Git's remote-helper transports (`ext::`, `fd::`, …) execute arbitrary
+ * commands at clone time, `file://`/`git://` are unauthenticated, and a source
+ * beginning with `-` is parsed by `git` as a command-line flag (option
+ * injection). We therefore allow only:
+ *   - `https://`                         (encrypted + authenticated)
+ *   - `ssh://` and SCP-style `git@host:path` / `host:path`
+ *   - local filesystem paths (callers handle these before reaching `git clone`)
+ *
+ * Any other explicit `<scheme>://` URL is rejected, which includes plain
+ * `http://`. The scheme name is compared case-insensitively.
+ *
+ * Pure string inspection — no filesystem or platform calls — so it behaves
+ * identically on Linux, macOS, and Windows.
+ *
+ * @param source  The git source string to validate.
+ * @throws Error if the source uses a disallowed transport.
+ */
+export declare function assertSafeGitTransport(source: string): void;
 /** Parsed representation of a git source string (GitHub, generic URL, or local path). */
 export interface GitSource {
     type: 'github' | 'url' | 'local';

package/dist/lib/git.js CHANGED Viewed

@@ -11,17 +11,76 @@ import * as path from 'path';
 import { IS_WINDOWS, isWindowsAbsolutePath } from './platform/index.js';
 import { getPackageLocalPath } from './state.js';
 import { DEFAULT_SYSTEM_REPO, systemRepoSlug } from './types.js';
+/**
+ * Validate that a clone/pull source uses a safe git transport before it is
+ * handed to `git`.
+ *
+ * Git's remote-helper transports (`ext::`, `fd::`, …) execute arbitrary
+ * commands at clone time, `file://`/`git://` are unauthenticated, and a source
+ * beginning with `-` is parsed by `git` as a command-line flag (option
+ * injection). We therefore allow only:
+ *   - `https://`                         (encrypted + authenticated)
+ *   - `ssh://` and SCP-style `git@host:path` / `host:path`
+ *   - local filesystem paths (callers handle these before reaching `git clone`)
+ *
+ * Any other explicit `<scheme>://` URL is rejected, which includes plain
+ * `http://`. The scheme name is compared case-insensitively.
+ *
+ * All checks run against the whitespace-trimmed source, while the error
+ * messages quote the source exactly as it was passed in. A string with no
+ * `<name>::` prefix and no `<scheme>://` prefix passes without further
+ * inspection; this function does not itself tell SCP-style SSH sources apart
+ * from local paths.
+ *
+ * Pure string inspection — no filesystem or platform calls — so it behaves
+ * identically on Linux, macOS, and Windows.
+ *
+ * @param source  The git source string to validate.
+ * @throws Error if the source uses a disallowed transport.
+ */
+export function assertSafeGitTransport(source) {
+    const s = source.trim();
+    // A leading dash is interpreted by git as an option, not a source.
+    if (s.startsWith('-')) {
+        throw new Error(`Refusing to use git source "${source}": a source starting with "-" is interpreted as a git option.`);
+    }
+    // Remote-helper transports look like "<name>::…" (ext::, fd::, …). SCP-style
+    // "git@host:path" uses a single ":" and is intentionally not matched here.
+    const helper = s.match(/^[a-zA-Z][a-zA-Z0-9+.-]*::/);
+    if (helper) {
+        throw new Error(`Refusing to use git source "${source}": git remote-helper transports (ext::, fd::, …) are not allowed.`);
+    }
+    // Explicit "<scheme>://" URLs: permit only https and ssh.
+    const scheme = s.match(/^([a-zA-Z][a-zA-Z0-9+.-]*):\/\//);
+    if (scheme) {
+        const name = scheme[1].toLowerCase();
+        if (name !== 'https' && name !== 'ssh') {
+            throw new Error(`Refusing to use git source "${source}": "${name}://" is not an allowed transport (use https:// or ssh://).`);
+        }
+    }
+    // No scheme -> SCP-style SSH ("git@host:path") or a local path; both safe.
+}
+/**
+ * Whether installing a cloned/pulled repo's `.githooks/` is enabled.
+ *
+ * Installing hooks wires those scripts into `.git/hooks/`, so `git` EXECUTES
+ * them on the next commit/checkout/merge. A repo added via `agents repo add
+ * <source>` is untrusted, so auto-installing its hooks is remote code
+ * execution. We require explicit opt-in via `AGENTS_ENABLE_GITHOOKS=1`.
+ *
+ * The literal value `true` is accepted as well as `1`. The comparison is
+ * exact, so any other value (or an unset variable) leaves hooks disabled.
+ *
+ * @returns true only when `AGENTS_ENABLE_GITHOOKS` is exactly `1` or `true`.
+ */
+function githooksEnabled() {
+    const v = process.env.AGENTS_ENABLE_GITHOOKS;
+    return v === '1' || v === 'true';
+}
 /**
  * Install hooks from `.githooks/` by symlinking each entry into `.git/hooks/`.
  *
- * Why: `git config core.hooksPath` is a known sandbox-escape vector and is
- * blocked by some sandboxed environments (e.g. Claude Code). Symlinks inside
- * `.git/hooks/` sidestep that restriction entirely -- Git runs them the same way.
+ * Gated behind `AGENTS_ENABLE_GITHOOKS=1` (see {@link githooksEnabled}) because
+ * the hooks run code on git operations and the source repo may be untrusted.
+ *
+ * Why symlinks rather than `git config core.hooksPath`: `core.hooksPath` is a
+ * known sandbox-escape vector and is blocked by some sandboxed environments
+ * (e.g. Claude Code). Symlinks inside `.git/hooks/` run the same way.
  */
 function installGithooksSymlinks(repoDir) {
     const githooksDir = path.join(repoDir, '.githooks');
     if (!fs.existsSync(githooksDir))
         return;
+    if (!githooksEnabled()) {
+        console.error(`Skipped installing git hooks from ${githooksDir} (they run code on git operations).\n` +
+            `  Set AGENTS_ENABLE_GITHOOKS=1 to enable hooks for repos you trust.`);
+        return;
+    }
     const hooksDir = path.join(repoDir, '.git', 'hooks');
     fs.mkdirSync(hooksDir, { recursive: true });
     for (const name of fs.readdirSync(githooksDir)) {
@@ -121,7 +180,9 @@ export function parseSource(source) {
                 ref: ref || 'main',
             };
         }
-        // Generic URL
+        // Generic URL -- must be an encrypted, authenticated transport
+        // (rejects http://, file://, git://, ext::, and leading "-").
+        assertSafeGitTransport(cleanSource);
         return {
             type: 'url',
             url: cleanSource.endsWith('.git') ? cleanSource : `${cleanSource}.git`,
@@ -183,6 +244,7 @@ export async function cloneOrPull(source, targetDir) {
         const log = await repoGit.log({ maxCount: 1 });
         return { isNew: false, commit: log.latest?.hash.slice(0, 8) || 'unknown' };
     }
+    assertSafeGitTransport(source.url);
     fs.mkdirSync(targetDir, { recursive: true });
     await git.clone(source.url, targetDir);
     const repoGit = simpleGit(targetDir);
@@ -364,6 +426,7 @@ export async function cloneIntoExisting(source, targetDir) {
     const git = simpleGit();
     const tempDir = path.join(targetDir, '.git-clone-temp');
     try {
+        assertSafeGitTransport(parsed.url);
         // Clone to temp directory
         fs.mkdirSync(tempDir, { recursive: true });
         await git.clone(parsed.url, tempDir);

package/dist/lib/loop.d.ts ADDED Viewed

@@ -0,0 +1,145 @@
+/**
+ * Autonomous loop driver (issue #332).
+ *
+ * Re-injects an entrypoint each iteration until a stop condition is met. The
+ * driver is the deterministic skeleton; the entrypoint inside stays dynamic (it
+ * can spawn subagents freely). Every guard — `max_iterations`, `budget`, the
+ * `until: signal` condition, SIGINT/SIGTERM — lives OUTSIDE the agent, so the
+ * agent cannot vote past a kill-switch (the standard answer to runaway-loop and
+ * runaway-cost failure modes; see docs/07-entrypoints-and-loops.md).
+ *
+ * Structure mirrors the teams supervisor (`runSupervisor` in teams/supervisor.ts):
+ * a bounded for-loop with a hard cap, a SIGINT/SIGTERM trap that flips a stop
+ * flag, a per-iteration guard check, an interval sleep, and a typed `stoppedBy`
+ * union for the exit reason.
+ *
+ * Token accounting: the budget cap is a TOKEN hard-cap, enforced after each
+ * turn from the usage events parsed off the agent's stream-json output. Token
+ * extraction reuses `extractUsageEvents` from budget/enforce.ts (read-only
+ * import) rather than re-implementing the per-provider parsing.
+ */
+import type { AgentId } from './types.js';
+import type { ExecOptions } from './exec.js';
+import { type Checkpoint } from './checkpoint.js';
+/** Loop block config (docs/07-entrypoints-and-loops.md → "The loop block"). */
+export interface LoopConfig {
+    /** Stop condition. `signal` reads loop-signal.json; a missing or unparseable file fails closed (the loop stops). */
+    until?: 'signal';
+    /** Hard cap on iterations. */
+    maxIterations?: number;
+    /** Token hard-cap, enforced outside the agent. */
+    budget?: number;
+    /** Delay between iterations: "0" runs back-to-back; a duration such as "30m" pauses between runs. */
+    interval?: string;
+}
+/** The loop-signal.json contract the entrypoint writes each iteration. */
+export interface LoopSignal {
+    continue: boolean;
+    reason?: string;
+}
+/** Why the loop stopped. Mirrors the teams supervisor exit reasons. */
+export type LoopStoppedBy = 'condition-met' | 'budget' | 'stalled' | 'max' | 'signal' | 'error';
+/** Result of a loop run. */
+export interface LoopResult {
+    /** Iterations actually executed. */
+    iterations: number;
+    stoppedBy: LoopStoppedBy;
+    elapsedMs: number;
+    /** Cumulative tokens consumed across all iterations. */
+    tokens: number;
+    /** Last loop-signal read, if any. */
+    lastSignal?: LoopSignal;
+}
+/** What a single iteration's run function returns. */
+export interface IterationResult {
+    exitCode: number;
+    /** Tokens consumed this iteration (input + output + cache). */
+    tokens: number;
+}
+/** Per-iteration run function — the injectable seam that makes the driver testable. */
+export type RunIteration = (options: ExecOptions) => Promise<IterationResult>;
+/** Context the driver needs that isn't part of ExecOptions. */
+export interface LoopContext {
+    runId: string;
+    runDir: string;
+    agent: AgentId;
+    version?: string;
+    /** Iteration to start at (1 for a fresh run, checkpoint.iteration+1 for a resume). */
+    startIteration?: number;
+    /** Tokens already consumed before this driver started (carried across a resume). */
+    startTokens?: number;
+    /**
+     * On a resume, the killed run's LAST iteration session id. The first resumed
+     * iteration `/continue`s from it to thread conversation memory forward.
+     * Undefined on a fresh run (iteration 1 mints its own id, no prior to continue).
+     */
+    sessionId?: string;
+}
+/** Dependency seams for testing. */
+export interface LoopDeps {
+    /** Per-iteration runner. Defaults to a token-capturing spawn (defaultRunIteration). */
+    runIteration?: RunIteration;
+    /** Sleep function (ms). Defaults to setTimeout-backed. Injectable so tests don't wait. */
+    sleep?: (ms: number) => Promise<void>;
+    /** Checkpoint writer. Defaults to writeCheckpoint. */
+    writeCheckpoint?: (c: Checkpoint) => void;
+}
+/** Path to a run's loop-signal.json. */
+export declare function loopSignalPath(runDir: string): string;
+/**
+ * Build the prompt for iteration >= 2 so the agent CONTINUES the prior
+ * iteration's conversation instead of starting fresh.
+ *
+ * This reuses the repo's established cross-process Claude-continuity mechanism —
+ * the `/continue <id>` skill (see `buildFallbackPrompt` in exec.ts, which hands
+ * a rate-limit successor `/continue ${prevSessionId}`). The skill loads the
+ * prior transcript via `agents sessions <id>`, so continuity does NOT depend on
+ * the provider's native session being "active"; it reads the transcript off
+ * disk. That is why each loop iteration can safely pin a FRESH session id (the
+ * `--session-id` flag CREATES a session — re-passing one errors "Session ID
+ * already in use") while still threading the conversation forward via the
+ * prior id.
+ *
+ * The original entrypoint is re-appended after the continue directive so the
+ * agent both recalls the prior turn AND knows what to do this iteration.
+ */
+export declare function buildLoopContinuePrompt(prevSessionId: string, entrypoint: string): string;
+/**
+ * Resolve a loop interval string to milliseconds. `"0"` is an explicit
+ * back-to-back run (0ms). Any other string must parse via parseTimeout
+ * (e.g. "30m", "1h"); an unparseable value (e.g. "30s", "5", "abc") is a
+ * configuration error and must NOT silently coalesce to 0 (which would run the
+ * loop full-speed on a typo). Throws on bad input; validate at config build
+ * time (validateLoopInterval) so the error surfaces before the loop starts.
+ */
+export declare function parseLoopInterval(interval: string | undefined): number;
+/**
+ * Read and parse loop-signal.json. Returns null when the file is absent or
+ * unparseable — the caller treats null as fail-closed (continue:false).
+ */
+export declare function readLoopSignal(runDir: string): LoopSignal | null;
+/** Delete loop-signal.json so a stale signal never carries into the next iteration. */
+export declare function clearLoopSignal(runDir: string): void;
+/**
+ * Default per-iteration runner: spawn the agent, tee stdout, and sum token usage
+ * off the stream. This is a purpose-built token-capturing spawn for the loop's
+ * budget guard, not a re-implementation of exec's fallback/budget machinery —
+ * it reuses `buildExecCommand` / `buildExecEnv` (the canonical command/env
+ * builders) and `extractUsageEvents` (the canonical stream parser). The agent
+ * is forced to JSON/headless so the usage stream is parseable.
+ */
+export declare function defaultRunIteration(options: ExecOptions): Promise<IterationResult>;
+/**
+ * Run the autonomous loop. Returns when a guard trips, the until-condition is
+ * met, the iteration cap is reached, or a signal arrives.
+ *
+ * stoppedBy semantics (NOTE: `stalled` is also in LoopStoppedBy but is not described below — TODO document):
+ *   - `condition-met` — until=signal and the signal said stop (continue:false
+ *     OR the file was absent/corrupt → fail-closed).
+ *   - `budget`        — cumulative tokens crossed the budget cap (checked after
+ *     each turn, outside the agent).
+ *   - `max`           — ran maxIterations iterations without any earlier stop.
+ *   - `signal`        — SIGINT/SIGTERM arrived; checkpoint is written before exit.
+ *   - `error`         — an iteration threw or exited non-zero.
+ */
+export declare function runLoop(execOptions: ExecOptions, loop: LoopConfig, ctx: LoopContext, deps?: LoopDeps): Promise<LoopResult>;