npm - @martinloop/mcp - Versions diffs - 0.3.0 → 0.3.1 - Mend

@martinloop/mcp 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/README.md +4 -4
package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/server.js +69 -7
package/dist/tools/doctor.d.ts +27 -0
package/dist/tools/doctor.js +39 -11
package/dist/tools/get-run.d.ts +2 -1
package/dist/tools/get-run.js +1 -0
package/dist/tools/get-verification-results.d.ts +2 -1
package/dist/tools/get-verification-results.js +1 -0
package/dist/tools/plan.js +4 -2
package/dist/tools/preflight.d.ts +27 -0
package/dist/tools/preflight.js +44 -20
package/dist/tools/run-dossier.d.ts +2 -1
package/dist/tools/run-dossier.js +1 -0
package/dist/tools/run-loop.d.ts +5 -1
package/dist/tools/run-loop.js +20 -8
package/dist/tools/run-store.js +67 -15
package/dist/tools/tool-support.d.ts +2 -0
package/dist/tools/tool-support.js +49 -13
package/dist/tools/workflow-governance.d.ts +19 -3
package/dist/tools/workflow-governance.js +107 -55
package/dist/vendor/adapters/claude-cli.d.ts +20 -3
package/dist/vendor/adapters/claude-cli.js +193 -33
package/dist/vendor/adapters/cli-bridge.d.ts +45 -0
package/dist/vendor/adapters/cli-bridge.js +107 -39
package/dist/vendor/adapters/codex-launcher.d.ts +32 -0
package/dist/vendor/adapters/codex-launcher.js +409 -118
package/dist/vendor/adapters/openai-compatible.js +8 -2
package/dist/vendor/adapters/runtime-support.js +1 -0
package/dist/vendor/adapters/stub-direct-provider.js +3 -0
package/dist/vendor/adapters/verifier-only.d.ts +2 -0
package/dist/vendor/adapters/verifier-only.js +9 -3
package/dist/vendor/core/context-integrity.js +28 -3
package/dist/vendor/core/grounding.d.ts +1 -0
package/dist/vendor/core/grounding.js +6 -2
package/dist/vendor/core/index.d.ts +1 -0
package/dist/vendor/core/index.js +25 -6
package/dist/vendor/core/leash.js +85 -8
package/dist/vendor/core/persistence/integrity.d.ts +1 -1
package/dist/vendor/core/persistence/integrity.js +15 -6
package/dist/workflow-state.d.ts +9 -0
package/dist/workflow-state.js +46 -3
package/package.json +2 -2
package/server.json +2 -2

package/dist/vendor/adapters/claude-cli.js CHANGED

@@ -11,7 +11,8 @@
  *
  * MCP tools and integration tests use the same factories.
  */
-import { readGitExecutionArtifacts, runSubprocess, runVerification } from "./cli-bridge.js";
+import { readGitExecutionArtifacts, resolveGitRepositoryRoot, runSubprocess, runVerification } from "./cli-bridge.js";
+import { buildCodexExecArgs } from "./codex-launcher.js";
 import { createAdapterCapabilities, normalizeStructuredErrors, normalizeUsage } from "./runtime-support.js";
 // ---------------------------------------------------------------------------
 // Cost estimation
@@ -56,15 +57,21 @@ function extractUsage(parsed, modelLabel) {
     const tokensOut = parsed.usage.outputTokens ?? parsed.usage.output_tokens ?? 0;
     const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
         { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
-    const actualUsd = (promptTokens / 1000) * pricing.inputPer1K +
-        (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
-        (tokensOut / 1000) * pricing.outputPer1K;
+    // Prefer Claude's own authoritative total_cost_usd (present on the final
+    // `result` event in json/stream-json output) over our pricing-table estimate,
+    // which can drift from real billed cost (cache discounts, surcharges, etc).
+    const hasAuthoritativeCost = typeof parsed.total_cost_usd === "number";
+    const actualUsd = hasAuthoritativeCost
+        ? parsed.total_cost_usd
+        : (promptTokens / 1000) * pricing.inputPer1K +
+            (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
+            (tokensOut / 1000) * pricing.outputPer1K;
     return normalizeUsage({
         actualUsd: Number(actualUsd.toFixed(6)),
         tokensIn,
         tokensOut,
         cachedInputTokens,
-        provenance: "actual",
+        provenance: hasAuthoritativeCost ? "actual" : "estimated",
         providerSettlement: {
             providerId: "claude",
             model: modelLabel ?? "claude",
@@ -232,6 +239,86 @@ function extractGeminiJsonResult(stdout, modelLabel) {
         })
     };
 }
+function createStreamingUsageInspector(capUsd, modelLabel) {
+    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
+        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
+    let buffer = "";
+    let cumulativeUsd = 0;
+    let tokensIn = 0;
+    let tokensOut = 0;
+    let turns = 0;
+    let finalResult;
+    const ingestLine = (line, terminate) => {
+        const trimmed = line.trim();
+        if (!trimmed) {
+            return;
+        }
+        let event;
+        try {
+            event = JSON.parse(trimmed);
+        }
+        catch {
+            return;
+        }
+        if (event.type === "assistant" && event.message?.usage) {
+            const usage = event.message.usage;
+            const turnTokensIn = (usage.input_tokens ?? usage.inputTokens ?? 0) +
+                (usage.cache_read_input_tokens ?? usage.cacheReadInputTokens ?? 0) +
+                (usage.cache_creation_input_tokens ?? usage.cacheCreationInputTokens ?? 0);
+            const turnTokensOut = usage.output_tokens ?? usage.outputTokens ?? 0;
+            tokensIn += turnTokensIn;
+            tokensOut += turnTokensOut;
+            turns += 1;
+            cumulativeUsd += (turnTokensIn / 1000) * pricing.inputPer1K + (turnTokensOut / 1000) * pricing.outputPer1K;
+            if (capUsd > 0 && cumulativeUsd > capUsd) {
+                terminate(`Streaming usage cap exceeded after ${String(turns)} turn(s): cumulative cost ~$${cumulativeUsd.toFixed(4)} ` +
+                    `surpassed the per-attempt cap $${capUsd.toFixed(4)} (derived from remaining loop budget). ` +
+                    `Subprocess terminated to bound runaway overspend.`);
+            }
+            return;
+        }
+        if (event.type === "result") {
+            finalResult = event;
+        }
+    };
+    return {
+        onChunk: (chunk, terminate) => {
+            buffer += chunk.toString("utf8");
+            let newlineIndex = buffer.indexOf("\n");
+            while (newlineIndex !== -1) {
+                const line = buffer.slice(0, newlineIndex);
+                buffer = buffer.slice(newlineIndex + 1);
+                ingestLine(line, terminate);
+                newlineIndex = buffer.indexOf("\n");
+            }
+        },
+        snapshot: () => ({ cumulativeUsd, tokensIn, tokensOut, turns, ...(finalResult ? { finalResult } : {}) })
+    };
+}
+/**
+ * Parses Claude's `stream-json` output (one JSON object per line) and returns
+ * the final `result` event, which carries the same `result`/`usage`/
+ * `total_cost_usd` fields as the single-blob `json` format.
+ */
+function parseStreamJsonResult(stdout) {
+    let lastResult;
+    for (const rawLine of stdout.split(/\r?\n/u)) {
+        const line = rawLine.trim();
+        if (!line) {
+            continue;
+        }
+        try {
+            const event = JSON.parse(line);
+            if (event.type === "result") {
+                lastResult = event;
+            }
+        }
+        catch {
+            // Ignore non-JSON / partial lines.
+        }
+    }
+    return lastResult;
+}
 // ---------------------------------------------------------------------------
 // Structural failure hint detection
 //
@@ -306,12 +393,45 @@ export function createAgentCliAdapter(options) {
             }
             const args = options.argsBuilder(prompt);
             const stdinData = options.stdinBuilder?.(prompt);
+            // Live cumulative-cost circuit breaker: a single attempt should never be
+            // allowed to spend more than the loop has left. `--output-format json`
+            // only reports usage once the process exits, so for `stream-json` we
+            // watch per-turn usage events as they arrive and terminate the subprocess
+            // as soon as the estimated cumulative spend exceeds what remains — bounding the
+            // worst case to roughly one turn's overshoot rather than the entire runaway session.
+            const streamingUsage = options.streamingUsageCap && request.context.remainingBudgetUsd > 0
+                ? createStreamingUsageInspector(request.context.remainingBudgetUsd, options.model ?? options.command)
+                : undefined;
             const agentResult = await runSubprocess(options.command, args, {
                 cwd: workingDirectory,
                 timeoutMs,
                 spawnImpl: options.spawnImpl,
-                ...(stdinData === undefined ? {} : { stdinData })
+                ...(stdinData === undefined ? {} : { stdinData }),
+                ...(streamingUsage ? { onStdoutChunk: streamingUsage.onChunk } : {})
             });
+            if (agentResult.terminationReason) {
+                const snapshot = streamingUsage?.snapshot();
+                const cumulativeUsd = snapshot?.cumulativeUsd ?? 0;
+                return {
+                    status: "failed",
+                    summary: `${options.command} subprocess terminated mid-run by the budget circuit breaker. ${agentResult.terminationReason}`,
+                    usage: normalizeUsage({
+                        actualUsd: Number(cumulativeUsd.toFixed(6)),
+                        estimatedUsd: Number(cumulativeUsd.toFixed(6)),
+                        tokensIn: snapshot?.tokensIn ?? 0,
+                        tokensOut: snapshot?.tokensOut ?? 0,
+                        provenance: "estimated"
+                    }),
+                    verification: {
+                        passed: false,
+                        summary: "Subprocess terminated by the streaming budget circuit breaker before verification could run."
+                    },
+                    failure: {
+                        message: agentResult.terminationReason,
+                        classHint: "budget_pressure"
+                    }
+                };
+            }
             if (agentResult.timedOut) {
                 return {
                     status: "failed",
@@ -346,11 +466,15 @@ export function createAgentCliAdapter(options) {
                     }
                 };
             }
-            // Parse JSON output if the CLI supports it (Claude with --output-format json)
+            // Parse JSON output if the CLI supports it. `stream-json` emits one JSON
+            // object per line — the final `result` event carries the same
+            // `result`/`usage`/`total_cost_usd` fields as single-blob `json` output.
             let parsed;
             if (supportsJsonOutput) {
                 try {
-                    parsed = JSON.parse(agentResult.stdout);
+                    parsed = options.streamingUsageCap
+                        ? parseStreamJsonResult(agentResult.stdout)
+                        : JSON.parse(agentResult.stdout);
                 }
                 catch {
                     // Fall through to plain-text handling
@@ -362,6 +486,26 @@ export function createAgentCliAdapter(options) {
             const geminiJsonResult = !supportsJsonOutput && options.command === "gemini"
                 ? extractGeminiJsonResult(agentResult.stdout, options.model)
                 : undefined;
+            const producedStructuredCompletion = parsed?.result !== undefined ||
+                codexJsonlResult !== undefined ||
+                geminiJsonResult !== undefined;
+            if (agentResult.exitCode !== 0 && !producedStructuredCompletion) {
+                const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr || agentResult.stdout, agentResult.exitCode);
+                return {
+                    status: "failed",
+                    summary: `${options.command} subprocess exited before verifier execution.`,
+                    usage: normalizeUsage({
+                        actualUsd: 0,
+                        tokensIn: 0,
+                        tokensOut: 0,
+                        provenance: "unavailable"
+                    }),
+                    verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
+                    failure: {
+                        message: failureMessage
+                    }
+                };
+            }
             const agentText = codexJsonlResult?.summary ??
                 geminiJsonResult?.summary ??
                 parsed?.result ??
@@ -405,21 +549,22 @@ export function createAgentCliAdapter(options) {
             const verification = await runVerification(request.context.verificationPlan, workingDirectory, verifyTimeoutMs, verificationStack, options.spawnImpl);
             // Check for zero-diff (agent ran but made no file changes)
             const repoRoot = request.context.repoRoot;
+            const gitRepoRoot = repoRoot ? resolveGitRepositoryRoot(repoRoot) : undefined;
             let noDiff = false;
-            if (repoRoot) {
-                noDiff = await checkNoDiff(repoRoot, options.spawnImpl);
+            if (gitRepoRoot) {
+                noDiff = await checkNoDiff(gitRepoRoot, options.spawnImpl);
             }
             // Extract structured errors from stderr/stdout for better failure context
             const structuredErrors = normalizeStructuredErrors(extractStructuredErrors(agentResult.stderr, agentResult.stdout));
-            const executionArtifacts = repoRoot
-                ? await readGitExecutionArtifacts(repoRoot, 5000, options.spawnImpl)
+            const executionArtifacts = gitRepoRoot
+                ? await readGitExecutionArtifacts(gitRepoRoot, 5000, options.spawnImpl)
                 : undefined;
             // Scope contract enforcement: check touched files against allowedPaths/deniedPaths
             let scopeViolations = [];
             const scopeCtx = request.context;
-            if (repoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
+            if (gitRepoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
                 const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
-                    cwd: repoRoot,
+                    cwd: gitRepoRoot,
                     timeoutMs: 5000,
                     spawnImpl: options.spawnImpl
                 });
@@ -492,7 +637,12 @@ export function createAgentCliAdapter(options) {
                 }
                 // Reset tracked files to HEAD so next attempt starts from clean state
                 try {
-                    await runSubprocess("git", ["restore", "--staged", "--worktree", "."], { cwd: repoRoot, timeoutMs: 5000 });
+                    if (gitRepoRoot) {
+                        await runSubprocess("git", ["restore", "--staged", "--worktree", "."], {
+                            cwd: gitRepoRoot,
+                            timeoutMs: 5000
+                        });
+                    }
                 }
                 catch {
                     // Non-fatal
@@ -540,10 +690,16 @@ export function createAgentCliAdapter(options) {
 // Pre-configured: Claude CLI
 // ---------------------------------------------------------------------------
 /**
- * Spawns `claude --output-format json --print "<prompt>" --dangerously-skip-permissions [extraArgs]`.
+ * Spawns `claude --output-format stream-json --verbose --print "<prompt>" --dangerously-skip-permissions [extraArgs]`.
  *
- * The --output-format json flag causes Claude CLI to return structured JSON
- * including real token usage counts, enabling accurate cost tracking.
+ * `stream-json` emits one JSON event per line — including per-turn usage on
+ * each `assistant` message and a final `result` event carrying the same
+ * `result`/`usage`/`total_cost_usd` fields as single-blob `json` output — so
+ * MartinLoop can both (a) recover real token usage/cost as before, and
+ * (b) watch cumulative spend live and self-terminate the subprocess the
+ * moment it crosses the remaining per-attempt budget (see
+ * `streamingUsageCap` / `createStreamingUsageInspector`), instead of only
+ * discovering an overspend after the whole process has already exited.
  *
  * Requires the Claude Code CLI to be installed and authenticated:
  *   https://docs.anthropic.com/claude-code
@@ -560,10 +716,12 @@ export function createClaudeCliAdapter(options = {}) {
         timeoutMs: options.timeoutMs,
         verifyTimeoutMs: options.verifyTimeoutMs,
         supportsJsonOutput: true,
+        streamingUsageCap: true,
         spawnImpl: options.spawnImpl,
         argsBuilder: (_prompt) => [
             "--output-format",
-            "json",
+            "stream-json",
+            "--verbose",
             "--print",
             "--dangerously-skip-permissions",
             ...modelArgs,
@@ -586,12 +744,12 @@ export function createClaudeCliAdapter(options = {}) {
  *   npm install -g @openai/codex
  */
 export function createCodexCliAdapter(options = {}) {
-    const modelArgs = options.model ? ["--model", options.model] : [];
     const extraArgs = options.extraArgs ?? [];
     const sandbox = options.sandbox ?? "workspace-write";
     const workingDirectory = options.workingDirectory ?? process.cwd();
+    const command = options.command ?? "codex";
     return createAgentCliAdapter({
-        command: "codex",
+        command,
         adapterIdSuffix: "codex",
         model: options.model ?? "codex",
         label: options.label ?? "Codex CLI adapter",
@@ -600,19 +758,13 @@ export function createCodexCliAdapter(options = {}) {
         verifyTimeoutMs: options.verifyTimeoutMs,
         supportsJsonOutput: false,
         spawnImpl: options.spawnImpl,
-        argsBuilder: () => [
-            "exec",
-            "--cd",
+        argsBuilder: () => buildCodexExecArgs({
             workingDirectory,
-            "--sandbox",
             sandbox,
-            "--json",
-            "--color",
-            "never",
-            ...modelArgs,
-            ...extraArgs,
-            "-"
-        ],
+            ...(options.model ? { model: options.model } : {}),
+            extraArgs,
+            mode: "prompt"
+        }),
         stdinBuilder: (prompt) => prompt
     });
 }
@@ -815,7 +967,15 @@ function redactSecretsForPrompt(input) {
     return input
         .replace(/\bOPENAI_API_KEY\s*=\s*[^\s"'`]+/giu, "OPENAI_API_KEY=[REDACTED_SECRET]")
         .replace(/\bsk-[A-Za-z0-9_-]{8,}\b/gu, "[REDACTED_SECRET]")
-        .replace(/\bghp_[A-Za-z0-9_]{8,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\bghp_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\bgithub_pat_[A-Za-z0-9_]{20,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\b(?:gho|ghu|ghs|ghr)_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\bAKIA[0-9A-Z]{16}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\b(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*[^\s"'`]+/giu, "AWS_SECRET_ACCESS_KEY=[REDACTED_SECRET]")
+        .replace(/\bxox[baprs]-[A-Za-z0-9-]{10,}\b/giu, "[REDACTED_SECRET]")
+        .replace(/\bAIza[0-9A-Za-z_-]{30,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/-----BEGIN(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----[\s\S]*?-----END(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----/gu, "[REDACTED_SECRET]")
+        .replace(/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/gu, "[REDACTED_SECRET]")
         .replace(/\B\.env(?!\.example\b)(?:\.[A-Za-z0-9._-]+)?\b/giu, "[REDACTED_PATH]");
 }
 function extractStructuredErrors(stderr, stdout) {

package/dist/vendor/adapters/cli-bridge.d.ts CHANGED

@@ -6,16 +6,60 @@ export interface SubprocessResult {
     stdout: string;
     stderr: string;
     timedOut: boolean;
+    /**
+     * True when the subprocess was terminated early because its combined
+     * stdout+stderr exceeded `maxOutputBytes` — a circuit breaker against
+     * runaway agent sessions that would otherwise burn far more cost/tokens
+     * than the loop budget allows before MartinLoop can observe the final
+     * (post-hoc) usage report. See `claude-cli.ts` execute() for how this
+     * cap is derived from the remaining loop budget.
+     */
+    outputCapped: boolean;
+    /**
+     * Set to the inspector's reason string when an `onStdoutChunk` callback
+     * requested early termination (e.g. a streaming usage/cost circuit breaker
+     * that detected the agent is on track to blow through its budget). Distinct
+     * from `outputCapped`, which fires on raw byte volume rather than parsed
+     * semantic content.
+     */
+    terminationReason?: string;
+    launched: boolean;
 }
 export interface VerificationOutcome {
     passed: boolean;
     summary: string;
+    steps: VerificationStepOutcome[];
+    warnings?: string[];
+}
+export interface VerificationStepOutcome {
+    command: string;
+    launched: boolean;
+    exitCode?: number;
+    timedOut: boolean;
+    fastFail: boolean;
+    detail?: string;
 }
 export declare function runSubprocess(command: string, args: string[], options: {
     cwd: string;
     timeoutMs: number;
     spawnImpl?: SpawnLike;
     stdinData?: string;
+    /**
+     * Optional circuit breaker: terminate the subprocess once combined
+     * stdout+stderr bytes exceed this threshold, instead of waiting for
+     * natural completion. Used to bound runaway agent-CLI cost/token spend
+     * that can't otherwise be observed until the process exits.
+     */
+    maxOutputBytes?: number;
+    /**
+     * Optional semantic inspector invoked with each raw stdout chunk. Used to
+     * parse streaming structured output (e.g. Claude's `stream-json` usage
+     * events) and request early termination via the supplied `terminate`
+     * callback once a semantic threshold (such as cumulative cost) is
+     * crossed — well before the subprocess would exit naturally and report
+     * a runaway final usage figure.
+     */
+    onStdoutChunk?: (chunk: Buffer, terminate: (reason: string) => void) => void;
 }): Promise<SubprocessResult>;
 export declare function runVerification(commands: string[], cwd: string, timeoutMs: number, verificationStack?: Array<{
     command: string;
@@ -27,6 +71,7 @@ export declare function readGitExecutionArtifacts(repoRoot: string, timeoutMs: n
     diffStats?: ReturnType<typeof diffStatsFromNumstat>;
 }>;
 export declare function readGitChangedFiles(repoRoot: string, timeoutMs: number, spawnImpl?: SpawnLike): Promise<string[]>;
+export declare function resolveGitRepositoryRoot(workingDirectory: string): string | undefined;
 export interface SpawnPlan {
     command: string;
     args: string[];

package/dist/vendor/adapters/cli-bridge.js CHANGED

@@ -1,11 +1,15 @@
 import { spawn } from "node:child_process";
-import { delimiter, extname, isAbsolute, join, resolve } from "node:path";
+import { delimiter, dirname, extname, isAbsolute, join, resolve } from "node:path";
 import { existsSync } from "node:fs";
 import { diffStatsFromNumstat } from "./runtime-support.js";
+const gitRepositoryRootCache = new Map();
 export async function runSubprocess(command, args, options) {
     return new Promise((resolve) => {
         let timedOut = false;
+        let outputCapped = false;
+        let terminationReason;
         let settled = false;
+        let outputBytes = 0;
         const stdoutChunks = [];
         const stderrChunks = [];
         const stdinMode = options.stdinData !== undefined ? "pipe" : "ignore";
@@ -14,7 +18,7 @@ export async function runSubprocess(command, args, options) {
                 return;
             }
             settled = true;
-            resolve(result);
+            resolve({ ...result, timedOut, outputCapped, ...(terminationReason ? { terminationReason } : {}) });
         };
         let proc;
         try {
@@ -27,19 +31,33 @@ export async function runSubprocess(command, args, options) {
         }
         catch (error) {
             const message = error instanceof Error ? error.message : String(error);
-            resolveOnce({
-                exitCode: 1,
-                stdout: "",
-                stderr: message,
-                timedOut: false
-            });
+            resolveOnce({ exitCode: 1, stdout: "", stderr: message, launched: false });
             return;
         }
+        const trackOutput = (chunks, chunk) => {
+            chunks.push(chunk);
+            outputBytes += chunk.byteLength;
+            if (options.maxOutputBytes !== undefined &&
+                !outputCapped &&
+                !timedOut &&
+                outputBytes > options.maxOutputBytes) {
+                outputCapped = true;
+                proc.kill("SIGTERM");
+            }
+        };
+        const terminateEarly = (reason) => {
+            if (terminationReason || timedOut || outputCapped) {
+                return;
+            }
+            terminationReason = reason;
+            proc.kill("SIGTERM");
+        };
         proc.stdout?.on("data", (chunk) => {
-            stdoutChunks.push(chunk);
+            trackOutput(stdoutChunks, chunk);
+            options.onStdoutChunk?.(chunk, terminateEarly);
         });
         proc.stderr?.on("data", (chunk) => {
-            stderrChunks.push(chunk);
+            trackOutput(stderrChunks, chunk);
         });
         proc.stdin?.on("error", (error) => {
             // Some CLIs exit before consuming stdin in tests and on fast-fail paths.
@@ -55,12 +73,7 @@ export async function runSubprocess(command, args, options) {
         }, options.timeoutMs);
         proc.on("error", (error) => {
             clearTimeout(timer);
-            resolveOnce({
-                exitCode: 1,
-                stdout: "",
-                stderr: error.message,
-                timedOut: false
-            });
+            resolveOnce({ exitCode: 1, stdout: "", stderr: error.message, launched: false });
         });
         proc.on("close", (code) => {
             clearTimeout(timer);
@@ -68,7 +81,7 @@ export async function runSubprocess(command, args, options) {
                 exitCode: code ?? 1,
                 stdout: Buffer.concat(stdoutChunks).toString("utf8"),
                 stderr: Buffer.concat(stderrChunks).toString("utf8"),
-                timedOut
+                launched: true
             });
         });
         if (options.stdinData !== undefined && proc.stdin) {
@@ -83,7 +96,7 @@ export async function runSubprocess(command, args, options) {
                         exitCode: 1,
                         stdout: Buffer.concat(stdoutChunks).toString("utf8"),
                         stderr: stdinError.message,
-                        timedOut: false
+                        launched: false
                     });
                 }
             }
@@ -98,9 +111,11 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
         }))
         : commands.map((command) => ({ command, fastFail: true }));
     if (steps.length === 0) {
-        return { passed: true, summary: "No verification commands specified." };
+        return { passed: true, summary: "No verification commands specified.", steps: [] };
     }
     const failedSteps = [];
+    const stepOutcomes = [];
+    const warnings = [];
     for (const step of steps) {
         const parts = splitCommand(step.command);
         const [bin, ...args] = parts;
@@ -108,24 +123,53 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
             continue;
         }
         const result = await runSubprocess(bin, args, { cwd, timeoutMs, spawnImpl });
+        const detail = truncate(result.stderr.trim() || result.stdout.trim(), 500);
+        stepOutcomes.push({
+            command: step.command,
+            launched: result.launched,
+            exitCode: result.exitCode,
+            timedOut: result.timedOut,
+            fastFail: step.fastFail,
+            ...(detail ? { detail } : {})
+        });
         if (result.timedOut) {
-            return { passed: false, summary: `Verification timed out: ${step.command}` };
+            return {
+                passed: false,
+                summary: `Verification timed out: ${step.command}`,
+                steps: stepOutcomes,
+                ...(warnings.length ? { warnings } : {})
+            };
         }
         if (result.exitCode !== 0) {
-            const detail = truncate(result.stderr.trim() || result.stdout.trim(), 500);
             const summary = `Verification failed: ${step.command}\n${detail}`;
+            if (!result.launched) {
+                warnings.push(`Verifier never launched: ${step.command}`);
+            }
             if (step.fastFail) {
-                return { passed: false, summary };
+                return { passed: false, summary, steps: stepOutcomes, ...(warnings.length ? { warnings } : {}) };
             }
             failedSteps.push(step.command);
         }
     }
     if (failedSteps.length > 0) {
-        return { passed: false, summary: `Failed steps: ${failedSteps.join(", ")}` };
+        return {
+            passed: false,
+            summary: `Failed steps: ${failedSteps.join(", ")}`,
+            steps: stepOutcomes,
+            ...(warnings.length ? { warnings } : {})
+        };
     }
-    return { passed: true, summary: `All ${String(steps.length)} verification step(s) passed.` };
+    return {
+        passed: true,
+        summary: `All ${String(steps.length)} verification step(s) passed.`,
+        steps: stepOutcomes,
+        ...(warnings.length ? { warnings } : {})
+    };
 }
 export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl) {
+    if (!resolveGitRepositoryRoot(repoRoot)) {
+        return {};
+    }
     const changedFilesResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs, spawnImpl });
     const numstatResult = await runSubprocess("git", ["diff", "--numstat", "HEAD"], { cwd: repoRoot, timeoutMs, spawnImpl });
     const changedFiles = changedFilesResult.exitCode === 0
@@ -141,12 +185,48 @@ export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl)
     };
 }
 export async function readGitChangedFiles(repoRoot, timeoutMs, spawnImpl) {
-    const statusResult = await runSubprocess("git", ["status", "-z", "--porcelain=v1", "--untracked-files=all", "--ignore-submodules=all"], { cwd: repoRoot, timeoutMs, spawnImpl });
+    if (!resolveGitRepositoryRoot(repoRoot)) {
+        return [];
+    }
+    const statusResult = await runSubprocess("git", ["status", "-z", "--porcelain=v1", "--untracked-files=all", "--ignore-submodules=all", "--", "."], { cwd: repoRoot, timeoutMs, spawnImpl });
     if (statusResult.exitCode !== 0) {
         return [];
     }
     return parsePorcelainEntries(statusResult.stdout).filter((entry) => typeof entry === "string" && entry.length > 0);
 }
+export function resolveGitRepositoryRoot(workingDirectory) {
+    const resolvedWorkingDirectory = resolve(workingDirectory);
+    const cached = gitRepositoryRootCache.get(resolvedWorkingDirectory);
+    if (cached !== undefined) {
+        return cached ?? undefined;
+    }
+    const visited = [];
+    let current = resolvedWorkingDirectory;
+    while (true) {
+        visited.push(current);
+        const currentCached = gitRepositoryRootCache.get(current);
+        if (currentCached !== undefined) {
+            for (const candidate of visited) {
+                gitRepositoryRootCache.set(candidate, currentCached);
+            }
+            return currentCached ?? undefined;
+        }
+        if (existsSync(resolve(current, ".git"))) {
+            for (const candidate of visited) {
+                gitRepositoryRootCache.set(candidate, current);
+            }
+            return current;
+        }
+        const parent = dirname(current);
+        if (parent === current) {
+            for (const candidate of visited) {
+                gitRepositoryRootCache.set(candidate, null);
+            }
+            return undefined;
+        }
+        current = parent;
+    }
+}
 export function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn) {
     if (preserveRawForInjectedSpawn || process.platform !== "win32") {
         return { command, args };
@@ -157,18 +237,16 @@ export function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn)
     // Windows can resolve the command itself — this covers cases like `pnpm` where the npm global
     // bin directory is present in the shell PATH but not yet visible to this Node.js process.
     if (resolvedOrUndefined === undefined) {
-        const cmdStr = [quoteWindowsCmdArg(command), ...args.map(quoteWindowsCmdArg)].join(" ");
         return {
             command: process.env.ComSpec || "cmd.exe",
-            args: ["/d", "/c", cmdStr]
+            args: ["/d", "/c", command, ...args]
         };
     }
     const extension = extname(resolvedOrUndefined).toLowerCase();
     if (extension === ".cmd" || extension === ".bat") {
-        const cmdStr = [quoteWindowsCmdArg(resolvedOrUndefined), ...args.map(quoteWindowsCmdArg)].join(" ");
         return {
             command: process.env.ComSpec || "cmd.exe",
-            args: ["/d", "/s", "/c", cmdStr]
+            args: ["/d", "/c", resolvedOrUndefined, ...args]
         };
     }
     if (extension === ".ps1") {
@@ -240,16 +318,6 @@ function windowsPathDirectories() {
         .map((entry) => entry.trim().replace(/^"|"$/g, ""))
         .filter(Boolean);
 }
-function quoteWindowsCmdArg(value) {
-    const normalized = value.replace(/\r?\n/gu, " ");
-    const escaped = normalized
-        .replace(/\^/gu, "^^")
-        .replace(/"/gu, '^"')
-        .replace(/%/gu, "%%")
-        .replace(/!/gu, "^^!")
-        .replace(/[&|<>()]/gu, (match) => `^${match}`);
-    return `"${escaped}"`;
-}
 export function splitCommand(command) {
     const tokens = [];
     let current = "";