npm - @martinloop/mcp - Versions diffs - 0.2.7 → 0.3.1 - Mend

@martinloop/mcp 0.2.7 → 0.3.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (64)

package/README.md +49 -104
package/dist/package-version.d.ts +1 -1
package/dist/package-version.js +1 -1
package/dist/prompts.d.ts +1 -1
package/dist/resources.d.ts +1 -1
package/dist/resources.js +2 -2
package/dist/server-validation.d.ts +1 -0
package/dist/server-validation.js +8 -0
package/dist/server.js +87 -9
package/dist/tools/doctor.d.ts +39 -1
package/dist/tools/doctor.js +68 -9
package/dist/tools/eval.js +3 -2
package/dist/tools/get-run.d.ts +3 -0
package/dist/tools/get-run.js +3 -1
package/dist/tools/get-verification-results.d.ts +3 -0
package/dist/tools/get-verification-results.js +3 -1
package/dist/tools/plan.js +4 -2
package/dist/tools/pr-tools.js +2 -1
package/dist/tools/preflight.d.ts +41 -1
package/dist/tools/preflight.js +74 -19
package/dist/tools/run-dossier.d.ts +3 -0
package/dist/tools/run-dossier.js +5 -2
package/dist/tools/run-loop.d.ts +7 -2
package/dist/tools/run-loop.js +67 -35
package/dist/tools/run-store.js +67 -15
package/dist/tools/tool-errors.js +1 -1
package/dist/tools/tool-support.d.ts +8 -3
package/dist/tools/tool-support.js +61 -18
package/dist/tools/workflow-governance.d.ts +19 -3
package/dist/tools/workflow-governance.js +107 -55
package/dist/vendor/adapters/claude-cli.d.ts +45 -3
package/dist/vendor/adapters/claude-cli.js +465 -45
package/dist/vendor/adapters/cli-bridge.d.ts +46 -0
package/dist/vendor/adapters/cli-bridge.js +147 -38
package/dist/vendor/adapters/codex-launcher.d.ts +76 -0
package/dist/vendor/adapters/codex-launcher.js +538 -0
package/dist/vendor/adapters/index.d.ts +3 -2
package/dist/vendor/adapters/index.js +3 -2
package/dist/vendor/adapters/openai-compatible.d.ts +19 -4
package/dist/vendor/adapters/openai-compatible.js +50 -19
package/dist/vendor/adapters/runtime-support.d.ts +3 -0
package/dist/vendor/adapters/runtime-support.js +9 -1
package/dist/vendor/adapters/stub-direct-provider.js +3 -0
package/dist/vendor/adapters/verifier-only.d.ts +2 -0
package/dist/vendor/adapters/verifier-only.js +11 -4
package/dist/vendor/contracts/index.d.ts +39 -0
package/dist/vendor/contracts/index.js +2 -0
package/dist/vendor/core/context-integrity.js +28 -3
package/dist/vendor/core/grounding.d.ts +1 -0
package/dist/vendor/core/grounding.js +6 -2
package/dist/vendor/core/index.d.ts +24 -3
package/dist/vendor/core/index.js +113 -21
package/dist/vendor/core/leash.js +85 -8
package/dist/vendor/core/persistence/index.d.ts +2 -0
package/dist/vendor/core/persistence/index.js +1 -0
package/dist/vendor/core/persistence/integrity.d.ts +38 -0
package/dist/vendor/core/persistence/integrity.js +248 -0
package/dist/vendor/core/persistence/store.d.ts +7 -0
package/dist/vendor/core/persistence/store.js +25 -1
package/dist/vendor/core/policy.d.ts +9 -0
package/dist/workflow-state.d.ts +9 -0
package/dist/workflow-state.js +46 -3
package/package.json +2 -2
package/server.json +2 -2

package/dist/vendor/adapters/claude-cli.js CHANGED

@@ -11,7 +11,8 @@
  *
  * MCP tools and integration tests use the same factories.
  */
-import { readGitExecutionArtifacts, runSubprocess, runVerification } from "./cli-bridge.js";
+import { readGitExecutionArtifacts, resolveGitRepositoryRoot, runSubprocess, runVerification } from "./cli-bridge.js";
+import { buildCodexExecArgs } from "./codex-launcher.js";
 import { createAdapterCapabilities, normalizeStructuredErrors, normalizeUsage } from "./runtime-support.js";
 // ---------------------------------------------------------------------------
 // Cost estimation
@@ -31,7 +32,14 @@ const MODEL_PRICING = {
     // Keep legacy names working
     "claude-opus": { inputPer1K: 0.015, outputPer1K: 0.075 },
     "claude-sonnet": { inputPer1K: 0.003, outputPer1K: 0.015 },
-    "claude-haiku": { inputPer1K: 0.00025, outputPer1K: 0.00125 }
+    "claude-haiku": { inputPer1K: 0.00025, outputPer1K: 0.00125 },
+    // OpenAI coding models
+    "codex": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
+    "gpt-5-codex": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
+    "gpt-5.1-codex": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
+    "gpt-5.1-codex-max": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
+    "gpt-5.2-codex": { inputPer1K: 0.00175, cachedInputPer1K: 0.000175, outputPer1K: 0.014 },
+    "codex-mini-latest": { inputPer1K: 0.0015, cachedInputPer1K: 0.000375, outputPer1K: 0.006 }
 };
 function extractUsage(parsed, modelLabel) {
     if (!parsed?.usage) {
@@ -42,21 +50,275 @@ function extractUsage(parsed, modelLabel) {
             provenance: "unavailable"
         });
     }
-    const tokensIn = (parsed.usage.inputTokens ?? parsed.usage.input_tokens ?? 0) +
-        (parsed.usage.cacheReadInputTokens ?? parsed.usage.cache_read_input_tokens ?? 0) +
+    const promptTokens = (parsed.usage.inputTokens ?? parsed.usage.input_tokens ?? 0) +
         (parsed.usage.cacheCreationInputTokens ?? parsed.usage.cache_creation_input_tokens ?? 0);
+    const cachedInputTokens = parsed.usage.cacheReadInputTokens ?? parsed.usage.cache_read_input_tokens ?? 0;
+    const tokensIn = promptTokens + cachedInputTokens;
     const tokensOut = parsed.usage.outputTokens ?? parsed.usage.output_tokens ?? 0;
     const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
         { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
-    const actualUsd = (tokensIn / 1000) * pricing.inputPer1K +
-        (tokensOut / 1000) * pricing.outputPer1K;
+    // Prefer Claude's own authoritative total_cost_usd (present on the final
+    // `result` event in json/stream-json output) over our pricing-table estimate,
+    // which can drift from real billed cost (cache discounts, surcharges, etc).
+    const hasAuthoritativeCost = typeof parsed.total_cost_usd === "number";
+    const actualUsd = hasAuthoritativeCost
+        ? parsed.total_cost_usd
+        : (promptTokens / 1000) * pricing.inputPer1K +
+            (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
+            (tokensOut / 1000) * pricing.outputPer1K;
     return normalizeUsage({
         actualUsd: Number(actualUsd.toFixed(6)),
         tokensIn,
         tokensOut,
-        provenance: "actual"
+        cachedInputTokens,
+        provenance: hasAuthoritativeCost ? "actual" : "estimated",
+        providerSettlement: {
+            providerId: "claude",
+            model: modelLabel ?? "claude",
+            transport: "cli",
+            source: "claude_json",
+            inputTokens: promptTokens,
+            cachedInputTokens,
+            outputTokens: tokensOut,
+            rawUsageAvailable: true,
+            settledAt: new Date().toISOString()
+        }
     });
 }
+function extractCodexJsonlResult(stdout, modelLabel) {
+    const events = stdout
+        .split(/\r?\n/u)
+        .map((line) => line.trim())
+        .filter(Boolean)
+        .map((line) => {
+        try {
+            return JSON.parse(line);
+        }
+        catch {
+            return undefined;
+        }
+    })
+        .filter((event) => event !== undefined);
+    if (events.length === 0) {
+        return undefined;
+    }
+    const latestAgentMessage = [...events]
+        .reverse()
+        .find((event) => event.type === "item.completed" && event.item?.type === "agent_message");
+    const latestTurnCompleted = [...events]
+        .reverse()
+        .find((event) => event.type === "turn.completed" && event.usage !== undefined);
+    const summary = typeof latestAgentMessage?.item?.text === "string" && latestAgentMessage.item.text.trim().length > 0
+        ? latestAgentMessage.item.text.trim()
+        : stdout.trim();
+    if (!latestTurnCompleted?.usage) {
+        return {
+            summary,
+            usage: normalizeUsage({
+                actualUsd: 0,
+                tokensIn: 0,
+                tokensOut: 0,
+                provenance: "unavailable",
+                providerSettlement: {
+                    providerId: "codex",
+                    model: modelLabel ?? "codex",
+                    transport: "cli",
+                    source: "unavailable",
+                    inputTokens: 0,
+                    outputTokens: 0,
+                    rawUsageAvailable: false,
+                    settledAt: new Date().toISOString()
+                }
+            })
+        };
+    }
+    const promptTokens = latestTurnCompleted.usage.input_tokens ?? 0;
+    const cachedInputTokens = latestTurnCompleted.usage.cached_input_tokens ?? 0;
+    const outputTokens = latestTurnCompleted.usage.output_tokens ?? 0;
+    const reasoningOutputTokens = latestTurnCompleted.usage.reasoning_output_tokens ?? 0;
+    const tokensIn = promptTokens + cachedInputTokens;
+    const tokensOut = outputTokens + reasoningOutputTokens;
+    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
+        MODEL_PRICING["codex"] ??
+        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
+    const actualUsd = (promptTokens / 1000) * pricing.inputPer1K +
+        (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
+        (tokensOut / 1000) * pricing.outputPer1K;
+    return {
+        summary,
+        usage: normalizeUsage({
+            actualUsd: Number(actualUsd.toFixed(6)),
+            tokensIn,
+            tokensOut,
+            cachedInputTokens,
+            reasoningTokensOut: reasoningOutputTokens,
+            provenance: "actual",
+            providerSettlement: {
+                providerId: "codex",
+                model: modelLabel ?? "codex",
+                transport: "cli",
+                source: "codex_jsonl",
+                inputTokens: promptTokens,
+                cachedInputTokens,
+                outputTokens,
+                reasoningOutputTokens,
+                rawUsageAvailable: true,
+                settledAt: new Date().toISOString()
+            }
+        })
+    };
+}
+function extractGeminiJsonResult(stdout, modelLabel) {
+    let parsed;
+    try {
+        parsed = JSON.parse(stdout);
+    }
+    catch {
+        return undefined;
+    }
+    const summary = typeof parsed.response === "string" && parsed.response.trim().length > 0
+        ? parsed.response.trim()
+        : typeof parsed.error?.message === "string" && parsed.error.message.trim().length > 0
+            ? parsed.error.message.trim()
+            : stdout.trim();
+    const promptTokens = parsed.stats?.inputTokens ?? 0;
+    const cachedInputTokens = parsed.stats?.cachedReadTokens ?? 0;
+    const outputTokens = parsed.stats?.outputTokens ?? 0;
+    const reasoningOutputTokens = parsed.stats?.thoughtTokens ?? 0;
+    const hasUsage = parsed.stats !== undefined &&
+        (promptTokens > 0 || cachedInputTokens > 0 || outputTokens > 0 || reasoningOutputTokens > 0);
+    if (!hasUsage) {
+        return {
+            summary,
+            usage: normalizeUsage({
+                actualUsd: 0,
+                tokensIn: 0,
+                tokensOut: 0,
+                provenance: "unavailable",
+                providerSettlement: {
+                    providerId: "gemini",
+                    model: modelLabel ?? "flash",
+                    transport: "cli",
+                    source: "unavailable",
+                    inputTokens: 0,
+                    outputTokens: 0,
+                    rawUsageAvailable: false,
+                    settledAt: new Date().toISOString()
+                }
+            })
+        };
+    }
+    const tokensIn = promptTokens + cachedInputTokens;
+    const tokensOut = outputTokens + reasoningOutputTokens;
+    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
+        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
+    const actualUsd = (promptTokens / 1000) * pricing.inputPer1K +
+        (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
+        (tokensOut / 1000) * pricing.outputPer1K;
+    return {
+        summary,
+        usage: normalizeUsage({
+            actualUsd: Number(actualUsd.toFixed(6)),
+            tokensIn,
+            tokensOut,
+            cachedInputTokens,
+            reasoningTokensOut: reasoningOutputTokens,
+            provenance: "actual",
+            providerSettlement: {
+                providerId: "gemini",
+                model: modelLabel ?? "flash",
+                transport: "cli",
+                source: "gemini_json",
+                inputTokens: promptTokens,
+                cachedInputTokens,
+                outputTokens,
+                reasoningOutputTokens,
+                rawUsageAvailable: true,
+                settledAt: new Date().toISOString()
+            }
+        })
+    };
+}
+function createStreamingUsageInspector(capUsd, modelLabel) {
+    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
+        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
+    let buffer = "";
+    let cumulativeUsd = 0;
+    let tokensIn = 0;
+    let tokensOut = 0;
+    let turns = 0;
+    let finalResult;
+    const ingestLine = (line, terminate) => {
+        const trimmed = line.trim();
+        if (!trimmed) {
+            return;
+        }
+        let event;
+        try {
+            event = JSON.parse(trimmed);
+        }
+        catch {
+            return;
+        }
+        if (event.type === "assistant" && event.message?.usage) {
+            const usage = event.message.usage;
+            const turnTokensIn = (usage.input_tokens ?? usage.inputTokens ?? 0) +
+                (usage.cache_read_input_tokens ?? usage.cacheReadInputTokens ?? 0) +
+                (usage.cache_creation_input_tokens ?? usage.cacheCreationInputTokens ?? 0);
+            const turnTokensOut = usage.output_tokens ?? usage.outputTokens ?? 0;
+            tokensIn += turnTokensIn;
+            tokensOut += turnTokensOut;
+            turns += 1;
+            cumulativeUsd += (turnTokensIn / 1000) * pricing.inputPer1K + (turnTokensOut / 1000) * pricing.outputPer1K;
+            if (capUsd > 0 && cumulativeUsd > capUsd) {
+                terminate(`Streaming usage cap exceeded after ${String(turns)} turn(s): cumulative cost ~$${cumulativeUsd.toFixed(4)} ` +
+                    `surpassed the per-attempt cap $${capUsd.toFixed(4)} (derived from remaining loop budget). ` +
+                    `Subprocess terminated to bound runaway overspend.`);
+            }
+            return;
+        }
+        if (event.type === "result") {
+            finalResult = event;
+        }
+    };
+    return {
+        onChunk: (chunk, terminate) => {
+            buffer += chunk.toString("utf8");
+            let newlineIndex = buffer.indexOf("\n");
+            while (newlineIndex !== -1) {
+                const line = buffer.slice(0, newlineIndex);
+                buffer = buffer.slice(newlineIndex + 1);
+                ingestLine(line, terminate);
+                newlineIndex = buffer.indexOf("\n");
+            }
+        },
+        snapshot: () => ({ cumulativeUsd, tokensIn, tokensOut, turns, ...(finalResult ? { finalResult } : {}) })
+    };
+}
+/**
+ * Parses Claude's `stream-json` output (one JSON object per line) and returns
+ * the final `result` event, which carries the same `result`/`usage`/
+ * `total_cost_usd` fields as the single-blob `json` format.
+ */
+function parseStreamJsonResult(stdout) {
+    let lastResult;
+    for (const rawLine of stdout.split(/\r?\n/u)) {
+        const line = rawLine.trim();
+        if (!line) {
+            continue;
+        }
+        try {
+            const event = JSON.parse(line);
+            if (event.type === "result") {
+                lastResult = event;
+            }
+        }
+        catch {
+            // Ignore non-JSON / partial lines.
+        }
+    }
+    return lastResult;
+}
 // ---------------------------------------------------------------------------
 // Structural failure hint detection
 //
@@ -90,6 +352,7 @@ export function createAgentCliAdapter(options) {
     const verifyTimeoutMs = options.verifyTimeoutMs ?? 60_000;
     const adapterId = `agent-cli:${options.adapterIdSuffix ?? options.command}`;
     const supportsJsonOutput = options.supportsJsonOutput === true;
+    const supportsUsageSettlement = supportsJsonOutput || options.command === "codex" || options.command === "gemini";
     const adapter = {
         adapterId,
         kind: "agent-cli",
@@ -100,10 +363,10 @@ export function createAgentCliAdapter(options) {
             transport: "cli",
             capabilities: createAdapterCapabilities({
                 preflight: true,
-                usageSettlement: supportsJsonOutput,
+                usageSettlement: supportsUsageSettlement,
                 diffArtifacts: true,
                 structuredErrors: true,
-                cachingSignals: supportsJsonOutput
+                cachingSignals: supportsUsageSettlement
             })
         },
         async execute(request) {
@@ -130,12 +393,45 @@ export function createAgentCliAdapter(options) {
             }
             const args = options.argsBuilder(prompt);
             const stdinData = options.stdinBuilder?.(prompt);
+            // Live cumulative-cost circuit breaker: a single attempt should never be
+            // allowed to spend more than the loop has left. `--output-format json`
+            // only reports usage once the process exits, so for `stream-json` we
+            // watch per-turn usage events as they arrive and kill the subprocess the
+            // instant projected spend crosses what remains — bounding the worst case
+            // to roughly one turn's overshoot rather than the entire runaway session.
+            const streamingUsage = options.streamingUsageCap && request.context.remainingBudgetUsd > 0
+                ? createStreamingUsageInspector(request.context.remainingBudgetUsd, options.model ?? options.command)
+                : undefined;
             const agentResult = await runSubprocess(options.command, args, {
                 cwd: workingDirectory,
                 timeoutMs,
                 spawnImpl: options.spawnImpl,
-                ...(stdinData === undefined ? {} : { stdinData })
+                ...(stdinData === undefined ? {} : { stdinData }),
+                ...(streamingUsage ? { onStdoutChunk: streamingUsage.onChunk } : {})
             });
+            if (agentResult.terminationReason) {
+                const snapshot = streamingUsage?.snapshot();
+                const cumulativeUsd = snapshot?.cumulativeUsd ?? 0;
+                return {
+                    status: "failed",
+                    summary: `${options.command} subprocess terminated mid-run by the budget circuit breaker. ${agentResult.terminationReason}`,
+                    usage: normalizeUsage({
+                        actualUsd: Number(cumulativeUsd.toFixed(6)),
+                        estimatedUsd: Number(cumulativeUsd.toFixed(6)),
+                        tokensIn: snapshot?.tokensIn ?? 0,
+                        tokensOut: snapshot?.tokensOut ?? 0,
+                        provenance: "estimated"
+                    }),
+                    verification: {
+                        passed: false,
+                        summary: "Subprocess terminated by the streaming budget circuit breaker before verification could run."
+                    },
+                    failure: {
+                        message: agentResult.terminationReason,
+                        classHint: "budget_pressure"
+                    }
+                };
+            }
             if (agentResult.timedOut) {
                 return {
                     status: "failed",
@@ -170,45 +466,108 @@ export function createAgentCliAdapter(options) {
                     }
                 };
             }
-            // Parse JSON output if the CLI supports it (Claude with --output-format json)
+            // Parse JSON output if the CLI supports it. `stream-json` emits one JSON
+            // object per line — the final `result` event carries the same
+            // `result`/`usage`/`total_cost_usd` fields as single-blob `json` output.
             let parsed;
             if (supportsJsonOutput) {
                 try {
-                    parsed = JSON.parse(agentResult.stdout);
+                    parsed = options.streamingUsageCap
+                        ? parseStreamJsonResult(agentResult.stdout)
+                        : JSON.parse(agentResult.stdout);
                 }
                 catch {
                     // Fall through to plain-text handling
                 }
             }
-            const agentText = parsed?.result ?? agentResult.stdout.trim();
+            const codexJsonlResult = !supportsJsonOutput && options.command === "codex"
+                ? extractCodexJsonlResult(agentResult.stdout, options.model)
+                : undefined;
+            const geminiJsonResult = !supportsJsonOutput && options.command === "gemini"
+                ? extractGeminiJsonResult(agentResult.stdout, options.model)
+                : undefined;
+            const producedStructuredCompletion = parsed?.result !== undefined ||
+                codexJsonlResult !== undefined ||
+                geminiJsonResult !== undefined;
+            if (agentResult.exitCode !== 0 && !producedStructuredCompletion) {
+                const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr || agentResult.stdout, agentResult.exitCode);
+                return {
+                    status: "failed",
+                    summary: `${options.command} subprocess exited before verifier execution.`,
+                    usage: normalizeUsage({
+                        actualUsd: 0,
+                        tokensIn: 0,
+                        tokensOut: 0,
+                        provenance: "unavailable"
+                    }),
+                    verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
+                    failure: {
+                        message: failureMessage
+                    }
+                };
+            }
+            const agentText = codexJsonlResult?.summary ??
+                geminiJsonResult?.summary ??
+                parsed?.result ??
+                agentResult.stdout.trim();
             const summary = truncate(agentText, 2000);
             const usage = parsed?.usage
                 ? extractUsage(parsed, options.model)
-                : normalizeUsage({
-                    actualUsd: estimatedUsage.actualUsd,
-                    estimatedUsd: estimatedUsage.actualUsd,
-                    tokensIn: estimatedUsage.tokensIn,
-                    tokensOut: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
-                    provenance: "estimated"
-                });
+                : codexJsonlResult?.usage ??
+                    geminiJsonResult?.usage ??
+                    normalizeUsage({
+                        actualUsd: estimatedUsage.actualUsd,
+                        estimatedUsd: estimatedUsage.actualUsd,
+                        tokensIn: estimatedUsage.tokensIn,
+                        tokensOut: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
+                        provenance: "estimated",
+                        providerSettlement: options.command === "codex"
+                            ? {
+                                providerId: "codex",
+                                model: options.model ?? "codex",
+                                transport: "cli",
+                                source: "estimated_fallback",
+                                inputTokens: estimatedUsage.tokensIn,
+                                outputTokens: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
+                                rawUsageAvailable: false,
+                                settledAt: new Date().toISOString()
+                            }
+                            : options.command === "gemini"
+                                ? {
+                                    providerId: "gemini",
+                                    model: options.model ?? "flash",
+                                    transport: "cli",
+                                    source: "estimated_fallback",
+                                    inputTokens: estimatedUsage.tokensIn,
+                                    outputTokens: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
+                                    rawUsageAvailable: false,
+                                    settledAt: new Date().toISOString()
+                                }
+                                : undefined
+                    });
             const verificationStack = request.context.verificationStack;
             const verification = await runVerification(request.context.verificationPlan, workingDirectory, verifyTimeoutMs, verificationStack, options.spawnImpl);
             // Check for zero-diff (agent ran but made no file changes)
             const repoRoot = request.context.repoRoot;
+            const gitRepoRoot = repoRoot ? resolveGitRepositoryRoot(repoRoot) : undefined;
             let noDiff = false;
-            if (repoRoot) {
-                noDiff = await checkNoDiff(repoRoot);
+            if (gitRepoRoot) {
+                noDiff = await checkNoDiff(gitRepoRoot, options.spawnImpl);
             }
             // Extract structured errors from stderr/stdout for better failure context
             const structuredErrors = normalizeStructuredErrors(extractStructuredErrors(agentResult.stderr, agentResult.stdout));
-            const executionArtifacts = repoRoot
-                ? await readGitExecutionArtifacts(repoRoot, 5000, options.spawnImpl)
+            const executionArtifacts = gitRepoRoot
+                ? await readGitExecutionArtifacts(gitRepoRoot, 5000, options.spawnImpl)
                 : undefined;
             // Scope contract enforcement: check touched files against allowedPaths/deniedPaths
             let scopeViolations = [];
             const scopeCtx = request.context;
-            if (repoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
-                const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs: 5000 });
+            if (gitRepoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
+                const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
+                    cwd: gitRepoRoot,
+                    timeoutMs: 5000,
+                    spawnImpl: options.spawnImpl
+                });
                 if (diffResult.exitCode === 0 && diffResult.stdout.trim()) {
                     const touchedFiles = diffResult.stdout.trim().split("\n").filter(Boolean);
                     const allowed = scopeCtx.allowedPaths ?? [];
@@ -278,7 +637,12 @@ export function createAgentCliAdapter(options) {
                 }
                 // Reset tracked files to HEAD so next attempt starts from clean state
                 try {
-                    await runSubprocess("git", ["restore", "--staged", "--worktree", "."], { cwd: repoRoot, timeoutMs: 5000 });
+                    if (gitRepoRoot) {
+                        await runSubprocess("git", ["restore", "--staged", "--worktree", "."], {
+                            cwd: gitRepoRoot,
+                            timeoutMs: 5000
+                        });
+                    }
                 }
                 catch {
                     // Non-fatal
@@ -326,10 +690,16 @@ export function createAgentCliAdapter(options) {
 // Pre-configured: Claude CLI
 // ---------------------------------------------------------------------------
 /**
- * Spawns `claude --output-format json --print "<prompt>" --dangerously-skip-permissions [extraArgs]`.
+ * Spawns `claude --output-format stream-json --verbose --print "<prompt>" [extraArgs]`.
  *
- * The --output-format json flag causes Claude CLI to return structured JSON
- * including real token usage counts, enabling accurate cost tracking.
+ * `stream-json` emits one JSON event per line — including per-turn usage on
+ * each `assistant` message and a final `result` event carrying the same
+ * `result`/`usage`/`total_cost_usd` fields as single-blob `json` output — so
+ * MartinLoop can both (a) recover real token usage/cost as before, and
+ * (b) watch cumulative spend live and self-terminate the subprocess the
+ * moment it crosses the remaining per-attempt budget (see
+ * `streamingUsageCap` / `createStreamingUsageInspector`), instead of only
+ * discovering an overspend after the whole process has already exited.
  *
  * Requires the Claude Code CLI to be installed and authenticated:
  *   https://docs.anthropic.com/claude-code
@@ -346,10 +716,12 @@ export function createClaudeCliAdapter(options = {}) {
         timeoutMs: options.timeoutMs,
         verifyTimeoutMs: options.verifyTimeoutMs,
         supportsJsonOutput: true,
+        streamingUsageCap: true,
         spawnImpl: options.spawnImpl,
         argsBuilder: (_prompt) => [
             "--output-format",
-            "json",
+            "stream-json",
+            "--verbose",
             "--print",
             "--dangerously-skip-permissions",
             ...modelArgs,
@@ -372,12 +744,12 @@ export function createClaudeCliAdapter(options = {}) {
  *   npm install -g @openai/codex
  */
 export function createCodexCliAdapter(options = {}) {
-    const modelArgs = options.model ? ["--model", options.model] : [];
     const extraArgs = options.extraArgs ?? [];
     const sandbox = options.sandbox ?? "workspace-write";
     const workingDirectory = options.workingDirectory ?? process.cwd();
+    const command = options.command ?? "codex";
     return createAgentCliAdapter({
-        command: "codex",
+        command,
         adapterIdSuffix: "codex",
         model: options.model ?? "codex",
         label: options.label ?? "Codex CLI adapter",
@@ -386,17 +758,53 @@ export function createCodexCliAdapter(options = {}) {
         verifyTimeoutMs: options.verifyTimeoutMs,
         supportsJsonOutput: false,
         spawnImpl: options.spawnImpl,
-        argsBuilder: () => [
-            "exec",
-            "--cd",
+        argsBuilder: () => buildCodexExecArgs({
             workingDirectory,
-            "--sandbox",
             sandbox,
-            "--color",
-            "never",
-            ...modelArgs,
-            ...extraArgs,
-            "-"
+            ...(options.model ? { model: options.model } : {}),
+            extraArgs,
+            mode: "prompt"
+        }),
+        stdinBuilder: (prompt) => prompt
+    });
+}
+// ---------------------------------------------------------------------------
+// Pre-configured: Gemini CLI
+// ---------------------------------------------------------------------------
+/**
+ * Spawns `gemini --model <model> --prompt "" --approval-mode <mode> --output-format json [...]`.
+ *
+ * The prompt is delivered via stdin while forcing headless mode with `--prompt ""`,
+ * which keeps large MartinLoop prompts off the command line on Windows.
+ *
+ * Requires the Gemini CLI to be installed and authenticated:
+ *   npm install -g @google/gemini-cli
+ */
+export function createGeminiCliAdapter(options = {}) {
+    const model = options.model ?? "flash";
+    const approvalMode = options.approvalMode ?? "yolo";
+    const extraArgs = options.extraArgs ?? [];
+    return createAgentCliAdapter({
+        command: "gemini",
+        adapterIdSuffix: "gemini",
+        model,
+        label: options.label ?? "Gemini CLI adapter",
+        workingDirectory: options.workingDirectory,
+        timeoutMs: options.timeoutMs,
+        verifyTimeoutMs: options.verifyTimeoutMs,
+        supportsJsonOutput: false,
+        spawnImpl: options.spawnImpl,
+        argsBuilder: () => [
+            "--model",
+            model,
+            "--prompt",
+            "",
+            "--approval-mode",
+            approvalMode,
+            ...(options.sandbox ? ["--sandbox"] : []),
+            "--output-format",
+            "json",
+            ...extraArgs
         ],
         stdinBuilder: (prompt) => prompt
     });
@@ -559,7 +967,15 @@ function redactSecretsForPrompt(input) {
     return input
         .replace(/\bOPENAI_API_KEY\s*=\s*[^\s"'`]+/giu, "OPENAI_API_KEY=[REDACTED_SECRET]")
         .replace(/\bsk-[A-Za-z0-9_-]{8,}\b/gu, "[REDACTED_SECRET]")
-        .replace(/\bghp_[A-Za-z0-9_]{8,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\bghp_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\bgithub_pat_[A-Za-z0-9_]{20,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\b(?:gho|ghu|ghs|ghr)_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\bAKIA[0-9A-Z]{16}\b/gu, "[REDACTED_SECRET]")
+        .replace(/\b(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*[^\s"'`]+/giu, "AWS_SECRET_ACCESS_KEY=[REDACTED_SECRET]")
+        .replace(/\bxox[baprs]-[A-Za-z0-9-]{10,}\b/giu, "[REDACTED_SECRET]")
+        .replace(/\bAIza[0-9A-Za-z_-]{30,}\b/gu, "[REDACTED_SECRET]")
+        .replace(/-----BEGIN(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----[\s\S]*?-----END(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----/gu, "[REDACTED_SECRET]")
+        .replace(/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/gu, "[REDACTED_SECRET]")
         .replace(/\B\.env(?!\.example\b)(?:\.[A-Za-z0-9._-]+)?\b/giu, "[REDACTED_PATH]");
 }
 function extractStructuredErrors(stderr, stdout) {
@@ -579,7 +995,11 @@ function extractStructuredErrors(stderr, stdout) {
     }
     return errors.slice(0, 10); // cap at 10 to avoid bloating prompts
 }
-async function checkNoDiff(repoRoot) {
-    const result = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs: 5000 });
+async function checkNoDiff(repoRoot, spawnImpl) {
+    const result = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
+        cwd: repoRoot,
+        timeoutMs: 5000,
+        spawnImpl
+    });
     return result.exitCode === 0 && result.stdout.trim().length === 0;
 }