@martinloop/mcp 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +4 -4
  2. package/dist/package-version.d.ts +1 -1
  3. package/dist/package-version.js +1 -1
  4. package/dist/server.js +69 -7
  5. package/dist/tools/doctor.d.ts +27 -0
  6. package/dist/tools/doctor.js +39 -11
  7. package/dist/tools/get-run.d.ts +2 -1
  8. package/dist/tools/get-run.js +1 -0
  9. package/dist/tools/get-verification-results.d.ts +2 -1
  10. package/dist/tools/get-verification-results.js +1 -0
  11. package/dist/tools/plan.js +4 -2
  12. package/dist/tools/preflight.d.ts +27 -0
  13. package/dist/tools/preflight.js +44 -20
  14. package/dist/tools/run-dossier.d.ts +2 -1
  15. package/dist/tools/run-dossier.js +1 -0
  16. package/dist/tools/run-loop.d.ts +5 -1
  17. package/dist/tools/run-loop.js +20 -8
  18. package/dist/tools/run-store.js +67 -15
  19. package/dist/tools/tool-support.d.ts +2 -0
  20. package/dist/tools/tool-support.js +49 -13
  21. package/dist/tools/workflow-governance.d.ts +19 -3
  22. package/dist/tools/workflow-governance.js +107 -55
  23. package/dist/vendor/adapters/claude-cli.d.ts +20 -3
  24. package/dist/vendor/adapters/claude-cli.js +193 -33
  25. package/dist/vendor/adapters/cli-bridge.d.ts +45 -0
  26. package/dist/vendor/adapters/cli-bridge.js +107 -39
  27. package/dist/vendor/adapters/codex-launcher.d.ts +32 -0
  28. package/dist/vendor/adapters/codex-launcher.js +409 -118
  29. package/dist/vendor/adapters/openai-compatible.js +8 -2
  30. package/dist/vendor/adapters/runtime-support.js +1 -0
  31. package/dist/vendor/adapters/stub-direct-provider.js +3 -0
  32. package/dist/vendor/adapters/verifier-only.d.ts +2 -0
  33. package/dist/vendor/adapters/verifier-only.js +9 -3
  34. package/dist/vendor/core/context-integrity.js +28 -3
  35. package/dist/vendor/core/grounding.d.ts +1 -0
  36. package/dist/vendor/core/grounding.js +6 -2
  37. package/dist/vendor/core/index.d.ts +1 -0
  38. package/dist/vendor/core/index.js +25 -6
  39. package/dist/vendor/core/leash.js +85 -8
  40. package/dist/vendor/core/persistence/integrity.d.ts +1 -1
  41. package/dist/vendor/core/persistence/integrity.js +15 -6
  42. package/dist/workflow-state.d.ts +9 -0
  43. package/dist/workflow-state.js +46 -3
  44. package/package.json +2 -2
  45. package/server.json +2 -2
@@ -11,7 +11,8 @@
11
11
  *
12
12
  * MCP tools and integration tests use the same factories.
13
13
  */
14
- import { readGitExecutionArtifacts, runSubprocess, runVerification } from "./cli-bridge.js";
14
+ import { readGitExecutionArtifacts, resolveGitRepositoryRoot, runSubprocess, runVerification } from "./cli-bridge.js";
15
+ import { buildCodexExecArgs } from "./codex-launcher.js";
15
16
  import { createAdapterCapabilities, normalizeStructuredErrors, normalizeUsage } from "./runtime-support.js";
16
17
  // ---------------------------------------------------------------------------
17
18
  // Cost estimation
@@ -56,15 +57,21 @@ function extractUsage(parsed, modelLabel) {
56
57
  const tokensOut = parsed.usage.outputTokens ?? parsed.usage.output_tokens ?? 0;
57
58
  const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
58
59
  { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
59
- const actualUsd = (promptTokens / 1000) * pricing.inputPer1K +
60
- (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
61
- (tokensOut / 1000) * pricing.outputPer1K;
60
+ // Prefer Claude's own authoritative total_cost_usd (present on the final
61
+ // `result` event in json/stream-json output) over our pricing-table estimate,
62
+ // which can drift from real billed cost (cache discounts, surcharges, etc).
63
+ const hasAuthoritativeCost = typeof parsed.total_cost_usd === "number";
64
+ const actualUsd = hasAuthoritativeCost
65
+ ? parsed.total_cost_usd
66
+ : (promptTokens / 1000) * pricing.inputPer1K +
67
+ (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
68
+ (tokensOut / 1000) * pricing.outputPer1K;
62
69
  return normalizeUsage({
63
70
  actualUsd: Number(actualUsd.toFixed(6)),
64
71
  tokensIn,
65
72
  tokensOut,
66
73
  cachedInputTokens,
67
- provenance: "actual",
74
+ provenance: hasAuthoritativeCost ? "actual" : "estimated",
68
75
  providerSettlement: {
69
76
  providerId: "claude",
70
77
  model: modelLabel ?? "claude",
@@ -232,6 +239,86 @@ function extractGeminiJsonResult(stdout, modelLabel) {
232
239
  })
233
240
  };
234
241
  }
242
/**
 * Builds a live usage/cost inspector for newline-delimited JSON emitted on a
 * subprocess's stdout (Claude's `stream-json` output format).
 *
 * `onChunk` reassembles complete lines from raw stdout chunks, adds up the
 * token usage carried by every `assistant` event, prices it with the pricing
 * entry for `modelLabel` (falling back to the blended rates), and invokes
 * `terminate(reason)` once the running estimate exceeds `capUsd`. The most
 * recent `result` event is retained and exposed through `snapshot()`.
 *
 * Differences from a naive per-chunk string decode:
 *  - Lines are split on the raw newline byte and decoded only once complete,
 *    so a multi-byte UTF-8 character straddling two chunks is not corrupted.
 *  - JSON lines that are not objects (for example a bare `null`) are ignored
 *    instead of throwing inside the stdout data handler.
 *  - Cache-read input tokens are priced at `cachedInputPer1K` when the pricing
 *    entry defines it, matching the estimate used by `extractUsage`.
 *
 * A trailing line that never receives its newline is not ingested.
 *
 * NOTE(review): usage is added once per `assistant` event. If the CLI emits
 * several `assistant` events for a single API message, that message's usage
 * would be counted more than once — confirm against real stream-json output.
 *
 * @param {number} capUsd Spend ceiling in USD; values <= 0 disable the cap check.
 * @param {string} [modelLabel] Key into MODEL_PRICING; absent/unknown labels use the blended rates.
 * @returns {{
 *   onChunk: (chunk: Buffer | string, terminate: (reason: string) => void) => void,
 *   snapshot: () => { cumulativeUsd: number, tokensIn: number, tokensOut: number, turns: number, finalResult?: object }
 * }}
 */
function createStreamingUsageInspector(capUsd, modelLabel) {
    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
    const cachedInputPer1K = pricing.cachedInputPer1K ?? pricing.inputPer1K;
    const NEWLINE_BYTE = 0x0a;
    // Bytes received after the last newline; kept raw so decoding happens per complete line.
    let pending = Buffer.alloc(0);
    let cumulativeUsd = 0;
    let tokensIn = 0;
    let tokensOut = 0;
    let turns = 0;
    let finalResult;
    const ingestLine = (line, terminate) => {
        const trimmed = line.trim();
        if (!trimmed) {
            return;
        }
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch {
            // Non-JSON output (banners, warnings) is expected; skip it.
            return;
        }
        // `JSON.parse("null")` succeeds; guard so property access below cannot throw.
        if (event === null || typeof event !== "object") {
            return;
        }
        if (event.type === "assistant" && event.message?.usage) {
            const usage = event.message.usage;
            const uncachedTokensIn = (usage.input_tokens ?? usage.inputTokens ?? 0) +
                (usage.cache_creation_input_tokens ?? usage.cacheCreationInputTokens ?? 0);
            const cacheReadTokensIn = usage.cache_read_input_tokens ?? usage.cacheReadInputTokens ?? 0;
            const turnTokensOut = usage.output_tokens ?? usage.outputTokens ?? 0;
            tokensIn += uncachedTokensIn + cacheReadTokensIn;
            tokensOut += turnTokensOut;
            turns += 1;
            cumulativeUsd += (uncachedTokensIn / 1000) * pricing.inputPer1K +
                (cacheReadTokensIn / 1000) * cachedInputPer1K +
                (turnTokensOut / 1000) * pricing.outputPer1K;
            if (capUsd > 0 && cumulativeUsd > capUsd) {
                terminate(`Streaming usage cap exceeded after ${String(turns)} turn(s): cumulative cost ~$${cumulativeUsd.toFixed(4)} ` +
                    `surpassed the per-attempt cap $${capUsd.toFixed(4)} (derived from remaining loop budget). ` +
                    `Subprocess terminated to bound runaway overspend.`);
            }
            return;
        }
        if (event.type === "result") {
            finalResult = event;
        }
    };
    return {
        onChunk: (chunk, terminate) => {
            const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
            pending = pending.length === 0 ? bytes : Buffer.concat([pending, bytes]);
            let newlineIndex = pending.indexOf(NEWLINE_BYTE);
            while (newlineIndex !== -1) {
                const line = pending.subarray(0, newlineIndex).toString("utf8");
                pending = pending.subarray(newlineIndex + 1);
                ingestLine(line, terminate);
                newlineIndex = pending.indexOf(NEWLINE_BYTE);
            }
        },
        snapshot: () => ({ cumulativeUsd, tokensIn, tokensOut, turns, ...(finalResult ? { finalResult } : {}) })
    };
}
298
/**
 * Extracts the last `result` event from newline-delimited JSON output
 * (Claude's `stream-json` format: one JSON object per line).
 *
 * Blank lines, lines that are not valid JSON, and JSON values without a
 * `type` of `"result"` are skipped. When several `result` events are
 * present the final one wins.
 *
 * @param {string} stdout Complete captured stdout of the subprocess.
 * @returns {object | undefined} The last `result` event, or `undefined` if none was found.
 */
function parseStreamJsonResult(stdout) {
    const candidateLines = stdout
        .split(/\r?\n/u)
        .map((rawLine) => rawLine.trim())
        .filter((line) => line.length > 0);
    let finalEvent;
    for (const line of candidateLines) {
        let decoded;
        try {
            decoded = JSON.parse(line);
        }
        catch {
            // Ignore non-JSON / partial lines.
            continue;
        }
        if (decoded?.type === "result") {
            finalEvent = decoded;
        }
    }
    return finalEvent;
}
235
322
  // ---------------------------------------------------------------------------
236
323
  // Structural failure hint detection
237
324
  //
@@ -306,12 +393,45 @@ export function createAgentCliAdapter(options) {
306
393
  }
307
394
  const args = options.argsBuilder(prompt);
308
395
  const stdinData = options.stdinBuilder?.(prompt);
396
+ // Live cumulative-cost circuit breaker: a single attempt should never be
397
+ // allowed to spend more than the loop has left. `--output-format json`
398
+ // only reports usage once the process exits, so for `stream-json` we
399
+ // watch per-turn usage events as they arrive and kill the subprocess the
400
+ // instant projected spend crosses what remains — bounding the worst case
401
+ // to roughly one turn's overshoot rather than the entire runaway session.
402
+ const streamingUsage = options.streamingUsageCap && request.context.remainingBudgetUsd > 0
403
+ ? createStreamingUsageInspector(request.context.remainingBudgetUsd, options.model ?? options.command)
404
+ : undefined;
309
405
  const agentResult = await runSubprocess(options.command, args, {
310
406
  cwd: workingDirectory,
311
407
  timeoutMs,
312
408
  spawnImpl: options.spawnImpl,
313
- ...(stdinData === undefined ? {} : { stdinData })
409
+ ...(stdinData === undefined ? {} : { stdinData }),
410
+ ...(streamingUsage ? { onStdoutChunk: streamingUsage.onChunk } : {})
314
411
  });
412
+ if (agentResult.terminationReason) {
413
+ const snapshot = streamingUsage?.snapshot();
414
+ const cumulativeUsd = snapshot?.cumulativeUsd ?? 0;
415
+ return {
416
+ status: "failed",
417
+ summary: `${options.command} subprocess terminated mid-run by the budget circuit breaker. ${agentResult.terminationReason}`,
418
+ usage: normalizeUsage({
419
+ actualUsd: Number(cumulativeUsd.toFixed(6)),
420
+ estimatedUsd: Number(cumulativeUsd.toFixed(6)),
421
+ tokensIn: snapshot?.tokensIn ?? 0,
422
+ tokensOut: snapshot?.tokensOut ?? 0,
423
+ provenance: "estimated"
424
+ }),
425
+ verification: {
426
+ passed: false,
427
+ summary: "Subprocess terminated by the streaming budget circuit breaker before verification could run."
428
+ },
429
+ failure: {
430
+ message: agentResult.terminationReason,
431
+ classHint: "budget_pressure"
432
+ }
433
+ };
434
+ }
315
435
  if (agentResult.timedOut) {
316
436
  return {
317
437
  status: "failed",
@@ -346,11 +466,15 @@ export function createAgentCliAdapter(options) {
346
466
  }
347
467
  };
348
468
  }
349
- // Parse JSON output if the CLI supports it (Claude with --output-format json)
469
+ // Parse JSON output if the CLI supports it. `stream-json` emits one JSON
470
+ // object per line — the final `result` event carries the same
471
+ // `result`/`usage`/`total_cost_usd` fields as single-blob `json` output.
350
472
  let parsed;
351
473
  if (supportsJsonOutput) {
352
474
  try {
353
- parsed = JSON.parse(agentResult.stdout);
475
+ parsed = options.streamingUsageCap
476
+ ? parseStreamJsonResult(agentResult.stdout)
477
+ : JSON.parse(agentResult.stdout);
354
478
  }
355
479
  catch {
356
480
  // Fall through to plain-text handling
@@ -362,6 +486,26 @@ export function createAgentCliAdapter(options) {
362
486
  const geminiJsonResult = !supportsJsonOutput && options.command === "gemini"
363
487
  ? extractGeminiJsonResult(agentResult.stdout, options.model)
364
488
  : undefined;
489
+ const producedStructuredCompletion = parsed?.result !== undefined ||
490
+ codexJsonlResult !== undefined ||
491
+ geminiJsonResult !== undefined;
492
+ if (agentResult.exitCode !== 0 && !producedStructuredCompletion) {
493
+ const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr || agentResult.stdout, agentResult.exitCode);
494
+ return {
495
+ status: "failed",
496
+ summary: `${options.command} subprocess exited before verifier execution.`,
497
+ usage: normalizeUsage({
498
+ actualUsd: 0,
499
+ tokensIn: 0,
500
+ tokensOut: 0,
501
+ provenance: "unavailable"
502
+ }),
503
+ verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
504
+ failure: {
505
+ message: failureMessage
506
+ }
507
+ };
508
+ }
365
509
  const agentText = codexJsonlResult?.summary ??
366
510
  geminiJsonResult?.summary ??
367
511
  parsed?.result ??
@@ -405,21 +549,22 @@ export function createAgentCliAdapter(options) {
405
549
  const verification = await runVerification(request.context.verificationPlan, workingDirectory, verifyTimeoutMs, verificationStack, options.spawnImpl);
406
550
  // Check for zero-diff (agent ran but made no file changes)
407
551
  const repoRoot = request.context.repoRoot;
552
+ const gitRepoRoot = repoRoot ? resolveGitRepositoryRoot(repoRoot) : undefined;
408
553
  let noDiff = false;
409
- if (repoRoot) {
410
- noDiff = await checkNoDiff(repoRoot, options.spawnImpl);
554
+ if (gitRepoRoot) {
555
+ noDiff = await checkNoDiff(gitRepoRoot, options.spawnImpl);
411
556
  }
412
557
  // Extract structured errors from stderr/stdout for better failure context
413
558
  const structuredErrors = normalizeStructuredErrors(extractStructuredErrors(agentResult.stderr, agentResult.stdout));
414
- const executionArtifacts = repoRoot
415
- ? await readGitExecutionArtifacts(repoRoot, 5000, options.spawnImpl)
559
+ const executionArtifacts = gitRepoRoot
560
+ ? await readGitExecutionArtifacts(gitRepoRoot, 5000, options.spawnImpl)
416
561
  : undefined;
417
562
  // Scope contract enforcement: check touched files against allowedPaths/deniedPaths
418
563
  let scopeViolations = [];
419
564
  const scopeCtx = request.context;
420
- if (repoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
565
+ if (gitRepoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
421
566
  const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
422
- cwd: repoRoot,
567
+ cwd: gitRepoRoot,
423
568
  timeoutMs: 5000,
424
569
  spawnImpl: options.spawnImpl
425
570
  });
@@ -492,7 +637,12 @@ export function createAgentCliAdapter(options) {
492
637
  }
493
638
  // Reset tracked files to HEAD so next attempt starts from clean state
494
639
  try {
495
- await runSubprocess("git", ["restore", "--staged", "--worktree", "."], { cwd: repoRoot, timeoutMs: 5000 });
640
+ if (gitRepoRoot) {
641
+ await runSubprocess("git", ["restore", "--staged", "--worktree", "."], {
642
+ cwd: gitRepoRoot,
643
+ timeoutMs: 5000
644
+ });
645
+ }
496
646
  }
497
647
  catch {
498
648
  // Non-fatal
@@ -540,10 +690,16 @@ export function createAgentCliAdapter(options) {
540
690
  // Pre-configured: Claude CLI
541
691
  // ---------------------------------------------------------------------------
542
692
  /**
543
- * Spawns `claude --output-format json --print "<prompt>" --dangerously-skip-permissions [extraArgs]`.
693
+ * Spawns `claude --output-format stream-json --verbose --print "<prompt>" [extraArgs]`.
544
694
  *
545
- * The --output-format json flag causes Claude CLI to return structured JSON
546
- * including real token usage counts, enabling accurate cost tracking.
695
+ * `stream-json` emits one JSON event per line including per-turn usage on
696
+ * each `assistant` message and a final `result` event carrying the same
697
+ * `result`/`usage`/`total_cost_usd` fields as single-blob `json` output — so
698
+ * MartinLoop can both (a) recover real token usage/cost as before, and
699
+ * (b) watch cumulative spend live and self-terminate the subprocess the
700
+ * moment it crosses the remaining per-attempt budget (see
701
+ * `streamingUsageCap` / `createStreamingUsageInspector`), instead of only
702
+ * discovering an overspend after the whole process has already exited.
547
703
  *
548
704
  * Requires the Claude Code CLI to be installed and authenticated:
549
705
  * https://docs.anthropic.com/claude-code
@@ -560,10 +716,12 @@ export function createClaudeCliAdapter(options = {}) {
560
716
  timeoutMs: options.timeoutMs,
561
717
  verifyTimeoutMs: options.verifyTimeoutMs,
562
718
  supportsJsonOutput: true,
719
+ streamingUsageCap: true,
563
720
  spawnImpl: options.spawnImpl,
564
721
  argsBuilder: (_prompt) => [
565
722
  "--output-format",
566
- "json",
723
+ "stream-json",
724
+ "--verbose",
567
725
  "--print",
568
726
  "--dangerously-skip-permissions",
569
727
  ...modelArgs,
@@ -586,12 +744,12 @@ export function createClaudeCliAdapter(options = {}) {
586
744
  * npm install -g @openai/codex
587
745
  */
588
746
  export function createCodexCliAdapter(options = {}) {
589
- const modelArgs = options.model ? ["--model", options.model] : [];
590
747
  const extraArgs = options.extraArgs ?? [];
591
748
  const sandbox = options.sandbox ?? "workspace-write";
592
749
  const workingDirectory = options.workingDirectory ?? process.cwd();
750
+ const command = options.command ?? "codex";
593
751
  return createAgentCliAdapter({
594
- command: "codex",
752
+ command,
595
753
  adapterIdSuffix: "codex",
596
754
  model: options.model ?? "codex",
597
755
  label: options.label ?? "Codex CLI adapter",
@@ -600,19 +758,13 @@ export function createCodexCliAdapter(options = {}) {
600
758
  verifyTimeoutMs: options.verifyTimeoutMs,
601
759
  supportsJsonOutput: false,
602
760
  spawnImpl: options.spawnImpl,
603
- argsBuilder: () => [
604
- "exec",
605
- "--cd",
761
+ argsBuilder: () => buildCodexExecArgs({
606
762
  workingDirectory,
607
- "--sandbox",
608
763
  sandbox,
609
- "--json",
610
- "--color",
611
- "never",
612
- ...modelArgs,
613
- ...extraArgs,
614
- "-"
615
- ],
764
+ ...(options.model ? { model: options.model } : {}),
765
+ extraArgs,
766
+ mode: "prompt"
767
+ }),
616
768
  stdinBuilder: (prompt) => prompt
617
769
  });
618
770
  }
@@ -815,7 +967,15 @@ function redactSecretsForPrompt(input) {
815
967
  return input
816
968
  .replace(/\bOPENAI_API_KEY\s*=\s*[^\s"'`]+/giu, "OPENAI_API_KEY=[REDACTED_SECRET]")
817
969
  .replace(/\bsk-[A-Za-z0-9_-]{8,}\b/gu, "[REDACTED_SECRET]")
818
- .replace(/\bghp_[A-Za-z0-9_]{8,}\b/gu, "[REDACTED_SECRET]")
970
+ .replace(/\bghp_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
971
+ .replace(/\bgithub_pat_[A-Za-z0-9_]{20,}\b/gu, "[REDACTED_SECRET]")
972
+ .replace(/\b(?:gho|ghu|ghs|ghr)_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
973
+ .replace(/\bAKIA[0-9A-Z]{16}\b/gu, "[REDACTED_SECRET]")
974
+ .replace(/\b(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*[^\s"'`]+/giu, "AWS_SECRET_ACCESS_KEY=[REDACTED_SECRET]")
975
+ .replace(/\bxox[baprs]-[A-Za-z0-9-]{10,}\b/giu, "[REDACTED_SECRET]")
976
+ .replace(/\bAIza[0-9A-Za-z_-]{30,}\b/gu, "[REDACTED_SECRET]")
977
+ .replace(/-----BEGIN(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----[\s\S]*?-----END(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----/gu, "[REDACTED_SECRET]")
978
+ .replace(/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/gu, "[REDACTED_SECRET]")
819
979
  .replace(/\B\.env(?!\.example\b)(?:\.[A-Za-z0-9._-]+)?\b/giu, "[REDACTED_PATH]");
820
980
  }
821
981
  function extractStructuredErrors(stderr, stdout) {
@@ -6,16 +6,60 @@ export interface SubprocessResult {
6
6
  stdout: string;
7
7
  stderr: string;
8
8
  timedOut: boolean;
9
+ /**
10
+ * True when the subprocess was terminated early because its combined
11
+ * stdout+stderr exceeded `maxOutputBytes` — a circuit breaker against
12
+ * runaway agent sessions that would otherwise burn far more cost/tokens
13
+ * than the loop budget allows before MartinLoop can observe the final
14
+ * (post-hoc) usage report. See `claude-cli.ts` execute() for how this
15
+ * cap is derived from the remaining loop budget.
16
+ */
17
+ outputCapped: boolean;
18
+ /**
19
+ * Set to the inspector's reason string when an `onStdoutChunk` callback
20
+ * requested early termination (e.g. a streaming usage/cost circuit breaker
21
+ * that detected the agent is on track to blow through its budget). Distinct
22
+ * from `outputCapped`, which fires on raw byte volume rather than parsed
23
+ * semantic content.
24
+ */
25
+ terminationReason?: string;
26
+ launched: boolean;
9
27
  }
10
28
  export interface VerificationOutcome {
11
29
  passed: boolean;
12
30
  summary: string;
31
+ steps: VerificationStepOutcome[];
32
+ warnings?: string[];
33
+ }
34
+ export interface VerificationStepOutcome {
35
+ command: string;
36
+ launched: boolean;
37
+ exitCode?: number;
38
+ timedOut: boolean;
39
+ fastFail: boolean;
40
+ detail?: string;
13
41
  }
14
42
  export declare function runSubprocess(command: string, args: string[], options: {
15
43
  cwd: string;
16
44
  timeoutMs: number;
17
45
  spawnImpl?: SpawnLike;
18
46
  stdinData?: string;
47
+ /**
48
+ * Optional circuit breaker: terminate the subprocess once combined
49
+ * stdout+stderr bytes exceed this threshold, instead of waiting for
50
+ * natural completion. Used to bound runaway agent-CLI cost/token spend
51
+ * that can't otherwise be observed until the process exits.
52
+ */
53
+ maxOutputBytes?: number;
54
+ /**
55
+ * Optional semantic inspector invoked with each raw stdout chunk. Used to
56
+ * parse streaming structured output (e.g. Claude's `stream-json` usage
57
+ * events) and request early termination via the supplied `terminate`
58
+ * callback once a semantic threshold (such as cumulative cost) is
59
+ * crossed — well before the subprocess would exit naturally and report
60
+ * a runaway final usage figure.
61
+ */
62
+ onStdoutChunk?: (chunk: Buffer, terminate: (reason: string) => void) => void;
19
63
  }): Promise<SubprocessResult>;
20
64
  export declare function runVerification(commands: string[], cwd: string, timeoutMs: number, verificationStack?: Array<{
21
65
  command: string;
@@ -27,6 +71,7 @@ export declare function readGitExecutionArtifacts(repoRoot: string, timeoutMs: n
27
71
  diffStats?: ReturnType<typeof diffStatsFromNumstat>;
28
72
  }>;
29
73
  export declare function readGitChangedFiles(repoRoot: string, timeoutMs: number, spawnImpl?: SpawnLike): Promise<string[]>;
74
+ export declare function resolveGitRepositoryRoot(workingDirectory: string): string | undefined;
30
75
  export interface SpawnPlan {
31
76
  command: string;
32
77
  args: string[];
@@ -1,11 +1,15 @@
1
1
  import { spawn } from "node:child_process";
2
- import { delimiter, extname, isAbsolute, join, resolve } from "node:path";
2
+ import { delimiter, dirname, extname, isAbsolute, join, resolve } from "node:path";
3
3
  import { existsSync } from "node:fs";
4
4
  import { diffStatsFromNumstat } from "./runtime-support.js";
5
+ const gitRepositoryRootCache = new Map();
5
6
  export async function runSubprocess(command, args, options) {
6
7
  return new Promise((resolve) => {
7
8
  let timedOut = false;
9
+ let outputCapped = false;
10
+ let terminationReason;
8
11
  let settled = false;
12
+ let outputBytes = 0;
9
13
  const stdoutChunks = [];
10
14
  const stderrChunks = [];
11
15
  const stdinMode = options.stdinData !== undefined ? "pipe" : "ignore";
@@ -14,7 +18,7 @@ export async function runSubprocess(command, args, options) {
14
18
  return;
15
19
  }
16
20
  settled = true;
17
- resolve(result);
21
+ resolve({ ...result, timedOut, outputCapped, ...(terminationReason ? { terminationReason } : {}) });
18
22
  };
19
23
  let proc;
20
24
  try {
@@ -27,19 +31,33 @@ export async function runSubprocess(command, args, options) {
27
31
  }
28
32
  catch (error) {
29
33
  const message = error instanceof Error ? error.message : String(error);
30
- resolveOnce({
31
- exitCode: 1,
32
- stdout: "",
33
- stderr: message,
34
- timedOut: false
35
- });
34
+ resolveOnce({ exitCode: 1, stdout: "", stderr: message, launched: false });
36
35
  return;
37
36
  }
37
+ const trackOutput = (chunks, chunk) => {
38
+ chunks.push(chunk);
39
+ outputBytes += chunk.byteLength;
40
+ if (options.maxOutputBytes !== undefined &&
41
+ !outputCapped &&
42
+ !timedOut &&
43
+ outputBytes > options.maxOutputBytes) {
44
+ outputCapped = true;
45
+ proc.kill("SIGTERM");
46
+ }
47
+ };
48
+ const terminateEarly = (reason) => {
49
+ if (terminationReason || timedOut || outputCapped) {
50
+ return;
51
+ }
52
+ terminationReason = reason;
53
+ proc.kill("SIGTERM");
54
+ };
38
55
  proc.stdout?.on("data", (chunk) => {
39
- stdoutChunks.push(chunk);
56
+ trackOutput(stdoutChunks, chunk);
57
+ options.onStdoutChunk?.(chunk, terminateEarly);
40
58
  });
41
59
  proc.stderr?.on("data", (chunk) => {
42
- stderrChunks.push(chunk);
60
+ trackOutput(stderrChunks, chunk);
43
61
  });
44
62
  proc.stdin?.on("error", (error) => {
45
63
  // Some CLIs exit before consuming stdin in tests and on fast-fail paths.
@@ -55,12 +73,7 @@ export async function runSubprocess(command, args, options) {
55
73
  }, options.timeoutMs);
56
74
  proc.on("error", (error) => {
57
75
  clearTimeout(timer);
58
- resolveOnce({
59
- exitCode: 1,
60
- stdout: "",
61
- stderr: error.message,
62
- timedOut: false
63
- });
76
+ resolveOnce({ exitCode: 1, stdout: "", stderr: error.message, launched: false });
64
77
  });
65
78
  proc.on("close", (code) => {
66
79
  clearTimeout(timer);
@@ -68,7 +81,7 @@ export async function runSubprocess(command, args, options) {
68
81
  exitCode: code ?? 1,
69
82
  stdout: Buffer.concat(stdoutChunks).toString("utf8"),
70
83
  stderr: Buffer.concat(stderrChunks).toString("utf8"),
71
- timedOut
84
+ launched: true
72
85
  });
73
86
  });
74
87
  if (options.stdinData !== undefined && proc.stdin) {
@@ -83,7 +96,7 @@ export async function runSubprocess(command, args, options) {
83
96
  exitCode: 1,
84
97
  stdout: Buffer.concat(stdoutChunks).toString("utf8"),
85
98
  stderr: stdinError.message,
86
- timedOut: false
99
+ launched: false
87
100
  });
88
101
  }
89
102
  }
@@ -98,9 +111,11 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
98
111
  }))
99
112
  : commands.map((command) => ({ command, fastFail: true }));
100
113
  if (steps.length === 0) {
101
- return { passed: true, summary: "No verification commands specified." };
114
+ return { passed: true, summary: "No verification commands specified.", steps: [] };
102
115
  }
103
116
  const failedSteps = [];
117
+ const stepOutcomes = [];
118
+ const warnings = [];
104
119
  for (const step of steps) {
105
120
  const parts = splitCommand(step.command);
106
121
  const [bin, ...args] = parts;
@@ -108,24 +123,53 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
108
123
  continue;
109
124
  }
110
125
  const result = await runSubprocess(bin, args, { cwd, timeoutMs, spawnImpl });
126
+ const detail = truncate(result.stderr.trim() || result.stdout.trim(), 500);
127
+ stepOutcomes.push({
128
+ command: step.command,
129
+ launched: result.launched,
130
+ exitCode: result.exitCode,
131
+ timedOut: result.timedOut,
132
+ fastFail: step.fastFail,
133
+ ...(detail ? { detail } : {})
134
+ });
111
135
  if (result.timedOut) {
112
- return { passed: false, summary: `Verification timed out: ${step.command}` };
136
+ return {
137
+ passed: false,
138
+ summary: `Verification timed out: ${step.command}`,
139
+ steps: stepOutcomes,
140
+ ...(warnings.length ? { warnings } : {})
141
+ };
113
142
  }
114
143
  if (result.exitCode !== 0) {
115
- const detail = truncate(result.stderr.trim() || result.stdout.trim(), 500);
116
144
  const summary = `Verification failed: ${step.command}\n${detail}`;
145
+ if (!result.launched) {
146
+ warnings.push(`Verifier never launched: ${step.command}`);
147
+ }
117
148
  if (step.fastFail) {
118
- return { passed: false, summary };
149
+ return { passed: false, summary, steps: stepOutcomes, ...(warnings.length ? { warnings } : {}) };
119
150
  }
120
151
  failedSteps.push(step.command);
121
152
  }
122
153
  }
123
154
  if (failedSteps.length > 0) {
124
- return { passed: false, summary: `Failed steps: ${failedSteps.join(", ")}` };
155
+ return {
156
+ passed: false,
157
+ summary: `Failed steps: ${failedSteps.join(", ")}`,
158
+ steps: stepOutcomes,
159
+ ...(warnings.length ? { warnings } : {})
160
+ };
125
161
  }
126
- return { passed: true, summary: `All ${String(steps.length)} verification step(s) passed.` };
162
+ return {
163
+ passed: true,
164
+ summary: `All ${String(steps.length)} verification step(s) passed.`,
165
+ steps: stepOutcomes,
166
+ ...(warnings.length ? { warnings } : {})
167
+ };
127
168
  }
128
169
  export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl) {
170
+ if (!resolveGitRepositoryRoot(repoRoot)) {
171
+ return {};
172
+ }
129
173
  const changedFilesResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs, spawnImpl });
130
174
  const numstatResult = await runSubprocess("git", ["diff", "--numstat", "HEAD"], { cwd: repoRoot, timeoutMs, spawnImpl });
131
175
  const changedFiles = changedFilesResult.exitCode === 0
@@ -141,12 +185,48 @@ export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl)
141
185
  };
142
186
  }
143
187
  export async function readGitChangedFiles(repoRoot, timeoutMs, spawnImpl) {
144
- const statusResult = await runSubprocess("git", ["status", "-z", "--porcelain=v1", "--untracked-files=all", "--ignore-submodules=all"], { cwd: repoRoot, timeoutMs, spawnImpl });
188
+ if (!resolveGitRepositoryRoot(repoRoot)) {
189
+ return [];
190
+ }
191
+ const statusResult = await runSubprocess("git", ["status", "-z", "--porcelain=v1", "--untracked-files=all", "--ignore-submodules=all", "--", "."], { cwd: repoRoot, timeoutMs, spawnImpl });
145
192
  if (statusResult.exitCode !== 0) {
146
193
  return [];
147
194
  }
148
195
  return parsePorcelainEntries(statusResult.stdout).filter((entry) => typeof entry === "string" && entry.length > 0);
149
196
  }
197
/**
 * Finds the nearest ancestor of `workingDirectory` (inclusive) that contains
 * a `.git` entry, walking upwards until the filesystem root.
 *
 * Successful lookups are memoised in `gitRepositoryRootCache` for every
 * directory visited on the way up. Two staleness hazards of a process-lifetime
 * cache are avoided:
 *  - "No repository" outcomes are NOT cached, so a directory that becomes a
 *    repository later (`git init`, clone) is detected on the next call.
 *  - A cached root is re-checked for its `.git` entry before being trusted;
 *    stale entries are dropped and the walk continues.
 *
 * Known limitation: a repository created *between* an already-cached
 * directory and its cached root is not noticed while the outer root still
 * exists.
 *
 * @param {string} workingDirectory Directory to start from; resolved against the process cwd.
 * @returns {string | undefined} Absolute path of the directory holding `.git`, or `undefined` if none.
 */
export function resolveGitRepositoryRoot(workingDirectory) {
    const visited = [];
    let current = resolve(workingDirectory);
    while (true) {
        visited.push(current);
        const cachedRoot = gitRepositoryRootCache.get(current);
        let root;
        if (typeof cachedRoot === "string" && existsSync(resolve(cachedRoot, ".git"))) {
            root = cachedRoot;
        }
        else if (existsSync(resolve(current, ".git"))) {
            root = current;
        }
        if (root !== undefined) {
            for (const directory of visited) {
                gitRepositoryRootCache.set(directory, root);
            }
            return root;
        }
        // Either never cached or the cached root lost its `.git`; forget it.
        gitRepositoryRootCache.delete(current);
        const parent = dirname(current);
        if (parent === current) {
            // Reached the filesystem root without finding a repository.
            return undefined;
        }
        current = parent;
    }
}
150
230
  export function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn) {
151
231
  if (preserveRawForInjectedSpawn || process.platform !== "win32") {
152
232
  return { command, args };
@@ -157,18 +237,16 @@ export function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn)
157
237
  // Windows can resolve the command itself — this covers cases like `pnpm` where the npm global
158
238
  // bin directory is present in the shell PATH but not yet visible to this Node.js process.
159
239
  if (resolvedOrUndefined === undefined) {
160
- const cmdStr = [quoteWindowsCmdArg(command), ...args.map(quoteWindowsCmdArg)].join(" ");
161
240
  return {
162
241
  command: process.env.ComSpec || "cmd.exe",
163
- args: ["/d", "/c", cmdStr]
242
+ args: ["/d", "/c", command, ...args]
164
243
  };
165
244
  }
166
245
  const extension = extname(resolvedOrUndefined).toLowerCase();
167
246
  if (extension === ".cmd" || extension === ".bat") {
168
- const cmdStr = [quoteWindowsCmdArg(resolvedOrUndefined), ...args.map(quoteWindowsCmdArg)].join(" ");
169
247
  return {
170
248
  command: process.env.ComSpec || "cmd.exe",
171
- args: ["/d", "/s", "/c", cmdStr]
249
+ args: ["/d", "/c", resolvedOrUndefined, ...args]
172
250
  };
173
251
  }
174
252
  if (extension === ".ps1") {
@@ -240,16 +318,6 @@ function windowsPathDirectories() {
240
318
  .map((entry) => entry.trim().replace(/^"|"$/g, ""))
241
319
  .filter(Boolean);
242
320
  }
243
- function quoteWindowsCmdArg(value) {
244
- const normalized = value.replace(/\r?\n/gu, " ");
245
- const escaped = normalized
246
- .replace(/\^/gu, "^^")
247
- .replace(/"/gu, '^"')
248
- .replace(/%/gu, "%%")
249
- .replace(/!/gu, "^^!")
250
- .replace(/[&|<>()]/gu, (match) => `^${match}`);
251
- return `"${escaped}"`;
252
- }
253
321
  export function splitCommand(command) {
254
322
  const tokens = [];
255
323
  let current = "";