@martinloop/mcp 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/server.js +69 -7
- package/dist/tools/doctor.d.ts +27 -0
- package/dist/tools/doctor.js +39 -11
- package/dist/tools/get-run.d.ts +2 -1
- package/dist/tools/get-run.js +1 -0
- package/dist/tools/get-verification-results.d.ts +2 -1
- package/dist/tools/get-verification-results.js +1 -0
- package/dist/tools/plan.js +4 -2
- package/dist/tools/preflight.d.ts +27 -0
- package/dist/tools/preflight.js +44 -20
- package/dist/tools/run-dossier.d.ts +2 -1
- package/dist/tools/run-dossier.js +1 -0
- package/dist/tools/run-loop.d.ts +5 -1
- package/dist/tools/run-loop.js +20 -8
- package/dist/tools/run-store.js +67 -15
- package/dist/tools/tool-support.d.ts +2 -0
- package/dist/tools/tool-support.js +49 -13
- package/dist/tools/workflow-governance.d.ts +19 -3
- package/dist/tools/workflow-governance.js +107 -55
- package/dist/vendor/adapters/claude-cli.d.ts +20 -3
- package/dist/vendor/adapters/claude-cli.js +193 -33
- package/dist/vendor/adapters/cli-bridge.d.ts +45 -0
- package/dist/vendor/adapters/cli-bridge.js +107 -39
- package/dist/vendor/adapters/codex-launcher.d.ts +32 -0
- package/dist/vendor/adapters/codex-launcher.js +409 -118
- package/dist/vendor/adapters/openai-compatible.js +8 -2
- package/dist/vendor/adapters/runtime-support.js +1 -0
- package/dist/vendor/adapters/stub-direct-provider.js +3 -0
- package/dist/vendor/adapters/verifier-only.d.ts +2 -0
- package/dist/vendor/adapters/verifier-only.js +9 -3
- package/dist/vendor/core/context-integrity.js +28 -3
- package/dist/vendor/core/grounding.d.ts +1 -0
- package/dist/vendor/core/grounding.js +6 -2
- package/dist/vendor/core/index.d.ts +1 -0
- package/dist/vendor/core/index.js +25 -6
- package/dist/vendor/core/leash.js +85 -8
- package/dist/vendor/core/persistence/integrity.d.ts +1 -1
- package/dist/vendor/core/persistence/integrity.js +15 -6
- package/dist/workflow-state.d.ts +9 -0
- package/dist/workflow-state.js +46 -3
- package/package.json +2 -2
- package/server.json +2 -2
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
*
|
|
12
12
|
* MCP tools and integration tests use the same factories.
|
|
13
13
|
*/
|
|
14
|
-
import { readGitExecutionArtifacts, runSubprocess, runVerification } from "./cli-bridge.js";
|
|
14
|
+
import { readGitExecutionArtifacts, resolveGitRepositoryRoot, runSubprocess, runVerification } from "./cli-bridge.js";
|
|
15
|
+
import { buildCodexExecArgs } from "./codex-launcher.js";
|
|
15
16
|
import { createAdapterCapabilities, normalizeStructuredErrors, normalizeUsage } from "./runtime-support.js";
|
|
16
17
|
// ---------------------------------------------------------------------------
|
|
17
18
|
// Cost estimation
|
|
@@ -56,15 +57,21 @@ function extractUsage(parsed, modelLabel) {
|
|
|
56
57
|
const tokensOut = parsed.usage.outputTokens ?? parsed.usage.output_tokens ?? 0;
|
|
57
58
|
const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
|
|
58
59
|
{ inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
60
|
+
// Prefer Claude's own authoritative total_cost_usd (present on the final
|
|
61
|
+
// `result` event in json/stream-json output) over our pricing-table estimate,
|
|
62
|
+
// which can drift from real billed cost (cache discounts, surcharges, etc).
|
|
63
|
+
const hasAuthoritativeCost = typeof parsed.total_cost_usd === "number";
|
|
64
|
+
const actualUsd = hasAuthoritativeCost
|
|
65
|
+
? parsed.total_cost_usd
|
|
66
|
+
: (promptTokens / 1000) * pricing.inputPer1K +
|
|
67
|
+
(cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
|
|
68
|
+
(tokensOut / 1000) * pricing.outputPer1K;
|
|
62
69
|
return normalizeUsage({
|
|
63
70
|
actualUsd: Number(actualUsd.toFixed(6)),
|
|
64
71
|
tokensIn,
|
|
65
72
|
tokensOut,
|
|
66
73
|
cachedInputTokens,
|
|
67
|
-
provenance: "actual",
|
|
74
|
+
provenance: hasAuthoritativeCost ? "actual" : "estimated",
|
|
68
75
|
providerSettlement: {
|
|
69
76
|
providerId: "claude",
|
|
70
77
|
model: modelLabel ?? "claude",
|
|
@@ -232,6 +239,86 @@ function extractGeminiJsonResult(stdout, modelLabel) {
|
|
|
232
239
|
})
|
|
233
240
|
};
|
|
234
241
|
}
|
|
242
|
+
/**
 * Builds an incremental inspector for newline-delimited JSON emitted on a
 * subprocess's stdout (Claude's `stream-json` format: one event per line).
 *
 * Every `assistant` event carrying `message.usage` is folded into running
 * token and cost totals. When `capUsd` is positive and the running cost
 * estimate exceeds it, the `terminate` callback supplied with the current
 * chunk is invoked with a human-readable reason. The last `result` event seen
 * is retained and exposed through `snapshot()`.
 *
 * @param {number} capUsd - Per-attempt spend cap in USD; values <= 0 disable the cap.
 * @param {string} [modelLabel] - Key into MODEL_PRICING; blended rates are used when absent or unknown.
 * @returns {{ onChunk: (chunk: Buffer | string, terminate: (reason: string) => void) => void,
 *             snapshot: () => { cumulativeUsd: number, tokensIn: number, tokensOut: number, turns: number, finalResult?: object } }}
 */
function createStreamingUsageInspector(capUsd, modelLabel) {
    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
    const NEWLINE_BYTE = 0x0a;
    // Raw bytes of the current, not-yet-terminated line. Buffering bytes (not
    // decoded text) keeps multi-byte UTF-8 characters intact when a character
    // straddles two chunks.
    let pending = Buffer.alloc(0);
    let cumulativeUsd = 0;
    let tokensIn = 0;
    let tokensOut = 0;
    let turns = 0;
    let finalResult;
    // First finite number among the candidates, else 0. A non-numeric usage
    // field must never reach the totals: one NaN would make every later
    // `cumulativeUsd > capUsd` comparison false and silently disable the cap.
    const firstCount = (...candidates) => {
        for (const candidate of candidates) {
            if (typeof candidate === "number" && Number.isFinite(candidate)) {
                return candidate;
            }
        }
        return 0;
    };
    // Returns true when the line was consumed (blank or valid JSON) and false
    // when it could not be parsed as JSON.
    const ingestLine = (line, terminate) => {
        const trimmed = line.trim();
        if (!trimmed) {
            return true;
        }
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch {
            return false;
        }
        // `null`, numbers, strings and booleans are valid JSON but not events;
        // dereferencing `null.type` would throw out of the stdout handler.
        if (event === null || typeof event !== "object") {
            return true;
        }
        if (event.type === "assistant" && event.message?.usage) {
            const usage = event.message.usage;
            // NOTE(review): cache reads/creations are priced at the full input
            // rate here, which over-estimates relative to a cached-input rate;
            // confirm this conservative bias is intended for the breaker.
            const turnTokensIn = firstCount(usage.input_tokens, usage.inputTokens) +
                firstCount(usage.cache_read_input_tokens, usage.cacheReadInputTokens) +
                firstCount(usage.cache_creation_input_tokens, usage.cacheCreationInputTokens);
            const turnTokensOut = firstCount(usage.output_tokens, usage.outputTokens);
            tokensIn += turnTokensIn;
            tokensOut += turnTokensOut;
            turns += 1;
            cumulativeUsd += (turnTokensIn / 1000) * pricing.inputPer1K + (turnTokensOut / 1000) * pricing.outputPer1K;
            if (capUsd > 0 && cumulativeUsd > capUsd) {
                terminate(`Streaming usage cap exceeded after ${String(turns)} turn(s): cumulative cost ~$${cumulativeUsd.toFixed(4)} ` +
                    `surpassed the per-attempt cap $${capUsd.toFixed(4)} (derived from remaining loop budget). ` +
                    `Subprocess terminated to bound runaway overspend.`);
            }
            return true;
        }
        if (event.type === "result") {
            finalResult = event;
        }
        return true;
    };
    return {
        onChunk: (chunk, terminate) => {
            const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk), "utf8");
            pending = pending.length === 0 ? bytes : Buffer.concat([pending, bytes]);
            let lineStart = 0;
            let newlineIndex = pending.indexOf(NEWLINE_BYTE, lineStart);
            while (newlineIndex !== -1) {
                ingestLine(pending.toString("utf8", lineStart, newlineIndex), terminate);
                lineStart = newlineIndex + 1;
                newlineIndex = pending.indexOf(NEWLINE_BYTE, lineStart);
            }
            pending = pending.subarray(lineStart);
        },
        snapshot: () => {
            // The stream may end without a trailing newline; pick up a complete
            // final event that is still buffered. An incomplete fragment fails
            // to parse and is left in place for later chunks.
            if (pending.length > 0 && ingestLine(pending.toString("utf8"), () => { })) {
                pending = Buffer.alloc(0);
            }
            return { cumulativeUsd, tokensIn, tokensOut, turns, ...(finalResult ? { finalResult } : {}) };
        }
    };
}
|
|
298
|
+
/**
 * Extracts the terminal `result` event from Claude `stream-json` output.
 *
 * The output is treated as newline-delimited JSON. Blank lines and lines that
 * are not valid JSON are skipped. When several `result` events are present the
 * last one wins; when none is present the function returns `undefined`.
 *
 * @param {string} stdout - Complete captured stdout of the CLI process.
 * @returns {object | undefined} The last parsed event whose `type` is `"result"`.
 */
function parseStreamJsonResult(stdout) {
    const rawLines = stdout.split(/\r?\n/u);
    // Walk backwards: the first `result` event met from the end is the last
    // one in the stream.
    for (let index = rawLines.length - 1; index >= 0; index -= 1) {
        const text = rawLines[index].trim();
        if (text === "") {
            continue;
        }
        let candidate;
        try {
            candidate = JSON.parse(text);
        }
        catch {
            // Not JSON (or a truncated line) — keep scanning.
            continue;
        }
        if (candidate?.type === "result") {
            return candidate;
        }
    }
    return undefined;
}
|
|
235
322
|
// ---------------------------------------------------------------------------
|
|
236
323
|
// Structural failure hint detection
|
|
237
324
|
//
|
|
@@ -306,12 +393,45 @@ export function createAgentCliAdapter(options) {
|
|
|
306
393
|
}
|
|
307
394
|
const args = options.argsBuilder(prompt);
|
|
308
395
|
const stdinData = options.stdinBuilder?.(prompt);
|
|
396
|
+
// Live cumulative-cost circuit breaker: a single attempt should never be
|
|
397
|
+
// allowed to spend more than the loop has left. `--output-format json`
|
|
398
|
+
// only reports usage once the process exits, so for `stream-json` we
|
|
399
|
+
// watch per-turn usage events as they arrive and kill the subprocess the
|
|
400
|
+
// instant projected spend crosses what remains — bounding the worst case
|
|
401
|
+
// to roughly one turn's overshoot rather than the entire runaway session.
|
|
402
|
+
const streamingUsage = options.streamingUsageCap && request.context.remainingBudgetUsd > 0
|
|
403
|
+
? createStreamingUsageInspector(request.context.remainingBudgetUsd, options.model ?? options.command)
|
|
404
|
+
: undefined;
|
|
309
405
|
const agentResult = await runSubprocess(options.command, args, {
|
|
310
406
|
cwd: workingDirectory,
|
|
311
407
|
timeoutMs,
|
|
312
408
|
spawnImpl: options.spawnImpl,
|
|
313
|
-
...(stdinData === undefined ? {} : { stdinData })
|
|
409
|
+
...(stdinData === undefined ? {} : { stdinData }),
|
|
410
|
+
...(streamingUsage ? { onStdoutChunk: streamingUsage.onChunk } : {})
|
|
314
411
|
});
|
|
412
|
+
if (agentResult.terminationReason) {
|
|
413
|
+
const snapshot = streamingUsage?.snapshot();
|
|
414
|
+
const cumulativeUsd = snapshot?.cumulativeUsd ?? 0;
|
|
415
|
+
return {
|
|
416
|
+
status: "failed",
|
|
417
|
+
summary: `${options.command} subprocess terminated mid-run by the budget circuit breaker. ${agentResult.terminationReason}`,
|
|
418
|
+
usage: normalizeUsage({
|
|
419
|
+
actualUsd: Number(cumulativeUsd.toFixed(6)),
|
|
420
|
+
estimatedUsd: Number(cumulativeUsd.toFixed(6)),
|
|
421
|
+
tokensIn: snapshot?.tokensIn ?? 0,
|
|
422
|
+
tokensOut: snapshot?.tokensOut ?? 0,
|
|
423
|
+
provenance: "estimated"
|
|
424
|
+
}),
|
|
425
|
+
verification: {
|
|
426
|
+
passed: false,
|
|
427
|
+
summary: "Subprocess terminated by the streaming budget circuit breaker before verification could run."
|
|
428
|
+
},
|
|
429
|
+
failure: {
|
|
430
|
+
message: agentResult.terminationReason,
|
|
431
|
+
classHint: "budget_pressure"
|
|
432
|
+
}
|
|
433
|
+
};
|
|
434
|
+
}
|
|
315
435
|
if (agentResult.timedOut) {
|
|
316
436
|
return {
|
|
317
437
|
status: "failed",
|
|
@@ -346,11 +466,15 @@ export function createAgentCliAdapter(options) {
|
|
|
346
466
|
}
|
|
347
467
|
};
|
|
348
468
|
}
|
|
349
|
-
// Parse JSON output if the CLI supports it
|
|
469
|
+
// Parse JSON output if the CLI supports it. `stream-json` emits one JSON
|
|
470
|
+
// object per line — the final `result` event carries the same
|
|
471
|
+
// `result`/`usage`/`total_cost_usd` fields as single-blob `json` output.
|
|
350
472
|
let parsed;
|
|
351
473
|
if (supportsJsonOutput) {
|
|
352
474
|
try {
|
|
353
|
-
parsed =
|
|
475
|
+
parsed = options.streamingUsageCap
|
|
476
|
+
? parseStreamJsonResult(agentResult.stdout)
|
|
477
|
+
: JSON.parse(agentResult.stdout);
|
|
354
478
|
}
|
|
355
479
|
catch {
|
|
356
480
|
// Fall through to plain-text handling
|
|
@@ -362,6 +486,26 @@ export function createAgentCliAdapter(options) {
|
|
|
362
486
|
const geminiJsonResult = !supportsJsonOutput && options.command === "gemini"
|
|
363
487
|
? extractGeminiJsonResult(agentResult.stdout, options.model)
|
|
364
488
|
: undefined;
|
|
489
|
+
const producedStructuredCompletion = parsed?.result !== undefined ||
|
|
490
|
+
codexJsonlResult !== undefined ||
|
|
491
|
+
geminiJsonResult !== undefined;
|
|
492
|
+
if (agentResult.exitCode !== 0 && !producedStructuredCompletion) {
|
|
493
|
+
const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr || agentResult.stdout, agentResult.exitCode);
|
|
494
|
+
return {
|
|
495
|
+
status: "failed",
|
|
496
|
+
summary: `${options.command} subprocess exited before verifier execution.`,
|
|
497
|
+
usage: normalizeUsage({
|
|
498
|
+
actualUsd: 0,
|
|
499
|
+
tokensIn: 0,
|
|
500
|
+
tokensOut: 0,
|
|
501
|
+
provenance: "unavailable"
|
|
502
|
+
}),
|
|
503
|
+
verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
|
|
504
|
+
failure: {
|
|
505
|
+
message: failureMessage
|
|
506
|
+
}
|
|
507
|
+
};
|
|
508
|
+
}
|
|
365
509
|
const agentText = codexJsonlResult?.summary ??
|
|
366
510
|
geminiJsonResult?.summary ??
|
|
367
511
|
parsed?.result ??
|
|
@@ -405,21 +549,22 @@ export function createAgentCliAdapter(options) {
|
|
|
405
549
|
const verification = await runVerification(request.context.verificationPlan, workingDirectory, verifyTimeoutMs, verificationStack, options.spawnImpl);
|
|
406
550
|
// Check for zero-diff (agent ran but made no file changes)
|
|
407
551
|
const repoRoot = request.context.repoRoot;
|
|
552
|
+
const gitRepoRoot = repoRoot ? resolveGitRepositoryRoot(repoRoot) : undefined;
|
|
408
553
|
let noDiff = false;
|
|
409
|
-
if (
|
|
410
|
-
noDiff = await checkNoDiff(
|
|
554
|
+
if (gitRepoRoot) {
|
|
555
|
+
noDiff = await checkNoDiff(gitRepoRoot, options.spawnImpl);
|
|
411
556
|
}
|
|
412
557
|
// Extract structured errors from stderr/stdout for better failure context
|
|
413
558
|
const structuredErrors = normalizeStructuredErrors(extractStructuredErrors(agentResult.stderr, agentResult.stdout));
|
|
414
|
-
const executionArtifacts =
|
|
415
|
-
? await readGitExecutionArtifacts(
|
|
559
|
+
const executionArtifacts = gitRepoRoot
|
|
560
|
+
? await readGitExecutionArtifacts(gitRepoRoot, 5000, options.spawnImpl)
|
|
416
561
|
: undefined;
|
|
417
562
|
// Scope contract enforcement: check touched files against allowedPaths/deniedPaths
|
|
418
563
|
let scopeViolations = [];
|
|
419
564
|
const scopeCtx = request.context;
|
|
420
|
-
if (
|
|
565
|
+
if (gitRepoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
|
|
421
566
|
const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
|
|
422
|
-
cwd:
|
|
567
|
+
cwd: gitRepoRoot,
|
|
423
568
|
timeoutMs: 5000,
|
|
424
569
|
spawnImpl: options.spawnImpl
|
|
425
570
|
});
|
|
@@ -492,7 +637,12 @@ export function createAgentCliAdapter(options) {
|
|
|
492
637
|
}
|
|
493
638
|
// Reset tracked files to HEAD so next attempt starts from clean state
|
|
494
639
|
try {
|
|
495
|
-
|
|
640
|
+
if (gitRepoRoot) {
|
|
641
|
+
await runSubprocess("git", ["restore", "--staged", "--worktree", "."], {
|
|
642
|
+
cwd: gitRepoRoot,
|
|
643
|
+
timeoutMs: 5000
|
|
644
|
+
});
|
|
645
|
+
}
|
|
496
646
|
}
|
|
497
647
|
catch {
|
|
498
648
|
// Non-fatal
|
|
@@ -540,10 +690,16 @@ export function createAgentCliAdapter(options) {
|
|
|
540
690
|
// Pre-configured: Claude CLI
|
|
541
691
|
// ---------------------------------------------------------------------------
|
|
542
692
|
/**
|
|
543
|
-
* Spawns `claude --output-format json --print "<prompt>"
|
|
693
|
+
* Spawns `claude --output-format stream-json --verbose --print "<prompt>" [extraArgs]`.
|
|
544
694
|
*
|
|
545
|
-
*
|
|
546
|
-
*
|
|
695
|
+
* `stream-json` emits one JSON event per line — including per-turn usage on
|
|
696
|
+
* each `assistant` message and a final `result` event carrying the same
|
|
697
|
+
* `result`/`usage`/`total_cost_usd` fields as single-blob `json` output — so
|
|
698
|
+
* MartinLoop can both (a) recover real token usage/cost as before, and
|
|
699
|
+
* (b) watch cumulative spend live and self-terminate the subprocess the
|
|
700
|
+
* moment it crosses the remaining per-attempt budget (see
|
|
701
|
+
* `streamingUsageCap` / `createStreamingUsageInspector`), instead of only
|
|
702
|
+
* discovering an overspend after the whole process has already exited.
|
|
547
703
|
*
|
|
548
704
|
* Requires the Claude Code CLI to be installed and authenticated:
|
|
549
705
|
* https://docs.anthropic.com/claude-code
|
|
@@ -560,10 +716,12 @@ export function createClaudeCliAdapter(options = {}) {
|
|
|
560
716
|
timeoutMs: options.timeoutMs,
|
|
561
717
|
verifyTimeoutMs: options.verifyTimeoutMs,
|
|
562
718
|
supportsJsonOutput: true,
|
|
719
|
+
streamingUsageCap: true,
|
|
563
720
|
spawnImpl: options.spawnImpl,
|
|
564
721
|
argsBuilder: (_prompt) => [
|
|
565
722
|
"--output-format",
|
|
566
|
-
"json",
|
|
723
|
+
"stream-json",
|
|
724
|
+
"--verbose",
|
|
567
725
|
"--print",
|
|
568
726
|
"--dangerously-skip-permissions",
|
|
569
727
|
...modelArgs,
|
|
@@ -586,12 +744,12 @@ export function createClaudeCliAdapter(options = {}) {
|
|
|
586
744
|
* npm install -g @openai/codex
|
|
587
745
|
*/
|
|
588
746
|
export function createCodexCliAdapter(options = {}) {
|
|
589
|
-
const modelArgs = options.model ? ["--model", options.model] : [];
|
|
590
747
|
const extraArgs = options.extraArgs ?? [];
|
|
591
748
|
const sandbox = options.sandbox ?? "workspace-write";
|
|
592
749
|
const workingDirectory = options.workingDirectory ?? process.cwd();
|
|
750
|
+
const command = options.command ?? "codex";
|
|
593
751
|
return createAgentCliAdapter({
|
|
594
|
-
command
|
|
752
|
+
command,
|
|
595
753
|
adapterIdSuffix: "codex",
|
|
596
754
|
model: options.model ?? "codex",
|
|
597
755
|
label: options.label ?? "Codex CLI adapter",
|
|
@@ -600,19 +758,13 @@ export function createCodexCliAdapter(options = {}) {
|
|
|
600
758
|
verifyTimeoutMs: options.verifyTimeoutMs,
|
|
601
759
|
supportsJsonOutput: false,
|
|
602
760
|
spawnImpl: options.spawnImpl,
|
|
603
|
-
argsBuilder: () =>
|
|
604
|
-
"exec",
|
|
605
|
-
"--cd",
|
|
761
|
+
argsBuilder: () => buildCodexExecArgs({
|
|
606
762
|
workingDirectory,
|
|
607
|
-
"--sandbox",
|
|
608
763
|
sandbox,
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
"
|
|
612
|
-
|
|
613
|
-
...extraArgs,
|
|
614
|
-
"-"
|
|
615
|
-
],
|
|
764
|
+
...(options.model ? { model: options.model } : {}),
|
|
765
|
+
extraArgs,
|
|
766
|
+
mode: "prompt"
|
|
767
|
+
}),
|
|
616
768
|
stdinBuilder: (prompt) => prompt
|
|
617
769
|
});
|
|
618
770
|
}
|
|
@@ -815,7 +967,15 @@ function redactSecretsForPrompt(input) {
|
|
|
815
967
|
return input
|
|
816
968
|
.replace(/\bOPENAI_API_KEY\s*=\s*[^\s"'`]+/giu, "OPENAI_API_KEY=[REDACTED_SECRET]")
|
|
817
969
|
.replace(/\bsk-[A-Za-z0-9_-]{8,}\b/gu, "[REDACTED_SECRET]")
|
|
818
|
-
.replace(/\bghp_[A-Za-z0-9_]{
|
|
970
|
+
.replace(/\bghp_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
|
|
971
|
+
.replace(/\bgithub_pat_[A-Za-z0-9_]{20,}\b/gu, "[REDACTED_SECRET]")
|
|
972
|
+
.replace(/\b(?:gho|ghu|ghs|ghr)_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
|
|
973
|
+
.replace(/\bAKIA[0-9A-Z]{16}\b/gu, "[REDACTED_SECRET]")
|
|
974
|
+
.replace(/\b(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*[^\s"'`]+/giu, "AWS_SECRET_ACCESS_KEY=[REDACTED_SECRET]")
|
|
975
|
+
.replace(/\bxox[baprs]-[A-Za-z0-9-]{10,}\b/giu, "[REDACTED_SECRET]")
|
|
976
|
+
.replace(/\bAIza[0-9A-Za-z_-]{30,}\b/gu, "[REDACTED_SECRET]")
|
|
977
|
+
.replace(/-----BEGIN(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----[\s\S]*?-----END(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----/gu, "[REDACTED_SECRET]")
|
|
978
|
+
.replace(/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/gu, "[REDACTED_SECRET]")
|
|
819
979
|
.replace(/\B\.env(?!\.example\b)(?:\.[A-Za-z0-9._-]+)?\b/giu, "[REDACTED_PATH]");
|
|
820
980
|
}
|
|
821
981
|
function extractStructuredErrors(stderr, stdout) {
|
|
@@ -6,16 +6,60 @@ export interface SubprocessResult {
|
|
|
6
6
|
stdout: string;
|
|
7
7
|
stderr: string;
|
|
8
8
|
timedOut: boolean;
|
|
9
|
+
/**
|
|
10
|
+
* True when the subprocess was terminated early because its combined
|
|
11
|
+
* stdout+stderr exceeded `maxOutputBytes` — a circuit breaker against
|
|
12
|
+
* runaway agent sessions that would otherwise burn far more cost/tokens
|
|
13
|
+
* than the loop budget allows before MartinLoop can observe the final
|
|
14
|
+
* (post-hoc) usage report. See `claude-cli.ts` execute() for how this
|
|
15
|
+
* cap is derived from the remaining loop budget.
|
|
16
|
+
*/
|
|
17
|
+
outputCapped: boolean;
|
|
18
|
+
/**
|
|
19
|
+
* Set to the inspector's reason string when an `onStdoutChunk` callback
|
|
20
|
+
* requested early termination (e.g. a streaming usage/cost circuit breaker
|
|
21
|
+
* that detected the agent is on track to blow through its budget). Distinct
|
|
22
|
+
* from `outputCapped`, which fires on raw byte volume rather than parsed
|
|
23
|
+
* semantic content.
|
|
24
|
+
*/
|
|
25
|
+
terminationReason?: string;
|
|
26
|
+
launched: boolean;
|
|
9
27
|
}
|
|
10
28
|
export interface VerificationOutcome {
|
|
11
29
|
passed: boolean;
|
|
12
30
|
summary: string;
|
|
31
|
+
steps: VerificationStepOutcome[];
|
|
32
|
+
warnings?: string[];
|
|
33
|
+
}
|
|
34
|
+
export interface VerificationStepOutcome {
|
|
35
|
+
command: string;
|
|
36
|
+
launched: boolean;
|
|
37
|
+
exitCode?: number;
|
|
38
|
+
timedOut: boolean;
|
|
39
|
+
fastFail: boolean;
|
|
40
|
+
detail?: string;
|
|
13
41
|
}
|
|
14
42
|
export declare function runSubprocess(command: string, args: string[], options: {
|
|
15
43
|
cwd: string;
|
|
16
44
|
timeoutMs: number;
|
|
17
45
|
spawnImpl?: SpawnLike;
|
|
18
46
|
stdinData?: string;
|
|
47
|
+
/**
|
|
48
|
+
* Optional circuit breaker: terminate the subprocess once combined
|
|
49
|
+
* stdout+stderr bytes exceed this threshold, instead of waiting for
|
|
50
|
+
* natural completion. Used to bound runaway agent-CLI cost/token spend
|
|
51
|
+
* that can't otherwise be observed until the process exits.
|
|
52
|
+
*/
|
|
53
|
+
maxOutputBytes?: number;
|
|
54
|
+
/**
|
|
55
|
+
* Optional semantic inspector invoked with each raw stdout chunk. Used to
|
|
56
|
+
* parse streaming structured output (e.g. Claude's `stream-json` usage
|
|
57
|
+
* events) and request early termination via the supplied `terminate`
|
|
58
|
+
* callback once a semantic threshold (such as cumulative cost) is
|
|
59
|
+
* crossed — well before the subprocess would exit naturally and report
|
|
60
|
+
* a runaway final usage figure.
|
|
61
|
+
*/
|
|
62
|
+
onStdoutChunk?: (chunk: Buffer, terminate: (reason: string) => void) => void;
|
|
19
63
|
}): Promise<SubprocessResult>;
|
|
20
64
|
export declare function runVerification(commands: string[], cwd: string, timeoutMs: number, verificationStack?: Array<{
|
|
21
65
|
command: string;
|
|
@@ -27,6 +71,7 @@ export declare function readGitExecutionArtifacts(repoRoot: string, timeoutMs: n
|
|
|
27
71
|
diffStats?: ReturnType<typeof diffStatsFromNumstat>;
|
|
28
72
|
}>;
|
|
29
73
|
export declare function readGitChangedFiles(repoRoot: string, timeoutMs: number, spawnImpl?: SpawnLike): Promise<string[]>;
|
|
74
|
+
export declare function resolveGitRepositoryRoot(workingDirectory: string): string | undefined;
|
|
30
75
|
export interface SpawnPlan {
|
|
31
76
|
command: string;
|
|
32
77
|
args: string[];
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
|
-
import { delimiter, extname, isAbsolute, join, resolve } from "node:path";
|
|
2
|
+
import { delimiter, dirname, extname, isAbsolute, join, resolve } from "node:path";
|
|
3
3
|
import { existsSync } from "node:fs";
|
|
4
4
|
import { diffStatsFromNumstat } from "./runtime-support.js";
|
|
5
|
+
const gitRepositoryRootCache = new Map();
|
|
5
6
|
export async function runSubprocess(command, args, options) {
|
|
6
7
|
return new Promise((resolve) => {
|
|
7
8
|
let timedOut = false;
|
|
9
|
+
let outputCapped = false;
|
|
10
|
+
let terminationReason;
|
|
8
11
|
let settled = false;
|
|
12
|
+
let outputBytes = 0;
|
|
9
13
|
const stdoutChunks = [];
|
|
10
14
|
const stderrChunks = [];
|
|
11
15
|
const stdinMode = options.stdinData !== undefined ? "pipe" : "ignore";
|
|
@@ -14,7 +18,7 @@ export async function runSubprocess(command, args, options) {
|
|
|
14
18
|
return;
|
|
15
19
|
}
|
|
16
20
|
settled = true;
|
|
17
|
-
resolve(result);
|
|
21
|
+
resolve({ ...result, timedOut, outputCapped, ...(terminationReason ? { terminationReason } : {}) });
|
|
18
22
|
};
|
|
19
23
|
let proc;
|
|
20
24
|
try {
|
|
@@ -27,19 +31,33 @@ export async function runSubprocess(command, args, options) {
|
|
|
27
31
|
}
|
|
28
32
|
catch (error) {
|
|
29
33
|
const message = error instanceof Error ? error.message : String(error);
|
|
30
|
-
resolveOnce({
|
|
31
|
-
exitCode: 1,
|
|
32
|
-
stdout: "",
|
|
33
|
-
stderr: message,
|
|
34
|
-
timedOut: false
|
|
35
|
-
});
|
|
34
|
+
resolveOnce({ exitCode: 1, stdout: "", stderr: message, launched: false });
|
|
36
35
|
return;
|
|
37
36
|
}
|
|
37
|
+
const trackOutput = (chunks, chunk) => {
|
|
38
|
+
chunks.push(chunk);
|
|
39
|
+
outputBytes += chunk.byteLength;
|
|
40
|
+
if (options.maxOutputBytes !== undefined &&
|
|
41
|
+
!outputCapped &&
|
|
42
|
+
!timedOut &&
|
|
43
|
+
outputBytes > options.maxOutputBytes) {
|
|
44
|
+
outputCapped = true;
|
|
45
|
+
proc.kill("SIGTERM");
|
|
46
|
+
}
|
|
47
|
+
};
|
|
48
|
+
const terminateEarly = (reason) => {
|
|
49
|
+
if (terminationReason || timedOut || outputCapped) {
|
|
50
|
+
return;
|
|
51
|
+
}
|
|
52
|
+
terminationReason = reason;
|
|
53
|
+
proc.kill("SIGTERM");
|
|
54
|
+
};
|
|
38
55
|
proc.stdout?.on("data", (chunk) => {
|
|
39
|
-
stdoutChunks
|
|
56
|
+
trackOutput(stdoutChunks, chunk);
|
|
57
|
+
options.onStdoutChunk?.(chunk, terminateEarly);
|
|
40
58
|
});
|
|
41
59
|
proc.stderr?.on("data", (chunk) => {
|
|
42
|
-
stderrChunks
|
|
60
|
+
trackOutput(stderrChunks, chunk);
|
|
43
61
|
});
|
|
44
62
|
proc.stdin?.on("error", (error) => {
|
|
45
63
|
// Some CLIs exit before consuming stdin in tests and on fast-fail paths.
|
|
@@ -55,12 +73,7 @@ export async function runSubprocess(command, args, options) {
|
|
|
55
73
|
}, options.timeoutMs);
|
|
56
74
|
proc.on("error", (error) => {
|
|
57
75
|
clearTimeout(timer);
|
|
58
|
-
resolveOnce({
|
|
59
|
-
exitCode: 1,
|
|
60
|
-
stdout: "",
|
|
61
|
-
stderr: error.message,
|
|
62
|
-
timedOut: false
|
|
63
|
-
});
|
|
76
|
+
resolveOnce({ exitCode: 1, stdout: "", stderr: error.message, launched: false });
|
|
64
77
|
});
|
|
65
78
|
proc.on("close", (code) => {
|
|
66
79
|
clearTimeout(timer);
|
|
@@ -68,7 +81,7 @@ export async function runSubprocess(command, args, options) {
|
|
|
68
81
|
exitCode: code ?? 1,
|
|
69
82
|
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
70
83
|
stderr: Buffer.concat(stderrChunks).toString("utf8"),
|
|
71
|
-
|
|
84
|
+
launched: true
|
|
72
85
|
});
|
|
73
86
|
});
|
|
74
87
|
if (options.stdinData !== undefined && proc.stdin) {
|
|
@@ -83,7 +96,7 @@ export async function runSubprocess(command, args, options) {
|
|
|
83
96
|
exitCode: 1,
|
|
84
97
|
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
85
98
|
stderr: stdinError.message,
|
|
86
|
-
|
|
99
|
+
launched: false
|
|
87
100
|
});
|
|
88
101
|
}
|
|
89
102
|
}
|
|
@@ -98,9 +111,11 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
|
|
|
98
111
|
}))
|
|
99
112
|
: commands.map((command) => ({ command, fastFail: true }));
|
|
100
113
|
if (steps.length === 0) {
|
|
101
|
-
return { passed: true, summary: "No verification commands specified." };
|
|
114
|
+
return { passed: true, summary: "No verification commands specified.", steps: [] };
|
|
102
115
|
}
|
|
103
116
|
const failedSteps = [];
|
|
117
|
+
const stepOutcomes = [];
|
|
118
|
+
const warnings = [];
|
|
104
119
|
for (const step of steps) {
|
|
105
120
|
const parts = splitCommand(step.command);
|
|
106
121
|
const [bin, ...args] = parts;
|
|
@@ -108,24 +123,53 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
|
|
|
108
123
|
continue;
|
|
109
124
|
}
|
|
110
125
|
const result = await runSubprocess(bin, args, { cwd, timeoutMs, spawnImpl });
|
|
126
|
+
const detail = truncate(result.stderr.trim() || result.stdout.trim(), 500);
|
|
127
|
+
stepOutcomes.push({
|
|
128
|
+
command: step.command,
|
|
129
|
+
launched: result.launched,
|
|
130
|
+
exitCode: result.exitCode,
|
|
131
|
+
timedOut: result.timedOut,
|
|
132
|
+
fastFail: step.fastFail,
|
|
133
|
+
...(detail ? { detail } : {})
|
|
134
|
+
});
|
|
111
135
|
if (result.timedOut) {
|
|
112
|
-
return {
|
|
136
|
+
return {
|
|
137
|
+
passed: false,
|
|
138
|
+
summary: `Verification timed out: ${step.command}`,
|
|
139
|
+
steps: stepOutcomes,
|
|
140
|
+
...(warnings.length ? { warnings } : {})
|
|
141
|
+
};
|
|
113
142
|
}
|
|
114
143
|
if (result.exitCode !== 0) {
|
|
115
|
-
const detail = truncate(result.stderr.trim() || result.stdout.trim(), 500);
|
|
116
144
|
const summary = `Verification failed: ${step.command}\n${detail}`;
|
|
145
|
+
if (!result.launched) {
|
|
146
|
+
warnings.push(`Verifier never launched: ${step.command}`);
|
|
147
|
+
}
|
|
117
148
|
if (step.fastFail) {
|
|
118
|
-
return { passed: false, summary };
|
|
149
|
+
return { passed: false, summary, steps: stepOutcomes, ...(warnings.length ? { warnings } : {}) };
|
|
119
150
|
}
|
|
120
151
|
failedSteps.push(step.command);
|
|
121
152
|
}
|
|
122
153
|
}
|
|
123
154
|
if (failedSteps.length > 0) {
|
|
124
|
-
return {
|
|
155
|
+
return {
|
|
156
|
+
passed: false,
|
|
157
|
+
summary: `Failed steps: ${failedSteps.join(", ")}`,
|
|
158
|
+
steps: stepOutcomes,
|
|
159
|
+
...(warnings.length ? { warnings } : {})
|
|
160
|
+
};
|
|
125
161
|
}
|
|
126
|
-
return {
|
|
162
|
+
return {
|
|
163
|
+
passed: true,
|
|
164
|
+
summary: `All ${String(steps.length)} verification step(s) passed.`,
|
|
165
|
+
steps: stepOutcomes,
|
|
166
|
+
...(warnings.length ? { warnings } : {})
|
|
167
|
+
};
|
|
127
168
|
}
|
|
128
169
|
export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl) {
|
|
170
|
+
if (!resolveGitRepositoryRoot(repoRoot)) {
|
|
171
|
+
return {};
|
|
172
|
+
}
|
|
129
173
|
const changedFilesResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs, spawnImpl });
|
|
130
174
|
const numstatResult = await runSubprocess("git", ["diff", "--numstat", "HEAD"], { cwd: repoRoot, timeoutMs, spawnImpl });
|
|
131
175
|
const changedFiles = changedFilesResult.exitCode === 0
|
|
@@ -141,12 +185,48 @@ export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl)
|
|
|
141
185
|
};
|
|
142
186
|
}
|
|
143
187
|
export async function readGitChangedFiles(repoRoot, timeoutMs, spawnImpl) {
|
|
144
|
-
|
|
188
|
+
if (!resolveGitRepositoryRoot(repoRoot)) {
|
|
189
|
+
return [];
|
|
190
|
+
}
|
|
191
|
+
const statusResult = await runSubprocess("git", ["status", "-z", "--porcelain=v1", "--untracked-files=all", "--ignore-submodules=all", "--", "."], { cwd: repoRoot, timeoutMs, spawnImpl });
|
|
145
192
|
if (statusResult.exitCode !== 0) {
|
|
146
193
|
return [];
|
|
147
194
|
}
|
|
148
195
|
return parsePorcelainEntries(statusResult.stdout).filter((entry) => typeof entry === "string" && entry.length > 0);
|
|
149
196
|
}
|
|
197
|
+
/**
 * Finds the nearest ancestor of `workingDirectory` (itself included) that
 * contains a `.git` entry, walking upward until the filesystem root.
 *
 * Results are memoized in the module-level `gitRepositoryRootCache`: every
 * directory visited during a walk is recorded with the root that was found,
 * or with `null` when the walk reached the top without finding one.
 *
 * @param {string} workingDirectory - Directory to start from; resolved to an absolute path first.
 * @returns {string | undefined} Absolute path of the repository root, or `undefined` when none is found.
 */
export function resolveGitRepositoryRoot(workingDirectory) {
    const trail = [];
    // Records the outcome for every directory walked so far and converts the
    // cache's `null` ("no repository") marker into `undefined` for callers.
    const settle = (root) => {
        for (const directory of trail) {
            gitRepositoryRootCache.set(directory, root);
        }
        return root ?? undefined;
    };
    let directory = resolve(workingDirectory);
    for (;;) {
        const known = gitRepositoryRootCache.get(directory);
        if (known !== undefined) {
            return settle(known);
        }
        trail.push(directory);
        if (existsSync(resolve(directory, ".git"))) {
            return settle(directory);
        }
        const parent = dirname(directory);
        if (parent === directory) {
            // `dirname` is a fixed point at the filesystem root: nothing left to check.
            return settle(null);
        }
        directory = parent;
    }
}
|
|
150
230
|
export function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn) {
|
|
151
231
|
if (preserveRawForInjectedSpawn || process.platform !== "win32") {
|
|
152
232
|
return { command, args };
|
|
@@ -157,18 +237,16 @@ export function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn)
|
|
|
157
237
|
// Windows can resolve the command itself — this covers cases like `pnpm` where the npm global
|
|
158
238
|
// bin directory is present in the shell PATH but not yet visible to this Node.js process.
|
|
159
239
|
if (resolvedOrUndefined === undefined) {
|
|
160
|
-
const cmdStr = [quoteWindowsCmdArg(command), ...args.map(quoteWindowsCmdArg)].join(" ");
|
|
161
240
|
return {
|
|
162
241
|
command: process.env.ComSpec || "cmd.exe",
|
|
163
|
-
args: ["/d", "/c",
|
|
242
|
+
args: ["/d", "/c", command, ...args]
|
|
164
243
|
};
|
|
165
244
|
}
|
|
166
245
|
const extension = extname(resolvedOrUndefined).toLowerCase();
|
|
167
246
|
if (extension === ".cmd" || extension === ".bat") {
|
|
168
|
-
const cmdStr = [quoteWindowsCmdArg(resolvedOrUndefined), ...args.map(quoteWindowsCmdArg)].join(" ");
|
|
169
247
|
return {
|
|
170
248
|
command: process.env.ComSpec || "cmd.exe",
|
|
171
|
-
args: ["/d", "/
|
|
249
|
+
args: ["/d", "/c", resolvedOrUndefined, ...args]
|
|
172
250
|
};
|
|
173
251
|
}
|
|
174
252
|
if (extension === ".ps1") {
|
|
@@ -240,16 +318,6 @@ function windowsPathDirectories() {
|
|
|
240
318
|
.map((entry) => entry.trim().replace(/^"|"$/g, ""))
|
|
241
319
|
.filter(Boolean);
|
|
242
320
|
}
|
|
243
|
-
function quoteWindowsCmdArg(value) {
|
|
244
|
-
const normalized = value.replace(/\r?\n/gu, " ");
|
|
245
|
-
const escaped = normalized
|
|
246
|
-
.replace(/\^/gu, "^^")
|
|
247
|
-
.replace(/"/gu, '^"')
|
|
248
|
-
.replace(/%/gu, "%%")
|
|
249
|
-
.replace(/!/gu, "^^!")
|
|
250
|
-
.replace(/[&|<>()]/gu, (match) => `^${match}`);
|
|
251
|
-
return `"${escaped}"`;
|
|
252
|
-
}
|
|
253
321
|
export function splitCommand(command) {
|
|
254
322
|
const tokens = [];
|
|
255
323
|
let current = "";
|