martin-loop 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +398 -362
- package/demo/seeded-workspace/README.md +35 -0
- package/demo/seeded-workspace/TASKS.md +29 -0
- package/demo/seeded-workspace/martin.config.yaml +11 -0
- package/demo/seeded-workspace/package.json +8 -0
- package/demo/seeded-workspace/src/invoice-summary.js +11 -0
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
- package/dist/vendor/adapters/claude-cli.d.ts +19 -4
- package/dist/vendor/adapters/claude-cli.js +55 -24
- package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
- package/dist/vendor/adapters/cli-bridge.js +154 -28
- package/dist/vendor/adapters/index.d.ts +1 -0
- package/dist/vendor/adapters/index.js +1 -0
- package/dist/vendor/adapters/verifier-only.d.ts +7 -0
- package/dist/vendor/adapters/verifier-only.js +57 -0
- package/dist/vendor/cli/index.d.ts +6 -1
- package/dist/vendor/cli/index.js +124 -7
- package/dist/vendor/contracts/index.d.ts +3 -1
- package/dist/vendor/core/compiler.d.ts +2 -0
- package/dist/vendor/core/compiler.js +10 -4
- package/dist/vendor/core/context-integrity.d.ts +26 -0
- package/dist/vendor/core/context-integrity.js +56 -0
- package/dist/vendor/core/index.d.ts +5 -2
- package/dist/vendor/core/index.js +186 -54
- package/dist/vendor/core/policy.d.ts +6 -0
- package/docs/distribution/DIRECTORY-SUBMISSIONS.md +89 -0
- package/docs/distribution/INTEGRATION-OUTREACH.md +61 -0
- package/docs/distribution/UNDER-3-CHALLENGE.md +65 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
- package/docs/oss/EXAMPLES.md +134 -126
- package/docs/oss/OSS-BOUNDARY-REPORT.json +109 -113
- package/docs/oss/OSS-BOUNDARY-REPORT.md +48 -48
- package/docs/oss/QUICKSTART.md +165 -135
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
- package/docs/oss/README.md +96 -93
- package/docs/oss/RELEASE-SURFACE-REPORT.json +45 -45
- package/docs/oss/RELEASE-SURFACE-REPORT.md +35 -35
- package/package.json +19 -11
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# MartinLoop Demo Sandbox
|
|
2
|
+
|
|
3
|
+
This workspace is the safe public demo copied by `martin-loop demo`.
|
|
4
|
+
|
|
5
|
+
It is intentionally small:
|
|
6
|
+
|
|
7
|
+
- `npm test` is green out of the box
|
|
8
|
+
- `martin.config.yaml` keeps the budget tiny
|
|
9
|
+
- the first suggested MartinLoop run can stay in stub mode with `MARTIN_LIVE=false`
|
|
10
|
+
|
|
11
|
+
## Files
|
|
12
|
+
|
|
13
|
+
- `src/invoice-summary.js`: tiny module used by the demo task
|
|
14
|
+
- `test/invoice-summary.test.js`: Node test suite
|
|
15
|
+
- `TASKS.md`: suggested objectives for a stub-safe run or a live adapter run
|
|
16
|
+
- `martin.config.yaml`: low-risk governance defaults
|
|
17
|
+
|
|
18
|
+
## Suggested flow
|
|
19
|
+
|
|
20
|
+
```sh
|
|
21
|
+
npm install
|
|
22
|
+
npm test
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Safe first run:
|
|
26
|
+
|
|
27
|
+
```sh
|
|
28
|
+
MARTIN_LIVE=false npx martin-loop run "Summarize the demo workspace and confirm the verifier is green" --verify "npm test"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Optional live run:
|
|
32
|
+
|
|
33
|
+
```sh
|
|
34
|
+
npx martin-loop run "Add support for a discount percentage to summarizeInvoice and update the tests" --verify "npm test" --engine codex
|
|
35
|
+
```
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Suggested Demo Tasks
|
|
2
|
+
|
|
3
|
+
## Stub-safe first run
|
|
4
|
+
|
|
5
|
+
Use this when you want to see MartinLoop create a governed run record without spending provider budget:
|
|
6
|
+
|
|
7
|
+
```text
|
|
8
|
+
Summarize the demo workspace, confirm the verifier command is green, and explain the safest next change to make.
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Verifier:
|
|
12
|
+
|
|
13
|
+
```sh
|
|
14
|
+
npm test
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Optional live run
|
|
18
|
+
|
|
19
|
+
Use this when you want a real coding task in the sandbox:
|
|
20
|
+
|
|
21
|
+
```text
|
|
22
|
+
Add support for a discount percentage to summarizeInvoice and update the tests while keeping the existing tax behavior intact.
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Verifier:
|
|
26
|
+
|
|
27
|
+
```sh
|
|
28
|
+
npm test
|
|
29
|
+
```
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Adds up invoice line items and derives the tax and grand total.
 *
 * Every returned amount is rounded to two decimal places. The tax is rounded
 * first; the total is then the unrounded subtotal plus that rounded tax.
 *
 * @param {Array<{quantity: number, unitPrice: number}>} items - Line items to sum.
 * @param {number} [taxRate=0] - Tax rate as a fraction of the subtotal (0.13 means 13%).
 * @returns {{subtotal: number, tax: number, total: number}} Rounded invoice amounts.
 */
export function summarizeInvoice(items, taxRate = 0) {
  const roundToTwoDecimals = (amount) => Number(amount.toFixed(2));
  const rawSubtotal = items.reduce(
    (runningTotal, { quantity, unitPrice }) => runningTotal + quantity * unitPrice,
    0
  );
  const tax = roundToTwoDecimals(rawSubtotal * taxRate);

  return {
    subtotal: roundToTwoDecimals(rawSubtotal),
    tax,
    total: roundToTwoDecimals(rawSubtotal + tax)
  };
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
|
|
4
|
+
import { summarizeInvoice } from "../src/invoice-summary.js";
|
|
5
|
+
|
|
6
|
+
// Two line items (2 x 19.99 and 1 x 5.50) at a 13% tax rate.
test("summarizeInvoice returns subtotal, tax, and total", () => {
  const lineItems = [
    { quantity: 2, unitPrice: 19.99 },
    { quantity: 1, unitPrice: 5.5 }
  ];
  const expectedSummary = {
    subtotal: 45.48,
    tax: 5.91,
    total: 51.39
  };

  const actualSummary = summarizeInvoice(lineItems, 0.13);

  assert.deepEqual(actualSummary, expectedSummary);
});
|
|
@@ -15,15 +15,18 @@ import type { MartinAdapter } from "../core/index.js";
|
|
|
15
15
|
import { type SpawnLike } from "./cli-bridge.js";
|
|
16
16
|
/**
|
|
17
17
|
* Given a prompt string, returns the full argv array to pass to spawn().
|
|
18
|
-
* Example for Claude: (
|
|
19
|
-
* Example for Codex: (
|
|
18
|
+
* Example for Claude: () => ["--output-format", "json", "--print"]
|
|
19
|
+
* Example for Codex: () => ["exec", "--sandbox", "workspace-write", "-"]
|
|
20
20
|
*/
|
|
21
21
|
export type CliArgsBuilder = (prompt: string) => string[];
|
|
22
|
+
export type CliStdinBuilder = (prompt: string) => string | undefined;
|
|
22
23
|
export interface AgentCliAdapterOptions {
|
|
23
24
|
/** The executable to spawn (e.g. "claude", "codex"). */
|
|
24
25
|
command: string;
|
|
25
26
|
/** Converts a prompt string into the argv array passed to spawn(). */
|
|
26
27
|
argsBuilder: CliArgsBuilder;
|
|
28
|
+
/** Optional stdin payload for CLIs that accept prompt input via stdin or `-`. */
|
|
29
|
+
stdinBuilder?: CliStdinBuilder;
|
|
27
30
|
/** Adapter ID suffix. Defaults to command. */
|
|
28
31
|
adapterIdSuffix?: string;
|
|
29
32
|
/** Working directory for all subprocesses. Defaults to process.cwd(). */
|
|
@@ -63,8 +66,16 @@ export interface CodexCliAdapterOptions {
|
|
|
63
66
|
label?: string;
|
|
64
67
|
/** Override the model passed via --model flag. */
|
|
65
68
|
model?: string;
|
|
66
|
-
/**
|
|
69
|
+
/**
|
|
70
|
+
* Deprecated no-op retained for compatibility.
|
|
71
|
+
*
|
|
72
|
+
* Codex CLI's supported non-interactive entrypoint is `codex exec`.
|
|
73
|
+
* MartinLoop now uses explicit sandboxing instead of the legacy
|
|
74
|
+
* `--full-auto` compatibility path, which can exit before verifier execution.
|
|
75
|
+
*/
|
|
67
76
|
fullAuto?: boolean;
|
|
77
|
+
/** Codex sandbox mode for model-generated commands. Defaults to workspace-write. */
|
|
78
|
+
sandbox?: "read-only" | "workspace-write" | "danger-full-access";
|
|
68
79
|
/** Extra args appended after core args (before prompt). */
|
|
69
80
|
extraArgs?: string[];
|
|
70
81
|
spawnImpl?: SpawnLike;
|
|
@@ -81,7 +92,11 @@ export declare function createAgentCliAdapter(options: AgentCliAdapterOptions):
|
|
|
81
92
|
*/
|
|
82
93
|
export declare function createClaudeCliAdapter(options?: ClaudeCliAdapterOptions): MartinAdapter;
|
|
83
94
|
/**
|
|
84
|
-
* Spawns `codex
|
|
95
|
+
* Spawns `codex exec --cd <workspace> --sandbox <mode> [--model <model>] [extraArgs] -`.
|
|
96
|
+
*
|
|
97
|
+
* The prompt is delivered via stdin so Windows shell quoting cannot truncate or
|
|
98
|
+
* reinterpret long MartinLoop prompts that contain paths, deny rules, or budget
|
|
99
|
+
* context.
|
|
85
100
|
*
|
|
86
101
|
* Requires the Codex CLI to be installed and authenticated:
|
|
87
102
|
* npm install -g @openai/codex
|
|
@@ -129,15 +129,12 @@ export function createAgentCliAdapter(options) {
|
|
|
129
129
|
}
|
|
130
130
|
}
|
|
131
131
|
const args = options.argsBuilder(prompt);
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
const useStdin = args.at(-1) === "--stdin-prompt";
|
|
135
|
-
const spawnArgs = useStdin ? args.slice(0, -1) : args;
|
|
136
|
-
const agentResult = await runSubprocess(options.command, spawnArgs, {
|
|
132
|
+
const stdinData = options.stdinBuilder?.(prompt);
|
|
133
|
+
const agentResult = await runSubprocess(options.command, args, {
|
|
137
134
|
cwd: workingDirectory,
|
|
138
135
|
timeoutMs,
|
|
139
136
|
spawnImpl: options.spawnImpl,
|
|
140
|
-
...(
|
|
137
|
+
...(stdinData === undefined ? {} : { stdinData })
|
|
141
138
|
});
|
|
142
139
|
if (agentResult.timedOut) {
|
|
143
140
|
return {
|
|
@@ -157,18 +154,19 @@ export function createAgentCliAdapter(options) {
|
|
|
157
154
|
};
|
|
158
155
|
}
|
|
159
156
|
if (agentResult.exitCode !== 0 && agentResult.stdout.trim().length === 0) {
|
|
157
|
+
const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr, agentResult.exitCode);
|
|
160
158
|
return {
|
|
161
159
|
status: "failed",
|
|
162
|
-
summary: `${options.command} subprocess exited
|
|
160
|
+
summary: `${options.command} subprocess exited before verifier execution.`,
|
|
163
161
|
usage: normalizeUsage({
|
|
164
162
|
actualUsd: 0,
|
|
165
163
|
tokensIn: 0,
|
|
166
164
|
tokensOut: 0,
|
|
167
165
|
provenance: "unavailable"
|
|
168
166
|
}),
|
|
169
|
-
verification: { passed: false, summary:
|
|
167
|
+
verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
|
|
170
168
|
failure: {
|
|
171
|
-
message:
|
|
169
|
+
message: failureMessage
|
|
172
170
|
}
|
|
173
171
|
};
|
|
174
172
|
}
|
|
@@ -355,40 +353,52 @@ export function createClaudeCliAdapter(options = {}) {
|
|
|
355
353
|
"--print",
|
|
356
354
|
"--dangerously-skip-permissions",
|
|
357
355
|
...modelArgs,
|
|
358
|
-
...extraArgs
|
|
359
|
-
|
|
360
|
-
|
|
356
|
+
...extraArgs
|
|
357
|
+
],
|
|
358
|
+
stdinBuilder: (prompt) => prompt
|
|
361
359
|
});
|
|
362
360
|
}
|
|
363
361
|
// ---------------------------------------------------------------------------
|
|
364
362
|
// Pre-configured: OpenAI Codex CLI
|
|
365
363
|
// ---------------------------------------------------------------------------
|
|
366
364
|
/**
|
|
367
|
-
* Spawns `codex
|
|
365
|
+
* Spawns `codex exec --cd <workspace> --sandbox <mode> [--model <model>] [extraArgs] -`.
|
|
366
|
+
*
|
|
367
|
+
* The prompt is delivered via stdin so Windows shell quoting cannot truncate or
|
|
368
|
+
* reinterpret long MartinLoop prompts that contain paths, deny rules, or budget
|
|
369
|
+
* context.
|
|
368
370
|
*
|
|
369
371
|
* Requires the Codex CLI to be installed and authenticated:
|
|
370
372
|
* npm install -g @openai/codex
|
|
371
373
|
*/
|
|
372
374
|
export function createCodexCliAdapter(options = {}) {
|
|
373
|
-
const fullAuto = options.fullAuto !== false;
|
|
374
375
|
const modelArgs = options.model ? ["--model", options.model] : [];
|
|
375
376
|
const extraArgs = options.extraArgs ?? [];
|
|
377
|
+
const sandbox = options.sandbox ?? "workspace-write";
|
|
378
|
+
const workingDirectory = options.workingDirectory ?? process.cwd();
|
|
376
379
|
return createAgentCliAdapter({
|
|
377
380
|
command: "codex",
|
|
378
381
|
adapterIdSuffix: "codex",
|
|
379
382
|
model: options.model ?? "codex",
|
|
380
383
|
label: options.label ?? "Codex CLI adapter",
|
|
381
|
-
workingDirectory
|
|
384
|
+
workingDirectory,
|
|
382
385
|
timeoutMs: options.timeoutMs,
|
|
383
386
|
verifyTimeoutMs: options.verifyTimeoutMs,
|
|
384
387
|
supportsJsonOutput: false,
|
|
385
388
|
spawnImpl: options.spawnImpl,
|
|
386
|
-
argsBuilder: (
|
|
387
|
-
|
|
389
|
+
argsBuilder: () => [
|
|
390
|
+
"exec",
|
|
391
|
+
"--cd",
|
|
392
|
+
workingDirectory,
|
|
393
|
+
"--sandbox",
|
|
394
|
+
sandbox,
|
|
395
|
+
"--color",
|
|
396
|
+
"never",
|
|
388
397
|
...modelArgs,
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
]
|
|
398
|
+
...extraArgs,
|
|
399
|
+
"-"
|
|
400
|
+
],
|
|
401
|
+
stdinBuilder: (prompt) => prompt
|
|
392
402
|
});
|
|
393
403
|
}
|
|
394
404
|
// ---------------------------------------------------------------------------
|
|
@@ -402,14 +412,23 @@ export function createCodexCliAdapter(options = {}) {
|
|
|
402
412
|
// ---------------------------------------------------------------------------
|
|
403
413
|
function buildPrompt(request) {
|
|
404
414
|
const lines = [];
|
|
415
|
+
const mutationMode = request.context.mutationMode ?? "edit";
|
|
405
416
|
lines.push("You are running in autonomous agentic mode.");
|
|
406
|
-
|
|
407
|
-
|
|
417
|
+
if (mutationMode === "verify_only") {
|
|
418
|
+
lines.push("DO NOT EDIT FILES. Run the verifier only and report whether it passes.");
|
|
419
|
+
lines.push("Do not ask for confirmation. Do not ask clarifying questions.");
|
|
420
|
+
}
|
|
421
|
+
else {
|
|
422
|
+
lines.push("MAKE ALL REQUIRED FILE EDITS NOW. Do not ask for confirmation. Do not ask clarifying questions.");
|
|
423
|
+
lines.push("Do not explain what you found without also making the changes. Edit the files and complete the task.");
|
|
424
|
+
}
|
|
408
425
|
lines.push("");
|
|
409
426
|
lines.push("If PROGRESS.md exists in your working directory, read it first for context from prior attempts.");
|
|
410
427
|
lines.push("If it does not exist, proceed with the objective below.");
|
|
411
428
|
lines.push("");
|
|
412
|
-
lines.push(
|
|
429
|
+
lines.push(mutationMode === "verify_only"
|
|
430
|
+
? "Complete the following verification-only task without making file changes."
|
|
431
|
+
: "Complete the following coding task. Make all necessary file changes.");
|
|
413
432
|
lines.push("When you are done, the verification commands listed below must pass.");
|
|
414
433
|
lines.push("");
|
|
415
434
|
lines.push("OBJECTIVE:");
|
|
@@ -447,7 +466,9 @@ function buildPrompt(request) {
|
|
|
447
466
|
lines.push(` Attempt ${String(attemptNumber)}`);
|
|
448
467
|
lines.push(` Remaining budget: $${String(request.context.remainingBudgetUsd)} USD`);
|
|
449
468
|
lines.push(` Remaining iterations: ${String(request.context.remainingIterations)}`);
|
|
450
|
-
lines.push(
|
|
469
|
+
lines.push(mutationMode === "verify_only"
|
|
470
|
+
? " Do not modify files; only run verification."
|
|
471
|
+
: " Do not expand scope beyond what is needed to pass verification.");
|
|
451
472
|
lines.push("");
|
|
452
473
|
if (request.previousAttempts.length > 0) {
|
|
453
474
|
lines.push("PRIOR FAILED ATTEMPTS (learn from these — do not repeat the same mistakes):");
|
|
@@ -494,6 +515,16 @@ function truncate(text, maxLength) {
|
|
|
494
515
|
}
|
|
495
516
|
return `...${text.slice(-(maxLength - 3))}`;
|
|
496
517
|
}
|
|
518
|
+
/**
 * Builds the failure message for an agent CLI that exited before the verifier ran.
 *
 * The message is the trimmed stderr (or `Exit code N` when stderr is blank)
 * followed by the `environment_mismatch` marker. When the command is `codex`
 * and the detail mentions a launch/sandbox keyword, a longer hint about the
 * Codex launch configuration is put in front of it.
 *
 * @param {string} command - Executable that was spawned.
 * @param {string} stderr - Captured standard error of the subprocess.
 * @param {number} exitCode - Exit code of the subprocess.
 * @returns {string} Human-readable failure message.
 */
function formatPreVerifierSubprocessFailure(command, stderr, exitCode) {
    const trimmedStderr = stderr.trim();
    const detail = trimmedStderr || `Exit code ${String(exitCode)}`;
    if (command !== "codex") {
        return `${detail}. environment_mismatch`;
    }
    // Keyword matching is done on a lower-cased copy; the message keeps the original casing.
    const launchKeywordPattern = /\b(full-auto|sandbox|approval|permission|trusted|safety|unexpected argument)\b/u;
    if (!launchKeywordPattern.test(detail.toLowerCase())) {
        return `${detail}. environment_mismatch`;
    }
    return `Codex CLI failed before patch completion, likely due to its launch/sandbox configuration. MartinLoop invokes Codex through "codex exec --sandbox workspace-write"; verify Codex CLI auth and configuration if this persists. ${detail}. environment_mismatch`;
}
|
|
497
528
|
const INJECTION_PATTERNS = [
|
|
498
529
|
/\[INST\]/gi,
|
|
499
530
|
/<\/?system>/gi,
|
|
@@ -1,28 +1,33 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
|
-
import { isAbsolute } from "node:path";
|
|
2
|
+
import { delimiter, extname, isAbsolute, join, resolve } from "node:path";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
3
4
|
import { diffStatsFromNumstat } from "./runtime-support.js";
|
|
4
5
|
export async function runSubprocess(command, args, options) {
|
|
5
6
|
return new Promise((resolve) => {
|
|
6
7
|
let timedOut = false;
|
|
8
|
+
let settled = false;
|
|
7
9
|
const stdoutChunks = [];
|
|
8
10
|
const stderrChunks = [];
|
|
9
11
|
const stdinMode = options.stdinData !== undefined ? "pipe" : "ignore";
|
|
12
|
+
const resolveOnce = (result) => {
|
|
13
|
+
if (settled) {
|
|
14
|
+
return;
|
|
15
|
+
}
|
|
16
|
+
settled = true;
|
|
17
|
+
resolve(result);
|
|
18
|
+
};
|
|
10
19
|
let proc;
|
|
11
20
|
try {
|
|
12
|
-
|
|
21
|
+
const spawnPlan = createSpawnPlan(command, args, options.cwd, options.spawnImpl !== undefined);
|
|
22
|
+
proc = (options.spawnImpl ?? spawn)(spawnPlan.command, spawnPlan.args, {
|
|
13
23
|
cwd: options.cwd,
|
|
14
24
|
stdio: [stdinMode, "pipe", "pipe"],
|
|
15
|
-
env: process.env
|
|
16
|
-
// shell: true is required on Windows to resolve PATH shims (e.g. claude.cmd).
|
|
17
|
-
// Avoid it for absolute .exe paths because cmd.exe can split paths with spaces.
|
|
18
|
-
// Prompt content is never passed as a shell argument, it goes via stdin, so
|
|
19
|
-
// injection risk from the DEP0190 warning does not apply here.
|
|
20
|
-
shell: shouldUseWindowsShell(command)
|
|
25
|
+
env: process.env
|
|
21
26
|
});
|
|
22
27
|
}
|
|
23
28
|
catch (error) {
|
|
24
29
|
const message = error instanceof Error ? error.message : String(error);
|
|
25
|
-
|
|
30
|
+
resolveOnce({
|
|
26
31
|
exitCode: 1,
|
|
27
32
|
stdout: "",
|
|
28
33
|
stderr: message,
|
|
@@ -30,38 +35,59 @@ export async function runSubprocess(command, args, options) {
|
|
|
30
35
|
});
|
|
31
36
|
return;
|
|
32
37
|
}
|
|
33
|
-
if (options.stdinData !== undefined && proc.stdin) {
|
|
34
|
-
proc.stdin.write(options.stdinData, "utf8");
|
|
35
|
-
proc.stdin.end();
|
|
36
|
-
}
|
|
37
38
|
proc.stdout?.on("data", (chunk) => {
|
|
38
39
|
stdoutChunks.push(chunk);
|
|
39
40
|
});
|
|
40
41
|
proc.stderr?.on("data", (chunk) => {
|
|
41
42
|
stderrChunks.push(chunk);
|
|
42
43
|
});
|
|
44
|
+
proc.stdin?.on("error", (error) => {
|
|
45
|
+
// Some CLIs exit before consuming stdin in tests and on fast-fail paths.
|
|
46
|
+
// Treat the closed pipe as a handled subprocess lifecycle condition.
|
|
47
|
+
if (error.code === "EPIPE") {
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
stderrChunks.push(Buffer.from(`${error.message}\n`, "utf8"));
|
|
51
|
+
});
|
|
43
52
|
const timer = setTimeout(() => {
|
|
44
53
|
timedOut = true;
|
|
45
54
|
proc.kill("SIGTERM");
|
|
46
55
|
}, options.timeoutMs);
|
|
47
|
-
proc.on("close", (code) => {
|
|
48
|
-
clearTimeout(timer);
|
|
49
|
-
resolve({
|
|
50
|
-
exitCode: code ?? 1,
|
|
51
|
-
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
52
|
-
stderr: Buffer.concat(stderrChunks).toString("utf8"),
|
|
53
|
-
timedOut
|
|
54
|
-
});
|
|
55
|
-
});
|
|
56
56
|
proc.on("error", (error) => {
|
|
57
57
|
clearTimeout(timer);
|
|
58
|
-
|
|
58
|
+
resolveOnce({
|
|
59
59
|
exitCode: 1,
|
|
60
60
|
stdout: "",
|
|
61
61
|
stderr: error.message,
|
|
62
62
|
timedOut: false
|
|
63
63
|
});
|
|
64
64
|
});
|
|
65
|
+
proc.on("close", (code) => {
|
|
66
|
+
clearTimeout(timer);
|
|
67
|
+
resolveOnce({
|
|
68
|
+
exitCode: code ?? 1,
|
|
69
|
+
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
70
|
+
stderr: Buffer.concat(stderrChunks).toString("utf8"),
|
|
71
|
+
timedOut
|
|
72
|
+
});
|
|
73
|
+
});
|
|
74
|
+
if (options.stdinData !== undefined && proc.stdin) {
|
|
75
|
+
try {
|
|
76
|
+
proc.stdin.end(options.stdinData, "utf8");
|
|
77
|
+
}
|
|
78
|
+
catch (error) {
|
|
79
|
+
const stdinError = error;
|
|
80
|
+
if (stdinError.code !== "EPIPE") {
|
|
81
|
+
clearTimeout(timer);
|
|
82
|
+
resolveOnce({
|
|
83
|
+
exitCode: 1,
|
|
84
|
+
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
85
|
+
stderr: stdinError.message,
|
|
86
|
+
timedOut: false
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
65
91
|
});
|
|
66
92
|
}
|
|
67
93
|
export async function runVerification(commands, cwd, timeoutMs, verificationStack, spawnImpl) {
|
|
@@ -76,9 +102,8 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
|
|
|
76
102
|
}
|
|
77
103
|
const failedSteps = [];
|
|
78
104
|
for (const step of steps) {
|
|
79
|
-
const parts = step.command
|
|
80
|
-
const bin = parts
|
|
81
|
-
const args = parts.slice(1);
|
|
105
|
+
const parts = splitCommand(step.command);
|
|
106
|
+
const [bin, ...args] = parts;
|
|
82
107
|
if (!bin) {
|
|
83
108
|
continue;
|
|
84
109
|
}
|
|
@@ -115,8 +140,109 @@ export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl)
|
|
|
115
140
|
...(diffStats ? { diffStats } : {})
|
|
116
141
|
};
|
|
117
142
|
}
|
|
118
|
-
function
|
|
119
|
-
|
|
143
|
+
/**
 * Decides which executable and argv should actually be handed to spawn().
 *
 * The command and args are returned untouched when a custom spawn
 * implementation was injected or when the platform is not Windows. On Windows,
 * bare and relative commands are resolved to a concrete file with
 * resolveWindowsCommand(); absolute paths are used as given. If the target is a
 * `.cmd`/`.bat` file it is wrapped in `cmd.exe /d /s /c "<quoted command line>"`,
 * because Node does not launch batch files directly without a shell.
 *
 * Absolute `.cmd`/`.bat` paths go through the same wrapping as PATH-resolved
 * shims; passing them to spawn() raw would fail.
 *
 * @param {string} command - Executable name or path.
 * @param {string[]} args - Arguments for the executable.
 * @param {string} cwd - Working directory used to anchor relative command paths.
 * @param {boolean} preserveRawForInjectedSpawn - True when a custom spawn implementation is in use.
 * @returns {{command: string, args: string[]}} The command and args to spawn.
 */
function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn) {
    if (preserveRawForInjectedSpawn || process.platform !== "win32") {
        return { command, args };
    }
    // Absolute paths need no PATH/PATHEXT lookup, but must still be checked for batch files below.
    const resolved = isAbsolute(command) ? command : resolveWindowsCommand(command, cwd);
    if (!resolved) {
        // Nothing found: let spawn() report the failure for the original command.
        return { command, args };
    }
    const extension = extname(resolved).toLowerCase();
    if (extension !== ".cmd" && extension !== ".bat") {
        return { command: resolved, args };
    }
    // NOTE(review): the whole command line travels as a single, already-quoted /c argument.
    // Node re-quotes argv on Windows unless `windowsVerbatimArguments` is set by the caller's
    // spawn options — confirm on a real Windows host that the quoting survives.
    const commandLine = [quoteWindowsCmdArg(resolved), ...args.map(quoteWindowsCmdArg)].join(" ");
    return {
        command: process.env.ComSpec || "cmd.exe",
        args: ["/d", "/s", "/c", commandLine]
    };
}
|
|
160
|
+
/**
 * Looks up the file that a command refers to on Windows.
 *
 * A command containing a path separator is resolved against `cwd` and checked
 * directly; a bare name is searched for in each PATH directory, in order. In
 * both cases the candidate file names come from expandWindowsCommandCandidates().
 *
 * @param {string} command - Command name or path.
 * @param {string} cwd - Directory used to anchor commands that contain a path separator.
 * @returns {string | undefined} First existing candidate, or undefined when none exists.
 */
function resolveWindowsCommand(command, cwd) {
    const containsSeparator = ["\\", "/"].some((separator) => command.includes(separator));
    if (containsSeparator) {
        const anchoredCandidates = expandWindowsCommandCandidates(resolve(cwd, command));
        return anchoredCandidates.find((candidate) => existsSync(candidate));
    }
    const fileNames = expandWindowsCommandCandidates(command);
    for (const directory of windowsPathDirectories()) {
        // Directory order wins over extension order: the first directory with any match is used.
        const match = fileNames
            .map((fileName) => join(directory, fileName))
            .find((fullPath) => existsSync(fullPath));
        if (match !== undefined) {
            return match;
        }
    }
    return undefined;
}
|
|
176
|
+
/**
 * Lists the file names a Windows command could refer to.
 *
 * A command that already has an extension is returned as the only candidate.
 * Otherwise one candidate is produced per PATHEXT entry (falling back to
 * `.COM;.EXE;.BAT;.CMD` when PATHEXT is unset), with the extension lower-cased.
 *
 * @param {string} command - Command name or path.
 * @returns {string[]} Candidate file names in PATHEXT order.
 */
function expandWindowsCommandCandidates(command) {
    if (extname(command) !== "") {
        return [command];
    }
    const rawExtensions = (process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD").split(";");
    const candidates = [];
    for (const rawExtension of rawExtensions) {
        const extension = rawExtension.trim();
        // Blank entries (e.g. from a trailing ";") contribute nothing.
        if (extension) {
            candidates.push(`${command}${extension.toLowerCase()}`);
        }
    }
    return candidates;
}
|
|
187
|
+
/**
 * Splits the PATH environment variable into its directories.
 *
 * Reads `Path` first and `PATH` second, trims each entry, strips one leading
 * and one trailing double quote, and drops entries that end up empty.
 *
 * @returns {string[]} PATH directories in their original order.
 */
function windowsPathDirectories() {
    const { Path, PATH } = process.env;
    const rawPath = Path ?? PATH ?? "";
    const directories = [];
    for (const entry of rawPath.split(delimiter)) {
        const cleaned = entry.trim().replace(/^"|"$/g, "");
        if (cleaned) {
            directories.push(cleaned);
        }
    }
    return directories;
}
|
|
194
|
+
/**
 * Quotes one argument for inclusion in a `cmd.exe /c` command line.
 *
 * Line breaks become spaces, then `^`, `"`, `%`, `!` and the characters
 * `& | < > ( )` are escaped, and the result is wrapped in double quotes.
 *
 * @param {string} value - Raw argument.
 * @returns {string} Escaped argument surrounded by double quotes.
 */
function quoteWindowsCmdArg(value) {
    // Applied in order: carets are doubled before any later rule inserts carets of its own,
    // so the inserted carets are never doubled again.
    const rewriteRules = [
        [/\r?\n/gu, " "],
        [/\^/gu, "^^"],
        [/"/gu, '^"'],
        [/%/gu, "%%"],
        [/!/gu, "^^!"],
        [/[&|<>()]/gu, (match) => `^${match}`]
    ];
    let escaped = value;
    for (const [pattern, replacement] of rewriteRules) {
        escaped = escaped.replace(pattern, replacement);
    }
    return `"${escaped}"`;
}
|
|
204
|
+
/**
 * Splits a command string into argv-style tokens.
 *
 * Rules:
 * - Unquoted whitespace separates tokens.
 * - Single or double quotes group text (including whitespace) into one token;
 *   the quote characters themselves are dropped.
 * - Outside single quotes, a backslash escapes a following backslash or the
 *   currently open quote character; any other backslash is kept literally.
 * - A closed pair of empty quotes (`""` or `''`) produces an empty-string
 *   token instead of being dropped, so the argument count is preserved.
 * - An unterminated quote is tolerated: the text collected so far becomes the last token.
 *
 * @param {string} command - Command line to split.
 * @returns {string[]} Tokens in order; empty array for blank input.
 */
export function splitCommand(command) {
    const tokens = [];
    const trimmed = command.trim();
    let current = "";
    let quote;
    // True once the token in progress must be emitted, even if it is still "".
    let tokenStarted = false;
    const flushToken = () => {
        if (tokenStarted) {
            tokens.push(current);
        }
        current = "";
        tokenStarted = false;
    };
    for (let index = 0; index < trimmed.length; index += 1) {
        const char = trimmed[index];
        const next = trimmed[index + 1];
        const escapesNext = char === "\\" &&
            quote !== "'" &&
            next !== undefined &&
            (next === quote || next === "\\");
        if (escapesNext) {
            current += next;
            tokenStarted = true;
            index += 1;
            continue;
        }
        if (char === '"' || char === "'") {
            if (!quote) {
                quote = char;
                continue;
            }
            if (quote === char) {
                quote = undefined;
                // A completed quote pair always yields a token, even when nothing was inside it.
                tokenStarted = true;
                continue;
            }
            // Otherwise: the other quote character inside an open quote is literal text.
        }
        if (!quote && /\s/u.test(char)) {
            flushToken();
            continue;
        }
        current += char;
        tokenStarted = true;
    }
    flushToken();
    return tokens;
}
|
|
121
247
|
function truncate(text, maxLength) {
|
|
122
248
|
if (text.length <= maxLength) {
|
|
@@ -2,4 +2,5 @@ export { createDirectProviderAdapter, type DirectProviderAdapterOptions } from "
|
|
|
2
2
|
export { createStubDirectProviderAdapter, type StubDirectProviderAdapterOptions } from "./stub-direct-provider.js";
|
|
3
3
|
export { createStubAgentCliAdapter, type StubAgentCliAdapterOptions } from "./stub-agent-cli.js";
|
|
4
4
|
export { createAgentCliAdapter, createClaudeCliAdapter, createCodexCliAdapter, type AgentCliAdapterOptions, type ClaudeCliAdapterOptions, type CodexCliAdapterOptions, type CliArgsBuilder } from "./claude-cli.js";
|
|
5
|
+
export { createVerifierOnlyAdapter, type VerifierOnlyAdapterOptions } from "./verifier-only.js";
|
|
5
6
|
export type { SpawnLike, SubprocessResult, VerificationOutcome } from "./cli-bridge.js";
|
|
@@ -2,4 +2,5 @@ export { createDirectProviderAdapter } from "./direct-provider.js";
|
|
|
2
2
|
export { createStubDirectProviderAdapter } from "./stub-direct-provider.js";
|
|
3
3
|
export { createStubAgentCliAdapter } from "./stub-agent-cli.js";
|
|
4
4
|
export { createAgentCliAdapter, createClaudeCliAdapter, createCodexCliAdapter } from "./claude-cli.js";
|
|
5
|
+
export { createVerifierOnlyAdapter } from "./verifier-only.js";
|
|
5
6
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { MartinAdapter } from "../core/index.js";
|
|
2
|
+
export interface VerifierOnlyAdapterOptions {
|
|
3
|
+
workingDirectory?: string;
|
|
4
|
+
verifyTimeoutMs?: number;
|
|
5
|
+
label?: string;
|
|
6
|
+
}
|
|
7
|
+
export declare function createVerifierOnlyAdapter(options?: VerifierOnlyAdapterOptions): MartinAdapter;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { readGitExecutionArtifacts, runVerification } from "./cli-bridge.js";
|
|
2
|
+
import { createAdapterCapabilities, normalizeUsage } from "./runtime-support.js";
|
|
3
|
+
/**
 * Creates an adapter whose `execute` only runs the verification plan and then
 * reads the git execution artifacts; it does not spawn an agent CLI itself.
 *
 * Usage is always reported as zero cost and zero tokens with provenance "actual".
 *
 * @param {object} [options]
 * @param {string} [options.workingDirectory] - Directory for verification and git inspection; defaults to process.cwd().
 * @param {number} [options.verifyTimeoutMs] - Timeout passed to runVerification; defaults to 60000.
 * @param {string} [options.label] - Adapter label; defaults to "Verifier-only adapter".
 * @returns {object} Adapter with id `direct:verifier:verify-only`.
 */
export function createVerifierOnlyAdapter(options = {}) {
    const repoDirectory = options.workingDirectory ?? process.cwd();
    const timeoutMs = options.verifyTimeoutMs ?? 60_000;
    return {
        adapterId: "direct:verifier:verify-only",
        kind: "direct-provider",
        label: options.label ?? "Verifier-only adapter",
        metadata: {
            providerId: "verifier",
            model: "verify-only",
            transport: "cli",
            capabilities: createAdapterCapabilities({
                usageSettlement: true,
                diffArtifacts: true
            })
        },
        async execute(request) {
            const { verificationPlan, verificationStack } = request.context;
            const verification = await runVerification(verificationPlan, repoDirectory, timeoutMs, verificationStack);
            const execution = await readGitExecutionArtifacts(repoDirectory, 5_000);
            const changedFiles = execution.changedFiles ?? [];
            // Fields that are identical for the passing and the failing result.
            const sharedResult = {
                usage: normalizeUsage({
                    actualUsd: 0,
                    tokensIn: 0,
                    tokensOut: 0,
                    provenance: "actual"
                }),
                verification,
                execution
            };
            if (!verification.passed) {
                return {
                    status: "failed",
                    summary: "Verifier-only run failed.",
                    ...sharedResult,
                    failure: {
                        message: verification.summary
                    }
                };
            }
            // A passing verifier can still coincide with changed files; surface them in the summary.
            const summary = changedFiles.length === 0
                ? "Verifier-only run completed without file edits."
                : `Verifier-only run completed but modified files: ${changedFiles.join(", ")}`;
            return {
                status: "completed",
                summary,
                ...sharedResult
            };
        }
    };
}
|
|
57
|
+
//# sourceMappingURL=verifier-only.js.map
|