martin-loop 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -16
- package/demo/seeded-workspace/README.md +35 -0
- package/demo/seeded-workspace/TASKS.md +29 -0
- package/demo/seeded-workspace/martin.config.yaml +11 -0
- package/demo/seeded-workspace/package.json +8 -0
- package/demo/seeded-workspace/src/invoice-summary.js +11 -0
- package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
- package/dist/vendor/adapters/claude-cli.d.ts +19 -4
- package/dist/vendor/adapters/claude-cli.js +55 -24
- package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
- package/dist/vendor/adapters/cli-bridge.js +154 -28
- package/dist/vendor/adapters/index.d.ts +1 -0
- package/dist/vendor/adapters/index.js +1 -0
- package/dist/vendor/adapters/verifier-only.d.ts +7 -0
- package/dist/vendor/adapters/verifier-only.js +57 -0
- package/dist/vendor/cli/index.d.ts +6 -1
- package/dist/vendor/cli/index.js +124 -7
- package/dist/vendor/contracts/index.d.ts +3 -1
- package/dist/vendor/core/compiler.d.ts +2 -0
- package/dist/vendor/core/compiler.js +10 -4
- package/dist/vendor/core/context-integrity.d.ts +26 -0
- package/dist/vendor/core/context-integrity.js +56 -0
- package/dist/vendor/core/index.d.ts +5 -2
- package/dist/vendor/core/index.js +186 -54
- package/dist/vendor/core/policy.d.ts +6 -0
- package/docs/distribution/DIRECTORY-SUBMISSIONS.md +89 -0
- package/docs/distribution/INTEGRATION-OUTREACH.md +61 -0
- package/docs/distribution/UNDER-3-CHALLENGE.md +65 -0
- package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
- package/docs/oss/EXAMPLES.md +9 -1
- package/docs/oss/OSS-BOUNDARY-REPORT.json +3 -7
- package/docs/oss/OSS-BOUNDARY-REPORT.md +2 -2
- package/docs/oss/QUICKSTART.md +33 -3
- package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
- package/docs/oss/README.md +6 -3
- package/docs/oss/RELEASE-SURFACE-REPORT.json +1 -1
- package/docs/oss/RELEASE-SURFACE-REPORT.md +1 -1
- package/package.json +8 -2
|
@@ -1,28 +1,33 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
|
-
import { isAbsolute } from "node:path";
|
|
2
|
+
import { delimiter, extname, isAbsolute, join, resolve } from "node:path";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
3
4
|
import { diffStatsFromNumstat } from "./runtime-support.js";
|
|
4
5
|
export async function runSubprocess(command, args, options) {
|
|
5
6
|
return new Promise((resolve) => {
|
|
6
7
|
let timedOut = false;
|
|
8
|
+
let settled = false;
|
|
7
9
|
const stdoutChunks = [];
|
|
8
10
|
const stderrChunks = [];
|
|
9
11
|
const stdinMode = options.stdinData !== undefined ? "pipe" : "ignore";
|
|
12
|
+
const resolveOnce = (result) => {
|
|
13
|
+
if (settled) {
|
|
14
|
+
return;
|
|
15
|
+
}
|
|
16
|
+
settled = true;
|
|
17
|
+
resolve(result);
|
|
18
|
+
};
|
|
10
19
|
let proc;
|
|
11
20
|
try {
|
|
12
|
-
|
|
21
|
+
const spawnPlan = createSpawnPlan(command, args, options.cwd, options.spawnImpl !== undefined);
|
|
22
|
+
proc = (options.spawnImpl ?? spawn)(spawnPlan.command, spawnPlan.args, {
|
|
13
23
|
cwd: options.cwd,
|
|
14
24
|
stdio: [stdinMode, "pipe", "pipe"],
|
|
15
|
-
env: process.env
|
|
16
|
-
// shell: true is required on Windows to resolve PATH shims (e.g. claude.cmd).
|
|
17
|
-
// Avoid it for absolute .exe paths because cmd.exe can split paths with spaces.
|
|
18
|
-
// Prompt content is never passed as a shell argument, it goes via stdin, so
|
|
19
|
-
// injection risk from the DEP0190 warning does not apply here.
|
|
20
|
-
shell: shouldUseWindowsShell(command)
|
|
25
|
+
env: process.env
|
|
21
26
|
});
|
|
22
27
|
}
|
|
23
28
|
catch (error) {
|
|
24
29
|
const message = error instanceof Error ? error.message : String(error);
|
|
25
|
-
|
|
30
|
+
resolveOnce({
|
|
26
31
|
exitCode: 1,
|
|
27
32
|
stdout: "",
|
|
28
33
|
stderr: message,
|
|
@@ -30,38 +35,59 @@ export async function runSubprocess(command, args, options) {
|
|
|
30
35
|
});
|
|
31
36
|
return;
|
|
32
37
|
}
|
|
33
|
-
if (options.stdinData !== undefined && proc.stdin) {
|
|
34
|
-
proc.stdin.write(options.stdinData, "utf8");
|
|
35
|
-
proc.stdin.end();
|
|
36
|
-
}
|
|
37
38
|
proc.stdout?.on("data", (chunk) => {
|
|
38
39
|
stdoutChunks.push(chunk);
|
|
39
40
|
});
|
|
40
41
|
proc.stderr?.on("data", (chunk) => {
|
|
41
42
|
stderrChunks.push(chunk);
|
|
42
43
|
});
|
|
44
|
+
proc.stdin?.on("error", (error) => {
|
|
45
|
+
// Some CLIs exit before consuming stdin in tests and on fast-fail paths.
|
|
46
|
+
// Treat the closed pipe as a handled subprocess lifecycle condition.
|
|
47
|
+
if (error.code === "EPIPE") {
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
stderrChunks.push(Buffer.from(`${error.message}\n`, "utf8"));
|
|
51
|
+
});
|
|
43
52
|
const timer = setTimeout(() => {
|
|
44
53
|
timedOut = true;
|
|
45
54
|
proc.kill("SIGTERM");
|
|
46
55
|
}, options.timeoutMs);
|
|
47
|
-
proc.on("close", (code) => {
|
|
48
|
-
clearTimeout(timer);
|
|
49
|
-
resolve({
|
|
50
|
-
exitCode: code ?? 1,
|
|
51
|
-
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
52
|
-
stderr: Buffer.concat(stderrChunks).toString("utf8"),
|
|
53
|
-
timedOut
|
|
54
|
-
});
|
|
55
|
-
});
|
|
56
56
|
proc.on("error", (error) => {
|
|
57
57
|
clearTimeout(timer);
|
|
58
|
-
|
|
58
|
+
resolveOnce({
|
|
59
59
|
exitCode: 1,
|
|
60
60
|
stdout: "",
|
|
61
61
|
stderr: error.message,
|
|
62
62
|
timedOut: false
|
|
63
63
|
});
|
|
64
64
|
});
|
|
65
|
+
proc.on("close", (code) => {
|
|
66
|
+
clearTimeout(timer);
|
|
67
|
+
resolveOnce({
|
|
68
|
+
exitCode: code ?? 1,
|
|
69
|
+
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
70
|
+
stderr: Buffer.concat(stderrChunks).toString("utf8"),
|
|
71
|
+
timedOut
|
|
72
|
+
});
|
|
73
|
+
});
|
|
74
|
+
if (options.stdinData !== undefined && proc.stdin) {
|
|
75
|
+
try {
|
|
76
|
+
proc.stdin.end(options.stdinData, "utf8");
|
|
77
|
+
}
|
|
78
|
+
catch (error) {
|
|
79
|
+
const stdinError = error;
|
|
80
|
+
if (stdinError.code !== "EPIPE") {
|
|
81
|
+
clearTimeout(timer);
|
|
82
|
+
resolveOnce({
|
|
83
|
+
exitCode: 1,
|
|
84
|
+
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
85
|
+
stderr: stdinError.message,
|
|
86
|
+
timedOut: false
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
65
91
|
});
|
|
66
92
|
}
|
|
67
93
|
export async function runVerification(commands, cwd, timeoutMs, verificationStack, spawnImpl) {
|
|
@@ -76,9 +102,8 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
|
|
|
76
102
|
}
|
|
77
103
|
const failedSteps = [];
|
|
78
104
|
for (const step of steps) {
|
|
79
|
-
const parts = step.command
|
|
80
|
-
const bin = parts
|
|
81
|
-
const args = parts.slice(1);
|
|
105
|
+
const parts = splitCommand(step.command);
|
|
106
|
+
const [bin, ...args] = parts;
|
|
82
107
|
if (!bin) {
|
|
83
108
|
continue;
|
|
84
109
|
}
|
|
@@ -115,8 +140,109 @@ export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl)
|
|
|
115
140
|
...(diffStats ? { diffStats } : {})
|
|
116
141
|
};
|
|
117
142
|
}
|
|
118
|
-
function
|
|
119
|
-
|
|
143
|
+
/**
 * Decides what executable and argument list should actually be handed to spawn.
 *
 * The request is passed through untouched when a custom spawn implementation was
 * injected, when the platform is not Windows, or when `command` is already an
 * absolute path. Otherwise the command is looked up via `resolveWindowsCommand`;
 * `.cmd`/`.bat` hits are wrapped in a `cmd.exe /d /s /c "<quoted command line>"`
 * invocation, any other hit replaces `command` with the resolved path.
 *
 * NOTE(review): the whole cmd.exe command line travels as ONE spawn argument, so the
 * runtime applies its own argument quoting on top of `quoteWindowsCmdArg`. Confirm on a
 * real Windows host that this interacts correctly with `/s`.
 *
 * @param command Executable name or path as requested by the caller.
 * @param args Arguments for the executable.
 * @param cwd Directory used to anchor relative command paths during lookup.
 * @param preserveRawForInjectedSpawn True when the caller supplied its own spawn function.
 * @returns An object of the form `{ command, args }` to spawn.
 */
function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn) {
    const passthrough = { command, args };
    const needsWindowsLookup = !preserveRawForInjectedSpawn
        && process.platform === "win32"
        && !isAbsolute(command);
    if (!needsWindowsLookup) {
        return passthrough;
    }
    const located = resolveWindowsCommand(command, cwd);
    if (!located) {
        // Nothing found: hand the raw request to spawn and let it report the failure.
        return passthrough;
    }
    switch (extname(located).toLowerCase()) {
        case ".cmd":
        case ".bat": {
            // Batch files must be run through the command interpreter.
            const commandLine = [located, ...args]
                .map((part) => quoteWindowsCmdArg(part))
                .join(" ");
            return {
                command: process.env.ComSpec || "cmd.exe",
                args: ["/d", "/s", "/c", commandLine]
            };
        }
        default:
            return { command: located, args };
    }
}
|
|
160
|
+
/**
 * Finds the on-disk file a Windows command name refers to.
 *
 * A command containing a slash or backslash is treated as a path: it is anchored at
 * `cwd` and the first existing candidate (see `expandWindowsCommandCandidates`) wins.
 * A bare name is searched through every directory returned by
 * `windowsPathDirectories`, trying each candidate inside a directory before moving on
 * to the next directory.
 *
 * @param command Command name or relative path.
 * @param cwd Base directory for relative paths. When it is missing the process working
 *   directory is used; `path.resolve` would otherwise throw on a non-string argument
 *   and turn a spawnable command into a spawn failure.
 * @returns The first existing candidate path, or `undefined` when nothing matched.
 */
function resolveWindowsCommand(command, cwd) {
    const hasPathSegment = command.includes("\\") || command.includes("/");
    if (hasPathSegment) {
        const anchored = resolve(cwd ?? process.cwd(), command);
        return expandWindowsCommandCandidates(anchored).find((candidate) => existsSync(candidate));
    }
    const candidates = expandWindowsCommandCandidates(command);
    for (const directory of windowsPathDirectories()) {
        for (const candidate of candidates) {
            const fullPath = join(directory, candidate);
            if (existsSync(fullPath)) {
                return fullPath;
            }
        }
    }
    return undefined;
}
|
|
176
|
+
/**
 * Lists the file names to probe for a command, based on the `PATHEXT` environment
 * variable (falling back to `.COM;.EXE;.BAT;.CMD` when it is unset).
 *
 * - A name whose extension is itself a `PATHEXT` entry (for example `claude.cmd`) is
 *   returned unchanged as the only candidate.
 * - A name without an extension expands to one candidate per `PATHEXT` entry
 *   (extensions lower-cased).
 * - A name that merely contains a dot (for example `python3.11`) is tried as-is first
 *   and then with every `PATHEXT` entry appended, so a version-like suffix no longer
 *   prevents `python3.11.exe` from being found.
 *
 * @param command Bare command name or full path.
 * @returns Candidate names in probe order.
 */
function expandWindowsCommandCandidates(command) {
    const pathExt = process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD";
    const extensions = pathExt
        .split(";")
        .map((extension) => extension.trim().toLowerCase())
        .filter(Boolean);
    const ownExtension = extname(command).toLowerCase();
    if (ownExtension && extensions.includes(ownExtension)) {
        return [command];
    }
    const expanded = extensions.map((extension) => `${command}${extension}`);
    // Keep the literal name first so existing lookups resolve exactly as before.
    return ownExtension ? [command, ...expanded] : expanded;
}
|
|
187
|
+
/**
 * Splits the search path from the environment into individual directories.
 *
 * Reads `process.env.Path`, falling back to `process.env.PATH` and then to an empty
 * string. Each entry is trimmed, loses one leading and one trailing double quote if
 * present, and is dropped when nothing is left.
 *
 * @returns The cleaned directory entries in their original order.
 */
function windowsPathDirectories() {
    const { Path: mixedCase, PATH: upperCase } = process.env;
    const source = mixedCase ?? upperCase ?? "";
    const directories = [];
    for (const piece of source.split(delimiter)) {
        const cleaned = piece.trim().replace(/^"|"$/g, "");
        if (cleaned) {
            directories.push(cleaned);
        }
    }
    return directories;
}
|
|
194
|
+
/**
 * Wraps one value in double quotes for use inside a cmd.exe command line.
 *
 * Line breaks (`\n` or `\r\n`) become a single space. Then, character by character:
 * `^` becomes `^^`, `"` becomes `^"`, `%` becomes `%%`, `!` becomes `^^!`, and each of
 * `& | < > ( )` gets a `^` prefix. The result is surrounded by double quotes.
 *
 * NOTE(review): cmd.exe usually treats `^` literally inside a double-quoted region, and
 * `%%` is a batch-file convention — verify this scheme against real Windows behaviour.
 *
 * @param value Raw argument text.
 * @returns The quoted, escaped argument.
 */
function quoteWindowsCmdArg(value) {
    const dedicatedEscapes = new Map([
        ["^", "^^"],
        ['"', '^"'],
        ["%", "%%"],
        ["!", "^^!"]
    ]);
    const singleLine = value.replace(/\r?\n/gu, " ");
    // One pass is enough: no replacement text introduces a character that a later
    // rule of the original chain would have rewritten again.
    const body = singleLine.replace(/[\^"%!&|<>()]/gu, (symbol) => dedicatedEscapes.get(symbol) ?? `^${symbol}`);
    return `"${body}"`;
}
|
|
204
|
+
/**
 * Splits a command string into an argv-style token list without invoking a shell.
 *
 * Rules implemented here:
 * - Unquoted whitespace separates tokens; surrounding whitespace is ignored.
 * - Single and double quotes group text; the quote characters themselves are removed.
 * - Outside single quotes, a backslash followed by another backslash or by the
 *   currently open quote character yields that character literally. Any other
 *   backslash is kept as-is, so `C:\tools\run.exe` survives unchanged.
 * - An explicitly quoted empty string (`""` or `''`) produces an empty token instead
 *   of being dropped, so `git commit -m ""` keeps its fourth argument.
 * - An unterminated quote simply extends to the end of the input.
 *
 * @param command Command line text.
 * @returns The tokens in order; an empty array for blank input.
 */
export function splitCommand(command) {
    const whitespace = /\s/u;
    const tokens = [];
    let current = "";
    let quote;
    // Tracks that a token has been started even when it is still empty (e.g. `""`).
    let tokenStarted = false;
    const trimmed = command.trim();
    for (let index = 0; index < trimmed.length; index += 1) {
        const char = trimmed[index];
        const next = trimmed[index + 1];
        if (char === "\\") {
            const canEscape = quote !== "'" && next !== undefined && (next === quote || next === "\\");
            if (canEscape) {
                current += next;
                tokenStarted = true;
                index += 1;
                continue;
            }
        }
        if (char === '"' || char === "'") {
            if (!quote) {
                quote = char;
                tokenStarted = true;
                continue;
            }
            if (quote === char) {
                quote = undefined;
                continue;
            }
            // The other quote kind inside an open quote is ordinary text.
        }
        if (!quote && whitespace.test(char)) {
            if (tokenStarted) {
                tokens.push(current);
                current = "";
                tokenStarted = false;
            }
            continue;
        }
        current += char;
        tokenStarted = true;
    }
    if (tokenStarted) {
        tokens.push(current);
    }
    return tokens;
}
|
|
121
247
|
function truncate(text, maxLength) {
|
|
122
248
|
if (text.length <= maxLength) {
|
|
@@ -2,4 +2,5 @@ export { createDirectProviderAdapter, type DirectProviderAdapterOptions } from "
|
|
|
2
2
|
export { createStubDirectProviderAdapter, type StubDirectProviderAdapterOptions } from "./stub-direct-provider.js";
|
|
3
3
|
export { createStubAgentCliAdapter, type StubAgentCliAdapterOptions } from "./stub-agent-cli.js";
|
|
4
4
|
export { createAgentCliAdapter, createClaudeCliAdapter, createCodexCliAdapter, type AgentCliAdapterOptions, type ClaudeCliAdapterOptions, type CodexCliAdapterOptions, type CliArgsBuilder } from "./claude-cli.js";
|
|
5
|
+
export { createVerifierOnlyAdapter, type VerifierOnlyAdapterOptions } from "./verifier-only.js";
|
|
5
6
|
export type { SpawnLike, SubprocessResult, VerificationOutcome } from "./cli-bridge.js";
|
|
@@ -2,4 +2,5 @@ export { createDirectProviderAdapter } from "./direct-provider.js";
|
|
|
2
2
|
export { createStubDirectProviderAdapter } from "./stub-direct-provider.js";
|
|
3
3
|
export { createStubAgentCliAdapter } from "./stub-agent-cli.js";
|
|
4
4
|
export { createAgentCliAdapter, createClaudeCliAdapter, createCodexCliAdapter } from "./claude-cli.js";
|
|
5
|
+
export { createVerifierOnlyAdapter } from "./verifier-only.js";
|
|
5
6
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { MartinAdapter } from "../core/index.js";
|
|
2
|
+
/**
 * Options accepted by `createVerifierOnlyAdapter`. Every field is optional.
 */
export interface VerifierOnlyAdapterOptions {
|
|
3
|
+
/** Directory used for the verification run and the git artifact read; the implementation falls back to `process.cwd()`. */
workingDirectory?: string;
|
|
4
|
+
/** Timeout in milliseconds handed to the verification run; the implementation falls back to 60000. */
verifyTimeoutMs?: number;
|
|
5
|
+
/** Adapter label; the implementation falls back to "Verifier-only adapter". */
label?: string;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Builds an adapter that only runs the verification plan and reports the outcome.
 *
 * @param options Optional overrides, see `VerifierOnlyAdapterOptions`.
 * @returns The adapter object.
 */
export declare function createVerifierOnlyAdapter(options?: VerifierOnlyAdapterOptions): MartinAdapter;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { readGitExecutionArtifacts, runVerification } from "./cli-bridge.js";
|
|
2
|
+
import { createAdapterCapabilities, normalizeUsage } from "./runtime-support.js";
|
|
3
|
+
/**
 * Creates an adapter whose `execute` runs the request's verification plan and then
 * reads the git execution artifacts, without calling any provider.
 *
 * Usage is always reported as zero cost and zero tokens with provenance "actual".
 * A passing verification yields status "completed"; the summary mentions the changed
 * files when the artifact read reports any. A failing verification yields status
 * "failed" with the verification summary as the failure message.
 *
 * @param options Optional `workingDirectory` (default: `process.cwd()`),
 *   `verifyTimeoutMs` (default: 60000) and `label` (default: "Verifier-only adapter").
 * @returns The adapter object with identity fields, metadata and `execute`.
 */
export function createVerifierOnlyAdapter(options = {}) {
    const repoDirectory = options.workingDirectory ?? process.cwd();
    const timeoutMs = options.verifyTimeoutMs ?? 60_000;
    const zeroCostUsage = () => normalizeUsage({
        actualUsd: 0,
        tokensIn: 0,
        tokensOut: 0,
        provenance: "actual"
    });
    const describeSuccess = (touchedFiles) => {
        if (touchedFiles.length > 0) {
            return `Verifier-only run completed but modified files: ${touchedFiles.join(", ")}`;
        }
        return "Verifier-only run completed without file edits.";
    };
    const capabilities = createAdapterCapabilities({
        usageSettlement: true,
        diffArtifacts: true
    });
    return {
        adapterId: "direct:verifier:verify-only",
        kind: "direct-provider",
        label: options.label ?? "Verifier-only adapter",
        metadata: {
            providerId: "verifier",
            model: "verify-only",
            transport: "cli",
            capabilities
        },
        async execute(request) {
            const verification = await runVerification(request.context.verificationPlan, repoDirectory, timeoutMs, request.context.verificationStack);
            // The artifact read uses a fixed 5 second timeout.
            const execution = await readGitExecutionArtifacts(repoDirectory, 5_000);
            const touchedFiles = execution.changedFiles ?? [];
            if (!verification.passed) {
                return {
                    status: "failed",
                    summary: "Verifier-only run failed.",
                    usage: zeroCostUsage(),
                    verification,
                    execution,
                    failure: {
                        message: verification.summary
                    }
                };
            }
            return {
                status: "completed",
                summary: describeSuccess(touchedFiles),
                usage: zeroCostUsage(),
                verification,
                execution
            };
        }
    };
}
|
|
57
|
+
//# sourceMappingURL=verifier-only.js.map
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type LoopBudget } from "../contracts/index.js";
|
|
1
|
+
import { type LoopBudget, type MutationMode } from "../contracts/index.js";
|
|
2
2
|
export type RunCommandRequest = {
|
|
3
3
|
workspaceId: string;
|
|
4
4
|
projectId: string;
|
|
@@ -11,6 +11,7 @@ export type RunCommandRequest = {
|
|
|
11
11
|
cwd?: string;
|
|
12
12
|
model?: string;
|
|
13
13
|
engine?: string;
|
|
14
|
+
mutationMode?: MutationMode;
|
|
14
15
|
allowedPaths?: string[];
|
|
15
16
|
deniedPaths?: string[];
|
|
16
17
|
acceptanceCriteria?: string[];
|
|
@@ -23,6 +24,10 @@ export type ParsedCliArguments = {
|
|
|
23
24
|
} | {
|
|
24
25
|
command: "bench";
|
|
25
26
|
suiteId: string;
|
|
27
|
+
} | {
|
|
28
|
+
command: "demo";
|
|
29
|
+
directory: string;
|
|
30
|
+
force: boolean;
|
|
26
31
|
} | {
|
|
27
32
|
command: "inspect";
|
|
28
33
|
file: string;
|
package/dist/vendor/cli/index.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import { appendFile, mkdir, readFile } from "node:fs/promises";
|
|
1
|
+
import { appendFile, cp, mkdir, readFile, readdir, rm } from "node:fs/promises";
|
|
2
2
|
import { homedir } from "node:os";
|
|
3
|
-
import { isAbsolute, join, resolve } from "node:path";
|
|
4
|
-
import {
|
|
3
|
+
import { dirname, isAbsolute, join, resolve } from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { createClaudeCliAdapter, createCodexCliAdapter, createStubDirectProviderAdapter, createVerifierOnlyAdapter } from "../adapters/index.js";
|
|
5
6
|
import { runMartin } from "../core/index.js";
|
|
6
7
|
import { buildPortfolioSnapshot, createLoopRecord } from "../contracts/index.js";
|
|
7
8
|
export async function executeCli(args) {
|
|
@@ -30,7 +31,7 @@ export async function executeCli(args) {
|
|
|
30
31
|
}
|
|
31
32
|
};
|
|
32
33
|
const workingDirectory = parsed.request.cwd ?? readOption(args, "--cwd") ?? process.cwd();
|
|
33
|
-
const adapter = selectAdapter(args, workingDirectory, parsed.request.model, parsed.request.engine);
|
|
34
|
+
const adapter = selectAdapter(args, workingDirectory, parsed.request.model, parsed.request.engine, parsed.request.mutationMode);
|
|
34
35
|
let result;
|
|
35
36
|
try {
|
|
36
37
|
result = await runMartin({
|
|
@@ -40,6 +41,7 @@ export async function executeCli(args) {
|
|
|
40
41
|
title: resolvedRequest.title,
|
|
41
42
|
objective: resolvedRequest.objective,
|
|
42
43
|
verificationPlan: resolvedRequest.verificationPlan,
|
|
44
|
+
...(resolvedRequest.mutationMode ? { mutationMode: resolvedRequest.mutationMode } : {}),
|
|
43
45
|
repoRoot: workingDirectory,
|
|
44
46
|
...(resolvedRequest.allowedPaths?.length ? { allowedPaths: resolvedRequest.allowedPaths } : {}),
|
|
45
47
|
...(resolvedRequest.deniedPaths?.length ? { deniedPaths: resolvedRequest.deniedPaths } : {}),
|
|
@@ -58,6 +60,7 @@ export async function executeCli(args) {
|
|
|
58
60
|
title: resolvedRequest.title,
|
|
59
61
|
objective: resolvedRequest.objective,
|
|
60
62
|
verificationPlan: resolvedRequest.verificationPlan,
|
|
63
|
+
...(resolvedRequest.mutationMode ? { mutationMode: resolvedRequest.mutationMode } : {}),
|
|
61
64
|
repoRoot: workingDirectory
|
|
62
65
|
},
|
|
63
66
|
budget: resolvedRequest.budget,
|
|
@@ -115,6 +118,27 @@ export async function executeCli(args) {
|
|
|
115
118
|
stderr: "The benchmark harness remains a workspace-only RC surface and is not part of the publishable @martin/cli boundary yet. Use pnpm --filter @martin/benchmarks test or pnpm --filter @martin/benchmarks eval:phase12 from the repo root instead."
|
|
116
119
|
};
|
|
117
120
|
}
|
|
121
|
+
case "demo": {
|
|
122
|
+
try {
|
|
123
|
+
const targetDirectory = await createDemoWorkspace({
|
|
124
|
+
targetDirectory: parsed.directory,
|
|
125
|
+
force: parsed.force
|
|
126
|
+
});
|
|
127
|
+
return {
|
|
128
|
+
exitCode: 0,
|
|
129
|
+
stdout: renderDemoInstructions(targetDirectory),
|
|
130
|
+
stderr: ""
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
catch (error) {
|
|
134
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
135
|
+
return {
|
|
136
|
+
exitCode: 1,
|
|
137
|
+
stdout: "",
|
|
138
|
+
stderr: `Error: ${message}`
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
}
|
|
118
142
|
case "inspect": {
|
|
119
143
|
try {
|
|
120
144
|
const contents = await readFile(parsed.file, "utf8");
|
|
@@ -295,6 +319,9 @@ export function parseCliArguments(args) {
|
|
|
295
319
|
request.cwd = next;
|
|
296
320
|
index += 1;
|
|
297
321
|
break;
|
|
322
|
+
case "--verify-only":
|
|
323
|
+
request.mutationMode = "verify_only";
|
|
324
|
+
break;
|
|
298
325
|
case "--allow-path":
|
|
299
326
|
if (next) {
|
|
300
327
|
request.allowedPaths = [...(request.allowedPaths ?? []), next];
|
|
@@ -339,6 +366,7 @@ export function parseCliArguments(args) {
|
|
|
339
366
|
...(request.cwd ? { cwd: request.cwd } : {}),
|
|
340
367
|
...(request.model ? { model: request.model } : {}),
|
|
341
368
|
...(request.engine ? { engine: request.engine } : {}),
|
|
369
|
+
...(request.mutationMode ? { mutationMode: request.mutationMode } : {}),
|
|
342
370
|
...(request.allowedPaths?.length ? { allowedPaths: request.allowedPaths } : {}),
|
|
343
371
|
...(request.deniedPaths?.length ? { deniedPaths: request.deniedPaths } : {}),
|
|
344
372
|
...(request.acceptanceCriteria?.length ? { acceptanceCriteria: request.acceptanceCriteria } : {})
|
|
@@ -351,6 +379,13 @@ export function parseCliArguments(args) {
|
|
|
351
379
|
suiteId: readOption(rest, "--suite") ?? "ralphy-smoke"
|
|
352
380
|
};
|
|
353
381
|
}
|
|
382
|
+
if (command === "demo") {
|
|
383
|
+
return {
|
|
384
|
+
command: "demo",
|
|
385
|
+
directory: resolve(readOption(rest, "--dir") ?? join(process.cwd(), "martin-loop-demo")),
|
|
386
|
+
force: hasFlag(rest, "--force")
|
|
387
|
+
};
|
|
388
|
+
}
|
|
354
389
|
if (command === "inspect") {
|
|
355
390
|
return {
|
|
356
391
|
command: "inspect",
|
|
@@ -373,12 +408,14 @@ export function renderCliHelp() {
|
|
|
373
408
|
" martin-loop run <objective> [options]",
|
|
374
409
|
" martin run <objective> [options] (alias)",
|
|
375
410
|
" martin-loop run --objective <text> [options]",
|
|
411
|
+
" martin-loop demo [--dir <path>] [--force]",
|
|
376
412
|
" martin-loop inspect --file <path>",
|
|
377
413
|
" martin-loop resume <loopId>",
|
|
378
414
|
" martin-loop bench --suite <suiteId>",
|
|
379
415
|
"",
|
|
380
416
|
"Commands:",
|
|
381
417
|
" run Execute a bounded Martin loop against the current repository.",
|
|
418
|
+
" demo Copy a safe local sandbox so you can try MartinLoop outside your own repo.",
|
|
382
419
|
" inspect Read a persisted loop record and summarize its portfolio metrics.",
|
|
383
420
|
" resume Load a persisted loop record by loop ID from ~/.martin/runs/.",
|
|
384
421
|
" bench Redirect to the workspace-only RC benchmark harness.",
|
|
@@ -390,12 +427,19 @@ export function renderCliHelp() {
|
|
|
390
427
|
" --cwd <path> Set the repo root used for repo-backed runs.",
|
|
391
428
|
" --budget <n> Set the hard cost cap in USD (subprocess killed at limit).",
|
|
392
429
|
" --budget-usd <n> Alias for --budget.",
|
|
393
|
-
" --
|
|
430
|
+
" --soft-limit-usd <n> Soft budget warning threshold in USD.",
|
|
394
431
|
" --max-iterations <n> Set the maximum number of attempts.",
|
|
432
|
+
" --max-tokens <n> Set the maximum total token budget.",
|
|
433
|
+
" --verify <cmd> Shell command to run as the verifier after each attempt.",
|
|
434
|
+
" --verify-only Skip the coding adapter and run the verifier only.",
|
|
395
435
|
" --allow-path <glob> Restrict agent writes to this path pattern (repeatable).",
|
|
396
436
|
" --deny-path <glob> Block agent from this path pattern (repeatable).",
|
|
397
437
|
" --accept <criterion> Add an acceptance criterion to the prompt (repeatable).",
|
|
398
|
-
" --config <path> Path to martin.config.yaml."
|
|
438
|
+
" --config <path> Path to martin.config.yaml.",
|
|
439
|
+
"",
|
|
440
|
+
"Demo options:",
|
|
441
|
+
" --dir <path> Target directory for the copied demo sandbox.",
|
|
442
|
+
" --force Replace an existing non-empty demo target."
|
|
399
443
|
].join("\n");
|
|
400
444
|
}
|
|
401
445
|
function readOption(tokens, flag) {
|
|
@@ -418,6 +462,76 @@ function parseLoopRecords(contents) {
|
|
|
418
462
|
return lines.map((line) => JSON.parse(line));
|
|
419
463
|
}
|
|
420
464
|
}
|
|
465
|
+
/**
 * Copies the packaged demo workspace (`<package root>/demo/seeded-workspace`) to the
 * requested target directory.
 *
 * Steps:
 * 1. Locate the package root via `findMartinPackageRoot` and make sure the demo assets
 *    directory can be listed.
 * 2. Refuse a target that is the assets directory itself or one of its parent
 *    directories: replacing it would delete the assets (and possibly the whole
 *    install) before the copy starts.
 * 3. If the target exists and is not empty, require `input.force`; an existing target
 *    is removed recursively before copying.
 * 4. Create the target's parent directory and copy the assets recursively.
 *
 * @param input Object with `targetDirectory` (path, resolved to absolute) and `force`
 *   (allow replacing a non-empty target).
 * @returns The absolute target directory.
 * @throws Error when the demo assets are missing, when the target would contain the
 *   assets, or when the target is non-empty and `force` is not set; other file-system
 *   errors are rethrown unchanged.
 */
async function createDemoWorkspace(input) {
    const rootDir = await findMartinPackageRoot();
    const sourceDirectory = join(rootDir, "demo", "seeded-workspace");
    try {
        await readdir(sourceDirectory);
    }
    catch (error) {
        if (isNodeErrorWithCode(error, "ENOENT")) {
            throw new Error(`Demo assets are missing from this install: ${sourceDirectory}`);
        }
        throw error;
    }
    const targetDirectory = resolve(input.targetDirectory);
    // Walk from the assets directory up to the file-system root; the target must not
    // be any directory on that chain. (Plain string comparison: symlinks and
    // case-insensitive file systems are not normalised here.)
    let ancestor = sourceDirectory;
    for (;;) {
        if (ancestor === targetDirectory) {
            throw new Error(`Demo target must not be the demo assets directory or one of its parent directories: ${targetDirectory}`);
        }
        const parent = dirname(ancestor);
        if (parent === ancestor) {
            break;
        }
        ancestor = parent;
    }
    const existingEntries = await readdir(targetDirectory).catch((error) => {
        if (isNodeErrorWithCode(error, "ENOENT")) {
            // A missing target is the normal case: nothing to clear.
            return undefined;
        }
        throw error;
    });
    if (existingEntries) {
        if (existingEntries.length > 0 && !input.force) {
            throw new Error(`Demo target already exists and is not empty: ${targetDirectory}. Re-run with --force to replace it.`);
        }
        await rm(targetDirectory, { force: true, recursive: true });
    }
    await mkdir(dirname(targetDirectory), { recursive: true });
    await cp(sourceDirectory, targetDirectory, { recursive: true });
    return targetDirectory;
}
|
|
494
|
+
/**
 * Locates the directory of the installed `martin-loop` package.
 *
 * Starting at the directory of this module, it inspects up to eight directory levels
 * (the start directory included), moving one parent up each time, and returns the
 * first directory whose `package.json` has `name === "martin-loop"`. A missing
 * `package.json` (ENOENT) is skipped; any other read or parse failure is rethrown.
 *
 * @returns The package root directory.
 * @throws Error when no matching manifest is found within the inspected levels or the
 *   file-system root is reached first.
 */
async function findMartinPackageRoot() {
    let probe = dirname(fileURLToPath(import.meta.url));
    for (let levelsLeft = 8; levelsLeft > 0; levelsLeft -= 1) {
        try {
            const rawManifest = await readFile(join(probe, "package.json"), "utf8");
            if (JSON.parse(rawManifest).name === "martin-loop") {
                return probe;
            }
        }
        catch (error) {
            const manifestMissing = isNodeErrorWithCode(error, "ENOENT");
            if (!manifestMissing) {
                throw error;
            }
        }
        const oneUp = dirname(probe);
        if (oneUp === probe) {
            // dirname() no longer changes the path: the file-system root was reached.
            break;
        }
        probe = oneUp;
    }
    throw new Error("Unable to resolve the martin-loop package root for demo assets.");
}
|
|
517
|
+
/**
 * Builds the multi-line "what to do next" text printed after the demo sandbox was
 * created.
 *
 * The `cd` hint wraps the directory in double quotes when it contains whitespace so
 * the line can be pasted into a shell as-is; directories without whitespace are
 * printed unchanged.
 *
 * @param targetDirectory Directory the demo sandbox was copied to.
 * @returns The instruction lines joined with "\n".
 */
function renderDemoInstructions(targetDirectory) {
    const cdTarget = /\s/u.test(targetDirectory) ? `"${targetDirectory}"` : targetDirectory;
    return [
        `MartinLoop demo sandbox created at ${targetDirectory}`,
        "",
        "Next steps:",
        ` cd ${cdTarget}`,
        " npm install",
        " npm test",
        "",
        "Safe first run (no provider spend):",
        ' MARTIN_LIVE=false npx martin-loop run "Summarize the demo workspace and confirm the verifier is green" --verify "npm test"',
        "",
        "Optional live run:",
        ' npx martin-loop run "Add support for a discount percentage to summarizeInvoice and update the tests" --verify "npm test" --engine codex',
        "",
        `Task ideas live in ${join(targetDirectory, "TASKS.md")}`
    ].join("\n");
}
|
|
421
535
|
async function resolveGuardrails(request, rawArgs) {
|
|
422
536
|
const tokens = rawArgs.slice(1);
|
|
423
537
|
const { config, configPath } = await loadGuardrailsConfig(request.configPath);
|
|
@@ -615,7 +729,10 @@ function isNodeErrorWithCode(error, code) {
|
|
|
615
729
|
* --engine codex — real Codex CLI subprocess
|
|
616
730
|
* MARTIN_LIVE=false — stub adapter (for tests / dry-runs)
|
|
617
731
|
*/
|
|
618
|
-
function selectAdapter(rawArgs, workingDirectory, modelOverride, engineOverride) {
|
|
732
|
+
function selectAdapter(rawArgs, workingDirectory, modelOverride, engineOverride, mutationMode) {
|
|
733
|
+
if (mutationMode === "verify_only") {
|
|
734
|
+
return createVerifierOnlyAdapter({ workingDirectory });
|
|
735
|
+
}
|
|
619
736
|
if (process.env.MARTIN_LIVE === "false") {
|
|
620
737
|
return createStubDirectProviderAdapter({
|
|
621
738
|
label: "Stub adapter (MARTIN_LIVE=false)",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export type LoopStatus = "queued" | "running" | "verifying" | "completed" | "failed" | "exited";
|
|
2
2
|
export type LoopLifecycleState = "created" | "running" | "verifying" | "completed" | "budget_exit" | "diminishing_returns" | "stuck_exit" | "human_escalation";
|
|
3
|
-
export type FailureClass = "logic_error" | "hallucination" | "syntax_error" | "type_error" | "test_regression" | "scope_creep" | "no_progress" | "repo_grounding_failure" | "verification_failure" | "environment_mismatch" | "budget_pressure";
|
|
3
|
+
export type FailureClass = "logic_error" | "hallucination" | "syntax_error" | "type_error" | "test_regression" | "scope_creep" | "no_progress" | "repo_grounding_failure" | "verification_failure" | "environment_mismatch" | "budget_pressure" | "safety_leash_blocked";
|
|
4
4
|
export type InterventionType = "compress_context" | "change_model" | "tighten_task" | "switch_adapter" | "run_verifier" | "escalate_human" | "stop_loop";
|
|
5
5
|
export type LoopEventType = "run.started" | "attempt.started" | "attempt.completed" | "failure.classified" | "intervention.selected" | "verification.completed" | "budget.updated" | "run.completed";
|
|
6
6
|
export interface LoopTask {
|
|
@@ -9,6 +9,7 @@ export interface LoopTask {
|
|
|
9
9
|
repoRoot?: string;
|
|
10
10
|
verificationPlan: string[];
|
|
11
11
|
verificationStack?: VerificationStep[];
|
|
12
|
+
mutationMode?: MutationMode;
|
|
12
13
|
executionProfile?: ExecutionProfile;
|
|
13
14
|
allowedNetworkDomains?: string[];
|
|
14
15
|
approvalPolicy?: ApprovalPolicy;
|
|
@@ -20,6 +21,7 @@ export interface LoopTask {
|
|
|
20
21
|
acceptanceCriteria?: string[];
|
|
21
22
|
}
|
|
22
23
|
export type ExecutionProfile = "strict_local" | "ci_safe" | "staging_controlled" | "research_untrusted";
|
|
24
|
+
export type MutationMode = "edit" | "verify_only";
|
|
23
25
|
export interface ApprovalPolicy {
|
|
24
26
|
dependencyAdds?: boolean;
|
|
25
27
|
migrations?: boolean;
|
|
@@ -12,6 +12,7 @@ export interface CompilerAdapterRequest {
|
|
|
12
12
|
objective: string;
|
|
13
13
|
verificationPlan: string[];
|
|
14
14
|
verificationStack?: LoopTask["verificationStack"];
|
|
15
|
+
mutationMode?: LoopTask["mutationMode"];
|
|
15
16
|
repoRoot?: string;
|
|
16
17
|
allowedPaths?: string[];
|
|
17
18
|
deniedPaths?: string[];
|
|
@@ -29,6 +30,7 @@ export interface PromptPacket {
|
|
|
29
30
|
contract: {
|
|
30
31
|
objective: string;
|
|
31
32
|
verificationPlan: string[];
|
|
33
|
+
mutationMode?: LoopTask["mutationMode"];
|
|
32
34
|
allowedPaths?: string[];
|
|
33
35
|
deniedPaths?: string[];
|
|
34
36
|
acceptanceCriteria?: string[];
|
|
@@ -8,10 +8,15 @@ export function compilePromptPacket(request) {
|
|
|
8
8
|
const priorFailurePatterns = request.previousAttempts
|
|
9
9
|
.filter((a) => a.failureClass && a.intervention)
|
|
10
10
|
.map((a) => `${a.failureClass}:${a.intervention}`);
|
|
11
|
-
const guidanceParts =
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
const guidanceParts = request.context.mutationMode === "verify_only"
|
|
12
|
+
? [
|
|
13
|
+
"Do not modify files.",
|
|
14
|
+
"Run the verifier only and report whether it passed."
|
|
15
|
+
]
|
|
16
|
+
: [
|
|
17
|
+
"Only modify files directly required to satisfy the contract.",
|
|
18
|
+
"Do not touch files outside the allowed paths."
|
|
19
|
+
];
|
|
15
20
|
if (request.context.allowedPaths && request.context.allowedPaths.length > 0) {
|
|
16
21
|
guidanceParts.push(`Allowed paths: ${request.context.allowedPaths.join(", ")}.`);
|
|
17
22
|
}
|
|
@@ -27,6 +32,7 @@ export function compilePromptPacket(request) {
|
|
|
27
32
|
contract: {
|
|
28
33
|
objective: redactSecretsFromText(request.context.objective),
|
|
29
34
|
verificationPlan: request.context.verificationPlan,
|
|
35
|
+
...(request.context.mutationMode ? { mutationMode: request.context.mutationMode } : {}),
|
|
30
36
|
...(request.context.allowedPaths ? { allowedPaths: request.context.allowedPaths } : {}),
|
|
31
37
|
...(request.context.deniedPaths ? { deniedPaths: request.context.deniedPaths } : {}),
|
|
32
38
|
...(request.context.acceptanceCriteria
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/** Possible outcomes of the context-integrity precheck: clean, warning, or block. */
export type ContextIntegrityVerdict = "clean" | "context_poisoning_warning" | "context_poisoning_block";
|
|
2
|
+
/**
 * Result record resolved by `runContextIntegrityPrecheck` for one attempt of a run.
 * NOTE(review): field semantics below are read from the names only — the
 * implementation is not visible here; confirm against context-integrity.js.
 */
export interface ContextIntegrityPrecheck {
|
|
3
|
+
/** Identifier of the run (mirrors the `runId` argument of the precheck function). */
runId: string;
|
|
4
|
+
/** Attempt number within the run (mirrors the `attemptIndex` argument). */
attemptIndex: number;
|
|
5
|
+
/** Overall verdict of the precheck. */
verdict: ContextIntegrityVerdict;
|
|
6
|
+
/** Optional explanation; presumably set for non-clean verdicts — TODO confirm. */
reason?: string;
|
|
7
|
+
/** Signals reported by the scan; presumably empty when nothing was found — TODO confirm. */
detectedSignals: string[];
|
|
8
|
+
/** Per-channel flags; presumably whether each input channel was scanned — TODO confirm. */
analyzedChannels: {
|
|
9
|
+
system: boolean;
|
|
10
|
+
user: boolean;
|
|
11
|
+
tools: boolean;
|
|
12
|
+
history: boolean;
|
|
13
|
+
};
|
|
14
|
+
/** Time of the check as a string; format not visible here — TODO confirm (ISO 8601?). */
timestamp: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* T05: Context Poisoning Pre-gate.
|
|
18
|
+
* Scans untrusted input channels for authority inversion or instruction re-injection.
|
|
19
|
+
* Runs BEFORE admission control and core reasoning.
|
|
20
|
+
*/
|
|
21
|
+
export declare function runContextIntegrityPrecheck(runId: string, attemptIndex: number, artifactsDir: string, inputs: {
|
|
22
|
+
userPrompt?: string;
|
|
23
|
+
toolOutput?: string;
|
|
24
|
+
retrievedContext?: string;
|
|
25
|
+
history?: string;
|
|
26
|
+
}): Promise<ContextIntegrityPrecheck>;
|