@martinloop/mcp 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -41
- package/dist/server-validation.d.ts +10 -0
- package/dist/server-validation.js +234 -0
- package/dist/server.js +59 -15
- package/dist/tools/get-status.d.ts +10 -2
- package/dist/tools/get-status.js +11 -4
- package/dist/tools/inspect-loop.d.ts +4 -2
- package/dist/tools/inspect-loop.js +4 -7
- package/dist/tools/run-loop.d.ts +2 -0
- package/dist/tools/run-loop.js +10 -3
- package/dist/tools/run-store.d.ts +20 -0
- package/dist/tools/run-store.js +109 -0
- package/dist/vendor/adapters/claude-cli.d.ts +19 -4
- package/dist/vendor/adapters/claude-cli.js +55 -24
- package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
- package/dist/vendor/adapters/cli-bridge.js +154 -28
- package/dist/vendor/adapters/index.d.ts +1 -0
- package/dist/vendor/adapters/index.js +1 -0
- package/dist/vendor/adapters/verifier-only.d.ts +7 -0
- package/dist/vendor/adapters/verifier-only.js +57 -0
- package/dist/vendor/contracts/index.d.ts +3 -1
- package/dist/vendor/core/compiler.d.ts +2 -0
- package/dist/vendor/core/compiler.js +10 -4
- package/dist/vendor/core/context-integrity.d.ts +26 -0
- package/dist/vendor/core/context-integrity.js +56 -0
- package/dist/vendor/core/index.d.ts +7 -4
- package/dist/vendor/core/index.js +222 -64
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
- package/dist/vendor/core/persistence/runs-reader.js +84 -0
- package/dist/vendor/core/persistence/store.d.ts +6 -1
- package/dist/vendor/core/persistence/store.js +5 -0
- package/dist/vendor/core/policy.d.ts +6 -0
- package/package.json +17 -12
- package/server.json +21 -0
|
@@ -1,28 +1,33 @@
|
|
|
1
1
|
import { spawn } from "node:child_process";
|
|
2
|
-
import { isAbsolute } from "node:path";
|
|
2
|
+
import { delimiter, extname, isAbsolute, join, resolve } from "node:path";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
3
4
|
import { diffStatsFromNumstat } from "./runtime-support.js";
|
|
4
5
|
export async function runSubprocess(command, args, options) {
|
|
5
6
|
return new Promise((resolve) => {
|
|
6
7
|
let timedOut = false;
|
|
8
|
+
let settled = false;
|
|
7
9
|
const stdoutChunks = [];
|
|
8
10
|
const stderrChunks = [];
|
|
9
11
|
const stdinMode = options.stdinData !== undefined ? "pipe" : "ignore";
|
|
12
|
+
const resolveOnce = (result) => {
|
|
13
|
+
if (settled) {
|
|
14
|
+
return;
|
|
15
|
+
}
|
|
16
|
+
settled = true;
|
|
17
|
+
resolve(result);
|
|
18
|
+
};
|
|
10
19
|
let proc;
|
|
11
20
|
try {
|
|
12
|
-
|
|
21
|
+
const spawnPlan = createSpawnPlan(command, args, options.cwd, options.spawnImpl !== undefined);
|
|
22
|
+
proc = (options.spawnImpl ?? spawn)(spawnPlan.command, spawnPlan.args, {
|
|
13
23
|
cwd: options.cwd,
|
|
14
24
|
stdio: [stdinMode, "pipe", "pipe"],
|
|
15
|
-
env: process.env
|
|
16
|
-
// shell: true is required on Windows to resolve PATH shims (e.g. claude.cmd).
|
|
17
|
-
// Avoid it for absolute .exe paths because cmd.exe can split paths with spaces.
|
|
18
|
-
// Prompt content is never passed as a shell argument, it goes via stdin, so
|
|
19
|
-
// injection risk from the DEP0190 warning does not apply here.
|
|
20
|
-
shell: shouldUseWindowsShell(command)
|
|
25
|
+
env: process.env
|
|
21
26
|
});
|
|
22
27
|
}
|
|
23
28
|
catch (error) {
|
|
24
29
|
const message = error instanceof Error ? error.message : String(error);
|
|
25
|
-
|
|
30
|
+
resolveOnce({
|
|
26
31
|
exitCode: 1,
|
|
27
32
|
stdout: "",
|
|
28
33
|
stderr: message,
|
|
@@ -30,38 +35,59 @@ export async function runSubprocess(command, args, options) {
|
|
|
30
35
|
});
|
|
31
36
|
return;
|
|
32
37
|
}
|
|
33
|
-
if (options.stdinData !== undefined && proc.stdin) {
|
|
34
|
-
proc.stdin.write(options.stdinData, "utf8");
|
|
35
|
-
proc.stdin.end();
|
|
36
|
-
}
|
|
37
38
|
proc.stdout?.on("data", (chunk) => {
|
|
38
39
|
stdoutChunks.push(chunk);
|
|
39
40
|
});
|
|
40
41
|
proc.stderr?.on("data", (chunk) => {
|
|
41
42
|
stderrChunks.push(chunk);
|
|
42
43
|
});
|
|
44
|
+
proc.stdin?.on("error", (error) => {
|
|
45
|
+
// Some CLIs exit before consuming stdin in tests and on fast-fail paths.
|
|
46
|
+
// Treat the closed pipe as a handled subprocess lifecycle condition.
|
|
47
|
+
if (error.code === "EPIPE") {
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
stderrChunks.push(Buffer.from(`${error.message}\n`, "utf8"));
|
|
51
|
+
});
|
|
43
52
|
const timer = setTimeout(() => {
|
|
44
53
|
timedOut = true;
|
|
45
54
|
proc.kill("SIGTERM");
|
|
46
55
|
}, options.timeoutMs);
|
|
47
|
-
proc.on("close", (code) => {
|
|
48
|
-
clearTimeout(timer);
|
|
49
|
-
resolve({
|
|
50
|
-
exitCode: code ?? 1,
|
|
51
|
-
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
52
|
-
stderr: Buffer.concat(stderrChunks).toString("utf8"),
|
|
53
|
-
timedOut
|
|
54
|
-
});
|
|
55
|
-
});
|
|
56
56
|
proc.on("error", (error) => {
|
|
57
57
|
clearTimeout(timer);
|
|
58
|
-
|
|
58
|
+
resolveOnce({
|
|
59
59
|
exitCode: 1,
|
|
60
60
|
stdout: "",
|
|
61
61
|
stderr: error.message,
|
|
62
62
|
timedOut: false
|
|
63
63
|
});
|
|
64
64
|
});
|
|
65
|
+
proc.on("close", (code) => {
|
|
66
|
+
clearTimeout(timer);
|
|
67
|
+
resolveOnce({
|
|
68
|
+
exitCode: code ?? 1,
|
|
69
|
+
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
70
|
+
stderr: Buffer.concat(stderrChunks).toString("utf8"),
|
|
71
|
+
timedOut
|
|
72
|
+
});
|
|
73
|
+
});
|
|
74
|
+
if (options.stdinData !== undefined && proc.stdin) {
|
|
75
|
+
try {
|
|
76
|
+
proc.stdin.end(options.stdinData, "utf8");
|
|
77
|
+
}
|
|
78
|
+
catch (error) {
|
|
79
|
+
const stdinError = error;
|
|
80
|
+
if (stdinError.code !== "EPIPE") {
|
|
81
|
+
clearTimeout(timer);
|
|
82
|
+
resolveOnce({
|
|
83
|
+
exitCode: 1,
|
|
84
|
+
stdout: Buffer.concat(stdoutChunks).toString("utf8"),
|
|
85
|
+
stderr: stdinError.message,
|
|
86
|
+
timedOut: false
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
65
91
|
});
|
|
66
92
|
}
|
|
67
93
|
export async function runVerification(commands, cwd, timeoutMs, verificationStack, spawnImpl) {
|
|
@@ -76,9 +102,8 @@ export async function runVerification(commands, cwd, timeoutMs, verificationStac
|
|
|
76
102
|
}
|
|
77
103
|
const failedSteps = [];
|
|
78
104
|
for (const step of steps) {
|
|
79
|
-
const parts = step.command
|
|
80
|
-
const bin = parts
|
|
81
|
-
const args = parts.slice(1);
|
|
105
|
+
const parts = splitCommand(step.command);
|
|
106
|
+
const [bin, ...args] = parts;
|
|
82
107
|
if (!bin) {
|
|
83
108
|
continue;
|
|
84
109
|
}
|
|
@@ -115,8 +140,109 @@ export async function readGitExecutionArtifacts(repoRoot, timeoutMs, spawnImpl)
|
|
|
115
140
|
...(diffStats ? { diffStats } : {})
|
|
116
141
|
};
|
|
117
142
|
}
|
|
118
|
-
function
|
|
119
|
-
|
|
143
|
+
/**
 * Decides which executable and argument vector should be handed to `spawn`.
 *
 * The inputs are returned untouched when the caller injected its own spawn
 * implementation or when the process is not running on Windows. On Windows
 * the command is resolved to a concrete file (absolute commands are used as
 * given) and `.cmd`/`.bat` targets are launched through the command
 * interpreter, because Node.js does not spawn batch files without a shell.
 *
 * @param {string} command - Executable name or path requested by the caller.
 * @param {string[]} args - Arguments for the executable.
 * @param {string | undefined} cwd - Working directory used to resolve relative command paths.
 * @param {boolean} preserveRawForInjectedSpawn - When true, the inputs are returned unchanged.
 * @returns {{ command: string, args: string[] }} The command/args pair to spawn.
 */
function createSpawnPlan(command, args, cwd, preserveRawForInjectedSpawn) {
    if (preserveRawForInjectedSpawn || process.platform !== "win32") {
        return { command, args };
    }
    // An absolute path needs no lookup, but it must still pass through the
    // batch-file check below; returning it raw would hand an absolute
    // `.cmd`/`.bat` path straight to spawn without the interpreter wrapper.
    const resolved = isAbsolute(command) ? command : resolveWindowsCommand(command, cwd);
    if (!resolved) {
        // Nothing was found: let spawn report the failure for the raw command.
        return { command, args };
    }
    const extension = extname(resolved).toLowerCase();
    if (extension === ".cmd" || extension === ".bat") {
        // NOTE(review): the joined command line is passed as one argv entry;
        // confirm the spawn call does not re-quote it on Windows.
        const commandLine = [quoteWindowsCmdArg(resolved), ...args.map(quoteWindowsCmdArg)].join(" ");
        return {
            command: process.env.ComSpec || "cmd.exe",
            args: ["/d", "/s", "/c", commandLine]
        };
    }
    return { command: resolved, args };
}
|
|
160
|
+
/**
 * Locates the file a Windows command refers to.
 *
 * A command containing a path separator is resolved against `cwd` (or the
 * process working directory when `cwd` is not provided); a bare command name
 * is searched for in each PATH directory. In both cases every candidate
 * produced by `expandWindowsCommandCandidates` is probed with `existsSync`.
 *
 * @param {string} command - Command name or relative path.
 * @param {string | undefined} cwd - Directory that relative paths are resolved against.
 * @returns {string | undefined} The first existing candidate path, or `undefined` when none exists.
 */
function resolveWindowsCommand(command, cwd) {
    const hasPathSegment = command.includes("\\") || command.includes("/");
    if (hasPathSegment) {
        // `path.resolve` throws on a non-string segment, so fall back to the
        // process working directory when the caller supplied no cwd.
        const anchored = resolve(cwd ?? process.cwd(), command);
        return expandWindowsCommandCandidates(anchored).find((candidate) => existsSync(candidate));
    }
    const candidates = expandWindowsCommandCandidates(command);
    for (const directory of windowsPathDirectories()) {
        for (const candidate of candidates) {
            const fullPath = join(directory, candidate);
            if (existsSync(fullPath)) {
                return fullPath;
            }
        }
    }
    return undefined;
}
|
|
176
|
+
/**
 * Lists the file names to probe for a command.
 *
 * A command that already carries an extension is returned as the only
 * candidate. Otherwise one candidate is produced per non-empty PATHEXT entry
 * (falling back to `.COM;.EXE;.BAT;.CMD` when PATHEXT is unset), with the
 * extension lower-cased and appended to the command.
 *
 * @param {string} command - Command name or path.
 * @returns {string[]} Candidate names in PATHEXT order.
 */
function expandWindowsCommandCandidates(command) {
    const alreadyHasExtension = extname(command) !== "";
    if (alreadyHasExtension) {
        return [command];
    }
    const candidates = [];
    const configuredExtensions = process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD";
    for (const rawExtension of configuredExtensions.split(";")) {
        const extension = rawExtension.trim();
        if (extension) {
            candidates.push(`${command}${extension.toLowerCase()}`);
        }
    }
    return candidates;
}
|
|
187
|
+
/**
 * Reads the search path from the environment (`Path` first, then `PATH`) and
 * splits it into directories.
 *
 * Each entry is trimmed and stripped of one leading and one trailing double
 * quote; entries that end up empty are dropped.
 *
 * @returns {string[]} Search directories in their original order.
 */
function windowsPathDirectories() {
    const directories = [];
    const searchPath = process.env.Path ?? process.env.PATH ?? "";
    for (const segment of searchPath.split(delimiter)) {
        const directory = segment.trim().replace(/^"|"$/g, "");
        if (directory) {
            directories.push(directory);
        }
    }
    return directories;
}
|
|
194
|
+
/**
 * Wraps a value in double quotes for use on a cmd.exe command line.
 *
 * Line breaks (`\n` or `\r\n`) become single spaces. Inside the quotes `^` is
 * doubled, `"` becomes `^"`, `%` is doubled, `!` becomes `^^!`, and each of
 * `& | < > ( )` is prefixed with `^`.
 *
 * @param {string} value - Raw argument text.
 * @returns {string} The escaped text surrounded by double quotes.
 */
function quoteWindowsCmdArg(value) {
    // Characters with a dedicated replacement; every other matched character
    // is simply prefixed with a caret.
    const dedicatedReplacements = new Map([
        ["^", "^^"],
        ['"', '^"'],
        ["%", "%%"],
        ["!", "^^!"]
    ]);
    const singleLine = value.replace(/\r?\n/gu, " ");
    const body = singleLine.replace(/[\^"%!&|<>()]/gu, (char) => dedicatedReplacements.get(char) ?? `^${char}`);
    return `"${body}"`;
}
|
|
204
|
+
/**
 * Splits a command line into an argv-style token list.
 *
 * Tokens are separated by unquoted whitespace. Single and double quotes group
 * text (the quote characters themselves are removed). A backslash escapes a
 * following backslash, and inside double quotes it also escapes a double
 * quote; every other backslash is kept literally, so single-backslash Windows
 * paths survive. An explicitly quoted empty string (`""` or `''`) yields an
 * empty-string token instead of being dropped. An unterminated quote is
 * tolerated: the remaining text becomes part of the final token.
 *
 * @param {string} command - Command line to split.
 * @returns {string[]} The tokens in order; empty when the command is blank.
 */
export function splitCommand(command) {
    const tokens = [];
    let current = "";
    let quote;
    // Tracks whether the token being built exists even though `current` may
    // be empty, which is the case right after a closed pair of quotes.
    let tokenStarted = false;
    const trimmed = command.trim();
    for (let index = 0; index < trimmed.length; index += 1) {
        const char = trimmed[index];
        const next = trimmed[index + 1];
        if (char === "\\") {
            const canEscape = quote !== "'" && next !== undefined && (next === quote || next === "\\");
            if (canEscape) {
                current += next;
                tokenStarted = true;
                index += 1;
                continue;
            }
        }
        if (char === '"' || char === "'") {
            if (!quote) {
                quote = char;
                continue;
            }
            if (quote === char) {
                quote = undefined;
                // A completed quoted segment always contributes a token, even
                // when it contained no characters.
                tokenStarted = true;
                continue;
            }
        }
        if (!quote && /\s/u.test(char)) {
            if (tokenStarted) {
                tokens.push(current);
                current = "";
                tokenStarted = false;
            }
            continue;
        }
        current += char;
        tokenStarted = true;
    }
    if (tokenStarted) {
        tokens.push(current);
    }
    return tokens;
}
|
|
121
247
|
function truncate(text, maxLength) {
|
|
122
248
|
if (text.length <= maxLength) {
|
|
@@ -2,4 +2,5 @@ export { createDirectProviderAdapter, type DirectProviderAdapterOptions } from "
|
|
|
2
2
|
export { createStubDirectProviderAdapter, type StubDirectProviderAdapterOptions } from "./stub-direct-provider.js";
|
|
3
3
|
export { createStubAgentCliAdapter, type StubAgentCliAdapterOptions } from "./stub-agent-cli.js";
|
|
4
4
|
export { createAgentCliAdapter, createClaudeCliAdapter, createCodexCliAdapter, type AgentCliAdapterOptions, type ClaudeCliAdapterOptions, type CodexCliAdapterOptions, type CliArgsBuilder } from "./claude-cli.js";
|
|
5
|
+
export { createVerifierOnlyAdapter, type VerifierOnlyAdapterOptions } from "./verifier-only.js";
|
|
5
6
|
export type { SpawnLike, SubprocessResult, VerificationOutcome } from "./cli-bridge.js";
|
|
@@ -2,4 +2,5 @@ export { createDirectProviderAdapter } from "./direct-provider.js";
|
|
|
2
2
|
export { createStubDirectProviderAdapter } from "./stub-direct-provider.js";
|
|
3
3
|
export { createStubAgentCliAdapter } from "./stub-agent-cli.js";
|
|
4
4
|
export { createAgentCliAdapter, createClaudeCliAdapter, createCodexCliAdapter } from "./claude-cli.js";
|
|
5
|
+
export { createVerifierOnlyAdapter } from "./verifier-only.js";
|
|
5
6
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { MartinAdapter } from "../core/index.js";
|
|
2
|
+
/**
 * Optional settings accepted by `createVerifierOnlyAdapter`.
 *
 * - `workingDirectory`: directory in which verification commands and the git
 *   artifact read are run; the implementation falls back to `process.cwd()`.
 * - `verifyTimeoutMs`: timeout handed to the verification run; the
 *   implementation falls back to 60000.
 * - `label`: display label for the adapter; the implementation falls back to
 *   "Verifier-only adapter".
 */
export interface VerifierOnlyAdapterOptions {
|
|
3
|
+
workingDirectory?: string;
|
|
4
|
+
verifyTimeoutMs?: number;
|
|
5
|
+
label?: string;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Creates an adapter whose `execute` runs the request's verification plan and
 * then reads git execution artifacts, reporting zero token and cost usage.
 *
 * @param options - Optional working directory, verification timeout and label.
 * @returns The adapter object.
 */
export declare function createVerifierOnlyAdapter(options?: VerifierOnlyAdapterOptions): MartinAdapter;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { readGitExecutionArtifacts, runVerification } from "./cli-bridge.js";
|
|
2
|
+
import { createAdapterCapabilities, normalizeUsage } from "./runtime-support.js";
|
|
3
|
+
/**
 * Builds an adapter that performs no model call: each execution runs the
 * request's verification plan, then reads git execution artifacts, and
 * reports the outcome with zero usage.
 *
 * @param {object} [options] - Optional settings.
 * @param {string} [options.workingDirectory] - Directory to verify in; defaults to `process.cwd()`.
 * @param {number} [options.verifyTimeoutMs] - Verification timeout; defaults to 60000.
 * @param {string} [options.label] - Adapter label; defaults to "Verifier-only adapter".
 * @returns {object} Adapter with identity fields, metadata and an async `execute(request)`.
 */
export function createVerifierOnlyAdapter(options = {}) {
    const repoDirectory = options.workingDirectory ?? process.cwd();
    const timeoutMs = options.verifyTimeoutMs ?? 60_000;
    // No provider is called, so every result settles as zero actual usage.
    const zeroUsage = () => normalizeUsage({
        actualUsd: 0,
        tokensIn: 0,
        tokensOut: 0,
        provenance: "actual"
    });
    const execute = async function execute(request) {
        const { verificationPlan, verificationStack } = request.context;
        const verification = await runVerification(verificationPlan, repoDirectory, timeoutMs, verificationStack);
        // Read git state only after verification so edits made while verifying are captured.
        const execution = await readGitExecutionArtifacts(repoDirectory, 5_000);
        const touchedFiles = execution.changedFiles ?? [];
        if (!verification.passed) {
            return {
                status: "failed",
                summary: "Verifier-only run failed.",
                usage: zeroUsage(),
                verification,
                execution,
                failure: { message: verification.summary }
            };
        }
        let summary = "Verifier-only run completed without file edits.";
        if (touchedFiles.length > 0) {
            summary = `Verifier-only run completed but modified files: ${touchedFiles.join(", ")}`;
        }
        return {
            status: "completed",
            summary,
            usage: zeroUsage(),
            verification,
            execution
        };
    };
    return {
        adapterId: "direct:verifier:verify-only",
        kind: "direct-provider",
        label: options.label ?? "Verifier-only adapter",
        metadata: {
            providerId: "verifier",
            model: "verify-only",
            transport: "cli",
            capabilities: createAdapterCapabilities({
                usageSettlement: true,
                diffArtifacts: true
            })
        },
        execute
    };
}
|
|
57
|
+
//# sourceMappingURL=verifier-only.js.map
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export type LoopStatus = "queued" | "running" | "verifying" | "completed" | "failed" | "exited";
|
|
2
2
|
export type LoopLifecycleState = "created" | "running" | "verifying" | "completed" | "budget_exit" | "diminishing_returns" | "stuck_exit" | "human_escalation";
|
|
3
|
-
export type FailureClass = "logic_error" | "hallucination" | "syntax_error" | "type_error" | "test_regression" | "scope_creep" | "no_progress" | "repo_grounding_failure" | "verification_failure" | "environment_mismatch" | "budget_pressure";
|
|
3
|
+
export type FailureClass = "logic_error" | "hallucination" | "syntax_error" | "type_error" | "test_regression" | "scope_creep" | "no_progress" | "repo_grounding_failure" | "verification_failure" | "environment_mismatch" | "budget_pressure" | "safety_leash_blocked";
|
|
4
4
|
export type InterventionType = "compress_context" | "change_model" | "tighten_task" | "switch_adapter" | "run_verifier" | "escalate_human" | "stop_loop";
|
|
5
5
|
export type LoopEventType = "run.started" | "attempt.started" | "attempt.completed" | "failure.classified" | "intervention.selected" | "verification.completed" | "budget.updated" | "run.completed";
|
|
6
6
|
export interface LoopTask {
|
|
@@ -9,6 +9,7 @@ export interface LoopTask {
|
|
|
9
9
|
repoRoot?: string;
|
|
10
10
|
verificationPlan: string[];
|
|
11
11
|
verificationStack?: VerificationStep[];
|
|
12
|
+
mutationMode?: MutationMode;
|
|
12
13
|
executionProfile?: ExecutionProfile;
|
|
13
14
|
allowedNetworkDomains?: string[];
|
|
14
15
|
approvalPolicy?: ApprovalPolicy;
|
|
@@ -20,6 +21,7 @@ export interface LoopTask {
|
|
|
20
21
|
acceptanceCriteria?: string[];
|
|
21
22
|
}
|
|
22
23
|
export type ExecutionProfile = "strict_local" | "ci_safe" | "staging_controlled" | "research_untrusted";
|
|
24
|
+
export type MutationMode = "edit" | "verify_only";
|
|
23
25
|
export interface ApprovalPolicy {
|
|
24
26
|
dependencyAdds?: boolean;
|
|
25
27
|
migrations?: boolean;
|
|
@@ -12,6 +12,7 @@ export interface CompilerAdapterRequest {
|
|
|
12
12
|
objective: string;
|
|
13
13
|
verificationPlan: string[];
|
|
14
14
|
verificationStack?: LoopTask["verificationStack"];
|
|
15
|
+
mutationMode?: LoopTask["mutationMode"];
|
|
15
16
|
repoRoot?: string;
|
|
16
17
|
allowedPaths?: string[];
|
|
17
18
|
deniedPaths?: string[];
|
|
@@ -29,6 +30,7 @@ export interface PromptPacket {
|
|
|
29
30
|
contract: {
|
|
30
31
|
objective: string;
|
|
31
32
|
verificationPlan: string[];
|
|
33
|
+
mutationMode?: LoopTask["mutationMode"];
|
|
32
34
|
allowedPaths?: string[];
|
|
33
35
|
deniedPaths?: string[];
|
|
34
36
|
acceptanceCriteria?: string[];
|
|
@@ -8,10 +8,15 @@ export function compilePromptPacket(request) {
|
|
|
8
8
|
const priorFailurePatterns = request.previousAttempts
|
|
9
9
|
.filter((a) => a.failureClass && a.intervention)
|
|
10
10
|
.map((a) => `${a.failureClass}:${a.intervention}`);
|
|
11
|
-
const guidanceParts =
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
const guidanceParts = request.context.mutationMode === "verify_only"
|
|
12
|
+
? [
|
|
13
|
+
"Do not modify files.",
|
|
14
|
+
"Run the verifier only and report whether it passed."
|
|
15
|
+
]
|
|
16
|
+
: [
|
|
17
|
+
"Only modify files directly required to satisfy the contract.",
|
|
18
|
+
"Do not touch files outside the allowed paths."
|
|
19
|
+
];
|
|
15
20
|
if (request.context.allowedPaths && request.context.allowedPaths.length > 0) {
|
|
16
21
|
guidanceParts.push(`Allowed paths: ${request.context.allowedPaths.join(", ")}.`);
|
|
17
22
|
}
|
|
@@ -27,6 +32,7 @@ export function compilePromptPacket(request) {
|
|
|
27
32
|
contract: {
|
|
28
33
|
objective: redactSecretsFromText(request.context.objective),
|
|
29
34
|
verificationPlan: request.context.verificationPlan,
|
|
35
|
+
...(request.context.mutationMode ? { mutationMode: request.context.mutationMode } : {}),
|
|
30
36
|
...(request.context.allowedPaths ? { allowedPaths: request.context.allowedPaths } : {}),
|
|
31
37
|
...(request.context.deniedPaths ? { deniedPaths: request.context.deniedPaths } : {}),
|
|
32
38
|
...(request.context.acceptanceCriteria
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Outcome of a context-integrity precheck.
 * NOTE(review): the implementation shipped alongside this declaration only
 * produces "clean" and "context_poisoning_block"; confirm whether
 * "context_poisoning_warning" is emitted anywhere.
 */
export type ContextIntegrityVerdict = "clean" | "context_poisoning_warning" | "context_poisoning_block";
|
|
2
|
+
/**
 * Record produced by `runContextIntegrityPrecheck` and written, best-effort,
 * to `context-integrity-precheck.json` in the artifacts directory.
 *
 * - `runId` / `attemptIndex`: copied from the call arguments.
 * - `verdict`: result of the scan.
 * - `reason`: present only when at least one signal was detected.
 * - `detectedSignals`: one human-readable entry per matched pattern.
 * - `analyzedChannels`: flags recording which inputs were supplied
 *   (`system` is always reported as true by the implementation).
 * - `timestamp`: ISO-8601 time at which the record was built.
 */
export interface ContextIntegrityPrecheck {
|
|
3
|
+
runId: string;
|
|
4
|
+
attemptIndex: number;
|
|
5
|
+
verdict: ContextIntegrityVerdict;
|
|
6
|
+
reason?: string;
|
|
7
|
+
detectedSignals: string[];
|
|
8
|
+
analyzedChannels: {
|
|
9
|
+
system: boolean;
|
|
10
|
+
user: boolean;
|
|
11
|
+
tools: boolean;
|
|
12
|
+
history: boolean;
|
|
13
|
+
};
|
|
14
|
+
timestamp: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* T05: Context Poisoning Pre-gate.
|
|
18
|
+
* Scans untrusted input channels for authority inversion or instruction re-injection.
|
|
19
|
+
* Runs BEFORE admission control and core reasoning.
* @param runId - Run identifier copied into the returned record.
* @param attemptIndex - Attempt number copied into the returned record.
* @param artifactsDir - Directory where the record is written as JSON; write failures are ignored.
* @param inputs - Text channels to inspect. `userPrompt`, `toolOutput` and `retrievedContext` are scanned; `history` is only recorded as supplied or not.
* @returns The precheck record, including the verdict and detected signals.
|
|
20
|
+
*/
|
|
21
|
+
export declare function runContextIntegrityPrecheck(runId: string, attemptIndex: number, artifactsDir: string, inputs: {
|
|
22
|
+
userPrompt?: string;
|
|
23
|
+
toolOutput?: string;
|
|
24
|
+
retrievedContext?: string;
|
|
25
|
+
history?: string;
|
|
26
|
+
}): Promise<ContextIntegrityPrecheck>;
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
// Phrases treated as instruction-injection or authority-inversion attempts.
const POISON_PATTERNS = [
    /ignore\s+(?:all\s+)?previous\s+instructions/i,
    /you\s+are\s+now\s+a\s+(?!Martin\s+Loop)/i,
    /new\s+rule:/i,
    /disregard\s+(?:safety|policy|guardrails)/i,
    /override\s+system\s+authority/i,
    /hidden\s+instruction:/i,
    /\[system_override\]/i,
    /\[authority_inversion\]/i
];
/**
 * T05: Context Poisoning Pre-gate.
 *
 * Joins the user prompt, tool output and retrieved context, tests the result
 * against every poison pattern plus an identity-redefinition check, and
 * returns a record whose verdict is "context_poisoning_block" when anything
 * matched and "clean" otherwise. The record is also written to
 * `context-integrity-precheck.json` inside `artifactsDir`; a failure to write
 * it is ignored.
 *
 * NOTE(review): `inputs.history` is reported in `analyzedChannels` but is not
 * part of the scanned text, and `retrievedContext` is scanned without a
 * matching channel flag — confirm this is intended.
 *
 * @param {string} runId - Run identifier copied into the record.
 * @param {number} attemptIndex - Attempt number copied into the record.
 * @param {string} artifactsDir - Directory that receives the JSON artifact.
 * @param {{ userPrompt?: string, toolOutput?: string, retrievedContext?: string, history?: string }} inputs - Text channels to inspect.
 * @returns {Promise<object>} The precheck record.
 */
export async function runContextIntegrityPrecheck(runId, attemptIndex, artifactsDir, inputs) {
    const { userPrompt, toolOutput, retrievedContext, history } = inputs;
    const scannedText = [userPrompt, toolOutput, retrievedContext]
        .filter(Boolean)
        .join("\n---\n");
    const detectedSignals = POISON_PATTERNS
        .filter((pattern) => pattern.test(scannedText))
        .map((pattern) => `Detected poison pattern: ${pattern.toString()}`);
    const redefinesIdentity = /\b(?:I am|You are)\s+(?!Martin\s+Loop|an\s+AI)\b/i.test(scannedText);
    if (redefinesIdentity) {
        detectedSignals.push("Identity redefinition attempt detected.");
    }
    const poisoned = detectedSignals.length > 0;
    const precheck = {
        runId,
        attemptIndex,
        verdict: poisoned ? "context_poisoning_block" : "clean",
        reason: poisoned ? `Detected ${detectedSignals.length} poisoning signal(s).` : undefined,
        detectedSignals,
        analyzedChannels: {
            system: true,
            user: Boolean(userPrompt),
            tools: Boolean(toolOutput),
            history: Boolean(history)
        },
        timestamp: new Date().toISOString()
    };
    try {
        await mkdir(artifactsDir, { recursive: true });
        const artifactPath = join(artifactsDir, "context-integrity-precheck.json");
        await writeFile(artifactPath, JSON.stringify(precheck, null, 2), "utf8");
    }
    catch {
        // non-fatal — artifact persistence is best-effort
    }
    return precheck;
}
|
|
56
|
+
//# sourceMappingURL=context-integrity.js.map
|
|
@@ -1,18 +1,20 @@
|
|
|
1
|
-
import { type ApprovalPolicy, type CostProvenance, type ExecutionProfile, type FailureClass, type InterventionType, type LoopArtifact, type LoopAttempt, type LoopBudget, type LoopRecord, type LoopTask } from "../contracts/index.js";
|
|
1
|
+
import { type ApprovalPolicy, type CostProvenance, type ExecutionProfile, type FailureClass, type InterventionType, type LoopArtifact, type LoopAttempt, type LoopBudget, type MutationMode, type LoopRecord, type LoopTask } from "../contracts/index.js";
|
|
2
2
|
import { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, type ExitDecision } from "./policy.js";
|
|
3
3
|
import { evaluateChangeApprovalLeash, evaluateFilesystemLeash, evaluateSecretLeash, redactSecretsFromText, resolveExecutionProfile, evaluateVerificationLeash } from "./leash.js";
|
|
4
4
|
import { buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations } from "./grounding.js";
|
|
5
5
|
import { captureRollbackBoundary, restoreRollbackBoundary } from "./rollback.js";
|
|
6
6
|
import { type RunStore } from "./persistence/index.js";
|
|
7
|
-
export type { ApprovalPolicy, BudgetPreflightEstimate, BudgetSettlement, CostProvenance, EvidenceVector, ExecutionProfile, FailureClass, InterventionType, PatchDecision, PatchDecisionArtifact, PatchDecisionReasonCode, PatchScore, RollbackBoundaryArtifact, RollbackBoundaryStrategy, RollbackFileSnapshot, RollbackOutcomeArtifact, RollbackOutcomeStatus, PolicyPhase } from "../contracts/index.js";
|
|
7
|
+
export type { ApprovalPolicy, BudgetPreflightEstimate, BudgetSettlement, CostProvenance, EvidenceVector, ExecutionProfile, FailureClass, InterventionType, PatchDecision, PatchDecisionArtifact, PatchDecisionReasonCode, PatchScore, MutationMode, RollbackBoundaryArtifact, RollbackBoundaryStrategy, RollbackFileSnapshot, RollbackOutcomeArtifact, RollbackOutcomeStatus, PolicyPhase } from "../contracts/index.js";
|
|
8
8
|
export { classifyFailure, computeEvidenceVector, evaluatePatchDecision, evaluateCostGovernor, evaluateBudgetPreflight, inferExit, nextPolicyPhase, policyPhaseToLifecycleState, scorePatchDecision, selectRecoveryRecipe, evaluateVerificationLeash, evaluateFilesystemLeash, evaluateChangeApprovalLeash, evaluateSecretLeash, resolveExecutionProfile, redactSecretsFromText, buildRepoGroundingIndex, loadOrBuildRepoGroundingIndex, queryRepoGroundingIndex, scanPatchForGroundingViolations, captureRollbackBoundary, restoreRollbackBoundary };
|
|
9
9
|
export type { BudgetPreflightDecision, BudgetPreflightInput, CostGovernorState, EvidenceVectorInput, EvaluatedPatchDecision, ExitDecision, FailureAssessment, PatchDecisionInput, RecoveryDecision, RecoveryRecipe } from "./policy.js";
|
|
10
10
|
export type { ResolvedExecutionProfile, SafetyLeashDecision, SafetyViolation } from "./leash.js";
|
|
11
11
|
export type { GroundingScanResult, GroundingViolation, GroundingViolationKind, RepoGroundingHit, RepoGroundingIndex } from "./grounding.js";
|
|
12
|
+
export { runContextIntegrityPrecheck } from "./context-integrity.js";
|
|
13
|
+
export type { ContextIntegrityPrecheck, ContextIntegrityVerdict } from "./context-integrity.js";
|
|
12
14
|
export { compilePromptPacket } from "./compiler.js";
|
|
13
15
|
export type { PromptPacket, CompilerAdapterRequest } from "./compiler.js";
|
|
14
|
-
export { createFileRunStore, makeLedgerEvent, resolveRunsRoot } from "./persistence/index.js";
|
|
15
|
-
export type { AttemptArtifacts, LedgerEvent, LedgerEventKind, RunContract, RunStore } from "./persistence/index.js";
|
|
16
|
+
export { createFileRunStore, makeLedgerEvent, readAllLoopRecords, readLatestLoopRecord, readLatestLoopRecordFromFile, readLoopRecordsFromFile, resolveRunsRoot } from "./persistence/index.js";
|
|
17
|
+
export type { AttemptArtifacts, LedgerEvent, LedgerEventKind, LoopAttemptRecord, LoopRunRecord, RunContract, RunStore } from "./persistence/index.js";
|
|
16
18
|
export { compileAndPersistContext } from "./persistence/index.js";
|
|
17
19
|
export type { CompileResult } from "./persistence/index.js";
|
|
18
20
|
export interface MartinAdapterRequest {
|
|
@@ -23,6 +25,7 @@ export interface MartinAdapterRequest {
|
|
|
23
25
|
objective: string;
|
|
24
26
|
verificationPlan: string[];
|
|
25
27
|
verificationStack?: LoopTask["verificationStack"];
|
|
28
|
+
mutationMode?: MutationMode;
|
|
26
29
|
/** Absolute path to the repository root. */
|
|
27
30
|
repoRoot?: string;
|
|
28
31
|
/** Glob patterns for files the agent may modify. Empty = no restriction. */
|