@martinloop/mcp 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/README.md +181 -41
  2. package/dist/server-validation.d.ts +10 -0
  3. package/dist/server-validation.js +234 -0
  4. package/dist/server.js +59 -15
  5. package/dist/tools/get-status.d.ts +10 -2
  6. package/dist/tools/get-status.js +11 -4
  7. package/dist/tools/inspect-loop.d.ts +4 -2
  8. package/dist/tools/inspect-loop.js +4 -7
  9. package/dist/tools/run-loop.d.ts +2 -0
  10. package/dist/tools/run-loop.js +10 -3
  11. package/dist/tools/run-store.d.ts +20 -0
  12. package/dist/tools/run-store.js +109 -0
  13. package/dist/vendor/adapters/claude-cli.d.ts +19 -4
  14. package/dist/vendor/adapters/claude-cli.js +55 -24
  15. package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
  16. package/dist/vendor/adapters/cli-bridge.js +154 -28
  17. package/dist/vendor/adapters/index.d.ts +1 -0
  18. package/dist/vendor/adapters/index.js +1 -0
  19. package/dist/vendor/adapters/verifier-only.d.ts +7 -0
  20. package/dist/vendor/adapters/verifier-only.js +57 -0
  21. package/dist/vendor/contracts/index.d.ts +3 -1
  22. package/dist/vendor/core/compiler.d.ts +2 -0
  23. package/dist/vendor/core/compiler.js +10 -4
  24. package/dist/vendor/core/context-integrity.d.ts +26 -0
  25. package/dist/vendor/core/context-integrity.js +56 -0
  26. package/dist/vendor/core/index.d.ts +7 -4
  27. package/dist/vendor/core/index.js +222 -64
  28. package/dist/vendor/core/persistence/index.d.ts +2 -0
  29. package/dist/vendor/core/persistence/index.js +1 -0
  30. package/dist/vendor/core/persistence/runs-reader.d.ts +52 -0
  31. package/dist/vendor/core/persistence/runs-reader.js +84 -0
  32. package/dist/vendor/core/persistence/store.d.ts +6 -1
  33. package/dist/vendor/core/persistence/store.js +5 -0
  34. package/dist/vendor/core/policy.d.ts +6 -0
  35. package/package.json +17 -12
  36. package/server.json +21 -0
@@ -1,9 +1,16 @@
1
1
  import { evaluateCostGovernor } from "../vendor/core/index.js";
2
- export function getStatusTool(input) {
3
- const loop = JSON.parse(input.loopJson);
2
+ import { loadLoopRecordForStatus } from "./run-store.js";
3
+ export async function getStatusTool(input) {
4
+ const resolved = await loadLoopRecordForStatus(input);
5
+ const loop = resolved.loop;
4
6
  const costState = evaluateCostGovernor({
5
7
  budget: loop.budget,
6
- cost: loop.cost,
8
+ cost: {
9
+ actualUsd: loop.cost.actualUsd,
10
+ avoidedUsd: loop.cost.avoidedUsd ?? 0,
11
+ tokensIn: loop.cost.tokensIn,
12
+ tokensOut: loop.cost.tokensOut
13
+ },
7
14
  attemptsUsed: loop.attempts.length
8
15
  });
9
16
  return {
@@ -12,7 +19,7 @@ export function getStatusTool(input) {
12
19
  lifecycleState: loop.lifecycleState,
13
20
  attempts: loop.attempts.length,
14
21
  costUsd: loop.cost.actualUsd,
15
- avoidedUsd: loop.cost.avoidedUsd,
22
+ avoidedUsd: loop.cost.avoidedUsd ?? 0,
16
23
  pressure: costState.pressure,
17
24
  shouldStop: costState.shouldStop,
18
25
  remainingBudgetUsd: costState.remainingBudgetUsd,
@@ -1,7 +1,9 @@
1
1
  import { type PortfolioSnapshot } from "../vendor/contracts/index.js";
2
2
  export interface InspectLoopInput {
3
- /** Absolute or relative path to a JSON file containing a LoopRecord or LoopRecord[]. */
4
- file: string;
3
+ /** Optional path to a JSON, JSONL, or run-store directory under the Martin runs root. */
4
+ file?: string;
5
+ /** Optional Martin runs directory. Defaults to MARTIN_RUNS_DIR or ~/.martin/runs. */
6
+ runsDir?: string;
5
7
  }
6
8
  export interface InspectLoopOutput {
7
9
  source: string;
@@ -1,13 +1,10 @@
1
- import { readFile } from "node:fs/promises";
2
1
  import { buildPortfolioSnapshot } from "../vendor/contracts/index.js";
2
+ import { loadLoopRecordsForInspect } from "./run-store.js";
3
3
  export async function inspectLoopTool(input) {
4
- const raw = await readFile(input.file, "utf8");
5
- const parsed = JSON.parse(raw);
6
- const loops = Array.isArray(parsed)
7
- ? parsed
8
- : [parsed];
4
+ const inspection = await loadLoopRecordsForInspect(input);
5
+ const loops = inspection.loops;
9
6
  return {
10
- source: input.file,
7
+ source: inspection.source,
11
8
  loopCount: loops.length,
12
9
  portfolio: buildPortfolioSnapshot(loops)
13
10
  };
@@ -7,6 +7,8 @@ export interface RunLoopInput {
7
7
  maxIterations?: number;
8
8
  maxTokens?: number;
9
9
  verificationPlan?: string[];
10
+ allowedPaths?: string[];
11
+ deniedPaths?: string[];
10
12
  workspaceId?: string;
11
13
  projectId?: string;
12
14
  }
@@ -1,10 +1,13 @@
1
1
  import { createClaudeCliAdapter, createCodexCliAdapter, createStubDirectProviderAdapter } from "../vendor/adapters/index.js";
2
- import { runMartin } from "../vendor/core/index.js";
2
+ import { createFileRunStore, resolveRunsRoot, runMartin } from "../vendor/core/index.js";
3
3
  import { DEFAULT_BUDGET } from "../vendor/contracts/index.js";
4
+ import { normalizeSafePathPatterns, resolveSafeRepoRoot } from "../server-validation.js";
4
5
  export async function runLoopTool(input) {
5
- const workingDirectory = input.workingDirectory ?? process.cwd();
6
+ const workingDirectory = resolveSafeRepoRoot(input.workingDirectory);
6
7
  const engine = input.engine ?? "claude";
7
8
  const model = input.model;
9
+ const allowedPaths = normalizeSafePathPatterns(input.allowedPaths, "allowedPaths");
10
+ const deniedPaths = normalizeSafePathPatterns(input.deniedPaths, "deniedPaths");
8
11
  const adapter = process.env.MARTIN_LIVE === "false"
9
12
  ? createStubDirectProviderAdapter({ label: "Stub adapter (MARTIN_LIVE=false)", providerId: "stub", model: "stub" })
10
13
  : engine === "codex"
@@ -27,10 +30,14 @@ export async function runLoopTool(input) {
27
30
  const result = await runMartin({
28
31
  workspaceId: input.workspaceId ?? "ws_mcp",
29
32
  projectId: input.projectId ?? "proj_mcp",
33
+ store: createFileRunStore({ runsRoot: resolveRunsRoot(process.env) }),
30
34
  task: {
31
35
  title: input.objective.slice(0, 100),
32
36
  objective: input.objective,
33
- verificationPlan: input.verificationPlan ?? []
37
+ verificationPlan: input.verificationPlan ?? [],
38
+ repoRoot: workingDirectory,
39
+ ...(allowedPaths ? { allowedPaths } : {}),
40
+ ...(deniedPaths ? { deniedPaths } : {})
34
41
  },
35
42
  budget,
36
43
  adapter
@@ -0,0 +1,20 @@
1
+ import { type LoopRunRecord } from "../vendor/core/index.js";
2
+ export interface InspectLoopSource {
3
+ source: string;
4
+ loops: LoopRunRecord[];
5
+ }
6
+ export interface StatusLoopSource {
7
+ source: string;
8
+ loop: LoopRunRecord;
9
+ }
10
+ export declare function loadLoopRecordsForInspect(input: {
11
+ file?: string;
12
+ runsDir?: string;
13
+ }): Promise<InspectLoopSource>;
14
+ export declare function loadLoopRecordForStatus(input: {
15
+ loopJson?: string;
16
+ file?: string;
17
+ loopId?: string;
18
+ runsDir?: string;
19
+ latest?: boolean;
20
+ }): Promise<StatusLoopSource>;
@@ -0,0 +1,109 @@
1
+ import { stat } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { readAllLoopRecords, readLatestLoopRecord, readLatestLoopRecordFromFile, readLoopRecordsFromFile, resolveRunsRoot } from "../vendor/core/index.js";
4
+ import { resolveSafeLoopRecordPath, resolveSafeRunsPath, resolveSafeRunsRootPath } from "../server-validation.js";
5
+ export async function loadLoopRecordsForInspect(input) {
6
+ const runsRoot = resolveSafeRunsRootPath(input.runsDir, resolveRunsRoot(process.env));
7
+ if (!input.file) {
8
+ return {
9
+ source: runsRoot,
10
+ loops: await readAllLoopRecords(runsRoot)
11
+ };
12
+ }
13
+ const targetPath = resolveSafeRunsPath(input.file, runsRoot);
14
+ const targetStats = await stat(targetPath);
15
+ if (targetStats.isDirectory()) {
16
+ const canonicalLoopRecordPath = path.join(targetPath, "loop-record.json");
17
+ try {
18
+ const canonicalLoopRecordStats = await stat(canonicalLoopRecordPath);
19
+ if (canonicalLoopRecordStats.isFile()) {
20
+ return {
21
+ source: canonicalLoopRecordPath,
22
+ loops: await readLoopRecordsFromFile(canonicalLoopRecordPath)
23
+ };
24
+ }
25
+ }
26
+ catch {
27
+ // fall through to treating the directory as a full runs root
28
+ }
29
+ return {
30
+ source: targetPath,
31
+ loops: await readAllLoopRecords(targetPath)
32
+ };
33
+ }
34
+ return {
35
+ source: targetPath,
36
+ loops: await readLoopRecordsFromFile(targetPath)
37
+ };
38
+ }
39
+ export async function loadLoopRecordForStatus(input) {
40
+ if (input.loopJson) {
41
+ return {
42
+ source: "inline:loopJson",
43
+ loop: JSON.parse(input.loopJson)
44
+ };
45
+ }
46
+ const runsRoot = resolveSafeRunsRootPath(input.runsDir, resolveRunsRoot(process.env));
47
+ if (input.file) {
48
+ const targetPath = resolveSafeRunsPath(input.file, runsRoot);
49
+ const targetStats = await stat(targetPath);
50
+ if (targetStats.isDirectory()) {
51
+ const canonicalLoopRecordPath = path.join(targetPath, "loop-record.json");
52
+ try {
53
+ const canonicalLoopRecordStats = await stat(canonicalLoopRecordPath);
54
+ if (canonicalLoopRecordStats.isFile()) {
55
+ const loop = await readLatestLoopRecordFromFile(canonicalLoopRecordPath);
56
+ if (!loop) {
57
+ throw new Error("No loop records found.");
58
+ }
59
+ return {
60
+ source: canonicalLoopRecordPath,
61
+ loop
62
+ };
63
+ }
64
+ }
65
+ catch {
66
+ // fall through to treating the directory as a full runs root
67
+ }
68
+ const loop = await readLatestLoopRecord(targetPath);
69
+ if (!loop) {
70
+ throw new Error("No loop records found.");
71
+ }
72
+ return {
73
+ source: targetPath,
74
+ loop
75
+ };
76
+ }
77
+ const loop = await readLatestLoopRecordFromFile(targetPath);
78
+ if (!loop) {
79
+ throw new Error("No loop records found.");
80
+ }
81
+ return {
82
+ source: targetPath,
83
+ loop
84
+ };
85
+ }
86
+ if (input.loopId) {
87
+ const targetPath = resolveSafeLoopRecordPath(input.loopId, runsRoot);
88
+ const loop = await readLatestLoopRecordFromFile(targetPath);
89
+ if (!loop) {
90
+ throw new Error("No loop records found.");
91
+ }
92
+ return {
93
+ source: targetPath,
94
+ loop
95
+ };
96
+ }
97
+ if (input.latest) {
98
+ const loop = await readLatestLoopRecord(runsRoot);
99
+ if (!loop) {
100
+ throw new Error("No loop records found.");
101
+ }
102
+ return {
103
+ source: runsRoot,
104
+ loop
105
+ };
106
+ }
107
+ throw new Error("Provide exactly one of loopJson, file, loopId, or latest.");
108
+ }
109
+ //# sourceMappingURL=run-store.js.map
@@ -15,15 +15,18 @@ import type { MartinAdapter } from "../core/index.js";
15
15
  import { type SpawnLike } from "./cli-bridge.js";
16
16
  /**
17
17
  * Given a prompt string, returns the full argv array to pass to spawn().
18
- * Example for Claude: (p) => ["--print", p, "--dangerously-skip-permissions"]
19
- * Example for Codex: (p) => ["--full-auto", p]
18
+ * Example for Claude: () => ["--output-format", "json", "--print"]
19
+ * Example for Codex: () => ["exec", "--sandbox", "workspace-write", "-"]
20
20
  */
21
21
  export type CliArgsBuilder = (prompt: string) => string[];
22
+ export type CliStdinBuilder = (prompt: string) => string | undefined;
22
23
  export interface AgentCliAdapterOptions {
23
24
  /** The executable to spawn (e.g. "claude", "codex"). */
24
25
  command: string;
25
26
  /** Converts a prompt string into the argv array passed to spawn(). */
26
27
  argsBuilder: CliArgsBuilder;
28
+ /** Optional stdin payload for CLIs that accept prompt input via stdin or `-`. */
29
+ stdinBuilder?: CliStdinBuilder;
27
30
  /** Adapter ID suffix. Defaults to command. */
28
31
  adapterIdSuffix?: string;
29
32
  /** Working directory for all subprocesses. Defaults to process.cwd(). */
@@ -63,8 +66,16 @@ export interface CodexCliAdapterOptions {
63
66
  label?: string;
64
67
  /** Override the model passed via --model flag. */
65
68
  model?: string;
66
- /** Run in full-auto mode (--full-auto). Defaults to true. */
69
+ /**
70
+ * Deprecated no-op retained for compatibility.
71
+ *
72
+ * Codex CLI's supported non-interactive entrypoint is `codex exec`.
73
+ * MartinLoop now uses explicit sandboxing instead of the legacy
74
+ * `--full-auto` compatibility path, which can exit before verifier execution.
75
+ */
67
76
  fullAuto?: boolean;
77
+ /** Codex sandbox mode for model-generated commands. Defaults to workspace-write. */
78
+ sandbox?: "read-only" | "workspace-write" | "danger-full-access";
68
79
  /** Extra args appended after core args (before prompt). */
69
80
  extraArgs?: string[];
70
81
  spawnImpl?: SpawnLike;
@@ -81,7 +92,11 @@ export declare function createAgentCliAdapter(options: AgentCliAdapterOptions):
81
92
  */
82
93
  export declare function createClaudeCliAdapter(options?: ClaudeCliAdapterOptions): MartinAdapter;
83
94
  /**
84
- * Spawns `codex [--full-auto] [--model <model>] "<prompt>" [extraArgs]`.
95
+ * Spawns `codex exec --cd <workspace> --sandbox <mode> [--model <model>] [extraArgs] -`.
96
+ *
97
+ * The prompt is delivered via stdin so Windows shell quoting cannot truncate or
98
+ * reinterpret long MartinLoop prompts that contain paths, deny rules, or budget
99
+ * context.
85
100
  *
86
101
  * Requires the Codex CLI to be installed and authenticated:
87
102
  * npm install -g @openai/codex
@@ -129,15 +129,12 @@ export function createAgentCliAdapter(options) {
129
129
  }
130
130
  }
131
131
  const args = options.argsBuilder(prompt);
132
- // stdinPrompt: if argsBuilder signals stdin delivery by returning args ending with "--stdin-prompt",
133
- // remove that sentinel and pass the prompt via stdin instead (avoids Windows shell-escaping issues).
134
- const useStdin = args.at(-1) === "--stdin-prompt";
135
- const spawnArgs = useStdin ? args.slice(0, -1) : args;
136
- const agentResult = await runSubprocess(options.command, spawnArgs, {
132
+ const stdinData = options.stdinBuilder?.(prompt);
133
+ const agentResult = await runSubprocess(options.command, args, {
137
134
  cwd: workingDirectory,
138
135
  timeoutMs,
139
136
  spawnImpl: options.spawnImpl,
140
- ...(useStdin ? { stdinData: prompt } : {})
137
+ ...(stdinData === undefined ? {} : { stdinData })
141
138
  });
142
139
  if (agentResult.timedOut) {
143
140
  return {
@@ -157,18 +154,19 @@ export function createAgentCliAdapter(options) {
157
154
  };
158
155
  }
159
156
  if (agentResult.exitCode !== 0 && agentResult.stdout.trim().length === 0) {
157
+ const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr, agentResult.exitCode);
160
158
  return {
161
159
  status: "failed",
162
- summary: `${options.command} subprocess exited with an error.`,
160
+ summary: `${options.command} subprocess exited before verifier execution.`,
163
161
  usage: normalizeUsage({
164
162
  actualUsd: 0,
165
163
  tokensIn: 0,
166
164
  tokensOut: 0,
167
165
  provenance: "unavailable"
168
166
  }),
169
- verification: { passed: false, summary: "Subprocess error." },
167
+ verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
170
168
  failure: {
171
- message: `${agentResult.stderr.trim() || `Exit code ${String(agentResult.exitCode)}`}. environment_mismatch`
169
+ message: failureMessage
172
170
  }
173
171
  };
174
172
  }
@@ -355,40 +353,52 @@ export function createClaudeCliAdapter(options = {}) {
355
353
  "--print",
356
354
  "--dangerously-skip-permissions",
357
355
  ...modelArgs,
358
- ...extraArgs,
359
- "--stdin-prompt" // sentinel: tells execute() to deliver prompt via stdin
360
- ]
356
+ ...extraArgs
357
+ ],
358
+ stdinBuilder: (prompt) => prompt
361
359
  });
362
360
  }
363
361
  // ---------------------------------------------------------------------------
364
362
  // Pre-configured: OpenAI Codex CLI
365
363
  // ---------------------------------------------------------------------------
366
364
  /**
367
- * Spawns `codex [--full-auto] [--model <model>] "<prompt>" [extraArgs]`.
365
+ * Spawns `codex exec --cd <workspace> --sandbox <mode> [--model <model>] [extraArgs] -`.
366
+ *
367
+ * The prompt is delivered via stdin so Windows shell quoting cannot truncate or
368
+ * reinterpret long MartinLoop prompts that contain paths, deny rules, or budget
369
+ * context.
368
370
  *
369
371
  * Requires the Codex CLI to be installed and authenticated:
370
372
  * npm install -g @openai/codex
371
373
  */
372
374
  export function createCodexCliAdapter(options = {}) {
373
- const fullAuto = options.fullAuto !== false;
374
375
  const modelArgs = options.model ? ["--model", options.model] : [];
375
376
  const extraArgs = options.extraArgs ?? [];
377
+ const sandbox = options.sandbox ?? "workspace-write";
378
+ const workingDirectory = options.workingDirectory ?? process.cwd();
376
379
  return createAgentCliAdapter({
377
380
  command: "codex",
378
381
  adapterIdSuffix: "codex",
379
382
  model: options.model ?? "codex",
380
383
  label: options.label ?? "Codex CLI adapter",
381
- workingDirectory: options.workingDirectory,
384
+ workingDirectory,
382
385
  timeoutMs: options.timeoutMs,
383
386
  verifyTimeoutMs: options.verifyTimeoutMs,
384
387
  supportsJsonOutput: false,
385
388
  spawnImpl: options.spawnImpl,
386
- argsBuilder: (prompt) => [
387
- ...(fullAuto ? ["--full-auto"] : []),
389
+ argsBuilder: () => [
390
+ "exec",
391
+ "--cd",
392
+ workingDirectory,
393
+ "--sandbox",
394
+ sandbox,
395
+ "--color",
396
+ "never",
388
397
  ...modelArgs,
389
- prompt,
390
- ...extraArgs
391
- ]
398
+ ...extraArgs,
399
+ "-"
400
+ ],
401
+ stdinBuilder: (prompt) => prompt
392
402
  });
393
403
  }
394
404
  // ---------------------------------------------------------------------------
@@ -402,14 +412,23 @@ export function createCodexCliAdapter(options = {}) {
402
412
  // ---------------------------------------------------------------------------
403
413
  function buildPrompt(request) {
404
414
  const lines = [];
415
+ const mutationMode = request.context.mutationMode ?? "edit";
405
416
  lines.push("You are running in autonomous agentic mode.");
406
- lines.push("MAKE ALL REQUIRED FILE EDITS NOW. Do not ask for confirmation. Do not ask clarifying questions.");
407
- lines.push("Do not explain what you found without also making the changes. Edit the files and complete the task.");
417
+ if (mutationMode === "verify_only") {
418
+ lines.push("DO NOT EDIT FILES. Run the verifier only and report whether it passes.");
419
+ lines.push("Do not ask for confirmation. Do not ask clarifying questions.");
420
+ }
421
+ else {
422
+ lines.push("MAKE ALL REQUIRED FILE EDITS NOW. Do not ask for confirmation. Do not ask clarifying questions.");
423
+ lines.push("Do not explain what you found without also making the changes. Edit the files and complete the task.");
424
+ }
408
425
  lines.push("");
409
426
  lines.push("If PROGRESS.md exists in your working directory, read it first for context from prior attempts.");
410
427
  lines.push("If it does not exist, proceed with the objective below.");
411
428
  lines.push("");
412
- lines.push("Complete the following coding task. Make all necessary file changes.");
429
+ lines.push(mutationMode === "verify_only"
430
+ ? "Complete the following verification-only task without making file changes."
431
+ : "Complete the following coding task. Make all necessary file changes.");
413
432
  lines.push("When you are done, the verification commands listed below must pass.");
414
433
  lines.push("");
415
434
  lines.push("OBJECTIVE:");
@@ -447,7 +466,9 @@ function buildPrompt(request) {
447
466
  lines.push(` Attempt ${String(attemptNumber)}`);
448
467
  lines.push(` Remaining budget: $${String(request.context.remainingBudgetUsd)} USD`);
449
468
  lines.push(` Remaining iterations: ${String(request.context.remainingIterations)}`);
450
- lines.push(" Do not expand scope beyond what is needed to pass verification.");
469
+ lines.push(mutationMode === "verify_only"
470
+ ? " Do not modify files; only run verification."
471
+ : " Do not expand scope beyond what is needed to pass verification.");
451
472
  lines.push("");
452
473
  if (request.previousAttempts.length > 0) {
453
474
  lines.push("PRIOR FAILED ATTEMPTS (learn from these — do not repeat the same mistakes):");
@@ -494,6 +515,16 @@ function truncate(text, maxLength) {
494
515
  }
495
516
  return `...${text.slice(-(maxLength - 3))}`;
496
517
  }
518
+ function formatPreVerifierSubprocessFailure(command, stderr, exitCode) {
519
+ const detail = stderr.trim() || `Exit code ${String(exitCode)}`;
520
+ const lowerDetail = detail.toLowerCase();
521
+ const codexLaunchBlocked = command === "codex" &&
522
+ /\b(full-auto|sandbox|approval|permission|trusted|safety|unexpected argument)\b/u.test(lowerDetail);
523
+ if (codexLaunchBlocked) {
524
+ return `Codex CLI failed before patch completion, likely due to its launch/sandbox configuration. MartinLoop invokes Codex through "codex exec --sandbox workspace-write"; verify Codex CLI auth and configuration if this persists. ${detail}. environment_mismatch`;
525
+ }
526
+ return `${detail}. environment_mismatch`;
527
+ }
497
528
  const INJECTION_PATTERNS = [
498
529
  /\[INST\]/gi,
499
530
  /<\/?system>/gi,
@@ -26,3 +26,4 @@ export declare function readGitExecutionArtifacts(repoRoot: string, timeoutMs: n
26
26
  changedFiles?: string[];
27
27
  diffStats?: ReturnType<typeof diffStatsFromNumstat>;
28
28
  }>;
29
+ export declare function splitCommand(command: string): string[];