martin-loop 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +52 -16
  2. package/demo/seeded-workspace/README.md +35 -0
  3. package/demo/seeded-workspace/TASKS.md +29 -0
  4. package/demo/seeded-workspace/martin.config.yaml +11 -0
  5. package/demo/seeded-workspace/package.json +8 -0
  6. package/demo/seeded-workspace/src/invoice-summary.js +11 -0
  7. package/demo/seeded-workspace/test/invoice-summary.test.js +20 -0
  8. package/dist/vendor/adapters/claude-cli.d.ts +19 -4
  9. package/dist/vendor/adapters/claude-cli.js +55 -24
  10. package/dist/vendor/adapters/cli-bridge.d.ts +1 -0
  11. package/dist/vendor/adapters/cli-bridge.js +154 -28
  12. package/dist/vendor/adapters/index.d.ts +1 -0
  13. package/dist/vendor/adapters/index.js +1 -0
  14. package/dist/vendor/adapters/verifier-only.d.ts +7 -0
  15. package/dist/vendor/adapters/verifier-only.js +57 -0
  16. package/dist/vendor/cli/index.d.ts +6 -1
  17. package/dist/vendor/cli/index.js +124 -7
  18. package/dist/vendor/contracts/index.d.ts +3 -1
  19. package/dist/vendor/core/compiler.d.ts +2 -0
  20. package/dist/vendor/core/compiler.js +10 -4
  21. package/dist/vendor/core/context-integrity.d.ts +26 -0
  22. package/dist/vendor/core/context-integrity.js +56 -0
  23. package/dist/vendor/core/index.d.ts +5 -2
  24. package/dist/vendor/core/index.js +186 -54
  25. package/dist/vendor/core/policy.d.ts +6 -0
  26. package/docs/distribution/DIRECTORY-SUBMISSIONS.md +89 -0
  27. package/docs/distribution/INTEGRATION-OUTREACH.md +61 -0
  28. package/docs/distribution/UNDER-3-CHALLENGE.md +65 -0
  29. package/docs/oss/CLAUDE-CODE-WALKTHROUGH.md +142 -0
  30. package/docs/oss/EXAMPLES.md +9 -1
  31. package/docs/oss/OSS-BOUNDARY-REPORT.json +3 -7
  32. package/docs/oss/OSS-BOUNDARY-REPORT.md +2 -2
  33. package/docs/oss/QUICKSTART.md +33 -3
  34. package/docs/oss/RALPH-LOOP-SAFETY.md +113 -0
  35. package/docs/oss/README.md +6 -3
  36. package/docs/oss/RELEASE-SURFACE-REPORT.json +1 -1
  37. package/docs/oss/RELEASE-SURFACE-REPORT.md +1 -1
  38. package/package.json +8 -2
package/README.md CHANGED
@@ -2,25 +2,27 @@
2
2
 
3
3
  <img src="./docs/assets/martinloop-logo.png" alt="MartinLoop" width="260">
4
4
 
5
- ### A governed runtime for autonomous AI coding agents. ⭐⭐⭐
5
+ ### The cross-agent governance layer for autonomous AI coding agents.⭐
6
6
 
7
7
  [![License: MIT](https://img.shields.io/badge/license-MIT-7c3aed?style=flat-square)](./LICENSE)
8
8
  [![TypeScript](https://img.shields.io/badge/TypeScript-strict-3178c6?style=flat-square&logo=typescript&logoColor=white)](./tsconfig.base.json)
9
9
  [![Node](https://img.shields.io/badge/node-%3E%3D20-3c873a?style=flat-square&logo=nodedotjs&logoColor=white)](#quick-start)
10
10
  [![npm](https://img.shields.io/badge/npm-martin--loop-cc3534?style=flat-square&logo=npm&logoColor=white)](https://www.npmjs.com/package/martin-loop)
11
11
 
12
+ MartinLoop has been accepted into the NVIDIA Inception program.
13
+
12
14
  <br>
13
15
 
14
16
  **Your overnight AI pipeline estimated $2.40.**
15
17
  **You woke up to a $65 bill.**
16
18
  <br> 47 retries. No hard stop. No rollback. No audit trail. Nothing merged.
17
19
  MartinLoop exists so that never happens again.✅ <br> <br>
18
- If you think autonomous AI coding agents need budgets, brakes, and receipts, ⭐ the repo so more builders can find it.
20
+ If you think autonomous AI coding agents need budgets, brakes, and receipts, please star ⭐ the repo so more builders can find it.
19
21
  <br>
20
22
 
21
23
  > AI coding agents are useful. Unbounded retry loops are not.
22
24
  >
23
- > MartinLoop wraps agent runs with budgets, policy checks, verifier gates, rollback evidence, and inspectable run records.
25
+ > MartinLoop wraps agent runs with budgets, policy checks, verifier gates, rollback evidence, and inspectable run records. Built for Enterprise Coding Agents, Agentic Teams, and Autonomous Companies.
24
26
  <br>
25
27
  <img src="./docs/assets/cli-animated.svg" alt="MartinLoop CLI — governed agent run" width="720">
26
28
 
@@ -58,6 +60,7 @@ It does not try to replace the agent pattern. It makes that pattern safe to run.
58
60
  | Verifier gate | A run only reaches `completed` when the adapter result and verifier state pass. Unsafe verifier commands are blocked before agent execution. |
59
61
  | Failure taxonomy | Classifies failures across 11 current classes — including hallucination, test regression, scope creep, repo grounding failure, environment mismatch, and budget pressure — and distinguishes real success from unsafe, invalid, or terminal behavior. |
60
62
  | Safety leash | Evaluates verifier commands, file scope, dependency or migration changes that require approval, and secret-like values in task text. **Policy-as-code**. |
63
+ | Context integrity | Scans user prompts and tool output for injection patterns (authority inversion, instruction override, identity redefinition) before any attempt is admitted. Aborts with human escalation on detection. |
61
64
  | Rollback evidence | Captures rollback boundaries and restore outcomes for repo-backed attempts when a persistence store is configured. |
62
65
  | Context distillation | Carries a distilled summary of recent attempts and remaining constraints into subsequent attempts. |
63
66
  | Run records | The CLI appends JSONL loop records under `~/.martin/runs/<workspaceId>.jsonl`; lower-level stores can also persist contracts, ledgers, and attempt artifacts. |
@@ -66,7 +69,7 @@ It does not try to replace the agent pattern. It makes that pattern safe to run.
66
69
  ⭐The result is a runtime that can complete good work, refuse unsafe work, stop uneconomical work, and leave evidence behind.✅
67
70
  ---
68
71
 
69
- ## The Ralph Loop, explained
72
+ ## Ralph-Style Loops Need a Control Layer
70
73
 
71
74
  **"Everybody has gotten infatuated with what we call these Ralph Wiggum loops, just like send the thing off and it'll just go figure something out... A, it never figures anything out. And B, you just get this ginormous bill..."** — Chamath Palihapitiya, All-In Podcast #263, March 2026
72
75
 
@@ -82,7 +85,7 @@ The pattern is simple: attempt the task, run checks, retry on failure, repeat. T
82
85
  - it rolls back failed runs instead of leaving broken state behind
83
86
  - it reduces runaway token growth with context distillation
84
87
 
85
- If Ralph ever burned $165.70 on your dime, you're in the right place. Martin stopped him at $4.97 with a full audit trail. LFG! 🚀 Finally a Martin Prince leash for Ralph Wiggums! :)
88
+ If a Ralph-style loop has ever burned budget without producing a verified result, MartinLoop is designed to stop that failure mode before the next unsafe attempt runs. If Ralph ever burned $165.70 on your dime, you're in the right place. Martin stopped him at $40.97 with a full audit trail.
86
89
 
87
90
  <div align="center">
88
91
  <img src="./docs/assets/martin-raplph.png.jpg" alt="Martin vs Ralph — governed vs ungoverned agent loop" width="240">
@@ -119,6 +122,8 @@ pnpm --filter @martin/benchmarks eval
119
122
  pnpm --filter @martin/benchmarks eval:phase12
120
123
  ```
121
124
 
125
+ Challenge page: [Can your AI coding agent finish this task under $3?](./docs/distribution/UNDER-3-CHALLENGE.md)
126
+
122
127
  ---
123
128
 
124
129
  ## Quick Start
@@ -127,7 +132,9 @@ pnpm --filter @martin/benchmarks eval:phase12
127
132
  npm install -g martin-loop
128
133
  ```
129
134
 
130
- This installs both the `martin-loop` package and the `martin` command alias. The package is currently published on npm as version `0.1.2`.
135
+ This installs both the `martin-loop` package and the `martin` command alias. The package is currently published on npm as version `0.1.5`.
136
+
137
+ Want a safe sandbox first? Run `npx martin-loop demo` and MartinLoop will copy a disposable local workspace into `./martin-loop-demo`.
131
138
 
132
139
  ### Public Package Surface
133
140
 
@@ -136,8 +143,23 @@ The frozen public package surface for this release candidate is:
136
143
  - Install target: `npm install martin-loop`
137
144
  - CLI target: `npx martin-loop`
138
145
  - SDK target: `import { MartinLoop } from "martin-loop"`
146
+ - MCP target (registry-ready package): `npx -y @martinloop/mcp`
139
147
 
140
148
  The `martin` command alias is installed for local operator convenience, but the public CLI surface is `npx martin-loop`.
149
+ The standalone MCP server package is smoke-validated locally with `pnpm --filter @martinloop/mcp smoke:pack` and is ready for registry publication as a separate release step.
150
+
151
+ ### Claude Code MCP install
152
+
153
+ Use the published MCP package directly:
154
+
155
+ - macOS/Linux: `claude mcp add --scope user martin-loop -- npx -y @martinloop/mcp`
156
+ - Windows PowerShell/cmd: `claude mcp add --scope user martin-loop -- cmd /c "npx -y @martinloop/mcp"`
157
+
158
+ If you just want to launch the server manually, the one-line command is:
159
+
160
+ ```sh
161
+ npx @martinloop/mcp
162
+ ```
141
163
 
142
164
  ### Run a governed task
143
165
 
@@ -196,7 +218,7 @@ martin run <objective> [options]
196
218
  --metadata <key=value> Attach metadata to the run record; repeatable
197
219
  ```
198
220
 
199
- The public CLI also includes `inspect`, `resume`, and a `bench` redirect that points reviewers to the workspace benchmark harness.
221
+ The public CLI also includes `demo`, `inspect`, `resume`, and a `bench` redirect that points reviewers to the workspace benchmark harness.
200
222
 
201
223
  <div align="center">
202
224
  <img src="./docs/assets/cli-static.svg" alt="MartinLoop CLI terminal output" width="720">
@@ -287,20 +309,22 @@ The lower-level `runMartin` function is also exported for callers that want to a
287
309
  | `@martin/core` | Runtime controller, policy engine, safety leash, grounding, persistence, and rollback logic. |
288
310
  | `@martin/adapters` | Claude CLI, Codex CLI, direct-provider, and stub adapter surfaces. |
289
311
  | `@martin/cli` | Local CLI implementation for `run`, `inspect`, `resume`, and the benchmark redirect. |
290
- | `@martin/mcp` | MCP server tools: `martin_run`, `martin_inspect`, and `martin_status`. |
312
+ | `@martinloop/mcp` | MCP server tools: `martin_run`, `martin_inspect`, and `martin_status`. |
291
313
  | `benchmarks/` | Workspace-only deterministic benchmark and RC validation harness. |
292
314
  | `apps/control-plane/` | Hosted control-plane workstream, outside the initial npm package surface. |
293
315
  | `apps/local-dashboard/` | Local dashboard/read-model viewer, not currently packaged as public npm API. |
294
316
 
295
- The `@martin/core`, `@martin/adapters`, and `@martin/contracts` package manifests are still private workspace packages; the public install target is the root `martin-loop` facade.
317
+ The `@martin/core`, `@martin/adapters`, and `@martin/contracts` package manifests are still private workspace packages. The public runtime install target is the root `martin-loop` facade, while `@martinloop/mcp` is packaged as a standalone MCP server with vendored internal runtime dependencies for registry publication.
296
318
 
297
319
  ---
298
-
299
320
  ## Development
300
321
 
301
- Requirements: Node 20+ and pnpm 10.x.
322
+ Requirements:
302
323
 
303
- ```sh
324
+ - Node 20+
325
+ - pnpm 10.x
326
+
327
+ ```bash
304
328
  git clone https://github.com/Keesan12/martin-loop.git
305
329
  cd martin-loop
306
330
  pnpm install
@@ -308,9 +332,9 @@ pnpm install
308
332
  pnpm test
309
333
  pnpm lint
310
334
  pnpm build
335
+
311
336
  ```
312
337
 
313
- ```md
314
338
  Current RC gate commands:
315
339
 
316
340
  ```sh
@@ -320,9 +344,7 @@ pnpm repo:smoke
320
344
  pnpm rc:validate
321
345
  pnpm pilot:prep:validate
322
346
  pnpm release:matrix:local
323
- Caution: Registry Publication
324
-
325
- This package is published through the public martin-loop package surface. Treat registry publication as a guarded release step: verify the RC gate commands, confirm the version follows semantic versioning, and document breaking changes before publishing.
347
+ ```
326
348
 
327
349
  > **Caution:** This package is live on npm. Treat registry publication as a guarded release step — verify the RC gate commands, confirm semantic versioning, and document breaking changes before publishing.
328
350
 
@@ -332,6 +354,11 @@ Helpful docs:
332
354
 
333
355
  - [OSS quickstart](./docs/oss/QUICKSTART.md)
334
356
  - [OSS examples](./docs/oss/EXAMPLES.md)
357
+ - [Under-$3 benchmark challenge](./docs/distribution/UNDER-3-CHALLENGE.md)
358
+ - [Directory submission pack](./docs/distribution/DIRECTORY-SUBMISSIONS.md)
359
+ - [Integration outreach pack](./docs/distribution/INTEGRATION-OUTREACH.md)
360
+ - [Claude Code walkthrough](./docs/oss/CLAUDE-CODE-WALKTHROUGH.md)
361
+ - [Ralph-style loop safety guide](./docs/oss/RALPH-LOOP-SAFETY.md)
335
362
  - [OSS boundary report](./docs/oss/OSS-BOUNDARY-REPORT.md)
336
363
  - [Release surface report](./docs/oss/RELEASE-SURFACE-REPORT.md)
337
364
 
@@ -360,3 +387,12 @@ Conventional commit prefixes: `feat:`, `fix:`, `chore:`, `docs:`, `refactor:`, a
360
387
  *"AI coding accountability: completes good work, refuses unsafe work, stops uneconomical work."*
361
388
 
362
389
  </div>
390
+
391
+ <div align="center">
392
+ <br>
393
+ <picture>
394
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/nvidia-inception-program.png">
395
+ <img src="https://raw.githubusercontent.com/Keesan12/martin-loop/main/docs/assets/nvidia-inception-program-light.png" alt="NVIDIA Inception Program logo" width="280">
396
+ </picture>
397
+ <br>
398
+ </div>
@@ -0,0 +1,35 @@
1
+ # MartinLoop Demo Sandbox
2
+
3
+ This workspace is the safe public demo copied by `martin-loop demo`.
4
+
5
+ It is intentionally small:
6
+
7
+ - `npm test` is green out of the box
8
+ - `martin.config.yaml` keeps the budget tiny
9
+ - the first suggested MartinLoop run can stay in stub mode with `MARTIN_LIVE=false`
10
+
11
+ ## Files
12
+
13
+ - `src/invoice-summary.js`: tiny module used by the demo task
14
+ - `test/invoice-summary.test.js`: Node test suite
15
+ - `TASKS.md`: suggested objectives for a stub-safe run or a live adapter run
16
+ - `martin.config.yaml`: low-risk governance defaults
17
+
18
+ ## Suggested flow
19
+
20
+ ```sh
21
+ npm install
22
+ npm test
23
+ ```
24
+
25
+ Safe first run:
26
+
27
+ ```sh
28
+ MARTIN_LIVE=false npx martin-loop run "Summarize the demo workspace and confirm the verifier is green" --verify "npm test"
29
+ ```
30
+
31
+ Optional live run:
32
+
33
+ ```sh
34
+ npx martin-loop run "Add support for a discount percentage to summarizeInvoice and update the tests" --verify "npm test" --engine codex
35
+ ```
@@ -0,0 +1,29 @@
1
+ # Suggested Demo Tasks
2
+
3
+ ## Stub-safe first run
4
+
5
+ Use this when you want to see MartinLoop create a governed run record without spending provider budget:
6
+
7
+ ```text
8
+ Summarize the demo workspace, confirm the verifier command is green, and explain the safest next change to make.
9
+ ```
10
+
11
+ Verifier:
12
+
13
+ ```sh
14
+ npm test
15
+ ```
16
+
17
+ ## Optional live run
18
+
19
+ Use this when you want a real coding task in the sandbox:
20
+
21
+ ```text
22
+ Add support for a discount percentage to summarizeInvoice and update the tests while keeping the existing tax behavior intact.
23
+ ```
24
+
25
+ Verifier:
26
+
27
+ ```sh
28
+ npm test
29
+ ```
@@ -0,0 +1,11 @@
1
+ policyProfile: strict_local
2
+ budget:
3
+ maxUsd: 2
4
+ softLimitUsd: 1
5
+ maxIterations: 2
6
+ maxTokens: 12000
7
+ governance:
8
+ destructiveActionPolicy: approval
9
+ telemetryDestination: local-only
10
+ verifierRules:
11
+ - npm test
@@ -0,0 +1,8 @@
1
+ {
2
+ "name": "martin-loop-demo-sandbox",
3
+ "private": true,
4
+ "type": "module",
5
+ "scripts": {
6
+ "test": "node --test"
7
+ }
8
+ }
@@ -0,0 +1,11 @@
1
+ export function summarizeInvoice(items, taxRate = 0) {
2
+ const subtotal = items.reduce((sum, item) => sum + item.quantity * item.unitPrice, 0);
3
+ const tax = Number((subtotal * taxRate).toFixed(2));
4
+ const total = Number((subtotal + tax).toFixed(2));
5
+
6
+ return {
7
+ subtotal: Number(subtotal.toFixed(2)),
8
+ tax,
9
+ total
10
+ };
11
+ }
@@ -0,0 +1,20 @@
1
+ import test from "node:test";
2
+ import assert from "node:assert/strict";
3
+
4
+ import { summarizeInvoice } from "../src/invoice-summary.js";
5
+
6
+ test("summarizeInvoice returns subtotal, tax, and total", () => {
7
+ const result = summarizeInvoice(
8
+ [
9
+ { quantity: 2, unitPrice: 19.99 },
10
+ { quantity: 1, unitPrice: 5.5 }
11
+ ],
12
+ 0.13
13
+ );
14
+
15
+ assert.deepEqual(result, {
16
+ subtotal: 45.48,
17
+ tax: 5.91,
18
+ total: 51.39
19
+ });
20
+ });
@@ -15,15 +15,18 @@ import type { MartinAdapter } from "../core/index.js";
15
15
  import { type SpawnLike } from "./cli-bridge.js";
16
16
  /**
17
17
  * Given a prompt string, returns the full argv array to pass to spawn().
18
- * Example for Claude: (p) => ["--print", p, "--dangerously-skip-permissions"]
19
- * Example for Codex: (p) => ["--full-auto", p]
18
+ * Example for Claude: () => ["--output-format", "json", "--print"]
19
+ * Example for Codex: () => ["exec", "--sandbox", "workspace-write", "-"]
20
20
  */
21
21
  export type CliArgsBuilder = (prompt: string) => string[];
22
+ export type CliStdinBuilder = (prompt: string) => string | undefined;
22
23
  export interface AgentCliAdapterOptions {
23
24
  /** The executable to spawn (e.g. "claude", "codex"). */
24
25
  command: string;
25
26
  /** Converts a prompt string into the argv array passed to spawn(). */
26
27
  argsBuilder: CliArgsBuilder;
28
+ /** Optional stdin payload for CLIs that accept prompt input via stdin or `-`. */
29
+ stdinBuilder?: CliStdinBuilder;
27
30
  /** Adapter ID suffix. Defaults to command. */
28
31
  adapterIdSuffix?: string;
29
32
  /** Working directory for all subprocesses. Defaults to process.cwd(). */
@@ -63,8 +66,16 @@ export interface CodexCliAdapterOptions {
63
66
  label?: string;
64
67
  /** Override the model passed via --model flag. */
65
68
  model?: string;
66
- /** Run in full-auto mode (--full-auto). Defaults to true. */
69
+ /**
70
+ * Deprecated no-op retained for compatibility.
71
+ *
72
+ * Codex CLI's supported non-interactive entrypoint is `codex exec`.
73
+ * MartinLoop now uses explicit sandboxing instead of the legacy
74
+ * `--full-auto` compatibility path, which can exit before verifier execution.
75
+ */
67
76
  fullAuto?: boolean;
77
+ /** Codex sandbox mode for model-generated commands. Defaults to workspace-write. */
78
+ sandbox?: "read-only" | "workspace-write" | "danger-full-access";
68
79
  /** Extra args appended after core args (before prompt). */
69
80
  extraArgs?: string[];
70
81
  spawnImpl?: SpawnLike;
@@ -81,7 +92,11 @@ export declare function createAgentCliAdapter(options: AgentCliAdapterOptions):
81
92
  */
82
93
  export declare function createClaudeCliAdapter(options?: ClaudeCliAdapterOptions): MartinAdapter;
83
94
  /**
84
- * Spawns `codex [--full-auto] [--model <model>] "<prompt>" [extraArgs]`.
95
+ * Spawns `codex exec --cd <workspace> --sandbox <mode> [--model <model>] [extraArgs] -`.
96
+ *
97
+ * The prompt is delivered via stdin so Windows shell quoting cannot truncate or
98
+ * reinterpret long MartinLoop prompts that contain paths, deny rules, or budget
99
+ * context.
85
100
  *
86
101
  * Requires the Codex CLI to be installed and authenticated:
87
102
  * npm install -g @openai/codex
@@ -129,15 +129,12 @@ export function createAgentCliAdapter(options) {
129
129
  }
130
130
  }
131
131
  const args = options.argsBuilder(prompt);
132
- // stdinPrompt: if argsBuilder signals stdin delivery by returning args ending with "--stdin-prompt",
133
- // remove that sentinel and pass the prompt via stdin instead (avoids Windows shell-escaping issues).
134
- const useStdin = args.at(-1) === "--stdin-prompt";
135
- const spawnArgs = useStdin ? args.slice(0, -1) : args;
136
- const agentResult = await runSubprocess(options.command, spawnArgs, {
132
+ const stdinData = options.stdinBuilder?.(prompt);
133
+ const agentResult = await runSubprocess(options.command, args, {
137
134
  cwd: workingDirectory,
138
135
  timeoutMs,
139
136
  spawnImpl: options.spawnImpl,
140
- ...(useStdin ? { stdinData: prompt } : {})
137
+ ...(stdinData === undefined ? {} : { stdinData })
141
138
  });
142
139
  if (agentResult.timedOut) {
143
140
  return {
@@ -157,18 +154,19 @@ export function createAgentCliAdapter(options) {
157
154
  };
158
155
  }
159
156
  if (agentResult.exitCode !== 0 && agentResult.stdout.trim().length === 0) {
157
+ const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr, agentResult.exitCode);
160
158
  return {
161
159
  status: "failed",
162
- summary: `${options.command} subprocess exited with an error.`,
160
+ summary: `${options.command} subprocess exited before verifier execution.`,
163
161
  usage: normalizeUsage({
164
162
  actualUsd: 0,
165
163
  tokensIn: 0,
166
164
  tokensOut: 0,
167
165
  provenance: "unavailable"
168
166
  }),
169
- verification: { passed: false, summary: "Subprocess error." },
167
+ verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
170
168
  failure: {
171
- message: `${agentResult.stderr.trim() || `Exit code ${String(agentResult.exitCode)}`}. environment_mismatch`
169
+ message: failureMessage
172
170
  }
173
171
  };
174
172
  }
@@ -355,40 +353,52 @@ export function createClaudeCliAdapter(options = {}) {
355
353
  "--print",
356
354
  "--dangerously-skip-permissions",
357
355
  ...modelArgs,
358
- ...extraArgs,
359
- "--stdin-prompt" // sentinel: tells execute() to deliver prompt via stdin
360
- ]
356
+ ...extraArgs
357
+ ],
358
+ stdinBuilder: (prompt) => prompt
361
359
  });
362
360
  }
363
361
  // ---------------------------------------------------------------------------
364
362
  // Pre-configured: OpenAI Codex CLI
365
363
  // ---------------------------------------------------------------------------
366
364
  /**
367
- * Spawns `codex [--full-auto] [--model <model>] "<prompt>" [extraArgs]`.
365
+ * Spawns `codex exec --cd <workspace> --sandbox <mode> [--model <model>] [extraArgs] -`.
366
+ *
367
+ * The prompt is delivered via stdin so Windows shell quoting cannot truncate or
368
+ * reinterpret long MartinLoop prompts that contain paths, deny rules, or budget
369
+ * context.
368
370
  *
369
371
  * Requires the Codex CLI to be installed and authenticated:
370
372
  * npm install -g @openai/codex
371
373
  */
372
374
  export function createCodexCliAdapter(options = {}) {
373
- const fullAuto = options.fullAuto !== false;
374
375
  const modelArgs = options.model ? ["--model", options.model] : [];
375
376
  const extraArgs = options.extraArgs ?? [];
377
+ const sandbox = options.sandbox ?? "workspace-write";
378
+ const workingDirectory = options.workingDirectory ?? process.cwd();
376
379
  return createAgentCliAdapter({
377
380
  command: "codex",
378
381
  adapterIdSuffix: "codex",
379
382
  model: options.model ?? "codex",
380
383
  label: options.label ?? "Codex CLI adapter",
381
- workingDirectory: options.workingDirectory,
384
+ workingDirectory,
382
385
  timeoutMs: options.timeoutMs,
383
386
  verifyTimeoutMs: options.verifyTimeoutMs,
384
387
  supportsJsonOutput: false,
385
388
  spawnImpl: options.spawnImpl,
386
- argsBuilder: (prompt) => [
387
- ...(fullAuto ? ["--full-auto"] : []),
389
+ argsBuilder: () => [
390
+ "exec",
391
+ "--cd",
392
+ workingDirectory,
393
+ "--sandbox",
394
+ sandbox,
395
+ "--color",
396
+ "never",
388
397
  ...modelArgs,
389
- prompt,
390
- ...extraArgs
391
- ]
398
+ ...extraArgs,
399
+ "-"
400
+ ],
401
+ stdinBuilder: (prompt) => prompt
392
402
  });
393
403
  }
394
404
  // ---------------------------------------------------------------------------
@@ -402,14 +412,23 @@ export function createCodexCliAdapter(options = {}) {
402
412
  // ---------------------------------------------------------------------------
403
413
  function buildPrompt(request) {
404
414
  const lines = [];
415
+ const mutationMode = request.context.mutationMode ?? "edit";
405
416
  lines.push("You are running in autonomous agentic mode.");
406
- lines.push("MAKE ALL REQUIRED FILE EDITS NOW. Do not ask for confirmation. Do not ask clarifying questions.");
407
- lines.push("Do not explain what you found without also making the changes. Edit the files and complete the task.");
417
+ if (mutationMode === "verify_only") {
418
+ lines.push("DO NOT EDIT FILES. Run the verifier only and report whether it passes.");
419
+ lines.push("Do not ask for confirmation. Do not ask clarifying questions.");
420
+ }
421
+ else {
422
+ lines.push("MAKE ALL REQUIRED FILE EDITS NOW. Do not ask for confirmation. Do not ask clarifying questions.");
423
+ lines.push("Do not explain what you found without also making the changes. Edit the files and complete the task.");
424
+ }
408
425
  lines.push("");
409
426
  lines.push("If PROGRESS.md exists in your working directory, read it first for context from prior attempts.");
410
427
  lines.push("If it does not exist, proceed with the objective below.");
411
428
  lines.push("");
412
- lines.push("Complete the following coding task. Make all necessary file changes.");
429
+ lines.push(mutationMode === "verify_only"
430
+ ? "Complete the following verification-only task without making file changes."
431
+ : "Complete the following coding task. Make all necessary file changes.");
413
432
  lines.push("When you are done, the verification commands listed below must pass.");
414
433
  lines.push("");
415
434
  lines.push("OBJECTIVE:");
@@ -447,7 +466,9 @@ function buildPrompt(request) {
447
466
  lines.push(` Attempt ${String(attemptNumber)}`);
448
467
  lines.push(` Remaining budget: $${String(request.context.remainingBudgetUsd)} USD`);
449
468
  lines.push(` Remaining iterations: ${String(request.context.remainingIterations)}`);
450
- lines.push(" Do not expand scope beyond what is needed to pass verification.");
469
+ lines.push(mutationMode === "verify_only"
470
+ ? " Do not modify files; only run verification."
471
+ : " Do not expand scope beyond what is needed to pass verification.");
451
472
  lines.push("");
452
473
  if (request.previousAttempts.length > 0) {
453
474
  lines.push("PRIOR FAILED ATTEMPTS (learn from these — do not repeat the same mistakes):");
@@ -494,6 +515,16 @@ function truncate(text, maxLength) {
494
515
  }
495
516
  return `...${text.slice(-(maxLength - 3))}`;
496
517
  }
518
+ function formatPreVerifierSubprocessFailure(command, stderr, exitCode) {
519
+ const detail = stderr.trim() || `Exit code ${String(exitCode)}`;
520
+ const lowerDetail = detail.toLowerCase();
521
+ const codexLaunchBlocked = command === "codex" &&
522
+ /\b(full-auto|sandbox|approval|permission|trusted|safety|unexpected argument)\b/u.test(lowerDetail);
523
+ if (codexLaunchBlocked) {
524
+ return `Codex CLI failed before patch completion, likely due to its launch/sandbox configuration. MartinLoop invokes Codex through "codex exec --sandbox workspace-write"; verify Codex CLI auth and configuration if this persists. ${detail}. environment_mismatch`;
525
+ }
526
+ return `${detail}. environment_mismatch`;
527
+ }
497
528
  const INJECTION_PATTERNS = [
498
529
  /\[INST\]/gi,
499
530
  /<\/?system>/gi,
@@ -26,3 +26,4 @@ export declare function readGitExecutionArtifacts(repoRoot: string, timeoutMs: n
26
26
  changedFiles?: string[];
27
27
  diffStats?: ReturnType<typeof diffStatsFromNumstat>;
28
28
  }>;
29
+ export declare function splitCommand(command: string): string[];