@martinloop/mcp 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/README.md +5 -4
  2. package/dist/package-version.d.ts +1 -1
  3. package/dist/package-version.js +1 -1
  4. package/dist/server-validation.js +2 -2
  5. package/dist/server.js +72 -10
  6. package/dist/tools/doctor.d.ts +27 -0
  7. package/dist/tools/doctor.js +39 -11
  8. package/dist/tools/get-run.d.ts +2 -1
  9. package/dist/tools/get-run.js +1 -0
  10. package/dist/tools/get-verification-results.d.ts +2 -1
  11. package/dist/tools/get-verification-results.js +1 -0
  12. package/dist/tools/plan.js +4 -2
  13. package/dist/tools/preflight.d.ts +27 -0
  14. package/dist/tools/preflight.js +44 -20
  15. package/dist/tools/run-dossier.d.ts +2 -1
  16. package/dist/tools/run-dossier.js +1 -0
  17. package/dist/tools/run-loop.d.ts +5 -1
  18. package/dist/tools/run-loop.js +20 -8
  19. package/dist/tools/run-store.js +67 -15
  20. package/dist/tools/tool-support.d.ts +2 -0
  21. package/dist/tools/tool-support.js +49 -13
  22. package/dist/tools/workflow-governance.d.ts +19 -3
  23. package/dist/tools/workflow-governance.js +107 -55
  24. package/dist/vendor/adapters/claude-cli.d.ts +20 -3
  25. package/dist/vendor/adapters/claude-cli.js +193 -33
  26. package/dist/vendor/adapters/cli-bridge.d.ts +45 -0
  27. package/dist/vendor/adapters/cli-bridge.js +107 -39
  28. package/dist/vendor/adapters/codex-launcher.d.ts +32 -0
  29. package/dist/vendor/adapters/codex-launcher.js +409 -118
  30. package/dist/vendor/adapters/openai-compatible.js +8 -2
  31. package/dist/vendor/adapters/runtime-support.js +1 -0
  32. package/dist/vendor/adapters/stub-direct-provider.js +3 -0
  33. package/dist/vendor/adapters/verifier-only.d.ts +2 -0
  34. package/dist/vendor/adapters/verifier-only.js +9 -3
  35. package/dist/vendor/contracts/index.d.ts +2 -1
  36. package/dist/vendor/contracts/index.js +14 -0
  37. package/dist/vendor/core/context-integrity.js +28 -3
  38. package/dist/vendor/core/grounding.d.ts +1 -0
  39. package/dist/vendor/core/grounding.js +6 -2
  40. package/dist/vendor/core/index.d.ts +1 -0
  41. package/dist/vendor/core/index.js +25 -6
  42. package/dist/vendor/core/leash.js +90 -8
  43. package/dist/vendor/core/persistence/integrity.d.ts +1 -1
  44. package/dist/vendor/core/persistence/integrity.js +15 -6
  45. package/dist/workflow-state.d.ts +9 -0
  46. package/dist/workflow-state.js +44 -3
  47. package/package.json +2 -2
  48. package/server.json +2 -2
@@ -1,9 +1,17 @@
1
1
  import { createClaudeCliAdapter, createCodexCliAdapter, createGeminiCliAdapter, probeCodexLaunch, resolveCliCommandAvailability, createVerifierOnlyAdapter } from "../vendor/adapters/index.js";
2
2
  import { createFileRunStore, evaluateCostGovernor, resolveRunsRoot, runMartin } from "../vendor/core/index.js";
3
- import { DEFAULT_BUDGET } from "../vendor/contracts/index.js";
4
3
  import { normalizeSafePathPatterns, resolveSafeRepoRoot } from "../server-validation.js";
5
4
  import { MartinToolError } from "./tool-errors.js";
6
5
  import { buildArtifactSummary, buildVerificationSummary, buildLoopPreview, buildRunRecordPaths, getEngineAvailability, resolveExecutionMode } from "./tool-support.js";
6
+ import { normalizeLoopBudget } from "./workflow-governance.js";
7
/**
 * Test seams consulted by `runLoopTool`.
 *
 * - `proofModeVerifierSpawnImpl`: when set, it is forwarded as `spawnImpl`
 *   to the verifier-only adapter that proof mode creates.
 * - `runStoreOverrideForTests`: when set (non-nullish), it is used as the run
 *   store instead of a freshly created file run store.
 *
 * Both start out unset, so production callers are unaffected.
 */
let proofModeVerifierSpawnImpl = undefined;
let runStoreOverrideForTests = undefined;

/**
 * Test-only hook: installs (or, with `undefined`, clears) the spawn
 * implementation handed to the proof-mode verifier adapter.
 */
export function __setProofModeVerifierSpawnImplForTests(spawnImpl) {
    proofModeVerifierSpawnImpl = spawnImpl;
}

/**
 * Test-only hook: installs (or, with `undefined`, clears) the run store that
 * replaces the default file-backed store.
 */
export function __setRunStoreOverrideForTests(store) {
    runStoreOverrideForTests = store;
}
7
15
  export async function runLoopTool(input) {
8
16
  const workingDirectory = resolveSafeRepoRoot(input.workingDirectory);
9
17
  const engine = input.engine ?? "claude";
@@ -19,6 +27,7 @@ export async function runLoopTool(input) {
19
27
  repoRoot: workingDirectory,
20
28
  runsRoot
21
29
  };
30
+ let codexCommandOverride;
22
31
  if (executionMode.liveMode) {
23
32
  if (engine === "codex") {
24
33
  const engineAvailability = resolveCliCommandAvailability("codex");
@@ -40,6 +49,7 @@ export async function runLoopTool(input) {
40
49
  retryable: false
41
50
  });
42
51
  }
52
+ codexCommandOverride = codexProbe.command;
43
53
  }
44
54
  else {
45
55
  const engineAvailability = getEngineAvailability(engine);
@@ -55,10 +65,15 @@ export async function runLoopTool(input) {
55
65
  const adapter = !executionMode.liveMode
56
66
  ? createVerifierOnlyAdapter({
57
67
  workingDirectory,
58
- label: "Proof mode adapter (MARTIN_LIVE=false)"
68
+ label: "Proof mode adapter (MARTIN_LIVE=false)",
69
+ ...(proofModeVerifierSpawnImpl ? { spawnImpl: proofModeVerifierSpawnImpl } : {})
59
70
  })
60
71
  : engine === "codex"
61
- ? createCodexCliAdapter({ workingDirectory, ...(model ? { model } : {}) })
72
+ ? createCodexCliAdapter({
73
+ workingDirectory,
74
+ ...(model ? { model } : {}),
75
+ ...(codexCommandOverride ? { command: codexCommandOverride } : {})
76
+ })
62
77
  : engine === "gemini"
63
78
  ? createGeminiCliAdapter({ workingDirectory, ...(model ? { model } : {}) })
64
79
  : createClaudeCliAdapter({ workingDirectory, ...(model ? { model } : {}) });
@@ -72,14 +87,11 @@ export async function runLoopTool(input) {
72
87
  if (input.maxTokens !== undefined) {
73
88
  partialBudget.maxTokens = input.maxTokens;
74
89
  }
75
- const budget = {
76
- ...DEFAULT_BUDGET,
77
- ...partialBudget
78
- };
90
+ const budget = normalizeLoopBudget(partialBudget);
79
91
  const result = await runMartin({
80
92
  workspaceId: input.workspaceId ?? "ws_mcp",
81
93
  projectId: input.projectId ?? "proj_mcp",
82
- store: createFileRunStore({ runsRoot }),
94
+ store: runStoreOverrideForTests ?? createFileRunStore({ runsRoot }),
83
95
  receiptScope,
84
96
  task: {
85
97
  title: input.objective.slice(0, 100),
@@ -1,8 +1,60 @@
1
1
  import { readFile, readdir, stat } from "node:fs/promises";
2
2
  import path from "node:path";
3
- import { readLatestLoopRecordFromFile, readLoopRecordsFromFile, resolveRunsRoot } from "../vendor/core/index.js";
3
+ import { readLatestLoopRecordFromFile, readLoopRecordsFromFile, resolveRunsRoot, verifyReceiptIntegrityFromFiles } from "../vendor/core/index.js";
4
4
  import { resolveSafeLoopRecordPath, resolveSafeRunsJsonPath, resolveSafeRunsPath, resolveSafeRunsRootPath } from "../server-validation.js";
5
5
  import { attemptNotFoundError, invalidSelectorError, noLoopRecordsError, storeUnreadableError } from "./tool-errors.js";
6
/**
 * Decorates a detailed loop source with receipt-integrity information.
 *
 * For sources that carry a canonical run directory, the evidence file is
 * looked up inside that directory; otherwise the `ledgerPath` already present
 * on `detail` is kept. Integrity is verified only when a canonical loop
 * record path, a canonical run directory and an evidence file are all
 * available. Every other outcome is reported as `state: "unsigned"` with a
 * reason that names the actual cause:
 *
 * - the source is not a canonical run directory,
 * - the canonical run directory has no evidence file, or
 * - verification itself failed (best effort: the error is not rethrown).
 *
 * @param detail Detailed loop source (`loop`, `runsRoot`, optional canonical paths).
 * @returns A copy of `detail` whose `loop` carries `receiptIntegrity` and,
 *   when one can be derived, `receiptScope`; `ledgerPath` is overwritten only
 *   when an evidence path was resolved.
 */
async function attachReceiptIntegrity(detail) {
    const hasCanonicalRunDirectory = Boolean(detail.canonicalRunDirectory);
    const ledgerPath = hasCanonicalRunDirectory
        ? await resolveReceiptEvidencePath(detail.canonicalRunDirectory)
        : detail.ledgerPath;
    let integrity;
    if (!detail.canonicalLoopRecordPath || !hasCanonicalRunDirectory) {
        integrity = {
            state: "unsigned",
            reason: "Receipt integrity is only available for canonical run directories."
        };
    }
    else if (!ledgerPath) {
        // The directory IS canonical; what is missing is the evidence file.
        // Saying so keeps the diagnostic from pointing at the wrong cause.
        integrity = {
            state: "unsigned",
            reason: "Receipt integrity could not be verified because the run directory has no ledger.jsonl or events.jsonl evidence file."
        };
    }
    else {
        try {
            integrity = await verifyReceiptIntegrityFromFiles({
                runId: detail.loop.loopId,
                runsRoot: detail.runsRoot,
                loopRecordPath: detail.canonicalLoopRecordPath,
                ledgerPath
            });
        }
        catch {
            // Deliberate best effort: inspection must still succeed when the
            // integrity check cannot run.
            integrity = {
                state: "unsigned",
                reason: "Receipt integrity verification could not be completed."
            };
        }
    }
    const receiptScope = resolveReceiptScope(detail.loop, detail.runsRoot);
    return {
        ...detail,
        ...(ledgerPath ? { ledgerPath } : {}),
        loop: {
            ...detail.loop,
            receiptIntegrity: integrity,
            ...(receiptScope ? { receiptScope } : {})
        }
    };
}
35
/**
 * Picks the receipt scope to report for a loop.
 *
 * A scope already recorded on the loop is returned as-is. Otherwise one is
 * derived from the task's `repoRoot` (used for both `repoRoot` and
 * `workingDirectory`) and the given `runsRoot`; when neither is available the
 * result is `undefined`.
 */
function resolveReceiptScope(loop, runsRoot) {
    const recordedScope = loop.receiptScope;
    if (recordedScope) {
        return recordedScope;
    }
    const repoRoot = loop.task?.repoRoot;
    if (!repoRoot && !runsRoot) {
        return undefined;
    }
    const derivedScope = {};
    if (repoRoot) {
        derivedScope.repoRoot = repoRoot;
        derivedScope.workingDirectory = repoRoot;
    }
    if (runsRoot) {
        derivedScope.runsRoot = runsRoot;
    }
    return derivedScope;
}
48
/**
 * Finds the receipt evidence file inside a run directory.
 *
 * `ledger.jsonl` is preferred; `events.jsonl` is the fallback. Candidates are
 * probed one at a time, in that order, and the first one that is a regular
 * file wins.
 *
 * @returns The path of the first matching file, or `undefined` when neither exists.
 */
async function resolveReceiptEvidencePath(runDirectory) {
    const evidenceFileNames = ["ledger.jsonl", "events.jsonl"];
    for (let index = 0; index < evidenceFileNames.length; index += 1) {
        const evidencePath = path.join(runDirectory, evidenceFileNames[index]);
        // NOTE(review): safeStat presumably yields undefined for paths that
        // cannot be stat'ed; confirm against its definition.
        const evidenceStats = await safeStat(evidencePath);
        if (evidenceStats?.isFile()) {
            return evidencePath;
        }
    }
    return undefined;
}
6
58
  export async function loadLoopRecordsForInspect(input) {
7
59
  const runsRoot = resolveSafeRunsRootPath(input.runsDir, resolveRunsRoot(process.env));
8
60
  if (!input.file) {
@@ -118,14 +170,14 @@ export async function loadDetailedLoopRecord(input) {
118
170
  const canonicalStats = await safeStat(canonicalLoopRecordPath);
119
171
  if (canonicalStats?.isFile()) {
120
172
  const loop = await readCanonicalLoopRecord(canonicalLoopRecordPath);
121
- return buildDetailedLoopSource({
173
+ return await attachReceiptIntegrity(buildDetailedLoopSource({
122
174
  source: canonicalLoopRecordPath,
123
175
  sourceKind: "file",
124
176
  runsRoot,
125
177
  loop,
126
178
  canonicalLoopRecordPath,
127
179
  canonicalRunDirectory: path.dirname(canonicalLoopRecordPath)
128
- });
180
+ }));
129
181
  }
130
182
  }
131
183
  const inspected = await readAllLoopRecordsSafely(targetPath);
@@ -139,10 +191,10 @@ export async function loadDetailedLoopRecord(input) {
139
191
  runsRoot,
140
192
  loop
141
193
  });
142
- return {
194
+ return await attachReceiptIntegrity({
143
195
  ...detail,
144
196
  warnings: [...detail.warnings, ...inspected.warnings]
145
- };
197
+ });
146
198
  }
147
199
  const latest = await readLatestLoopRecordFromFile(targetPath);
148
200
  if (!latest) {
@@ -150,35 +202,35 @@ export async function loadDetailedLoopRecord(input) {
150
202
  }
151
203
  if (path.basename(targetPath) === "loop-record.json") {
152
204
  const loop = await readCanonicalLoopRecord(targetPath);
153
- return buildDetailedLoopSource({
205
+ return await attachReceiptIntegrity(buildDetailedLoopSource({
154
206
  source: targetPath,
155
207
  sourceKind: "file",
156
208
  runsRoot,
157
209
  loop,
158
210
  canonicalLoopRecordPath: targetPath,
159
211
  canonicalRunDirectory: path.dirname(targetPath)
160
- });
212
+ }));
161
213
  }
162
- return await buildDetailedLoopSourceFromDiscoveredLoop({
214
+ return await attachReceiptIntegrity(await buildDetailedLoopSourceFromDiscoveredLoop({
163
215
  source: targetPath,
164
216
  sourceKind: "file",
165
217
  runsRoot,
166
218
  loop: latest
167
- });
219
+ }));
168
220
  }
169
221
  if (input.loopId) {
170
222
  const canonicalLoopRecordPath = resolvePotentialLoopRecordPath(input.loopId, runsRoot);
171
223
  const canonicalStats = await safeStat(canonicalLoopRecordPath);
172
224
  if (canonicalStats?.isFile()) {
173
225
  const loop = await readCanonicalLoopRecord(canonicalLoopRecordPath);
174
- return buildDetailedLoopSource({
226
+ return await attachReceiptIntegrity(buildDetailedLoopSource({
175
227
  source: canonicalLoopRecordPath,
176
228
  sourceKind: "loop_id",
177
229
  runsRoot,
178
230
  loop,
179
231
  canonicalLoopRecordPath,
180
232
  canonicalRunDirectory: path.dirname(canonicalLoopRecordPath)
181
- });
233
+ }));
182
234
  }
183
235
  const inspected = await readAllLoopRecordsSafely(runsRoot);
184
236
  const loop = inspected.loops.find((candidate) => candidate.loopId === input.loopId);
@@ -191,10 +243,10 @@ export async function loadDetailedLoopRecord(input) {
191
243
  runsRoot,
192
244
  loop
193
245
  });
194
- return {
246
+ return await attachReceiptIntegrity({
195
247
  ...detail,
196
248
  warnings: [...detail.warnings, ...inspected.warnings]
197
- };
249
+ });
198
250
  }
199
251
  const inspected = await readAllLoopRecordsSafely(runsRoot);
200
252
  const loop = inspected.loops[0];
@@ -207,10 +259,10 @@ export async function loadDetailedLoopRecord(input) {
207
259
  runsRoot,
208
260
  loop
209
261
  });
210
- return {
262
+ return await attachReceiptIntegrity({
211
263
  ...detail,
212
264
  warnings: [...detail.warnings, ...inspected.warnings]
213
- };
265
+ });
214
266
  }
215
267
  export async function loadAttemptFromLoop(input) {
216
268
  const detail = await loadDetailedLoopRecord(input);
@@ -89,6 +89,7 @@ export interface CliAvailability {
89
89
  locator: string;
90
90
  detail: string;
91
91
  resolvedPath?: string;
92
+ candidatePaths?: string[];
92
93
  }
93
94
  export interface ExecutionMode {
94
95
  liveMode: boolean;
@@ -110,6 +111,7 @@ export interface CanonicalRunPaths {
110
111
  export declare function resolveExecutionMode(): ExecutionMode;
111
112
  export declare function detectCliAvailability(command: string): CliAvailability;
112
113
  export declare function getEngineAvailability(engine: MartinEngine): CliAvailability;
114
+ export declare function createSkippedCliAvailability(command: string, detail?: string): CliAvailability;
113
115
  export declare function formatUsd(value: number): string;
114
116
  export declare function buildLoopPreview(loop: InspectableLoopRecord): LoopPreview;
115
117
  export declare function buildAttemptSummary(attempt: InspectableLoopAttempt, artifacts?: AttemptArtifactFiles): AttemptSummary;
@@ -1,4 +1,4 @@
1
- import { spawnSync } from "node:child_process";
1
import { accessSync, constants, statSync } from "node:fs";
2
2
  import { readdir, stat } from "node:fs/promises";
3
3
  import { join } from "node:path";
4
4
  import { evaluateCostGovernor, resolveRunsRoot } from "../vendor/core/index.js";
@@ -23,18 +23,9 @@ export function detectCliAvailability(command) {
23
23
  if (cached && cached.expiresAt > Date.now()) {
24
24
  return cached.value;
25
25
  }
26
- const locator = process.platform === "win32" ? "where.exe" : "which";
27
- const result = spawnSync(locator, [command], {
28
- encoding: "utf8",
29
- stdio: ["ignore", "pipe", "pipe"]
30
- });
31
- const resolvedPath = result.status === 0
32
- ? (result.stdout ?? "")
33
- .split(/\r?\n/u)
34
- .map((line) => line.trim())
35
- .find(Boolean)
36
- : undefined;
37
- const value = result.status === 0
26
+ const locator = process.platform === "win32" ? "path-scan(win32)" : "path-scan(posix)";
27
+ const resolvedPath = findCommandOnPath(command);
28
+ const value = resolvedPath
38
29
  ? {
39
30
  command,
40
31
  available: true,
@@ -54,9 +45,54 @@ export function detectCliAvailability(command) {
54
45
  });
55
46
  return value;
56
47
  }
48
/**
 * Resolves a command name by scanning the directories listed in the PATH
 * environment variable, without spawning `which` / `where.exe`.
 *
 * The PATH variable is looked up case-insensitively (it may be spelled
 * `Path`). Entries are split on `;` on win32 and `:` elsewhere; blank entries
 * are ignored.
 *
 * On win32 the candidate file names depend on the command's extension:
 * - it already ends in a PATHEXT extension (e.g. `codex.cmd`): tried as-is;
 * - it contains a dot that is not a PATHEXT extension (e.g. `python3.11`):
 *   tried as-is first, then with each PATHEXT extension appended, so
 *   `python3.11.exe` is found;
 * - it has no extension: tried with each PATHEXT extension appended.
 * Elsewhere only the bare command name is tried.
 *
 * @param command Command name to locate.
 * @returns The first matching path, or `undefined` when nothing matches or
 *   PATH is unset/empty.
 */
function findCommandOnPath(command) {
    const pathKey = Object.keys(process.env).find((key) => key.toLowerCase() === "path");
    const rawPath = pathKey ? process.env[pathKey] : undefined;
    if (!rawPath) {
        return undefined;
    }
    const isWindows = process.platform === "win32";
    const pathEntries = rawPath
        .split(isWindows ? ";" : ":")
        .map((entry) => entry.trim())
        .filter(Boolean);
    let candidateNames = [command];
    if (isWindows) {
        const executableExtensions = (process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD")
            .split(";")
            .map((extension) => extension.trim().toLowerCase())
            .filter(Boolean);
        const loweredCommand = command.toLowerCase();
        const hasExecutableExtension = executableExtensions.some((extension) => loweredCommand.endsWith(extension));
        if (!hasExecutableExtension) {
            const suffixedNames = executableExtensions.map((extension) => `${command}${extension}`);
            // A dot alone does not make a name executable on Windows
            // ("python3.11"), so keep the literal name AND try PATHEXT suffixes.
            const hasSomeExtension = /\.[A-Za-z0-9]+$/u.test(command);
            candidateNames = hasSomeExtension ? [command, ...suffixedNames] : suffixedNames;
        }
    }
    for (const directory of pathEntries) {
        for (const candidateName of candidateNames) {
            const candidatePath = join(directory, candidateName);
            if (isExecutablePath(candidatePath)) {
                return candidatePath;
            }
        }
    }
    return undefined;
}
76
/**
 * Reports whether `candidatePath` is a regular file that can be executed.
 *
 * Directories are rejected explicitly: an access check alone succeeds for any
 * searchable directory, which would let a directory that shares the
 * command's name be reported as the resolved executable.
 *
 * On win32 only existence is checked (`F_OK`); elsewhere execute permission
 * (`X_OK`) is required. `statSync` follows symlinks, so a symlink to an
 * executable file is accepted.
 *
 * @returns `true` when the path is an accessible regular file; `false` for
 *   directories, missing paths, or any filesystem error.
 */
function isExecutablePath(candidatePath) {
    try {
        if (!statSync(candidatePath).isFile()) {
            return false;
        }
        accessSync(candidatePath, process.platform === "win32" ? constants.F_OK : constants.X_OK);
        return true;
    }
    catch {
        return false;
    }
}
57
85
/**
 * Availability of the CLI for an engine. The engine name is passed straight
 * to `detectCliAvailability` as the command to look up.
 */
export function getEngineAvailability(engine) {
    const availability = detectCliAvailability(engine);
    return availability;
}
88
/**
 * Builds the availability record used when CLI detection is intentionally not
 * performed: the command is reported as unavailable with locator `"skipped"`.
 *
 * @param command Command name the record describes.
 * @param detail Explanation to attach; defaults to the proof-mode message.
 */
export function createSkippedCliAvailability(command, detail = "Proof mode skipped live CLI availability detection.") {
    const skippedAvailability = {
        command,
        available: false,
        locator: "skipped",
        detail
    };
    return skippedAvailability;
}
60
96
/**
 * Formats a numeric amount as a dollar string with exactly two decimals,
 * e.g. `12.5` becomes `"$12.50"`.
 */
export function formatUsd(value) {
    const fixedAmount = value.toFixed(2);
    return "$" + fixedAmount;
}
@@ -1,3 +1,4 @@
1
+ import { type LoopBudget } from "../vendor/contracts/index.js";
1
2
  import { type RunStoreInspection } from "./tool-support.js";
2
3
  export type MartinPolicyPack = "solo-founder" | "startup-team" | "enterprise-strict" | "oss-maintainer" | "security-sensitive";
3
4
  export interface RepoGitState {
@@ -23,6 +24,7 @@ export interface RepoSignals {
23
24
  packageScripts: Record<string, string>;
24
25
  git: RepoGitState;
25
26
  sensitivePaths: string[];
27
+ hostAvailabilityChecked: boolean;
26
28
  availableHosts: Record<"claude" | "codex" | "cursor" | "gemini", {
27
29
  available: boolean;
28
30
  detail: string;
@@ -110,11 +112,25 @@ interface ContractOverrides {
110
112
  maxFilesChanged?: number;
111
113
  maxCommands?: number;
112
114
  }
113
- export declare function inspectRepoSignals(workingDirectory: string): RepoSignals;
115
+ interface LoopBudgetOverrides {
116
+ maxUsd?: number;
117
+ softLimitUsd?: number;
118
+ maxIterations?: number;
119
+ maxTokens?: number;
120
+ }
121
+ export declare function inspectRepoSignals(workingDirectory: string, options?: {
122
+ includeHostAvailability?: boolean;
123
+ }): RepoSignals;
114
124
  export declare function buildReadinessReport(signals: RepoSignals, runStore: RunStoreInspection): MartinReadinessReport;
115
125
  export declare function buildPolicyPackDefinition(policyPack: MartinPolicyPack | undefined, signals: RepoSignals): MartinPolicyPackDefinition;
116
- export declare function buildPlanProposal(workingDirectory: string, overrides: ContractOverrides): MartinPlanProposal;
117
- export declare function buildRunContract(workingDirectory: string, overrides: ContractOverrides): MartinRunContract;
126
+ export declare function buildPlanProposal(workingDirectory: string, overrides: ContractOverrides, options?: {
127
+ signals?: RepoSignals;
128
+ }): MartinPlanProposal;
129
+ export declare function buildRunContract(workingDirectory: string, overrides: ContractOverrides, options?: {
130
+ signals?: RepoSignals;
131
+ plan?: MartinPlanProposal;
132
+ }): MartinRunContract;
133
+ export declare function normalizeLoopBudget(overrides?: LoopBudgetOverrides): LoopBudget;
118
134
  export declare function assessRunRisk(input: {
119
135
  objective: string;
120
136
  context?: string;
@@ -2,13 +2,17 @@ import { existsSync, readFileSync } from "node:fs";
2
2
  import path from "node:path";
3
3
  import { spawnSync } from "node:child_process";
4
4
  import { DEFAULT_BUDGET } from "../vendor/contracts/index.js";
5
- import { detectCliAvailability } from "./tool-support.js";
5
+ import { createSkippedCliAvailability, detectCliAvailability } from "./tool-support.js";
6
6
  const HOST_COMMANDS = {
7
7
  claude: "claude",
8
8
  codex: "codex",
9
9
  cursor: "cursor",
10
10
  gemini: "gemini"
11
11
  };
12
+ const REPO_SIGNALS_CACHE_TTL_MS = 5_000;
13
+ const repoSignalsCache = new Map();
14
+ const GIT_STATE_CACHE_TTL_MS = 60_000;
15
+ const repoGitStateCache = new Map();
12
16
  const POLICY_PACKS = {
13
17
  "solo-founder": {
14
18
  name: "solo-founder",
@@ -110,13 +114,19 @@ const POLICY_PACKS = {
110
114
  requireApprovalAtOrAbove: "medium"
111
115
  }
112
116
  };
113
- export function inspectRepoSignals(workingDirectory) {
117
+ export function inspectRepoSignals(workingDirectory, options = {}) {
118
+ const includeHostAvailability = options.includeHostAvailability ?? true;
119
+ const cacheKey = `${workingDirectory}::hosts=${includeHostAvailability ? "live" : "skipped"}`;
120
+ const cached = repoSignalsCache.get(cacheKey);
121
+ if (cached && cached.expiresAt > Date.now()) {
122
+ return cached.value;
123
+ }
114
124
  const packageScripts = readPackageScripts(workingDirectory);
115
125
  const packageManager = detectPackageManager(workingDirectory);
116
126
  const frameworks = detectFrameworks(workingDirectory, packageScripts);
117
127
  const languages = detectLanguages(workingDirectory, frameworks);
118
128
  const verifiers = detectVerifierCommands(packageScripts, packageManager);
119
- return {
129
+ const signals = {
120
130
  workingDirectory,
121
131
  packageManager,
122
132
  languages,
@@ -125,13 +135,27 @@ export function inspectRepoSignals(workingDirectory) {
125
135
  packageScripts,
126
136
  git: detectGitState(workingDirectory),
127
137
  sensitivePaths: detectSensitivePaths(workingDirectory),
138
+ hostAvailabilityChecked: includeHostAvailability,
128
139
  availableHosts: {
129
- claude: detectCliAvailability(HOST_COMMANDS.claude),
130
- codex: detectCliAvailability(HOST_COMMANDS.codex),
131
- cursor: detectCliAvailability(HOST_COMMANDS.cursor),
132
- gemini: detectCliAvailability(HOST_COMMANDS.gemini)
140
+ claude: includeHostAvailability
141
+ ? detectCliAvailability(HOST_COMMANDS.claude)
142
+ : createSkippedCliAvailability(HOST_COMMANDS.claude),
143
+ codex: includeHostAvailability
144
+ ? detectCliAvailability(HOST_COMMANDS.codex)
145
+ : createSkippedCliAvailability(HOST_COMMANDS.codex),
146
+ cursor: includeHostAvailability
147
+ ? detectCliAvailability(HOST_COMMANDS.cursor)
148
+ : createSkippedCliAvailability(HOST_COMMANDS.cursor),
149
+ gemini: includeHostAvailability
150
+ ? detectCliAvailability(HOST_COMMANDS.gemini)
151
+ : createSkippedCliAvailability(HOST_COMMANDS.gemini)
133
152
  }
134
153
  };
154
+ repoSignalsCache.set(cacheKey, {
155
+ expiresAt: Date.now() + REPO_SIGNALS_CACHE_TTL_MS,
156
+ value: signals
157
+ });
158
+ return signals;
135
159
  }
136
160
  export function buildReadinessReport(signals, runStore) {
137
161
  const missingSafeguards = [];
@@ -155,7 +179,9 @@ export function buildReadinessReport(signals, runStore) {
155
179
  if (signals.frameworks.length === 0) {
156
180
  score -= 8;
157
181
  }
158
- if (!signals.availableHosts.claude.available && !signals.availableHosts.codex.available) {
182
+ if (signals.hostAvailabilityChecked &&
183
+ !signals.availableHosts.claude.available &&
184
+ !signals.availableHosts.codex.available) {
159
185
  score -= 18;
160
186
  }
161
187
  score = Math.max(0, Math.min(100, score));
@@ -189,8 +215,8 @@ export function buildPolicyPackDefinition(policyPack, signals) {
189
215
  : fallbackVerifierPlan(signals.packageManager)
190
216
  };
191
217
  }
192
- export function buildPlanProposal(workingDirectory, overrides) {
193
- const signals = inspectRepoSignals(workingDirectory);
218
+ export function buildPlanProposal(workingDirectory, overrides, options = {}) {
219
+ const signals = options.signals ?? inspectRepoSignals(workingDirectory);
194
220
  const policy = buildPolicyPackDefinition(overrides.policyPack, signals);
195
221
  const scope = inferScopeFromObjective(overrides.objective, policy, overrides);
196
222
  const estimatedBudget = buildBudget(overrides, signals);
@@ -223,8 +249,8 @@ export function buildPlanProposal(workingDirectory, overrides) {
223
249
  ]
224
250
  };
225
251
  }
226
- export function buildRunContract(workingDirectory, overrides) {
227
- const plan = buildPlanProposal(workingDirectory, overrides);
252
+ export function buildRunContract(workingDirectory, overrides, options = {}) {
253
+ const plan = options.plan ?? buildPlanProposal(workingDirectory, overrides, options);
228
254
  return {
229
255
  objective: overrides.objective,
230
256
  ...(overrides.context ? { context: overrides.context } : {}),
@@ -238,6 +264,16 @@ export function buildRunContract(workingDirectory, overrides) {
238
264
  shouldRequireApproval(plan.policyPack.requireApprovalAtOrAbove, plan.risk.level)
239
265
  };
240
266
  }
267
/**
 * Produces a complete loop budget from optional overrides.
 *
 * Each field falls back to the matching `DEFAULT_BUDGET` value when the
 * override is `null` or `undefined`. The soft limit is additionally capped at
 * the resolved `maxUsd`, so it can never exceed the hard limit.
 *
 * @param overrides Optional `maxUsd`, `softLimitUsd`, `maxIterations`, `maxTokens`.
 * @returns A budget with all four fields populated.
 */
export function normalizeLoopBudget(overrides = {}) {
    const resolveField = (field) => overrides[field] ?? DEFAULT_BUDGET[field];
    const maxUsd = resolveField("maxUsd");
    const softLimitUsd = Math.min(resolveField("softLimitUsd"), maxUsd);
    const maxIterations = resolveField("maxIterations");
    const maxTokens = resolveField("maxTokens");
    return { maxUsd, softLimitUsd, maxIterations, maxTokens };
}
241
277
  export function assessRunRisk(input) {
242
278
  const reasons = [];
243
279
  let score = 12;
@@ -406,17 +442,10 @@ function detectVerifierCommands(scripts, packageManager) {
406
442
  return { test, lint, build, defaultPlan };
407
443
  }
408
444
  function detectGitState(workingDirectory) {
409
- const availability = spawnSync("git", ["--version"], {
410
- cwd: workingDirectory,
411
- encoding: "utf8",
412
- stdio: ["ignore", "pipe", "pipe"]
413
- });
414
- if (availability.status !== 0) {
415
- return {
416
- available: false,
417
- isRepo: false,
418
- clean: false
419
- };
445
+ const cacheKey = workingDirectory;
446
+ const cached = repoGitStateCache.get(cacheKey);
447
+ if (cached && cached.expiresAt > Date.now()) {
448
+ return cached.value;
420
449
  }
421
450
  const isRepo = spawnSync("git", ["rev-parse", "--is-inside-work-tree"], {
422
451
  cwd: workingDirectory,
@@ -424,18 +453,29 @@ function detectGitState(workingDirectory) {
424
453
  stdio: ["ignore", "pipe", "pipe"]
425
454
  });
426
455
  if (isRepo.status !== 0 || !/true/u.test(isRepo.stdout ?? "")) {
427
- return {
428
- available: true,
429
- isRepo: false,
430
- clean: false
431
- };
456
+ const availability = spawnSync("git", ["--version"], {
457
+ cwd: workingDirectory,
458
+ encoding: "utf8",
459
+ stdio: ["ignore", "pipe", "pipe"]
460
+ });
461
+ const value = availability.status !== 0
462
+ ? {
463
+ available: false,
464
+ isRepo: false,
465
+ clean: false
466
+ }
467
+ : {
468
+ available: true,
469
+ isRepo: false,
470
+ clean: false
471
+ };
472
+ repoGitStateCache.set(cacheKey, {
473
+ expiresAt: Date.now() + GIT_STATE_CACHE_TTL_MS,
474
+ value
475
+ });
476
+ return value;
432
477
  }
433
- const branch = spawnSync("git", ["branch", "--show-current"], {
434
- cwd: workingDirectory,
435
- encoding: "utf8",
436
- stdio: ["ignore", "pipe", "pipe"]
437
- }).stdout.trim();
438
- const status = spawnSync("git", ["status", "--porcelain", "--branch"], {
478
+ const status = spawnSync("git", ["status", "--porcelain=v2", "--branch", "--untracked-files=normal", "--ignored=no", "--", "."], {
439
479
  cwd: workingDirectory,
440
480
  encoding: "utf8",
441
481
  stdio: ["ignore", "pipe", "pipe"]
@@ -444,20 +484,42 @@ function detectGitState(workingDirectory) {
444
484
  .split(/\r?\n/u)
445
485
  .map((line) => line.trim())
446
486
  .filter(Boolean);
447
- const dirty = statusLines.some((line) => !line.startsWith("##"));
448
- const header = statusLines.find((line) => line.startsWith("##"));
449
- const upstream = header?.match(/\.\.\.([^\s[]+)/u)?.[1];
450
- const ahead = parseCount(header, /ahead (\d+)/u);
451
- const behind = parseCount(header, /behind (\d+)/u);
452
- return {
487
+ const dirty = statusLines.some((line) => !line.startsWith("#"));
488
+ const branch = statusLines
489
+ .find((line) => line.startsWith("# branch.head "))
490
+ ?.replace("# branch.head ", "")
491
+ .trim();
492
+ const upstream = statusLines
493
+ .find((line) => line.startsWith("# branch.upstream "))
494
+ ?.replace("# branch.upstream ", "")
495
+ .trim();
496
+ const aheadBehind = statusLines
497
+ .find((line) => line.startsWith("# branch.ab "))
498
+ ?.replace("# branch.ab ", "")
499
+ .trim()
500
+ .split(/\s+/u);
501
+ const aheadToken = aheadBehind?.find((token) => token.startsWith("+"));
502
+ const behindToken = aheadBehind?.find((token) => token.startsWith("-"));
503
+ const ahead = aheadToken && aheadToken.length > 1
504
+ ? Number.parseInt(aheadToken.slice(1), 10)
505
+ : undefined;
506
+ const behind = behindToken && behindToken.length > 1
507
+ ? Number.parseInt(behindToken.slice(1), 10)
508
+ : undefined;
509
+ const value = {
453
510
  available: true,
454
511
  isRepo: true,
455
512
  clean: !dirty,
456
- ...(branch ? { branch } : {}),
513
+ ...(branch && branch !== "(detached)" ? { branch } : {}),
457
514
  ...(upstream ? { upstream } : {}),
458
- ...(ahead !== undefined ? { ahead } : {}),
459
- ...(behind !== undefined ? { behind } : {})
515
+ ...(Number.isFinite(ahead) ? { ahead } : {}),
516
+ ...(Number.isFinite(behind) ? { behind } : {})
460
517
  };
518
+ repoGitStateCache.set(cacheKey, {
519
+ expiresAt: Date.now() + GIT_STATE_CACHE_TTL_MS,
520
+ value
521
+ });
522
+ return value;
461
523
  }
462
524
  function detectSensitivePaths(workingDirectory) {
463
525
  const candidates = [
@@ -520,11 +582,9 @@ function inferScopeFromObjective(objective, policy, overrides) {
520
582
  }
521
583
  function buildBudget(overrides, signals) {
522
584
  const defaultCommands = signals.verifiers.defaultPlan.length > 0 ? 12 : 8;
585
+ const normalizedBudget = normalizeLoopBudget(overrides);
523
586
  return {
524
- maxUsd: overrides.maxUsd ?? DEFAULT_BUDGET.maxUsd,
525
- softLimitUsd: Math.min(overrides.maxUsd ?? DEFAULT_BUDGET.maxUsd, DEFAULT_BUDGET.softLimitUsd),
526
- maxIterations: overrides.maxIterations ?? DEFAULT_BUDGET.maxIterations,
527
- maxTokens: overrides.maxTokens ?? DEFAULT_BUDGET.maxTokens,
587
+ ...normalizedBudget,
528
588
  maxMinutes: overrides.maxMinutes ?? 20,
529
589
  maxFilesChanged: overrides.maxFilesChanged ?? 8,
530
590
  maxCommands: overrides.maxCommands ?? defaultCommands
@@ -571,11 +631,3 @@ function shouldRequireApproval(threshold, level) {
571
631
  const ordering = ["low", "medium", "high"];
572
632
  return ordering.indexOf(level) >= ordering.indexOf(threshold);
573
633
  }
574
- function parseCount(value, pattern) {
575
- const match = value?.match(pattern)?.[1];
576
- if (!match) {
577
- return undefined;
578
- }
579
- const parsed = Number.parseInt(match, 10);
580
- return Number.isFinite(parsed) ? parsed : undefined;
581
- }
@@ -45,6 +45,15 @@ export interface AgentCliAdapterOptions {
45
45
  * Defaults to true for Claude.
46
46
  */
47
47
  supportsJsonOutput?: boolean;
48
+ /**
49
+ * Set when `argsBuilder` requests `--output-format stream-json` (newline-
50
+ * delimited JSON events) rather than single-blob `json`. Enables (a)
51
+ * incremental result parsing that scans for the final `result` event, and
52
+ * (b) a live cumulative-cost circuit breaker that terminates the subprocess
53
+ * the moment projected spend crosses the remaining per-attempt budget,
54
+ * rather than only learning about an overspend after the process exits.
55
+ */
56
+ streamingUsageCap?: boolean;
48
57
  /** Test-only override for subprocess spawning. */
49
58
  spawnImpl?: SpawnLike;
50
59
  }
@@ -60,6 +69,8 @@ export interface ClaudeCliAdapterOptions {
60
69
  spawnImpl?: SpawnLike;
61
70
  }
62
71
  export interface CodexCliAdapterOptions {
72
+ /** Override the executable or absolute command path used to launch Codex. */
73
+ command?: string;
63
74
  workingDirectory?: string;
64
75
  timeoutMs?: number;
65
76
  verifyTimeoutMs?: number;
@@ -97,10 +108,16 @@ export interface GeminiCliAdapterOptions {
97
108
  }
98
109
  export declare function createAgentCliAdapter(options: AgentCliAdapterOptions): MartinAdapter;
99
110
  /**
100
- * Spawns `claude --output-format json --print "<prompt>" --dangerously-skip-permissions [extraArgs]`.
111
+ * Spawns `claude --output-format stream-json --verbose --print "<prompt>" [extraArgs]`.
101
112
  *
102
- * The --output-format json flag causes Claude CLI to return structured JSON
103
- * including real token usage counts, enabling accurate cost tracking.
113
+ * `stream-json` emits one JSON event per line including per-turn usage on
114
+ * each `assistant` message and a final `result` event carrying the same
115
+ * `result`/`usage`/`total_cost_usd` fields as single-blob `json` output — so
116
+ * MartinLoop can both (a) recover real token usage/cost as before, and
117
+ * (b) watch cumulative spend live and self-terminate the subprocess the
118
+ * moment it crosses the remaining per-attempt budget (see
119
+ * `streamingUsageCap` / `createStreamingUsageInspector`), instead of only
120
+ * discovering an overspend after the whole process has already exited.
104
121
  *
105
122
  * Requires the Claude Code CLI to be installed and authenticated:
106
123
  * https://docs.anthropic.com/claude-code