@martinloop/mcp 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/server-validation.js +2 -2
- package/dist/server.js +72 -10
- package/dist/tools/doctor.d.ts +27 -0
- package/dist/tools/doctor.js +39 -11
- package/dist/tools/get-run.d.ts +2 -1
- package/dist/tools/get-run.js +1 -0
- package/dist/tools/get-verification-results.d.ts +2 -1
- package/dist/tools/get-verification-results.js +1 -0
- package/dist/tools/plan.js +4 -2
- package/dist/tools/preflight.d.ts +27 -0
- package/dist/tools/preflight.js +44 -20
- package/dist/tools/run-dossier.d.ts +2 -1
- package/dist/tools/run-dossier.js +1 -0
- package/dist/tools/run-loop.d.ts +5 -1
- package/dist/tools/run-loop.js +20 -8
- package/dist/tools/run-store.js +67 -15
- package/dist/tools/tool-support.d.ts +2 -0
- package/dist/tools/tool-support.js +49 -13
- package/dist/tools/workflow-governance.d.ts +19 -3
- package/dist/tools/workflow-governance.js +107 -55
- package/dist/vendor/adapters/claude-cli.d.ts +20 -3
- package/dist/vendor/adapters/claude-cli.js +193 -33
- package/dist/vendor/adapters/cli-bridge.d.ts +45 -0
- package/dist/vendor/adapters/cli-bridge.js +107 -39
- package/dist/vendor/adapters/codex-launcher.d.ts +32 -0
- package/dist/vendor/adapters/codex-launcher.js +409 -118
- package/dist/vendor/adapters/openai-compatible.js +8 -2
- package/dist/vendor/adapters/runtime-support.js +1 -0
- package/dist/vendor/adapters/stub-direct-provider.js +3 -0
- package/dist/vendor/adapters/verifier-only.d.ts +2 -0
- package/dist/vendor/adapters/verifier-only.js +9 -3
- package/dist/vendor/contracts/index.d.ts +2 -1
- package/dist/vendor/contracts/index.js +14 -0
- package/dist/vendor/core/context-integrity.js +28 -3
- package/dist/vendor/core/grounding.d.ts +1 -0
- package/dist/vendor/core/grounding.js +6 -2
- package/dist/vendor/core/index.d.ts +1 -0
- package/dist/vendor/core/index.js +25 -6
- package/dist/vendor/core/leash.js +90 -8
- package/dist/vendor/core/persistence/integrity.d.ts +1 -1
- package/dist/vendor/core/persistence/integrity.js +15 -6
- package/dist/workflow-state.d.ts +9 -0
- package/dist/workflow-state.js +44 -3
- package/package.json +2 -2
- package/server.json +2 -2
package/dist/tools/run-loop.js
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
1
|
import { createClaudeCliAdapter, createCodexCliAdapter, createGeminiCliAdapter, probeCodexLaunch, resolveCliCommandAvailability, createVerifierOnlyAdapter } from "../vendor/adapters/index.js";
|
|
2
2
|
import { createFileRunStore, evaluateCostGovernor, resolveRunsRoot, runMartin } from "../vendor/core/index.js";
|
|
3
|
-
import { DEFAULT_BUDGET } from "../vendor/contracts/index.js";
|
|
4
3
|
import { normalizeSafePathPatterns, resolveSafeRepoRoot } from "../server-validation.js";
|
|
5
4
|
import { MartinToolError } from "./tool-errors.js";
|
|
6
5
|
import { buildArtifactSummary, buildVerificationSummary, buildLoopPreview, buildRunRecordPaths, getEngineAvailability, resolveExecutionMode } from "./tool-support.js";
|
|
6
|
+
import { normalizeLoopBudget } from "./workflow-governance.js";
|
|
7
|
+
let proofModeVerifierSpawnImpl;
|
|
8
|
+
let runStoreOverrideForTests;
|
|
9
|
+
export function __setProofModeVerifierSpawnImplForTests(spawnImpl) {
|
|
10
|
+
proofModeVerifierSpawnImpl = spawnImpl;
|
|
11
|
+
}
|
|
12
|
+
export function __setRunStoreOverrideForTests(store) {
|
|
13
|
+
runStoreOverrideForTests = store;
|
|
14
|
+
}
|
|
7
15
|
export async function runLoopTool(input) {
|
|
8
16
|
const workingDirectory = resolveSafeRepoRoot(input.workingDirectory);
|
|
9
17
|
const engine = input.engine ?? "claude";
|
|
@@ -19,6 +27,7 @@ export async function runLoopTool(input) {
|
|
|
19
27
|
repoRoot: workingDirectory,
|
|
20
28
|
runsRoot
|
|
21
29
|
};
|
|
30
|
+
let codexCommandOverride;
|
|
22
31
|
if (executionMode.liveMode) {
|
|
23
32
|
if (engine === "codex") {
|
|
24
33
|
const engineAvailability = resolveCliCommandAvailability("codex");
|
|
@@ -40,6 +49,7 @@ export async function runLoopTool(input) {
|
|
|
40
49
|
retryable: false
|
|
41
50
|
});
|
|
42
51
|
}
|
|
52
|
+
codexCommandOverride = codexProbe.command;
|
|
43
53
|
}
|
|
44
54
|
else {
|
|
45
55
|
const engineAvailability = getEngineAvailability(engine);
|
|
@@ -55,10 +65,15 @@ export async function runLoopTool(input) {
|
|
|
55
65
|
const adapter = !executionMode.liveMode
|
|
56
66
|
? createVerifierOnlyAdapter({
|
|
57
67
|
workingDirectory,
|
|
58
|
-
label: "Proof mode adapter (MARTIN_LIVE=false)"
|
|
68
|
+
label: "Proof mode adapter (MARTIN_LIVE=false)",
|
|
69
|
+
...(proofModeVerifierSpawnImpl ? { spawnImpl: proofModeVerifierSpawnImpl } : {})
|
|
59
70
|
})
|
|
60
71
|
: engine === "codex"
|
|
61
|
-
? createCodexCliAdapter({
|
|
72
|
+
? createCodexCliAdapter({
|
|
73
|
+
workingDirectory,
|
|
74
|
+
...(model ? { model } : {}),
|
|
75
|
+
...(codexCommandOverride ? { command: codexCommandOverride } : {})
|
|
76
|
+
})
|
|
62
77
|
: engine === "gemini"
|
|
63
78
|
? createGeminiCliAdapter({ workingDirectory, ...(model ? { model } : {}) })
|
|
64
79
|
: createClaudeCliAdapter({ workingDirectory, ...(model ? { model } : {}) });
|
|
@@ -72,14 +87,11 @@ export async function runLoopTool(input) {
|
|
|
72
87
|
if (input.maxTokens !== undefined) {
|
|
73
88
|
partialBudget.maxTokens = input.maxTokens;
|
|
74
89
|
}
|
|
75
|
-
const budget =
|
|
76
|
-
...DEFAULT_BUDGET,
|
|
77
|
-
...partialBudget
|
|
78
|
-
};
|
|
90
|
+
const budget = normalizeLoopBudget(partialBudget);
|
|
79
91
|
const result = await runMartin({
|
|
80
92
|
workspaceId: input.workspaceId ?? "ws_mcp",
|
|
81
93
|
projectId: input.projectId ?? "proj_mcp",
|
|
82
|
-
store: createFileRunStore({ runsRoot }),
|
|
94
|
+
store: runStoreOverrideForTests ?? createFileRunStore({ runsRoot }),
|
|
83
95
|
receiptScope,
|
|
84
96
|
task: {
|
|
85
97
|
title: input.objective.slice(0, 100),
|
package/dist/tools/run-store.js
CHANGED
|
@@ -1,8 +1,60 @@
|
|
|
1
1
|
import { readFile, readdir, stat } from "node:fs/promises";
|
|
2
2
|
import path from "node:path";
|
|
3
|
-
import { readLatestLoopRecordFromFile, readLoopRecordsFromFile, resolveRunsRoot } from "../vendor/core/index.js";
|
|
3
|
+
import { readLatestLoopRecordFromFile, readLoopRecordsFromFile, resolveRunsRoot, verifyReceiptIntegrityFromFiles } from "../vendor/core/index.js";
|
|
4
4
|
import { resolveSafeLoopRecordPath, resolveSafeRunsJsonPath, resolveSafeRunsPath, resolveSafeRunsRootPath } from "../server-validation.js";
|
|
5
5
|
import { attemptNotFoundError, invalidSelectorError, noLoopRecordsError, storeUnreadableError } from "./tool-errors.js";
|
|
6
|
+
async function attachReceiptIntegrity(detail) {
|
|
7
|
+
const ledgerPath = detail.canonicalRunDirectory
|
|
8
|
+
? await resolveReceiptEvidencePath(detail.canonicalRunDirectory)
|
|
9
|
+
: detail.ledgerPath;
|
|
10
|
+
const integrity = detail.canonicalLoopRecordPath && detail.canonicalRunDirectory && ledgerPath
|
|
11
|
+
? await verifyReceiptIntegrityFromFiles({
|
|
12
|
+
runId: detail.loop.loopId,
|
|
13
|
+
runsRoot: detail.runsRoot,
|
|
14
|
+
loopRecordPath: detail.canonicalLoopRecordPath,
|
|
15
|
+
ledgerPath
|
|
16
|
+
}).catch(() => ({
|
|
17
|
+
state: "unsigned",
|
|
18
|
+
reason: "Receipt integrity verification could not be completed."
|
|
19
|
+
}))
|
|
20
|
+
: ({
|
|
21
|
+
state: "unsigned",
|
|
22
|
+
reason: "Receipt integrity is only available for canonical run directories."
|
|
23
|
+
});
|
|
24
|
+
const receiptScope = resolveReceiptScope(detail.loop, detail.runsRoot);
|
|
25
|
+
return {
|
|
26
|
+
...detail,
|
|
27
|
+
...(ledgerPath ? { ledgerPath } : {}),
|
|
28
|
+
loop: {
|
|
29
|
+
...detail.loop,
|
|
30
|
+
receiptIntegrity: integrity,
|
|
31
|
+
...(receiptScope ? { receiptScope } : {})
|
|
32
|
+
}
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
function resolveReceiptScope(loop, runsRoot) {
|
|
36
|
+
if (loop.receiptScope) {
|
|
37
|
+
return loop.receiptScope;
|
|
38
|
+
}
|
|
39
|
+
if (!loop.task?.repoRoot && !runsRoot) {
|
|
40
|
+
return undefined;
|
|
41
|
+
}
|
|
42
|
+
return {
|
|
43
|
+
...(loop.task?.repoRoot ? { repoRoot: loop.task.repoRoot } : {}),
|
|
44
|
+
...(loop.task?.repoRoot ? { workingDirectory: loop.task.repoRoot } : {}),
|
|
45
|
+
...(runsRoot ? { runsRoot } : {})
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
async function resolveReceiptEvidencePath(runDirectory) {
|
|
49
|
+
for (const candidate of ["ledger.jsonl", "events.jsonl"]) {
|
|
50
|
+
const candidatePath = path.join(runDirectory, candidate);
|
|
51
|
+
const candidateStats = await safeStat(candidatePath);
|
|
52
|
+
if (candidateStats?.isFile()) {
|
|
53
|
+
return candidatePath;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
return undefined;
|
|
57
|
+
}
|
|
6
58
|
export async function loadLoopRecordsForInspect(input) {
|
|
7
59
|
const runsRoot = resolveSafeRunsRootPath(input.runsDir, resolveRunsRoot(process.env));
|
|
8
60
|
if (!input.file) {
|
|
@@ -118,14 +170,14 @@ export async function loadDetailedLoopRecord(input) {
|
|
|
118
170
|
const canonicalStats = await safeStat(canonicalLoopRecordPath);
|
|
119
171
|
if (canonicalStats?.isFile()) {
|
|
120
172
|
const loop = await readCanonicalLoopRecord(canonicalLoopRecordPath);
|
|
121
|
-
return buildDetailedLoopSource({
|
|
173
|
+
return await attachReceiptIntegrity(buildDetailedLoopSource({
|
|
122
174
|
source: canonicalLoopRecordPath,
|
|
123
175
|
sourceKind: "file",
|
|
124
176
|
runsRoot,
|
|
125
177
|
loop,
|
|
126
178
|
canonicalLoopRecordPath,
|
|
127
179
|
canonicalRunDirectory: path.dirname(canonicalLoopRecordPath)
|
|
128
|
-
});
|
|
180
|
+
}));
|
|
129
181
|
}
|
|
130
182
|
}
|
|
131
183
|
const inspected = await readAllLoopRecordsSafely(targetPath);
|
|
@@ -139,10 +191,10 @@ export async function loadDetailedLoopRecord(input) {
|
|
|
139
191
|
runsRoot,
|
|
140
192
|
loop
|
|
141
193
|
});
|
|
142
|
-
return {
|
|
194
|
+
return await attachReceiptIntegrity({
|
|
143
195
|
...detail,
|
|
144
196
|
warnings: [...detail.warnings, ...inspected.warnings]
|
|
145
|
-
};
|
|
197
|
+
});
|
|
146
198
|
}
|
|
147
199
|
const latest = await readLatestLoopRecordFromFile(targetPath);
|
|
148
200
|
if (!latest) {
|
|
@@ -150,35 +202,35 @@ export async function loadDetailedLoopRecord(input) {
|
|
|
150
202
|
}
|
|
151
203
|
if (path.basename(targetPath) === "loop-record.json") {
|
|
152
204
|
const loop = await readCanonicalLoopRecord(targetPath);
|
|
153
|
-
return buildDetailedLoopSource({
|
|
205
|
+
return await attachReceiptIntegrity(buildDetailedLoopSource({
|
|
154
206
|
source: targetPath,
|
|
155
207
|
sourceKind: "file",
|
|
156
208
|
runsRoot,
|
|
157
209
|
loop,
|
|
158
210
|
canonicalLoopRecordPath: targetPath,
|
|
159
211
|
canonicalRunDirectory: path.dirname(targetPath)
|
|
160
|
-
});
|
|
212
|
+
}));
|
|
161
213
|
}
|
|
162
|
-
return await buildDetailedLoopSourceFromDiscoveredLoop({
|
|
214
|
+
return await attachReceiptIntegrity(await buildDetailedLoopSourceFromDiscoveredLoop({
|
|
163
215
|
source: targetPath,
|
|
164
216
|
sourceKind: "file",
|
|
165
217
|
runsRoot,
|
|
166
218
|
loop: latest
|
|
167
|
-
});
|
|
219
|
+
}));
|
|
168
220
|
}
|
|
169
221
|
if (input.loopId) {
|
|
170
222
|
const canonicalLoopRecordPath = resolvePotentialLoopRecordPath(input.loopId, runsRoot);
|
|
171
223
|
const canonicalStats = await safeStat(canonicalLoopRecordPath);
|
|
172
224
|
if (canonicalStats?.isFile()) {
|
|
173
225
|
const loop = await readCanonicalLoopRecord(canonicalLoopRecordPath);
|
|
174
|
-
return buildDetailedLoopSource({
|
|
226
|
+
return await attachReceiptIntegrity(buildDetailedLoopSource({
|
|
175
227
|
source: canonicalLoopRecordPath,
|
|
176
228
|
sourceKind: "loop_id",
|
|
177
229
|
runsRoot,
|
|
178
230
|
loop,
|
|
179
231
|
canonicalLoopRecordPath,
|
|
180
232
|
canonicalRunDirectory: path.dirname(canonicalLoopRecordPath)
|
|
181
|
-
});
|
|
233
|
+
}));
|
|
182
234
|
}
|
|
183
235
|
const inspected = await readAllLoopRecordsSafely(runsRoot);
|
|
184
236
|
const loop = inspected.loops.find((candidate) => candidate.loopId === input.loopId);
|
|
@@ -191,10 +243,10 @@ export async function loadDetailedLoopRecord(input) {
|
|
|
191
243
|
runsRoot,
|
|
192
244
|
loop
|
|
193
245
|
});
|
|
194
|
-
return {
|
|
246
|
+
return await attachReceiptIntegrity({
|
|
195
247
|
...detail,
|
|
196
248
|
warnings: [...detail.warnings, ...inspected.warnings]
|
|
197
|
-
};
|
|
249
|
+
});
|
|
198
250
|
}
|
|
199
251
|
const inspected = await readAllLoopRecordsSafely(runsRoot);
|
|
200
252
|
const loop = inspected.loops[0];
|
|
@@ -207,10 +259,10 @@ export async function loadDetailedLoopRecord(input) {
|
|
|
207
259
|
runsRoot,
|
|
208
260
|
loop
|
|
209
261
|
});
|
|
210
|
-
return {
|
|
262
|
+
return await attachReceiptIntegrity({
|
|
211
263
|
...detail,
|
|
212
264
|
warnings: [...detail.warnings, ...inspected.warnings]
|
|
213
|
-
};
|
|
265
|
+
});
|
|
214
266
|
}
|
|
215
267
|
export async function loadAttemptFromLoop(input) {
|
|
216
268
|
const detail = await loadDetailedLoopRecord(input);
|
|
@@ -89,6 +89,7 @@ export interface CliAvailability {
|
|
|
89
89
|
locator: string;
|
|
90
90
|
detail: string;
|
|
91
91
|
resolvedPath?: string;
|
|
92
|
+
candidatePaths?: string[];
|
|
92
93
|
}
|
|
93
94
|
export interface ExecutionMode {
|
|
94
95
|
liveMode: boolean;
|
|
@@ -110,6 +111,7 @@ export interface CanonicalRunPaths {
|
|
|
110
111
|
export declare function resolveExecutionMode(): ExecutionMode;
|
|
111
112
|
export declare function detectCliAvailability(command: string): CliAvailability;
|
|
112
113
|
export declare function getEngineAvailability(engine: MartinEngine): CliAvailability;
|
|
114
|
+
export declare function createSkippedCliAvailability(command: string, detail?: string): CliAvailability;
|
|
113
115
|
export declare function formatUsd(value: number): string;
|
|
114
116
|
export declare function buildLoopPreview(loop: InspectableLoopRecord): LoopPreview;
|
|
115
117
|
export declare function buildAttemptSummary(attempt: InspectableLoopAttempt, artifacts?: AttemptArtifactFiles): AttemptSummary;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { accessSync, constants } from "node:fs";
|
|
2
2
|
import { readdir, stat } from "node:fs/promises";
|
|
3
3
|
import { join } from "node:path";
|
|
4
4
|
import { evaluateCostGovernor, resolveRunsRoot } from "../vendor/core/index.js";
|
|
@@ -23,18 +23,9 @@ export function detectCliAvailability(command) {
|
|
|
23
23
|
if (cached && cached.expiresAt > Date.now()) {
|
|
24
24
|
return cached.value;
|
|
25
25
|
}
|
|
26
|
-
const locator = process.platform === "win32" ? "
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
stdio: ["ignore", "pipe", "pipe"]
|
|
30
|
-
});
|
|
31
|
-
const resolvedPath = result.status === 0
|
|
32
|
-
? (result.stdout ?? "")
|
|
33
|
-
.split(/\r?\n/u)
|
|
34
|
-
.map((line) => line.trim())
|
|
35
|
-
.find(Boolean)
|
|
36
|
-
: undefined;
|
|
37
|
-
const value = result.status === 0
|
|
26
|
+
const locator = process.platform === "win32" ? "path-scan(win32)" : "path-scan(posix)";
|
|
27
|
+
const resolvedPath = findCommandOnPath(command);
|
|
28
|
+
const value = resolvedPath
|
|
38
29
|
? {
|
|
39
30
|
command,
|
|
40
31
|
available: true,
|
|
@@ -54,9 +45,54 @@ export function detectCliAvailability(command) {
|
|
|
54
45
|
});
|
|
55
46
|
return value;
|
|
56
47
|
}
|
|
48
|
+
function findCommandOnPath(command) {
|
|
49
|
+
const pathKey = Object.keys(process.env).find((key) => key.toLowerCase() === "path");
|
|
50
|
+
const rawPath = pathKey ? process.env[pathKey] : undefined;
|
|
51
|
+
if (!rawPath) {
|
|
52
|
+
return undefined;
|
|
53
|
+
}
|
|
54
|
+
const pathEntries = rawPath
|
|
55
|
+
.split(process.platform === "win32" ? ";" : ":")
|
|
56
|
+
.map((entry) => entry.trim())
|
|
57
|
+
.filter(Boolean);
|
|
58
|
+
const hasExtension = /\.[A-Za-z0-9]+$/u.test(command);
|
|
59
|
+
const candidateNames = process.platform === "win32" && !hasExtension
|
|
60
|
+
? (process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD")
|
|
61
|
+
.split(";")
|
|
62
|
+
.map((extension) => extension.trim())
|
|
63
|
+
.filter(Boolean)
|
|
64
|
+
.map((extension) => `${command}${extension.toLowerCase()}`)
|
|
65
|
+
: [command];
|
|
66
|
+
for (const directory of pathEntries) {
|
|
67
|
+
for (const candidateName of candidateNames) {
|
|
68
|
+
const candidatePath = join(directory, candidateName);
|
|
69
|
+
if (isExecutablePath(candidatePath)) {
|
|
70
|
+
return candidatePath;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
return undefined;
|
|
75
|
+
}
|
|
76
|
+
function isExecutablePath(candidatePath) {
|
|
77
|
+
try {
|
|
78
|
+
accessSync(candidatePath, process.platform === "win32" ? constants.F_OK : constants.X_OK);
|
|
79
|
+
return true;
|
|
80
|
+
}
|
|
81
|
+
catch {
|
|
82
|
+
return false;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
57
85
|
export function getEngineAvailability(engine) {
|
|
58
86
|
return detectCliAvailability(engine);
|
|
59
87
|
}
|
|
88
|
+
export function createSkippedCliAvailability(command, detail = "Proof mode skipped live CLI availability detection.") {
|
|
89
|
+
return {
|
|
90
|
+
command,
|
|
91
|
+
available: false,
|
|
92
|
+
locator: "skipped",
|
|
93
|
+
detail
|
|
94
|
+
};
|
|
95
|
+
}
|
|
60
96
|
export function formatUsd(value) {
|
|
61
97
|
return `$${value.toFixed(2)}`;
|
|
62
98
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { type LoopBudget } from "../vendor/contracts/index.js";
|
|
1
2
|
import { type RunStoreInspection } from "./tool-support.js";
|
|
2
3
|
export type MartinPolicyPack = "solo-founder" | "startup-team" | "enterprise-strict" | "oss-maintainer" | "security-sensitive";
|
|
3
4
|
export interface RepoGitState {
|
|
@@ -23,6 +24,7 @@ export interface RepoSignals {
|
|
|
23
24
|
packageScripts: Record<string, string>;
|
|
24
25
|
git: RepoGitState;
|
|
25
26
|
sensitivePaths: string[];
|
|
27
|
+
hostAvailabilityChecked: boolean;
|
|
26
28
|
availableHosts: Record<"claude" | "codex" | "cursor" | "gemini", {
|
|
27
29
|
available: boolean;
|
|
28
30
|
detail: string;
|
|
@@ -110,11 +112,25 @@ interface ContractOverrides {
|
|
|
110
112
|
maxFilesChanged?: number;
|
|
111
113
|
maxCommands?: number;
|
|
112
114
|
}
|
|
113
|
-
|
|
115
|
+
interface LoopBudgetOverrides {
|
|
116
|
+
maxUsd?: number;
|
|
117
|
+
softLimitUsd?: number;
|
|
118
|
+
maxIterations?: number;
|
|
119
|
+
maxTokens?: number;
|
|
120
|
+
}
|
|
121
|
+
export declare function inspectRepoSignals(workingDirectory: string, options?: {
|
|
122
|
+
includeHostAvailability?: boolean;
|
|
123
|
+
}): RepoSignals;
|
|
114
124
|
export declare function buildReadinessReport(signals: RepoSignals, runStore: RunStoreInspection): MartinReadinessReport;
|
|
115
125
|
export declare function buildPolicyPackDefinition(policyPack: MartinPolicyPack | undefined, signals: RepoSignals): MartinPolicyPackDefinition;
|
|
116
|
-
export declare function buildPlanProposal(workingDirectory: string, overrides: ContractOverrides
|
|
117
|
-
|
|
126
|
+
export declare function buildPlanProposal(workingDirectory: string, overrides: ContractOverrides, options?: {
|
|
127
|
+
signals?: RepoSignals;
|
|
128
|
+
}): MartinPlanProposal;
|
|
129
|
+
export declare function buildRunContract(workingDirectory: string, overrides: ContractOverrides, options?: {
|
|
130
|
+
signals?: RepoSignals;
|
|
131
|
+
plan?: MartinPlanProposal;
|
|
132
|
+
}): MartinRunContract;
|
|
133
|
+
export declare function normalizeLoopBudget(overrides?: LoopBudgetOverrides): LoopBudget;
|
|
118
134
|
export declare function assessRunRisk(input: {
|
|
119
135
|
objective: string;
|
|
120
136
|
context?: string;
|
|
@@ -2,13 +2,17 @@ import { existsSync, readFileSync } from "node:fs";
|
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { spawnSync } from "node:child_process";
|
|
4
4
|
import { DEFAULT_BUDGET } from "../vendor/contracts/index.js";
|
|
5
|
-
import { detectCliAvailability } from "./tool-support.js";
|
|
5
|
+
import { createSkippedCliAvailability, detectCliAvailability } from "./tool-support.js";
|
|
6
6
|
const HOST_COMMANDS = {
|
|
7
7
|
claude: "claude",
|
|
8
8
|
codex: "codex",
|
|
9
9
|
cursor: "cursor",
|
|
10
10
|
gemini: "gemini"
|
|
11
11
|
};
|
|
12
|
+
const REPO_SIGNALS_CACHE_TTL_MS = 5_000;
|
|
13
|
+
const repoSignalsCache = new Map();
|
|
14
|
+
const GIT_STATE_CACHE_TTL_MS = 60_000;
|
|
15
|
+
const repoGitStateCache = new Map();
|
|
12
16
|
const POLICY_PACKS = {
|
|
13
17
|
"solo-founder": {
|
|
14
18
|
name: "solo-founder",
|
|
@@ -110,13 +114,19 @@ const POLICY_PACKS = {
|
|
|
110
114
|
requireApprovalAtOrAbove: "medium"
|
|
111
115
|
}
|
|
112
116
|
};
|
|
113
|
-
export function inspectRepoSignals(workingDirectory) {
|
|
117
|
+
export function inspectRepoSignals(workingDirectory, options = {}) {
|
|
118
|
+
const includeHostAvailability = options.includeHostAvailability ?? true;
|
|
119
|
+
const cacheKey = `${workingDirectory}::hosts=${includeHostAvailability ? "live" : "skipped"}`;
|
|
120
|
+
const cached = repoSignalsCache.get(cacheKey);
|
|
121
|
+
if (cached && cached.expiresAt > Date.now()) {
|
|
122
|
+
return cached.value;
|
|
123
|
+
}
|
|
114
124
|
const packageScripts = readPackageScripts(workingDirectory);
|
|
115
125
|
const packageManager = detectPackageManager(workingDirectory);
|
|
116
126
|
const frameworks = detectFrameworks(workingDirectory, packageScripts);
|
|
117
127
|
const languages = detectLanguages(workingDirectory, frameworks);
|
|
118
128
|
const verifiers = detectVerifierCommands(packageScripts, packageManager);
|
|
119
|
-
|
|
129
|
+
const signals = {
|
|
120
130
|
workingDirectory,
|
|
121
131
|
packageManager,
|
|
122
132
|
languages,
|
|
@@ -125,13 +135,27 @@ export function inspectRepoSignals(workingDirectory) {
|
|
|
125
135
|
packageScripts,
|
|
126
136
|
git: detectGitState(workingDirectory),
|
|
127
137
|
sensitivePaths: detectSensitivePaths(workingDirectory),
|
|
138
|
+
hostAvailabilityChecked: includeHostAvailability,
|
|
128
139
|
availableHosts: {
|
|
129
|
-
claude:
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
140
|
+
claude: includeHostAvailability
|
|
141
|
+
? detectCliAvailability(HOST_COMMANDS.claude)
|
|
142
|
+
: createSkippedCliAvailability(HOST_COMMANDS.claude),
|
|
143
|
+
codex: includeHostAvailability
|
|
144
|
+
? detectCliAvailability(HOST_COMMANDS.codex)
|
|
145
|
+
: createSkippedCliAvailability(HOST_COMMANDS.codex),
|
|
146
|
+
cursor: includeHostAvailability
|
|
147
|
+
? detectCliAvailability(HOST_COMMANDS.cursor)
|
|
148
|
+
: createSkippedCliAvailability(HOST_COMMANDS.cursor),
|
|
149
|
+
gemini: includeHostAvailability
|
|
150
|
+
? detectCliAvailability(HOST_COMMANDS.gemini)
|
|
151
|
+
: createSkippedCliAvailability(HOST_COMMANDS.gemini)
|
|
133
152
|
}
|
|
134
153
|
};
|
|
154
|
+
repoSignalsCache.set(cacheKey, {
|
|
155
|
+
expiresAt: Date.now() + REPO_SIGNALS_CACHE_TTL_MS,
|
|
156
|
+
value: signals
|
|
157
|
+
});
|
|
158
|
+
return signals;
|
|
135
159
|
}
|
|
136
160
|
export function buildReadinessReport(signals, runStore) {
|
|
137
161
|
const missingSafeguards = [];
|
|
@@ -155,7 +179,9 @@ export function buildReadinessReport(signals, runStore) {
|
|
|
155
179
|
if (signals.frameworks.length === 0) {
|
|
156
180
|
score -= 8;
|
|
157
181
|
}
|
|
158
|
-
if (
|
|
182
|
+
if (signals.hostAvailabilityChecked &&
|
|
183
|
+
!signals.availableHosts.claude.available &&
|
|
184
|
+
!signals.availableHosts.codex.available) {
|
|
159
185
|
score -= 18;
|
|
160
186
|
}
|
|
161
187
|
score = Math.max(0, Math.min(100, score));
|
|
@@ -189,8 +215,8 @@ export function buildPolicyPackDefinition(policyPack, signals) {
|
|
|
189
215
|
: fallbackVerifierPlan(signals.packageManager)
|
|
190
216
|
};
|
|
191
217
|
}
|
|
192
|
-
export function buildPlanProposal(workingDirectory, overrides) {
|
|
193
|
-
const signals = inspectRepoSignals(workingDirectory);
|
|
218
|
+
export function buildPlanProposal(workingDirectory, overrides, options = {}) {
|
|
219
|
+
const signals = options.signals ?? inspectRepoSignals(workingDirectory);
|
|
194
220
|
const policy = buildPolicyPackDefinition(overrides.policyPack, signals);
|
|
195
221
|
const scope = inferScopeFromObjective(overrides.objective, policy, overrides);
|
|
196
222
|
const estimatedBudget = buildBudget(overrides, signals);
|
|
@@ -223,8 +249,8 @@ export function buildPlanProposal(workingDirectory, overrides) {
|
|
|
223
249
|
]
|
|
224
250
|
};
|
|
225
251
|
}
|
|
226
|
-
export function buildRunContract(workingDirectory, overrides) {
|
|
227
|
-
const plan = buildPlanProposal(workingDirectory, overrides);
|
|
252
|
+
export function buildRunContract(workingDirectory, overrides, options = {}) {
|
|
253
|
+
const plan = options.plan ?? buildPlanProposal(workingDirectory, overrides, options);
|
|
228
254
|
return {
|
|
229
255
|
objective: overrides.objective,
|
|
230
256
|
...(overrides.context ? { context: overrides.context } : {}),
|
|
@@ -238,6 +264,16 @@ export function buildRunContract(workingDirectory, overrides) {
|
|
|
238
264
|
shouldRequireApproval(plan.policyPack.requireApprovalAtOrAbove, plan.risk.level)
|
|
239
265
|
};
|
|
240
266
|
}
|
|
267
|
+
export function normalizeLoopBudget(overrides = {}) {
|
|
268
|
+
const maxUsd = overrides.maxUsd ?? DEFAULT_BUDGET.maxUsd;
|
|
269
|
+
const softLimitUsd = Math.min(overrides.softLimitUsd ?? DEFAULT_BUDGET.softLimitUsd, maxUsd);
|
|
270
|
+
return {
|
|
271
|
+
maxUsd,
|
|
272
|
+
softLimitUsd,
|
|
273
|
+
maxIterations: overrides.maxIterations ?? DEFAULT_BUDGET.maxIterations,
|
|
274
|
+
maxTokens: overrides.maxTokens ?? DEFAULT_BUDGET.maxTokens
|
|
275
|
+
};
|
|
276
|
+
}
|
|
241
277
|
export function assessRunRisk(input) {
|
|
242
278
|
const reasons = [];
|
|
243
279
|
let score = 12;
|
|
@@ -406,17 +442,10 @@ function detectVerifierCommands(scripts, packageManager) {
|
|
|
406
442
|
return { test, lint, build, defaultPlan };
|
|
407
443
|
}
|
|
408
444
|
function detectGitState(workingDirectory) {
|
|
409
|
-
const
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
});
|
|
414
|
-
if (availability.status !== 0) {
|
|
415
|
-
return {
|
|
416
|
-
available: false,
|
|
417
|
-
isRepo: false,
|
|
418
|
-
clean: false
|
|
419
|
-
};
|
|
445
|
+
const cacheKey = workingDirectory;
|
|
446
|
+
const cached = repoGitStateCache.get(cacheKey);
|
|
447
|
+
if (cached && cached.expiresAt > Date.now()) {
|
|
448
|
+
return cached.value;
|
|
420
449
|
}
|
|
421
450
|
const isRepo = spawnSync("git", ["rev-parse", "--is-inside-work-tree"], {
|
|
422
451
|
cwd: workingDirectory,
|
|
@@ -424,18 +453,29 @@ function detectGitState(workingDirectory) {
|
|
|
424
453
|
stdio: ["ignore", "pipe", "pipe"]
|
|
425
454
|
});
|
|
426
455
|
if (isRepo.status !== 0 || !/true/u.test(isRepo.stdout ?? "")) {
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
};
|
|
456
|
+
const availability = spawnSync("git", ["--version"], {
|
|
457
|
+
cwd: workingDirectory,
|
|
458
|
+
encoding: "utf8",
|
|
459
|
+
stdio: ["ignore", "pipe", "pipe"]
|
|
460
|
+
});
|
|
461
|
+
const value = availability.status !== 0
|
|
462
|
+
? {
|
|
463
|
+
available: false,
|
|
464
|
+
isRepo: false,
|
|
465
|
+
clean: false
|
|
466
|
+
}
|
|
467
|
+
: {
|
|
468
|
+
available: true,
|
|
469
|
+
isRepo: false,
|
|
470
|
+
clean: false
|
|
471
|
+
};
|
|
472
|
+
repoGitStateCache.set(cacheKey, {
|
|
473
|
+
expiresAt: Date.now() + GIT_STATE_CACHE_TTL_MS,
|
|
474
|
+
value
|
|
475
|
+
});
|
|
476
|
+
return value;
|
|
432
477
|
}
|
|
433
|
-
const
|
|
434
|
-
cwd: workingDirectory,
|
|
435
|
-
encoding: "utf8",
|
|
436
|
-
stdio: ["ignore", "pipe", "pipe"]
|
|
437
|
-
}).stdout.trim();
|
|
438
|
-
const status = spawnSync("git", ["status", "--porcelain", "--branch"], {
|
|
478
|
+
const status = spawnSync("git", ["status", "--porcelain=v2", "--branch", "--untracked-files=normal", "--ignored=no", "--", "."], {
|
|
439
479
|
cwd: workingDirectory,
|
|
440
480
|
encoding: "utf8",
|
|
441
481
|
stdio: ["ignore", "pipe", "pipe"]
|
|
@@ -444,20 +484,42 @@ function detectGitState(workingDirectory) {
|
|
|
444
484
|
.split(/\r?\n/u)
|
|
445
485
|
.map((line) => line.trim())
|
|
446
486
|
.filter(Boolean);
|
|
447
|
-
const dirty = statusLines.some((line) => !line.startsWith("
|
|
448
|
-
const
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
487
|
+
const dirty = statusLines.some((line) => !line.startsWith("#"));
|
|
488
|
+
const branch = statusLines
|
|
489
|
+
.find((line) => line.startsWith("# branch.head "))
|
|
490
|
+
?.replace("# branch.head ", "")
|
|
491
|
+
.trim();
|
|
492
|
+
const upstream = statusLines
|
|
493
|
+
.find((line) => line.startsWith("# branch.upstream "))
|
|
494
|
+
?.replace("# branch.upstream ", "")
|
|
495
|
+
.trim();
|
|
496
|
+
const aheadBehind = statusLines
|
|
497
|
+
.find((line) => line.startsWith("# branch.ab "))
|
|
498
|
+
?.replace("# branch.ab ", "")
|
|
499
|
+
.trim()
|
|
500
|
+
.split(/\s+/u);
|
|
501
|
+
const aheadToken = aheadBehind?.find((token) => token.startsWith("+"));
|
|
502
|
+
const behindToken = aheadBehind?.find((token) => token.startsWith("-"));
|
|
503
|
+
const ahead = aheadToken && aheadToken.length > 1
|
|
504
|
+
? Number.parseInt(aheadToken.slice(1), 10)
|
|
505
|
+
: undefined;
|
|
506
|
+
const behind = behindToken && behindToken.length > 1
|
|
507
|
+
? Number.parseInt(behindToken.slice(1), 10)
|
|
508
|
+
: undefined;
|
|
509
|
+
const value = {
|
|
453
510
|
available: true,
|
|
454
511
|
isRepo: true,
|
|
455
512
|
clean: !dirty,
|
|
456
|
-
...(branch ? { branch } : {}),
|
|
513
|
+
...(branch && branch !== "(detached)" ? { branch } : {}),
|
|
457
514
|
...(upstream ? { upstream } : {}),
|
|
458
|
-
...(ahead
|
|
459
|
-
...(behind
|
|
515
|
+
...(Number.isFinite(ahead) ? { ahead } : {}),
|
|
516
|
+
...(Number.isFinite(behind) ? { behind } : {})
|
|
460
517
|
};
|
|
518
|
+
repoGitStateCache.set(cacheKey, {
|
|
519
|
+
expiresAt: Date.now() + GIT_STATE_CACHE_TTL_MS,
|
|
520
|
+
value
|
|
521
|
+
});
|
|
522
|
+
return value;
|
|
461
523
|
}
|
|
462
524
|
function detectSensitivePaths(workingDirectory) {
|
|
463
525
|
const candidates = [
|
|
@@ -520,11 +582,9 @@ function inferScopeFromObjective(objective, policy, overrides) {
|
|
|
520
582
|
}
|
|
521
583
|
function buildBudget(overrides, signals) {
|
|
522
584
|
const defaultCommands = signals.verifiers.defaultPlan.length > 0 ? 12 : 8;
|
|
585
|
+
const normalizedBudget = normalizeLoopBudget(overrides);
|
|
523
586
|
return {
|
|
524
|
-
|
|
525
|
-
softLimitUsd: Math.min(overrides.maxUsd ?? DEFAULT_BUDGET.maxUsd, DEFAULT_BUDGET.softLimitUsd),
|
|
526
|
-
maxIterations: overrides.maxIterations ?? DEFAULT_BUDGET.maxIterations,
|
|
527
|
-
maxTokens: overrides.maxTokens ?? DEFAULT_BUDGET.maxTokens,
|
|
587
|
+
...normalizedBudget,
|
|
528
588
|
maxMinutes: overrides.maxMinutes ?? 20,
|
|
529
589
|
maxFilesChanged: overrides.maxFilesChanged ?? 8,
|
|
530
590
|
maxCommands: overrides.maxCommands ?? defaultCommands
|
|
@@ -571,11 +631,3 @@ function shouldRequireApproval(threshold, level) {
|
|
|
571
631
|
const ordering = ["low", "medium", "high"];
|
|
572
632
|
return ordering.indexOf(level) >= ordering.indexOf(threshold);
|
|
573
633
|
}
|
|
574
|
-
function parseCount(value, pattern) {
|
|
575
|
-
const match = value?.match(pattern)?.[1];
|
|
576
|
-
if (!match) {
|
|
577
|
-
return undefined;
|
|
578
|
-
}
|
|
579
|
-
const parsed = Number.parseInt(match, 10);
|
|
580
|
-
return Number.isFinite(parsed) ? parsed : undefined;
|
|
581
|
-
}
|
|
@@ -45,6 +45,15 @@ export interface AgentCliAdapterOptions {
|
|
|
45
45
|
* Defaults to true for Claude.
|
|
46
46
|
*/
|
|
47
47
|
supportsJsonOutput?: boolean;
|
|
48
|
+
/**
|
|
49
|
+
* Set when `argsBuilder` requests `--output-format stream-json` (newline-
|
|
50
|
+
* delimited JSON events) rather than single-blob `json`. Enables (a)
|
|
51
|
+
* incremental result parsing that scans for the final `result` event, and
|
|
52
|
+
* (b) a live cumulative-cost circuit breaker that terminates the subprocess
|
|
53
|
+
* the moment projected spend crosses the remaining per-attempt budget,
|
|
54
|
+
* rather than only learning about an overspend after the process exits.
|
|
55
|
+
*/
|
|
56
|
+
streamingUsageCap?: boolean;
|
|
48
57
|
/** Test-only override for subprocess spawning. */
|
|
49
58
|
spawnImpl?: SpawnLike;
|
|
50
59
|
}
|
|
@@ -60,6 +69,8 @@ export interface ClaudeCliAdapterOptions {
|
|
|
60
69
|
spawnImpl?: SpawnLike;
|
|
61
70
|
}
|
|
62
71
|
export interface CodexCliAdapterOptions {
|
|
72
|
+
/** Override the executable or absolute command path used to launch Codex. */
|
|
73
|
+
command?: string;
|
|
63
74
|
workingDirectory?: string;
|
|
64
75
|
timeoutMs?: number;
|
|
65
76
|
verifyTimeoutMs?: number;
|
|
@@ -97,10 +108,16 @@ export interface GeminiCliAdapterOptions {
|
|
|
97
108
|
}
|
|
98
109
|
export declare function createAgentCliAdapter(options: AgentCliAdapterOptions): MartinAdapter;
|
|
99
110
|
/**
|
|
100
|
-
* Spawns `claude --output-format json --print "<prompt>"
|
|
111
|
+
* Spawns `claude --output-format stream-json --verbose --print "<prompt>" [extraArgs]`.
|
|
101
112
|
*
|
|
102
|
-
*
|
|
103
|
-
*
|
|
113
|
+
* `stream-json` emits one JSON event per line — including per-turn usage on
|
|
114
|
+
* each `assistant` message and a final `result` event carrying the same
|
|
115
|
+
* `result`/`usage`/`total_cost_usd` fields as single-blob `json` output — so
|
|
116
|
+
* MartinLoop can both (a) recover real token usage/cost as before, and
|
|
117
|
+
* (b) watch cumulative spend live and self-terminate the subprocess the
|
|
118
|
+
* moment it crosses the remaining per-attempt budget (see
|
|
119
|
+
* `streamingUsageCap` / `createStreamingUsageInspector`), instead of only
|
|
120
|
+
* discovering an overspend after the whole process has already exited.
|
|
104
121
|
*
|
|
105
122
|
* Requires the Claude Code CLI to be installed and authenticated:
|
|
106
123
|
* https://docs.anthropic.com/claude-code
|