@martinloop/mcp 0.2.7 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +49 -104
- package/dist/package-version.d.ts +1 -1
- package/dist/package-version.js +1 -1
- package/dist/prompts.d.ts +1 -1
- package/dist/resources.d.ts +1 -1
- package/dist/resources.js +2 -2
- package/dist/server-validation.d.ts +1 -0
- package/dist/server-validation.js +8 -0
- package/dist/server.js +87 -9
- package/dist/tools/doctor.d.ts +39 -1
- package/dist/tools/doctor.js +68 -9
- package/dist/tools/eval.js +3 -2
- package/dist/tools/get-run.d.ts +3 -0
- package/dist/tools/get-run.js +3 -1
- package/dist/tools/get-verification-results.d.ts +3 -0
- package/dist/tools/get-verification-results.js +3 -1
- package/dist/tools/plan.js +4 -2
- package/dist/tools/pr-tools.js +2 -1
- package/dist/tools/preflight.d.ts +41 -1
- package/dist/tools/preflight.js +74 -19
- package/dist/tools/run-dossier.d.ts +3 -0
- package/dist/tools/run-dossier.js +5 -2
- package/dist/tools/run-loop.d.ts +7 -2
- package/dist/tools/run-loop.js +67 -35
- package/dist/tools/run-store.js +67 -15
- package/dist/tools/tool-errors.js +1 -1
- package/dist/tools/tool-support.d.ts +8 -3
- package/dist/tools/tool-support.js +61 -18
- package/dist/tools/workflow-governance.d.ts +19 -3
- package/dist/tools/workflow-governance.js +107 -55
- package/dist/vendor/adapters/claude-cli.d.ts +45 -3
- package/dist/vendor/adapters/claude-cli.js +465 -45
- package/dist/vendor/adapters/cli-bridge.d.ts +46 -0
- package/dist/vendor/adapters/cli-bridge.js +147 -38
- package/dist/vendor/adapters/codex-launcher.d.ts +76 -0
- package/dist/vendor/adapters/codex-launcher.js +538 -0
- package/dist/vendor/adapters/index.d.ts +3 -2
- package/dist/vendor/adapters/index.js +3 -2
- package/dist/vendor/adapters/openai-compatible.d.ts +19 -4
- package/dist/vendor/adapters/openai-compatible.js +50 -19
- package/dist/vendor/adapters/runtime-support.d.ts +3 -0
- package/dist/vendor/adapters/runtime-support.js +9 -1
- package/dist/vendor/adapters/stub-direct-provider.js +3 -0
- package/dist/vendor/adapters/verifier-only.d.ts +2 -0
- package/dist/vendor/adapters/verifier-only.js +11 -4
- package/dist/vendor/contracts/index.d.ts +39 -0
- package/dist/vendor/contracts/index.js +2 -0
- package/dist/vendor/core/context-integrity.js +28 -3
- package/dist/vendor/core/grounding.d.ts +1 -0
- package/dist/vendor/core/grounding.js +6 -2
- package/dist/vendor/core/index.d.ts +24 -3
- package/dist/vendor/core/index.js +113 -21
- package/dist/vendor/core/leash.js +85 -8
- package/dist/vendor/core/persistence/index.d.ts +2 -0
- package/dist/vendor/core/persistence/index.js +1 -0
- package/dist/vendor/core/persistence/integrity.d.ts +38 -0
- package/dist/vendor/core/persistence/integrity.js +248 -0
- package/dist/vendor/core/persistence/store.d.ts +7 -0
- package/dist/vendor/core/persistence/store.js +25 -1
- package/dist/vendor/core/policy.d.ts +9 -0
- package/dist/workflow-state.d.ts +9 -0
- package/dist/workflow-state.js +46 -3
- package/package.json +2 -2
- package/server.json +2 -2
|
@@ -11,7 +11,8 @@
|
|
|
11
11
|
*
|
|
12
12
|
* MCP tools and integration tests use the same factories.
|
|
13
13
|
*/
|
|
14
|
-
import { readGitExecutionArtifacts, runSubprocess, runVerification } from "./cli-bridge.js";
|
|
14
|
+
import { readGitExecutionArtifacts, resolveGitRepositoryRoot, runSubprocess, runVerification } from "./cli-bridge.js";
|
|
15
|
+
import { buildCodexExecArgs } from "./codex-launcher.js";
|
|
15
16
|
import { createAdapterCapabilities, normalizeStructuredErrors, normalizeUsage } from "./runtime-support.js";
|
|
16
17
|
// ---------------------------------------------------------------------------
|
|
17
18
|
// Cost estimation
|
|
@@ -31,7 +32,14 @@ const MODEL_PRICING = {
|
|
|
31
32
|
// Keep legacy names working
|
|
32
33
|
"claude-opus": { inputPer1K: 0.015, outputPer1K: 0.075 },
|
|
33
34
|
"claude-sonnet": { inputPer1K: 0.003, outputPer1K: 0.015 },
|
|
34
|
-
"claude-haiku": { inputPer1K: 0.00025, outputPer1K: 0.00125 }
|
|
35
|
+
"claude-haiku": { inputPer1K: 0.00025, outputPer1K: 0.00125 },
|
|
36
|
+
// OpenAI coding models
|
|
37
|
+
"codex": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
|
|
38
|
+
"gpt-5-codex": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
|
|
39
|
+
"gpt-5.1-codex": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
|
|
40
|
+
"gpt-5.1-codex-max": { inputPer1K: 0.00125, cachedInputPer1K: 0.000125, outputPer1K: 0.01 },
|
|
41
|
+
"gpt-5.2-codex": { inputPer1K: 0.00175, cachedInputPer1K: 0.000175, outputPer1K: 0.014 },
|
|
42
|
+
"codex-mini-latest": { inputPer1K: 0.0015, cachedInputPer1K: 0.000375, outputPer1K: 0.006 }
|
|
35
43
|
};
|
|
36
44
|
function extractUsage(parsed, modelLabel) {
|
|
37
45
|
if (!parsed?.usage) {
|
|
@@ -42,21 +50,275 @@ function extractUsage(parsed, modelLabel) {
|
|
|
42
50
|
provenance: "unavailable"
|
|
43
51
|
});
|
|
44
52
|
}
|
|
45
|
-
const
|
|
46
|
-
(parsed.usage.cacheReadInputTokens ?? parsed.usage.cache_read_input_tokens ?? 0) +
|
|
53
|
+
const promptTokens = (parsed.usage.inputTokens ?? parsed.usage.input_tokens ?? 0) +
|
|
47
54
|
(parsed.usage.cacheCreationInputTokens ?? parsed.usage.cache_creation_input_tokens ?? 0);
|
|
55
|
+
const cachedInputTokens = parsed.usage.cacheReadInputTokens ?? parsed.usage.cache_read_input_tokens ?? 0;
|
|
56
|
+
const tokensIn = promptTokens + cachedInputTokens;
|
|
48
57
|
const tokensOut = parsed.usage.outputTokens ?? parsed.usage.output_tokens ?? 0;
|
|
49
58
|
const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
|
|
50
59
|
{ inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
|
|
51
|
-
|
|
52
|
-
|
|
60
|
+
// Prefer Claude's own authoritative total_cost_usd (present on the final
|
|
61
|
+
// `result` event in json/stream-json output) over our pricing-table estimate,
|
|
62
|
+
// which can drift from real billed cost (cache discounts, surcharges, etc).
|
|
63
|
+
const hasAuthoritativeCost = typeof parsed.total_cost_usd === "number";
|
|
64
|
+
const actualUsd = hasAuthoritativeCost
|
|
65
|
+
? parsed.total_cost_usd
|
|
66
|
+
: (promptTokens / 1000) * pricing.inputPer1K +
|
|
67
|
+
(cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
|
|
68
|
+
(tokensOut / 1000) * pricing.outputPer1K;
|
|
53
69
|
return normalizeUsage({
|
|
54
70
|
actualUsd: Number(actualUsd.toFixed(6)),
|
|
55
71
|
tokensIn,
|
|
56
72
|
tokensOut,
|
|
57
|
-
|
|
73
|
+
cachedInputTokens,
|
|
74
|
+
provenance: hasAuthoritativeCost ? "actual" : "estimated",
|
|
75
|
+
providerSettlement: {
|
|
76
|
+
providerId: "claude",
|
|
77
|
+
model: modelLabel ?? "claude",
|
|
78
|
+
transport: "cli",
|
|
79
|
+
source: "claude_json",
|
|
80
|
+
inputTokens: promptTokens,
|
|
81
|
+
cachedInputTokens,
|
|
82
|
+
outputTokens: tokensOut,
|
|
83
|
+
rawUsageAvailable: true,
|
|
84
|
+
settledAt: new Date().toISOString()
|
|
85
|
+
}
|
|
58
86
|
});
|
|
59
87
|
}
|
|
88
|
+
/**
 * Extracts the final agent message and token usage from `codex exec` JSONL output.
 *
 * Every non-empty stdout line is parsed as JSON. Lines that fail to parse, and
 * lines that parse to something other than a JSON object (for example `null`
 * or a bare number), are ignored so that property access on events is safe.
 *
 * The summary is the text of the last `item.completed` event whose item is an
 * `agent_message`; when no such text exists the trimmed raw stdout is used.
 * Usage is taken from the last `turn.completed` event that carries `usage`.
 *
 * Token accounting: Codex reports `cached_input_tokens` as a subset of
 * `input_tokens` and `reasoning_output_tokens` as a subset of `output_tokens`
 * (the OpenAI usage convention). The aggregate `tokensIn` / `tokensOut` and the
 * cost estimate therefore count each token once, billing the cached share at
 * the cached rate and only the remainder at the full input rate.
 * `providerSettlement` keeps the counters exactly as the provider reported them.
 *
 * @param {string} stdout - Raw stdout captured from the Codex subprocess.
 * @param {string} [modelLabel] - Key into MODEL_PRICING; falls back to the
 *   "codex" entry and then to the blended default rates.
 * @returns {{ summary: string, usage: object } | undefined} `undefined` when
 *   stdout contains no JSON object events at all.
 */
function extractCodexJsonlResult(stdout, modelLabel) {
    const events = stdout
        .split(/\r?\n/u)
        .map((line) => line.trim())
        .filter(Boolean)
        .map((line) => {
            try {
                return JSON.parse(line);
            }
            catch {
                return undefined;
            }
        })
        // JSON.parse can legitimately yield null or a scalar; only objects are events.
        .filter((event) => event !== null && typeof event === "object");
    if (events.length === 0) {
        return undefined;
    }
    const latestAgentMessage = [...events]
        .reverse()
        .find((event) => event.type === "item.completed" && event.item?.type === "agent_message");
    const latestTurnCompleted = [...events]
        .reverse()
        .find((event) => event.type === "turn.completed" && event.usage !== undefined);
    const summary = typeof latestAgentMessage?.item?.text === "string" && latestAgentMessage.item.text.trim().length > 0
        ? latestAgentMessage.item.text.trim()
        : stdout.trim();
    const settlementIdentity = {
        providerId: "codex",
        model: modelLabel ?? "codex",
        transport: "cli"
    };
    const reportedUsage = latestTurnCompleted?.usage;
    if (!reportedUsage) {
        return {
            summary,
            usage: normalizeUsage({
                actualUsd: 0,
                tokensIn: 0,
                tokensOut: 0,
                provenance: "unavailable",
                providerSettlement: {
                    ...settlementIdentity,
                    source: "unavailable",
                    inputTokens: 0,
                    outputTokens: 0,
                    rawUsageAvailable: false,
                    settledAt: new Date().toISOString()
                }
            })
        };
    }
    // Raw counters exactly as reported by the provider.
    const inputTokens = reportedUsage.input_tokens ?? 0;
    const cachedInputTokens = reportedUsage.cached_input_tokens ?? 0;
    const outputTokens = reportedUsage.output_tokens ?? 0;
    const reasoningOutputTokens = reportedUsage.reasoning_output_tokens ?? 0;
    // Cached tokens are already part of input_tokens; bill only the remainder at
    // the full rate. Clamp so an inconsistent report can never go negative.
    const uncachedInputTokens = Math.max(0, inputTokens - cachedInputTokens);
    const tokensIn = uncachedInputTokens + cachedInputTokens;
    // Reasoning tokens are already part of output_tokens; never count them twice.
    const tokensOut = Math.max(outputTokens, reasoningOutputTokens);
    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
        MODEL_PRICING["codex"] ??
        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
    const actualUsd = (uncachedInputTokens / 1000) * pricing.inputPer1K +
        (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
        (tokensOut / 1000) * pricing.outputPer1K;
    return {
        summary,
        usage: normalizeUsage({
            actualUsd: Number(actualUsd.toFixed(6)),
            tokensIn,
            tokensOut,
            cachedInputTokens,
            reasoningTokensOut: reasoningOutputTokens,
            provenance: "actual",
            providerSettlement: {
                ...settlementIdentity,
                source: "codex_jsonl",
                inputTokens,
                cachedInputTokens,
                outputTokens,
                reasoningOutputTokens,
                rawUsageAvailable: true,
                settledAt: new Date().toISOString()
            }
        })
    };
}
|
|
171
|
+
/**
 * Extracts the response text and token usage from Gemini CLI JSON output.
 *
 * stdout is parsed as a single JSON document. The summary is `response` when it
 * is a non-blank string, otherwise `error.message` when that is a non-blank
 * string, otherwise the trimmed raw stdout. Token counts are read from
 * `stats.inputTokens`, `stats.cachedReadTokens`, `stats.outputTokens` and
 * `stats.thoughtTokens`; when `stats` is absent or every counter is zero the
 * usage is reported as "unavailable".
 *
 * NOTE(review): the MODEL_PRICING entries visible alongside this function do
 * not include Gemini models, so unless one is defined elsewhere the blended
 * fallback rates are what price this usage — confirm that is intended.
 *
 * @param {string} stdout - Raw stdout captured from the Gemini subprocess.
 * @param {string} [modelLabel] - Key into MODEL_PRICING; also recorded as the
 *   settlement model (defaults to "flash").
 * @returns {{ summary: string, usage: object } | undefined} `undefined` when
 *   stdout is not JSON or does not parse to a plain JSON object.
 */
function extractGeminiJsonResult(stdout, modelLabel) {
    let parsed;
    try {
        parsed = JSON.parse(stdout);
    }
    catch {
        return undefined;
    }
    // JSON.parse accepts `null`, numbers, strings and arrays; none of those is a
    // Gemini result envelope, and `null` would throw on property access below.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        return undefined;
    }
    const summary = typeof parsed.response === "string" && parsed.response.trim().length > 0
        ? parsed.response.trim()
        : typeof parsed.error?.message === "string" && parsed.error.message.trim().length > 0
            ? parsed.error.message.trim()
            : stdout.trim();
    const settlementIdentity = {
        providerId: "gemini",
        model: modelLabel ?? "flash",
        transport: "cli"
    };
    const promptTokens = parsed.stats?.inputTokens ?? 0;
    const cachedInputTokens = parsed.stats?.cachedReadTokens ?? 0;
    const outputTokens = parsed.stats?.outputTokens ?? 0;
    const reasoningOutputTokens = parsed.stats?.thoughtTokens ?? 0;
    const hasUsage = parsed.stats !== undefined &&
        (promptTokens > 0 || cachedInputTokens > 0 || outputTokens > 0 || reasoningOutputTokens > 0);
    if (!hasUsage) {
        return {
            summary,
            usage: normalizeUsage({
                actualUsd: 0,
                tokensIn: 0,
                tokensOut: 0,
                provenance: "unavailable",
                providerSettlement: {
                    ...settlementIdentity,
                    source: "unavailable",
                    inputTokens: 0,
                    outputTokens: 0,
                    rawUsageAvailable: false,
                    settledAt: new Date().toISOString()
                }
            })
        };
    }
    const tokensIn = promptTokens + cachedInputTokens;
    const tokensOut = outputTokens + reasoningOutputTokens;
    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
    const actualUsd = (promptTokens / 1000) * pricing.inputPer1K +
        (cachedInputTokens / 1000) * (pricing.cachedInputPer1K ?? pricing.inputPer1K) +
        (tokensOut / 1000) * pricing.outputPer1K;
    return {
        summary,
        usage: normalizeUsage({
            actualUsd: Number(actualUsd.toFixed(6)),
            tokensIn,
            tokensOut,
            cachedInputTokens,
            reasoningTokensOut: reasoningOutputTokens,
            provenance: "actual",
            providerSettlement: {
                ...settlementIdentity,
                source: "gemini_json",
                inputTokens: promptTokens,
                cachedInputTokens,
                outputTokens,
                reasoningOutputTokens,
                rawUsageAvailable: true,
                settledAt: new Date().toISOString()
            }
        })
    };
}
|
|
242
|
+
/**
 * Builds a stateful inspector for newline-delimited JSON streamed on stdout.
 *
 * `onChunk` buffers incoming data, splits it on "\n" and inspects each complete
 * line. For every `assistant` event that carries `message.usage` it adds that
 * turn's tokens and estimated cost to running totals and, once the cumulative
 * estimate exceeds `capUsd` (when `capUsd > 0`), calls `terminate(reason)`.
 * The most recent `result` event is remembered as `finalResult`.
 *
 * Accounting details:
 * - `assistant` events that repeat an already-seen `message.id` are skipped, so
 *   a message surfaced as several events is only charged once.
 * - Cache-read tokens are priced at `pricing.cachedInputPer1K` when the pricing
 *   entry defines it, falling back to the regular input rate.
 * - Bytes are decoded with a streaming decoder so a multi-byte character split
 *   across two chunks is not corrupted.
 *
 * @param {number} capUsd - Spend ceiling in USD; values <= 0 disable the cap.
 * @param {string} [modelLabel] - Key into MODEL_PRICING; the blended default
 *   rates are used when there is no entry.
 * @returns {{ onChunk: (chunk: Uint8Array | string, terminate: (reason: string) => void) => void,
 *             snapshot: () => object }} `snapshot()` returns the running
 *   `cumulativeUsd`, `tokensIn`, `tokensOut`, `turns` and, when one was seen,
 *   `finalResult`.
 */
function createStreamingUsageInspector(capUsd, modelLabel) {
    const pricing = (modelLabel ? MODEL_PRICING[modelLabel] : undefined) ??
        { inputPer1K: BLENDED_INPUT_COST_PER_1K, outputPer1K: BLENDED_OUTPUT_COST_PER_1K };
    const cachedInputPer1K = pricing.cachedInputPer1K ?? pricing.inputPer1K;
    // `stream: true` decoding keeps incomplete trailing bytes until the next chunk.
    const decoder = new TextDecoder("utf-8");
    const countedMessageIds = new Set();
    let buffer = "";
    let cumulativeUsd = 0;
    let tokensIn = 0;
    let tokensOut = 0;
    let turns = 0;
    let finalResult;
    const ingestLine = (line, terminate) => {
        const trimmed = line.trim();
        if (!trimmed) {
            return;
        }
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch {
            return;
        }
        // `null` and scalars are valid JSON but not events; reading `.type` on
        // `null` would throw out of the stdout callback.
        if (event === null || typeof event !== "object") {
            return;
        }
        if (event.type === "assistant" && event.message?.usage) {
            const messageId = event.message.id;
            if (typeof messageId === "string") {
                if (countedMessageIds.has(messageId)) {
                    // Same message reported again: its usage was already counted.
                    return;
                }
                countedMessageIds.add(messageId);
            }
            const usage = event.message.usage;
            const fullRateTokensIn = (usage.input_tokens ?? usage.inputTokens ?? 0) +
                (usage.cache_creation_input_tokens ?? usage.cacheCreationInputTokens ?? 0);
            const cacheReadTokensIn = usage.cache_read_input_tokens ?? usage.cacheReadInputTokens ?? 0;
            const turnTokensIn = fullRateTokensIn + cacheReadTokensIn;
            const turnTokensOut = usage.output_tokens ?? usage.outputTokens ?? 0;
            tokensIn += turnTokensIn;
            tokensOut += turnTokensOut;
            turns += 1;
            cumulativeUsd += (fullRateTokensIn / 1000) * pricing.inputPer1K +
                (cacheReadTokensIn / 1000) * cachedInputPer1K +
                (turnTokensOut / 1000) * pricing.outputPer1K;
            if (capUsd > 0 && cumulativeUsd > capUsd) {
                terminate(`Streaming usage cap exceeded after ${String(turns)} turn(s): cumulative cost ~$${cumulativeUsd.toFixed(4)} ` +
                    `surpassed the per-attempt cap $${capUsd.toFixed(4)} (derived from remaining loop budget). ` +
                    `Subprocess terminated to bound runaway overspend.`);
            }
            return;
        }
        if (event.type === "result") {
            finalResult = event;
        }
    };
    return {
        onChunk: (chunk, terminate) => {
            buffer += typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });
            let newlineIndex = buffer.indexOf("\n");
            while (newlineIndex !== -1) {
                const line = buffer.slice(0, newlineIndex);
                buffer = buffer.slice(newlineIndex + 1);
                ingestLine(line, terminate);
                newlineIndex = buffer.indexOf("\n");
            }
        },
        snapshot: () => ({ cumulativeUsd, tokensIn, tokensOut, turns, ...(finalResult ? { finalResult } : {}) })
    };
}
|
|
298
|
+
/**
 * Scans line-delimited JSON (Claude `stream-json` output) and returns the last
 * event whose `type` is "result". That event carries the same
 * `result`/`usage`/`total_cost_usd` fields as the single-blob `json` format.
 *
 * Blank lines and lines that are not valid JSON are skipped.
 *
 * @param {string} stdout - Complete stdout text, one JSON object per line.
 * @returns {object | undefined} The final `result` event, or `undefined` when
 *   none is present.
 */
function parseStreamJsonResult(stdout) {
    const tryParse = (text) => {
        try {
            return JSON.parse(text);
        }
        catch {
            // Non-JSON or partial line: treated as "no event".
            return undefined;
        }
    };
    return stdout
        .split(/\r?\n/u)
        .map((rawLine) => rawLine.trim())
        .filter((line) => line.length > 0)
        .reduce((latestResult, line) => {
            const candidate = tryParse(line);
            return candidate?.type === "result" ? candidate : latestResult;
        }, undefined);
}
|
|
60
322
|
// ---------------------------------------------------------------------------
|
|
61
323
|
// Structural failure hint detection
|
|
62
324
|
//
|
|
@@ -90,6 +352,7 @@ export function createAgentCliAdapter(options) {
|
|
|
90
352
|
const verifyTimeoutMs = options.verifyTimeoutMs ?? 60_000;
|
|
91
353
|
const adapterId = `agent-cli:${options.adapterIdSuffix ?? options.command}`;
|
|
92
354
|
const supportsJsonOutput = options.supportsJsonOutput === true;
|
|
355
|
+
const supportsUsageSettlement = supportsJsonOutput || options.command === "codex" || options.command === "gemini";
|
|
93
356
|
const adapter = {
|
|
94
357
|
adapterId,
|
|
95
358
|
kind: "agent-cli",
|
|
@@ -100,10 +363,10 @@ export function createAgentCliAdapter(options) {
|
|
|
100
363
|
transport: "cli",
|
|
101
364
|
capabilities: createAdapterCapabilities({
|
|
102
365
|
preflight: true,
|
|
103
|
-
usageSettlement:
|
|
366
|
+
usageSettlement: supportsUsageSettlement,
|
|
104
367
|
diffArtifacts: true,
|
|
105
368
|
structuredErrors: true,
|
|
106
|
-
cachingSignals:
|
|
369
|
+
cachingSignals: supportsUsageSettlement
|
|
107
370
|
})
|
|
108
371
|
},
|
|
109
372
|
async execute(request) {
|
|
@@ -130,12 +393,45 @@ export function createAgentCliAdapter(options) {
|
|
|
130
393
|
}
|
|
131
394
|
const args = options.argsBuilder(prompt);
|
|
132
395
|
const stdinData = options.stdinBuilder?.(prompt);
|
|
396
|
+
// Live cumulative-cost circuit breaker: a single attempt should never be
|
|
397
|
+
// allowed to spend more than the loop has left. `--output-format json`
|
|
398
|
+
// only reports usage once the process exits, so for `stream-json` we
|
|
399
|
+
// watch per-turn usage events as they arrive and kill the subprocess the
|
|
400
|
+
// instant projected spend crosses what remains — bounding the worst case
|
|
401
|
+
// to roughly one turn's overshoot rather than the entire runaway session.
|
|
402
|
+
const streamingUsage = options.streamingUsageCap && request.context.remainingBudgetUsd > 0
|
|
403
|
+
? createStreamingUsageInspector(request.context.remainingBudgetUsd, options.model ?? options.command)
|
|
404
|
+
: undefined;
|
|
133
405
|
const agentResult = await runSubprocess(options.command, args, {
|
|
134
406
|
cwd: workingDirectory,
|
|
135
407
|
timeoutMs,
|
|
136
408
|
spawnImpl: options.spawnImpl,
|
|
137
|
-
...(stdinData === undefined ? {} : { stdinData })
|
|
409
|
+
...(stdinData === undefined ? {} : { stdinData }),
|
|
410
|
+
...(streamingUsage ? { onStdoutChunk: streamingUsage.onChunk } : {})
|
|
138
411
|
});
|
|
412
|
+
if (agentResult.terminationReason) {
|
|
413
|
+
const snapshot = streamingUsage?.snapshot();
|
|
414
|
+
const cumulativeUsd = snapshot?.cumulativeUsd ?? 0;
|
|
415
|
+
return {
|
|
416
|
+
status: "failed",
|
|
417
|
+
summary: `${options.command} subprocess terminated mid-run by the budget circuit breaker. ${agentResult.terminationReason}`,
|
|
418
|
+
usage: normalizeUsage({
|
|
419
|
+
actualUsd: Number(cumulativeUsd.toFixed(6)),
|
|
420
|
+
estimatedUsd: Number(cumulativeUsd.toFixed(6)),
|
|
421
|
+
tokensIn: snapshot?.tokensIn ?? 0,
|
|
422
|
+
tokensOut: snapshot?.tokensOut ?? 0,
|
|
423
|
+
provenance: "estimated"
|
|
424
|
+
}),
|
|
425
|
+
verification: {
|
|
426
|
+
passed: false,
|
|
427
|
+
summary: "Subprocess terminated by the streaming budget circuit breaker before verification could run."
|
|
428
|
+
},
|
|
429
|
+
failure: {
|
|
430
|
+
message: agentResult.terminationReason,
|
|
431
|
+
classHint: "budget_pressure"
|
|
432
|
+
}
|
|
433
|
+
};
|
|
434
|
+
}
|
|
139
435
|
if (agentResult.timedOut) {
|
|
140
436
|
return {
|
|
141
437
|
status: "failed",
|
|
@@ -170,45 +466,108 @@ export function createAgentCliAdapter(options) {
|
|
|
170
466
|
}
|
|
171
467
|
};
|
|
172
468
|
}
|
|
173
|
-
// Parse JSON output if the CLI supports it
|
|
469
|
+
// Parse JSON output if the CLI supports it. `stream-json` emits one JSON
|
|
470
|
+
// object per line — the final `result` event carries the same
|
|
471
|
+
// `result`/`usage`/`total_cost_usd` fields as single-blob `json` output.
|
|
174
472
|
let parsed;
|
|
175
473
|
if (supportsJsonOutput) {
|
|
176
474
|
try {
|
|
177
|
-
parsed =
|
|
475
|
+
parsed = options.streamingUsageCap
|
|
476
|
+
? parseStreamJsonResult(agentResult.stdout)
|
|
477
|
+
: JSON.parse(agentResult.stdout);
|
|
178
478
|
}
|
|
179
479
|
catch {
|
|
180
480
|
// Fall through to plain-text handling
|
|
181
481
|
}
|
|
182
482
|
}
|
|
183
|
-
const
|
|
483
|
+
const codexJsonlResult = !supportsJsonOutput && options.command === "codex"
|
|
484
|
+
? extractCodexJsonlResult(agentResult.stdout, options.model)
|
|
485
|
+
: undefined;
|
|
486
|
+
const geminiJsonResult = !supportsJsonOutput && options.command === "gemini"
|
|
487
|
+
? extractGeminiJsonResult(agentResult.stdout, options.model)
|
|
488
|
+
: undefined;
|
|
489
|
+
const producedStructuredCompletion = parsed?.result !== undefined ||
|
|
490
|
+
codexJsonlResult !== undefined ||
|
|
491
|
+
geminiJsonResult !== undefined;
|
|
492
|
+
if (agentResult.exitCode !== 0 && !producedStructuredCompletion) {
|
|
493
|
+
const failureMessage = formatPreVerifierSubprocessFailure(options.command, agentResult.stderr || agentResult.stdout, agentResult.exitCode);
|
|
494
|
+
return {
|
|
495
|
+
status: "failed",
|
|
496
|
+
summary: `${options.command} subprocess exited before verifier execution.`,
|
|
497
|
+
usage: normalizeUsage({
|
|
498
|
+
actualUsd: 0,
|
|
499
|
+
tokensIn: 0,
|
|
500
|
+
tokensOut: 0,
|
|
501
|
+
provenance: "unavailable"
|
|
502
|
+
}),
|
|
503
|
+
verification: { passed: false, summary: `Verifier not run: ${failureMessage}` },
|
|
504
|
+
failure: {
|
|
505
|
+
message: failureMessage
|
|
506
|
+
}
|
|
507
|
+
};
|
|
508
|
+
}
|
|
509
|
+
const agentText = codexJsonlResult?.summary ??
|
|
510
|
+
geminiJsonResult?.summary ??
|
|
511
|
+
parsed?.result ??
|
|
512
|
+
agentResult.stdout.trim();
|
|
184
513
|
const summary = truncate(agentText, 2000);
|
|
185
514
|
const usage = parsed?.usage
|
|
186
515
|
? extractUsage(parsed, options.model)
|
|
187
|
-
:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
516
|
+
: codexJsonlResult?.usage ??
|
|
517
|
+
geminiJsonResult?.usage ??
|
|
518
|
+
normalizeUsage({
|
|
519
|
+
actualUsd: estimatedUsage.actualUsd,
|
|
520
|
+
estimatedUsd: estimatedUsage.actualUsd,
|
|
521
|
+
tokensIn: estimatedUsage.tokensIn,
|
|
522
|
+
tokensOut: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
|
|
523
|
+
provenance: "estimated",
|
|
524
|
+
providerSettlement: options.command === "codex"
|
|
525
|
+
? {
|
|
526
|
+
providerId: "codex",
|
|
527
|
+
model: options.model ?? "codex",
|
|
528
|
+
transport: "cli",
|
|
529
|
+
source: "estimated_fallback",
|
|
530
|
+
inputTokens: estimatedUsage.tokensIn,
|
|
531
|
+
outputTokens: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
|
|
532
|
+
rawUsageAvailable: false,
|
|
533
|
+
settledAt: new Date().toISOString()
|
|
534
|
+
}
|
|
535
|
+
: options.command === "gemini"
|
|
536
|
+
? {
|
|
537
|
+
providerId: "gemini",
|
|
538
|
+
model: options.model ?? "flash",
|
|
539
|
+
transport: "cli",
|
|
540
|
+
source: "estimated_fallback",
|
|
541
|
+
inputTokens: estimatedUsage.tokensIn,
|
|
542
|
+
outputTokens: Math.max(estimatedUsage.tokensOut, Math.ceil(agentText.length / 4)),
|
|
543
|
+
rawUsageAvailable: false,
|
|
544
|
+
settledAt: new Date().toISOString()
|
|
545
|
+
}
|
|
546
|
+
: undefined
|
|
547
|
+
});
|
|
194
548
|
const verificationStack = request.context.verificationStack;
|
|
195
549
|
const verification = await runVerification(request.context.verificationPlan, workingDirectory, verifyTimeoutMs, verificationStack, options.spawnImpl);
|
|
196
550
|
// Check for zero-diff (agent ran but made no file changes)
|
|
197
551
|
const repoRoot = request.context.repoRoot;
|
|
552
|
+
const gitRepoRoot = repoRoot ? resolveGitRepositoryRoot(repoRoot) : undefined;
|
|
198
553
|
let noDiff = false;
|
|
199
|
-
if (
|
|
200
|
-
noDiff = await checkNoDiff(
|
|
554
|
+
if (gitRepoRoot) {
|
|
555
|
+
noDiff = await checkNoDiff(gitRepoRoot, options.spawnImpl);
|
|
201
556
|
}
|
|
202
557
|
// Extract structured errors from stderr/stdout for better failure context
|
|
203
558
|
const structuredErrors = normalizeStructuredErrors(extractStructuredErrors(agentResult.stderr, agentResult.stdout));
|
|
204
|
-
const executionArtifacts =
|
|
205
|
-
? await readGitExecutionArtifacts(
|
|
559
|
+
const executionArtifacts = gitRepoRoot
|
|
560
|
+
? await readGitExecutionArtifacts(gitRepoRoot, 5000, options.spawnImpl)
|
|
206
561
|
: undefined;
|
|
207
562
|
// Scope contract enforcement: check touched files against allowedPaths/deniedPaths
|
|
208
563
|
let scopeViolations = [];
|
|
209
564
|
const scopeCtx = request.context;
|
|
210
|
-
if (
|
|
211
|
-
const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
|
|
565
|
+
if (gitRepoRoot && (scopeCtx.allowedPaths?.length || scopeCtx.deniedPaths?.length)) {
|
|
566
|
+
const diffResult = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
|
|
567
|
+
cwd: gitRepoRoot,
|
|
568
|
+
timeoutMs: 5000,
|
|
569
|
+
spawnImpl: options.spawnImpl
|
|
570
|
+
});
|
|
212
571
|
if (diffResult.exitCode === 0 && diffResult.stdout.trim()) {
|
|
213
572
|
const touchedFiles = diffResult.stdout.trim().split("\n").filter(Boolean);
|
|
214
573
|
const allowed = scopeCtx.allowedPaths ?? [];
|
|
@@ -278,7 +637,12 @@ export function createAgentCliAdapter(options) {
|
|
|
278
637
|
}
|
|
279
638
|
// Reset tracked files to HEAD so next attempt starts from clean state
|
|
280
639
|
try {
|
|
281
|
-
|
|
640
|
+
if (gitRepoRoot) {
|
|
641
|
+
await runSubprocess("git", ["restore", "--staged", "--worktree", "."], {
|
|
642
|
+
cwd: gitRepoRoot,
|
|
643
|
+
timeoutMs: 5000
|
|
644
|
+
});
|
|
645
|
+
}
|
|
282
646
|
}
|
|
283
647
|
catch {
|
|
284
648
|
// Non-fatal
|
|
@@ -326,10 +690,16 @@ export function createAgentCliAdapter(options) {
|
|
|
326
690
|
// Pre-configured: Claude CLI
|
|
327
691
|
// ---------------------------------------------------------------------------
|
|
328
692
|
/**
|
|
329
|
-
* Spawns `claude --output-format json --print "<prompt>"
|
|
693
|
+
* Spawns `claude --output-format stream-json --verbose --print "<prompt>" [extraArgs]`.
|
|
330
694
|
*
|
|
331
|
-
*
|
|
332
|
-
*
|
|
695
|
+
* `stream-json` emits one JSON event per line — including per-turn usage on
|
|
696
|
+
* each `assistant` message and a final `result` event carrying the same
|
|
697
|
+
* `result`/`usage`/`total_cost_usd` fields as single-blob `json` output — so
|
|
698
|
+
* MartinLoop can both (a) recover real token usage/cost as before, and
|
|
699
|
+
* (b) watch cumulative spend live and self-terminate the subprocess the
|
|
700
|
+
* moment it crosses the remaining per-attempt budget (see
|
|
701
|
+
* `streamingUsageCap` / `createStreamingUsageInspector`), instead of only
|
|
702
|
+
* discovering an overspend after the whole process has already exited.
|
|
333
703
|
*
|
|
334
704
|
* Requires the Claude Code CLI to be installed and authenticated:
|
|
335
705
|
* https://docs.anthropic.com/claude-code
|
|
@@ -346,10 +716,12 @@ export function createClaudeCliAdapter(options = {}) {
|
|
|
346
716
|
timeoutMs: options.timeoutMs,
|
|
347
717
|
verifyTimeoutMs: options.verifyTimeoutMs,
|
|
348
718
|
supportsJsonOutput: true,
|
|
719
|
+
streamingUsageCap: true,
|
|
349
720
|
spawnImpl: options.spawnImpl,
|
|
350
721
|
argsBuilder: (_prompt) => [
|
|
351
722
|
"--output-format",
|
|
352
|
-
"json",
|
|
723
|
+
"stream-json",
|
|
724
|
+
"--verbose",
|
|
353
725
|
"--print",
|
|
354
726
|
"--dangerously-skip-permissions",
|
|
355
727
|
...modelArgs,
|
|
@@ -372,12 +744,12 @@ export function createClaudeCliAdapter(options = {}) {
|
|
|
372
744
|
* npm install -g @openai/codex
|
|
373
745
|
*/
|
|
374
746
|
export function createCodexCliAdapter(options = {}) {
|
|
375
|
-
const modelArgs = options.model ? ["--model", options.model] : [];
|
|
376
747
|
const extraArgs = options.extraArgs ?? [];
|
|
377
748
|
const sandbox = options.sandbox ?? "workspace-write";
|
|
378
749
|
const workingDirectory = options.workingDirectory ?? process.cwd();
|
|
750
|
+
const command = options.command ?? "codex";
|
|
379
751
|
return createAgentCliAdapter({
|
|
380
|
-
command
|
|
752
|
+
command,
|
|
381
753
|
adapterIdSuffix: "codex",
|
|
382
754
|
model: options.model ?? "codex",
|
|
383
755
|
label: options.label ?? "Codex CLI adapter",
|
|
@@ -386,17 +758,53 @@ export function createCodexCliAdapter(options = {}) {
|
|
|
386
758
|
verifyTimeoutMs: options.verifyTimeoutMs,
|
|
387
759
|
supportsJsonOutput: false,
|
|
388
760
|
spawnImpl: options.spawnImpl,
|
|
389
|
-
argsBuilder: () =>
|
|
390
|
-
"exec",
|
|
391
|
-
"--cd",
|
|
761
|
+
argsBuilder: () => buildCodexExecArgs({
|
|
392
762
|
workingDirectory,
|
|
393
|
-
"--sandbox",
|
|
394
763
|
sandbox,
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
764
|
+
...(options.model ? { model: options.model } : {}),
|
|
765
|
+
extraArgs,
|
|
766
|
+
mode: "prompt"
|
|
767
|
+
}),
|
|
768
|
+
stdinBuilder: (prompt) => prompt
|
|
769
|
+
});
|
|
770
|
+
}
|
|
771
|
+
// ---------------------------------------------------------------------------
|
|
772
|
+
// Pre-configured: Gemini CLI
|
|
773
|
+
// ---------------------------------------------------------------------------
|
|
774
|
+
/**
|
|
775
|
+
* Spawns `gemini --model <model> --prompt "" --approval-mode <mode> --output-format json [...]`.
|
|
776
|
+
*
|
|
777
|
+
* The prompt is delivered via stdin while forcing headless mode with `--prompt ""`,
|
|
778
|
+
* which keeps large MartinLoop prompts off the command line on Windows.
|
|
779
|
+
*
|
|
780
|
+
* Requires the Gemini CLI to be installed and authenticated:
|
|
781
|
+
* npm install -g @google/gemini-cli
|
|
782
|
+
*/
|
|
783
|
+
export function createGeminiCliAdapter(options = {}) {
|
|
784
|
+
const model = options.model ?? "flash";
|
|
785
|
+
const approvalMode = options.approvalMode ?? "yolo";
|
|
786
|
+
const extraArgs = options.extraArgs ?? [];
|
|
787
|
+
return createAgentCliAdapter({
|
|
788
|
+
command: "gemini",
|
|
789
|
+
adapterIdSuffix: "gemini",
|
|
790
|
+
model,
|
|
791
|
+
label: options.label ?? "Gemini CLI adapter",
|
|
792
|
+
workingDirectory: options.workingDirectory,
|
|
793
|
+
timeoutMs: options.timeoutMs,
|
|
794
|
+
verifyTimeoutMs: options.verifyTimeoutMs,
|
|
795
|
+
supportsJsonOutput: false,
|
|
796
|
+
spawnImpl: options.spawnImpl,
|
|
797
|
+
argsBuilder: () => [
|
|
798
|
+
"--model",
|
|
799
|
+
model,
|
|
800
|
+
"--prompt",
|
|
801
|
+
"",
|
|
802
|
+
"--approval-mode",
|
|
803
|
+
approvalMode,
|
|
804
|
+
...(options.sandbox ? ["--sandbox"] : []),
|
|
805
|
+
"--output-format",
|
|
806
|
+
"json",
|
|
807
|
+
...extraArgs
|
|
400
808
|
],
|
|
401
809
|
stdinBuilder: (prompt) => prompt
|
|
402
810
|
});
|
|
@@ -559,7 +967,15 @@ function redactSecretsForPrompt(input) {
|
|
|
559
967
|
return input
|
|
560
968
|
.replace(/\bOPENAI_API_KEY\s*=\s*[^\s"'`]+/giu, "OPENAI_API_KEY=[REDACTED_SECRET]")
|
|
561
969
|
.replace(/\bsk-[A-Za-z0-9_-]{8,}\b/gu, "[REDACTED_SECRET]")
|
|
562
|
-
.replace(/\bghp_[A-Za-z0-9_]{
|
|
970
|
+
.replace(/\bghp_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
|
|
971
|
+
.replace(/\bgithub_pat_[A-Za-z0-9_]{20,}\b/gu, "[REDACTED_SECRET]")
|
|
972
|
+
.replace(/\b(?:gho|ghu|ghs|ghr)_[A-Za-z0-9_]{16,}\b/gu, "[REDACTED_SECRET]")
|
|
973
|
+
.replace(/\bAKIA[0-9A-Z]{16}\b/gu, "[REDACTED_SECRET]")
|
|
974
|
+
.replace(/\b(?:aws_secret_access_key|AWS_SECRET_ACCESS_KEY)\s*[:=]\s*[^\s"'`]+/giu, "AWS_SECRET_ACCESS_KEY=[REDACTED_SECRET]")
|
|
975
|
+
.replace(/\bxox[baprs]-[A-Za-z0-9-]{10,}\b/giu, "[REDACTED_SECRET]")
|
|
976
|
+
.replace(/\bAIza[0-9A-Za-z_-]{30,}\b/gu, "[REDACTED_SECRET]")
|
|
977
|
+
.replace(/-----BEGIN(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----[\s\S]*?-----END(?:\s+[A-Z0-9]+)*\s+PRIVATE KEY-----/gu, "[REDACTED_SECRET]")
|
|
978
|
+
.replace(/\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\b/gu, "[REDACTED_SECRET]")
|
|
563
979
|
.replace(/\B\.env(?!\.example\b)(?:\.[A-Za-z0-9._-]+)?\b/giu, "[REDACTED_PATH]");
|
|
564
980
|
}
|
|
565
981
|
function extractStructuredErrors(stderr, stdout) {
|
|
@@ -579,7 +995,11 @@ function extractStructuredErrors(stderr, stdout) {
|
|
|
579
995
|
}
|
|
580
996
|
return errors.slice(0, 10); // cap at 10 to avoid bloating prompts
|
|
581
997
|
}
|
|
582
|
-
async function checkNoDiff(repoRoot) {
|
|
583
|
-
const result = await runSubprocess("git", ["diff", "--name-only", "HEAD"], {
|
|
998
|
+
/**
 * Reports whether `git diff --name-only HEAD` lists no files for the given
 * repository directory.
 *
 * NOTE(review): this only reflects what `git diff` against HEAD prints, so
 * brand-new untracked files presumably do not count as a change here —
 * confirm that is the intended meaning of "no diff".
 *
 * @param {string} repoRoot - Directory used as the working directory for git.
 * @param {Function} [spawnImpl] - Spawn implementation forwarded to `runSubprocess`.
 * @returns {Promise<boolean>} `true` only when git exits with code 0 and prints
 *   nothing but whitespace; `false` otherwise.
 */
async function checkNoDiff(repoRoot, spawnImpl) {
    const subprocessOptions = { cwd: repoRoot, timeoutMs: 5000, spawnImpl };
    const outcome = await runSubprocess("git", ["diff", "--name-only", "HEAD"], subprocessOptions);
    if (outcome.exitCode !== 0) {
        return false;
    }
    return outcome.stdout.trim().length === 0;
}
|