@martinloop/mcp 0.1.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -135
- package/dist/discovery-metadata.d.ts +16 -0
- package/dist/discovery-metadata.js +62 -0
- package/dist/discovery-support.d.ts +62 -0
- package/dist/discovery-support.js +224 -0
- package/dist/package-version.d.ts +1 -0
- package/dist/package-version.js +3 -0
- package/dist/prompts.d.ts +13 -0
- package/dist/prompts.js +455 -0
- package/dist/resources.d.ts +29 -0
- package/dist/resources.js +575 -0
- package/dist/server-validation.d.ts +2 -3
- package/dist/server-validation.js +295 -87
- package/dist/server.d.ts +76 -7
- package/dist/server.js +1135 -247
- package/dist/tools/doctor.js +14 -6
- package/dist/tools/get-attempt.d.ts +15 -0
- package/dist/tools/get-attempt.js +15 -0
- package/dist/tools/get-run.d.ts +24 -0
- package/dist/tools/get-run.js +23 -0
- package/dist/tools/get-status.d.ts +11 -0
- package/dist/tools/get-status.js +12 -2
- package/dist/tools/get-verification-results.d.ts +14 -0
- package/dist/tools/get-verification-results.js +14 -0
- package/dist/tools/inspect-loop.d.ts +9 -0
- package/dist/tools/inspect-loop.js +11 -2
- package/dist/tools/list-runs.d.ts +29 -0
- package/dist/tools/list-runs.js +24 -0
- package/dist/tools/preflight.js +7 -2
- package/dist/tools/run-dossier.d.ts +41 -0
- package/dist/tools/run-dossier.js +41 -0
- package/dist/tools/run-loop.d.ts +19 -0
- package/dist/tools/run-loop.js +41 -3
- package/dist/tools/run-store.d.ts +57 -3
- package/dist/tools/run-store.js +404 -53
- package/dist/tools/tool-errors.d.ts +37 -0
- package/dist/tools/tool-errors.js +170 -0
- package/dist/tools/tool-response.d.ts +16 -0
- package/dist/tools/tool-response.js +34 -0
- package/dist/tools/tool-support.d.ts +92 -2
- package/dist/tools/tool-support.js +358 -63
- package/dist/tools/triage-runs.d.ts +33 -0
- package/dist/tools/triage-runs.js +138 -0
- package/dist/vendor/adapters/claude-cli.js +0 -1
- package/dist/vendor/adapters/cli-bridge.js +0 -1
- package/dist/vendor/adapters/direct-provider.js +0 -1
- package/dist/vendor/adapters/index.js +0 -1
- package/dist/vendor/adapters/runtime-support.js +0 -1
- package/dist/vendor/adapters/stub-agent-cli.js +0 -1
- package/dist/vendor/adapters/stub-direct-provider.js +0 -1
- package/dist/vendor/adapters/verifier-only.js +0 -1
- package/dist/vendor/contracts/governance.js +0 -1
- package/dist/vendor/contracts/index.d.ts +2 -0
- package/dist/vendor/contracts/index.js +1 -1
- package/dist/vendor/contracts/operator.d.ts +19 -0
- package/dist/vendor/contracts/operator.js +11 -0
- package/dist/vendor/core/compiler.js +0 -1
- package/dist/vendor/core/context-integrity.js +0 -1
- package/dist/vendor/core/grounding.js +0 -1
- package/dist/vendor/core/index.js +1 -2
- package/dist/vendor/core/leash.js +19 -12
- package/dist/vendor/core/persistence/compiler.js +0 -1
- package/dist/vendor/core/persistence/index.js +0 -1
- package/dist/vendor/core/persistence/ledger.js +0 -1
- package/dist/vendor/core/persistence/runs-reader.js +0 -1
- package/dist/vendor/core/persistence/store.js +0 -1
- package/dist/vendor/core/policy.js +0 -1
- package/dist/vendor/core/red-blue/red-phase.d.ts +64 -0
- package/dist/vendor/core/red-blue/red-phase.js +135 -0
- package/dist/vendor/core/red-blue/risk-tiers.d.ts +22 -0
- package/dist/vendor/core/red-blue/risk-tiers.js +32 -0
- package/dist/vendor/core/rollback.js +2 -3
- package/package.json +10 -3
- package/server.json +2 -2
|
@@ -1,17 +1,30 @@
|
|
|
1
1
|
import { spawnSync } from "node:child_process";
|
|
2
2
|
import { readdir, stat } from "node:fs/promises";
|
|
3
3
|
import { join } from "node:path";
|
|
4
|
-
import { evaluateCostGovernor,
|
|
4
|
+
import { evaluateCostGovernor, resolveRunsRoot } from "../vendor/core/index.js";
|
|
5
|
+
import { readAllLoopRecordsSafely } from "./run-store.js";
|
|
6
|
+
const CLI_CACHE_TTL_MS = 60_000;
|
|
7
|
+
const RUN_STORE_CACHE_TTL_MS = 5_000;
|
|
8
|
+
const cliAvailabilityCache = new Map();
|
|
9
|
+
const runStoreInspectionCache = new Map();
|
|
5
10
|
export function resolveExecutionMode() {
|
|
6
11
|
const liveMode = process.env.MARTIN_LIVE !== "false";
|
|
7
12
|
return {
|
|
8
13
|
liveMode,
|
|
9
|
-
mode: liveMode ? "live" : "stub"
|
|
14
|
+
mode: liveMode ? "live" : "stub",
|
|
15
|
+
detail: liveMode
|
|
16
|
+
? "Live CLI execution is enabled."
|
|
17
|
+
: "Stub mode is active because MARTIN_LIVE=false."
|
|
10
18
|
};
|
|
11
19
|
}
|
|
12
|
-
export function
|
|
20
|
+
export function detectCliAvailability(command) {
|
|
21
|
+
const cacheKey = `${process.platform}:${command}`;
|
|
22
|
+
const cached = cliAvailabilityCache.get(cacheKey);
|
|
23
|
+
if (cached && cached.expiresAt > Date.now()) {
|
|
24
|
+
return cached.value;
|
|
25
|
+
}
|
|
13
26
|
const locator = process.platform === "win32" ? "where.exe" : "which";
|
|
14
|
-
const result = spawnSync(locator, [
|
|
27
|
+
const result = spawnSync(locator, [command], {
|
|
15
28
|
encoding: "utf8",
|
|
16
29
|
stdio: ["ignore", "pipe", "pipe"]
|
|
17
30
|
});
|
|
@@ -21,21 +34,168 @@ export function getEngineAvailability(engine) {
|
|
|
21
34
|
.map((line) => line.trim())
|
|
22
35
|
.find(Boolean)
|
|
23
36
|
: undefined;
|
|
24
|
-
|
|
37
|
+
const value = result.status === 0
|
|
25
38
|
? {
|
|
39
|
+
command,
|
|
26
40
|
available: true,
|
|
27
|
-
|
|
41
|
+
locator,
|
|
42
|
+
detail: `${command} is available on PATH.`,
|
|
28
43
|
...(resolvedPath ? { resolvedPath } : {})
|
|
29
44
|
}
|
|
30
45
|
: {
|
|
46
|
+
command,
|
|
31
47
|
available: false,
|
|
32
|
-
|
|
48
|
+
locator,
|
|
49
|
+
detail: `${command} is not available on PATH.`
|
|
33
50
|
};
|
|
51
|
+
cliAvailabilityCache.set(cacheKey, {
|
|
52
|
+
expiresAt: Date.now() + CLI_CACHE_TTL_MS,
|
|
53
|
+
value
|
|
54
|
+
});
|
|
55
|
+
return value;
|
|
56
|
+
}
|
|
57
|
+
export function getEngineAvailability(engine) {
|
|
58
|
+
return detectCliAvailability(engine);
|
|
34
59
|
}
|
|
35
60
|
export function formatUsd(value) {
|
|
36
61
|
return `$${value.toFixed(2)}`;
|
|
37
62
|
}
|
|
63
|
+
export function buildLoopPreview(loop) {
|
|
64
|
+
const costState = evaluateCostGovernor({
|
|
65
|
+
budget: loop.budget,
|
|
66
|
+
cost: {
|
|
67
|
+
actualUsd: loop.cost.actualUsd,
|
|
68
|
+
avoidedUsd: loop.cost.avoidedUsd ?? 0,
|
|
69
|
+
tokensIn: loop.cost.tokensIn,
|
|
70
|
+
tokensOut: loop.cost.tokensOut
|
|
71
|
+
},
|
|
72
|
+
attemptsUsed: loop.attempts.length
|
|
73
|
+
});
|
|
74
|
+
const lastAttempt = loop.attempts.at(-1);
|
|
75
|
+
return {
|
|
76
|
+
loopId: loop.loopId,
|
|
77
|
+
title: loop.task?.title ?? loop.loopId,
|
|
78
|
+
objective: loop.task?.objective ?? "Loop record summary",
|
|
79
|
+
status: loop.status,
|
|
80
|
+
lifecycleState: loop.lifecycleState,
|
|
81
|
+
...(loop.createdAt ? { createdAt: loop.createdAt } : {}),
|
|
82
|
+
...(loop.updatedAt ? { updatedAt: loop.updatedAt } : {}),
|
|
83
|
+
attempts: loop.attempts.length,
|
|
84
|
+
costUsd: loop.cost.actualUsd,
|
|
85
|
+
avoidedUsd: loop.cost.avoidedUsd ?? 0,
|
|
86
|
+
pressure: costState.pressure,
|
|
87
|
+
shouldStop: costState.shouldStop,
|
|
88
|
+
remainingBudgetUsd: costState.remainingBudgetUsd,
|
|
89
|
+
remainingIterations: costState.remainingIterations,
|
|
90
|
+
remainingTokens: costState.remainingTokens,
|
|
91
|
+
...(lastAttempt ? { lastAttempt: buildAttemptSummary(lastAttempt) } : {})
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
export function buildAttemptSummary(attempt, artifacts) {
|
|
95
|
+
return {
|
|
96
|
+
index: attempt.index,
|
|
97
|
+
...(attempt.attemptId ? { attemptId: attempt.attemptId } : {}),
|
|
98
|
+
...(attempt.adapterId ? { adapterId: attempt.adapterId } : {}),
|
|
99
|
+
...(attempt.model ? { model: attempt.model } : {}),
|
|
100
|
+
...(attempt.failureClass ? { failureClass: attempt.failureClass } : {}),
|
|
101
|
+
...(attempt.intervention ? { intervention: attempt.intervention } : {}),
|
|
102
|
+
...(attempt.startedAt ? { startedAt: attempt.startedAt } : {}),
|
|
103
|
+
...(attempt.completedAt ? { completedAt: attempt.completedAt } : {}),
|
|
104
|
+
...(attempt.summary ? { summary: attempt.summary } : {}),
|
|
105
|
+
...(artifacts ? { artifacts } : {})
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
/**
 * Summarizes the artifacts recorded on a loop record.
 *
 * @param {{ artifacts?: Array<{ artifactId: unknown, kind: unknown, label: unknown, uri: unknown }> }} loop
 *   Loop record; a missing `artifacts` list is treated as empty.
 * @returns {{ totalCount: number, kinds: Record<string, number>, highlights: Array<{ artifactId: unknown, kind: unknown, label: unknown, uri: unknown }> }}
 *   Total artifact count, a per-kind tally, and the first five artifacts
 *   reduced to their identifying fields.
 */
export function buildArtifactSummary(loop) {
    const artifacts = loop.artifacts ?? [];
    // Tally in a Map rather than a plain object: with `{}` a kind named like an
    // Object.prototype member ("constructor", "toString") would start from the
    // inherited function instead of 0, and "__proto__" would be dropped.
    const tally = new Map();
    for (const artifact of artifacts) {
        const bucket = String(artifact.kind);
        tally.set(bucket, (tally.get(bucket) ?? 0) + 1);
    }
    return {
        totalCount: artifacts.length,
        // Object.fromEntries defines own data properties, so every bucket
        // (including "__proto__") survives as a regular key.
        kinds: Object.fromEntries(tally),
        highlights: artifacts.slice(0, 5).map((artifact) => ({
            artifactId: artifact.artifactId,
            kind: artifact.kind,
            label: artifact.label,
            uri: artifact.uri
        }))
    };
}
|
|
125
|
+
export function buildVerificationSummary(loop, ledgerEvents = []) {
|
|
126
|
+
const verificationEvents = (loop.events ?? []).filter((event) => event.type === "verification.completed");
|
|
127
|
+
const verificationLedgerEvents = ledgerEvents.filter((event) => event.kind === "verification.completed");
|
|
128
|
+
const warnings = [];
|
|
129
|
+
const ledgerWarnings = getLedgerWarnings(ledgerEvents);
|
|
130
|
+
warnings.push(...ledgerWarnings);
|
|
131
|
+
if (verificationEvents.length === 0) {
|
|
132
|
+
warnings.push(verificationLedgerEvents.length > 0
|
|
133
|
+
? "No verification.completed events were found in the loop record; using ledger evidence."
|
|
134
|
+
: "No verification.completed events were found in the loop record.");
|
|
135
|
+
}
|
|
136
|
+
if (verificationLedgerEvents.length === 0 && ledgerWarnings.length === 0) {
|
|
137
|
+
warnings.push("No verification.completed ledger events were found for this run.");
|
|
138
|
+
}
|
|
139
|
+
const selectedEvidence = selectLatestVerificationEvidence(loop, verificationEvents, verificationLedgerEvents);
|
|
140
|
+
warnings.push(...selectedEvidence.warnings);
|
|
141
|
+
const latestEvidence = selectedEvidence.evidence;
|
|
142
|
+
if (!latestEvidence) {
|
|
143
|
+
return {
|
|
144
|
+
status: "unavailable",
|
|
145
|
+
eventCount: verificationEvents.length,
|
|
146
|
+
ledgerEventCount: verificationLedgerEvents.length,
|
|
147
|
+
warnings
|
|
148
|
+
};
|
|
149
|
+
}
|
|
150
|
+
return {
|
|
151
|
+
status: latestEvidence.passed === true
|
|
152
|
+
? "passed"
|
|
153
|
+
: latestEvidence.passed === false
|
|
154
|
+
? "failed"
|
|
155
|
+
: "unavailable",
|
|
156
|
+
eventCount: verificationEvents.length,
|
|
157
|
+
ledgerEventCount: verificationLedgerEvents.length,
|
|
158
|
+
...(latestEvidence.attemptIndex !== undefined ? { latestAttemptIndex: latestEvidence.attemptIndex } : {}),
|
|
159
|
+
...(latestEvidence.timestamp ? { completedAt: latestEvidence.timestamp } : {}),
|
|
160
|
+
...(typeof latestEvidence.summary === "string" && latestEvidence.summary.trim().length > 0
|
|
161
|
+
? { summary: latestEvidence.summary.trim() }
|
|
162
|
+
: {}),
|
|
163
|
+
warnings
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
/**
 * Returns the most recent loop events, newest first, in a compact shape.
 *
 * @param {{ events?: Array<{ type: unknown, timestamp?: unknown, lifecycleState?: unknown, payload?: unknown }> }} loop
 *   Loop record; a missing `events` list is treated as empty.
 * @param {number} [limit=5] Maximum number of events to return. Values that
 *   are not greater than zero (0, negatives, NaN) yield an empty list.
 * @returns {Array<{ type: unknown, timestamp?: unknown, lifecycleState?: unknown, payload: unknown }>}
 *   Up to `limit` events taken from the end of the list and reversed.
 */
export function buildEventSummaries(loop, limit = 5) {
    // `slice(-0)` is `slice(0)` and would return every event, and a negative
    // limit would turn into a positive start index, so guard before slicing.
    if (!(limit > 0)) {
        return [];
    }
    return (loop.events ?? [])
        .slice(-limit)
        .reverse()
        .map((event) => ({
            type: event.type,
            ...(event.timestamp ? { timestamp: event.timestamp } : {}),
            ...(event.lifecycleState ? { lifecycleState: event.lifecycleState } : {}),
            payload: event.payload ?? {}
        }));
}
|
|
177
|
+
export function buildLoopCollectionSummary(loops) {
|
|
178
|
+
const previews = loops
|
|
179
|
+
.map((loop) => buildLoopPreview(loop))
|
|
180
|
+
.sort((left, right) => {
|
|
181
|
+
const leftTime = toPreviewTimestamp(left);
|
|
182
|
+
const rightTime = toPreviewTimestamp(right);
|
|
183
|
+
return rightTime - leftTime;
|
|
184
|
+
});
|
|
185
|
+
const statusBreakdown = countBy(previews, "status");
|
|
186
|
+
const lifecycleBreakdown = countBy(previews, "lifecycleState");
|
|
187
|
+
return {
|
|
188
|
+
...(previews[0] ? { latestRun: previews[0] } : {}),
|
|
189
|
+
recentRuns: previews.slice(0, 5),
|
|
190
|
+
statusBreakdown,
|
|
191
|
+
lifecycleBreakdown
|
|
192
|
+
};
|
|
193
|
+
}
|
|
38
194
|
export async function inspectRunsRoot(runsRoot = resolveRunsRoot(process.env)) {
|
|
195
|
+
const cached = runStoreInspectionCache.get(runsRoot);
|
|
196
|
+
if (cached && cached.expiresAt > Date.now()) {
|
|
197
|
+
return cached.value;
|
|
198
|
+
}
|
|
39
199
|
let exists = false;
|
|
40
200
|
try {
|
|
41
201
|
exists = (await stat(runsRoot)).isDirectory();
|
|
@@ -43,68 +203,203 @@ export async function inspectRunsRoot(runsRoot = resolveRunsRoot(process.env)) {
|
|
|
43
203
|
catch {
|
|
44
204
|
exists = false;
|
|
45
205
|
}
|
|
46
|
-
|
|
206
|
+
const inspected = await readAllLoopRecordsSafely(runsRoot);
|
|
207
|
+
const summary = buildLoopCollectionSummary(inspected.loops);
|
|
208
|
+
const value = {
|
|
209
|
+
runsRoot,
|
|
210
|
+
exists,
|
|
211
|
+
loopCount: inspected.loops.length,
|
|
212
|
+
latestRun: summary.latestRun,
|
|
213
|
+
recentRuns: summary.recentRuns,
|
|
214
|
+
statusBreakdown: summary.statusBreakdown,
|
|
215
|
+
lifecycleBreakdown: summary.lifecycleBreakdown,
|
|
216
|
+
warnings: inspected.warnings
|
|
217
|
+
};
|
|
218
|
+
runStoreInspectionCache.set(runsRoot, {
|
|
219
|
+
expiresAt: Date.now() + RUN_STORE_CACHE_TTL_MS,
|
|
220
|
+
value
|
|
221
|
+
});
|
|
222
|
+
return value;
|
|
223
|
+
}
|
|
224
|
+
export function buildRunRecordPaths(runsRoot, loopId) {
|
|
225
|
+
const runDirectory = join(runsRoot, loopId);
|
|
226
|
+
return {
|
|
227
|
+
runsRoot,
|
|
228
|
+
runDirectory,
|
|
229
|
+
loopRecordPath: join(runDirectory, "loop-record.json"),
|
|
230
|
+
ledgerPath: join(runDirectory, "ledger.jsonl")
|
|
231
|
+
};
|
|
232
|
+
}
|
|
233
|
+
export function buildAttemptArtifactDirectory(runsRoot, loopId, attemptIndex) {
|
|
234
|
+
return join(runsRoot, loopId, "artifacts", `attempt-${String(attemptIndex).padStart(3, "0")}`);
|
|
235
|
+
}
|
|
236
|
+
export async function buildAttemptArtifactsReference(runsRoot, loopId, attemptIndex) {
|
|
237
|
+
const directory = buildAttemptArtifactDirectory(runsRoot, loopId, attemptIndex);
|
|
238
|
+
try {
|
|
239
|
+
const entries = await readdir(directory, { withFileTypes: true });
|
|
47
240
|
return {
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
241
|
+
directory,
|
|
242
|
+
available: true,
|
|
243
|
+
files: entries.filter((entry) => entry.isFile()).map((entry) => entry.name).sort()
|
|
51
244
|
};
|
|
52
245
|
}
|
|
246
|
+
catch {
|
|
247
|
+
return {
|
|
248
|
+
directory,
|
|
249
|
+
available: false,
|
|
250
|
+
files: []
|
|
251
|
+
};
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
export function buildCostSnapshot(cost) {
|
|
255
|
+
return {
|
|
256
|
+
actualUsd: cost.actualUsd,
|
|
257
|
+
avoidedUsd: cost.avoidedUsd ?? 0,
|
|
258
|
+
tokensIn: cost.tokensIn,
|
|
259
|
+
tokensOut: cost.tokensOut
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
export function buildBudgetSnapshot(budget) {
|
|
263
|
+
return {
|
|
264
|
+
maxUsd: budget.maxUsd,
|
|
265
|
+
softLimitUsd: budget.softLimitUsd,
|
|
266
|
+
maxIterations: budget.maxIterations,
|
|
267
|
+
maxTokens: budget.maxTokens
|
|
268
|
+
};
|
|
269
|
+
}
|
|
270
|
+
export function buildSuggestedResourceUris(loopId) {
|
|
271
|
+
return [
|
|
272
|
+
"martin://server/health",
|
|
273
|
+
"martin://runs/recent",
|
|
274
|
+
"martin://runs/triage",
|
|
275
|
+
`martin://runs/${loopId}`,
|
|
276
|
+
`martin://runs/${loopId}/verification`,
|
|
277
|
+
"martin://guides/mcp-usage",
|
|
278
|
+
"martin://guides/publish-readiness"
|
|
279
|
+
];
|
|
280
|
+
}
|
|
281
|
+
export function buildSuggestedPromptNames() {
|
|
282
|
+
return [
|
|
283
|
+
"martin_governed_coding_kickoff",
|
|
284
|
+
"martin_debug_failed_run",
|
|
285
|
+
"martin_publish_readiness_review",
|
|
286
|
+
"martin_triage_run_store"
|
|
287
|
+
];
|
|
288
|
+
}
|
|
289
|
+
/**
 * Counts how many items share each stringified value of `key`.
 *
 * @param {Array<Record<string, unknown>>} values Items to tally.
 * @param {string} key Property whose value (coerced with `String`) names the bucket.
 * @returns {Record<string, number>} Bucket name to occurrence count.
 */
function countBy(values, key) {
    // A Map keeps buckets such as "constructor" or "toString" from picking up
    // inherited Object.prototype members as their starting count.
    const tally = new Map();
    for (const value of values) {
        const bucket = String(value[key]);
        tally.set(bucket, (tally.get(bucket) ?? 0) + 1);
    }
    // Object.fromEntries creates own data properties, so "__proto__" is kept too.
    return Object.fromEntries(tally);
}
|
|
296
|
+
/**
 * Converts a preview's `updatedAt` (falling back to `createdAt`) into epoch
 * milliseconds for sorting.
 *
 * @param {{ updatedAt?: string, createdAt?: string }} loop Preview or loop record.
 * @returns {number} Epoch milliseconds, or 0 when no timestamp is present or
 *   it cannot be parsed.
 */
function toPreviewTimestamp(loop) {
    const value = loop.updatedAt ?? loop.createdAt;
    if (!value) {
        return 0;
    }
    const time = new Date(value).getTime();
    // An unparseable date yields NaN; returning NaN from a sort comparator
    // makes the ordering inconsistent, so fall back to 0 like a missing value.
    return Number.isFinite(time) ? time : 0;
}
|
|
300
|
+
function selectLatestVerificationEvidence(loop, verificationEvents, verificationLedgerEvents) {
|
|
53
301
|
const warnings = [];
|
|
54
|
-
const
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
attempts: loop.attempts.length,
|
|
85
|
-
costUsd: loop.cost.actualUsd,
|
|
86
|
-
avoidedUsd: loop.cost.avoidedUsd ?? 0,
|
|
87
|
-
pressure: costState.pressure,
|
|
88
|
-
shouldStop: costState.shouldStop,
|
|
89
|
-
remainingBudgetUsd: costState.remainingBudgetUsd,
|
|
90
|
-
remainingIterations: costState.remainingIterations,
|
|
91
|
-
remainingTokens: costState.remainingTokens
|
|
92
|
-
});
|
|
93
|
-
}
|
|
94
|
-
catch {
|
|
95
|
-
warnings.push(`Skipped unreadable loop record for '${entry.name}'.`);
|
|
96
|
-
}
|
|
302
|
+
const futureEvidenceCount = [
|
|
303
|
+
...verificationEvents.map((event) => event.timestamp),
|
|
304
|
+
...verificationLedgerEvents.map((event) => event.timestamp)
|
|
305
|
+
].filter(isFutureVerificationTimestamp).length;
|
|
306
|
+
if (futureEvidenceCount > 0) {
|
|
307
|
+
warnings.push(`Ignored ${futureEvidenceCount} future-dated verification evidence item(s) that cannot be trusted yet.`);
|
|
308
|
+
}
|
|
309
|
+
const evidence = [
|
|
310
|
+
...verificationEvents.map((event) => normalizeLoopVerificationEvidence(loop, event)),
|
|
311
|
+
...verificationLedgerEvents.map((event) => normalizeLedgerVerificationEvidence(loop, event))
|
|
312
|
+
].filter((candidate) => candidate !== undefined);
|
|
313
|
+
if (evidence.length === 0) {
|
|
314
|
+
return { warnings };
|
|
315
|
+
}
|
|
316
|
+
evidence.sort((left, right) => new Date(right.timestamp).getTime() - new Date(left.timestamp).getTime());
|
|
317
|
+
const latest = evidence[0];
|
|
318
|
+
if (!latest) {
|
|
319
|
+
return { warnings };
|
|
320
|
+
}
|
|
321
|
+
const latestAttemptEvidence = evidence.filter((candidate) => latest.attemptId
|
|
322
|
+
? candidate.attemptId === latest.attemptId
|
|
323
|
+
: latest.attemptIndex !== undefined
|
|
324
|
+
? candidate.attemptIndex === latest.attemptIndex
|
|
325
|
+
: false);
|
|
326
|
+
const distinctStatuses = new Set(latestAttemptEvidence
|
|
327
|
+
.map((candidate) => candidate.passed)
|
|
328
|
+
.filter((candidate) => typeof candidate === "boolean"));
|
|
329
|
+
if (distinctStatuses.size > 1) {
|
|
330
|
+
warnings.push("Verification evidence conflicts for the latest attempt; reporting status as unavailable.");
|
|
331
|
+
return { warnings };
|
|
97
332
|
}
|
|
98
|
-
loops.sort((left, right) => {
|
|
99
|
-
const leftTime = Date.parse(left.updatedAt ?? left.createdAt ?? "");
|
|
100
|
-
const rightTime = Date.parse(right.updatedAt ?? right.createdAt ?? "");
|
|
101
|
-
return (Number.isFinite(rightTime) ? rightTime : 0) - (Number.isFinite(leftTime) ? leftTime : 0);
|
|
102
|
-
});
|
|
103
333
|
return {
|
|
104
|
-
|
|
105
|
-
loopCount: loops.length,
|
|
106
|
-
...(loops[0] ? { latestRun: loops[0] } : {}),
|
|
334
|
+
evidence: latest,
|
|
107
335
|
warnings
|
|
108
336
|
};
|
|
109
337
|
}
|
|
110
|
-
|
|
338
|
+
function normalizeLoopVerificationEvidence(loop, event) {
|
|
339
|
+
if (!isTrustedVerificationTimestamp(event.timestamp)) {
|
|
340
|
+
return undefined;
|
|
341
|
+
}
|
|
342
|
+
const payload = isRecord(event.payload) ? event.payload : undefined;
|
|
343
|
+
const attemptId = typeof payload?.["attemptId"] === "string" ? payload["attemptId"] : undefined;
|
|
344
|
+
const attemptIndex = typeof payload?.["attemptIndex"] === "number" && Number.isInteger(payload["attemptIndex"])
|
|
345
|
+
? payload["attemptIndex"]
|
|
346
|
+
: undefined;
|
|
347
|
+
const matchedAttempt = attemptId
|
|
348
|
+
? loop.attempts.find((attempt) => attempt.attemptId === attemptId)
|
|
349
|
+
: attemptIndex !== undefined
|
|
350
|
+
? loop.attempts.find((attempt) => attempt.index === attemptIndex)
|
|
351
|
+
: undefined;
|
|
352
|
+
if (!matchedAttempt) {
|
|
353
|
+
return undefined;
|
|
354
|
+
}
|
|
355
|
+
return {
|
|
356
|
+
timestamp: event.timestamp,
|
|
357
|
+
...(matchedAttempt.attemptId ? { attemptId: matchedAttempt.attemptId } : {}),
|
|
358
|
+
attemptIndex: matchedAttempt.index,
|
|
359
|
+
...(typeof payload?.["passed"] === "boolean" ? { passed: payload["passed"] } : {}),
|
|
360
|
+
...(typeof payload?.["summary"] === "string" ? { summary: payload["summary"] } : {})
|
|
361
|
+
};
|
|
362
|
+
}
|
|
363
|
+
function normalizeLedgerVerificationEvidence(loop, event) {
|
|
364
|
+
if (!isTrustedVerificationTimestamp(event.timestamp)) {
|
|
365
|
+
return undefined;
|
|
366
|
+
}
|
|
367
|
+
if (event.attemptIndex === undefined || !Number.isInteger(event.attemptIndex)) {
|
|
368
|
+
return undefined;
|
|
369
|
+
}
|
|
370
|
+
const payload = isRecord(event.payload) ? event.payload : undefined;
|
|
371
|
+
const matchedAttempt = loop.attempts.find((attempt) => attempt.index === event.attemptIndex);
|
|
372
|
+
if (!matchedAttempt) {
|
|
373
|
+
return undefined;
|
|
374
|
+
}
|
|
375
|
+
return {
|
|
376
|
+
timestamp: event.timestamp,
|
|
377
|
+
...(matchedAttempt?.attemptId ? { attemptId: matchedAttempt.attemptId } : {}),
|
|
378
|
+
attemptIndex: event.attemptIndex,
|
|
379
|
+
...(typeof payload?.["passed"] === "boolean" ? { passed: payload["passed"] } : {}),
|
|
380
|
+
...(typeof payload?.["summary"] === "string" ? { summary: payload["summary"] } : {})
|
|
381
|
+
};
|
|
382
|
+
}
|
|
383
|
+
/**
 * Decides whether a verification event timestamp can be used as evidence.
 *
 * A timestamp is trusted when it is a string, number, or Date that parses to
 * a finite time no later than five minutes past the current clock.
 *
 * @param {unknown} value Raw timestamp taken from a loop or ledger event.
 * @returns {boolean} `true` when the timestamp is parseable and not too far in the future.
 */
function isTrustedVerificationTimestamp(value) {
    // `new Date(null)` and `new Date(false)` silently coerce to the epoch, so
    // a missing timestamp would otherwise look like valid 1970 evidence.
    const isDateLike = typeof value === "string" || typeof value === "number" || value instanceof Date;
    if (!isDateLike) {
        return false;
    }
    const timestamp = new Date(value).getTime();
    if (!Number.isFinite(timestamp)) {
        return false;
    }
    // Five-minute tolerance into the future (presumably to absorb clock skew
    // between writers; confirm with the ledger producer).
    const futureToleranceMs = 5 * 60_000;
    return timestamp <= Date.now() + futureToleranceMs;
}
|
|
390
|
+
function isFutureVerificationTimestamp(value) {
|
|
391
|
+
const timestamp = new Date(value).getTime();
|
|
392
|
+
if (!Number.isFinite(timestamp)) {
|
|
393
|
+
return false;
|
|
394
|
+
}
|
|
395
|
+
return timestamp > Date.now() + 5 * 60_000;
|
|
396
|
+
}
|
|
397
|
+
/**
 * Extracts string warnings attached to a ledger event collection.
 *
 * The collection is read for an optional `warnings` property; only string
 * entries are returned.
 *
 * @param {(Array<unknown> & { warnings?: unknown }) | null | undefined} ledgerEvents
 *   Ledger events, optionally carrying a `warnings` array.
 * @returns {string[]} The string warnings, or an empty list when none are attached.
 */
function getLedgerWarnings(ledgerEvents) {
    // Optional chaining keeps a null/undefined collection from throwing; the
    // caller's default parameter only covers `undefined`, not `null`.
    const attached = ledgerEvents?.warnings;
    return Array.isArray(attached)
        ? attached.filter((warning) => typeof warning === "string")
        : [];
}
|
|
403
|
+
function isRecord(value) {
|
|
404
|
+
return typeof value === "object" && value !== null;
|
|
405
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { type LoopPreview, type VerificationSummary } from "./tool-support.js";
|
|
2
|
+
import { type LoopListInput } from "./run-store.js";
|
|
3
|
+
export interface MartinTriageRunsInput extends LoopListInput {
|
|
4
|
+
includeHealthy?: boolean;
|
|
5
|
+
}
|
|
6
|
+
export interface MartinRunTriageFinding {
|
|
7
|
+
severity: "critical" | "high" | "medium" | "low";
|
|
8
|
+
summary: string;
|
|
9
|
+
reasonCodes: string[];
|
|
10
|
+
loop: LoopPreview;
|
|
11
|
+
verification: VerificationSummary;
|
|
12
|
+
suggestedResources: string[];
|
|
13
|
+
suggestedPrompts: string[];
|
|
14
|
+
}
|
|
15
|
+
export interface MartinTriageRunsOutput {
|
|
16
|
+
source: string;
|
|
17
|
+
runsRoot: string;
|
|
18
|
+
filters: {
|
|
19
|
+
limit: number;
|
|
20
|
+
includeHealthy: boolean;
|
|
21
|
+
status?: string;
|
|
22
|
+
lifecycleState?: string;
|
|
23
|
+
adapterId?: string;
|
|
24
|
+
model?: string;
|
|
25
|
+
updatedAfter?: string;
|
|
26
|
+
};
|
|
27
|
+
evaluatedRuns: number;
|
|
28
|
+
findingCount: number;
|
|
29
|
+
severityBreakdown: Record<string, number>;
|
|
30
|
+
findings: MartinRunTriageFinding[];
|
|
31
|
+
warnings: string[];
|
|
32
|
+
}
|
|
33
|
+
export declare function martinTriageRunsTool(input: MartinTriageRunsInput): Promise<MartinTriageRunsOutput>;
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { buildLoopPreview, buildSuggestedPromptNames, buildSuggestedResourceUris, buildVerificationSummary } from "./tool-support.js";
|
|
3
|
+
import { listLoopRecords, readLedgerEvents } from "./run-store.js";
|
|
4
|
+
import { resolveSafeLoopRecordPath } from "../server-validation.js";
|
|
5
|
+
const SEVERITY_RANK = {
|
|
6
|
+
critical: 4,
|
|
7
|
+
high: 3,
|
|
8
|
+
medium: 2,
|
|
9
|
+
low: 1
|
|
10
|
+
};
|
|
11
|
+
export async function martinTriageRunsTool(input) {
|
|
12
|
+
const listed = await listLoopRecords(input);
|
|
13
|
+
const warnings = [...listed.warnings];
|
|
14
|
+
const findings = await Promise.all(listed.loops.map(async (loop) => {
|
|
15
|
+
try {
|
|
16
|
+
const preview = buildLoopPreview(loop);
|
|
17
|
+
let ledgerEvents = [];
|
|
18
|
+
const canonicalLoopRecordPath = resolveSafeLoopRecordPath(loop.loopId, listed.runsRoot);
|
|
19
|
+
ledgerEvents = await readLedgerEvents({
|
|
20
|
+
source: listed.source,
|
|
21
|
+
sourceKind: "runs_root",
|
|
22
|
+
runsRoot: listed.runsRoot,
|
|
23
|
+
loop,
|
|
24
|
+
warnings: [],
|
|
25
|
+
canonicalRunDirectory: path.dirname(canonicalLoopRecordPath),
|
|
26
|
+
canonicalLoopRecordPath
|
|
27
|
+
});
|
|
28
|
+
const verification = buildVerificationSummary(loop, ledgerEvents);
|
|
29
|
+
return buildTriageFinding(preview, verification);
|
|
30
|
+
}
|
|
31
|
+
catch {
|
|
32
|
+
warnings.push(`Skipped triage for '${loop.loopId}' because its run record or verification evidence is unreadable.`);
|
|
33
|
+
return null;
|
|
34
|
+
}
|
|
35
|
+
}));
|
|
36
|
+
const filteredFindings = findings
|
|
37
|
+
.filter((finding) => finding !== null)
|
|
38
|
+
.filter((finding) => input.includeHealthy || finding.reasonCodes[0] !== "healthy")
|
|
39
|
+
.sort(compareFindings);
|
|
40
|
+
return {
|
|
41
|
+
source: listed.source,
|
|
42
|
+
runsRoot: listed.runsRoot,
|
|
43
|
+
filters: {
|
|
44
|
+
limit: input.limit ?? 20,
|
|
45
|
+
includeHealthy: input.includeHealthy ?? false,
|
|
46
|
+
...(input.status ? { status: input.status } : {}),
|
|
47
|
+
...(input.lifecycleState ? { lifecycleState: input.lifecycleState } : {}),
|
|
48
|
+
...(input.adapterId ? { adapterId: input.adapterId } : {}),
|
|
49
|
+
...(input.model ? { model: input.model } : {}),
|
|
50
|
+
...(input.updatedAfter ? { updatedAfter: input.updatedAfter } : {})
|
|
51
|
+
},
|
|
52
|
+
evaluatedRuns: listed.loops.length,
|
|
53
|
+
findingCount: filteredFindings.length,
|
|
54
|
+
severityBreakdown: filteredFindings.reduce((accumulator, finding) => {
|
|
55
|
+
accumulator[finding.severity] = (accumulator[finding.severity] ?? 0) + 1;
|
|
56
|
+
return accumulator;
|
|
57
|
+
}, {}),
|
|
58
|
+
findings: filteredFindings,
|
|
59
|
+
warnings
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
function buildTriageFinding(loop, verification) {
|
|
63
|
+
const reasonCodes = [];
|
|
64
|
+
let severity = "low";
|
|
65
|
+
if (verification.status === "failed") {
|
|
66
|
+
reasonCodes.push("verification_failed");
|
|
67
|
+
severity = maxSeverity(severity, "critical");
|
|
68
|
+
}
|
|
69
|
+
if (loop.pressure === "hard_limit" || loop.shouldStop) {
|
|
70
|
+
reasonCodes.push("budget_hard_limit");
|
|
71
|
+
severity = maxSeverity(severity, "critical");
|
|
72
|
+
}
|
|
73
|
+
if (loop.status === "failed") {
|
|
74
|
+
reasonCodes.push("status_failed");
|
|
75
|
+
severity = maxSeverity(severity, "high");
|
|
76
|
+
}
|
|
77
|
+
if (loop.lifecycleState === "diminishing_returns") {
|
|
78
|
+
reasonCodes.push("diminishing_returns");
|
|
79
|
+
severity = maxSeverity(severity, "high");
|
|
80
|
+
}
|
|
81
|
+
if (verification.status === "unavailable" && loop.attempts > 0) {
|
|
82
|
+
reasonCodes.push("verification_unavailable");
|
|
83
|
+
severity = maxSeverity(severity, "medium");
|
|
84
|
+
}
|
|
85
|
+
if (loop.pressure === "soft_limit") {
|
|
86
|
+
reasonCodes.push("budget_soft_limit");
|
|
87
|
+
severity = maxSeverity(severity, "medium");
|
|
88
|
+
}
|
|
89
|
+
if (loop.status === "exited") {
|
|
90
|
+
reasonCodes.push("status_exited");
|
|
91
|
+
severity = maxSeverity(severity, "medium");
|
|
92
|
+
}
|
|
93
|
+
if (reasonCodes.length === 0) {
|
|
94
|
+
reasonCodes.push("healthy");
|
|
95
|
+
}
|
|
96
|
+
return {
|
|
97
|
+
severity,
|
|
98
|
+
summary: summarizeFinding(loop, verification, severity, reasonCodes),
|
|
99
|
+
reasonCodes,
|
|
100
|
+
loop,
|
|
101
|
+
verification,
|
|
102
|
+
suggestedResources: buildSuggestedResourceUris(loop.loopId),
|
|
103
|
+
suggestedPrompts: buildSuggestedPromptNames()
|
|
104
|
+
};
|
|
105
|
+
}
|
|
106
|
+
/**
 * Builds the one-line human-readable summary for a triage finding.
 *
 * The first matching reason code, checked in the fixed priority order below,
 * decides the wording; a finding with none of them is described as healthy.
 *
 * @param {{ loopId: string, attempts: number, pressure: string, lifecycleState: string, remainingBudgetUsd: number }} loop
 *   Loop preview the finding refers to.
 * @param {{ status: string }} verification Verification summary for the loop.
 * @param {string} severity Severity label already chosen for the finding.
 * @param {string[]} reasonCodes Reason codes collected for the finding.
 * @returns {string} Summary sentence prefixed with the severity.
 */
function summarizeFinding(loop, verification, severity, reasonCodes) {
    if (reasonCodes.includes("verification_failed")) {
        return `Severity ${severity}: ${loop.loopId} failed verification after ${loop.attempts} attempt(s).`;
    }
    if (reasonCodes.includes("budget_hard_limit")) {
        return `Severity ${severity}: ${loop.loopId} exhausted its budget envelope with pressure ${loop.pressure}.`;
    }
    if (reasonCodes.includes("status_failed")) {
        return `Severity ${severity}: ${loop.loopId} is currently failed/${loop.lifecycleState}.`;
    }
    // Without this branch a finding whose only reason is diminishing returns
    // (rated "high") fell through to the "is healthy" sentence below.
    if (reasonCodes.includes("diminishing_returns")) {
        return `Severity ${severity}: ${loop.loopId} reached diminishing returns after ${loop.attempts} attempt(s).`;
    }
    if (reasonCodes.includes("verification_unavailable")) {
        return `Severity ${severity}: ${loop.loopId} has ${loop.attempts} attempt(s) but verification is unavailable.`;
    }
    if (reasonCodes.includes("budget_soft_limit")) {
        return `Severity ${severity}: ${loop.loopId} is at soft-limit pressure with ${loop.remainingBudgetUsd.toFixed(2)} USD remaining.`;
    }
    if (reasonCodes.includes("status_exited")) {
        return `Severity ${severity}: ${loop.loopId} exited without a clean verification result.`;
    }
    return `Severity ${severity}: ${loop.loopId} is healthy with verification status ${verification.status}.`;
}
|
|
127
|
+
function maxSeverity(left, right) {
|
|
128
|
+
return SEVERITY_RANK[left] >= SEVERITY_RANK[right] ? left : right;
|
|
129
|
+
}
|
|
130
|
+
/**
 * Sort comparator for triage findings: higher severity first, then the most
 * recently updated (or created) loop first.
 *
 * @param {{ severity: string, loop: { updatedAt?: string, createdAt?: string } }} left
 * @param {{ severity: string, loop: { updatedAt?: string, createdAt?: string } }} right
 * @returns {number} Negative when `left` sorts first, positive when `right` does.
 */
function compareFindings(left, right) {
    const severityDelta = SEVERITY_RANK[right.severity] - SEVERITY_RANK[left.severity];
    if (severityDelta !== 0) {
        return severityDelta;
    }
    // An unparseable timestamp would produce NaN and make the comparator
    // inconsistent; treat it like a missing timestamp (epoch 0) instead.
    const toSortableTime = (loop) => {
        const time = new Date(loop.updatedAt ?? loop.createdAt ?? 0).getTime();
        return Number.isFinite(time) ? time : 0;
    };
    return toSortableTime(right.loop) - toSortableTime(left.loop);
}
|
|
@@ -583,4 +583,3 @@ async function checkNoDiff(repoRoot) {
|
|
|
583
583
|
const result = await runSubprocess("git", ["diff", "--name-only", "HEAD"], { cwd: repoRoot, timeoutMs: 5000 });
|
|
584
584
|
return result.exitCode === 0 && result.stdout.trim().length === 0;
|
|
585
585
|
}
|
|
586
|
-
//# sourceMappingURL=claude-cli.js.map
|
|
@@ -3,4 +3,3 @@ export { createStubDirectProviderAdapter } from "./stub-direct-provider.js";
|
|
|
3
3
|
export { createStubAgentCliAdapter } from "./stub-agent-cli.js";
|
|
4
4
|
export { createAgentCliAdapter, createClaudeCliAdapter, createCodexCliAdapter } from "./claude-cli.js";
|
|
5
5
|
export { createVerifierOnlyAdapter } from "./verifier-only.js";
|
|
6
|
-
//# sourceMappingURL=index.js.map
|