@inspectr/mcplab 1.16.0 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/assets/index-Bu8rl4jQ.js +254 -0
- package/dist/app/assets/index-rp4gsGJM.css +1 -0
- package/dist/app/index.html +2 -2
- package/dist/app-server/app-context.d.ts +1 -0
- package/dist/app-server/app-context.d.ts.map +1 -1
- package/dist/app-server/markdown-reports.d.ts.map +1 -1
- package/dist/app-server/markdown-reports.js +64 -4
- package/dist/app-server/markdown-reports.js.map +1 -1
- package/dist/app-server/router.d.ts.map +1 -1
- package/dist/app-server/router.js +10 -1
- package/dist/app-server/router.js.map +1 -1
- package/dist/app-server/runs-routes.d.ts.map +1 -1
- package/dist/app-server/runs-routes.js +324 -63
- package/dist/app-server/runs-routes.js.map +1 -1
- package/dist/app-server/runs-store.d.ts +10 -0
- package/dist/app-server/runs-store.d.ts.map +1 -1
- package/dist/app-server/runs-store.js +27 -0
- package/dist/app-server/runs-store.js.map +1 -1
- package/dist/app-server/tool-analysis.d.ts.map +1 -1
- package/dist/app-server/tool-analysis.js +25 -1
- package/dist/app-server/tool-analysis.js.map +1 -1
- package/dist/cli.js +61 -2
- package/dist/cli.js.map +1 -1
- package/package.json +4 -4
- package/dist/app/assets/index-BSGuUMv-.js +0 -254
- package/dist/app/assets/index-Bekohuot.css +0 -1
|
@@ -32,6 +32,53 @@ export function mergeLibraryAgentsIntoConfig(config, libraryAgents) {
|
|
|
32
32
|
/**
 * Applies library agents to a loaded config by delegating to
 * `applyLibraryEntries`, always passing a fresh empty object as the third
 * argument.
 *
 * @param {object} loaded - Loaded config wrapper, forwarded unchanged.
 * @param {object} libraryAgents - Library agent definitions, forwarded unchanged.
 * @returns {void}
 */
export function applyLibraryAgents(loaded, libraryAgents) {
    // NOTE(review): the meaning of the third argument is defined by
    // applyLibraryEntries, which is not visible here.
    const emptyThirdArgument = {};
    applyLibraryEntries(loaded, libraryAgents, emptyThirdArgument);
}
|
|
35
|
+
/**
 * Builds the queue-entry summary for a single job.
 *
 * The auth-related fields are only populated while the job's status is
 * 'blocked_auth'; otherwise they are present but `undefined`. Optional run
 * parameters that are null/undefined are reported as `null`.
 *
 * @param {object} job - Job with `id`, `status`, `runParams` and, optionally,
 *   `blockedAuthServers`.
 * @returns {object} Entry with `jobId`, `status`, `blockedReason`,
 *   `requiredServers` and a `runParams` summary.
 */
function toQueueEntry(job) {
    const { id, status, runParams } = job;
    const waitingOnAuth = status === 'blocked_auth';
    let blockedReason;
    let requiredServers;
    if (waitingOnAuth) {
        blockedReason = 'oauth_required';
        requiredServers = job.blockedAuthServers ?? [];
    }
    // Missing optional parameters are normalised to null.
    const orNull = (value) => value ?? null;
    return {
        jobId: id,
        status,
        blockedReason,
        requiredServers,
        runParams: {
            configPath: runParams.configPath,
            runsPerScenario: runParams.runsPerScenario,
            scenarioIds: orNull(runParams.scenarioIds),
            agents: orNull(runParams.requestedAgents),
            runNote: orNull(runParams.runNote),
            serverOverrideAll: orNull(runParams.serverOverrideAll),
            scenarioServerOverrides: orNull(runParams.scenarioServerOverrides)
        }
    };
}
|
|
52
|
+
/**
 * Produces a snapshot of the run queue: the active job (if any) and the
 * jobs still waiting in the queue.
 *
 * Queue ids that no longer resolve to a job, or whose job is not in the
 * 'queued' or 'blocked_auth' status, are left out of `queued`.
 *
 * @param {Map<string, object>} jobs - Job lookup keyed by job id.
 * @param {object} runQueueState - Provides `activeJobId` and the `queue` id list.
 * @returns {{active: object|null, queued: object[]}} Entries as built by `toQueueEntry`.
 */
function buildQueueState(jobs, runQueueState) {
    const { activeJobId, queue } = runQueueState;
    const queued = [];
    for (const queuedId of queue) {
        const candidate = jobs.get(queuedId);
        if (!candidate) {
            continue;
        }
        const isWaiting = candidate.status === 'queued' || candidate.status === 'blocked_auth';
        if (isWaiting) {
            queued.push(toQueueEntry(candidate));
        }
    }
    let active = null;
    if (activeJobId) {
        const runningJob = jobs.get(activeJobId);
        if (runningJob) {
            active = toQueueEntry(runningJob);
        }
    }
    return { active, queued };
}
|
|
63
|
+
/**
 * Sends one 'queue_event' carrying the current queue state to every client
 * registered in `runQueueState.clients`.
 *
 * Clients flagged `destroyed` or `writableEnded`, and clients for which
 * `deps.sendSseEvent` throws, are removed from the client set.
 *
 * @param {Map<string, object>} jobs - Job lookup keyed by job id.
 * @param {object} runQueueState - Queue state holding the `clients` set.
 * @param {object} deps - Must provide `sendSseEvent(client, event)`.
 * @returns {void}
 */
function emitQueueEvent(jobs, runQueueState, deps) {
    const subscribers = runQueueState.clients;
    // The same event object (single timestamp, single snapshot) goes to all clients.
    const queueEvent = {
        type: 'queue_event',
        ts: new Date().toISOString(),
        payload: { event: buildQueueState(jobs, runQueueState) }
    };
    // Iterate over a copy so entries can be removed from the set while looping.
    Array.from(subscribers).forEach((subscriber) => {
        const isClosed = subscriber.destroyed || subscriber.writableEnded;
        if (isClosed) {
            subscribers.delete(subscriber);
            return;
        }
        try {
            deps.sendSseEvent(subscriber, queueEvent);
        }
        catch {
            // Best effort: a client that cannot be written to is dropped.
            subscribers.delete(subscriber);
        }
    });
}
|
|
35
82
|
export async function handleRunsRoutes(params) {
|
|
36
83
|
const { req, res, pathname, method, settings, jobs, runQueueState, oauthSessionManager, deps } = params;
|
|
37
84
|
const { parseBody, asJson, addJobEvent, sendSseEvent, ensureInsideRoot, listRuns, getRunResults, getScenarioRunTraceRecords, selectScenarioIds, expandConfigForAgents, resolveRunSelectedAgents, readLibraries, pickDefaultAssistantAgentName, pkgVersion } = deps;
|
|
@@ -39,16 +86,71 @@ export async function handleRunsRoutes(params) {
|
|
|
39
86
|
const requestUrl = new URL(req.url ?? '/api/runs', 'http://localhost');
|
|
40
87
|
const since = requestUrl.searchParams.get('since') ?? undefined;
|
|
41
88
|
const until = requestUrl.searchParams.get('until') ?? undefined;
|
|
89
|
+
const scenario = requestUrl.searchParams.get('scenario') ?? undefined;
|
|
42
90
|
const lastDaysRaw = requestUrl.searchParams.get('last_days');
|
|
43
91
|
const lastDaysParsed = lastDaysRaw === null ? NaN : Number(lastDaysRaw);
|
|
44
92
|
const lastDays = Number.isFinite(lastDaysParsed) && lastDaysParsed > 0
|
|
45
93
|
? Math.floor(lastDaysParsed)
|
|
46
94
|
: undefined;
|
|
47
|
-
|
|
95
|
+
const limitRaw = Number(requestUrl.searchParams.get('limit'));
|
|
96
|
+
const offsetRaw = Number(requestUrl.searchParams.get('offset'));
|
|
97
|
+
const limit = Number.isFinite(limitRaw) ? Math.max(1, Math.min(100, Math.floor(limitRaw))) : 25;
|
|
98
|
+
const offset = Number.isFinite(offsetRaw) ? Math.max(0, Math.floor(offsetRaw)) : 0;
|
|
99
|
+
const all = listRuns(settings.runsDir, {
|
|
48
100
|
since,
|
|
49
101
|
until,
|
|
50
|
-
lastDays
|
|
51
|
-
|
|
102
|
+
lastDays,
|
|
103
|
+
scenario
|
|
104
|
+
});
|
|
105
|
+
const data = all.slice(offset, offset + limit);
|
|
106
|
+
const totalCount = all.length;
|
|
107
|
+
const hasMore = offset + data.length < totalCount;
|
|
108
|
+
const nextOffset = hasMore ? offset + data.length : null;
|
|
109
|
+
const prevOffset = offset > 0 ? Math.max(0, offset - limit) : null;
|
|
110
|
+
asJson(res, 200, {
|
|
111
|
+
object: 'list',
|
|
112
|
+
url: `${pathname}${requestUrl.search}`,
|
|
113
|
+
data,
|
|
114
|
+
has_more: hasMore,
|
|
115
|
+
total_count: totalCount,
|
|
116
|
+
next_offset: nextOffset,
|
|
117
|
+
prev_offset: prevOffset
|
|
118
|
+
});
|
|
119
|
+
return true;
|
|
120
|
+
}
|
|
121
|
+
if (pathname === '/api/runs/latest-pass-rates' && method === 'POST') {
|
|
122
|
+
const body = (await parseBody(req));
|
|
123
|
+
const requestedConfigs = Array.isArray(body.configs) ? body.configs : [];
|
|
124
|
+
const normalizedConfigs = requestedConfigs
|
|
125
|
+
.map((entry) => ({
|
|
126
|
+
id: String(entry?.id ?? '').trim(),
|
|
127
|
+
sourcePath: String(entry?.sourcePath ?? '').trim(),
|
|
128
|
+
relativePath: String(entry?.relativePath ?? '').trim(),
|
|
129
|
+
configHash: String(entry?.configHash ?? '').trim()
|
|
130
|
+
}))
|
|
131
|
+
.filter((entry) => entry.id);
|
|
132
|
+
const lastDaysRaw = Number(body.lastDays);
|
|
133
|
+
const lastDays = Number.isFinite(lastDaysRaw) && lastDaysRaw > 0 ? Math.floor(lastDaysRaw) : undefined;
|
|
134
|
+
const summaries = listRuns(settings.runsDir, { lastDays });
|
|
135
|
+
const pending = new Set(normalizedConfigs.map((entry) => entry.id));
|
|
136
|
+
const byConfigId = {};
|
|
137
|
+
for (const summary of summaries) {
|
|
138
|
+
if (pending.size === 0)
|
|
139
|
+
break;
|
|
140
|
+
const summaryPath = String(summary.configPath ?? '').trim();
|
|
141
|
+
const summaryHash = String(summary.configHash ?? '').trim();
|
|
142
|
+
for (const cfg of normalizedConfigs) {
|
|
143
|
+
if (!pending.has(cfg.id))
|
|
144
|
+
continue;
|
|
145
|
+
if ((cfg.sourcePath && cfg.sourcePath === summaryPath) ||
|
|
146
|
+
(cfg.relativePath && cfg.relativePath === summaryPath) ||
|
|
147
|
+
(cfg.configHash && cfg.configHash === summaryHash)) {
|
|
148
|
+
byConfigId[cfg.id] = summary.passRate;
|
|
149
|
+
pending.delete(cfg.id);
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
asJson(res, 200, { byConfigId });
|
|
52
154
|
return true;
|
|
53
155
|
}
|
|
54
156
|
if (pathname.startsWith('/api/runs/') && pathname.endsWith('/trace') && method === 'GET') {
|
|
@@ -102,6 +204,10 @@ export async function handleRunsRoutes(params) {
|
|
|
102
204
|
for (const client of job.clients)
|
|
103
205
|
client.end();
|
|
104
206
|
job.clients.clear();
|
|
207
|
+
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
208
|
+
emitWhenIdle: true,
|
|
209
|
+
hostHeader: req.headers.host
|
|
210
|
+
});
|
|
105
211
|
asJson(res, 200, { ok: true, status: 'stopped' });
|
|
106
212
|
return true;
|
|
107
213
|
}
|
|
@@ -115,42 +221,25 @@ export async function handleRunsRoutes(params) {
|
|
|
115
221
|
return true;
|
|
116
222
|
}
|
|
117
223
|
if (pathname === '/api/runs/queue' && method === 'GET') {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
asJson(res, 200, {
|
|
138
|
-
active: activeJob
|
|
139
|
-
? {
|
|
140
|
-
jobId: activeJob.id,
|
|
141
|
-
status: activeJob.status,
|
|
142
|
-
runParams: {
|
|
143
|
-
configPath: activeJob.runParams.configPath,
|
|
144
|
-
runsPerScenario: activeJob.runParams.runsPerScenario,
|
|
145
|
-
scenarioIds: activeJob.runParams.scenarioIds ?? null,
|
|
146
|
-
agents: activeJob.runParams.requestedAgents ?? null,
|
|
147
|
-
runNote: activeJob.runParams.runNote ?? null,
|
|
148
|
-
serverOverrideAll: activeJob.runParams.serverOverrideAll ?? null,
|
|
149
|
-
scenarioServerOverrides: activeJob.runParams.scenarioServerOverrides ?? null
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
: null,
|
|
153
|
-
queued: queuedEntries
|
|
224
|
+
asJson(res, 200, buildQueueState(jobs, runQueueState));
|
|
225
|
+
return true;
|
|
226
|
+
}
|
|
227
|
+
if (pathname === '/api/runs/queue/events' && method === 'GET') {
|
|
228
|
+
res.statusCode = 200;
|
|
229
|
+
res.setHeader('content-type', 'text/event-stream');
|
|
230
|
+
res.setHeader('cache-control', 'no-cache');
|
|
231
|
+
res.setHeader('connection', 'keep-alive');
|
|
232
|
+
if ('flushHeaders' in res && typeof res.flushHeaders === 'function') {
|
|
233
|
+
res.flushHeaders();
|
|
234
|
+
}
|
|
235
|
+
sendSseEvent(res, {
|
|
236
|
+
type: 'queue_event',
|
|
237
|
+
ts: new Date().toISOString(),
|
|
238
|
+
payload: { event: buildQueueState(jobs, runQueueState) }
|
|
239
|
+
});
|
|
240
|
+
runQueueState.clients.add(res);
|
|
241
|
+
req.on('close', () => {
|
|
242
|
+
runQueueState.clients.delete(res);
|
|
154
243
|
});
|
|
155
244
|
return true;
|
|
156
245
|
}
|
|
@@ -171,7 +260,6 @@ export async function handleRunsRoutes(params) {
|
|
|
171
260
|
asJson(res, 404, { error: 'Job is not queued' });
|
|
172
261
|
return true;
|
|
173
262
|
}
|
|
174
|
-
const wasBlocked = job.status === 'blocked_auth';
|
|
175
263
|
const idx = runQueueState.queue.indexOf(jobId);
|
|
176
264
|
if (idx !== -1)
|
|
177
265
|
runQueueState.queue.splice(idx, 1);
|
|
@@ -184,14 +272,18 @@ export async function handleRunsRoutes(params) {
|
|
|
184
272
|
for (const client of job.clients)
|
|
185
273
|
client.end();
|
|
186
274
|
job.clients.clear();
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
275
|
+
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
276
|
+
emitWhenIdle: true,
|
|
277
|
+
hostHeader: req.headers.host
|
|
278
|
+
});
|
|
190
279
|
asJson(res, 200, { ok: true, jobId, status: 'stopped' });
|
|
191
280
|
return true;
|
|
192
281
|
}
|
|
193
282
|
if (pathname === '/api/runs/queue/resume' && method === 'POST') {
|
|
194
|
-
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps
|
|
283
|
+
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
284
|
+
emitWhenIdle: true,
|
|
285
|
+
hostHeader: req.headers.host
|
|
286
|
+
});
|
|
195
287
|
asJson(res, 200, { ok: true });
|
|
196
288
|
return true;
|
|
197
289
|
}
|
|
@@ -320,13 +412,16 @@ export async function handleRunsRoutes(params) {
|
|
|
320
412
|
position: runQueueState.queue.length
|
|
321
413
|
}
|
|
322
414
|
});
|
|
415
|
+
emitQueueEvent(jobs, runQueueState, deps);
|
|
323
416
|
asJson(res, 202, { jobId, queued: true, position: runQueueState.queue.length });
|
|
324
417
|
}
|
|
325
418
|
else {
|
|
326
419
|
// No active job — add to queue and let advanceQueue handle start (with OAuth pre-check)
|
|
327
420
|
runQueueState.queue.push(jobId);
|
|
328
421
|
asJson(res, 202, { jobId });
|
|
329
|
-
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps
|
|
422
|
+
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
423
|
+
hostHeader: req.headers.host
|
|
424
|
+
});
|
|
330
425
|
}
|
|
331
426
|
return true;
|
|
332
427
|
}
|
|
@@ -677,18 +772,26 @@ function resolveOAuthServersForJob(job, librariesDir) {
|
|
|
677
772
|
return [];
|
|
678
773
|
}
|
|
679
774
|
}
|
|
680
|
-
async function advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps) {
|
|
681
|
-
if (runQueueState.activeJobId)
|
|
775
|
+
async function advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, options) {
|
|
776
|
+
if (runQueueState.activeJobId) {
|
|
777
|
+
if (options?.emitWhenIdle)
|
|
778
|
+
emitQueueEvent(jobs, runQueueState, deps);
|
|
682
779
|
return;
|
|
683
|
-
|
|
780
|
+
}
|
|
781
|
+
if (runQueueState.isAdvancingQueue) {
|
|
782
|
+
if (options?.emitWhenIdle)
|
|
783
|
+
emitQueueEvent(jobs, runQueueState, deps);
|
|
684
784
|
return;
|
|
785
|
+
}
|
|
685
786
|
runQueueState.isAdvancingQueue = true;
|
|
787
|
+
let queueMutated = false;
|
|
686
788
|
try {
|
|
687
789
|
while (runQueueState.queue.length > 0) {
|
|
688
790
|
const nextId = runQueueState.queue[0]; // peek — do not shift yet
|
|
689
791
|
const nextJob = jobs.get(nextId);
|
|
690
792
|
if (!nextJob || (nextJob.status !== 'queued' && nextJob.status !== 'blocked_auth')) {
|
|
691
793
|
runQueueState.queue.shift();
|
|
794
|
+
queueMutated = true;
|
|
692
795
|
continue;
|
|
693
796
|
}
|
|
694
797
|
// Pre-check OAuth before starting
|
|
@@ -709,27 +812,52 @@ async function advanceQueue(jobs, runQueueState, settings, oauthSessionManager,
|
|
|
709
812
|
for (const client of nextJob.clients)
|
|
710
813
|
client.end();
|
|
711
814
|
nextJob.clients.clear();
|
|
815
|
+
queueMutated = true;
|
|
712
816
|
continue;
|
|
713
817
|
}
|
|
714
818
|
if (oauthServers.length > 0) {
|
|
715
|
-
const
|
|
716
|
-
const needsAuth =
|
|
819
|
+
const ensureResult = await oauthSessionManager.ensureServersAuthorized(oauthServers, options?.hostHeader);
|
|
820
|
+
const needsAuth = ensureResult.servers.filter((s) => s.status === 'auth_required');
|
|
717
821
|
if (needsAuth.length > 0) {
|
|
718
|
-
const needsAuthNames = needsAuth.map((s) => s.
|
|
822
|
+
const needsAuthNames = needsAuth.map((s) => s.serverName);
|
|
823
|
+
const wasBlocked = nextJob.status === 'blocked_auth';
|
|
824
|
+
const prevBlockedServers = nextJob.blockedAuthServers ?? [];
|
|
825
|
+
const prevKey = [...prevBlockedServers].sort().join('|');
|
|
826
|
+
const nextKey = [...needsAuthNames].sort().join('|');
|
|
827
|
+
const blockedSetChanged = prevKey !== nextKey;
|
|
719
828
|
nextJob.blockedAuthServers = needsAuthNames; // always refresh to current missing subset
|
|
720
|
-
if (
|
|
829
|
+
if (!wasBlocked) {
|
|
721
830
|
nextJob.status = 'blocked_auth';
|
|
722
831
|
}
|
|
832
|
+
if (!wasBlocked || blockedSetChanged) {
|
|
833
|
+
deps.addJobEvent(nextJob, {
|
|
834
|
+
type: 'oauth_required',
|
|
835
|
+
ts: new Date().toISOString(),
|
|
836
|
+
payload: {
|
|
837
|
+
jobId: nextJob.id,
|
|
838
|
+
servers: needsAuthNames,
|
|
839
|
+
message: `OAuth login required for server(s): ${needsAuthNames.join(', ')}.`
|
|
840
|
+
}
|
|
841
|
+
});
|
|
842
|
+
}
|
|
843
|
+
queueMutated = true;
|
|
844
|
+
emitQueueEvent(jobs, runQueueState, deps);
|
|
845
|
+
return; // pause — frontend must call /api/runs/queue/resume after auth
|
|
846
|
+
}
|
|
847
|
+
const readyServers = ensureResult.servers
|
|
848
|
+
.filter((s) => s.status === 'ready')
|
|
849
|
+
.map((s) => {
|
|
850
|
+
const mode = s.debugState ?? 'unknown';
|
|
851
|
+
return `${s.serverName} (${mode})`;
|
|
852
|
+
});
|
|
853
|
+
if (readyServers.length > 0) {
|
|
723
854
|
deps.addJobEvent(nextJob, {
|
|
724
|
-
type: '
|
|
855
|
+
type: 'log',
|
|
725
856
|
ts: new Date().toISOString(),
|
|
726
857
|
payload: {
|
|
727
|
-
|
|
728
|
-
servers: needsAuthNames,
|
|
729
|
-
message: `OAuth login required for server(s): ${needsAuthNames.join(', ')}.`
|
|
858
|
+
message: `OAuth credentials ready for queued run: ${readyServers.join(', ')}`
|
|
730
859
|
}
|
|
731
860
|
});
|
|
732
|
-
return; // pause — frontend must call /api/runs/queue/resume after auth
|
|
733
861
|
}
|
|
734
862
|
}
|
|
735
863
|
// OAuth ready (or not required) — start the job
|
|
@@ -750,16 +878,21 @@ async function advanceQueue(jobs, runQueueState, settings, oauthSessionManager,
|
|
|
750
878
|
scenarioServerOverrides: nextJob.runParams.scenarioServerOverrides ?? null
|
|
751
879
|
}
|
|
752
880
|
});
|
|
881
|
+
queueMutated = true;
|
|
882
|
+
emitQueueEvent(jobs, runQueueState, deps);
|
|
753
883
|
void executeRunJob(nextJob, settings, jobs, runQueueState, oauthSessionManager, deps);
|
|
754
884
|
return;
|
|
755
885
|
}
|
|
886
|
+
if (queueMutated || options?.emitWhenIdle) {
|
|
887
|
+
emitQueueEvent(jobs, runQueueState, deps);
|
|
888
|
+
}
|
|
756
889
|
}
|
|
757
890
|
finally {
|
|
758
891
|
runQueueState.isAdvancingQueue = false;
|
|
759
892
|
}
|
|
760
893
|
}
|
|
761
894
|
async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionManager, deps) {
|
|
762
|
-
const { addJobEvent, selectScenarioIds, expandConfigForAgents, resolveRunSelectedAgents, readLibraries, pkgVersion } = deps;
|
|
895
|
+
const { addJobEvent, getScenarioRunTraceRecords, selectScenarioIds, expandConfigForAgents, resolveRunSelectedAgents, readLibraries, pkgVersion } = deps;
|
|
763
896
|
const { configPath, runsPerScenario, scenarioId, scenarioIds, requestedAgents, runNote, serverOverrideAll, scenarioServerOverrides } = job.runParams;
|
|
764
897
|
try {
|
|
765
898
|
addJobEvent(job, {
|
|
@@ -847,8 +980,9 @@ async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionMan
|
|
|
847
980
|
});
|
|
848
981
|
const usedServerNames = new Set(expandedConfig.scenarios.flatMap((scenario) => scenario.servers));
|
|
849
982
|
const oauthServers = Array.from(usedServerNames).filter((serverName) => expandedConfig.servers[serverName]?.auth?.type === 'oauth_authorization_code');
|
|
983
|
+
const oauthServerSet = new Set(oauthServers);
|
|
850
984
|
const mcpServerAuthHeaders = oauthServers.length > 0
|
|
851
|
-
? await oauthSessionManager.getAuthHeadersForServers(oauthServers)
|
|
985
|
+
? await oauthSessionManager.getAuthHeadersForServers(oauthServers, undefined)
|
|
852
986
|
: undefined;
|
|
853
987
|
if (oauthServers.length > 0) {
|
|
854
988
|
addJobEvent(job, {
|
|
@@ -884,6 +1018,16 @@ async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionMan
|
|
|
884
1018
|
cliVersion: pkgVersion,
|
|
885
1019
|
runsDir: settings.runsDir,
|
|
886
1020
|
mcpServerAuthHeaders,
|
|
1021
|
+
resolveMcpServerAuthHeaders: oauthServers.length > 0
|
|
1022
|
+
? async (serverNames, options) => {
|
|
1023
|
+
if (options?.signal?.aborted)
|
|
1024
|
+
return {};
|
|
1025
|
+
const namesToRefresh = serverNames.filter((name) => oauthServerSet.has(name));
|
|
1026
|
+
if (namesToRefresh.length === 0)
|
|
1027
|
+
return {};
|
|
1028
|
+
return oauthSessionManager.getAuthHeadersForServers(namesToRefresh);
|
|
1029
|
+
}
|
|
1030
|
+
: undefined,
|
|
887
1031
|
signal: job.abortController.signal,
|
|
888
1032
|
onProgress: async (event) => {
|
|
889
1033
|
const message = formatRunProgressMessage(event);
|
|
@@ -902,6 +1046,23 @@ async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionMan
|
|
|
902
1046
|
if (loaded.config.name && loaded.config.name.trim().length > 0) {
|
|
903
1047
|
results.metadata.config_name = loaded.config.name.trim();
|
|
904
1048
|
}
|
|
1049
|
+
results.metadata.rerun_agents = [...resolvedAgentList];
|
|
1050
|
+
results.metadata.rerun_scenario_ids = selectedBaseScenarios.scenarios.map((scenario) => scenario.id);
|
|
1051
|
+
if (serverOverrideAll && serverOverrideAll.length > 0) {
|
|
1052
|
+
results.metadata.rerun_server_override_all = [...serverOverrideAll];
|
|
1053
|
+
}
|
|
1054
|
+
else {
|
|
1055
|
+
delete results.metadata.rerun_server_override_all;
|
|
1056
|
+
}
|
|
1057
|
+
if (filteredScenarioOverrides && Object.keys(filteredScenarioOverrides).length > 0) {
|
|
1058
|
+
results.metadata.rerun_scenario_server_overrides = Object.fromEntries(Object.entries(filteredScenarioOverrides).map(([scenarioKey, serverIds]) => [
|
|
1059
|
+
scenarioKey,
|
|
1060
|
+
[...serverIds]
|
|
1061
|
+
]));
|
|
1062
|
+
}
|
|
1063
|
+
else {
|
|
1064
|
+
delete results.metadata.rerun_scenario_server_overrides;
|
|
1065
|
+
}
|
|
905
1066
|
addJobEvent(job, {
|
|
906
1067
|
type: 'log',
|
|
907
1068
|
ts: new Date().toISOString(),
|
|
@@ -914,6 +1075,8 @@ async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionMan
|
|
|
914
1075
|
ts: new Date().toISOString(),
|
|
915
1076
|
payload: { message: `Writing results to ${runDir}` }
|
|
916
1077
|
});
|
|
1078
|
+
const traceRecords = getScenarioRunTraceRecords(results.metadata.run_id, settings.runsDir);
|
|
1079
|
+
results.metadata.tool_tokens_total = estimateRunToolTokensTotal(traceRecords);
|
|
917
1080
|
writeFileSync(join(runDir, 'results.json'), `${JSON.stringify(results, null, 2)}\n`, 'utf8');
|
|
918
1081
|
writeFileSync(join(runDir, 'report.html'), renderReport(results), 'utf8');
|
|
919
1082
|
writeFileSync(join(runDir, 'summary.md'), renderSummaryMarkdown(results), 'utf8');
|
|
@@ -940,6 +1103,27 @@ async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionMan
|
|
|
940
1103
|
}
|
|
941
1104
|
}
|
|
942
1105
|
catch (error) {
|
|
1106
|
+
if (error instanceof OAuthAuthorizationRequiredError) {
|
|
1107
|
+
const blockedServers = Array.from(new Set(error.details.map((detail) => detail.serverName).filter(Boolean)));
|
|
1108
|
+
const aborted = job.abortController.signal.aborted || job.status === 'stopped';
|
|
1109
|
+
if (!aborted && blockedServers.length > 0) {
|
|
1110
|
+
job.blockedAuthServers = blockedServers;
|
|
1111
|
+
job.status = 'blocked_auth';
|
|
1112
|
+
if (!runQueueState.queue.includes(job.id)) {
|
|
1113
|
+
runQueueState.queue.unshift(job.id);
|
|
1114
|
+
}
|
|
1115
|
+
addJobEvent(job, {
|
|
1116
|
+
type: 'oauth_required',
|
|
1117
|
+
ts: new Date().toISOString(),
|
|
1118
|
+
payload: {
|
|
1119
|
+
jobId: job.id,
|
|
1120
|
+
servers: blockedServers,
|
|
1121
|
+
message: `OAuth login required for server(s): ${blockedServers.join(', ')}.`
|
|
1122
|
+
}
|
|
1123
|
+
});
|
|
1124
|
+
return;
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
943
1127
|
const normalizedError = error instanceof OAuthAuthorizationRequiredError
|
|
944
1128
|
? new Error(error.details[0]?.message || error.message)
|
|
945
1129
|
: error;
|
|
@@ -959,13 +1143,90 @@ async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionMan
|
|
|
959
1143
|
}
|
|
960
1144
|
finally {
|
|
961
1145
|
runQueueState.activeJobId = null;
|
|
962
|
-
|
|
963
|
-
client.
|
|
964
|
-
|
|
965
|
-
|
|
1146
|
+
if (job.status !== 'blocked_auth') {
|
|
1147
|
+
for (const client of job.clients)
|
|
1148
|
+
client.end();
|
|
1149
|
+
job.clients.clear();
|
|
1150
|
+
}
|
|
1151
|
+
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
1152
|
+
emitWhenIdle: true
|
|
1153
|
+
}).catch((error) => {
|
|
1154
|
+
console.warn(`[mcplab] Failed to advance run queue after job '${job.id}': ${error instanceof Error ? error.message : String(error)}`);
|
|
1155
|
+
});
|
|
966
1156
|
pruneOldJobs(jobs, runQueueState);
|
|
967
1157
|
}
|
|
968
1158
|
}
|
|
1159
|
+
/**
 * Splits a total into `parts` non-negative integer shares that sum to the
 * rounded total, handing the remainder out one unit at a time to the
 * leading shares (e.g. 10 into 3 parts gives [4, 3, 3]).
 *
 * @param {number} total - Amount to split. Rounded to the nearest integer and
 *   clamped at 0; a non-finite or non-numeric total yields all-zero shares.
 * @param {number} parts - Number of shares. A fractional value is floored;
 *   anything that is not a finite number >= 1 yields an empty array.
 * @returns {number[]} Array of `parts` integer shares.
 */
function splitInteger(total, parts) {
    // Normalise the share count first so that negative, NaN or fractional
    // values never reach the Array constructor, which throws RangeError
    // for an invalid length.
    const count = Number.isFinite(parts) && parts >= 1 ? Math.floor(parts) : 0;
    if (count === 0) {
        return [];
    }
    if (!Number.isFinite(total)) {
        return Array(count).fill(0);
    }
    const safeTotal = Math.max(0, Math.round(total));
    const base = Math.floor(safeTotal / count);
    const remainder = safeTotal % count;
    // The first `remainder` shares receive one extra unit.
    return Array.from({ length: count }, (_, index) => base + (index < remainder ? 1 : 0));
}
|
|
1172
|
+
/**
 * Estimates the total number of tokens attributable to tool activity across
 * a run's trace records.
 *
 * For each message, `tool_use` blocks and `tool_result` blocks are handled
 * separately, each with the same preference order:
 *   1. if every block of that kind carries `estimated_tokens`, their
 *      `estimated_tokens.total` values are summed;
 *   2. otherwise, if there is exactly one block and the message has a numeric
 *      `usage.total_tokens`, that value is added as-is;
 *   3. otherwise the message's `usage.total_tokens` is split across the
 *      blocks with `splitInteger` and the shares are added.
 * `tool_result` blocks only count in cases 2 and 3 when their `tool_use_id`
 * matches a `tool_use` block seen earlier in the same record.
 *
 * @param {Iterable<object>} records - Trace records; each may have a
 *   `messages` array of `{ content, usage }` objects.
 *   NOTE(review): the block schema is inferred from the fields read here —
 *   confirm against the trace writer.
 * @returns {number|null} The estimated total, or `null` when no message
 *   contributed any token information.
 */
function estimateRunToolTokensTotal(records) {
    let total = 0;
    let hasAny = false;
    for (const record of records) {
        // Ids of the tool_use blocks seen so far in this record. Only
        // membership is needed, so a Set replaces the former id -> name Map.
        const seenToolUseIds = new Set();
        for (const message of record.messages ?? []) {
            // A message whose content is not a block array (e.g. plain text
            // or missing) has no tool blocks; treat it as empty rather than
            // throwing on `.filter`.
            const blocks = Array.isArray(message.content) ? message.content : [];
            const usageTotal = message.usage?.total_tokens;
            const hasUsageTotal = typeof usageTotal === 'number';
            const toolUses = blocks.filter((block) => block.type === 'tool_use');
            if (toolUses.length > 0) {
                for (const toolUse of toolUses) {
                    seenToolUseIds.add(toolUse.id);
                }
                if (toolUses.every((toolUse) => Boolean(toolUse.estimated_tokens))) {
                    for (const toolUse of toolUses) {
                        total += toolUse.estimated_tokens?.total ?? 0;
                    }
                    hasAny = true;
                }
                else if (toolUses.length === 1 && hasUsageTotal) {
                    total += usageTotal;
                    hasAny = true;
                }
                else {
                    const shares = splitInteger(usageTotal, toolUses.length);
                    total += shares.reduce((sum, value) => sum + value, 0);
                    if (hasUsageTotal) {
                        hasAny = true;
                    }
                }
            }
            const toolResults = blocks.filter((block) => block.type === 'tool_result');
            if (toolResults.length === 0) {
                continue;
            }
            if (toolResults.every((result) => Boolean(result.estimated_tokens))) {
                for (const result of toolResults) {
                    total += result.estimated_tokens?.total ?? 0;
                }
                hasAny = true;
                continue;
            }
            if (toolResults.length === 1
                && seenToolUseIds.has(toolResults[0].tool_use_id)
                && hasUsageTotal) {
                total += usageTotal;
                hasAny = true;
                continue;
            }
            // Only results that answer a known tool_use take a share of the usage.
            const knownResults = toolResults.filter((result) => seenToolUseIds.has(result.tool_use_id));
            if (knownResults.length === 0) {
                continue;
            }
            const shares = splitInteger(usageTotal, knownResults.length);
            total += shares.reduce((sum, value) => sum + value, 0);
            if (hasUsageTotal) {
                hasAny = true;
            }
        }
    }
    return hasAny ? total : null;
}
|
|
969
1230
|
function pruneOldJobs(jobs, runQueueState) {
|
|
970
1231
|
const maxAgeMs = 30 * 60_000;
|
|
971
1232
|
const now = Date.now();
|