@inspectr/mcplab 1.19.0 → 1.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/assets/index-D1aWsOm8.js +254 -0
- package/dist/app/index.html +1 -1
- package/dist/app-server/app-context.d.ts +1 -10
- package/dist/app-server/app-context.d.ts.map +1 -1
- package/dist/app-server/app-dist.d.ts +8 -0
- package/dist/app-server/app-dist.d.ts.map +1 -0
- package/dist/app-server/app-dist.js +13 -0
- package/dist/app-server/app-dist.js.map +1 -0
- package/dist/app-server/browser-launch.d.ts +8 -0
- package/dist/app-server/browser-launch.d.ts.map +1 -0
- package/dist/app-server/browser-launch.js +15 -0
- package/dist/app-server/browser-launch.js.map +1 -0
- package/dist/app-server/router.d.ts.map +1 -1
- package/dist/app-server/router.js +36 -65
- package/dist/app-server/router.js.map +1 -1
- package/dist/app-server/run-agent-selection.d.ts +6 -0
- package/dist/app-server/run-agent-selection.d.ts.map +1 -0
- package/dist/app-server/run-agent-selection.js +9 -0
- package/dist/app-server/run-agent-selection.js.map +1 -0
- package/dist/app-server/run-queue-domain.d.ts +80 -0
- package/dist/app-server/run-queue-domain.d.ts.map +1 -0
- package/dist/app-server/run-queue-domain.js +396 -0
- package/dist/app-server/run-queue-domain.js.map +1 -0
- package/dist/app-server/run-queue-events.d.ts +16 -0
- package/dist/app-server/run-queue-events.d.ts.map +1 -0
- package/dist/app-server/run-queue-events.js +65 -0
- package/dist/app-server/run-queue-events.js.map +1 -0
- package/dist/app-server/run-queue-executor.d.ts +49 -0
- package/dist/app-server/run-queue-executor.d.ts.map +1 -0
- package/dist/app-server/run-queue-executor.js +443 -0
- package/dist/app-server/run-queue-executor.js.map +1 -0
- package/dist/app-server/run-queue-state.d.ts +58 -0
- package/dist/app-server/run-queue-state.d.ts.map +1 -0
- package/dist/app-server/run-queue-state.js +18 -0
- package/dist/app-server/run-queue-state.js.map +1 -0
- package/dist/app-server/runs-routes.d.ts +3 -38
- package/dist/app-server/runs-routes.d.ts.map +1 -1
- package/dist/app-server/runs-routes.js +18 -721
- package/dist/app-server/runs-routes.js.map +1 -1
- package/dist/app-server/runs-routes.test-helpers.d.ts +47 -0
- package/dist/app-server/runs-routes.test-helpers.d.ts.map +1 -0
- package/dist/app-server/runs-routes.test-helpers.js +107 -0
- package/dist/app-server/runs-routes.test-helpers.js.map +1 -0
- package/dist/app-server/runs-store.d.ts +34 -28
- package/dist/app-server/settings-store.d.ts +1 -0
- package/dist/app-server/settings-store.d.ts.map +1 -1
- package/dist/app-server/settings-store.js +33 -0
- package/dist/app-server/settings-store.js.map +1 -1
- package/dist/app-server/tool-analysis.d.ts.map +1 -1
- package/dist/app-server/tool-analysis.js +1 -7
- package/dist/app-server/tool-analysis.js.map +1 -1
- package/dist/app-server/types.d.ts +1 -0
- package/dist/app-server/types.d.ts.map +1 -1
- package/dist/app-server/version-info.d.ts +11 -0
- package/dist/app-server/version-info.d.ts.map +1 -0
- package/dist/app-server/version-info.js +20 -0
- package/dist/app-server/version-info.js.map +1 -0
- package/dist/app-server/workspace-paths.d.ts +4 -0
- package/dist/app-server/workspace-paths.d.ts.map +1 -0
- package/dist/app-server/workspace-paths.js +11 -0
- package/dist/app-server/workspace-paths.js.map +1 -0
- package/dist/cli.js +1 -0
- package/dist/cli.js.map +1 -1
- package/package.json +5 -5
- package/dist/app/assets/index-DHV4p_lQ.js +0 -254
|
@@ -1,30 +1,10 @@
|
|
|
1
|
-
import { randomUUID } from 'node:crypto';
|
|
2
1
|
import { existsSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
|
|
3
2
|
import { tmpdir } from 'node:os';
|
|
4
|
-
import { isAbsolute, join,
|
|
3
|
+
import { isAbsolute, join, resolve } from 'node:path';
|
|
5
4
|
import { McpClientManager, loadConfig, hashConfig, runAll, renderSummaryMarkdown, applyRuntimeServerOverrides } from '@inspectr/mcplab-core';
|
|
6
5
|
import { renderReport } from '@inspectr/mcplab-reporting';
|
|
7
6
|
import { OAuthAuthorizationRequiredError } from './oauth-session-manager.js';
|
|
8
|
-
import {
|
|
9
|
-
import { readLibraries as readLibrariesFromStore } from './libraries-store.js';
|
|
10
|
-
export function mergeLibraryEntriesIntoConfig(config, libraryAgents, libraryServers) {
|
|
11
|
-
return {
|
|
12
|
-
...config,
|
|
13
|
-
agents: { ...libraryAgents, ...config.agents },
|
|
14
|
-
servers: { ...libraryServers, ...config.servers }
|
|
15
|
-
};
|
|
16
|
-
}
|
|
17
|
-
export function applyLibraryEntries(loaded, libraryAgents, libraryServers) {
|
|
18
|
-
loaded.config = mergeLibraryEntriesIntoConfig(loaded.config, libraryAgents, libraryServers);
|
|
19
|
-
loaded.hash = hashConfig(loaded.config);
|
|
20
|
-
}
|
|
21
|
-
function filterScenarioOverridesToSelectedScenarios(selectedConfig, scenarioServerOverrides) {
|
|
22
|
-
if (!scenarioServerOverrides)
|
|
23
|
-
return undefined;
|
|
24
|
-
const selectedIds = new Set(selectedConfig.scenarios.map((scenario) => scenario.id));
|
|
25
|
-
const filtered = Object.fromEntries(Object.entries(scenarioServerOverrides).filter(([scenarioId]) => selectedIds.has(scenarioId)));
|
|
26
|
-
return Object.keys(filtered).length > 0 ? filtered : undefined;
|
|
27
|
-
}
|
|
7
|
+
import { applyLibraryEntries, filterScenarioOverridesToSelectedScenarios, mergeLibraryEntriesIntoConfig } from './run-queue-executor.js';
|
|
28
8
|
// Backward-compatible exports used by existing tests/imports.
|
|
29
9
|
export function mergeLibraryAgentsIntoConfig(config, libraryAgents) {
|
|
30
10
|
return mergeLibraryEntriesIntoConfig(config, libraryAgents, {});
|
|
@@ -32,55 +12,8 @@ export function mergeLibraryAgentsIntoConfig(config, libraryAgents) {
|
|
|
32
12
|
export function applyLibraryAgents(loaded, libraryAgents) {
|
|
33
13
|
applyLibraryEntries(loaded, libraryAgents, {});
|
|
34
14
|
}
|
|
35
|
-
function toQueueEntry(job) {
|
|
36
|
-
return {
|
|
37
|
-
jobId: job.id,
|
|
38
|
-
status: job.status,
|
|
39
|
-
blockedReason: job.status === 'blocked_auth' ? 'oauth_required' : undefined,
|
|
40
|
-
requiredServers: job.status === 'blocked_auth' ? job.blockedAuthServers ?? [] : undefined,
|
|
41
|
-
runParams: {
|
|
42
|
-
configPath: job.runParams.configPath,
|
|
43
|
-
runsPerScenario: job.runParams.runsPerScenario,
|
|
44
|
-
scenarioIds: job.runParams.scenarioIds ?? null,
|
|
45
|
-
agents: job.runParams.requestedAgents ?? null,
|
|
46
|
-
runNote: job.runParams.runNote ?? null,
|
|
47
|
-
serverOverrideAll: job.runParams.serverOverrideAll ?? null,
|
|
48
|
-
scenarioServerOverrides: job.runParams.scenarioServerOverrides ?? null
|
|
49
|
-
}
|
|
50
|
-
};
|
|
51
|
-
}
|
|
52
|
-
function buildQueueState(jobs, runQueueState) {
|
|
53
|
-
const activeJob = runQueueState.activeJobId ? jobs.get(runQueueState.activeJobId) : null;
|
|
54
|
-
const queuedEntries = runQueueState.queue
|
|
55
|
-
.map((id) => jobs.get(id))
|
|
56
|
-
.filter((j) => !!j && (j.status === 'queued' || j.status === 'blocked_auth'))
|
|
57
|
-
.map((job) => toQueueEntry(job));
|
|
58
|
-
return {
|
|
59
|
-
active: activeJob ? toQueueEntry(activeJob) : null,
|
|
60
|
-
queued: queuedEntries
|
|
61
|
-
};
|
|
62
|
-
}
|
|
63
|
-
function emitQueueEvent(jobs, runQueueState, deps) {
|
|
64
|
-
const event = {
|
|
65
|
-
type: 'queue_event',
|
|
66
|
-
ts: new Date().toISOString(),
|
|
67
|
-
payload: { event: buildQueueState(jobs, runQueueState) }
|
|
68
|
-
};
|
|
69
|
-
for (const client of Array.from(runQueueState.clients)) {
|
|
70
|
-
if (client.destroyed || client.writableEnded) {
|
|
71
|
-
runQueueState.clients.delete(client);
|
|
72
|
-
continue;
|
|
73
|
-
}
|
|
74
|
-
try {
|
|
75
|
-
deps.sendSseEvent(client, event);
|
|
76
|
-
}
|
|
77
|
-
catch {
|
|
78
|
-
runQueueState.clients.delete(client);
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
15
|
export async function handleRunsRoutes(params) {
|
|
83
|
-
const { req, res, pathname, method, settings,
|
|
16
|
+
const { req, res, pathname, method, settings, runQueueService, oauthSessionManager, deps } = params;
|
|
84
17
|
const { parseBody, asJson, addJobEvent, sendSseEvent, ensureInsideRoot, listRuns, getRunResults, getScenarioRunTraceRecords, selectScenarioIds, expandConfigForAgents, resolveRunSelectedAgents, readLibraries, pickDefaultAssistantAgentName, pkgVersion } = deps;
|
|
85
18
|
if (pathname === '/api/runs' && method === 'GET') {
|
|
86
19
|
const requestUrl = new URL(req.url ?? '/api/runs', 'http://localhost');
|
|
@@ -160,7 +93,7 @@ export async function handleRunsRoutes(params) {
|
|
|
160
93
|
}
|
|
161
94
|
if (pathname.startsWith('/api/runs/jobs/') && pathname.endsWith('/events') && method === 'GET') {
|
|
162
95
|
const jobId = pathname.split('/')[4];
|
|
163
|
-
const job = jobs.get(jobId);
|
|
96
|
+
const job = runQueueService.jobs.get(jobId);
|
|
164
97
|
if (!job) {
|
|
165
98
|
asJson(res, 404, { error: 'Job not found' });
|
|
166
99
|
return true;
|
|
@@ -186,42 +119,16 @@ export async function handleRunsRoutes(params) {
|
|
|
186
119
|
}
|
|
187
120
|
if (pathname.startsWith('/api/runs/jobs/') && pathname.endsWith('/stop') && method === 'POST') {
|
|
188
121
|
const jobId = pathname.split('/')[4];
|
|
189
|
-
const
|
|
190
|
-
if (!
|
|
122
|
+
const result = runQueueService.stopJob(jobId, { hostHeader: req.headers.host });
|
|
123
|
+
if (!result) {
|
|
191
124
|
asJson(res, 404, { error: 'Job not found' });
|
|
192
125
|
return true;
|
|
193
126
|
}
|
|
194
|
-
|
|
195
|
-
const idx = runQueueState.queue.indexOf(jobId);
|
|
196
|
-
if (idx !== -1)
|
|
197
|
-
runQueueState.queue.splice(idx, 1);
|
|
198
|
-
job.status = 'stopped';
|
|
199
|
-
addJobEvent(job, {
|
|
200
|
-
type: 'error',
|
|
201
|
-
ts: new Date().toISOString(),
|
|
202
|
-
payload: { message: 'Run stopped before it started' }
|
|
203
|
-
});
|
|
204
|
-
for (const client of job.clients)
|
|
205
|
-
client.end();
|
|
206
|
-
job.clients.clear();
|
|
207
|
-
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
208
|
-
emitWhenIdle: true,
|
|
209
|
-
hostHeader: req.headers.host
|
|
210
|
-
});
|
|
211
|
-
asJson(res, 200, { ok: true, status: 'stopped' });
|
|
212
|
-
return true;
|
|
213
|
-
}
|
|
214
|
-
if (job.status !== 'running') {
|
|
215
|
-
asJson(res, 200, { ok: true, status: job.status });
|
|
216
|
-
return true;
|
|
217
|
-
}
|
|
218
|
-
job.abortController.abort();
|
|
219
|
-
job.status = 'stopped';
|
|
220
|
-
asJson(res, 200, { ok: true, status: 'stopped' });
|
|
127
|
+
asJson(res, 200, result);
|
|
221
128
|
return true;
|
|
222
129
|
}
|
|
223
130
|
if (pathname === '/api/runs/queue' && method === 'GET') {
|
|
224
|
-
asJson(res, 200,
|
|
131
|
+
asJson(res, 200, runQueueService.getQueueState());
|
|
225
132
|
return true;
|
|
226
133
|
}
|
|
227
134
|
if (pathname === '/api/runs/queue/events' && method === 'GET') {
|
|
@@ -232,58 +139,27 @@ export async function handleRunsRoutes(params) {
|
|
|
232
139
|
if ('flushHeaders' in res && typeof res.flushHeaders === 'function') {
|
|
233
140
|
res.flushHeaders();
|
|
234
141
|
}
|
|
235
|
-
|
|
236
|
-
type: 'queue_event',
|
|
237
|
-
ts: new Date().toISOString(),
|
|
238
|
-
payload: { event: buildQueueState(jobs, runQueueState) }
|
|
239
|
-
});
|
|
240
|
-
runQueueState.clients.add(res);
|
|
241
|
-
req.on('close', () => {
|
|
242
|
-
runQueueState.clients.delete(res);
|
|
243
|
-
});
|
|
142
|
+
runQueueService.subscribeQueue(req, res);
|
|
244
143
|
return true;
|
|
245
144
|
}
|
|
246
145
|
if (pathname.startsWith('/api/runs/queue/') &&
|
|
247
146
|
method === 'DELETE' &&
|
|
248
147
|
pathname.split('/').length === 5) {
|
|
249
148
|
const jobId = pathname.split('/')[4];
|
|
250
|
-
const
|
|
251
|
-
if (!
|
|
149
|
+
const result = runQueueService.removeQueuedJob(jobId, { hostHeader: req.headers.host });
|
|
150
|
+
if (!result) {
|
|
252
151
|
asJson(res, 404, { error: 'Job not found' });
|
|
253
152
|
return true;
|
|
254
153
|
}
|
|
255
|
-
if (
|
|
256
|
-
asJson(res,
|
|
154
|
+
if ('error' in result) {
|
|
155
|
+
asJson(res, result.statusCode, { error: result.error });
|
|
257
156
|
return true;
|
|
258
157
|
}
|
|
259
|
-
|
|
260
|
-
asJson(res, 404, { error: 'Job is not queued' });
|
|
261
|
-
return true;
|
|
262
|
-
}
|
|
263
|
-
const idx = runQueueState.queue.indexOf(jobId);
|
|
264
|
-
if (idx !== -1)
|
|
265
|
-
runQueueState.queue.splice(idx, 1);
|
|
266
|
-
job.status = 'stopped';
|
|
267
|
-
addJobEvent(job, {
|
|
268
|
-
type: 'error',
|
|
269
|
-
ts: new Date().toISOString(),
|
|
270
|
-
payload: { message: 'Removed from queue by user' }
|
|
271
|
-
});
|
|
272
|
-
for (const client of job.clients)
|
|
273
|
-
client.end();
|
|
274
|
-
job.clients.clear();
|
|
275
|
-
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
276
|
-
emitWhenIdle: true,
|
|
277
|
-
hostHeader: req.headers.host
|
|
278
|
-
});
|
|
279
|
-
asJson(res, 200, { ok: true, jobId, status: 'stopped' });
|
|
158
|
+
asJson(res, 200, result);
|
|
280
159
|
return true;
|
|
281
160
|
}
|
|
282
161
|
if (pathname === '/api/runs/queue/resume' && method === 'POST') {
|
|
283
|
-
|
|
284
|
-
emitWhenIdle: true,
|
|
285
|
-
hostHeader: req.headers.host
|
|
286
|
-
});
|
|
162
|
+
runQueueService.resumeBlockedJobs({ hostHeader: req.headers.host });
|
|
287
163
|
asJson(res, 200, { ok: true });
|
|
288
164
|
return true;
|
|
289
165
|
}
|
|
@@ -373,7 +249,6 @@ export async function handleRunsRoutes(params) {
|
|
|
373
249
|
}
|
|
374
250
|
// Resolve lazily in advanceQueue so runtime overrides are always reflected.
|
|
375
251
|
const oauthServerNames = undefined;
|
|
376
|
-
const jobId = `run-${Date.now()}-${randomUUID().slice(0, 8)}`;
|
|
377
252
|
const runParamsObj = {
|
|
378
253
|
configPath,
|
|
379
254
|
runsPerScenario,
|
|
@@ -385,44 +260,8 @@ export async function handleRunsRoutes(params) {
|
|
|
385
260
|
serverOverrideAll,
|
|
386
261
|
scenarioServerOverrides
|
|
387
262
|
};
|
|
388
|
-
const
|
|
389
|
-
|
|
390
|
-
status: 'queued',
|
|
391
|
-
events: [],
|
|
392
|
-
clients: new Set(),
|
|
393
|
-
abortController: new AbortController(),
|
|
394
|
-
runParams: runParamsObj
|
|
395
|
-
};
|
|
396
|
-
jobs.set(jobId, job);
|
|
397
|
-
if (runQueueState.activeJobId) {
|
|
398
|
-
// Another job is running — queue and emit position
|
|
399
|
-
runQueueState.queue.push(jobId);
|
|
400
|
-
addJobEvent(job, {
|
|
401
|
-
type: 'queued',
|
|
402
|
-
ts: new Date().toISOString(),
|
|
403
|
-
payload: {
|
|
404
|
-
configPath,
|
|
405
|
-
runsPerScenario,
|
|
406
|
-
scenarioId: scenarioId ?? null,
|
|
407
|
-
scenarioIds: scenarioIds ?? null,
|
|
408
|
-
agents: requestedAgents ?? null,
|
|
409
|
-
runNote: runNote ?? null,
|
|
410
|
-
serverOverrideAll: serverOverrideAll ?? null,
|
|
411
|
-
scenarioServerOverrides: scenarioServerOverrides ?? null,
|
|
412
|
-
position: runQueueState.queue.length
|
|
413
|
-
}
|
|
414
|
-
});
|
|
415
|
-
emitQueueEvent(jobs, runQueueState, deps);
|
|
416
|
-
asJson(res, 202, { jobId, queued: true, position: runQueueState.queue.length });
|
|
417
|
-
}
|
|
418
|
-
else {
|
|
419
|
-
// No active job — add to queue and let advanceQueue handle start (with OAuth pre-check)
|
|
420
|
-
runQueueState.queue.push(jobId);
|
|
421
|
-
asJson(res, 202, { jobId });
|
|
422
|
-
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
423
|
-
hostHeader: req.headers.host
|
|
424
|
-
});
|
|
425
|
-
}
|
|
263
|
+
const response = runQueueService.enqueueRun(runParamsObj, { hostHeader: req.headers.host });
|
|
264
|
+
asJson(res, 202, response);
|
|
426
265
|
return true;
|
|
427
266
|
}
|
|
428
267
|
if (pathname === '/api/runs/preview' && method === 'POST') {
|
|
@@ -512,6 +351,7 @@ export async function handleRunsRoutes(params) {
|
|
|
512
351
|
configHash: hashConfig(previewConfigBase),
|
|
513
352
|
cliVersion: pkgVersion,
|
|
514
353
|
runsDir: resolve(previewRunsRoot),
|
|
354
|
+
cwd: settings.workspaceRoot,
|
|
515
355
|
mcpServerAuthHeaders
|
|
516
356
|
});
|
|
517
357
|
const scenario = results.scenarios[0];
|
|
@@ -736,549 +576,6 @@ function toCoreExtractRules(extractRules) {
|
|
|
736
576
|
}
|
|
737
577
|
return rules;
|
|
738
578
|
}
|
|
739
|
-
function resolveOAuthServersForJob(job, librariesDir) {
|
|
740
|
-
if (job.runParams.oauthServerNames !== undefined)
|
|
741
|
-
return job.runParams.oauthServerNames;
|
|
742
|
-
try {
|
|
743
|
-
const loaded = loadConfig(job.runParams.configPath, { bundleRoot: librariesDir });
|
|
744
|
-
const libraries = readLibrariesFromStore(librariesDir);
|
|
745
|
-
applyLibraryEntries(loaded, libraries.agents, libraries.servers);
|
|
746
|
-
const selected = job.runParams.scenarioIds?.length
|
|
747
|
-
? selectScenarioIds(loaded.config, job.runParams.scenarioIds)
|
|
748
|
-
: job.runParams.scenarioId
|
|
749
|
-
? selectScenarioIds(loaded.config, [job.runParams.scenarioId])
|
|
750
|
-
: loaded.config;
|
|
751
|
-
const filteredScenarioOverrides = filterScenarioOverridesToSelectedScenarios(selected, job.runParams.scenarioServerOverrides);
|
|
752
|
-
const withOverrides = applyRuntimeServerOverrides(selected, {
|
|
753
|
-
serverOverrideAll: job.runParams.serverOverrideAll,
|
|
754
|
-
scenarioServerOverrides: filteredScenarioOverrides
|
|
755
|
-
});
|
|
756
|
-
const effectiveServers = new Set(withOverrides.scenarios.flatMap((scenario) => scenario.servers));
|
|
757
|
-
const names = Array.from(effectiveServers).filter((name) => {
|
|
758
|
-
const config = withOverrides.servers?.[name];
|
|
759
|
-
return config?.auth?.type === 'oauth_authorization_code';
|
|
760
|
-
});
|
|
761
|
-
job.runParams.oauthServerNames = names;
|
|
762
|
-
return names;
|
|
763
|
-
}
|
|
764
|
-
catch (error) {
|
|
765
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
766
|
-
if (message.includes('Unknown server refs') ||
|
|
767
|
-
message.includes('Unknown scenarios in scenarioServerOverrides') ||
|
|
768
|
-
message.includes('serverOverrideAll must include at least one server id')) {
|
|
769
|
-
throw error;
|
|
770
|
-
}
|
|
771
|
-
console.warn(`[mcplab] Failed to resolve OAuth servers for queued job '${job.id}': ${message}`);
|
|
772
|
-
return [];
|
|
773
|
-
}
|
|
774
|
-
}
|
|
775
|
-
async function advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, options) {
|
|
776
|
-
if (runQueueState.activeJobId) {
|
|
777
|
-
if (options?.emitWhenIdle)
|
|
778
|
-
emitQueueEvent(jobs, runQueueState, deps);
|
|
779
|
-
return;
|
|
780
|
-
}
|
|
781
|
-
if (runQueueState.isAdvancingQueue) {
|
|
782
|
-
if (options?.emitWhenIdle)
|
|
783
|
-
emitQueueEvent(jobs, runQueueState, deps);
|
|
784
|
-
return;
|
|
785
|
-
}
|
|
786
|
-
runQueueState.isAdvancingQueue = true;
|
|
787
|
-
let queueMutated = false;
|
|
788
|
-
try {
|
|
789
|
-
while (runQueueState.queue.length > 0) {
|
|
790
|
-
const nextId = runQueueState.queue[0]; // peek — do not shift yet
|
|
791
|
-
const nextJob = jobs.get(nextId);
|
|
792
|
-
if (!nextJob || (nextJob.status !== 'queued' && nextJob.status !== 'blocked_auth')) {
|
|
793
|
-
runQueueState.queue.shift();
|
|
794
|
-
queueMutated = true;
|
|
795
|
-
continue;
|
|
796
|
-
}
|
|
797
|
-
// Pre-check OAuth before starting
|
|
798
|
-
let oauthServers = [];
|
|
799
|
-
try {
|
|
800
|
-
oauthServers = resolveOAuthServersForJob(nextJob, settings.librariesDir);
|
|
801
|
-
}
|
|
802
|
-
catch (error) {
|
|
803
|
-
runQueueState.queue.shift();
|
|
804
|
-
nextJob.status = 'error';
|
|
805
|
-
deps.addJobEvent(nextJob, {
|
|
806
|
-
type: 'error',
|
|
807
|
-
ts: new Date().toISOString(),
|
|
808
|
-
payload: {
|
|
809
|
-
message: error instanceof Error ? error.message : String(error)
|
|
810
|
-
}
|
|
811
|
-
});
|
|
812
|
-
for (const client of nextJob.clients)
|
|
813
|
-
client.end();
|
|
814
|
-
nextJob.clients.clear();
|
|
815
|
-
queueMutated = true;
|
|
816
|
-
continue;
|
|
817
|
-
}
|
|
818
|
-
if (oauthServers.length > 0) {
|
|
819
|
-
const ensureResult = await oauthSessionManager.ensureServersAuthorized(oauthServers, options?.hostHeader);
|
|
820
|
-
const needsAuth = ensureResult.servers.filter((s) => s.status === 'auth_required');
|
|
821
|
-
if (needsAuth.length > 0) {
|
|
822
|
-
const needsAuthNames = needsAuth.map((s) => s.serverName);
|
|
823
|
-
const wasBlocked = nextJob.status === 'blocked_auth';
|
|
824
|
-
const prevBlockedServers = nextJob.blockedAuthServers ?? [];
|
|
825
|
-
const prevKey = [...prevBlockedServers].sort().join('|');
|
|
826
|
-
const nextKey = [...needsAuthNames].sort().join('|');
|
|
827
|
-
const blockedSetChanged = prevKey !== nextKey;
|
|
828
|
-
nextJob.blockedAuthServers = needsAuthNames; // always refresh to current missing subset
|
|
829
|
-
if (!wasBlocked) {
|
|
830
|
-
nextJob.status = 'blocked_auth';
|
|
831
|
-
}
|
|
832
|
-
if (!wasBlocked || blockedSetChanged) {
|
|
833
|
-
deps.addJobEvent(nextJob, {
|
|
834
|
-
type: 'oauth_required',
|
|
835
|
-
ts: new Date().toISOString(),
|
|
836
|
-
payload: {
|
|
837
|
-
jobId: nextJob.id,
|
|
838
|
-
servers: needsAuthNames,
|
|
839
|
-
message: `OAuth login required for server(s): ${needsAuthNames.join(', ')}.`
|
|
840
|
-
}
|
|
841
|
-
});
|
|
842
|
-
}
|
|
843
|
-
queueMutated = true;
|
|
844
|
-
emitQueueEvent(jobs, runQueueState, deps);
|
|
845
|
-
return; // pause — frontend must call /api/runs/queue/resume after auth
|
|
846
|
-
}
|
|
847
|
-
const readyServers = ensureResult.servers
|
|
848
|
-
.filter((s) => s.status === 'ready')
|
|
849
|
-
.map((s) => {
|
|
850
|
-
const mode = s.debugState ?? 'unknown';
|
|
851
|
-
return `${s.serverName} (${mode})`;
|
|
852
|
-
});
|
|
853
|
-
if (readyServers.length > 0) {
|
|
854
|
-
deps.addJobEvent(nextJob, {
|
|
855
|
-
type: 'log',
|
|
856
|
-
ts: new Date().toISOString(),
|
|
857
|
-
payload: {
|
|
858
|
-
message: `OAuth credentials ready for queued run: ${readyServers.join(', ')}`
|
|
859
|
-
}
|
|
860
|
-
});
|
|
861
|
-
}
|
|
862
|
-
}
|
|
863
|
-
// OAuth ready (or not required) — start the job
|
|
864
|
-
runQueueState.queue.shift();
|
|
865
|
-
nextJob.status = 'running';
|
|
866
|
-
runQueueState.activeJobId = nextId;
|
|
867
|
-
deps.addJobEvent(nextJob, {
|
|
868
|
-
type: 'started',
|
|
869
|
-
ts: new Date().toISOString(),
|
|
870
|
-
payload: {
|
|
871
|
-
configPath: nextJob.runParams.configPath,
|
|
872
|
-
runsPerScenario: nextJob.runParams.runsPerScenario,
|
|
873
|
-
scenarioId: nextJob.runParams.scenarioId ?? null,
|
|
874
|
-
scenarioIds: nextJob.runParams.scenarioIds ?? null,
|
|
875
|
-
agents: nextJob.runParams.requestedAgents ?? null,
|
|
876
|
-
runNote: nextJob.runParams.runNote ?? null,
|
|
877
|
-
serverOverrideAll: nextJob.runParams.serverOverrideAll ?? null,
|
|
878
|
-
scenarioServerOverrides: nextJob.runParams.scenarioServerOverrides ?? null
|
|
879
|
-
}
|
|
880
|
-
});
|
|
881
|
-
queueMutated = true;
|
|
882
|
-
emitQueueEvent(jobs, runQueueState, deps);
|
|
883
|
-
void executeRunJob(nextJob, settings, jobs, runQueueState, oauthSessionManager, deps);
|
|
884
|
-
return;
|
|
885
|
-
}
|
|
886
|
-
if (queueMutated || options?.emitWhenIdle) {
|
|
887
|
-
emitQueueEvent(jobs, runQueueState, deps);
|
|
888
|
-
}
|
|
889
|
-
}
|
|
890
|
-
finally {
|
|
891
|
-
runQueueState.isAdvancingQueue = false;
|
|
892
|
-
}
|
|
893
|
-
}
|
|
894
|
-
async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionManager, deps) {
|
|
895
|
-
const { addJobEvent, getScenarioRunTraceRecords, selectScenarioIds, expandConfigForAgents, resolveRunSelectedAgents, readLibraries, pkgVersion } = deps;
|
|
896
|
-
const { configPath, runsPerScenario, scenarioId, scenarioIds, requestedAgents, runNote, serverOverrideAll, scenarioServerOverrides } = job.runParams;
|
|
897
|
-
try {
|
|
898
|
-
addJobEvent(job, {
|
|
899
|
-
type: 'log',
|
|
900
|
-
ts: new Date().toISOString(),
|
|
901
|
-
payload: { message: `Loading MCP Evaluation config: ${configPath}` }
|
|
902
|
-
});
|
|
903
|
-
const loaded = loadConfig(configPath, { bundleRoot: settings.librariesDir });
|
|
904
|
-
const { agents: libraryAgents, servers: libraryServers } = readLibraries(settings.librariesDir);
|
|
905
|
-
applyLibraryEntries(loaded, libraryAgents, libraryServers);
|
|
906
|
-
addJobEvent(job, {
|
|
907
|
-
type: 'log',
|
|
908
|
-
ts: new Date().toISOString(),
|
|
909
|
-
payload: {
|
|
910
|
-
message: `Loaded config (${loaded.config.scenarios.length} scenario(s), ${Object.keys(loaded.config.agents ?? {}).length} agent(s), ${Object.keys(loaded.config.servers ?? {}).length} server(s))`
|
|
911
|
-
}
|
|
912
|
-
});
|
|
913
|
-
for (const warning of loaded.warnings ?? []) {
|
|
914
|
-
addJobEvent(job, {
|
|
915
|
-
type: 'log',
|
|
916
|
-
ts: new Date().toISOString(),
|
|
917
|
-
payload: { message: warning }
|
|
918
|
-
});
|
|
919
|
-
}
|
|
920
|
-
addJobEvent(job, {
|
|
921
|
-
type: 'log',
|
|
922
|
-
ts: new Date().toISOString(),
|
|
923
|
-
payload: {
|
|
924
|
-
message: scenarioIds && scenarioIds.length > 0
|
|
925
|
-
? `Selecting requested scenarios: ${scenarioIds.join(', ')}`
|
|
926
|
-
: scenarioId
|
|
927
|
-
? `Selecting requested scenario: ${scenarioId}`
|
|
928
|
-
: 'Using all scenarios from config'
|
|
929
|
-
}
|
|
930
|
-
});
|
|
931
|
-
const selectedBaseScenarios = selectScenarioIds(loaded.config, scenarioIds && scenarioIds.length > 0 ? scenarioIds : scenarioId ? [scenarioId] : undefined);
|
|
932
|
-
addJobEvent(job, {
|
|
933
|
-
type: 'log',
|
|
934
|
-
ts: new Date().toISOString(),
|
|
935
|
-
payload: {
|
|
936
|
-
message: `Selected ${selectedBaseScenarios.scenarios.length} base scenario(s)`
|
|
937
|
-
}
|
|
938
|
-
});
|
|
939
|
-
const filteredScenarioOverrides = filterScenarioOverridesToSelectedScenarios(selectedBaseScenarios, scenarioServerOverrides);
|
|
940
|
-
const runtimeOverriddenConfig = applyRuntimeServerOverrides(selectedBaseScenarios, {
|
|
941
|
-
serverOverrideAll,
|
|
942
|
-
scenarioServerOverrides: filteredScenarioOverrides
|
|
943
|
-
});
|
|
944
|
-
const effectiveConfigHash = hashConfig(runtimeOverriddenConfig);
|
|
945
|
-
addJobEvent(job, {
|
|
946
|
-
type: 'log',
|
|
947
|
-
ts: new Date().toISOString(),
|
|
948
|
-
payload: {
|
|
949
|
-
message: `Applied runtime server overrides: global=${serverOverrideAll?.length ?? 0} scenario-specific=${Object.keys(filteredScenarioOverrides ?? {}).length}`
|
|
950
|
-
}
|
|
951
|
-
});
|
|
952
|
-
const effectiveScenarioServers = runtimeOverriddenConfig.scenarios
|
|
953
|
-
.map((scenario) => `${scenario.id}=[${scenario.servers.join(', ')}]`)
|
|
954
|
-
.join('; ');
|
|
955
|
-
addJobEvent(job, {
|
|
956
|
-
type: 'log',
|
|
957
|
-
ts: new Date().toISOString(),
|
|
958
|
-
payload: {
|
|
959
|
-
message: `Effective MCP servers per scenario: ${effectiveScenarioServers || '(none)'}`
|
|
960
|
-
}
|
|
961
|
-
});
|
|
962
|
-
const resolvedAgents = resolveRunSelectedAgents(runtimeOverriddenConfig, requestedAgents);
|
|
963
|
-
const resolvedAgentList = Array.isArray(resolvedAgents) ? resolvedAgents : [];
|
|
964
|
-
addJobEvent(job, {
|
|
965
|
-
type: 'log',
|
|
966
|
-
ts: new Date().toISOString(),
|
|
967
|
-
payload: {
|
|
968
|
-
message: requestedAgents && requestedAgents.length > 0
|
|
969
|
-
? `Using requested agents: ${resolvedAgentList.join(', ')}`
|
|
970
|
-
: `Using resolved default agents: ${resolvedAgentList.join(', ')}`
|
|
971
|
-
}
|
|
972
|
-
});
|
|
973
|
-
const expandedConfig = expandConfigForAgents(runtimeOverriddenConfig, resolvedAgents);
|
|
974
|
-
addJobEvent(job, {
|
|
975
|
-
type: 'log',
|
|
976
|
-
ts: new Date().toISOString(),
|
|
977
|
-
payload: {
|
|
978
|
-
message: `Expanded to ${expandedConfig.scenarios.length} executable scenario run(s) across selected agents`
|
|
979
|
-
}
|
|
980
|
-
});
|
|
981
|
-
const usedServerNames = new Set(expandedConfig.scenarios.flatMap((scenario) => scenario.servers));
|
|
982
|
-
const oauthServers = Array.from(usedServerNames).filter((serverName) => expandedConfig.servers[serverName]?.auth?.type === 'oauth_authorization_code');
|
|
983
|
-
const oauthServerSet = new Set(oauthServers);
|
|
984
|
-
const mcpServerAuthHeaders = oauthServers.length > 0
|
|
985
|
-
? await oauthSessionManager.getAuthHeadersForServers(oauthServers, undefined)
|
|
986
|
-
: undefined;
|
|
987
|
-
if (oauthServers.length > 0) {
|
|
988
|
-
addJobEvent(job, {
|
|
989
|
-
type: 'log',
|
|
990
|
-
ts: new Date().toISOString(),
|
|
991
|
-
payload: {
|
|
992
|
-
message: `OAuth runtime credentials resolved for server(s): ${oauthServers.join(', ')}`
|
|
993
|
-
}
|
|
994
|
-
});
|
|
995
|
-
}
|
|
996
|
-
const cwdBefore = process.cwd();
|
|
997
|
-
process.chdir(settings.workspaceRoot);
|
|
998
|
-
try {
|
|
999
|
-
addJobEvent(job, {
|
|
1000
|
-
type: 'log',
|
|
1001
|
-
ts: new Date().toISOString(),
|
|
1002
|
-
payload: {
|
|
1003
|
-
message: `Running evaluation (${runsPerScenario} run(s) per scenario) ...`
|
|
1004
|
-
}
|
|
1005
|
-
});
|
|
1006
|
-
if (runNote) {
|
|
1007
|
-
addJobEvent(job, {
|
|
1008
|
-
type: 'log',
|
|
1009
|
-
ts: new Date().toISOString(),
|
|
1010
|
-
payload: { message: `Run note: ${runNote}` }
|
|
1011
|
-
});
|
|
1012
|
-
}
|
|
1013
|
-
const { runDir, results } = await runAll(expandedConfig, {
|
|
1014
|
-
runsPerScenario,
|
|
1015
|
-
scenarioId,
|
|
1016
|
-
runNote,
|
|
1017
|
-
configHash: effectiveConfigHash,
|
|
1018
|
-
cliVersion: pkgVersion,
|
|
1019
|
-
runsDir: settings.runsDir,
|
|
1020
|
-
mcpServerAuthHeaders,
|
|
1021
|
-
resolveMcpServerAuthHeaders: oauthServers.length > 0
|
|
1022
|
-
? async (serverNames, options) => {
|
|
1023
|
-
if (options?.signal?.aborted)
|
|
1024
|
-
return {};
|
|
1025
|
-
const namesToRefresh = serverNames.filter((name) => oauthServerSet.has(name));
|
|
1026
|
-
if (namesToRefresh.length === 0)
|
|
1027
|
-
return {};
|
|
1028
|
-
return oauthSessionManager.getAuthHeadersForServers(namesToRefresh);
|
|
1029
|
-
}
|
|
1030
|
-
: undefined,
|
|
1031
|
-
signal: job.abortController.signal,
|
|
1032
|
-
onProgress: async (event) => {
|
|
1033
|
-
const message = formatRunProgressMessage(event);
|
|
1034
|
-
if (!message)
|
|
1035
|
-
return;
|
|
1036
|
-
addJobEvent(job, {
|
|
1037
|
-
type: 'log',
|
|
1038
|
-
ts: new Date().toISOString(),
|
|
1039
|
-
payload: { message }
|
|
1040
|
-
});
|
|
1041
|
-
}
|
|
1042
|
-
});
|
|
1043
|
-
const relativeConfigPathRaw = relative(settings.evalsDir, configPath);
|
|
1044
|
-
const relativeConfigPath = relativeConfigPathRaw.replace(/\\/g, '/').replace(/^\.\/+/, '');
|
|
1045
|
-
results.metadata.config_path = relativeConfigPath || configPath;
|
|
1046
|
-
if (loaded.config.name && loaded.config.name.trim().length > 0) {
|
|
1047
|
-
results.metadata.config_name = loaded.config.name.trim();
|
|
1048
|
-
}
|
|
1049
|
-
results.metadata.rerun_agents = [...resolvedAgentList];
|
|
1050
|
-
results.metadata.rerun_scenario_ids = selectedBaseScenarios.scenarios.map((scenario) => scenario.id);
|
|
1051
|
-
if (serverOverrideAll && serverOverrideAll.length > 0) {
|
|
1052
|
-
results.metadata.rerun_server_override_all = [...serverOverrideAll];
|
|
1053
|
-
}
|
|
1054
|
-
else {
|
|
1055
|
-
delete results.metadata.rerun_server_override_all;
|
|
1056
|
-
}
|
|
1057
|
-
if (filteredScenarioOverrides && Object.keys(filteredScenarioOverrides).length > 0) {
|
|
1058
|
-
results.metadata.rerun_scenario_server_overrides = Object.fromEntries(Object.entries(filteredScenarioOverrides).map(([scenarioKey, serverIds]) => [
|
|
1059
|
-
scenarioKey,
|
|
1060
|
-
[...serverIds]
|
|
1061
|
-
]));
|
|
1062
|
-
}
|
|
1063
|
-
else {
|
|
1064
|
-
delete results.metadata.rerun_scenario_server_overrides;
|
|
1065
|
-
}
|
|
1066
|
-
addJobEvent(job, {
|
|
1067
|
-
type: 'log',
|
|
1068
|
-
ts: new Date().toISOString(),
|
|
1069
|
-
payload: {
|
|
1070
|
-
message: `Evaluation execution finished (run id: ${results.metadata.run_id})`
|
|
1071
|
-
}
|
|
1072
|
-
});
|
|
1073
|
-
addJobEvent(job, {
|
|
1074
|
-
type: 'log',
|
|
1075
|
-
ts: new Date().toISOString(),
|
|
1076
|
-
payload: { message: `Writing results to ${runDir}` }
|
|
1077
|
-
});
|
|
1078
|
-
const traceRecords = getScenarioRunTraceRecords(results.metadata.run_id, settings.runsDir);
|
|
1079
|
-
results.metadata.tool_tokens_total = estimateRunToolTokensTotal(traceRecords);
|
|
1080
|
-
writeFileSync(join(runDir, 'results.json'), `${JSON.stringify(results, null, 2)}\n`, 'utf8');
|
|
1081
|
-
writeFileSync(join(runDir, 'report.html'), renderReport(results), 'utf8');
|
|
1082
|
-
writeFileSync(join(runDir, 'summary.md'), renderSummaryMarkdown(results), 'utf8');
|
|
1083
|
-
addJobEvent(job, {
|
|
1084
|
-
type: 'log',
|
|
1085
|
-
ts: new Date().toISOString(),
|
|
1086
|
-
payload: {
|
|
1087
|
-
message: `Run finished: ${results.summary.total_runs} run(s), pass rate ${Math.round(results.summary.pass_rate * 100)}%`
|
|
1088
|
-
}
|
|
1089
|
-
});
|
|
1090
|
-
addJobEvent(job, {
|
|
1091
|
-
type: 'completed',
|
|
1092
|
-
ts: new Date().toISOString(),
|
|
1093
|
-
payload: {
|
|
1094
|
-
runId: results.metadata.run_id,
|
|
1095
|
-
runDir,
|
|
1096
|
-
summary: results.summary
|
|
1097
|
-
}
|
|
1098
|
-
});
|
|
1099
|
-
job.status = 'completed';
|
|
1100
|
-
}
|
|
1101
|
-
finally {
|
|
1102
|
-
process.chdir(cwdBefore);
|
|
1103
|
-
}
|
|
1104
|
-
}
|
|
1105
|
-
catch (error) {
|
|
1106
|
-
if (error instanceof OAuthAuthorizationRequiredError) {
|
|
1107
|
-
const blockedServers = Array.from(new Set(error.details.map((detail) => detail.serverName).filter(Boolean)));
|
|
1108
|
-
const aborted = job.abortController.signal.aborted || job.status === 'stopped';
|
|
1109
|
-
if (!aborted && blockedServers.length > 0) {
|
|
1110
|
-
job.blockedAuthServers = blockedServers;
|
|
1111
|
-
job.status = 'blocked_auth';
|
|
1112
|
-
if (!runQueueState.queue.includes(job.id)) {
|
|
1113
|
-
runQueueState.queue.unshift(job.id);
|
|
1114
|
-
}
|
|
1115
|
-
addJobEvent(job, {
|
|
1116
|
-
type: 'oauth_required',
|
|
1117
|
-
ts: new Date().toISOString(),
|
|
1118
|
-
payload: {
|
|
1119
|
-
jobId: job.id,
|
|
1120
|
-
servers: blockedServers,
|
|
1121
|
-
message: `OAuth login required for server(s): ${blockedServers.join(', ')}.`
|
|
1122
|
-
}
|
|
1123
|
-
});
|
|
1124
|
-
return;
|
|
1125
|
-
}
|
|
1126
|
-
}
|
|
1127
|
-
const normalizedError = error instanceof OAuthAuthorizationRequiredError
|
|
1128
|
-
? new Error(error.details[0]?.message || error.message)
|
|
1129
|
-
: error;
|
|
1130
|
-
const aborted = job.abortController.signal.aborted || job.status === 'stopped';
|
|
1131
|
-
addJobEvent(job, {
|
|
1132
|
-
type: 'error',
|
|
1133
|
-
ts: new Date().toISOString(),
|
|
1134
|
-
payload: {
|
|
1135
|
-
message: aborted
|
|
1136
|
-
? 'Run aborted by user'
|
|
1137
|
-
: normalizedError instanceof Error
|
|
1138
|
-
? normalizedError.message
|
|
1139
|
-
: String(normalizedError)
|
|
1140
|
-
}
|
|
1141
|
-
});
|
|
1142
|
-
job.status = aborted ? 'stopped' : 'error';
|
|
1143
|
-
}
|
|
1144
|
-
finally {
|
|
1145
|
-
runQueueState.activeJobId = null;
|
|
1146
|
-
if (job.status !== 'blocked_auth') {
|
|
1147
|
-
for (const client of job.clients)
|
|
1148
|
-
client.end();
|
|
1149
|
-
job.clients.clear();
|
|
1150
|
-
}
|
|
1151
|
-
void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
|
|
1152
|
-
emitWhenIdle: true
|
|
1153
|
-
}).catch((error) => {
|
|
1154
|
-
console.warn(`[mcplab] Failed to advance run queue after job '${job.id}': ${error instanceof Error ? error.message : String(error)}`);
|
|
1155
|
-
});
|
|
1156
|
-
pruneOldJobs(jobs, runQueueState);
|
|
1157
|
-
}
|
|
1158
|
-
}
|
|
1159
|
-
/**
 * Split a total into integer shares that are as even as possible.
 *
 * The total is rounded to the nearest integer and clamped to be non-negative;
 * the leading shares each absorb one unit of the remainder, so the shares
 * always sum to the rounded, clamped total.
 *
 * @param {number|null|undefined} total - Amount to distribute. Anything that
 *   is not a finite number yields all-zero shares.
 * @param {number} parts - Number of shares requested. Fractional counts are
 *   floored; counts that are missing, non-numeric, or not positive yield `[]`.
 * @returns {number[]} Array of `parts` non-negative integers.
 */
function splitInteger(total, parts) {
    // Normalise the share count before building any array: `Array(n)` throws a
    // RangeError for negative, fractional or NaN lengths, and `Array(undefined)`
    // silently produces a one-element array.
    const requested = Number(parts);
    const count = Number.isFinite(requested) && requested > 0 ? Math.floor(requested) : 0;
    if (count === 0) {
        return [];
    }
    if (!Number.isFinite(total)) {
        return Array(count).fill(0);
    }
    const safeTotal = Math.max(0, Math.round(total));
    const base = Math.floor(safeTotal / count);
    const remainder = safeTotal % count;
    // The first `remainder` shares receive one extra unit each.
    return Array.from({ length: count }, (_, index) => base + (index < remainder ? 1 : 0));
}
|
|
1172
|
-
/**
 * Sum the token counts attributed to `tool_use` and `tool_result` content
 * blocks across a collection of trace records.
 *
 * For each message, tool-use blocks and tool-result blocks are handled
 * separately with the same preference order:
 *   1. If every block of that kind carries `estimated_tokens`, add each
 *      block's `estimated_tokens.total` (missing totals count as 0).
 *   2. Otherwise, if there is exactly one block and the message has a numeric
 *      `usage.total_tokens`, add that number as-is. For results the block
 *      must also reference a tool-use id seen earlier in the same record.
 *   3. Otherwise, distribute `usage.total_tokens` with `splitInteger` (over
 *      all tool uses, or over the results that reference a known tool use)
 *      and add the shares.
 *
 * Messages whose `content` is not an array (e.g. a plain string, or absent)
 * contain no tool blocks and are skipped instead of throwing.
 *
 * @param {Iterable<object>|null|undefined} records - Trace records; each may
 *   have a `messages` array of `{ content, usage }` objects.
 * @returns {number|null} The accumulated total, or `null` when no message
 *   contributed any token information.
 */
function estimateRunToolTokensTotal(records) {
    let total = 0;
    let hasAny = false;
    for (const record of records ?? []) {
        // Ids of tool_use blocks seen so far in this record; results are only
        // attributed via usage when they reference one of these.
        const seenToolUseIds = new Set();
        for (const message of record?.messages ?? []) {
            // Guard `content` the same way `messages` is guarded: a string or
            // missing content simply has no tool blocks.
            const blocks = Array.isArray(message?.content) ? message.content : [];
            const usageTotal = message?.usage?.total_tokens;
            const hasUsage = typeof usageTotal === 'number';

            const toolUses = blocks.filter((block) => block?.type === 'tool_use');
            if (toolUses.length > 0) {
                for (const toolUse of toolUses) {
                    seenToolUseIds.add(toolUse.id);
                }
                if (toolUses.every((toolUse) => Boolean(toolUse.estimated_tokens))) {
                    for (const toolUse of toolUses) {
                        total += toolUse.estimated_tokens?.total ?? 0;
                    }
                    hasAny = true;
                } else if (toolUses.length === 1 && hasUsage) {
                    total += usageTotal;
                    hasAny = true;
                } else {
                    const shares = splitInteger(usageTotal, toolUses.length);
                    total += shares.reduce((sum, value) => sum + value, 0);
                    if (hasUsage) {
                        hasAny = true;
                    }
                }
            }

            const toolResults = blocks.filter((block) => block?.type === 'tool_result');
            if (toolResults.length === 0) {
                continue;
            }
            if (toolResults.every((result) => Boolean(result.estimated_tokens))) {
                for (const result of toolResults) {
                    total += result.estimated_tokens?.total ?? 0;
                }
                hasAny = true;
                continue;
            }
            if (toolResults.length === 1 &&
                seenToolUseIds.has(toolResults[0].tool_use_id) &&
                hasUsage) {
                total += usageTotal;
                hasAny = true;
                continue;
            }
            // Only results that answer a tool use from this record share the
            // message's usage; unmatched results contribute nothing.
            const knownResults = toolResults.filter((result) => seenToolUseIds.has(result.tool_use_id));
            if (knownResults.length === 0) {
                continue;
            }
            const shares = splitInteger(usageTotal, knownResults.length);
            total += shares.reduce((sum, value) => sum + value, 0);
            if (hasUsage) {
                hasAny = true;
            }
        }
    }
    return hasAny ? total : null;
}
|
|
1230
|
-
/**
 * Remove finished jobs from `jobs` once their most recent event is more than
 * 30 minutes old.
 *
 * A job is left in place when it is the active job, is listed in the queue,
 * has a status other than 'completed', 'error' or 'stopped', or has no
 * events at all.
 *
 * @param {Map<string, object>} jobs - Job registry keyed by job id; mutated
 *   in place.
 * @param {{ activeJobId: (string|null), queue: string[] }} runQueueState -
 *   Queue state whose active and queued ids are protected from pruning.
 * @returns {void}
 */
function pruneOldJobs(jobs, runQueueState) {
    const retentionMs = 30 * 60_000;
    const checkedAt = Date.now();
    const terminalStatuses = ['completed', 'error', 'stopped'];
    const protectedIds = new Set();
    for (const candidateId of [runQueueState.activeJobId, ...runQueueState.queue]) {
        if (candidateId) {
            protectedIds.add(candidateId);
        }
    }
    for (const [jobId, job] of jobs) {
        if (protectedIds.has(jobId) || !terminalStatuses.includes(job.status)) {
            continue;
        }
        const { events } = job;
        const newestEvent = events[events.length - 1];
        if (!newestEvent) {
            continue;
        }
        const ageMs = checkedAt - new Date(newestEvent.ts).getTime();
        if (ageMs > retentionMs) {
            jobs.delete(jobId);
        }
    }
}
|
|
1247
|
-
/**
 * Turn a run progress event into a one-line log message.
 *
 * Top-level event types handled: 'run_started', 'mcp_connect_started',
 * 'mcp_connect_finished', 'scenario_run_started', 'scenario_run_finished',
 * 'agent_progress' and 'run_finished'. For 'agent_progress' the nested
 * `event.event` is formatted according to its own `type`.
 *
 * @param {object} event - Progress event; the fields read depend on `type`.
 * @returns {string|null} The message, or `null` for an event type (or nested
 *   agent-progress type) that has no message.
 */
function formatRunProgressMessage(event) {
    const { type } = event;
    if (type === 'run_started') {
        return `Run initialized (id: ${event.runId}, ${event.totalScenarioRuns} scenario run(s))`;
    }
    if (type === 'mcp_connect_started') {
        return `Connecting to ${event.serverCount} MCP server(s): ${event.serverNames.join(', ')} ...`;
    }
    if (type === 'mcp_connect_finished') {
        return `Connected to ${event.serverCount} MCP server(s): ${event.serverNames.join(', ')}`;
    }
    if (type === 'scenario_run_started') {
        return `Scenario ${event.scenarioRunIndex}/${event.totalScenarioRuns} started: ${event.scenarioId} [agent=${event.agentName}, run=${event.runIndex + 1}/${event.runsPerScenario}]`;
    }
    if (type === 'scenario_run_finished') {
        return `Scenario ${event.scenarioRunIndex}/${event.totalScenarioRuns} finished: ${event.scenarioId} [agent=${event.agentName}] -> ${event.pass ? 'PASS' : 'FAIL'} (${event.toolCallCount} tool call(s))`;
    }
    if (type === 'run_finished') {
        return `Run finished (id: ${event.runId})`;
    }
    if (type !== 'agent_progress') {
        return null;
    }
    // Agent progress wraps a nested event; turns are zero-based in the event
    // and shown one-based in the message.
    const progress = event.event;
    const step = progress.type;
    if (step === 'llm_request_started') {
        return `LLM turn ${progress.turn + 1} started for ${progress.scenarioId} [${progress.agentName}] (${progress.provider}/${progress.model})`;
    }
    if (step === 'llm_response_received') {
        return `LLM turn ${progress.turn + 1} response for ${progress.scenarioId} [${progress.agentName}] (text=${progress.hasText ? 'yes' : 'no'}, tool_calls=${progress.toolCallCount})`;
    }
    if (step === 'tool_call_started') {
        return `Tool call started: ${progress.server}.${progress.tool} (turn ${progress.turn + 1})`;
    }
    if (step === 'tool_call_finished') {
        return `Tool call ${progress.ok ? 'finished' : 'failed'}: ${progress.server}.${progress.tool} in ${progress.durationMs}ms`;
    }
    if (step === 'final_answer') {
        return `Final answer produced for ${progress.scenarioId} [${progress.agentName}] (text=${progress.hasText ? 'yes' : 'no'})`;
    }
    return null;
}
|
|
1282
579
|
function localMcplabMcpUrl() {
|
|
1283
580
|
const host = process.env.MCP_HOST || '127.0.0.1';
|
|
1284
581
|
const port = process.env.MCP_PORT || '3011';
|