@inspectr/mcplab 1.19.0 → 1.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/app/assets/index-D1aWsOm8.js +254 -0
  2. package/dist/app/index.html +1 -1
  3. package/dist/app-server/app-context.d.ts +1 -10
  4. package/dist/app-server/app-context.d.ts.map +1 -1
  5. package/dist/app-server/app-dist.d.ts +8 -0
  6. package/dist/app-server/app-dist.d.ts.map +1 -0
  7. package/dist/app-server/app-dist.js +13 -0
  8. package/dist/app-server/app-dist.js.map +1 -0
  9. package/dist/app-server/browser-launch.d.ts +8 -0
  10. package/dist/app-server/browser-launch.d.ts.map +1 -0
  11. package/dist/app-server/browser-launch.js +15 -0
  12. package/dist/app-server/browser-launch.js.map +1 -0
  13. package/dist/app-server/router.d.ts.map +1 -1
  14. package/dist/app-server/router.js +36 -65
  15. package/dist/app-server/router.js.map +1 -1
  16. package/dist/app-server/run-agent-selection.d.ts +6 -0
  17. package/dist/app-server/run-agent-selection.d.ts.map +1 -0
  18. package/dist/app-server/run-agent-selection.js +9 -0
  19. package/dist/app-server/run-agent-selection.js.map +1 -0
  20. package/dist/app-server/run-queue-domain.d.ts +80 -0
  21. package/dist/app-server/run-queue-domain.d.ts.map +1 -0
  22. package/dist/app-server/run-queue-domain.js +396 -0
  23. package/dist/app-server/run-queue-domain.js.map +1 -0
  24. package/dist/app-server/run-queue-events.d.ts +16 -0
  25. package/dist/app-server/run-queue-events.d.ts.map +1 -0
  26. package/dist/app-server/run-queue-events.js +65 -0
  27. package/dist/app-server/run-queue-events.js.map +1 -0
  28. package/dist/app-server/run-queue-executor.d.ts +49 -0
  29. package/dist/app-server/run-queue-executor.d.ts.map +1 -0
  30. package/dist/app-server/run-queue-executor.js +443 -0
  31. package/dist/app-server/run-queue-executor.js.map +1 -0
  32. package/dist/app-server/run-queue-state.d.ts +58 -0
  33. package/dist/app-server/run-queue-state.d.ts.map +1 -0
  34. package/dist/app-server/run-queue-state.js +18 -0
  35. package/dist/app-server/run-queue-state.js.map +1 -0
  36. package/dist/app-server/runs-routes.d.ts +3 -38
  37. package/dist/app-server/runs-routes.d.ts.map +1 -1
  38. package/dist/app-server/runs-routes.js +18 -721
  39. package/dist/app-server/runs-routes.js.map +1 -1
  40. package/dist/app-server/runs-routes.test-helpers.d.ts +47 -0
  41. package/dist/app-server/runs-routes.test-helpers.d.ts.map +1 -0
  42. package/dist/app-server/runs-routes.test-helpers.js +107 -0
  43. package/dist/app-server/runs-routes.test-helpers.js.map +1 -0
  44. package/dist/app-server/runs-store.d.ts +34 -28
  45. package/dist/app-server/settings-store.d.ts +1 -0
  46. package/dist/app-server/settings-store.d.ts.map +1 -1
  47. package/dist/app-server/settings-store.js +33 -0
  48. package/dist/app-server/settings-store.js.map +1 -1
  49. package/dist/app-server/tool-analysis.d.ts.map +1 -1
  50. package/dist/app-server/tool-analysis.js +1 -7
  51. package/dist/app-server/tool-analysis.js.map +1 -1
  52. package/dist/app-server/types.d.ts +1 -0
  53. package/dist/app-server/types.d.ts.map +1 -1
  54. package/dist/app-server/version-info.d.ts +11 -0
  55. package/dist/app-server/version-info.d.ts.map +1 -0
  56. package/dist/app-server/version-info.js +20 -0
  57. package/dist/app-server/version-info.js.map +1 -0
  58. package/dist/app-server/workspace-paths.d.ts +4 -0
  59. package/dist/app-server/workspace-paths.d.ts.map +1 -0
  60. package/dist/app-server/workspace-paths.js +11 -0
  61. package/dist/app-server/workspace-paths.js.map +1 -0
  62. package/dist/cli.js +1 -0
  63. package/dist/cli.js.map +1 -1
  64. package/package.json +5 -5
  65. package/dist/app/assets/index-DHV4p_lQ.js +0 -254
@@ -1,30 +1,10 @@
1
- import { randomUUID } from 'node:crypto';
2
1
  import { existsSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
3
2
  import { tmpdir } from 'node:os';
4
- import { isAbsolute, join, relative, resolve } from 'node:path';
3
+ import { isAbsolute, join, resolve } from 'node:path';
5
4
  import { McpClientManager, loadConfig, hashConfig, runAll, renderSummaryMarkdown, applyRuntimeServerOverrides } from '@inspectr/mcplab-core';
6
5
  import { renderReport } from '@inspectr/mcplab-reporting';
7
6
  import { OAuthAuthorizationRequiredError } from './oauth-session-manager.js';
8
- import { selectScenarioIds } from './runs-store.js';
9
- import { readLibraries as readLibrariesFromStore } from './libraries-store.js';
10
- export function mergeLibraryEntriesIntoConfig(config, libraryAgents, libraryServers) {
11
- return {
12
- ...config,
13
- agents: { ...libraryAgents, ...config.agents },
14
- servers: { ...libraryServers, ...config.servers }
15
- };
16
- }
17
- export function applyLibraryEntries(loaded, libraryAgents, libraryServers) {
18
- loaded.config = mergeLibraryEntriesIntoConfig(loaded.config, libraryAgents, libraryServers);
19
- loaded.hash = hashConfig(loaded.config);
20
- }
21
- function filterScenarioOverridesToSelectedScenarios(selectedConfig, scenarioServerOverrides) {
22
- if (!scenarioServerOverrides)
23
- return undefined;
24
- const selectedIds = new Set(selectedConfig.scenarios.map((scenario) => scenario.id));
25
- const filtered = Object.fromEntries(Object.entries(scenarioServerOverrides).filter(([scenarioId]) => selectedIds.has(scenarioId)));
26
- return Object.keys(filtered).length > 0 ? filtered : undefined;
27
- }
7
+ import { applyLibraryEntries, filterScenarioOverridesToSelectedScenarios, mergeLibraryEntriesIntoConfig } from './run-queue-executor.js';
28
8
  // Backward-compatible exports used by existing tests/imports.
29
9
  export function mergeLibraryAgentsIntoConfig(config, libraryAgents) {
30
10
  return mergeLibraryEntriesIntoConfig(config, libraryAgents, {});
@@ -32,55 +12,8 @@ export function mergeLibraryAgentsIntoConfig(config, libraryAgents) {
32
12
  export function applyLibraryAgents(loaded, libraryAgents) {
33
13
  applyLibraryEntries(loaded, libraryAgents, {});
34
14
  }
35
- function toQueueEntry(job) {
36
- return {
37
- jobId: job.id,
38
- status: job.status,
39
- blockedReason: job.status === 'blocked_auth' ? 'oauth_required' : undefined,
40
- requiredServers: job.status === 'blocked_auth' ? job.blockedAuthServers ?? [] : undefined,
41
- runParams: {
42
- configPath: job.runParams.configPath,
43
- runsPerScenario: job.runParams.runsPerScenario,
44
- scenarioIds: job.runParams.scenarioIds ?? null,
45
- agents: job.runParams.requestedAgents ?? null,
46
- runNote: job.runParams.runNote ?? null,
47
- serverOverrideAll: job.runParams.serverOverrideAll ?? null,
48
- scenarioServerOverrides: job.runParams.scenarioServerOverrides ?? null
49
- }
50
- };
51
- }
52
- function buildQueueState(jobs, runQueueState) {
53
- const activeJob = runQueueState.activeJobId ? jobs.get(runQueueState.activeJobId) : null;
54
- const queuedEntries = runQueueState.queue
55
- .map((id) => jobs.get(id))
56
- .filter((j) => !!j && (j.status === 'queued' || j.status === 'blocked_auth'))
57
- .map((job) => toQueueEntry(job));
58
- return {
59
- active: activeJob ? toQueueEntry(activeJob) : null,
60
- queued: queuedEntries
61
- };
62
- }
63
- function emitQueueEvent(jobs, runQueueState, deps) {
64
- const event = {
65
- type: 'queue_event',
66
- ts: new Date().toISOString(),
67
- payload: { event: buildQueueState(jobs, runQueueState) }
68
- };
69
- for (const client of Array.from(runQueueState.clients)) {
70
- if (client.destroyed || client.writableEnded) {
71
- runQueueState.clients.delete(client);
72
- continue;
73
- }
74
- try {
75
- deps.sendSseEvent(client, event);
76
- }
77
- catch {
78
- runQueueState.clients.delete(client);
79
- }
80
- }
81
- }
82
15
  export async function handleRunsRoutes(params) {
83
- const { req, res, pathname, method, settings, jobs, runQueueState, oauthSessionManager, deps } = params;
16
+ const { req, res, pathname, method, settings, runQueueService, oauthSessionManager, deps } = params;
84
17
  const { parseBody, asJson, addJobEvent, sendSseEvent, ensureInsideRoot, listRuns, getRunResults, getScenarioRunTraceRecords, selectScenarioIds, expandConfigForAgents, resolveRunSelectedAgents, readLibraries, pickDefaultAssistantAgentName, pkgVersion } = deps;
85
18
  if (pathname === '/api/runs' && method === 'GET') {
86
19
  const requestUrl = new URL(req.url ?? '/api/runs', 'http://localhost');
@@ -160,7 +93,7 @@ export async function handleRunsRoutes(params) {
160
93
  }
161
94
  if (pathname.startsWith('/api/runs/jobs/') && pathname.endsWith('/events') && method === 'GET') {
162
95
  const jobId = pathname.split('/')[4];
163
- const job = jobs.get(jobId);
96
+ const job = runQueueService.jobs.get(jobId);
164
97
  if (!job) {
165
98
  asJson(res, 404, { error: 'Job not found' });
166
99
  return true;
@@ -186,42 +119,16 @@ export async function handleRunsRoutes(params) {
186
119
  }
187
120
  if (pathname.startsWith('/api/runs/jobs/') && pathname.endsWith('/stop') && method === 'POST') {
188
121
  const jobId = pathname.split('/')[4];
189
- const job = jobs.get(jobId);
190
- if (!job) {
122
+ const result = runQueueService.stopJob(jobId, { hostHeader: req.headers.host });
123
+ if (!result) {
191
124
  asJson(res, 404, { error: 'Job not found' });
192
125
  return true;
193
126
  }
194
- if (job.status === 'queued') {
195
- const idx = runQueueState.queue.indexOf(jobId);
196
- if (idx !== -1)
197
- runQueueState.queue.splice(idx, 1);
198
- job.status = 'stopped';
199
- addJobEvent(job, {
200
- type: 'error',
201
- ts: new Date().toISOString(),
202
- payload: { message: 'Run stopped before it started' }
203
- });
204
- for (const client of job.clients)
205
- client.end();
206
- job.clients.clear();
207
- void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
208
- emitWhenIdle: true,
209
- hostHeader: req.headers.host
210
- });
211
- asJson(res, 200, { ok: true, status: 'stopped' });
212
- return true;
213
- }
214
- if (job.status !== 'running') {
215
- asJson(res, 200, { ok: true, status: job.status });
216
- return true;
217
- }
218
- job.abortController.abort();
219
- job.status = 'stopped';
220
- asJson(res, 200, { ok: true, status: 'stopped' });
127
+ asJson(res, 200, result);
221
128
  return true;
222
129
  }
223
130
  if (pathname === '/api/runs/queue' && method === 'GET') {
224
- asJson(res, 200, buildQueueState(jobs, runQueueState));
131
+ asJson(res, 200, runQueueService.getQueueState());
225
132
  return true;
226
133
  }
227
134
  if (pathname === '/api/runs/queue/events' && method === 'GET') {
@@ -232,58 +139,27 @@ export async function handleRunsRoutes(params) {
232
139
  if ('flushHeaders' in res && typeof res.flushHeaders === 'function') {
233
140
  res.flushHeaders();
234
141
  }
235
- sendSseEvent(res, {
236
- type: 'queue_event',
237
- ts: new Date().toISOString(),
238
- payload: { event: buildQueueState(jobs, runQueueState) }
239
- });
240
- runQueueState.clients.add(res);
241
- req.on('close', () => {
242
- runQueueState.clients.delete(res);
243
- });
142
+ runQueueService.subscribeQueue(req, res);
244
143
  return true;
245
144
  }
246
145
  if (pathname.startsWith('/api/runs/queue/') &&
247
146
  method === 'DELETE' &&
248
147
  pathname.split('/').length === 5) {
249
148
  const jobId = pathname.split('/')[4];
250
- const job = jobs.get(jobId);
251
- if (!job) {
149
+ const result = runQueueService.removeQueuedJob(jobId, { hostHeader: req.headers.host });
150
+ if (!result) {
252
151
  asJson(res, 404, { error: 'Job not found' });
253
152
  return true;
254
153
  }
255
- if (job.status === 'running') {
256
- asJson(res, 400, { error: 'Cannot remove a running job. Use the /stop endpoint instead.' });
154
+ if ('error' in result) {
155
+ asJson(res, result.statusCode, { error: result.error });
257
156
  return true;
258
157
  }
259
- if (job.status !== 'queued' && job.status !== 'blocked_auth') {
260
- asJson(res, 404, { error: 'Job is not queued' });
261
- return true;
262
- }
263
- const idx = runQueueState.queue.indexOf(jobId);
264
- if (idx !== -1)
265
- runQueueState.queue.splice(idx, 1);
266
- job.status = 'stopped';
267
- addJobEvent(job, {
268
- type: 'error',
269
- ts: new Date().toISOString(),
270
- payload: { message: 'Removed from queue by user' }
271
- });
272
- for (const client of job.clients)
273
- client.end();
274
- job.clients.clear();
275
- void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
276
- emitWhenIdle: true,
277
- hostHeader: req.headers.host
278
- });
279
- asJson(res, 200, { ok: true, jobId, status: 'stopped' });
158
+ asJson(res, 200, result);
280
159
  return true;
281
160
  }
282
161
  if (pathname === '/api/runs/queue/resume' && method === 'POST') {
283
- void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
284
- emitWhenIdle: true,
285
- hostHeader: req.headers.host
286
- });
162
+ runQueueService.resumeBlockedJobs({ hostHeader: req.headers.host });
287
163
  asJson(res, 200, { ok: true });
288
164
  return true;
289
165
  }
@@ -373,7 +249,6 @@ export async function handleRunsRoutes(params) {
373
249
  }
374
250
  // Resolve lazily in advanceQueue so runtime overrides are always reflected.
375
251
  const oauthServerNames = undefined;
376
- const jobId = `run-${Date.now()}-${randomUUID().slice(0, 8)}`;
377
252
  const runParamsObj = {
378
253
  configPath,
379
254
  runsPerScenario,
@@ -385,44 +260,8 @@ export async function handleRunsRoutes(params) {
385
260
  serverOverrideAll,
386
261
  scenarioServerOverrides
387
262
  };
388
- const job = {
389
- id: jobId,
390
- status: 'queued',
391
- events: [],
392
- clients: new Set(),
393
- abortController: new AbortController(),
394
- runParams: runParamsObj
395
- };
396
- jobs.set(jobId, job);
397
- if (runQueueState.activeJobId) {
398
- // Another job is running — queue and emit position
399
- runQueueState.queue.push(jobId);
400
- addJobEvent(job, {
401
- type: 'queued',
402
- ts: new Date().toISOString(),
403
- payload: {
404
- configPath,
405
- runsPerScenario,
406
- scenarioId: scenarioId ?? null,
407
- scenarioIds: scenarioIds ?? null,
408
- agents: requestedAgents ?? null,
409
- runNote: runNote ?? null,
410
- serverOverrideAll: serverOverrideAll ?? null,
411
- scenarioServerOverrides: scenarioServerOverrides ?? null,
412
- position: runQueueState.queue.length
413
- }
414
- });
415
- emitQueueEvent(jobs, runQueueState, deps);
416
- asJson(res, 202, { jobId, queued: true, position: runQueueState.queue.length });
417
- }
418
- else {
419
- // No active job — add to queue and let advanceQueue handle start (with OAuth pre-check)
420
- runQueueState.queue.push(jobId);
421
- asJson(res, 202, { jobId });
422
- void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
423
- hostHeader: req.headers.host
424
- });
425
- }
263
+ const response = runQueueService.enqueueRun(runParamsObj, { hostHeader: req.headers.host });
264
+ asJson(res, 202, response);
426
265
  return true;
427
266
  }
428
267
  if (pathname === '/api/runs/preview' && method === 'POST') {
@@ -512,6 +351,7 @@ export async function handleRunsRoutes(params) {
512
351
  configHash: hashConfig(previewConfigBase),
513
352
  cliVersion: pkgVersion,
514
353
  runsDir: resolve(previewRunsRoot),
354
+ cwd: settings.workspaceRoot,
515
355
  mcpServerAuthHeaders
516
356
  });
517
357
  const scenario = results.scenarios[0];
@@ -736,549 +576,6 @@ function toCoreExtractRules(extractRules) {
736
576
  }
737
577
  return rules;
738
578
  }
739
- function resolveOAuthServersForJob(job, librariesDir) {
740
- if (job.runParams.oauthServerNames !== undefined)
741
- return job.runParams.oauthServerNames;
742
- try {
743
- const loaded = loadConfig(job.runParams.configPath, { bundleRoot: librariesDir });
744
- const libraries = readLibrariesFromStore(librariesDir);
745
- applyLibraryEntries(loaded, libraries.agents, libraries.servers);
746
- const selected = job.runParams.scenarioIds?.length
747
- ? selectScenarioIds(loaded.config, job.runParams.scenarioIds)
748
- : job.runParams.scenarioId
749
- ? selectScenarioIds(loaded.config, [job.runParams.scenarioId])
750
- : loaded.config;
751
- const filteredScenarioOverrides = filterScenarioOverridesToSelectedScenarios(selected, job.runParams.scenarioServerOverrides);
752
- const withOverrides = applyRuntimeServerOverrides(selected, {
753
- serverOverrideAll: job.runParams.serverOverrideAll,
754
- scenarioServerOverrides: filteredScenarioOverrides
755
- });
756
- const effectiveServers = new Set(withOverrides.scenarios.flatMap((scenario) => scenario.servers));
757
- const names = Array.from(effectiveServers).filter((name) => {
758
- const config = withOverrides.servers?.[name];
759
- return config?.auth?.type === 'oauth_authorization_code';
760
- });
761
- job.runParams.oauthServerNames = names;
762
- return names;
763
- }
764
- catch (error) {
765
- const message = error instanceof Error ? error.message : String(error);
766
- if (message.includes('Unknown server refs') ||
767
- message.includes('Unknown scenarios in scenarioServerOverrides') ||
768
- message.includes('serverOverrideAll must include at least one server id')) {
769
- throw error;
770
- }
771
- console.warn(`[mcplab] Failed to resolve OAuth servers for queued job '${job.id}': ${message}`);
772
- return [];
773
- }
774
- }
775
- async function advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, options) {
776
- if (runQueueState.activeJobId) {
777
- if (options?.emitWhenIdle)
778
- emitQueueEvent(jobs, runQueueState, deps);
779
- return;
780
- }
781
- if (runQueueState.isAdvancingQueue) {
782
- if (options?.emitWhenIdle)
783
- emitQueueEvent(jobs, runQueueState, deps);
784
- return;
785
- }
786
- runQueueState.isAdvancingQueue = true;
787
- let queueMutated = false;
788
- try {
789
- while (runQueueState.queue.length > 0) {
790
- const nextId = runQueueState.queue[0]; // peek — do not shift yet
791
- const nextJob = jobs.get(nextId);
792
- if (!nextJob || (nextJob.status !== 'queued' && nextJob.status !== 'blocked_auth')) {
793
- runQueueState.queue.shift();
794
- queueMutated = true;
795
- continue;
796
- }
797
- // Pre-check OAuth before starting
798
- let oauthServers = [];
799
- try {
800
- oauthServers = resolveOAuthServersForJob(nextJob, settings.librariesDir);
801
- }
802
- catch (error) {
803
- runQueueState.queue.shift();
804
- nextJob.status = 'error';
805
- deps.addJobEvent(nextJob, {
806
- type: 'error',
807
- ts: new Date().toISOString(),
808
- payload: {
809
- message: error instanceof Error ? error.message : String(error)
810
- }
811
- });
812
- for (const client of nextJob.clients)
813
- client.end();
814
- nextJob.clients.clear();
815
- queueMutated = true;
816
- continue;
817
- }
818
- if (oauthServers.length > 0) {
819
- const ensureResult = await oauthSessionManager.ensureServersAuthorized(oauthServers, options?.hostHeader);
820
- const needsAuth = ensureResult.servers.filter((s) => s.status === 'auth_required');
821
- if (needsAuth.length > 0) {
822
- const needsAuthNames = needsAuth.map((s) => s.serverName);
823
- const wasBlocked = nextJob.status === 'blocked_auth';
824
- const prevBlockedServers = nextJob.blockedAuthServers ?? [];
825
- const prevKey = [...prevBlockedServers].sort().join('|');
826
- const nextKey = [...needsAuthNames].sort().join('|');
827
- const blockedSetChanged = prevKey !== nextKey;
828
- nextJob.blockedAuthServers = needsAuthNames; // always refresh to current missing subset
829
- if (!wasBlocked) {
830
- nextJob.status = 'blocked_auth';
831
- }
832
- if (!wasBlocked || blockedSetChanged) {
833
- deps.addJobEvent(nextJob, {
834
- type: 'oauth_required',
835
- ts: new Date().toISOString(),
836
- payload: {
837
- jobId: nextJob.id,
838
- servers: needsAuthNames,
839
- message: `OAuth login required for server(s): ${needsAuthNames.join(', ')}.`
840
- }
841
- });
842
- }
843
- queueMutated = true;
844
- emitQueueEvent(jobs, runQueueState, deps);
845
- return; // pause — frontend must call /api/runs/queue/resume after auth
846
- }
847
- const readyServers = ensureResult.servers
848
- .filter((s) => s.status === 'ready')
849
- .map((s) => {
850
- const mode = s.debugState ?? 'unknown';
851
- return `${s.serverName} (${mode})`;
852
- });
853
- if (readyServers.length > 0) {
854
- deps.addJobEvent(nextJob, {
855
- type: 'log',
856
- ts: new Date().toISOString(),
857
- payload: {
858
- message: `OAuth credentials ready for queued run: ${readyServers.join(', ')}`
859
- }
860
- });
861
- }
862
- }
863
- // OAuth ready (or not required) — start the job
864
- runQueueState.queue.shift();
865
- nextJob.status = 'running';
866
- runQueueState.activeJobId = nextId;
867
- deps.addJobEvent(nextJob, {
868
- type: 'started',
869
- ts: new Date().toISOString(),
870
- payload: {
871
- configPath: nextJob.runParams.configPath,
872
- runsPerScenario: nextJob.runParams.runsPerScenario,
873
- scenarioId: nextJob.runParams.scenarioId ?? null,
874
- scenarioIds: nextJob.runParams.scenarioIds ?? null,
875
- agents: nextJob.runParams.requestedAgents ?? null,
876
- runNote: nextJob.runParams.runNote ?? null,
877
- serverOverrideAll: nextJob.runParams.serverOverrideAll ?? null,
878
- scenarioServerOverrides: nextJob.runParams.scenarioServerOverrides ?? null
879
- }
880
- });
881
- queueMutated = true;
882
- emitQueueEvent(jobs, runQueueState, deps);
883
- void executeRunJob(nextJob, settings, jobs, runQueueState, oauthSessionManager, deps);
884
- return;
885
- }
886
- if (queueMutated || options?.emitWhenIdle) {
887
- emitQueueEvent(jobs, runQueueState, deps);
888
- }
889
- }
890
- finally {
891
- runQueueState.isAdvancingQueue = false;
892
- }
893
- }
894
- async function executeRunJob(job, settings, jobs, runQueueState, oauthSessionManager, deps) {
895
- const { addJobEvent, getScenarioRunTraceRecords, selectScenarioIds, expandConfigForAgents, resolveRunSelectedAgents, readLibraries, pkgVersion } = deps;
896
- const { configPath, runsPerScenario, scenarioId, scenarioIds, requestedAgents, runNote, serverOverrideAll, scenarioServerOverrides } = job.runParams;
897
- try {
898
- addJobEvent(job, {
899
- type: 'log',
900
- ts: new Date().toISOString(),
901
- payload: { message: `Loading MCP Evaluation config: ${configPath}` }
902
- });
903
- const loaded = loadConfig(configPath, { bundleRoot: settings.librariesDir });
904
- const { agents: libraryAgents, servers: libraryServers } = readLibraries(settings.librariesDir);
905
- applyLibraryEntries(loaded, libraryAgents, libraryServers);
906
- addJobEvent(job, {
907
- type: 'log',
908
- ts: new Date().toISOString(),
909
- payload: {
910
- message: `Loaded config (${loaded.config.scenarios.length} scenario(s), ${Object.keys(loaded.config.agents ?? {}).length} agent(s), ${Object.keys(loaded.config.servers ?? {}).length} server(s))`
911
- }
912
- });
913
- for (const warning of loaded.warnings ?? []) {
914
- addJobEvent(job, {
915
- type: 'log',
916
- ts: new Date().toISOString(),
917
- payload: { message: warning }
918
- });
919
- }
920
- addJobEvent(job, {
921
- type: 'log',
922
- ts: new Date().toISOString(),
923
- payload: {
924
- message: scenarioIds && scenarioIds.length > 0
925
- ? `Selecting requested scenarios: ${scenarioIds.join(', ')}`
926
- : scenarioId
927
- ? `Selecting requested scenario: ${scenarioId}`
928
- : 'Using all scenarios from config'
929
- }
930
- });
931
- const selectedBaseScenarios = selectScenarioIds(loaded.config, scenarioIds && scenarioIds.length > 0 ? scenarioIds : scenarioId ? [scenarioId] : undefined);
932
- addJobEvent(job, {
933
- type: 'log',
934
- ts: new Date().toISOString(),
935
- payload: {
936
- message: `Selected ${selectedBaseScenarios.scenarios.length} base scenario(s)`
937
- }
938
- });
939
- const filteredScenarioOverrides = filterScenarioOverridesToSelectedScenarios(selectedBaseScenarios, scenarioServerOverrides);
940
- const runtimeOverriddenConfig = applyRuntimeServerOverrides(selectedBaseScenarios, {
941
- serverOverrideAll,
942
- scenarioServerOverrides: filteredScenarioOverrides
943
- });
944
- const effectiveConfigHash = hashConfig(runtimeOverriddenConfig);
945
- addJobEvent(job, {
946
- type: 'log',
947
- ts: new Date().toISOString(),
948
- payload: {
949
- message: `Applied runtime server overrides: global=${serverOverrideAll?.length ?? 0} scenario-specific=${Object.keys(filteredScenarioOverrides ?? {}).length}`
950
- }
951
- });
952
- const effectiveScenarioServers = runtimeOverriddenConfig.scenarios
953
- .map((scenario) => `${scenario.id}=[${scenario.servers.join(', ')}]`)
954
- .join('; ');
955
- addJobEvent(job, {
956
- type: 'log',
957
- ts: new Date().toISOString(),
958
- payload: {
959
- message: `Effective MCP servers per scenario: ${effectiveScenarioServers || '(none)'}`
960
- }
961
- });
962
- const resolvedAgents = resolveRunSelectedAgents(runtimeOverriddenConfig, requestedAgents);
963
- const resolvedAgentList = Array.isArray(resolvedAgents) ? resolvedAgents : [];
964
- addJobEvent(job, {
965
- type: 'log',
966
- ts: new Date().toISOString(),
967
- payload: {
968
- message: requestedAgents && requestedAgents.length > 0
969
- ? `Using requested agents: ${resolvedAgentList.join(', ')}`
970
- : `Using resolved default agents: ${resolvedAgentList.join(', ')}`
971
- }
972
- });
973
- const expandedConfig = expandConfigForAgents(runtimeOverriddenConfig, resolvedAgents);
974
- addJobEvent(job, {
975
- type: 'log',
976
- ts: new Date().toISOString(),
977
- payload: {
978
- message: `Expanded to ${expandedConfig.scenarios.length} executable scenario run(s) across selected agents`
979
- }
980
- });
981
- const usedServerNames = new Set(expandedConfig.scenarios.flatMap((scenario) => scenario.servers));
982
- const oauthServers = Array.from(usedServerNames).filter((serverName) => expandedConfig.servers[serverName]?.auth?.type === 'oauth_authorization_code');
983
- const oauthServerSet = new Set(oauthServers);
984
- const mcpServerAuthHeaders = oauthServers.length > 0
985
- ? await oauthSessionManager.getAuthHeadersForServers(oauthServers, undefined)
986
- : undefined;
987
- if (oauthServers.length > 0) {
988
- addJobEvent(job, {
989
- type: 'log',
990
- ts: new Date().toISOString(),
991
- payload: {
992
- message: `OAuth runtime credentials resolved for server(s): ${oauthServers.join(', ')}`
993
- }
994
- });
995
- }
996
- const cwdBefore = process.cwd();
997
- process.chdir(settings.workspaceRoot);
998
- try {
999
- addJobEvent(job, {
1000
- type: 'log',
1001
- ts: new Date().toISOString(),
1002
- payload: {
1003
- message: `Running evaluation (${runsPerScenario} run(s) per scenario) ...`
1004
- }
1005
- });
1006
- if (runNote) {
1007
- addJobEvent(job, {
1008
- type: 'log',
1009
- ts: new Date().toISOString(),
1010
- payload: { message: `Run note: ${runNote}` }
1011
- });
1012
- }
1013
- const { runDir, results } = await runAll(expandedConfig, {
1014
- runsPerScenario,
1015
- scenarioId,
1016
- runNote,
1017
- configHash: effectiveConfigHash,
1018
- cliVersion: pkgVersion,
1019
- runsDir: settings.runsDir,
1020
- mcpServerAuthHeaders,
1021
- resolveMcpServerAuthHeaders: oauthServers.length > 0
1022
- ? async (serverNames, options) => {
1023
- if (options?.signal?.aborted)
1024
- return {};
1025
- const namesToRefresh = serverNames.filter((name) => oauthServerSet.has(name));
1026
- if (namesToRefresh.length === 0)
1027
- return {};
1028
- return oauthSessionManager.getAuthHeadersForServers(namesToRefresh);
1029
- }
1030
- : undefined,
1031
- signal: job.abortController.signal,
1032
- onProgress: async (event) => {
1033
- const message = formatRunProgressMessage(event);
1034
- if (!message)
1035
- return;
1036
- addJobEvent(job, {
1037
- type: 'log',
1038
- ts: new Date().toISOString(),
1039
- payload: { message }
1040
- });
1041
- }
1042
- });
1043
- const relativeConfigPathRaw = relative(settings.evalsDir, configPath);
1044
- const relativeConfigPath = relativeConfigPathRaw.replace(/\\/g, '/').replace(/^\.\/+/, '');
1045
- results.metadata.config_path = relativeConfigPath || configPath;
1046
- if (loaded.config.name && loaded.config.name.trim().length > 0) {
1047
- results.metadata.config_name = loaded.config.name.trim();
1048
- }
1049
- results.metadata.rerun_agents = [...resolvedAgentList];
1050
- results.metadata.rerun_scenario_ids = selectedBaseScenarios.scenarios.map((scenario) => scenario.id);
1051
- if (serverOverrideAll && serverOverrideAll.length > 0) {
1052
- results.metadata.rerun_server_override_all = [...serverOverrideAll];
1053
- }
1054
- else {
1055
- delete results.metadata.rerun_server_override_all;
1056
- }
1057
- if (filteredScenarioOverrides && Object.keys(filteredScenarioOverrides).length > 0) {
1058
- results.metadata.rerun_scenario_server_overrides = Object.fromEntries(Object.entries(filteredScenarioOverrides).map(([scenarioKey, serverIds]) => [
1059
- scenarioKey,
1060
- [...serverIds]
1061
- ]));
1062
- }
1063
- else {
1064
- delete results.metadata.rerun_scenario_server_overrides;
1065
- }
1066
- addJobEvent(job, {
1067
- type: 'log',
1068
- ts: new Date().toISOString(),
1069
- payload: {
1070
- message: `Evaluation execution finished (run id: ${results.metadata.run_id})`
1071
- }
1072
- });
1073
- addJobEvent(job, {
1074
- type: 'log',
1075
- ts: new Date().toISOString(),
1076
- payload: { message: `Writing results to ${runDir}` }
1077
- });
1078
- const traceRecords = getScenarioRunTraceRecords(results.metadata.run_id, settings.runsDir);
1079
- results.metadata.tool_tokens_total = estimateRunToolTokensTotal(traceRecords);
1080
- writeFileSync(join(runDir, 'results.json'), `${JSON.stringify(results, null, 2)}\n`, 'utf8');
1081
- writeFileSync(join(runDir, 'report.html'), renderReport(results), 'utf8');
1082
- writeFileSync(join(runDir, 'summary.md'), renderSummaryMarkdown(results), 'utf8');
1083
- addJobEvent(job, {
1084
- type: 'log',
1085
- ts: new Date().toISOString(),
1086
- payload: {
1087
- message: `Run finished: ${results.summary.total_runs} run(s), pass rate ${Math.round(results.summary.pass_rate * 100)}%`
1088
- }
1089
- });
1090
- addJobEvent(job, {
1091
- type: 'completed',
1092
- ts: new Date().toISOString(),
1093
- payload: {
1094
- runId: results.metadata.run_id,
1095
- runDir,
1096
- summary: results.summary
1097
- }
1098
- });
1099
- job.status = 'completed';
1100
- }
1101
- finally {
1102
- process.chdir(cwdBefore);
1103
- }
1104
- }
1105
- catch (error) {
1106
- if (error instanceof OAuthAuthorizationRequiredError) {
1107
- const blockedServers = Array.from(new Set(error.details.map((detail) => detail.serverName).filter(Boolean)));
1108
- const aborted = job.abortController.signal.aborted || job.status === 'stopped';
1109
- if (!aborted && blockedServers.length > 0) {
1110
- job.blockedAuthServers = blockedServers;
1111
- job.status = 'blocked_auth';
1112
- if (!runQueueState.queue.includes(job.id)) {
1113
- runQueueState.queue.unshift(job.id);
1114
- }
1115
- addJobEvent(job, {
1116
- type: 'oauth_required',
1117
- ts: new Date().toISOString(),
1118
- payload: {
1119
- jobId: job.id,
1120
- servers: blockedServers,
1121
- message: `OAuth login required for server(s): ${blockedServers.join(', ')}.`
1122
- }
1123
- });
1124
- return;
1125
- }
1126
- }
1127
- const normalizedError = error instanceof OAuthAuthorizationRequiredError
1128
- ? new Error(error.details[0]?.message || error.message)
1129
- : error;
1130
- const aborted = job.abortController.signal.aborted || job.status === 'stopped';
1131
- addJobEvent(job, {
1132
- type: 'error',
1133
- ts: new Date().toISOString(),
1134
- payload: {
1135
- message: aborted
1136
- ? 'Run aborted by user'
1137
- : normalizedError instanceof Error
1138
- ? normalizedError.message
1139
- : String(normalizedError)
1140
- }
1141
- });
1142
- job.status = aborted ? 'stopped' : 'error';
1143
- }
1144
- finally {
1145
- runQueueState.activeJobId = null;
1146
- if (job.status !== 'blocked_auth') {
1147
- for (const client of job.clients)
1148
- client.end();
1149
- job.clients.clear();
1150
- }
1151
- void advanceQueue(jobs, runQueueState, settings, oauthSessionManager, deps, {
1152
- emitWhenIdle: true
1153
- }).catch((error) => {
1154
- console.warn(`[mcplab] Failed to advance run queue after job '${job.id}': ${error instanceof Error ? error.message : String(error)}`);
1155
- });
1156
- pruneOldJobs(jobs, runQueueState);
1157
- }
1158
- }
1159
/**
 * Splits a total into integer shares that add up to the rounded total.
 *
 * The total is rounded to the nearest integer and clamped to be non-negative;
 * the leading shares each absorb one unit of any remainder.
 *
 * @param {number|null|undefined} total - Amount to distribute. A value that is
 *   not a finite number yields all-zero shares.
 * @param {number} parts - Number of shares to produce (fractions are floored).
 * @returns {number[]} An array of `parts` non-negative integers, or an empty
 *   array when `parts` is not a finite number of at least 1.
 */
function splitInteger(total, parts) {
    // Normalise the share count before building any array: `Array(n)` throws a
    // RangeError for negative, fractional or NaN lengths.
    const count = Number.isFinite(parts) && parts >= 1 ? Math.floor(parts) : 0;
    if (count === 0)
        return [];
    if (!Number.isFinite(total))
        return Array(count).fill(0);
    const safeTotal = Math.max(0, Math.round(total));
    const base = Math.floor(safeTotal / count);
    const remainder = safeTotal % count;
    // The first `remainder` shares receive one extra unit each.
    return Array.from({ length: count }, (_, index) => base + (index < remainder ? 1 : 0));
}
1172
/**
 * Sums the token figures attributable to tool calls and tool results across
 * a set of run records.
 *
 * For each message, `tool_use` blocks and `tool_result` blocks are handled
 * separately:
 *  - when every block of the kind carries `estimated_tokens`, their `total`
 *    values are summed;
 *  - otherwise the message's `usage.total_tokens` is used instead (for tool
 *    results, only when at least one result refers to a tool-use id seen
 *    earlier in the same record).
 *
 * @param {Iterable<object>} records - Run records; each may have a `messages`
 *   array whose entries carry `content` blocks and an optional `usage` object.
 * @returns {number|null} The accumulated total, or `null` when no message
 *   contributed a token figure.
 */
function estimateRunToolTokensTotal(records) {
    // Usage totals taken from the shared-usage fallback are counted as whole,
    // non-negative numbers; anything that is not a finite number counts as 0.
    const wholeTokens = (value) => (Number.isFinite(value) ? Math.max(0, Math.round(value)) : 0);
    let total = 0;
    let hasAny = false;
    for (const record of records) {
        // Tool-use ids seen so far in this record; results are matched against them.
        const seenToolUseIds = new Set();
        for (const message of record.messages ?? []) {
            // Messages whose content is absent or not a block list have nothing to count.
            const blocks = Array.isArray(message.content) ? message.content : [];
            const usageTotal = message.usage?.total_tokens;
            const hasUsage = typeof usageTotal === 'number';

            const toolUses = blocks.filter((block) => block.type === 'tool_use');
            if (toolUses.length > 0) {
                for (const toolUse of toolUses)
                    seenToolUseIds.add(toolUse.id);
                if (toolUses.every((toolUse) => Boolean(toolUse.estimated_tokens))) {
                    for (const toolUse of toolUses)
                        total += toolUse.estimated_tokens?.total ?? 0;
                    hasAny = true;
                }
                else if (toolUses.length === 1 && hasUsage) {
                    // A lone tool call takes the message usage as reported.
                    total += usageTotal;
                    hasAny = true;
                }
                else {
                    total += wholeTokens(usageTotal);
                    if (hasUsage)
                        hasAny = true;
                }
            }

            const toolResults = blocks.filter((block) => block.type === 'tool_result');
            if (toolResults.length === 0)
                continue;
            if (toolResults.every((result) => Boolean(result.estimated_tokens))) {
                for (const result of toolResults)
                    total += result.estimated_tokens?.total ?? 0;
                hasAny = true;
                continue;
            }
            if (toolResults.length === 1) {
                const [result] = toolResults;
                if (result && seenToolUseIds.has(result.tool_use_id) && hasUsage) {
                    // A lone, matched result takes the message usage as reported.
                    total += usageTotal;
                    hasAny = true;
                    continue;
                }
            }
            // Results that match no known tool call contribute nothing.
            const hasKnownResult = toolResults.some((result) => seenToolUseIds.has(result.tool_use_id));
            if (!hasKnownResult)
                continue;
            total += wholeTokens(usageTotal);
            if (hasUsage)
                hasAny = true;
        }
    }
    return hasAny ? total : null;
}
1230
/**
 * Removes finished jobs from the `jobs` map once their most recent event is
 * more than 30 minutes old.
 *
 * A job is kept when it is the active job, is still listed in the queue, has a
 * status other than 'completed' / 'error' / 'stopped', or has no events.
 *
 * @param {Map<string, object>} jobs - Job registry keyed by job id; mutated in place.
 * @param {{ activeJobId: string|null, queue: string[] }} runQueueState - Current queue state.
 * @returns {void}
 */
function pruneOldJobs(jobs, runQueueState) {
    const retentionMs = 30 * 60_000;
    const referenceTime = Date.now();
    const finishedStatuses = ['completed', 'error', 'stopped'];

    // Ids that are running or still queued must never be pruned.
    const protectedIds = new Set();
    for (const candidateId of [runQueueState.activeJobId, ...runQueueState.queue]) {
        if (candidateId)
            protectedIds.add(candidateId);
    }

    for (const [jobId, entry] of jobs) {
        const prunable = !protectedIds.has(jobId) && finishedStatuses.includes(entry.status);
        if (prunable) {
            const newestEvent = entry.events[entry.events.length - 1];
            if (newestEvent) {
                const ageMs = referenceTime - new Date(newestEvent.ts).getTime();
                if (ageMs > retentionMs) {
                    jobs.delete(jobId);
                }
            }
        }
    }
}
1247
/**
 * Builds the human-readable log line for a run progress event.
 *
 * @param {object} event - Progress event; `event.type` selects the message and
 *   the remaining fields are interpolated into it. For 'agent_progress' the
 *   nested `event.event` object is formatted by its own `type`.
 * @returns {string|null} The formatted message, or `null` for event types
 *   (outer or nested) that have no message.
 */
function formatRunProgressMessage(event) {
    // Formats the nested per-agent step carried by an 'agent_progress' event.
    const describeAgentStep = (step) => {
        const stepKind = step.type;
        if (stepKind === 'llm_request_started') {
            return `LLM turn ${step.turn + 1} started for ${step.scenarioId} [${step.agentName}] (${step.provider}/${step.model})`;
        }
        if (stepKind === 'llm_response_received') {
            return `LLM turn ${step.turn + 1} response for ${step.scenarioId} [${step.agentName}] (text=${step.hasText ? 'yes' : 'no'}, tool_calls=${step.toolCallCount})`;
        }
        if (stepKind === 'tool_call_started') {
            return `Tool call started: ${step.server}.${step.tool} (turn ${step.turn + 1})`;
        }
        if (stepKind === 'tool_call_finished') {
            return `Tool call ${step.ok ? 'finished' : 'failed'}: ${step.server}.${step.tool} in ${step.durationMs}ms`;
        }
        if (stepKind === 'final_answer') {
            return `Final answer produced for ${step.scenarioId} [${step.agentName}] (text=${step.hasText ? 'yes' : 'no'})`;
        }
        return null;
    };

    const kind = event.type;
    if (kind === 'run_started') {
        return `Run initialized (id: ${event.runId}, ${event.totalScenarioRuns} scenario run(s))`;
    }
    if (kind === 'mcp_connect_started') {
        return `Connecting to ${event.serverCount} MCP server(s): ${event.serverNames.join(', ')} ...`;
    }
    if (kind === 'mcp_connect_finished') {
        return `Connected to ${event.serverCount} MCP server(s): ${event.serverNames.join(', ')}`;
    }
    if (kind === 'scenario_run_started') {
        return `Scenario ${event.scenarioRunIndex}/${event.totalScenarioRuns} started: ${event.scenarioId} [agent=${event.agentName}, run=${event.runIndex + 1}/${event.runsPerScenario}]`;
    }
    if (kind === 'scenario_run_finished') {
        return `Scenario ${event.scenarioRunIndex}/${event.totalScenarioRuns} finished: ${event.scenarioId} [agent=${event.agentName}] -> ${event.pass ? 'PASS' : 'FAIL'} (${event.toolCallCount} tool call(s))`;
    }
    if (kind === 'agent_progress') {
        return describeAgentStep(event.event);
    }
    if (kind === 'run_finished') {
        return `Run finished (id: ${event.runId})`;
    }
    return null;
}
1282
579
  function localMcplabMcpUrl() {
1283
580
  const host = process.env.MCP_HOST || '127.0.0.1';
1284
581
  const port = process.env.MCP_PORT || '3011';