vskill 0.5.11 → 0.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/commands/eval/credentials.d.ts +12 -0
  2. package/dist/commands/eval/credentials.js +140 -0
  3. package/dist/commands/eval/credentials.js.map +1 -0
  4. package/dist/commands/eval/generate-all.d.ts +1 -1
  5. package/dist/commands/eval/generate-all.js +57 -12
  6. package/dist/commands/eval/generate-all.js.map +1 -1
  7. package/dist/commands/eval/init.d.ts +2 -1
  8. package/dist/commands/eval/init.js +76 -10
  9. package/dist/commands/eval/init.js.map +1 -1
  10. package/dist/commands/eval/run.d.ts +7 -1
  11. package/dist/commands/eval/run.js +207 -26
  12. package/dist/commands/eval/run.js.map +1 -1
  13. package/dist/commands/eval/sweep.d.ts +7 -0
  14. package/dist/commands/eval/sweep.js +99 -0
  15. package/dist/commands/eval/sweep.js.map +1 -0
  16. package/dist/commands/eval.d.ts +10 -0
  17. package/dist/commands/eval.js +62 -4
  18. package/dist/commands/eval.js.map +1 -1
  19. package/dist/eval/batch-judge.d.ts +27 -0
  20. package/dist/eval/batch-judge.js +242 -0
  21. package/dist/eval/batch-judge.js.map +1 -0
  22. package/dist/eval/chrome-profile.d.ts +16 -0
  23. package/dist/eval/chrome-profile.js +65 -0
  24. package/dist/eval/chrome-profile.js.map +1 -0
  25. package/dist/eval/comparator.d.ts +3 -1
  26. package/dist/eval/comparator.js +19 -3
  27. package/dist/eval/comparator.js.map +1 -1
  28. package/dist/eval/concurrency.d.ts +13 -0
  29. package/dist/eval/concurrency.js +53 -0
  30. package/dist/eval/concurrency.js.map +1 -0
  31. package/dist/eval/credential-resolver.d.ts +31 -0
  32. package/dist/eval/credential-resolver.js +111 -0
  33. package/dist/eval/credential-resolver.js.map +1 -0
  34. package/dist/eval/integration-runner.d.ts +12 -0
  35. package/dist/eval/integration-runner.js +303 -0
  36. package/dist/eval/integration-runner.js.map +1 -0
  37. package/dist/eval/integration-types.d.ts +65 -0
  38. package/dist/eval/integration-types.js +18 -0
  39. package/dist/eval/integration-types.js.map +1 -0
  40. package/dist/eval/judge-cache.d.ts +29 -0
  41. package/dist/eval/judge-cache.js +109 -0
  42. package/dist/eval/judge-cache.js.map +1 -0
  43. package/dist/eval/judge.d.ts +1 -1
  44. package/dist/eval/judge.js +20 -3
  45. package/dist/eval/judge.js.map +1 -1
  46. package/dist/eval/llm.d.ts +2 -1
  47. package/dist/eval/llm.js +54 -2
  48. package/dist/eval/llm.js.map +1 -1
  49. package/dist/eval/prompt-builder.d.ts +10 -0
  50. package/dist/eval/prompt-builder.js +167 -0
  51. package/dist/eval/prompt-builder.js.map +1 -1
  52. package/dist/eval/rate-limiter.d.ts +20 -0
  53. package/dist/eval/rate-limiter.js +62 -0
  54. package/dist/eval/rate-limiter.js.map +1 -0
  55. package/dist/eval/schema.d.ts +16 -0
  56. package/dist/eval/schema.js +58 -6
  57. package/dist/eval/schema.js.map +1 -1
  58. package/dist/eval/verdict.d.ts +9 -0
  59. package/dist/eval/verdict.js +50 -0
  60. package/dist/eval/verdict.js.map +1 -1
  61. package/dist/eval-server/api-routes.js +99 -3
  62. package/dist/eval-server/api-routes.js.map +1 -1
  63. package/dist/eval-server/benchmark-runner.d.ts +7 -0
  64. package/dist/eval-server/benchmark-runner.js +158 -42
  65. package/dist/eval-server/benchmark-runner.js.map +1 -1
  66. package/dist/eval-server/concurrency.d.ts +1 -13
  67. package/dist/eval-server/concurrency.js +3 -49
  68. package/dist/eval-server/concurrency.js.map +1 -1
  69. package/dist/eval-server/eval-server.js +4 -0
  70. package/dist/eval-server/eval-server.js.map +1 -1
  71. package/dist/eval-server/integration-routes.d.ts +2 -0
  72. package/dist/eval-server/integration-routes.js +100 -0
  73. package/dist/eval-server/integration-routes.js.map +1 -0
  74. package/dist/eval-server/skill-create-routes.js +151 -22
  75. package/dist/eval-server/skill-create-routes.js.map +1 -1
  76. package/dist/eval-server/sweep-routes.d.ts +2 -0
  77. package/dist/eval-server/sweep-routes.js +93 -0
  78. package/dist/eval-server/sweep-routes.js.map +1 -0
  79. package/dist/eval-server/sweep-runner.d.ts +93 -0
  80. package/dist/eval-server/sweep-runner.js +275 -0
  81. package/dist/eval-server/sweep-runner.js.map +1 -0
  82. package/dist/eval-ui/assets/index-C9_Pey9T.css +1 -0
  83. package/dist/eval-ui/assets/index-KfkLPyh3.js +74 -0
  84. package/dist/eval-ui/index.html +2 -2
  85. package/dist/index.js +8 -0
  86. package/dist/index.js.map +1 -1
  87. package/package.json +1 -1
  88. package/dist/eval-ui/assets/index-CxHCKEhf.js +0 -74
  89. package/dist/eval-ui/assets/index-D2UkOol1.css +0 -1
@@ -6,58 +6,137 @@ import { judgeAssertion } from "../eval/judge.js";
6
6
  import { writeHistoryEntry } from "../eval/benchmark-history.js";
7
7
  import { sendSSE, sendSSEDone, withHeartbeat } from "./sse-helpers.js";
8
8
  import { classifyError } from "./error-classifier.js";
9
+ import { Semaphore } from "./concurrency.js";
10
+ // ---------------------------------------------------------------------------
11
+ // CLI providers that spawn child processes — default concurrency 1
12
+ // ---------------------------------------------------------------------------
13
/** Provider ids that are treated as CLI-backed (child-process) providers. */
const CLI_PROVIDERS = new Set(["claude-cli", "codex-cli", "gemini-cli"]);

/**
 * Pick the default number of eval cases to run in parallel for a provider.
 *
 * @param {string} provider - Provider id, e.g. "claude-cli".
 * @returns {number} 1 for providers listed in CLI_PROVIDERS, 5 for anything else.
 */
function defaultConcurrency(provider) {
    // CLI-backed providers are kept serial; every other provider gets 5 slots.
    if (CLI_PROVIDERS.has(provider)) {
        return 1;
    }
    return 5;
}
17
+ // ---------------------------------------------------------------------------
18
+ // 429 retry helper — exponential backoff (1s, 2s, 4s), max 3 retries
19
+ // ---------------------------------------------------------------------------
20
/**
 * Decide whether a thrown value looks like an HTTP 429 / rate-limit failure.
 *
 * A value is treated as rate-limited when any of the following holds:
 *   - it is an object whose `status` property is exactly the number 429;
 *   - its `message` contains "429" as a standalone number (not part of a
 *     longer digit run such as "14290ms");
 *   - its `message` contains the phrase "rate limit" in any letter case.
 *
 * Unlike a plain `instanceof Error` check, non-Error objects are inspected
 * too, so a rejection with a bare `{ status: 429 }` object is recognised.
 *
 * @param {unknown} err - The value caught from a failed call.
 * @returns {boolean} true when the failure should be retried as a rate limit.
 */
function is429(err) {
    // Primitives (including null/undefined) carry neither status nor message.
    if (err === null || typeof err !== "object") {
        return false;
    }
    if ("status" in err && err.status === 429) {
        return true;
    }
    const msg = typeof err.message === "string" ? err.message : "";
    // Require non-digit neighbours so durations/ids like "14290" do not match.
    if (/(^|\D)429(\D|$)/.test(msg)) {
        return true;
    }
    // Case-insensitive so "Rate Limit Exceeded" and "RATE LIMIT" are caught too.
    return msg.toLowerCase().includes("rate limit");
}
30
/**
 * Pause the calling async flow for a timer of `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
async function sleep(ms) {
    await new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
33
+ // ---------------------------------------------------------------------------
34
+ // Weak-model judge warning heuristic
35
+ // ---------------------------------------------------------------------------
36
/**
 * Relative strength tier per model-family keyword (1 = weakest, 3 = strongest).
 * Entries are checked in declaration order; the first keyword found wins.
 */
const MODEL_STRENGTH = {
    haiku: 1,
    flash: 1,
    "mini": 1,
    sonnet: 2,
    pro: 2,
    opus: 3,
};

/**
 * Pre-built matchers, one per MODEL_STRENGTH entry. Each keyword must not be
 * embedded in a longer run of letters; a plain substring test would find
 * "mini" inside "gemini" and rate every Gemini model as weak.
 */
const MODEL_STRENGTH_MATCHERS = Object.entries(MODEL_STRENGTH).map(([key, strength]) => ({
    pattern: new RegExp(`(^|[^a-z])${key}([^a-z]|$)`),
    strength,
}));

/**
 * Estimate a model's strength tier from its name.
 *
 * The name is lower-cased and scanned for the MODEL_STRENGTH keywords as
 * standalone words (digits and punctuation count as separators, so both
 * "claude-sonnet-4" and "sonnet4" match "sonnet").
 *
 * @param {string} model - Model identifier, e.g. "gemini-2.5-pro".
 * @returns {number} The tier of the first matching keyword, or 2 (medium)
 *   when no keyword is recognised.
 */
function estimateStrength(model) {
    const lower = model.toLowerCase();
    for (const { pattern, strength } of MODEL_STRENGTH_MATCHERS) {
        if (pattern.test(lower)) {
            return strength;
        }
    }
    return 2; // unknown = medium
}
52
/**
 * Build a warning when the judge model is estimated to be weaker than the
 * model that generated the output being judged.
 *
 * Strength is taken from estimateStrength() for both names; only a strictly
 * lower judge tier produces a warning.
 *
 * @param {string} generatorModel - Name of the model that produced the output.
 * @param {string} judgeModel - Name of the model grading the assertions.
 * @returns {string|null} The warning text, or null when no warning applies.
 */
export function checkWeakJudgeWarning(generatorModel, judgeModel) {
    const generatorTier = estimateStrength(generatorModel);
    const judgeTier = estimateStrength(judgeModel);
    const judgeIsWeaker = judgeTier < generatorTier;
    return judgeIsWeaker
        ? `Warning: Judge model "${judgeModel}" appears weaker than generator "${generatorModel}". Judge scores may be less reliable.`
        : null;
}
9
60
  export async function runSingleCaseSSE(opts) {
10
- const { res, evalCase, systemPrompt, client, isAborted, totalCases = 1, provider } = opts;
11
- sendSSE(res, "case_start", {
61
+ const { res, evalCase, systemPrompt, client, judgeClient, judgeCache, isAborted, totalCases = 1, provider } = opts;
62
+ const effectiveJudgeClient = judgeClient ?? client;
63
+ let sequence = 0;
64
+ const emitSSE = (event, data) => {
65
+ sendSSE(res, event, { ...data, caseId: evalCase.id, sequence: sequence++ });
66
+ };
67
+ emitSSE("case_start", {
12
68
  eval_id: evalCase.id,
13
69
  eval_name: evalCase.name,
14
70
  total: totalCases,
15
71
  });
16
- sendSSE(res, "progress", {
72
+ emitSSE("progress", {
17
73
  eval_id: evalCase.id,
18
74
  phase: "generating",
19
75
  message: `Generating LLM response for "${evalCase.name}"...`,
20
76
  });
21
77
  try {
22
- const genResult = await withHeartbeat(res, evalCase.id, "generating", `Generating LLM response for "${evalCase.name}"...`, () => client.generate(systemPrompt, evalCase.prompt));
78
+ // Retry with exponential backoff on 429
79
+ let genResult;
80
+ const MAX_RETRIES = 3;
81
+ const RETRY_DELAYS = [1000, 2000, 4000];
82
+ for (let attempt = 0;; attempt++) {
83
+ try {
84
+ genResult = await withHeartbeat(res, evalCase.id, "generating", `Generating LLM response for "${evalCase.name}"...`, () => client.generate(systemPrompt, evalCase.prompt));
85
+ break;
86
+ }
87
+ catch (err) {
88
+ if (is429(err) && attempt < MAX_RETRIES) {
89
+ const delayMs = RETRY_DELAYS[attempt];
90
+ emitSSE("warning", {
91
+ eval_id: evalCase.id,
92
+ message: `Rate limited (429). Retrying in ${delayMs / 1000}s (attempt ${attempt + 1}/${MAX_RETRIES})...`,
93
+ attempt: attempt + 1,
94
+ delayMs,
95
+ });
96
+ await sleep(delayMs);
97
+ continue;
98
+ }
99
+ throw err;
100
+ }
101
+ }
23
102
  const totalTokens = genResult.inputTokens != null && genResult.outputTokens != null
24
103
  ? genResult.inputTokens + genResult.outputTokens
25
104
  : null;
26
- sendSSE(res, "output_ready", {
105
+ emitSSE("output_ready", {
27
106
  eval_id: evalCase.id,
28
107
  output: genResult.text,
29
108
  durationMs: genResult.durationMs,
30
109
  tokens: totalTokens,
31
110
  });
32
- sendSSE(res, "progress", {
111
+ emitSSE("progress", {
33
112
  eval_id: evalCase.id,
34
113
  phase: "judging",
35
114
  message: `Evaluating ${evalCase.assertions.length} assertion${evalCase.assertions.length !== 1 ? "s" : ""}...`,
36
115
  total: evalCase.assertions.length,
37
116
  });
38
- const assertionResults = [];
39
- for (let ai = 0; ai < evalCase.assertions.length; ai++) {
40
- const assertion = evalCase.assertions[ai];
41
- sendSSE(res, "progress", {
42
- eval_id: evalCase.id,
43
- phase: "judging_assertion",
44
- message: `Evaluating assertion ${ai + 1}/${evalCase.assertions.length}: "${assertion.text.slice(0, 60)}${assertion.text.length > 60 ? "..." : ""}"`,
45
- current: ai + 1,
46
- total: evalCase.assertions.length,
47
- assertion_id: assertion.id,
48
- });
49
- if (isAborted())
50
- break;
51
- const result = await judgeAssertion(genResult.text, assertion, client);
52
- assertionResults.push(result);
53
- sendSSE(res, "assertion_result", {
117
+ // T-003: Parallelize intra-case assertion judges via Promise.all
118
+ const assertionResults = await Promise.all(evalCase.assertions.map(async (assertion, ai) => {
119
+ if (isAborted()) {
120
+ return { id: assertion.id, text: assertion.text, pass: false, reasoning: "aborted" };
121
+ }
122
+ const judgeCall = () => judgeAssertion(genResult.text, assertion, client, effectiveJudgeClient);
123
+ let result;
124
+ if (judgeCache) {
125
+ result = await judgeCache.getOrCompute(assertion.text, genResult.text, effectiveJudgeClient.model, judgeCall);
126
+ }
127
+ else {
128
+ result = await judgeCall();
129
+ }
130
+ emitSSE("assertion_result", {
54
131
  eval_id: evalCase.id,
55
132
  assertion_id: result.id,
56
133
  text: result.text,
57
134
  pass: result.pass,
58
135
  reasoning: result.reasoning,
136
+ assertion_index: ai,
59
137
  });
60
- }
138
+ return result;
139
+ }));
61
140
  const passRate = assertionResults.length > 0
62
141
  ? assertionResults.filter((a) => a.pass).length / assertionResults.length
63
142
  : 0;
@@ -75,7 +154,7 @@ export async function runSingleCaseSSE(opts) {
75
154
  output: genResult.text,
76
155
  assertions: assertionResults,
77
156
  };
78
- sendSSE(res, "case_complete", {
157
+ emitSSE("case_complete", {
79
158
  eval_id: evalCase.id,
80
159
  status,
81
160
  pass_rate: passRate,
@@ -95,7 +174,7 @@ export async function runSingleCaseSSE(opts) {
95
174
  pass_rate: 0,
96
175
  assertions: [],
97
176
  };
98
- sendSSE(res, "case_complete", {
177
+ emitSSE("case_complete", {
99
178
  eval_id: evalCase.id,
100
179
  status: "error",
101
180
  error_message: errorMsg,
@@ -127,10 +206,11 @@ export function assembleBulkResult(cases, meta) {
127
206
  };
128
207
  }
129
208
  export async function runBenchmarkSSE(opts) {
130
- const { res, skillDir, skillName, systemPrompt, runType, provider, evalCases: allCases, filterIds, client, isAborted, } = opts;
209
+ const { res, skillDir, skillName, systemPrompt, runType, provider, evalCases: allCases, filterIds, client, judgeClient, judgeCache, isAborted, concurrency: concurrencyOverride, } = opts;
131
210
  const evalCases = filterIds
132
211
  ? allCases.filter((e) => filterIds.has(e.id))
133
212
  : allCases;
213
+ const concurrency = concurrencyOverride ?? defaultConcurrency(provider);
134
214
  // Emit duration estimate so the UI can warn about long runs
135
215
  const totalAssertions = evalCases.reduce((s, e) => s + e.assertions.length, 0);
136
216
  const estimate = estimateDurationSec(provider, evalCases.length, totalAssertions);
@@ -140,29 +220,65 @@ export async function runBenchmarkSSE(opts) {
140
220
  estimatedMinSec: estimate.minSec,
141
221
  estimatedMaxSec: estimate.maxSec,
142
222
  estimatedLabel: estimate.label,
223
+ concurrency,
143
224
  });
144
- const cases = [];
145
- for (const evalCase of evalCases) {
225
+ // Emit weak-model judge warning if applicable
226
+ if (judgeClient) {
227
+ const warning = checkWeakJudgeWarning(client.model, judgeClient.model);
228
+ if (warning) {
229
+ console.warn(warning);
230
+ sendSSE(res, "warning", { message: warning });
231
+ }
232
+ }
233
+ // T-001: Parallel case execution via Semaphore + Promise.allSettled
234
+ const semaphore = new Semaphore(concurrency);
235
+ const caseTasks = evalCases.map((evalCase) => async () => {
146
236
  if (isAborted())
147
- break;
148
- const benchCase = await runSingleCaseSSE({
149
- res,
150
- evalCase,
151
- systemPrompt,
152
- client,
153
- isAborted,
154
- totalCases: evalCases.length,
155
- provider,
156
- });
157
- cases.push(benchCase);
237
+ return null;
238
+ await semaphore.acquire();
239
+ try {
240
+ return await runSingleCaseSSE({
241
+ res,
242
+ evalCase,
243
+ systemPrompt,
244
+ client,
245
+ judgeClient,
246
+ judgeCache,
247
+ isAborted,
248
+ totalCases: evalCases.length,
249
+ provider,
250
+ });
251
+ }
252
+ finally {
253
+ semaphore.release();
254
+ }
255
+ });
256
+ const settled = await Promise.allSettled(caseTasks.map((fn) => fn()));
257
+ const cases = [];
258
+ for (const result of settled) {
259
+ if (result.status === "fulfilled" && result.value != null) {
260
+ cases.push(result.value);
261
+ }
262
+ else if (result.status === "rejected") {
263
+ // Shouldn't normally happen since runSingleCaseSSE catches errors,
264
+ // but handle as safety net
265
+ cases.push({
266
+ eval_id: -1,
267
+ eval_name: "unknown",
268
+ status: "error",
269
+ error_message: result.reason instanceof Error ? result.reason.message : String(result.reason),
270
+ pass_rate: 0,
271
+ assertions: [],
272
+ });
273
+ }
158
274
  }
159
- const result = assembleBulkResult(cases, { model: client.model, skillName, runType, provider });
275
+ const bulkResult = assembleBulkResult(cases, { model: client.model, skillName, runType, provider });
160
276
  if (!isAborted()) {
161
277
  // Save history for bulk runs (single-case runs save via per-case endpoint)
162
278
  if (!filterIds) {
163
- await writeHistoryEntry(skillDir, result);
279
+ await writeHistoryEntry(skillDir, bulkResult);
164
280
  }
165
- sendSSEDone(res, result);
281
+ sendSSEDone(res, bulkResult);
166
282
  }
167
283
  }
168
284
  //# sourceMappingURL=benchmark-runner.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark-runner.js","sourceRoot":"","sources":["../../src/eval-server/benchmark-runner.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,sEAAsE;AACtE,8EAA8E;AAM9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACvE,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAgBtD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAA0B;IAC/D,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,GAAG,CAAC,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC;IAE1F,OAAO,CAAC,GAAG,EAAE,YAAY,EAAE;QACzB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;QACxB,KAAK,EAAE,UAAU;KAClB,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;QACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,gCAAgC,QAAQ,CAAC,IAAI,MAAM;KAC7D,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,aAAa,CACnC,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,YAAY,EAC9B,gCAAgC,QAAQ,CAAC,IAAI,MAAM,EACnD,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CACrD,CAAC;QACF,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,IAAI,IAAI,SAAS,CAAC,YAAY,IAAI,IAAI;YACjF,CAAC,CAAC,SAAS,CAAC,WAAW,GAAG,SAAS,CAAC,YAAY;YAChD,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,CAAC,GAAG,EAAE,cAAc,EAAE;YAC3B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;YACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,cAAc,QAAQ,CAAC,UAAU,CAAC,MAAM,aAAa,QAAQ,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK;YAC9G,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;SAClC,CAAC,CAAC;QAEH,MAAM,gBAAgB,GAA+B,EAAE,CAAC;QAExD,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC;YACvD,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;YAE1C,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;gBACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,KAAK,EAAE,mBAAmB;gBAC1B,OAAO,EAAE,wBAAwB,EAAE,GAAG,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,MAAM,SAA
S,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG;gBACnJ,OAAO,EAAE,EAAE,GAAG,CAAC;gBACf,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;gBACjC,YAAY,EAAE,SAAS,CAAC,EAAE;aAC3B,CAAC,CAAC;YACH,IAAI,SAAS,EAAE;gBAAE,MAAM;YACvB,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;YACvE,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9B,OAAO,CAAC,GAAG,EAAE,kBAAkB,EAAE;gBAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,YAAY,EAAE,MAAM,CAAC,EAAE;gBACvB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,MAAM,QAAQ,GACZ,gBAAgB,CAAC,MAAM,GAAG,CAAC;YACzB,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,gBAAgB,CAAC,MAAM;YACzE,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,GAAG,CAAC,IAAI,gBAAgB,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAEtG,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,MAAyB;YACjC,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,SAAS,CAAC,WAAW;YAClC,YAAY,EAAE,SAAS,CAAC,YAAY;YACpC,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,gBAAgB;SAC7B,CAAC;QAEF,OAAO,CAAC,GAAG,EAAE,eAAe,EAAE;YAC5B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM;YACN,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAClE,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QAChD,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,EAAE;SACf,CAAC;QACF,OAAO,CAAC,GAAG,EAAE,eAAe,EAAE;YAC5B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,gBAAgB,EAAE,UAAU;S
AC7B,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,oDAAoD;AACpD,8EAA8E;AAE9E,MAAM,UAAU,kBAAkB,CAChC,KAAsB,EACtB,IAA+F;IAE/F,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,gBAAgB,GAAG,KAAK,CAAC,MAAM,CACnC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EACvD,CAAC,CACF,CAAC;IACF,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,CAAC;IAE3D,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,UAAU,EAAE,IAAI,CAAC,SAAS;QAC1B,KAAK;QACL,iBAAiB,EAAE,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QAC/E,IAAI,EAAE,IAAI,CAAC,OAAO;QAClB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,eAAe;QACf,gBAAgB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QACxF,iBAAiB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QAC1F,KAAK,EAAE,MAAM;KACd,CAAC;AACJ,CAAC;AAmBD,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAyB;IAC7D,MAAM,EACJ,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EACzD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,GAClD,GAAG,IAAI,CAAC;IAET,MAAM,SAAS,GAAG,SAAS;QACzB,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC7C,CAAC,CAAC,QAAQ,CAAC;IAEb,4DAA4D;IAC5D,MAAM,eAAe,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC/E,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAwB,EAAE,SAAS,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAClG,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;QACvB,UAAU,EAAE,SAAS
,CAAC,MAAM;QAC5B,eAAe;QACf,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,cAAc,EAAE,QAAQ,CAAC,KAAK;KAC/B,CAAC,CAAC;IAEH,MAAM,KAAK,GAAoB,EAAE,CAAC;IAElC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,SAAS,EAAE;YAAE,MAAM;QAEvB,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC;YACvC,GAAG;YACH,QAAQ;YACR,YAAY;YACZ,MAAM;YACN,SAAS;YACT,UAAU,EAAE,SAAS,CAAC,MAAM;YAC5B,QAAQ;SACT,CAAC,CAAC;QACH,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxB,CAAC;IAED,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEhG,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC;QACjB,2EAA2E;QAC3E,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC5C,CAAC;QACD,WAAW,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"benchmark-runner.js","sourceRoot":"","sources":["../../src/eval-server/benchmark-runner.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,sEAAsE;AACtE,8EAA8E;AAM9E,OAAO,EAAE,mBAAmB,EAAmB,MAAM,gBAAgB,CAAC;AACtE,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAElD,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACvE,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,8EAA8E;AAC9E,mEAAmE;AACnE,8EAA8E;AAE9E,MAAM,aAAa,GAAG,IAAI,GAAG,CAAS,CAAC,YAAY,EAAE,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC;AAEjF,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,OAAO,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7C,CAAC;AAED,8EAA8E;AAC9E,qEAAqE;AACrE,8EAA8E;AAE9E,SAAS,KAAK,CAAC,GAAY;IACzB,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC;QACxB,IAAI,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QACjG,IAAI,QAAQ,IAAI,GAAG,IAAK,GAAW,CAAC,MAAM,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;IAClE,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,KAAK,UAAU,KAAK,CAAC,EAAU;IAC7B,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED,8EAA8E;AAC9E,qCAAqC;AACrC,8EAA8E;AAE9E,MAAM,cAAc,GAA2B;IAC7C,KAAK,EAAE,CAAC;IACR,KAAK,EAAE,CAAC;IACR,MAAM,EAAE,CAAC;IACT,MAAM,EAAE,CAAC;IACT,GAAG,EAAE,CAAC;IACN,IAAI,EAAE,CAAC;CACR,CAAC;AAEF,SAAS,gBAAgB,CAAC,KAAa;IACrC,MAAM,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAClC,KAAK,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;QAC7D,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,QAAQ,CAAC;IAC3C,CAAC;IACD,OAAO,CAAC,CAAC,CAAC,mBAAmB;AAC/B,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,cAAsB,EAAE,UAAkB;IAC9E,MAAM,WAAW,GAAG,gBAAgB,CAAC,cAAc,CAAC,CAAC;IACrD,MAAM,aAAa,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;IACnD,IAAI,aAAa,GAAG,WAAW,EAAE,CAAC;QAChC,OAAO,yBAAyB,UAAU,oCAAoC,cAAc,uCAAuC,CAAC;IACtI,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAkBD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAA0B;
IAC/D,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,UAAU,GAAG,CAAC,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC;IACnH,MAAM,oBAAoB,GAAG,WAAW,IAAI,MAAM,CAAC;IACnD,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,OAAO,GAAG,CAAC,KAAa,EAAE,IAA6B,EAAE,EAAE;QAC/D,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE,EAAE,CAAC,CAAC;IAC9E,CAAC,CAAC;IAEF,OAAO,CAAC,YAAY,EAAE;QACpB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;QACxB,KAAK,EAAE,UAAU;KAClB,CAAC,CAAC;IAEH,OAAO,CAAC,UAAU,EAAE;QAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,gCAAgC,QAAQ,CAAC,IAAI,MAAM;KAC7D,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,wCAAwC;QACxC,IAAI,SAAS,CAAC;QACd,MAAM,WAAW,GAAG,CAAC,CAAC;QACtB,MAAM,YAAY,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;QACxC,KAAK,IAAI,OAAO,GAAG,CAAC,GAAI,OAAO,EAAE,EAAE,CAAC;YAClC,IAAI,CAAC;gBACH,SAAS,GAAG,MAAM,aAAa,CAC7B,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,YAAY,EAC9B,gCAAgC,QAAQ,CAAC,IAAI,MAAM,EACnD,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CACrD,CAAC;gBACF,MAAM;YACR,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,IAAI,KAAK,CAAC,GAAG,CAAC,IAAI,OAAO,GAAG,WAAW,EAAE,CAAC;oBACxC,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;oBACtC,OAAO,CAAC,SAAS,EAAE;wBACjB,OAAO,EAAE,QAAQ,CAAC,EAAE;wBACpB,OAAO,EAAE,mCAAmC,OAAO,GAAG,IAAI,cAAc,OAAO,GAAG,CAAC,IAAI,WAAW,MAAM;wBACxG,OAAO,EAAE,OAAO,GAAG,CAAC;wBACpB,OAAO;qBACR,CAAC,CAAC;oBACH,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC;oBACrB,SAAS;gBACX,CAAC;gBACD,MAAM,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,IAAI,IAAI,SAAS,CAAC,YAAY,IAAI,IAAI;YACjF,CAAC,CAAC,SAAS,CAAC,WAAW,GAAG,SAAS,CAAC,YAAY;YAChD,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,CAAC,cAAc,EAAE;YACtB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,CAAC,UAAU,EAAE;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,cAAc,QAAQ,CAAC,UAAU,CAAC,MAAM,aAAa,QAAQ,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,E
AAE,KAAK;YAC9G,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;SAClC,CAAC,CAAC;QAEH,iEAAiE;QACjE,MAAM,gBAAgB,GAA+B,MAAM,OAAO,CAAC,GAAG,CACpE,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,EAAE;YAC9C,IAAI,SAAS,EAAE,EAAE,CAAC;gBAChB,OAAO,EAAE,EAAE,EAAE,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;YACvF,CAAC;YAED,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,oBAAoB,CAAC,CAAC;YAEhG,IAAI,MAAM,CAAC;YACX,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,GAAG,MAAM,UAAU,CAAC,YAAY,CACpC,SAAS,CAAC,IAAI,EACd,SAAS,CAAC,IAAI,EACd,oBAAoB,CAAC,KAAK,EAC1B,SAAS,CACV,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;YAC7B,CAAC;YAED,OAAO,CAAC,kBAAkB,EAAE;gBAC1B,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,YAAY,EAAE,MAAM,CAAC,EAAE;gBACvB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,eAAe,EAAE,EAAE;aACpB,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,QAAQ,GACZ,gBAAgB,CAAC,MAAM,GAAG,CAAC;YACzB,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,gBAAgB,CAAC,MAAM;YACzE,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,GAAG,CAAC,IAAI,gBAAgB,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAEtG,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,MAAyB;YACjC,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,SAAS,CAAC,WAAW;YAClC,YAAY,EAAE,SAAS,CAAC,YAAY;YACpC,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,gBAAgB;SAC7B,CAAC;QAEF,OAAO,CAAC,eAAe,EAAE;YACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM;YACN,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAClE,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,EAAE,
QAAQ,CAAC,CAAC;QAChD,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,EAAE;SACf,CAAC;QACF,OAAO,CAAC,eAAe,EAAE;YACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,gBAAgB,EAAE,UAAU;SAC7B,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,oDAAoD;AACpD,8EAA8E;AAE9E,MAAM,UAAU,kBAAkB,CAChC,KAAsB,EACtB,IAA+F;IAE/F,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,gBAAgB,GAAG,KAAK,CAAC,MAAM,CACnC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EACvD,CAAC,CACF,CAAC;IACF,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,CAAC;IAE3D,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,UAAU,EAAE,IAAI,CAAC,SAAS;QAC1B,KAAK;QACL,iBAAiB,EAAE,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QAC/E,IAAI,EAAE,IAAI,CAAC,OAAO;QAClB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,eAAe;QACf,gBAAgB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QACxF,iBAAiB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QAC1F,KAAK,EAAE,MAAM;KACd,CAAC;AACJ,CAAC;AAsBD,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAyB;IAC7D,MAAM,EACJ,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EACzD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAC1E,WAAW,EAAE,mBAAmB,GACjC,GAAG,IAAI,CAAC;IAET,MAAM,SAAS,GAAG,SAAS;QACzB,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE
,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC7C,CAAC,CAAC,QAAQ,CAAC;IAEb,MAAM,WAAW,GAAG,mBAAmB,IAAI,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAExE,4DAA4D;IAC5D,MAAM,eAAe,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC/E,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAwB,EAAE,SAAS,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAClG,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;QACvB,UAAU,EAAE,SAAS,CAAC,MAAM;QAC5B,eAAe;QACf,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,cAAc,EAAE,QAAQ,CAAC,KAAK;QAC9B,WAAW;KACZ,CAAC,CAAC;IAEH,8CAA8C;IAC9C,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,OAAO,GAAG,qBAAqB,CAAC,MAAM,CAAC,KAAK,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACvE,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,OAAO,CAAC,GAAG,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,WAAW,CAAC,CAAC;IAE7C,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE;QACvD,IAAI,SAAS,EAAE;YAAE,OAAO,IAAI,CAAC;QAC7B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;QAC1B,IAAI,CAAC;YACH,OAAO,MAAM,gBAAgB,CAAC;gBAC5B,GAAG;gBACH,QAAQ;gBACR,YAAY;gBACZ,MAAM;gBACN,WAAW;gBACX,UAAU;gBACV,SAAS;gBACT,UAAU,EAAE,SAAS,CAAC,MAAM;gBAC5B,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;gBAAS,CAAC;YACT,SAAS,CAAC,OAAO,EAAE,CAAC;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAEtE,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;YAC1D,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YACxC,mEAAmE;YACnE,2BAA2B;YAC3B,KAAK,CAAC,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC,CAAC;gBACX,SAAS,EAAE,SAAS;gBACpB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAE,MAAM,CAAC,MAAM,YAAY,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;gBAC7F,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;QACL,CAAC;IAC
H,CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEpG,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC;QACjB,2EAA2E;QAC3E,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,iBAAiB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QACD,WAAW,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAC/B,CAAC;AACH,CAAC"}
@@ -1,13 +1 @@
1
- export declare const DEFAULT_CONCURRENCY = 3;
2
- export declare class Semaphore {
3
- private readonly limit;
4
- private running;
5
- private queue;
6
- constructor(limit?: number);
7
- acquire(): Promise<void>;
8
- release(): void;
9
- get available(): number;
10
- get pending(): number;
11
- get active(): number;
12
- }
13
- export declare function getSkillSemaphore(skillKey: string, limit?: number): Semaphore;
1
+ export { Semaphore, DEFAULT_CONCURRENCY, getSkillSemaphore } from "../eval/concurrency.js";
@@ -1,52 +1,6 @@
1
1
  // ---------------------------------------------------------------------------
2
- // concurrency.ts -- cooperative semaphore for limiting concurrent LLM calls
2
+ // concurrency.ts -- re-exports from shared eval/concurrency.ts
3
+ // Kept for backward compatibility with eval-server/ imports
3
4
  // ---------------------------------------------------------------------------
4
- export const DEFAULT_CONCURRENCY = 3;
5
- export class Semaphore {
6
- limit;
7
- running = 0;
8
- queue = [];
9
- constructor(limit = DEFAULT_CONCURRENCY) {
10
- this.limit = limit;
11
- if (limit < 1)
12
- throw new Error("Semaphore limit must be >= 1");
13
- }
14
- async acquire() {
15
- if (this.running < this.limit) {
16
- this.running++;
17
- return;
18
- }
19
- return new Promise((resolve) => this.queue.push(() => {
20
- this.running++;
21
- resolve();
22
- }));
23
- }
24
- release() {
25
- if (this.running <= 0)
26
- return; // idempotent — no underflow
27
- this.running--;
28
- const next = this.queue.shift();
29
- if (next)
30
- next();
31
- }
32
- get available() {
33
- return Math.max(0, this.limit - this.running);
34
- }
35
- get pending() {
36
- return this.queue.length;
37
- }
38
- get active() {
39
- return this.running;
40
- }
41
- }
42
- // Per-skill semaphore registry — ensures one semaphore per skill
43
- const registry = new Map();
44
- export function getSkillSemaphore(skillKey, limit = DEFAULT_CONCURRENCY) {
45
- let sem = registry.get(skillKey);
46
- if (!sem) {
47
- sem = new Semaphore(limit);
48
- registry.set(skillKey, sem);
49
- }
50
- return sem;
51
- }
5
+ export { Semaphore, DEFAULT_CONCURRENCY, getSkillSemaphore } from "../eval/concurrency.js";
52
6
  //# sourceMappingURL=concurrency.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/eval-server/concurrency.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AAE9E,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AAErC,MAAM,OAAO,SAAS;IAIS;IAHrB,OAAO,GAAG,CAAC,CAAC;IACZ,KAAK,GAAsB,EAAE,CAAC;IAEtC,YAA6B,QAAgB,mBAAmB;QAAnC,UAAK,GAAL,KAAK,CAA8B;QAC9D,IAAI,KAAK,GAAG,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;IACjE,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;YAC9B,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO;QACT,CAAC;QACD,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE;YACzD,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC,CAAC;IACN,CAAC;IAED,OAAO;QACL,IAAI,IAAI,CAAC,OAAO,IAAI,CAAC;YAAE,OAAO,CAAC,4BAA4B;QAC3D,IAAI,CAAC,OAAO,EAAE,CAAC;QACf,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QAChC,IAAI,IAAI;YAAE,IAAI,EAAE,CAAC;IACnB,CAAC;IAED,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,CAAC;IAChD,CAAC;IAED,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;IAC3B,CAAC;IAED,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF;AAED,iEAAiE;AACjE,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAqB,CAAC;AAE9C,MAAM,UAAU,iBAAiB,CAAC,QAAgB,EAAE,KAAK,GAAG,mBAAmB;IAC7E,IAAI,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACjC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,GAAG,GAAG,IAAI,SAAS,CAAC,KAAK,CAAC,CAAC;QAC3B,QAAQ,CAAC,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC9B,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
1
+ {"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/eval-server/concurrency.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,+DAA+D;AAC/D,4DAA4D;AAC5D,8EAA8E;AAE9E,OAAO,EAAE,SAAS,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
@@ -11,6 +11,8 @@ import { registerRoutes } from "./api-routes.js";
11
11
  import { registerImproveRoutes } from "./improve-routes.js";
12
12
  import { registerModelCompareRoutes } from "./model-compare-routes.js";
13
13
  import { registerSkillCreateRoutes } from "./skill-create-routes.js";
14
+ import { registerSweepRoutes } from "./sweep-routes.js";
15
+ import { registerIntegrationRoutes } from "./integration-routes.js";
14
16
  const __filename = fileURLToPath(import.meta.url);
15
17
  const __dirname = path.dirname(__filename);
16
18
  const MIME_TYPES = {
@@ -35,6 +37,8 @@ export async function startEvalServer(opts) {
35
37
  registerImproveRoutes(router, root);
36
38
  registerModelCompareRoutes(router, root);
37
39
  registerSkillCreateRoutes(router, root);
40
+ registerSweepRoutes(router, root);
41
+ registerIntegrationRoutes(router, root);
38
42
  // Static asset directory
39
43
  const staticDir = path.resolve(__dirname, "../eval-ui");
40
44
  const server = http.createServer(async (req, res) => {
@@ -1 +1 @@
1
- {"version":3,"file":"eval-server.js","sourceRoot":"","sources":["../../src/eval-server/eval-server.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gDAAgD;AAChD,8EAA8E;AAE9E,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAC;AACvE,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AAErE,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C,MAAM,UAAU,GAA2B;IACzC,OAAO,EAAE,WAAW;IACpB,KAAK,EAAE,wBAAwB;IAC/B,MAAM,EAAE,UAAU;IAClB,OAAO,EAAE,kBAAkB;IAC3B,MAAM,EAAE,WAAW;IACnB,MAAM,EAAE,eAAe;IACvB,MAAM,EAAE,cAAc;IACtB,OAAO,EAAE,WAAW;IACpB,QAAQ,EAAE,YAAY;IACtB,MAAM,EAAE,UAAU;IAClB,MAAM,EAAE,kBAAkB;IAC1B,OAAO,EAAE,YAAY;CACtB,CAAC;AAQF,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAuB;IAC3D,MAAM,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;IAC5B,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;IAE5B,sBAAsB;IACtB,cAAc,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC/C,qBAAqB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACpC,0BAA0B,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACzC,yBAAyB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAExC,yBAAyB;IACzB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAExD,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;QAClD,wBAAwB;QACxB,IAAI,GAAG,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;YAC/C,IAAI,SAAS,IAAK,MAAc,CAAC,OAAO,EAAE,CAAC;gBACxC,MAAc,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBAClC,OAAO;YACT,CAAC;YACD,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACnB,GAAG,CAAC,GAAG,EAAE,CAAC;YACV,OAAO;QACT,CAAC;QAED,uBAAuB;QACvB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAC9C,IAAI,OAAO;YAAE,OAAO;QAEpB,6BAA6B;QAC7B,IAAI,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;Y
ACjC,QAAQ,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YAChD,OAAO;QACT,CAAC;QAED,qBAAqB;QACrB,MAAM,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;YACvB,OAAO,CAAC,GAAG,CAAC,sCAAsC,IAAI,IAAI,CAAC,CAAC;YAC5D,OAAO,CAAC,MAAM,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,GAAyB,EACzB,GAAwB,EACxB,SAAiB;IAEjB,IAAI,OAAO,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,EAAE,kBAAkB,CAAC,CAAC,QAAQ,CAAC;IACnE,IAAI,OAAO,KAAK,GAAG;QAAE,OAAO,GAAG,aAAa,CAAC;IAE7C,mCAAmC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEhD,kCAAkC;IAClC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACpC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;YAClB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACnC,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,0BAA0B,CAAC;YAClE,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;YACpD,EAAE,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxC,OAAO;QACT,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,gCAAgC;IAClC,CAAC;IAED,0DAA0D;IAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IACrD,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACpD,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;QACpD,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACnB,CAAC;IAAC,MAAM,CAAC;QACP,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"eval-server.js","sourceRoot":"","sources":["../../src/eval-server/eval-server.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gDAAgD;AAChD,8EAA8E;AAE9E,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAC;AACvE,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,yBAAyB,EAAE,MAAM,yBAAyB,CAAC;AAEpE,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C,MAAM,UAAU,GAA2B;IACzC,OAAO,EAAE,WAAW;IACpB,KAAK,EAAE,wBAAwB;IAC/B,MAAM,EAAE,UAAU;IAClB,OAAO,EAAE,kBAAkB;IAC3B,MAAM,EAAE,WAAW;IACnB,MAAM,EAAE,eAAe;IACvB,MAAM,EAAE,cAAc;IACtB,OAAO,EAAE,WAAW;IACpB,QAAQ,EAAE,YAAY;IACtB,MAAM,EAAE,UAAU;IAClB,MAAM,EAAE,kBAAkB;IAC1B,OAAO,EAAE,YAAY;CACtB,CAAC;AAQF,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAuB;IAC3D,MAAM,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;IAC5B,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;IAE5B,sBAAsB;IACtB,cAAc,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC/C,qBAAqB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACpC,0BAA0B,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACzC,yBAAyB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACxC,mBAAmB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAClC,yBAAyB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAExC,yBAAyB;IACzB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAExD,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;QAClD,wBAAwB;QACxB,IAAI,GAAG,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;YAC/C,IAAI,SAAS,IAAK,MAAc,CAAC,OAAO,EAAE,CAAC;gBACxC,MAAc,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBAClC,OAAO;YACT,CAAC;YACD,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACnB,GAAG,CAAC,GAAG,EAAE,CAAC;YACV,OAAO;QACT,CAAC;QAED,uBA
AuB;QACvB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAC9C,IAAI,OAAO;YAAE,OAAO;QAEpB,6BAA6B;QAC7B,IAAI,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,QAAQ,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YAChD,OAAO;QACT,CAAC;QAED,qBAAqB;QACrB,MAAM,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;YACvB,OAAO,CAAC,GAAG,CAAC,sCAAsC,IAAI,IAAI,CAAC,CAAC;YAC5D,OAAO,CAAC,MAAM,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,GAAyB,EACzB,GAAwB,EACxB,SAAiB;IAEjB,IAAI,OAAO,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,EAAE,kBAAkB,CAAC,CAAC,QAAQ,CAAC;IACnE,IAAI,OAAO,KAAK,GAAG;QAAE,OAAO,GAAG,aAAa,CAAC;IAE7C,mCAAmC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEhD,kCAAkC;IAClC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACpC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;YAClB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACnC,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,0BAA0B,CAAC;YAClE,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;YACpD,EAAE,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxC,OAAO;QACT,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,gCAAgC;IAClC,CAAC;IAED,0DAA0D;IAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IACrD,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACpD,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;QACpD,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACnB,CAAC;IAAC,MAAM,CAAC;QACP,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
+ import type { Router } from "./router.js";
2
+ export declare function registerIntegrationRoutes(router: Router, root: string): void;
@@ -0,0 +1,100 @@
1
+ // ---------------------------------------------------------------------------
2
+ // integration-routes.ts -- API routes for integration tests and credentials
3
+ // ---------------------------------------------------------------------------
4
+ import { sendJson, readBody } from "./router.js";
5
+ import { initSSE, sendSSE, sendSSEDone } from "./sse-helpers.js";
6
+ import { resolveSkillDir } from "./skill-resolver.js";
7
+ import { loadAndValidateEvals } from "../eval/schema.js";
8
+ import { resolveAllCredentials } from "../eval/credential-resolver.js";
9
+ import { runIntegrationCase, recordRun } from "../eval/integration-runner.js";
10
+ export function registerIntegrationRoutes(router, root) {
11
+ // -------------------------------------------------------------------------
12
+ // POST /api/skills/:plugin/:skill/integration-run -- SSE integration test
13
+ // -------------------------------------------------------------------------
14
+ router.post("/api/skills/:plugin/:skill/integration-run", async (req, res, params) => {
15
+ const skillDir = resolveSkillDir(root, params.plugin, params.skill);
16
+ const body = (await readBody(req));
17
+ initSSE(res, req);
18
+ try {
19
+ // Load evals and filter for integration tests
20
+ const evalsFile = loadAndValidateEvals(skillDir);
21
+ let integrationCases = evalsFile.evals.filter((e) => e.testType === "integration");
22
+ // Filter by eval_ids if provided
23
+ if (body.eval_ids?.length) {
24
+ const ids = new Set(body.eval_ids);
25
+ integrationCases = integrationCases.filter((e) => ids.has(e.id));
26
+ }
27
+ if (integrationCases.length === 0) {
28
+ sendSSEDone(res, { status: "no_cases", message: "No integration test cases found" });
29
+ return;
30
+ }
31
+ for (const evalCase of integrationCases) {
32
+ const integrationCase = {
33
+ ...evalCase,
34
+ testType: "integration",
35
+ cleanup: evalCase.cleanup?.map((c) => ({
36
+ type: c.action,
37
+ description: c.description ?? c.action,
38
+ })),
39
+ };
40
+ sendSSE(res, "preflight_start", { evalId: evalCase.id, name: evalCase.name });
41
+ const result = await runIntegrationCase(integrationCase, {
42
+ skillDir,
43
+ dryRun: body.dryRun,
44
+ confirm: body.confirm,
45
+ });
46
+ // Emit phase events
47
+ for (const phase of result.phases) {
48
+ sendSSE(res, phase.phase, {
49
+ evalId: evalCase.id,
50
+ phase: phase.phase,
51
+ status: phase.status,
52
+ durationMs: phase.durationMs,
53
+ error: phase.errorMessage,
54
+ });
55
+ }
56
+ // Record run
57
+ recordRun(skillDir, result);
58
+ }
59
+ sendSSEDone(res, { status: "complete" });
60
+ }
61
+ catch (err) {
62
+ sendSSE(res, "error", { error: err.message });
63
+ sendSSEDone(res, { status: "error", error: err.message });
64
+ }
65
+ });
66
+ // -------------------------------------------------------------------------
67
+ // GET /api/credentials/:plugin/:skill -- credential status
68
+ // -------------------------------------------------------------------------
69
+ router.get("/api/credentials/:plugin/:skill", async (req, res, params) => {
70
+ const skillDir = resolveSkillDir(root, params.plugin, params.skill);
71
+ try {
72
+ // Load evals and collect all requiredCredentials from integration tests
73
+ const evalsFile = loadAndValidateEvals(skillDir);
74
+ const allCreds = new Set();
75
+ for (const evalCase of evalsFile.evals) {
76
+ if (evalCase.testType === "integration" && evalCase.requiredCredentials) {
77
+ for (const cred of evalCase.requiredCredentials) {
78
+ allCreds.add(cred);
79
+ }
80
+ }
81
+ }
82
+ const names = [...allCreds].sort();
83
+ if (names.length === 0) {
84
+ sendJson(res, { credentials: [] }, 200, req);
85
+ return;
86
+ }
87
+ const statuses = resolveAllCredentials(names, skillDir);
88
+ const credentials = statuses.map((s) => ({
89
+ name: s.name,
90
+ status: s.status,
91
+ ...(s.source ? { source: s.source } : {}),
92
+ }));
93
+ sendJson(res, { credentials }, 200, req);
94
+ }
95
+ catch (err) {
96
+ sendJson(res, { error: err.message, credentials: [] }, 400, req);
97
+ }
98
+ });
99
+ }
100
+ //# sourceMappingURL=integration-routes.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"integration-routes.js","sourceRoot":"","sources":["../../src/eval-server/integration-routes.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AAI9E,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,qBAAqB,EAAE,MAAM,gCAAgC,CAAC;AACvE,OAAO,EAAE,kBAAkB,EAAc,SAAS,EAAsB,MAAM,+BAA+B,CAAC;AAG9G,MAAM,UAAU,yBAAyB,CAAC,MAAc,EAAE,IAAY;IACpE,4EAA4E;IAC5E,0EAA0E;IAC1E,4EAA4E;IAC5E,MAAM,CAAC,IAAI,CAAC,4CAA4C,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE;QACnF,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QACpE,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,GAAG,CAAC,CAIhC,CAAC;QAEF,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAElB,IAAI,CAAC;YACH,8CAA8C;YAC9C,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;YACjD,IAAI,gBAAgB,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,aAAa,CAAC,CAAC;YAEnF,iCAAiC;YACjC,IAAI,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;gBAC1B,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACnC,gBAAgB,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACnE,CAAC;YAED,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAClC,WAAW,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,iCAAiC,EAAE,CAAC,CAAC;gBACrF,OAAO;YACT,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;gBACxC,MAAM,eAAe,GAAwB;oBAC3C,GAAG,QAAQ;oBACX,QAAQ,EAAE,aAAa;oBACvB,OAAO,EAAE,QAAQ,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;wBACrC,IAAI,EAAE,CAAC,CAAC,MAAM;wBACd,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,MAAM;qBACvC,CAAC,CAAC;iBACJ,CAAC;gBAEF,OAAO,CAAC,GAAG,EAAE,iBAAiB,EAAE,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;gBAE9E,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,eAAe,EAAE;oBACvD,QAAQ;oBACR,MAAM,EAAE,IAAI,CAAC,MAAM;oBACnB,OAAO,EAAE,IAAI,CAAC,OAAO;iBACtB,CAAC,CAAC;gBAEH,oBAAoB;gBACpB,KAAK,MAA
M,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;oBAClC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,KAAK,EAAE;wBACxB,MAAM,EAAE,QAAQ,CAAC,EAAE;wBACnB,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,UAAU,EAAE,KAAK,CAAC,UAAU;wBAC5B,KAAK,EAAE,KAAK,CAAC,YAAY;qBAC1B,CAAC,CAAC;gBACL,CAAC;gBAED,aAAa;gBACb,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAC9B,CAAC;YAED,WAAW,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QAC3C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,KAAK,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;YACzD,WAAW,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,4EAA4E;IAC5E,2DAA2D;IAC3D,4EAA4E;IAC5E,MAAM,CAAC,GAAG,CAAC,iCAAiC,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE;QACvE,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAEpE,IAAI,CAAC;YACH,wEAAwE;YACxE,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;YACnC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;gBACvC,IAAI,QAAQ,CAAC,QAAQ,KAAK,aAAa,IAAI,QAAQ,CAAC,mBAAmB,EAAE,CAAC;oBACxE,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,mBAAmB,EAAE,CAAC;wBAChD,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;YACnC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;gBAC7C,OAAO;YACT,CAAC;YAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACxD,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACvC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC1C,CAAC,CAAC,CAAC;YAEJ,QAAQ,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAC3C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,QAAQ,CAAC,GAAG,EAAE,EAAE,KAAK,EAAG,GAAa,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAC9E,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}