vskill 0.5.11 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/eval/credentials.d.ts +12 -0
- package/dist/commands/eval/credentials.js +140 -0
- package/dist/commands/eval/credentials.js.map +1 -0
- package/dist/commands/eval/generate-all.d.ts +1 -1
- package/dist/commands/eval/generate-all.js +57 -12
- package/dist/commands/eval/generate-all.js.map +1 -1
- package/dist/commands/eval/init.d.ts +2 -1
- package/dist/commands/eval/init.js +76 -10
- package/dist/commands/eval/init.js.map +1 -1
- package/dist/commands/eval/run.d.ts +7 -1
- package/dist/commands/eval/run.js +207 -26
- package/dist/commands/eval/run.js.map +1 -1
- package/dist/commands/eval/sweep.d.ts +7 -0
- package/dist/commands/eval/sweep.js +99 -0
- package/dist/commands/eval/sweep.js.map +1 -0
- package/dist/commands/eval.d.ts +10 -0
- package/dist/commands/eval.js +62 -4
- package/dist/commands/eval.js.map +1 -1
- package/dist/eval/batch-judge.d.ts +27 -0
- package/dist/eval/batch-judge.js +242 -0
- package/dist/eval/batch-judge.js.map +1 -0
- package/dist/eval/chrome-profile.d.ts +16 -0
- package/dist/eval/chrome-profile.js +65 -0
- package/dist/eval/chrome-profile.js.map +1 -0
- package/dist/eval/comparator.d.ts +3 -1
- package/dist/eval/comparator.js +19 -3
- package/dist/eval/comparator.js.map +1 -1
- package/dist/eval/concurrency.d.ts +13 -0
- package/dist/eval/concurrency.js +53 -0
- package/dist/eval/concurrency.js.map +1 -0
- package/dist/eval/credential-resolver.d.ts +31 -0
- package/dist/eval/credential-resolver.js +111 -0
- package/dist/eval/credential-resolver.js.map +1 -0
- package/dist/eval/integration-runner.d.ts +12 -0
- package/dist/eval/integration-runner.js +303 -0
- package/dist/eval/integration-runner.js.map +1 -0
- package/dist/eval/integration-types.d.ts +65 -0
- package/dist/eval/integration-types.js +18 -0
- package/dist/eval/integration-types.js.map +1 -0
- package/dist/eval/judge-cache.d.ts +29 -0
- package/dist/eval/judge-cache.js +109 -0
- package/dist/eval/judge-cache.js.map +1 -0
- package/dist/eval/judge.d.ts +1 -1
- package/dist/eval/judge.js +20 -3
- package/dist/eval/judge.js.map +1 -1
- package/dist/eval/llm.d.ts +2 -1
- package/dist/eval/llm.js +54 -2
- package/dist/eval/llm.js.map +1 -1
- package/dist/eval/prompt-builder.d.ts +10 -0
- package/dist/eval/prompt-builder.js +167 -0
- package/dist/eval/prompt-builder.js.map +1 -1
- package/dist/eval/rate-limiter.d.ts +20 -0
- package/dist/eval/rate-limiter.js +62 -0
- package/dist/eval/rate-limiter.js.map +1 -0
- package/dist/eval/schema.d.ts +16 -0
- package/dist/eval/schema.js +58 -6
- package/dist/eval/schema.js.map +1 -1
- package/dist/eval/verdict.d.ts +9 -0
- package/dist/eval/verdict.js +50 -0
- package/dist/eval/verdict.js.map +1 -1
- package/dist/eval-server/api-routes.js +99 -3
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-server/benchmark-runner.d.ts +7 -0
- package/dist/eval-server/benchmark-runner.js +158 -42
- package/dist/eval-server/benchmark-runner.js.map +1 -1
- package/dist/eval-server/concurrency.d.ts +1 -13
- package/dist/eval-server/concurrency.js +3 -49
- package/dist/eval-server/concurrency.js.map +1 -1
- package/dist/eval-server/eval-server.js +4 -0
- package/dist/eval-server/eval-server.js.map +1 -1
- package/dist/eval-server/integration-routes.d.ts +2 -0
- package/dist/eval-server/integration-routes.js +100 -0
- package/dist/eval-server/integration-routes.js.map +1 -0
- package/dist/eval-server/skill-create-routes.js +151 -22
- package/dist/eval-server/skill-create-routes.js.map +1 -1
- package/dist/eval-server/sweep-routes.d.ts +2 -0
- package/dist/eval-server/sweep-routes.js +93 -0
- package/dist/eval-server/sweep-routes.js.map +1 -0
- package/dist/eval-server/sweep-runner.d.ts +93 -0
- package/dist/eval-server/sweep-runner.js +275 -0
- package/dist/eval-server/sweep-runner.js.map +1 -0
- package/dist/eval-ui/assets/index-C9_Pey9T.css +1 -0
- package/dist/eval-ui/assets/index-KfkLPyh3.js +74 -0
- package/dist/eval-ui/index.html +2 -2
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/eval-ui/assets/index-CxHCKEhf.js +0 -74
- package/dist/eval-ui/assets/index-D2UkOol1.css +0 -1
|
@@ -6,58 +6,137 @@ import { judgeAssertion } from "../eval/judge.js";
|
|
|
6
6
|
import { writeHistoryEntry } from "../eval/benchmark-history.js";
|
|
7
7
|
import { sendSSE, sendSSEDone, withHeartbeat } from "./sse-helpers.js";
|
|
8
8
|
import { classifyError } from "./error-classifier.js";
|
|
9
|
+
import { Semaphore } from "./concurrency.js";
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
// CLI providers that spawn child processes — default concurrency 1
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
/**
 * Provider identifiers that get a default concurrency of 1
 * (the CLI-backed providers).
 */
const CLI_PROVIDERS = new Set(["claude-cli", "codex-cli", "gemini-cli"]);

/**
 * Pick the default number of eval cases to run in parallel for a provider.
 *
 * @param {string} provider - Provider identifier (e.g. "claude-cli").
 * @returns {number} 1 for providers listed in CLI_PROVIDERS, otherwise 5.
 */
function defaultConcurrency(provider) {
    if (CLI_PROVIDERS.has(provider)) {
        return 1;
    }
    return 5;
}
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// 429 retry helper — exponential backoff (1s, 2s, 4s), max 3 retries
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
/**
 * Decide whether a thrown value represents an HTTP 429 / rate-limit failure.
 *
 * A value is treated as rate-limited when either:
 *   - it is an object whose `status` property is exactly 429, or
 *   - its message text (an Error's `message`, or the value itself when a
 *     string was thrown) contains a standalone "429" or a "rate limit"
 *     phrase in any letter case ("rate limit", "rate-limit", "rate_limit",
 *     "ratelimit").
 *
 * @param {unknown} err - The value caught from a failed call.
 * @returns {boolean} true when the failure looks like rate limiting.
 */
function is429(err) {
    if (err == null) {
        return false;
    }
    // Structured signal first: accept it on any thrown object, not only on
    // Error instances, since a plain object can carry a status too.
    if (typeof err === "object" && err.status === 429) {
        return true;
    }
    let msg = "";
    if (err instanceof Error) {
        msg = String(err.message);
    }
    else if (typeof err === "string") {
        msg = err;
    }
    // "429" must not be part of a longer number ("14290 tokens", "1.429s"),
    // otherwise unrelated errors would be retried as if rate limited.
    const standalone429 = /(?<![\d.])429(?!\d)/;
    // Case-insensitive, tolerant of the separator between the two words.
    const rateLimitText = /rate[\s_-]?limit/i;
    return standalone429.test(msg) || rateLimitText.test(msg);
}
|
|
30
|
+
/**
 * Pause the calling async flow for a given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that fulfils (with undefined) once the timer fires.
 */
async function sleep(ms) {
    await new Promise((done) => {
        setTimeout(done, ms);
    });
}
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
// Weak-model judge warning heuristic
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
/**
 * Rough capability tier per model-family keyword (1 = weak, 2 = medium,
 * 3 = strong). Entry order matters: the first keyword found in a model
 * name decides its tier.
 */
const MODEL_STRENGTH = {
    haiku: 1,
    flash: 1,
    mini: 1,
    sonnet: 2,
    pro: 2,
    opus: 3,
};

/**
 * Estimate a model's capability tier from its name.
 *
 * The name is lower-cased and split into alphabetic words (digits and
 * punctuation act as separators); a MODEL_STRENGTH keyword counts only
 * when it appears as a whole word. Matching whole words rather than raw
 * substrings keeps "mini" from matching inside "gemini", which would
 * otherwise rate every Gemini model as weak.
 *
 * @param {string} model - Model identifier, e.g. "claude-3-5-sonnet-20241022".
 * @returns {number} Tier from MODEL_STRENGTH, or 2 when no keyword matches
 *   (including a missing or empty name).
 */
function estimateStrength(model) {
    const lower = String(model ?? "").toLowerCase();
    const words = new Set(lower.split(/[^a-z]+/).filter(Boolean));
    for (const [key, strength] of Object.entries(MODEL_STRENGTH)) {
        if (words.has(key)) {
            return strength;
        }
    }
    return 2; // unknown = medium
}
|
|
52
|
+
/**
 * Build a warning when the judge model is estimated to be weaker than the
 * model that generated the output being judged.
 *
 * Both names are scored with estimateStrength; a warning is produced only
 * when the judge's score is strictly lower than the generator's.
 *
 * @param {string} generatorModel - Name of the model that produced the output.
 * @param {string} judgeModel - Name of the model grading the output.
 * @returns {string | null} Warning text, or null when the judge is not weaker.
 */
export function checkWeakJudgeWarning(generatorModel, judgeModel) {
    const generatorTier = estimateStrength(generatorModel);
    const judgeTier = estimateStrength(judgeModel);
    const judgeIsWeaker = judgeTier < generatorTier;
    return judgeIsWeaker
        ? `Warning: Judge model "${judgeModel}" appears weaker than generator "${generatorModel}". Judge scores may be less reliable.`
        : null;
}
|
|
9
60
|
export async function runSingleCaseSSE(opts) {
|
|
10
|
-
const { res, evalCase, systemPrompt, client, isAborted, totalCases = 1, provider } = opts;
|
|
11
|
-
|
|
61
|
+
const { res, evalCase, systemPrompt, client, judgeClient, judgeCache, isAborted, totalCases = 1, provider } = opts;
|
|
62
|
+
const effectiveJudgeClient = judgeClient ?? client;
|
|
63
|
+
let sequence = 0;
|
|
64
|
+
const emitSSE = (event, data) => {
|
|
65
|
+
sendSSE(res, event, { ...data, caseId: evalCase.id, sequence: sequence++ });
|
|
66
|
+
};
|
|
67
|
+
emitSSE("case_start", {
|
|
12
68
|
eval_id: evalCase.id,
|
|
13
69
|
eval_name: evalCase.name,
|
|
14
70
|
total: totalCases,
|
|
15
71
|
});
|
|
16
|
-
|
|
72
|
+
emitSSE("progress", {
|
|
17
73
|
eval_id: evalCase.id,
|
|
18
74
|
phase: "generating",
|
|
19
75
|
message: `Generating LLM response for "${evalCase.name}"...`,
|
|
20
76
|
});
|
|
21
77
|
try {
|
|
22
|
-
|
|
78
|
+
// Retry with exponential backoff on 429
|
|
79
|
+
let genResult;
|
|
80
|
+
const MAX_RETRIES = 3;
|
|
81
|
+
const RETRY_DELAYS = [1000, 2000, 4000];
|
|
82
|
+
for (let attempt = 0;; attempt++) {
|
|
83
|
+
try {
|
|
84
|
+
genResult = await withHeartbeat(res, evalCase.id, "generating", `Generating LLM response for "${evalCase.name}"...`, () => client.generate(systemPrompt, evalCase.prompt));
|
|
85
|
+
break;
|
|
86
|
+
}
|
|
87
|
+
catch (err) {
|
|
88
|
+
if (is429(err) && attempt < MAX_RETRIES) {
|
|
89
|
+
const delayMs = RETRY_DELAYS[attempt];
|
|
90
|
+
emitSSE("warning", {
|
|
91
|
+
eval_id: evalCase.id,
|
|
92
|
+
message: `Rate limited (429). Retrying in ${delayMs / 1000}s (attempt ${attempt + 1}/${MAX_RETRIES})...`,
|
|
93
|
+
attempt: attempt + 1,
|
|
94
|
+
delayMs,
|
|
95
|
+
});
|
|
96
|
+
await sleep(delayMs);
|
|
97
|
+
continue;
|
|
98
|
+
}
|
|
99
|
+
throw err;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
23
102
|
const totalTokens = genResult.inputTokens != null && genResult.outputTokens != null
|
|
24
103
|
? genResult.inputTokens + genResult.outputTokens
|
|
25
104
|
: null;
|
|
26
|
-
|
|
105
|
+
emitSSE("output_ready", {
|
|
27
106
|
eval_id: evalCase.id,
|
|
28
107
|
output: genResult.text,
|
|
29
108
|
durationMs: genResult.durationMs,
|
|
30
109
|
tokens: totalTokens,
|
|
31
110
|
});
|
|
32
|
-
|
|
111
|
+
emitSSE("progress", {
|
|
33
112
|
eval_id: evalCase.id,
|
|
34
113
|
phase: "judging",
|
|
35
114
|
message: `Evaluating ${evalCase.assertions.length} assertion${evalCase.assertions.length !== 1 ? "s" : ""}...`,
|
|
36
115
|
total: evalCase.assertions.length,
|
|
37
116
|
});
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
assertionResults.push(result);
|
|
53
|
-
sendSSE(res, "assertion_result", {
|
|
117
|
+
// T-003: Parallelize intra-case assertion judges via Promise.all
|
|
118
|
+
const assertionResults = await Promise.all(evalCase.assertions.map(async (assertion, ai) => {
|
|
119
|
+
if (isAborted()) {
|
|
120
|
+
return { id: assertion.id, text: assertion.text, pass: false, reasoning: "aborted" };
|
|
121
|
+
}
|
|
122
|
+
const judgeCall = () => judgeAssertion(genResult.text, assertion, client, effectiveJudgeClient);
|
|
123
|
+
let result;
|
|
124
|
+
if (judgeCache) {
|
|
125
|
+
result = await judgeCache.getOrCompute(assertion.text, genResult.text, effectiveJudgeClient.model, judgeCall);
|
|
126
|
+
}
|
|
127
|
+
else {
|
|
128
|
+
result = await judgeCall();
|
|
129
|
+
}
|
|
130
|
+
emitSSE("assertion_result", {
|
|
54
131
|
eval_id: evalCase.id,
|
|
55
132
|
assertion_id: result.id,
|
|
56
133
|
text: result.text,
|
|
57
134
|
pass: result.pass,
|
|
58
135
|
reasoning: result.reasoning,
|
|
136
|
+
assertion_index: ai,
|
|
59
137
|
});
|
|
60
|
-
|
|
138
|
+
return result;
|
|
139
|
+
}));
|
|
61
140
|
const passRate = assertionResults.length > 0
|
|
62
141
|
? assertionResults.filter((a) => a.pass).length / assertionResults.length
|
|
63
142
|
: 0;
|
|
@@ -75,7 +154,7 @@ export async function runSingleCaseSSE(opts) {
|
|
|
75
154
|
output: genResult.text,
|
|
76
155
|
assertions: assertionResults,
|
|
77
156
|
};
|
|
78
|
-
|
|
157
|
+
emitSSE("case_complete", {
|
|
79
158
|
eval_id: evalCase.id,
|
|
80
159
|
status,
|
|
81
160
|
pass_rate: passRate,
|
|
@@ -95,7 +174,7 @@ export async function runSingleCaseSSE(opts) {
|
|
|
95
174
|
pass_rate: 0,
|
|
96
175
|
assertions: [],
|
|
97
176
|
};
|
|
98
|
-
|
|
177
|
+
emitSSE("case_complete", {
|
|
99
178
|
eval_id: evalCase.id,
|
|
100
179
|
status: "error",
|
|
101
180
|
error_message: errorMsg,
|
|
@@ -127,10 +206,11 @@ export function assembleBulkResult(cases, meta) {
|
|
|
127
206
|
};
|
|
128
207
|
}
|
|
129
208
|
export async function runBenchmarkSSE(opts) {
|
|
130
|
-
const { res, skillDir, skillName, systemPrompt, runType, provider, evalCases: allCases, filterIds, client, isAborted, } = opts;
|
|
209
|
+
const { res, skillDir, skillName, systemPrompt, runType, provider, evalCases: allCases, filterIds, client, judgeClient, judgeCache, isAborted, concurrency: concurrencyOverride, } = opts;
|
|
131
210
|
const evalCases = filterIds
|
|
132
211
|
? allCases.filter((e) => filterIds.has(e.id))
|
|
133
212
|
: allCases;
|
|
213
|
+
const concurrency = concurrencyOverride ?? defaultConcurrency(provider);
|
|
134
214
|
// Emit duration estimate so the UI can warn about long runs
|
|
135
215
|
const totalAssertions = evalCases.reduce((s, e) => s + e.assertions.length, 0);
|
|
136
216
|
const estimate = estimateDurationSec(provider, evalCases.length, totalAssertions);
|
|
@@ -140,29 +220,65 @@ export async function runBenchmarkSSE(opts) {
|
|
|
140
220
|
estimatedMinSec: estimate.minSec,
|
|
141
221
|
estimatedMaxSec: estimate.maxSec,
|
|
142
222
|
estimatedLabel: estimate.label,
|
|
223
|
+
concurrency,
|
|
143
224
|
});
|
|
144
|
-
|
|
145
|
-
|
|
225
|
+
// Emit weak-model judge warning if applicable
|
|
226
|
+
if (judgeClient) {
|
|
227
|
+
const warning = checkWeakJudgeWarning(client.model, judgeClient.model);
|
|
228
|
+
if (warning) {
|
|
229
|
+
console.warn(warning);
|
|
230
|
+
sendSSE(res, "warning", { message: warning });
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
// T-001: Parallel case execution via Semaphore + Promise.allSettled
|
|
234
|
+
const semaphore = new Semaphore(concurrency);
|
|
235
|
+
const caseTasks = evalCases.map((evalCase) => async () => {
|
|
146
236
|
if (isAborted())
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
237
|
+
return null;
|
|
238
|
+
await semaphore.acquire();
|
|
239
|
+
try {
|
|
240
|
+
return await runSingleCaseSSE({
|
|
241
|
+
res,
|
|
242
|
+
evalCase,
|
|
243
|
+
systemPrompt,
|
|
244
|
+
client,
|
|
245
|
+
judgeClient,
|
|
246
|
+
judgeCache,
|
|
247
|
+
isAborted,
|
|
248
|
+
totalCases: evalCases.length,
|
|
249
|
+
provider,
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
finally {
|
|
253
|
+
semaphore.release();
|
|
254
|
+
}
|
|
255
|
+
});
|
|
256
|
+
const settled = await Promise.allSettled(caseTasks.map((fn) => fn()));
|
|
257
|
+
const cases = [];
|
|
258
|
+
for (const result of settled) {
|
|
259
|
+
if (result.status === "fulfilled" && result.value != null) {
|
|
260
|
+
cases.push(result.value);
|
|
261
|
+
}
|
|
262
|
+
else if (result.status === "rejected") {
|
|
263
|
+
// Shouldn't normally happen since runSingleCaseSSE catches errors,
|
|
264
|
+
// but handle as safety net
|
|
265
|
+
cases.push({
|
|
266
|
+
eval_id: -1,
|
|
267
|
+
eval_name: "unknown",
|
|
268
|
+
status: "error",
|
|
269
|
+
error_message: result.reason instanceof Error ? result.reason.message : String(result.reason),
|
|
270
|
+
pass_rate: 0,
|
|
271
|
+
assertions: [],
|
|
272
|
+
});
|
|
273
|
+
}
|
|
158
274
|
}
|
|
159
|
-
const
|
|
275
|
+
const bulkResult = assembleBulkResult(cases, { model: client.model, skillName, runType, provider });
|
|
160
276
|
if (!isAborted()) {
|
|
161
277
|
// Save history for bulk runs (single-case runs save via per-case endpoint)
|
|
162
278
|
if (!filterIds) {
|
|
163
|
-
await writeHistoryEntry(skillDir,
|
|
279
|
+
await writeHistoryEntry(skillDir, bulkResult);
|
|
164
280
|
}
|
|
165
|
-
sendSSEDone(res,
|
|
281
|
+
sendSSEDone(res, bulkResult);
|
|
166
282
|
}
|
|
167
283
|
}
|
|
168
284
|
//# sourceMappingURL=benchmark-runner.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"benchmark-runner.js","sourceRoot":"","sources":["../../src/eval-server/benchmark-runner.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,sEAAsE;AACtE,8EAA8E;AAM9E,OAAO,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACvE,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAgBtD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAA0B;IAC/D,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,GAAG,CAAC,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC;IAE1F,OAAO,CAAC,GAAG,EAAE,YAAY,EAAE;QACzB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;QACxB,KAAK,EAAE,UAAU;KAClB,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;QACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,gCAAgC,QAAQ,CAAC,IAAI,MAAM;KAC7D,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,aAAa,CACnC,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,YAAY,EAC9B,gCAAgC,QAAQ,CAAC,IAAI,MAAM,EACnD,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CACrD,CAAC;QACF,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,IAAI,IAAI,SAAS,CAAC,YAAY,IAAI,IAAI;YACjF,CAAC,CAAC,SAAS,CAAC,WAAW,GAAG,SAAS,CAAC,YAAY;YAChD,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,CAAC,GAAG,EAAE,cAAc,EAAE;YAC3B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;YACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,cAAc,QAAQ,CAAC,UAAU,CAAC,MAAM,aAAa,QAAQ,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK;YAC9G,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;SAClC,CAAC,CAAC;QAEH,MAAM,gBAAgB,GAA+B,EAAE,CAAC;QAExD,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,CAAC;YACvD,MAAM,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC;YAE1C,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;gBACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,KAAK,EAAE,mBAAmB;gBAC1B,OAAO,EAAE,wBAAwB,EAAE,GAAG,CAAC,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,MAAM,SAAS,
CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG;gBACnJ,OAAO,EAAE,EAAE,GAAG,CAAC;gBACf,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;gBACjC,YAAY,EAAE,SAAS,CAAC,EAAE;aAC3B,CAAC,CAAC;YACH,IAAI,SAAS,EAAE;gBAAE,MAAM;YACvB,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;YACvE,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC9B,OAAO,CAAC,GAAG,EAAE,kBAAkB,EAAE;gBAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,YAAY,EAAE,MAAM,CAAC,EAAE;gBACvB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC,CAAC;QACL,CAAC;QAED,MAAM,QAAQ,GACZ,gBAAgB,CAAC,MAAM,GAAG,CAAC;YACzB,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,gBAAgB,CAAC,MAAM;YACzE,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,GAAG,CAAC,IAAI,gBAAgB,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAEtG,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,MAAyB;YACjC,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,SAAS,CAAC,WAAW;YAClC,YAAY,EAAE,SAAS,CAAC,YAAY;YACpC,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,gBAAgB;SAC7B,CAAC;QAEF,OAAO,CAAC,GAAG,EAAE,eAAe,EAAE;YAC5B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM;YACN,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAClE,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;QAChD,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,EAAE;SACf,CAAC;QACF,OAAO,CAAC,GAAG,EAAE,eAAe,EAAE;YAC5B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,gBAAgB,EAAE,UAAU;SAC
7B,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,oDAAoD;AACpD,8EAA8E;AAE9E,MAAM,UAAU,kBAAkB,CAChC,KAAsB,EACtB,IAA+F;IAE/F,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,gBAAgB,GAAG,KAAK,CAAC,MAAM,CACnC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EACvD,CAAC,CACF,CAAC;IACF,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,CAAC;IAE3D,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,UAAU,EAAE,IAAI,CAAC,SAAS;QAC1B,KAAK;QACL,iBAAiB,EAAE,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QAC/E,IAAI,EAAE,IAAI,CAAC,OAAO;QAClB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,eAAe;QACf,gBAAgB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QACxF,iBAAiB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QAC1F,KAAK,EAAE,MAAM;KACd,CAAC;AACJ,CAAC;AAmBD,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAyB;IAC7D,MAAM,EACJ,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EACzD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,GAClD,GAAG,IAAI,CAAC;IAET,MAAM,SAAS,GAAG,SAAS;QACzB,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC7C,CAAC,CAAC,QAAQ,CAAC;IAEb,4DAA4D;IAC5D,MAAM,eAAe,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC/E,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAwB,EAAE,SAAS,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAClG,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;QACvB,UAAU,EAAE,SAAS,C
AAC,MAAM;QAC5B,eAAe;QACf,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,cAAc,EAAE,QAAQ,CAAC,KAAK;KAC/B,CAAC,CAAC;IAEH,MAAM,KAAK,GAAoB,EAAE,CAAC;IAElC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,SAAS,EAAE;YAAE,MAAM;QAEvB,MAAM,SAAS,GAAG,MAAM,gBAAgB,CAAC;YACvC,GAAG;YACH,QAAQ;YACR,YAAY;YACZ,MAAM;YACN,SAAS;YACT,UAAU,EAAE,SAAS,CAAC,MAAM;YAC5B,QAAQ;SACT,CAAC,CAAC;QACH,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxB,CAAC;IAED,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEhG,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC;QACjB,2EAA2E;QAC3E,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,iBAAiB,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC5C,CAAC;QACD,WAAW,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;AACH,CAAC"}
|
|
1
|
+
{"version":3,"file":"benchmark-runner.js","sourceRoot":"","sources":["../../src/eval-server/benchmark-runner.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,sEAAsE;AACtE,8EAA8E;AAM9E,OAAO,EAAE,mBAAmB,EAAmB,MAAM,gBAAgB,CAAC;AACtE,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAElD,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AACjE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AACvE,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAE7C,8EAA8E;AAC9E,mEAAmE;AACnE,8EAA8E;AAE9E,MAAM,aAAa,GAAG,IAAI,GAAG,CAAS,CAAC,YAAY,EAAE,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC;AAEjF,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,OAAO,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7C,CAAC;AAED,8EAA8E;AAC9E,qEAAqE;AACrE,8EAA8E;AAE9E,SAAS,KAAK,CAAC,GAAY;IACzB,IAAI,GAAG,YAAY,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC;QACxB,IAAI,GAAG,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QACjG,IAAI,QAAQ,IAAI,GAAG,IAAK,GAAW,CAAC,MAAM,KAAK,GAAG;YAAE,OAAO,IAAI,CAAC;IAClE,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,KAAK,UAAU,KAAK,CAAC,EAAU;IAC7B,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC3D,CAAC;AAED,8EAA8E;AAC9E,qCAAqC;AACrC,8EAA8E;AAE9E,MAAM,cAAc,GAA2B;IAC7C,KAAK,EAAE,CAAC;IACR,KAAK,EAAE,CAAC;IACR,MAAM,EAAE,CAAC;IACT,MAAM,EAAE,CAAC;IACT,GAAG,EAAE,CAAC;IACN,IAAI,EAAE,CAAC;CACR,CAAC;AAEF,SAAS,gBAAgB,CAAC,KAAa;IACrC,MAAM,KAAK,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC;IAClC,KAAK,MAAM,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE,CAAC;QAC7D,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;YAAE,OAAO,QAAQ,CAAC;IAC3C,CAAC;IACD,OAAO,CAAC,CAAC,CAAC,mBAAmB;AAC/B,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,cAAsB,EAAE,UAAkB;IAC9E,MAAM,WAAW,GAAG,gBAAgB,CAAC,cAAc,CAAC,CAAC;IACrD,MAAM,aAAa,GAAG,gBAAgB,CAAC,UAAU,CAAC,CAAC;IACnD,IAAI,aAAa,GAAG,WAAW,EAAE,CAAC;QAChC,OAAO,yBAAyB,UAAU,oCAAoC,cAAc,uCAAuC,CAAC;IACtI,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAkBD,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAA0B;IA
C/D,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,UAAU,GAAG,CAAC,EAAE,QAAQ,EAAE,GAAG,IAAI,CAAC;IACnH,MAAM,oBAAoB,GAAG,WAAW,IAAI,MAAM,CAAC;IACnD,IAAI,QAAQ,GAAG,CAAC,CAAC;IAEjB,MAAM,OAAO,GAAG,CAAC,KAAa,EAAE,IAA6B,EAAE,EAAE;QAC/D,OAAO,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE,EAAE,CAAC,CAAC;IAC9E,CAAC,CAAC;IAEF,OAAO,CAAC,YAAY,EAAE;QACpB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;QACxB,KAAK,EAAE,UAAU;KAClB,CAAC,CAAC;IAEH,OAAO,CAAC,UAAU,EAAE;QAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;QACpB,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,gCAAgC,QAAQ,CAAC,IAAI,MAAM;KAC7D,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,wCAAwC;QACxC,IAAI,SAAS,CAAC;QACd,MAAM,WAAW,GAAG,CAAC,CAAC;QACtB,MAAM,YAAY,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;QACxC,KAAK,IAAI,OAAO,GAAG,CAAC,GAAI,OAAO,EAAE,EAAE,CAAC;YAClC,IAAI,CAAC;gBACH,SAAS,GAAG,MAAM,aAAa,CAC7B,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,YAAY,EAC9B,gCAAgC,QAAQ,CAAC,IAAI,MAAM,EACnD,GAAG,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CACrD,CAAC;gBACF,MAAM;YACR,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,IAAI,KAAK,CAAC,GAAG,CAAC,IAAI,OAAO,GAAG,WAAW,EAAE,CAAC;oBACxC,MAAM,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;oBACtC,OAAO,CAAC,SAAS,EAAE;wBACjB,OAAO,EAAE,QAAQ,CAAC,EAAE;wBACpB,OAAO,EAAE,mCAAmC,OAAO,GAAG,IAAI,cAAc,OAAO,GAAG,CAAC,IAAI,WAAW,MAAM;wBACxG,OAAO,EAAE,OAAO,GAAG,CAAC;wBACpB,OAAO;qBACR,CAAC,CAAC;oBACH,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC;oBACrB,SAAS;gBACX,CAAC;gBACD,MAAM,GAAG,CAAC;YACZ,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,WAAW,IAAI,IAAI,IAAI,SAAS,CAAC,YAAY,IAAI,IAAI;YACjF,CAAC,CAAC,SAAS,CAAC,WAAW,GAAG,SAAS,CAAC,YAAY;YAChD,CAAC,CAAC,IAAI,CAAC;QAET,OAAO,CAAC,cAAc,EAAE;YACtB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,CAAC,UAAU,EAAE;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,cAAc,QAAQ,CAAC,UAAU,CAAC,MAAM,aAAa,QAAQ,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAA
E,KAAK;YAC9G,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM;SAClC,CAAC,CAAC;QAEH,iEAAiE;QACjE,MAAM,gBAAgB,GAA+B,MAAM,OAAO,CAAC,GAAG,CACpE,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE,EAAE;YAC9C,IAAI,SAAS,EAAE,EAAE,CAAC;gBAChB,OAAO,EAAE,EAAE,EAAE,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;YACvF,CAAC;YAED,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,oBAAoB,CAAC,CAAC;YAEhG,IAAI,MAAM,CAAC;YACX,IAAI,UAAU,EAAE,CAAC;gBACf,MAAM,GAAG,MAAM,UAAU,CAAC,YAAY,CACpC,SAAS,CAAC,IAAI,EACd,SAAS,CAAC,IAAI,EACd,oBAAoB,CAAC,KAAK,EAC1B,SAAS,CACV,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;YAC7B,CAAC;YAED,OAAO,CAAC,kBAAkB,EAAE;gBAC1B,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,YAAY,EAAE,MAAM,CAAC,EAAE;gBACvB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,eAAe,EAAE,EAAE;aACpB,CAAC,CAAC;YAEH,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,QAAQ,GACZ,gBAAgB,CAAC,MAAM,GAAG,CAAC;YACzB,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,gBAAgB,CAAC,MAAM;YACzE,CAAC,CAAC,CAAC,CAAC;QACR,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,GAAG,CAAC,IAAI,gBAAgB,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC;QAEtG,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,MAAyB;YACjC,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,SAAS,CAAC,WAAW;YAClC,YAAY,EAAE,SAAS,CAAC,YAAY;YACpC,MAAM,EAAE,SAAS,CAAC,IAAI;YACtB,UAAU,EAAE,gBAAgB;SAC7B,CAAC;QAEF,OAAO,CAAC,eAAe,EAAE;YACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM;YACN,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,SAAS,CAAC,UAAU;YAChC,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAClE,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,EAAE,QA
AQ,CAAC,CAAC;QAChD,MAAM,SAAS,GAAkB;YAC/B,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,SAAS,EAAE,CAAC;YACZ,UAAU,EAAE,EAAE;SACf,CAAC;QACF,OAAO,CAAC,eAAe,EAAE;YACvB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,MAAM,EAAE,OAAO;YACf,aAAa,EAAE,QAAQ;YACvB,gBAAgB,EAAE,UAAU;SAC7B,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,oDAAoD;AACpD,8EAA8E;AAE9E,MAAM,UAAU,kBAAkB,CAChC,KAAsB,EACtB,IAA+F;IAE/F,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,gBAAgB,GAAG,KAAK,CAAC,MAAM,CACnC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EACvD,CAAC,CACF,CAAC;IACF,MAAM,eAAe,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAC3E,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,CAAC;IAE3D,OAAO;QACL,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK,EAAE,IAAI,CAAC,KAAK;QACjB,UAAU,EAAE,IAAI,CAAC,SAAS;QAC1B,KAAK;QACL,iBAAiB,EAAE,eAAe,GAAG,CAAC,CAAC,CAAC,CAAC,gBAAgB,GAAG,eAAe,CAAC,CAAC,CAAC,CAAC;QAC/E,IAAI,EAAE,IAAI,CAAC,OAAO;QAClB,QAAQ,EAAE,IAAI,CAAC,QAAQ;QACvB,eAAe;QACf,gBAAgB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QACxF,iBAAiB,EAAE,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI;QAC1F,KAAK,EAAE,MAAM;KACd,CAAC;AACJ,CAAC;AAsBD,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAyB;IAC7D,MAAM,EACJ,GAAG,EAAE,QAAQ,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,QAAQ,EACzD,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAC1E,WAAW,EAAE,mBAAmB,GACjC,GAAG,IAAI,CAAC;IAET,MAAM,SAAS,GAAG,SAAS;QACzB,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,C
AAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC7C,CAAC,CAAC,QAAQ,CAAC;IAEb,MAAM,WAAW,GAAG,mBAAmB,IAAI,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAExE,4DAA4D;IAC5D,MAAM,eAAe,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC/E,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAwB,EAAE,SAAS,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IAClG,OAAO,CAAC,GAAG,EAAE,UAAU,EAAE;QACvB,UAAU,EAAE,SAAS,CAAC,MAAM;QAC5B,eAAe;QACf,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,eAAe,EAAE,QAAQ,CAAC,MAAM;QAChC,cAAc,EAAE,QAAQ,CAAC,KAAK;QAC9B,WAAW;KACZ,CAAC,CAAC;IAEH,8CAA8C;IAC9C,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,OAAO,GAAG,qBAAqB,CAAC,MAAM,CAAC,KAAK,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACvE,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACtB,OAAO,CAAC,GAAG,EAAE,SAAS,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,oEAAoE;IACpE,MAAM,SAAS,GAAG,IAAI,SAAS,CAAC,WAAW,CAAC,CAAC;IAE7C,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE;QACvD,IAAI,SAAS,EAAE;YAAE,OAAO,IAAI,CAAC;QAC7B,MAAM,SAAS,CAAC,OAAO,EAAE,CAAC;QAC1B,IAAI,CAAC;YACH,OAAO,MAAM,gBAAgB,CAAC;gBAC5B,GAAG;gBACH,QAAQ;gBACR,YAAY;gBACZ,MAAM;gBACN,WAAW;gBACX,UAAU;gBACV,SAAS;gBACT,UAAU,EAAE,SAAS,CAAC,MAAM;gBAC5B,QAAQ;aACT,CAAC,CAAC;QACL,CAAC;gBAAS,CAAC;YACT,SAAS,CAAC,OAAO,EAAE,CAAC;QACtB,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IAEtE,MAAM,KAAK,GAAoB,EAAE,CAAC;IAClC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,EAAE,CAAC;YAC1D,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;QAC3B,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YACxC,mEAAmE;YACnE,2BAA2B;YAC3B,KAAK,CAAC,IAAI,CAAC;gBACT,OAAO,EAAE,CAAC,CAAC;gBACX,SAAS,EAAE,SAAS;gBACpB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAE,MAAM,CAAC,MAAM,YAAY,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC;gBAC7F,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;QACL,CAAC;IACH,
CAAC;IAED,MAAM,UAAU,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;IAEpG,IAAI,CAAC,SAAS,EAAE,EAAE,CAAC;QACjB,2EAA2E;QAC3E,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,iBAAiB,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QACD,WAAW,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAC/B,CAAC;AACH,CAAC"}
|
|
@@ -1,13 +1 @@
|
|
|
1
|
-
export
|
|
2
|
-
/**
 * Counting semaphore used to bound concurrent work.
 *
 * - `limit`: maximum number of slots that may be held at once.
 * - `running`: number of slots currently held.
 * - `queue`: acquirers waiting for a slot.
 * - `constructor(limit?)`: `limit` is optional.
 * - `acquire()`: resolves once a slot has been taken by the caller.
 * - `release()`: gives a slot back.
 * - `available` / `pending` / `active`: free slots, waiting acquirers, and
 *   held slots respectively.
 */
export declare class Semaphore {
|
|
3
|
-
private readonly limit;
|
|
4
|
-
private running;
|
|
5
|
-
private queue;
|
|
6
|
-
constructor(limit?: number);
|
|
7
|
-
acquire(): Promise<void>;
|
|
8
|
-
release(): void;
|
|
9
|
-
get available(): number;
|
|
10
|
-
get pending(): number;
|
|
11
|
-
get active(): number;
|
|
12
|
-
}
|
|
13
|
-
/**
 * Look up the Semaphore associated with `skillKey`.
 *
 * @param skillKey - key identifying the skill.
 * @param limit - optional concurrency limit for the semaphore.
 * @returns the Semaphore for `skillKey`.
 */
export declare function getSkillSemaphore(skillKey: string, limit?: number): Semaphore;
|
|
1
|
+
export { Semaphore, DEFAULT_CONCURRENCY, getSkillSemaphore } from "../eval/concurrency.js";
|
|
@@ -1,52 +1,6 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
|
-
// concurrency.ts --
|
|
2
|
+
// concurrency.ts -- re-exports from shared eval/concurrency.ts
|
|
3
|
+
// Kept for backward compatibility with eval-server/ imports
|
|
3
4
|
// ---------------------------------------------------------------------------
|
|
4
|
-
export
|
|
5
|
-
/**
 * Counting semaphore that bounds how many async tasks hold a slot at once.
 *
 * Callers `await acquire()` before starting work and call `release()` when
 * done. Waiters are queued with `push` and resumed with `shift`, i.e. in
 * first-in, first-out order.
 */
export class Semaphore {
    /** Maximum number of slots that may be held simultaneously. */
    limit;
    /** Number of slots currently held. */
    running = 0;
    /** Wake-up callbacks for acquirers waiting on a slot, oldest first. */
    queue = [];
    /**
     * @param {number} [limit=DEFAULT_CONCURRENCY] Maximum concurrent holders.
     * @throws {Error} If `limit` is NaN or smaller than 1.
     */
    constructor(limit = DEFAULT_CONCURRENCY) {
        // Written as a negated `>=` so that NaN is rejected as well: `NaN < 1`
        // is false, and a NaN limit would make `running < limit` false forever,
        // leaving every acquire() parked in the queue.
        if (!(limit >= 1)) {
            throw new Error("Semaphore limit must be >= 1");
        }
        this.limit = limit;
    }
    /**
     * Take a slot, waiting if none is free.
     * @returns {Promise<void>} Resolves once the caller holds a slot.
     */
    async acquire() {
        if (this.running < this.limit) {
            this.running++;
            return;
        }
        // No free slot: park a callback that claims the slot when release()
        // hands it over, then resolves the waiting caller.
        return new Promise((resolve) => this.queue.push(() => {
            this.running++;
            resolve();
        }));
    }
    /**
     * Give back a slot and wake the oldest waiter, if any.
     * Calling it while nothing is held is a no-op, so the counter never
     * goes below zero.
     */
    release() {
        if (this.running <= 0) {
            return;
        }
        this.running--;
        const next = this.queue.shift();
        if (next) {
            next();
        }
    }
    /** Number of slots that can be acquired without waiting (never negative). */
    get available() {
        return Math.max(0, this.limit - this.running);
    }
    /** Number of acquirers currently waiting for a slot. */
    get pending() {
        return this.queue.length;
    }
    /** Number of slots currently held. */
    get active() {
        return this.running;
    }
}
|
|
42
|
-
// Module-level cache: at most one Semaphore instance per skill key.
const registry = new Map();
/**
 * Return the Semaphore registered for `skillKey`, creating and caching one
 * on the first request for that key.
 *
 * `limit` is only consulted when a new Semaphore has to be created; a key
 * that already has a Semaphore gets that same instance back unchanged.
 *
 * @param {string} skillKey Key identifying the skill.
 * @param {number} [limit=DEFAULT_CONCURRENCY] Limit for a newly created Semaphore.
 * @returns {Semaphore} The Semaphore shared by all callers using `skillKey`.
 */
export function getSkillSemaphore(skillKey, limit = DEFAULT_CONCURRENCY) {
    const existing = registry.get(skillKey);
    if (existing) {
        return existing;
    }
    const created = new Semaphore(limit);
    registry.set(skillKey, created);
    return created;
}
|
|
5
|
+
export { Semaphore, DEFAULT_CONCURRENCY, getSkillSemaphore } from "../eval/concurrency.js";
|
|
52
6
|
//# sourceMappingURL=concurrency.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/eval-server/concurrency.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,
|
|
1
|
+
{"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/eval-server/concurrency.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,+DAA+D;AAC/D,4DAA4D;AAC5D,8EAA8E;AAE9E,OAAO,EAAE,SAAS,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,wBAAwB,CAAC"}
|
|
@@ -11,6 +11,8 @@ import { registerRoutes } from "./api-routes.js";
|
|
|
11
11
|
import { registerImproveRoutes } from "./improve-routes.js";
|
|
12
12
|
import { registerModelCompareRoutes } from "./model-compare-routes.js";
|
|
13
13
|
import { registerSkillCreateRoutes } from "./skill-create-routes.js";
|
|
14
|
+
import { registerSweepRoutes } from "./sweep-routes.js";
|
|
15
|
+
import { registerIntegrationRoutes } from "./integration-routes.js";
|
|
14
16
|
const __filename = fileURLToPath(import.meta.url);
|
|
15
17
|
const __dirname = path.dirname(__filename);
|
|
16
18
|
const MIME_TYPES = {
|
|
@@ -35,6 +37,8 @@ export async function startEvalServer(opts) {
|
|
|
35
37
|
registerImproveRoutes(router, root);
|
|
36
38
|
registerModelCompareRoutes(router, root);
|
|
37
39
|
registerSkillCreateRoutes(router, root);
|
|
40
|
+
registerSweepRoutes(router, root);
|
|
41
|
+
registerIntegrationRoutes(router, root);
|
|
38
42
|
// Static asset directory
|
|
39
43
|
const staticDir = path.resolve(__dirname, "../eval-ui");
|
|
40
44
|
const server = http.createServer(async (req, res) => {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"eval-server.js","sourceRoot":"","sources":["../../src/eval-server/eval-server.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gDAAgD;AAChD,8EAA8E;AAE9E,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAC;AACvE,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"eval-server.js","sourceRoot":"","sources":["../../src/eval-server/eval-server.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gDAAgD;AAChD,8EAA8E;AAE9E,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAClC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EAAE,0BAA0B,EAAE,MAAM,2BAA2B,CAAC;AACvE,OAAO,EAAE,yBAAyB,EAAE,MAAM,0BAA0B,CAAC;AACrE,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,yBAAyB,EAAE,MAAM,yBAAyB,CAAC;AAEpE,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAE3C,MAAM,UAAU,GAA2B;IACzC,OAAO,EAAE,WAAW;IACpB,KAAK,EAAE,wBAAwB;IAC/B,MAAM,EAAE,UAAU;IAClB,OAAO,EAAE,kBAAkB;IAC3B,MAAM,EAAE,WAAW;IACnB,MAAM,EAAE,eAAe;IACvB,MAAM,EAAE,cAAc;IACtB,OAAO,EAAE,WAAW;IACpB,QAAQ,EAAE,YAAY;IACtB,MAAM,EAAE,UAAU;IAClB,MAAM,EAAE,kBAAkB;IAC1B,OAAO,EAAE,YAAY;CACtB,CAAC;AAQF,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAuB;IAC3D,MAAM,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;IAC5B,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;IAE5B,sBAAsB;IACtB,cAAc,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;IAC/C,qBAAqB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACpC,0BAA0B,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACzC,yBAAyB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IACxC,mBAAmB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAClC,yBAAyB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAExC,yBAAyB;IACzB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAExD,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;QAClD,wBAAwB;QACxB,IAAI,GAAG,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,OAAO,CAAC,CAAC;YAC/C,IAAI,SAAS,IAAK,MAAc,CAAC,OAAO,EAAE,CAAC;gBACxC,MAAc,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBAClC,OAAO;YACT,CAAC;YACD,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YACnB,GAAG,CAAC,GAAG,EAAE,CAAC;YACV,OAAO;QACT,CAAC;QAED,uBAAu
B;QACvB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAC9C,IAAI,OAAO;YAAE,OAAO;QAEpB,6BAA6B;QAC7B,IAAI,GAAG,CAAC,GAAG,EAAE,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,QAAQ,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;YAChD,OAAO;QACT,CAAC;QAED,qBAAqB;QACrB,MAAM,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,SAAS,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;YACvB,OAAO,CAAC,GAAG,CAAC,sCAAsC,IAAI,IAAI,CAAC,CAAC;YAC5D,OAAO,CAAC,MAAM,CAAC,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,KAAK,UAAU,WAAW,CACxB,GAAyB,EACzB,GAAwB,EACxB,SAAiB;IAEjB,IAAI,OAAO,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,EAAE,kBAAkB,CAAC,CAAC,QAAQ,CAAC;IACnE,IAAI,OAAO,KAAK,GAAG;QAAE,OAAO,GAAG,aAAa,CAAC;IAE7C,mCAAmC;IACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;IACtE,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAEhD,kCAAkC;IAClC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACpC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;YAClB,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACnC,MAAM,WAAW,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,0BAA0B,CAAC;YAClE,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;YACpD,EAAE,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACxC,OAAO;QACT,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,gCAAgC;IAClC,CAAC;IAED,0DAA0D;IAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IACrD,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QACpD,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,WAAW,EAAE,CAAC,CAAC;QACpD,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IACnB,CAAC;IAAC,MAAM,CAAC;QACP,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;QACnB,GAAG,CAAC,GAAG,CAAC,+CAA+C,CAAC,CAAC;IAC3D,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// integration-routes.ts -- API routes for integration tests and credentials
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
import { sendJson, readBody } from "./router.js";
|
|
5
|
+
import { initSSE, sendSSE, sendSSEDone } from "./sse-helpers.js";
|
|
6
|
+
import { resolveSkillDir } from "./skill-resolver.js";
|
|
7
|
+
import { loadAndValidateEvals } from "../eval/schema.js";
|
|
8
|
+
import { resolveAllCredentials } from "../eval/credential-resolver.js";
|
|
9
|
+
import { runIntegrationCase, recordRun } from "../eval/integration-runner.js";
|
|
10
|
+
/**
 * Turn a caught value into a printable message.
 *
 * `throw` accepts any value, so `err.message` can be undefined; in that case
 * the value itself is stringified instead of silently dropping the error text.
 */
function describeError(err) {
    return typeof err?.message === "string" ? err.message : String(err);
}
/**
 * Register the integration-test and credential-status API routes.
 *
 * @param router Route registry; only its `post` and `get` methods are used.
 * @param root Base directory passed to `resolveSkillDir` together with the
 *   `:plugin` and `:skill` route parameters.
 */
export function registerIntegrationRoutes(router, root) {
    // -------------------------------------------------------------------------
    // POST /api/skills/:plugin/:skill/integration-run -- SSE integration test
    // -------------------------------------------------------------------------
    router.post("/api/skills/:plugin/:skill/integration-run", async (req, res, params) => {
        const skillDir = resolveSkillDir(root, params.plugin, params.skill);
        // readBody's result is not validated here; fall back to an empty
        // object so the option reads below cannot fail on a nullish body.
        const body = (await readBody(req)) ?? {};
        initSSE(res, req);
        try {
            // Load evals and keep only the integration tests
            const evalsFile = loadAndValidateEvals(skillDir);
            let integrationCases = evalsFile.evals.filter((e) => e.testType === "integration");
            // Narrow to the requested ids. Only a non-empty array counts as a
            // filter; any other shape (e.g. a string, which Set would split
            // into characters) is ignored.
            if (Array.isArray(body.eval_ids) && body.eval_ids.length > 0) {
                const ids = new Set(body.eval_ids);
                integrationCases = integrationCases.filter((e) => ids.has(e.id));
            }
            if (integrationCases.length === 0) {
                sendSSEDone(res, { status: "no_cases", message: "No integration test cases found" });
                return;
            }
            // Cases run one after another; each one is recorded before the
            // next starts.
            for (const evalCase of integrationCases) {
                const integrationCase = {
                    ...evalCase,
                    testType: "integration",
                    // Reshape cleanup entries: `action` becomes `type`, and the
                    // action name doubles as the description when none is given.
                    cleanup: evalCase.cleanup?.map((c) => ({
                        type: c.action,
                        description: c.description ?? c.action,
                    })),
                };
                sendSSE(res, "preflight_start", { evalId: evalCase.id, name: evalCase.name });
                const result = await runIntegrationCase(integrationCase, {
                    skillDir,
                    dryRun: body.dryRun,
                    confirm: body.confirm,
                });
                // Emit one event per phase, named after the phase itself
                for (const phase of result.phases) {
                    sendSSE(res, phase.phase, {
                        evalId: evalCase.id,
                        phase: phase.phase,
                        status: phase.status,
                        durationMs: phase.durationMs,
                        error: phase.errorMessage,
                    });
                }
                // Record run
                recordRun(skillDir, result);
            }
            sendSSEDone(res, { status: "complete" });
        }
        catch (err) {
            // The SSE stream is already open, so the failure is reported as
            // an "error" event followed by the terminating done event.
            const message = describeError(err);
            sendSSE(res, "error", { error: message });
            sendSSEDone(res, { status: "error", error: message });
        }
    });
    // -------------------------------------------------------------------------
    // GET /api/credentials/:plugin/:skill -- credential status
    // -------------------------------------------------------------------------
    router.get("/api/credentials/:plugin/:skill", async (req, res, params) => {
        const skillDir = resolveSkillDir(root, params.plugin, params.skill);
        try {
            // Collect the distinct requiredCredentials of all integration tests
            const evalsFile = loadAndValidateEvals(skillDir);
            const allCreds = new Set();
            for (const evalCase of evalsFile.evals) {
                if (evalCase.testType === "integration" && evalCase.requiredCredentials) {
                    for (const cred of evalCase.requiredCredentials) {
                        allCreds.add(cred);
                    }
                }
            }
            const names = [...allCreds].sort();
            if (names.length === 0) {
                sendJson(res, { credentials: [] }, 200, req);
                return;
            }
            const statuses = resolveAllCredentials(names, skillDir);
            // Only name, status and (when present) source are sent back.
            const credentials = statuses.map((s) => ({
                name: s.name,
                status: s.status,
                ...(s.source ? { source: s.source } : {}),
            }));
            sendJson(res, { credentials }, 200, req);
        }
        catch (err) {
            sendJson(res, { error: describeError(err), credentials: [] }, 400, req);
        }
    });
}
|
|
100
|
+
//# sourceMappingURL=integration-routes.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"integration-routes.js","sourceRoot":"","sources":["../../src/eval-server/integration-routes.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4EAA4E;AAC5E,8EAA8E;AAI9E,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,qBAAqB,EAAE,MAAM,gCAAgC,CAAC;AACvE,OAAO,EAAE,kBAAkB,EAAc,SAAS,EAAsB,MAAM,+BAA+B,CAAC;AAG9G,MAAM,UAAU,yBAAyB,CAAC,MAAc,EAAE,IAAY;IACpE,4EAA4E;IAC5E,0EAA0E;IAC1E,4EAA4E;IAC5E,MAAM,CAAC,IAAI,CAAC,4CAA4C,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE;QACnF,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QACpE,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,GAAG,CAAC,CAIhC,CAAC;QAEF,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAElB,IAAI,CAAC;YACH,8CAA8C;YAC9C,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;YACjD,IAAI,gBAAgB,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,aAAa,CAAC,CAAC;YAEnF,iCAAiC;YACjC,IAAI,IAAI,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC;gBAC1B,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACnC,gBAAgB,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YACnE,CAAC;YAED,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAClC,WAAW,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,iCAAiC,EAAE,CAAC,CAAC;gBACrF,OAAO;YACT,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;gBACxC,MAAM,eAAe,GAAwB;oBAC3C,GAAG,QAAQ;oBACX,QAAQ,EAAE,aAAa;oBACvB,OAAO,EAAE,QAAQ,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;wBACrC,IAAI,EAAE,CAAC,CAAC,MAAM;wBACd,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,CAAC,CAAC,MAAM;qBACvC,CAAC,CAAC;iBACJ,CAAC;gBAEF,OAAO,CAAC,GAAG,EAAE,iBAAiB,EAAE,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;gBAE9E,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,eAAe,EAAE;oBACvD,QAAQ;oBACR,MAAM,EAAE,IAAI,CAAC,MAAM;oBACnB,OAAO,EAAE,IAAI,CAAC,OAAO;iBACtB,CAAC,CAAC;gBAEH,oBAAoB;gBACpB,KAAK,MAAM,
KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;oBAClC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,KAAK,EAAE;wBACxB,MAAM,EAAE,QAAQ,CAAC,EAAE;wBACnB,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,UAAU,EAAE,KAAK,CAAC,UAAU;wBAC5B,KAAK,EAAE,KAAK,CAAC,YAAY;qBAC1B,CAAC,CAAC;gBACL,CAAC;gBAED,aAAa;gBACb,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAC9B,CAAC;YAED,WAAW,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC,CAAC;QAC3C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,EAAE,OAAO,EAAE,EAAE,KAAK,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;YACzD,WAAW,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,4EAA4E;IAC5E,2DAA2D;IAC3D,4EAA4E;IAC5E,MAAM,CAAC,GAAG,CAAC,iCAAiC,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE;QACvE,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAEpE,IAAI,CAAC;YACH,wEAAwE;YACxE,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;YACjD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;YACnC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;gBACvC,IAAI,QAAQ,CAAC,QAAQ,KAAK,aAAa,IAAI,QAAQ,CAAC,mBAAmB,EAAE,CAAC;oBACxE,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,mBAAmB,EAAE,CAAC;wBAChD,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;oBACrB,CAAC;gBACH,CAAC;YACH,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;YACnC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;gBAC7C,OAAO;YACT,CAAC;YAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACxD,MAAM,WAAW,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACvC,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,MAAM,EAAE,CAAC,CAAC,MAAM;gBAChB,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAC1C,CAAC,CAAC,CAAC;YAEJ,QAAQ,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAC3C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,QAAQ,CAAC,GAAG,EAAE,EAAE,KAAK,EAAG,GAAa,CAAC,OAAO,EAAE,WAAW,EAAE,EAAE,EAAE,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAC9E,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
|