vskill 0.5.12 → 0.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/dist/commands/eval/credentials.d.ts +12 -0
  2. package/dist/commands/eval/credentials.js +140 -0
  3. package/dist/commands/eval/credentials.js.map +1 -0
  4. package/dist/commands/eval/generate-all.d.ts +1 -1
  5. package/dist/commands/eval/generate-all.js +57 -12
  6. package/dist/commands/eval/generate-all.js.map +1 -1
  7. package/dist/commands/eval/init.d.ts +2 -1
  8. package/dist/commands/eval/init.js +76 -10
  9. package/dist/commands/eval/init.js.map +1 -1
  10. package/dist/commands/eval/run.d.ts +7 -1
  11. package/dist/commands/eval/run.js +207 -26
  12. package/dist/commands/eval/run.js.map +1 -1
  13. package/dist/commands/eval/sweep.d.ts +7 -0
  14. package/dist/commands/eval/sweep.js +99 -0
  15. package/dist/commands/eval/sweep.js.map +1 -0
  16. package/dist/commands/eval.d.ts +10 -0
  17. package/dist/commands/eval.js +62 -4
  18. package/dist/commands/eval.js.map +1 -1
  19. package/dist/eval/batch-judge.d.ts +27 -0
  20. package/dist/eval/batch-judge.js +242 -0
  21. package/dist/eval/batch-judge.js.map +1 -0
  22. package/dist/eval/chrome-profile.d.ts +16 -0
  23. package/dist/eval/chrome-profile.js +65 -0
  24. package/dist/eval/chrome-profile.js.map +1 -0
  25. package/dist/eval/comparator.d.ts +3 -1
  26. package/dist/eval/comparator.js +19 -3
  27. package/dist/eval/comparator.js.map +1 -1
  28. package/dist/eval/concurrency.d.ts +13 -0
  29. package/dist/eval/concurrency.js +53 -0
  30. package/dist/eval/concurrency.js.map +1 -0
  31. package/dist/eval/credential-resolver.d.ts +31 -0
  32. package/dist/eval/credential-resolver.js +111 -0
  33. package/dist/eval/credential-resolver.js.map +1 -0
  34. package/dist/eval/integration-runner.d.ts +12 -0
  35. package/dist/eval/integration-runner.js +303 -0
  36. package/dist/eval/integration-runner.js.map +1 -0
  37. package/dist/eval/integration-types.d.ts +65 -0
  38. package/dist/eval/integration-types.js +18 -0
  39. package/dist/eval/integration-types.js.map +1 -0
  40. package/dist/eval/judge-cache.d.ts +29 -0
  41. package/dist/eval/judge-cache.js +109 -0
  42. package/dist/eval/judge-cache.js.map +1 -0
  43. package/dist/eval/judge.d.ts +1 -1
  44. package/dist/eval/judge.js +20 -3
  45. package/dist/eval/judge.js.map +1 -1
  46. package/dist/eval/llm.d.ts +2 -1
  47. package/dist/eval/llm.js +54 -2
  48. package/dist/eval/llm.js.map +1 -1
  49. package/dist/eval/prompt-builder.d.ts +10 -0
  50. package/dist/eval/prompt-builder.js +167 -0
  51. package/dist/eval/prompt-builder.js.map +1 -1
  52. package/dist/eval/rate-limiter.d.ts +20 -0
  53. package/dist/eval/rate-limiter.js +62 -0
  54. package/dist/eval/rate-limiter.js.map +1 -0
  55. package/dist/eval/schema.d.ts +14 -0
  56. package/dist/eval/schema.js +55 -5
  57. package/dist/eval/schema.js.map +1 -1
  58. package/dist/eval-server/api-routes.js +71 -2
  59. package/dist/eval-server/api-routes.js.map +1 -1
  60. package/dist/eval-server/benchmark-runner.d.ts +7 -0
  61. package/dist/eval-server/benchmark-runner.js +158 -42
  62. package/dist/eval-server/benchmark-runner.js.map +1 -1
  63. package/dist/eval-server/concurrency.d.ts +1 -13
  64. package/dist/eval-server/concurrency.js +3 -49
  65. package/dist/eval-server/concurrency.js.map +1 -1
  66. package/dist/eval-server/eval-server.js +4 -0
  67. package/dist/eval-server/eval-server.js.map +1 -1
  68. package/dist/eval-server/integration-routes.d.ts +2 -0
  69. package/dist/eval-server/integration-routes.js +100 -0
  70. package/dist/eval-server/integration-routes.js.map +1 -0
  71. package/dist/eval-server/skill-create-routes.js +151 -22
  72. package/dist/eval-server/skill-create-routes.js.map +1 -1
  73. package/dist/eval-server/sweep-routes.d.ts +2 -0
  74. package/dist/eval-server/sweep-routes.js +93 -0
  75. package/dist/eval-server/sweep-routes.js.map +1 -0
  76. package/dist/eval-server/sweep-runner.d.ts +93 -0
  77. package/dist/eval-server/sweep-runner.js +275 -0
  78. package/dist/eval-server/sweep-runner.js.map +1 -0
  79. package/dist/eval-ui/assets/index-KfkLPyh3.js +74 -0
  80. package/dist/eval-ui/index.html +1 -1
  81. package/dist/index.js +8 -0
  82. package/dist/index.js.map +1 -1
  83. package/package.json +1 -1
  84. package/dist/eval-ui/assets/index-C7MIPqI-.js +0 -74
@@ -11,7 +11,20 @@ import { green, red, yellow, bold, dim, table } from "../../utils/output.js";
11
11
  import { buildEvalSystemPrompt } from "../../eval/prompt-builder.js";
12
12
  import { classifyError } from "../../eval-server/error-classifier.js";
13
13
  import { ProgressLog } from "../../eval/progress-log.js";
14
- export async function runEvalRun(skillDir) {
14
+ import { JudgeCache } from "../../eval/judge-cache.js";
15
+ import { checkWeakJudgeWarning } from "../../eval-server/benchmark-runner.js";
16
+ import { batchJudgeAssertions } from "../../eval/batch-judge.js";
17
+ function parseJudgeModelSpec(spec) {
18
+ const slashIndex = spec.indexOf("/");
19
+ if (slashIndex === -1) {
20
+ throw new Error(`Invalid --judge-model format "${spec}". Expected "provider/model" (e.g., "anthropic/claude-haiku-3").`);
21
+ }
22
+ return {
23
+ provider: spec.slice(0, slashIndex),
24
+ model: spec.slice(slashIndex + 1),
25
+ };
26
+ }
27
+ export async function runEvalRun(skillDir, options) {
15
28
  // Load and validate evals.json
16
29
  let evalsFile;
17
30
  try {
@@ -48,8 +61,33 @@ export async function runEvalRun(skillDir) {
48
61
  const provider = (process.env.VSKILL_EVAL_PROVIDER || "claude-cli");
49
62
  const total = evalsFile.evals.length;
50
63
  const totalAssertions = evalsFile.evals.reduce((s, e) => s + e.assertions.length, 0);
64
+ // T-006: Create separate judge client if --judge-model is specified
65
+ let judgeClient;
66
+ if (options?.judgeModel) {
67
+ const { provider: judgeProvider, model: judgeModelName } = parseJudgeModelSpec(options.judgeModel);
68
+ judgeClient = createLlmClient({ provider: judgeProvider, model: judgeModelName });
69
+ // Emit weak-model warning
70
+ const warning = checkWeakJudgeWarning(model, judgeClient.model);
71
+ if (warning) {
72
+ console.warn(yellow(warning));
73
+ }
74
+ }
75
+ const effectiveJudgeClient = judgeClient ?? client;
76
+ // T-008: Judge cache (bypassed with --no-cache)
77
+ const judgeCache = options?.noCache ? null : new JudgeCache(skillDir);
51
78
  console.log(dim(`Provider: ${model} | ${total} eval case${total !== 1 ? "s" : ""} | ${totalAssertions} assertions`));
79
+ if (judgeClient) {
80
+ console.log(dim(`Judge model: ${judgeClient.model}`));
81
+ }
82
+ if (judgeCache && !options?.noCache) {
83
+ console.log(dim(`Judge cache: enabled (${judgeCache.size} cached entries)`));
84
+ }
52
85
  console.log(dim(`Skill: ${skillContent ? skillMdPath : "(none)"}`));
86
+ // T-025: Batch mode validation
87
+ const useBatch = resolveBatchMode(options?.batch, provider);
88
+ if (useBatch) {
89
+ console.log(dim(`Batch mode: enabled (Anthropic Message Batches API — 50% cost savings)`));
90
+ }
53
91
  // Duration estimate
54
92
  const estimate = estimateDurationSec(provider, total, totalAssertions);
55
93
  console.log(dim(`Estimated duration: ${estimate.label}`));
@@ -59,6 +97,57 @@ export async function runEvalRun(skillDir) {
59
97
  const benchmarkCases = [];
60
98
  const tableRows = [];
61
99
  const runStart = Date.now();
100
+ // T-026: Two-round execution for batch mode
101
+ if (useBatch) {
102
+ await runBatchMode(evalsFile, systemPrompt, client, effectiveJudgeClient, provider, progress, benchmarkCases, tableRows, total);
103
+ }
104
+ else {
105
+ await runSequentialMode(evalsFile, systemPrompt, client, effectiveJudgeClient, provider, judgeCache, progress, benchmarkCases, tableRows, total);
106
+ }
107
+ // Flush judge cache to disk
108
+ judgeCache?.flush();
109
+ // Complete progress tracking
110
+ progress.complete();
111
+ const totalElapsed = ((Date.now() - runStart) / 1000).toFixed(1);
112
+ // Print results table
113
+ const headers = ["EVAL", "ASSERTION", "TEXT", "STATUS"];
114
+ console.log(bold(`\nEval Results: ${evalsFile.skill_name}\n`));
115
+ console.log(table(headers, tableRows));
116
+ // Compute summary
117
+ const passed = benchmarkCases.filter((c) => c.status === "pass").length;
118
+ const failed = benchmarkCases.filter((c) => c.status === "fail").length;
119
+ const errors = benchmarkCases.filter((c) => c.status === "error").length;
120
+ console.log(`\n${green(`${passed} passed`)} ${failed > 0 ? red(`${failed} failed`) : ""} ${errors > 0 ? yellow(`${errors} errors`) : ""} ${dim(`(${totalElapsed}s)`)}`.trim());
121
+ // Write benchmark.json
122
+ const benchmark = {
123
+ timestamp: new Date().toISOString(),
124
+ model,
125
+ skill_name: evalsFile.skill_name,
126
+ cases: benchmarkCases,
127
+ };
128
+ await writeBenchmark(skillDir, benchmark);
129
+ console.log(dim(`\nBenchmark written to ${skillDir}/evals/benchmark.json`));
130
+ }
131
+ // ---------------------------------------------------------------------------
132
+ // T-025: Resolve whether to use batch mode
133
+ // ---------------------------------------------------------------------------
134
+ function resolveBatchMode(batchFlag, provider) {
135
+ if (!batchFlag)
136
+ return false;
137
+ if (provider !== "anthropic") {
138
+ console.warn(yellow("Batch mode only supported with anthropic provider, running sequentially"));
139
+ return false;
140
+ }
141
+ if (!process.env.ANTHROPIC_API_KEY) {
142
+ console.warn(yellow("Batch mode requires ANTHROPIC_API_KEY, running sequentially"));
143
+ return false;
144
+ }
145
+ return true;
146
+ }
147
+ // ---------------------------------------------------------------------------
148
+ // Sequential mode (existing behavior)
149
+ // ---------------------------------------------------------------------------
150
+ async function runSequentialMode(evalsFile, systemPrompt, client, effectiveJudgeClient, provider, judgeCache, progress, benchmarkCases, tableRows, total) {
62
151
  for (let i = 0; i < evalsFile.evals.length; i++) {
63
152
  const evalCase = evalsFile.evals[i];
64
153
  const caseStart = Date.now();
@@ -68,18 +157,24 @@ export async function runEvalRun(skillDir) {
68
157
  completedCases: i,
69
158
  });
70
159
  try {
71
- // Step 1: Send prompt to LLM
72
160
  process.stdout.write(dim(`[${i + 1}/${total}] ${evalCase.name} — generating...`));
73
161
  const genResult = await client.generate(systemPrompt, evalCase.prompt);
74
162
  const genSec = ((Date.now() - caseStart) / 1000).toFixed(1);
75
163
  process.stdout.write(dim(` ${genSec}s`));
76
164
  progress.update({ phase: "judging" });
77
165
  process.stdout.write(dim(` judging ${evalCase.assertions.length} assertions...`));
78
- // Step 2: Judge each assertion
79
166
  const assertionResults = [];
80
167
  let passCount = 0;
81
168
  for (const assertion of evalCase.assertions) {
82
- const result = await judgeAssertion(genResult.text, assertion, client);
169
+ const judgeCall = () => judgeAssertion(genResult.text, assertion, client, effectiveJudgeClient);
170
+ let result;
171
+ if (judgeCache) {
172
+ result = await judgeCache.getOrCompute(assertion.text, genResult.text, effectiveJudgeClient.model, judgeCall);
173
+ result = { ...result, id: assertion.id, text: assertion.text };
174
+ }
175
+ else {
176
+ result = await judgeCall();
177
+ }
83
178
  assertionResults.push(result);
84
179
  if (result.pass)
85
180
  passCount++;
@@ -130,7 +225,6 @@ export async function runEvalRun(skillDir) {
130
225
  dim(`${classified.title}`),
131
226
  yellow("ERROR"),
132
227
  ]);
133
- // For auth errors, all subsequent cases will fail too — abort early
134
228
  if (classified.category === "auth" || classified.category === "provider_unavailable") {
135
229
  console.log(red(`\nAborting remaining cases — ${classified.category} error is not recoverable.`));
136
230
  console.log(dim(` ${classified.hint}\n`));
@@ -138,26 +232,113 @@ export async function runEvalRun(skillDir) {
138
232
  }
139
233
  }
140
234
  }
141
- // Complete progress tracking
142
- progress.complete();
143
- const totalElapsed = ((Date.now() - runStart) / 1000).toFixed(1);
144
- // Print results table
145
- const headers = ["EVAL", "ASSERTION", "TEXT", "STATUS"];
146
- console.log(bold(`\nEval Results: ${evalsFile.skill_name}\n`));
147
- console.log(table(headers, tableRows));
148
- // Compute summary
149
- const passed = benchmarkCases.filter((c) => c.status === "pass").length;
150
- const failed = benchmarkCases.filter((c) => c.status === "fail").length;
151
- const errors = benchmarkCases.filter((c) => c.status === "error").length;
152
- console.log(`\n${green(`${passed} passed`)} ${failed > 0 ? red(`${failed} failed`) : ""} ${errors > 0 ? yellow(`${errors} errors`) : ""} ${dim(`(${totalElapsed}s)`)}`.trim());
153
- // Write benchmark.json
154
- const benchmark = {
155
- timestamp: new Date().toISOString(),
156
- model,
157
- skill_name: evalsFile.skill_name,
158
- cases: benchmarkCases,
159
- };
160
- await writeBenchmark(skillDir, benchmark);
161
- console.log(dim(`\nBenchmark written to ${skillDir}/evals/benchmark.json`));
235
+ }
236
+ // ---------------------------------------------------------------------------
237
+ // T-026: Batch mode — two-round execution
238
+ // Round 1: Generate all outputs sequentially
239
+ // Round 2: Submit all judge calls as a single batch
240
+ // ---------------------------------------------------------------------------
241
+ async function runBatchMode(evalsFile, systemPrompt, client, effectiveJudgeClient, provider, progress, benchmarkCases, tableRows, total) {
242
+ // Round 1: Generate all outputs
243
+ console.log(bold("\n--- Round 1: Generating outputs ---\n"));
244
+ const generatedCases = [];
245
+ for (let i = 0; i < evalsFile.evals.length; i++) {
246
+ const evalCase = evalsFile.evals[i];
247
+ const caseStart = Date.now();
248
+ progress.update({
249
+ currentCase: evalCase.name,
250
+ phase: "generating",
251
+ completedCases: i,
252
+ });
253
+ try {
254
+ process.stdout.write(dim(`[${i + 1}/${total}] ${evalCase.name} — generating...`));
255
+ const genResult = await client.generate(systemPrompt, evalCase.prompt);
256
+ const genSec = ((Date.now() - caseStart) / 1000).toFixed(1);
257
+ console.log(dim(` ${genSec}s`));
258
+ generatedCases.push({ evalCase, output: genResult.text });
259
+ }
260
+ catch (err) {
261
+ const classified = classifyError(err, provider);
262
+ console.log(yellow(` error: ${classified.title}`));
263
+ generatedCases.push({ evalCase, output: "", error: err.message });
264
+ if (classified.category === "auth" || classified.category === "provider_unavailable") {
265
+ console.log(red(`\nAborting generation — ${classified.category} error is not recoverable.`));
266
+ break;
267
+ }
268
+ }
269
+ }
270
+ // Round 2: Collect all judge prompts and submit as single batch
271
+ console.log(bold("\n--- Round 2: Batch judging assertions ---\n"));
272
+ const batchRequests = [];
273
+ const caseIndexMap = [];
274
+ for (let i = 0; i < generatedCases.length; i++) {
275
+ const { evalCase, output, error } = generatedCases[i];
276
+ if (error) {
277
+ // Skip errored cases — already recorded
278
+ benchmarkCases.push({
279
+ eval_id: evalCase.id,
280
+ eval_name: evalCase.name,
281
+ status: "error",
282
+ error_message: error,
283
+ pass_rate: 0,
284
+ assertions: [],
285
+ });
286
+ tableRows.push([evalCase.name, "-", dim("generation error"), yellow("ERROR")]);
287
+ continue;
288
+ }
289
+ const startIdx = batchRequests.length;
290
+ for (let j = 0; j < evalCase.assertions.length; j++) {
291
+ batchRequests.push({
292
+ evalId: String(evalCase.id),
293
+ assertionIdx: j,
294
+ assertion: evalCase.assertions[j],
295
+ output,
296
+ });
297
+ }
298
+ caseIndexMap.push({ caseIdx: i, startIdx, count: evalCase.assertions.length });
299
+ }
300
+ if (batchRequests.length === 0) {
301
+ console.log(dim("No assertions to judge."));
302
+ return;
303
+ }
304
+ console.log(dim(`Submitting ${batchRequests.length} judge calls as batch...`));
305
+ const apiKey = process.env.ANTHROPIC_API_KEY;
306
+ const judgeModel = effectiveJudgeClient.model;
307
+ const { results: batchResults, costInfo } = await batchJudgeAssertions(batchRequests, client, effectiveJudgeClient, { apiKey, model: judgeModel });
308
+ // Map batch results back to per-case benchmark entries
309
+ for (const { caseIdx, startIdx, count } of caseIndexMap) {
310
+ const { evalCase } = generatedCases[caseIdx];
311
+ const caseResults = batchResults.slice(startIdx, startIdx + count);
312
+ let passCount = 0;
313
+ for (const result of caseResults) {
314
+ if (result.pass)
315
+ passCount++;
316
+ const truncatedText = result.text.length > 60
317
+ ? result.text.slice(0, 57) + "..."
318
+ : result.text;
319
+ tableRows.push([
320
+ evalCase.name,
321
+ result.id,
322
+ truncatedText,
323
+ result.pass ? green("PASS") : red("FAIL"),
324
+ ]);
325
+ }
326
+ const passRate = count > 0 ? passCount / count : 0;
327
+ const allPassed = passCount === count;
328
+ console.log(dim(` ${evalCase.name}: `) +
329
+ (allPassed ? green(`${passCount}/${count} passed`) : red(`${passCount}/${count} passed`)));
330
+ benchmarkCases.push({
331
+ eval_id: evalCase.id,
332
+ eval_name: evalCase.name,
333
+ status: allPassed ? "pass" : "fail",
334
+ error_message: null,
335
+ pass_rate: passRate,
336
+ assertions: caseResults,
337
+ });
338
+ }
339
+ // T-027: Print batch cost summary
340
+ if (costInfo) {
341
+ console.log(dim(`\nBatch cost: $${costInfo.batchCost.toFixed(4)} (50% discount vs $${costInfo.sequentialCost.toFixed(4)} sequential)`));
342
+ }
162
343
  }
163
344
  //# sourceMappingURL=run.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/commands/eval/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6DAA6D;AAC7D,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAEzE,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAEzD,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAC7E,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,MAAM,uCAAuC,CAAC;AACtE,OAAO,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAEzD,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,QAAQ,mBAAmB,CAAC,CAAC,CAAC;YAC5E,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,8CAA8C;IAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC5B,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACpD,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,iCAAiC,WAAW,wCAAwC,CAAC,CAAC,CAAC;IAC9G,CAAC;IAED,MAAM,YAAY,GAAG,qBAAqB,CAAC,YAAY,CAAC,CAAC;IAEzD,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAC3B,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,YAAY,CAAiB,CAAC;IACpF,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC;IACrC,MAAM,eAAe,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC
,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAErF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,aAAa,KAAK,MAAM,KAAK,aAAa,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,eAAe,aAAa,CAAC,CAAC,CAAC;IACrH,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;IAEpE,oBAAoB;IACpB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC;IACvE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,uBAAuB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC1D,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,kBAAkB,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC;IAEvF,qBAAqB;IACrB,MAAM,QAAQ,GAAG,IAAI,WAAW,CAAC,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAEpF,MAAM,cAAc,GAAoB,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,QAAQ,CAAC,MAAM,CAAC;YACd,WAAW,EAAE,QAAQ,CAAC,IAAI;YAC1B,KAAK,EAAE,YAAY;YACnB,cAAc,EAAE,CAAC;SAClB,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,KAAK,QAAQ,CAAC,IAAI,kBAAkB,CAAC,CAAC,CAAC;YAClF,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YACvE,MAAM,MAAM,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC;YAEzC,QAAQ,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;YAEtC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,QAAQ,CAAC,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC,CAAC;YAElF,+BAA+B;YAC/B,MAAM,gBAAgB,GAAG,EAAE,CAAC;YAC5B,IAAI,SAAS,GAAG,CAAC,CAAC;YAElB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;gBACvE,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC9B,IAAI,MAAM,CAAC,IAAI;oBAAE,SAAS,EAAE,CAAC;gBAE7B,MAAM,aAAa,GACjB,SAAS,CAAC,IAAI,C
AAC,MAAM,GAAG,EAAE;oBACxB,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK;oBACrC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC;gBAErB,SAAS,CAAC,IAAI,CAAC;oBACb,QAAQ,CAAC,IAAI;oBACb,SAAS,CAAC,EAAE;oBACZ,aAAa;oBACb,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;iBAC1C,CAAC,CAAC;YACL,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;gBAC7C,CAAC,CAAC,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM;gBACxC,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,SAAS,GAAG,SAAS,KAAK,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC;YAC3D,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC9D,OAAO,CAAC,GAAG,CACT,SAAS;gBACP,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,KAAK,QAAQ,IAAI,CAAC;gBACzC,CAAC,CAAC,GAAG,CAAC,IAAI,SAAS,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,SAAS,CAAC,GAAG,GAAG,CAAC,KAAK,QAAQ,IAAI,CAAC,CACvF,CAAC;YAEF,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;gBACnC,aAAa,EAAE,IAAI;gBACnB,SAAS,EAAE,QAAQ;gBACnB,UAAU,EAAE,gBAAgB;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAChD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,UAAU,CAAC,KAAK,KAAK,UAAU,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;YACxE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAEzC,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,KAAK,EAAE,CAAC,CAAC;YAEjD,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAG,GAAa,CAAC,OAAO;gBACrC,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YAEH,SAAS,CAAC,IAAI,CAAC;gBACb,QAAQ,CAAC,IAAI;gBACb,GAAG;gBACH,GAAG,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,CAAC;gBAC1B,MAAM,CAAC,OAAO,CAAC;aAChB,CAAC,CAAC;YAEH,oEAAoE;YACpE,IAAI,UAAU,CAAC,QAAQ,KAAK,MAAM,IAAI,UAAU,CAAC,QAAQ,KAAK,sBAAsB,EAAE,CAAC;gBACrF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,gCAAgC,UAAU,CAAC,QAAQ,4BAA4B,CAAC,CAAC,CA
AC;gBAClG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,UAAU,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;gBAC3C,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,QAAQ,CAAC,QAAQ,EAAE,CAAC;IAEpB,MAAM,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAEjE,sBAAsB;IACtB,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,SAAS,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC;IAEvC,kBAAkB;IAClB,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IACzE,OAAO,CAAC,GAAG,CACT,KAAK,KAAK,CAAC,GAAG,MAAM,SAAS,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,GAAG,CAAC,IAAI,YAAY,IAAI,CAAC,EAAE,CAAC,IAAI,EAAE,CAClK,CAAC;IAEF,uBAAuB;IACvB,MAAM,SAAS,GAAoB;QACjC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK;QACL,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,KAAK,EAAE,cAAc;KACtB,CAAC;IAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC1C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,uBAAuB,CAAC,CAAC,CAAC;AAC9E,CAAC"}
1
+ {"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/commands/eval/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6DAA6D;AAC7D,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,eAAe,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAEzE,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAEzD,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAC7E,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,MAAM,uCAAuC,CAAC;AACtE,OAAO,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AACzD,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uCAAuC,CAAC;AAC9E,OAAO,EAAE,oBAAoB,EAAE,MAAM,2BAA2B,CAAC;AAUjE,SAAS,mBAAmB,CAAC,IAAY;IACvC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IACrC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,IAAI,kEAAkE,CAAC,CAAC;IAC3H,CAAC;IACD,OAAO;QACL,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAiB;QACnD,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC;KAClC,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB,EAAE,OAAwB;IACzE,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,QAAQ,mBAAmB,CAAC,CAAC,CAAC;YAC5E,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,8CAA8C;IAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC5B,YAAY,GAAG,YAAY,CAAC,WAA
W,EAAE,OAAO,CAAC,CAAC;IACpD,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,iCAAiC,WAAW,wCAAwC,CAAC,CAAC,CAAC;IAC9G,CAAC;IAED,MAAM,YAAY,GAAG,qBAAqB,CAAC,YAAY,CAAC,CAAC;IAEzD,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAC3B,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,YAAY,CAAiB,CAAC;IACpF,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC;IACrC,MAAM,eAAe,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAErF,oEAAoE;IACpE,IAAI,WAAkC,CAAC;IACvC,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;QACxB,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,KAAK,EAAE,cAAc,EAAE,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;QACnG,WAAW,GAAG,eAAe,CAAC,EAAE,QAAQ,EAAE,aAAa,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC;QAElF,0BAA0B;QAC1B,MAAM,OAAO,GAAG,qBAAqB,CAAC,KAAK,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QAChE,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IACD,MAAM,oBAAoB,GAAG,WAAW,IAAI,MAAM,CAAC;IAEnD,gDAAgD;IAChD,MAAM,UAAU,GAAG,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC;IAEtE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,aAAa,KAAK,MAAM,KAAK,aAAa,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,eAAe,aAAa,CAAC,CAAC,CAAC;IACrH,IAAI,WAAW,EAAE,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,gBAAgB,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,UAAU,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,yBAAyB,UAAU,CAAC,IAAI,kBAAkB,CAAC,CAAC,CAAC;IAC/E,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;IAEpE,+BAA+B;IAC/B,MAAM,QAAQ,GAAG,gBAAgB,CAAC,OAAO,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAC5D,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,wEAAwE,CAAC,CAAC,CAAC;IAC7F,CAAC;IAED,oBAAoB;IACpB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC;IACvE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,uBAAuB,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC1D,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,kBAAkB,IAAI,CAAC,QAAQ,
EAAE,OAAO,EAAE,qBAAqB,CAAC,IAAI,CAAC,CAAC,CAAC;IAEvF,qBAAqB;IACrB,MAAM,QAAQ,GAAG,IAAI,WAAW,CAAC,QAAQ,EAAE,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;IAEpF,MAAM,cAAc,GAAoB,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE5B,4CAA4C;IAC5C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,YAAY,CAChB,SAAS,EAAE,YAAY,EAAE,MAAM,EAAE,oBAAoB,EAAE,QAAQ,EAC/D,QAAQ,EAAE,cAAc,EAAE,SAAS,EAAE,KAAK,CAC3C,CAAC;IACJ,CAAC;SAAM,CAAC;QACN,MAAM,iBAAiB,CACrB,SAAS,EAAE,YAAY,EAAE,MAAM,EAAE,oBAAoB,EAAE,QAAQ,EAC/D,UAAU,EAAE,QAAQ,EAAE,cAAc,EAAE,SAAS,EAAE,KAAK,CACvD,CAAC;IACJ,CAAC;IAED,4BAA4B;IAC5B,UAAU,EAAE,KAAK,EAAE,CAAC;IAEpB,6BAA6B;IAC7B,QAAQ,CAAC,QAAQ,EAAE,CAAC;IAEpB,MAAM,YAAY,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAEjE,sBAAsB;IACtB,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,SAAS,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC;IAEvC,kBAAkB;IAClB,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IACzE,OAAO,CAAC,GAAG,CACT,KAAK,KAAK,CAAC,GAAG,MAAM,SAAS,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,GAAG,CAAC,IAAI,YAAY,IAAI,CAAC,EAAE,CAAC,IAAI,EAAE,CAClK,CAAC;IAEF,uBAAuB;IACvB,MAAM,SAAS,GAAoB;QACjC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK;QACL,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,KAAK,EAAE,cAAc;KACtB,CAAC;IAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC1C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,uBAAuB,CAAC,CAAC,CAAC;AAC9E,CAAC;AAED,8EAA8E;AAC9E,2CAA2C;AAC3C,8EAA8E;AAE9E,SAAS,gBAAgB,CAAC,
SAA8B,EAAE,QAAsB;IAC9E,IAAI,CAAC,SAAS;QAAE,OAAO,KAAK,CAAC;IAE7B,IAAI,QAAQ,KAAK,WAAW,EAAE,CAAC;QAC7B,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,yEAAyE,CAAC,CAAC,CAAC;QAChG,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;QACnC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,6DAA6D,CAAC,CAAC,CAAC;QACpF,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,sCAAsC;AACtC,8EAA8E;AAE9E,KAAK,UAAU,iBAAiB,CAC9B,SAA+D,EAC/D,YAAoB,EACpB,MAAiB,EACjB,oBAA+B,EAC/B,QAAsB,EACtB,UAAiE,EACjE,QAAqB,EACrB,cAA+B,EAC/B,SAAqB,EACrB,KAAa;IAEb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,QAAQ,CAAC,MAAM,CAAC;YACd,WAAW,EAAE,QAAQ,CAAC,IAAI;YAC1B,KAAK,EAAE,YAAY;YACnB,cAAc,EAAE,CAAC;SAClB,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,KAAK,QAAQ,CAAC,IAAI,kBAAkB,CAAC,CAAC,CAAC;YAClF,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YACvE,MAAM,MAAM,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC;YAEzC,QAAQ,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;YACtC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,QAAQ,CAAC,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC,CAAC;YAElF,MAAM,gBAAgB,GAAG,EAAE,CAAC;YAC5B,IAAI,SAAS,GAAG,CAAC,CAAC;YAElB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;gBAC5C,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,oBAAoB,CAAC,CAAC;gBAEhG,IAAI,MAAM,CAAC;gBACX,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,GAAG,MAAM,UAAU,CAAC,YAAY,CACpC,SAAS,CAAC,IAAI,EACd,SAAS,CAAC,IAAI,EACd,oBAAoB,CAAC,KAAK,EAC1B,SAAS,CACV,CAAC;oBACF,MAAM,GAAG,EAAE,GAAG,MAAM,EAAE,EAAE,EAAE,SAAS,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,CAAC,IAAI,EAAE,CAAC;gBACjE,CAAC;qBAAM,CAAC;oBACN,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;gBAC7B,CAAC;gBAED,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAA
C,CAAC;gBAC9B,IAAI,MAAM,CAAC,IAAI;oBAAE,SAAS,EAAE,CAAC;gBAE7B,MAAM,aAAa,GACjB,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE;oBACxB,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK;oBACrC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC;gBAErB,SAAS,CAAC,IAAI,CAAC;oBACb,QAAQ,CAAC,IAAI;oBACb,SAAS,CAAC,EAAE;oBACZ,aAAa;oBACb,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;iBAC1C,CAAC,CAAC;YACL,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;gBAC7C,CAAC,CAAC,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM;gBACxC,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,SAAS,GAAG,SAAS,KAAK,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC;YAC3D,MAAM,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC9D,OAAO,CAAC,GAAG,CACT,SAAS;gBACP,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,KAAK,QAAQ,IAAI,CAAC;gBACzC,CAAC,CAAC,GAAG,CAAC,IAAI,SAAS,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,SAAS,CAAC,GAAG,GAAG,CAAC,KAAK,QAAQ,IAAI,CAAC,CACvF,CAAC;YAEF,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;gBACnC,aAAa,EAAE,IAAI;gBACnB,SAAS,EAAE,QAAQ;gBACnB,UAAU,EAAE,gBAAgB;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAChD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC9B,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,UAAU,CAAC,KAAK,KAAK,UAAU,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;YACxE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAEzC,QAAQ,CAAC,MAAM,CAAC,EAAE,SAAS,EAAE,UAAU,CAAC,KAAK,EAAE,CAAC,CAAC;YAEjD,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAG,GAAa,CAAC,OAAO;gBACrC,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YAEH,SAAS,CAAC,IAAI,CAAC;gBACb,QAAQ,CAAC,IAAI;gBACb,GAAG;gBACH,GAAG,CAAC,GAAG,UAAU,CAAC,KAAK,EAAE,CAAC;gBAC1B,MAAM,CAAC,OAAO,CAAC;aAChB,CAAC,CAAC;YAEH,IAAI,UAAU,CAAC,QAAQ,KAAK,MAAM,IAAI,UAAU,CAAC,QAAQ,KAAK,sBAAsB,EAAE,CA
AC;gBACrF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,gCAAgC,UAAU,CAAC,QAAQ,4BAA4B,CAAC,CAAC,CAAC;gBAClG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,UAAU,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;gBAC3C,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,0CAA0C;AAC1C,+CAA+C;AAC/C,sDAAsD;AACtD,8EAA8E;AAE9E,KAAK,UAAU,YAAY,CACzB,SAA+D,EAC/D,YAAoB,EACpB,MAAiB,EACjB,oBAA+B,EAC/B,QAAsB,EACtB,QAAqB,EACrB,cAA+B,EAC/B,SAAqB,EACrB,KAAa;IAEb,gCAAgC;IAChC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC,CAAC;IAQ7D,MAAM,cAAc,GAAoB,EAAE,CAAC;IAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,QAAQ,CAAC,MAAM,CAAC;YACd,WAAW,EAAE,QAAQ,CAAC,IAAI;YAC1B,KAAK,EAAE,YAAY;YACnB,cAAc,EAAE,CAAC;SAClB,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,KAAK,QAAQ,CAAC,IAAI,kBAAkB,CAAC,CAAC,CAAC;YAClF,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YACvE,MAAM,MAAM,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5D,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC;YAEhC,cAAc,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;QAC5D,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,UAAU,GAAG,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;YAChD,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,UAAU,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAEnD,cAAc,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,KAAK,EAAG,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;YAE7E,IAAI,UAAU,CAAC,QAAQ,KAAK,MAAM,IAAI,UAAU,CAAC,QAAQ,KAAK,sBAAsB,EAAE,CAAC;gBACrF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,2BAA2B,UAAU,CAAC,QAAQ,4BAA4B,CAAC,CAAC,CAAC;gBAC7F,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,gEAAgE;IAChE,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC,CAAC;IAEnE,MAAM,aAAa,GAAwB,EAAE,CAAC;IAC9C,MAAM,YAAY,GAAgE,EAAE,CAAC;IAErF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC/C,MAAM,EAAE,QAAQ
,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;QAEtD,IAAI,KAAK,EAAE,CAAC;YACV,wCAAwC;YACxC,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAE,KAAK;gBACpB,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YACH,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,kBAAkB,CAAC,EAAE,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC/E,SAAS;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC;QACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACpD,aAAa,CAAC,IAAI,CAAC;gBACjB,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC3B,YAAY,EAAE,CAAC;gBACf,SAAS,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC;gBACjC,MAAM;aACP,CAAC,CAAC;QACL,CAAC;QACD,YAAY,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IACjF,CAAC;IAED,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC,CAAC;QAC5C,OAAO;IACT,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,cAAc,aAAa,CAAC,MAAM,0BAA0B,CAAC,CAAC,CAAC;IAE/E,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAkB,CAAC;IAC9C,MAAM,UAAU,GAAG,oBAAoB,CAAC,KAAK,CAAC;IAE9C,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,GAAG,MAAM,oBAAoB,CACpE,aAAa,EACb,MAAM,EACN,oBAAoB,EACpB,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,CAC9B,CAAC;IAEF,uDAAuD;IACvD,KAAK,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,IAAI,YAAY,EAAE,CAAC;QACxD,MAAM,EAAE,QAAQ,EAAE,GAAG,cAAc,CAAC,OAAO,CAAC,CAAC;QAC7C,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,KAAK,CAAC,CAAC;QAEnE,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;YACjC,IAAI,MAAM,CAAC,IAAI;gBAAE,SAAS,EAAE,CAAC;YAE7B,MAAM,aAAa,GACjB,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE;gBACrB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK;gBAClC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC;YAElB,SAAS,CAAC,IAAI,CAAC;gBACb,QAAQ,CAAC,IAAI;gBACb,MAAM,CAAC,EAAE;gBACT,aAAa;gBACb,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;aAC1C,CAAC,CAAC;Q
ACL,CAAC;QAED,MAAM,QAAQ,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QACnD,MAAM,SAAS,GAAG,SAAS,KAAK,KAAK,CAAC;QAEtC,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,KAAK,QAAQ,CAAC,IAAI,IAAI,CAAC;YAC3B,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,SAAS,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,SAAS,IAAI,KAAK,SAAS,CAAC,CAAC,CAC1F,CAAC;QAEF,cAAc,CAAC,IAAI,CAAC;YAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;YACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;YACxB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;YACnC,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,QAAQ;YACnB,UAAU,EAAE,WAAW;SACxB,CAAC,CAAC;IACL,CAAC;IAED,kCAAkC;IAClC,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,kBAAkB,QAAQ,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB,QAAQ,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,cAAc,CAAC,CAC3H,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1,7 @@
1
/** Options accepted by `runEvalSweep`. */
+ export interface SweepOptions {
2
/** Comma-separated list of model identifiers (e.g. 'provider/model,provider/model'); entries are trimmed and empty ones dropped. */
+ models: string;
3
/** Judge identifier; forwarded to the sweep runner unchanged. */
+ judge: string;
4
/** Runs per model; `runEvalSweep` falls back to 1 when omitted. */
+ runs?: number;
5
/** Forwarded to the sweep runner; `runEvalSweep` falls back to 5 when omitted. NOTE(review): exact unit of parallelism is defined by the runner — confirm there. */
+ concurrency?: number;
6
+ }
7
/**
 * Runs the evals found under `skillDir` across every model listed in
 * `options.models`, streams progress to stdout and prints a ranked summary table.
 * Calls `process.exit(1)` when the evals cannot be loaded, no model is given,
 * or the sweep produces no result.
 */
+ export declare function runEvalSweep(skillDir: string, options: SweepOptions): Promise<void>;
@@ -0,0 +1,99 @@
1
+ // ---------------------------------------------------------------------------
2
+ // vskill eval sweep -- run evals across multiple models
3
+ // ---------------------------------------------------------------------------
4
+ import { existsSync, readFileSync } from "node:fs";
5
+ import { join } from "node:path";
6
+ import { loadAndValidateEvals, EvalValidationError } from "../../eval/schema.js";
7
+ import { buildEvalSystemPrompt } from "../../eval/prompt-builder.js";
8
+ import { runSweep } from "../../eval-server/sweep-runner.js";
9
+ import { green, red, bold, dim, table } from "../../utils/output.js";
/**
 * Run a skill's eval suite against several models and print a ranked summary.
 *
 * Steps: load and validate the evals for `skillDir`, build the system prompt
 * from `SKILL.md` (empty prompt when the file is absent), stream events from
 * `runSweep` to the terminal, then print a table sorted by mean pass rate.
 *
 * Every failure path prints a message and calls `process.exit(1)`; the
 * `return` that follows keeps the function from continuing if `process.exit`
 * does not terminate (for example when it is stubbed).
 *
 * @param skillDir Directory of the skill under evaluation.
 * @param options  Sweep settings: `models` is a comma-separated list, `judge`
 *                 is forwarded unchanged, `runs` defaults to 1 and
 *                 `concurrency` defaults to 5. Both numbers must be positive
 *                 integers.
 * @returns A promise that resolves once the summary has been printed.
 */
export async function runEvalSweep(skillDir, options) {
    // Load and validate evals.json
    let evalsFile;
    try {
        evalsFile = loadAndValidateEvals(skillDir);
    }
    catch (err) {
        // A thrown value is not guaranteed to be an Error instance.
        const message = err instanceof Error ? err.message : String(err);
        if (err instanceof EvalValidationError) {
            console.error(red(`Invalid evals.json: ${message}`));
        }
        else {
            console.error(red(`Error loading evals: ${message}`));
        }
        process.exit(1);
        return;
    }
    const skillMdPath = join(skillDir, "SKILL.md");
    const skillContent = existsSync(skillMdPath) ? readFileSync(skillMdPath, "utf-8") : "";
    const systemPrompt = buildEvalSystemPrompt(skillContent);
    const modelList = options.models.split(",").map((m) => m.trim()).filter(Boolean);
    if (modelList.length === 0) {
        console.error(red("No models specified. Use --models 'provider/model,provider/model'"));
        process.exit(1);
        return;
    }
    const runs = options.runs ?? 1;
    const concurrency = options.concurrency ?? 5;
    // `??` does not catch NaN (e.g. a parseInt result for non-numeric input),
    // so reject anything that is not a positive integer before starting work.
    for (const [label, value] of [["runs", runs], ["concurrency", concurrency]]) {
        if (!Number.isInteger(value) || value < 1) {
            console.error(red(`Invalid ${label} value "${value}": expected a positive integer.`));
            process.exit(1);
            return;
        }
    }
    console.log(bold(`\nSweep: ${evalsFile.skill_name}`));
    console.log(dim(`Models: ${modelList.join(", ")}`));
    console.log(dim(`Judge: ${options.judge}`));
    console.log(dim(`Runs per model: ${runs}`));
    console.log(dim(`Cases: ${evalsFile.evals.length}\n`));
    let sweepResult = null;
    for await (const event of runSweep({
        skillDir,
        skillName: evalsFile.skill_name,
        systemPrompt,
        evalCases: evalsFile.evals,
        models: modelList,
        judge: options.judge,
        runs,
        concurrency,
    })) {
        switch (event.type) {
            case "sweep_model_start":
                process.stdout.write(dim(`[${event.data.modelIndex + 1}/${event.data.totalModels}] ${event.data.model} — `));
                break;
            case "sweep_model_progress":
                process.stdout.write(dim(`\r[${event.data.model}] run ${event.data.run}/${event.data.totalRuns} case ${event.data.currentCase}/${event.data.totalCases} (${event.data.percentComplete}%)`));
                break;
            case "sweep_model_complete":
                if (event.data.status === "complete" && event.data.passRate) {
                    console.log(green(` done`) + dim(` (pass rate: ${(event.data.passRate.mean * 100).toFixed(1)}%)`));
                }
                else {
                    console.log(red(` error: ${event.data.errorMessage || "unknown"}`));
                }
                break;
            case "sweep_complete":
                sweepResult = event.data;
                break;
        }
    }
    if (!sweepResult) {
        console.error(red("\nSweep failed to produce results."));
        process.exit(1);
        return;
    }
    // Print summary table
    const headers = ["RANK", "MODEL", "PASS RATE", "DURATION", "COST", "STATUS"];
    // Only completed models are trusted to carry stats; everything else sorts
    // last instead of crashing the comparator or feeding it NaN.
    const isComplete = (m) => m.status === "complete";
    const rankKey = (m) => (isComplete(m) && m.passRate ? m.passRate.mean : Number.NEGATIVE_INFINITY);
    const sorted = [...sweepResult.models].sort((a, b) => {
        const keyA = rankKey(a);
        const keyB = rankKey(b);
        // Explicit equality check: (-Infinity) - (-Infinity) would be NaN.
        return keyA === keyB ? 0 : keyB - keyA;
    });
    const rows = sorted.map((m, i) => {
        const totalCost = m.cost?.total ?? 0;
        return [
            String(i + 1),
            `${m.provider}/${m.model}`,
            isComplete(m) && m.passRate ? formatStats(m.passRate, true) : "-",
            isComplete(m) && m.duration ? formatStats(m.duration, false, "ms") : "-",
            totalCost > 0 ? `$${totalCost.toFixed(4)}` : "-",
            isComplete(m) ? green("OK") : red("ERR"),
        ];
    });
    console.log(bold("\nSweep Results\n"));
    console.log(table(headers, rows));
    console.log(dim(`\nLeaderboard saved to ${skillDir}/evals/leaderboard/`));
}
/**
 * Render a `{ mean, stddev }` pair for the summary table.
 *
 * @param stats     Object with numeric `mean` and `stddev`.
 * @param asPercent When true, values are fractions shown as percentages with
 *                  one decimal; otherwise they are rounded to integers.
 * @param suffix    Unit appended in the non-percent form (e.g. "ms").
 * @returns The mean, followed by " (±spread)" only when the spread is still
 *          non-zero at the displayed precision. Returns "-" when `stats` is
 *          missing or its mean is not a finite number.
 */
function formatStats(stats, asPercent, suffix = "") {
    if (!stats || !Number.isFinite(stats.mean)) {
        return "-";
    }
    const stddev = Number.isFinite(stats.stddev) ? stats.stddev : 0;
    if (asPercent) {
        const meanText = `${(stats.mean * 100).toFixed(1)}%`;
        const spreadText = (stddev * 100).toFixed(1);
        // Skip a spread that would print as "±0.0%".
        return Number(spreadText) > 0 ? `${meanText} (±${spreadText}%)` : meanText;
    }
    const meanText = `${Math.round(stats.mean)}${suffix}`;
    const spread = Math.round(stddev);
    // Skip a spread that would print as "±0".
    return spread > 0 ? `${meanText} (±${spread}${suffix})` : meanText;
}
99
+ //# sourceMappingURL=sweep.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sweep.js","sourceRoot":"","sources":["../../../src/commands/eval/sweep.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,wDAAwD;AACxD,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,mCAAmC,CAAC;AAE7D,OAAO,EAAE,KAAK,EAAE,GAAG,EAAU,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAS7E,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB,EAAE,OAAqB;IACxE,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC3D,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,MAAM,YAAY,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvF,MAAM,YAAY,GAAG,qBAAqB,CAAC,YAAY,CAAC,CAAC;IACzD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEjF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,mEAAmE,CAAC,CAAC,CAAC;QACxF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAC/B,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAE7C,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,SAAS,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,SAAS,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;IAEvD,IAAI,WAAW,GAAuB
,IAAI,CAAC;IAE3C,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,QAAQ,CAAC;QACjC,QAAQ;QACR,SAAS,EAAE,SAAS,CAAC,UAAU;QAC/B,YAAY;QACZ,SAAS,EAAE,SAAS,CAAC,KAAK;QAC1B,MAAM,EAAE,SAAS;QACjB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,IAAI;QACJ,WAAW;KACZ,CAAC,EAAE,CAAC;QACH,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,mBAAmB;gBACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC7G,MAAM;YAER,KAAK,sBAAsB;gBACzB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,SAAS,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,SAAS,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,KAAK,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC,CAAC;gBAC5L,MAAM;YAER,KAAK,sBAAsB;gBACzB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,UAAU,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAC5D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,gBAAgB,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;gBACrG,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC;gBACtE,CAAC;gBACD,MAAM;YAER,KAAK,gBAAgB;gBACnB,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC;gBACzB,MAAM;QACV,CAAC;IACH,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC,CAAC;QACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAC7E,MAAM,MAAM,GAAG,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACzF,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC;QACb,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE;QAC1B,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;QAC7D,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,E
AAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;QACpE,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG;QACtD,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC;KACnD,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,qBAAqB,CAAC,CAAC,CAAC;AAC5E,CAAC;AAED,SAAS,WAAW,CAAC,KAAiB,EAAE,SAAkB,EAAE,MAAM,GAAG,EAAE;IACrE,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IACjH,CAAC;IACD,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;AACnH,CAAC"}
@@ -1,5 +1,15 @@
1
1
/**
 * Entry point for the `eval` command family. Dispatches on `subcommand`
 * (serve, init, run, sweep, coverage, generate-all, credentials).
 * For init/run/sweep `target` is `<plugin>/<skill>`; for `credentials` it is
 * the sub-subcommand (set, list or check).
 */
  export declare function evalCommand(subcommand: string, target?: string, opts?: {
2
2
/** Coerced to a boolean and passed to `init` and `generate-all`. */
  force?: boolean;
3
/** Eval type for `init`: "integration" or "all" are passed through, any other value becomes "unit". */
+ type?: string;
3
4
  root?: string;
4
5
  port?: string;
6
/** Credential name; required by `credentials set`. */
+ credentialKey?: string;
7
/** Parsed with `parseInt(value, 10)`; used by `run`, `generate-all` and `sweep`. */
+ concurrency?: string;
8
/** Forwarded to `run` unchanged. */
+ judgeModel?: string;
9
/** Disables caching for `run` when exactly `true`. */
+ noCache?: boolean;
10
/** Commander sets this to `false` for `--no-cache`; `false` also disables caching for `run`. */
+ cache?: boolean;
11
/** Comma-separated model list; required by `sweep`. */
+ models?: string;
12
/** Judge identifier; required by `sweep`. */
+ judge?: string;
13
/** Parsed with `parseInt(value, 10)` and forwarded to `sweep`. */
+ runs?: string;
14
/** Forwarded to `run` unchanged. */
+ batch?: boolean;
5
15
  }): Promise<void>;
@@ -19,7 +19,8 @@ export async function evalCommand(subcommand, target, opts = {}) {
19
19
  }
20
20
  const skillDir = resolveSkillDir(root, target);
21
21
  const { runEvalInit } = await import("./eval/init.js");
22
- return runEvalInit(skillDir, !!opts.force);
22
+ const evalType = (opts.type === "integration" || opts.type === "all") ? opts.type : "unit";
23
+ return runEvalInit(skillDir, !!opts.force, evalType);
23
24
  }
24
25
  case "run": {
25
26
  if (!target) {
@@ -28,7 +29,14 @@ export async function evalCommand(subcommand, target, opts = {}) {
28
29
  }
29
30
  const skillDir = resolveSkillDir(root, target);
30
31
  const { runEvalRun } = await import("./eval/run.js");
31
- return runEvalRun(skillDir);
32
+ // Commander uses --no-cache to set cache=false (noCache is undefined)
33
+ const noCache = opts.noCache === true || opts.cache === false;
34
+ return runEvalRun(skillDir, {
35
+ concurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : undefined,
36
+ judgeModel: opts.judgeModel,
37
+ noCache,
38
+ batch: opts.batch,
39
+ });
32
40
  }
33
41
  case "coverage": {
34
42
  const { runEvalCoverage } = await import("./eval/coverage.js");
@@ -36,11 +44,61 @@ export async function evalCommand(subcommand, target, opts = {}) {
36
44
  }
37
45
  case "generate-all": {
38
46
  const { runEvalGenerateAll } = await import("./eval/generate-all.js");
39
- return runEvalGenerateAll(root, !!opts.force);
47
+ const batchConcurrency = opts.concurrency ? parseInt(opts.concurrency, 10) : undefined;
48
+ return runEvalGenerateAll(root, !!opts.force, batchConcurrency);
49
+ }
50
+ case "sweep": {
51
+ if (!target) {
52
+ console.error(red("Usage: vskill eval sweep <plugin>/<skill> --models '...' --judge '...'"));
53
+ process.exit(1);
54
+ }
55
+ if (!opts.models) {
56
+ console.error(red("--models flag is required (comma-separated, e.g., 'anthropic/claude-sonnet-4,openrouter/meta-llama/llama-3.1-70b')"));
57
+ process.exit(1);
58
+ }
59
+ if (!opts.judge) {
60
+ console.error(red("--judge flag is required (e.g., 'anthropic/claude-sonnet-4')"));
61
+ process.exit(1);
62
+ }
63
+ const skillDir = resolveSkillDir(root, target);
64
+ const { runEvalSweep } = await import("./eval/sweep.js");
65
+ return runEvalSweep(skillDir, {
66
+ models: opts.models,
67
+ judge: opts.judge,
68
+ runs: opts.runs ? parseInt(opts.runs, 10) : undefined,
69
+ concurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : undefined,
70
+ });
71
+ }
72
+ case "credentials": {
73
+ if (!target) {
74
+ console.error(red("Usage: vskill credentials <set|list|check> [plugin/skill] [KEY]"));
75
+ process.exit(1);
76
+ }
77
+ // target is the sub-subcommand: set, list, check
78
+ // For credentials, we use CWD as skillDir (or --root)
79
+ const credSkillDir = root;
80
+ const { runCredentialsSet, runCredentialsList, runCredentialsCheck } = await import("./eval/credentials.js");
81
+ switch (target) {
82
+ case "set": {
83
+ const key = opts.credentialKey;
84
+ if (!key) {
85
+ console.error(red("Usage: vskill credentials set <KEY>"));
86
+ process.exit(1);
87
+ }
88
+ return runCredentialsSet(credSkillDir, key);
89
+ }
90
+ case "list":
91
+ return runCredentialsList(credSkillDir);
92
+ case "check":
93
+ return runCredentialsCheck(credSkillDir);
94
+ default:
95
+ console.error(red(`Unknown credentials subcommand: "${target}"\n`) + dim("Available: set, list, check"));
96
+ }
97
+ break;
40
98
  }
41
99
  default:
42
100
  console.error(red(`Unknown subcommand: "${subcommand}"\n`) +
43
- dim("Available: serve, init, run, coverage, generate-all"));
101
+ dim("Available: serve, init, run, sweep, coverage, generate-all, credentials"));
44
102
  }
45
103
  }
46
104
  function resolveSkillDir(root, target) {
@@ -1 +1 @@
1
- {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAAkB,EAClB,MAAe,EACf,OAA0D,EAAE;IAE5D,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAE3D,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACxD,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACvD,OAAO,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7C,CAAC;QAED,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC,CAAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACrD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC;QAED,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC/D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAED,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YACtE,OAAO,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChD,CAAC;QAED;YACE,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,wBAAwB,UAAU,KAAK,CAAC;gBAC1C,GAAG,CAAC,qDAAqD,CAAC,CAC7D,CAAC;IACN,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,MAAc;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,
CAAC;IAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,mBAAmB,MAAM,sCAAsC,CAAC,CACrE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,sEAAsE;IACtE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvE,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,0CAA0C;IAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,IAAI,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,sEAAsE;IACtE,OAAO,UAAU,CAAC;AACpB,CAAC"}
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAAkB,EAClB,MAAe,EACf,OAAiP,EAAE;IAEnP,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAE3D,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACxD,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACvD,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,aAAa,IAAI,IAAI,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;YAC3F,OAAO,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACvD,CAAC;QAED,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC,CAAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACrD,sEAAsE;YACtE,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,KAAK,CAAC;YAC9D,OAAO,UAAU,CAAC,QAAQ,EAAE;gBAC1B,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBAC1E,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,OAAO;gBACP,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;QACL,CAAC;QAED,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CA
AC,CAAC;YAC/D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAED,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YACtE,MAAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACvF,OAAO,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QAClE,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wEAAwE,CAAC,CAAC,CAAC;gBAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oHAAoH,CAAC,CAAC,CAAC;gBACzI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;gBAChB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAC,CAAC;gBACnF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,QAAQ,EAAE;gBAC5B,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBACrD,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;aAC3E,CAAC,CAAC;QACL,CAAC;QAED,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,iEAAiE,CAAC,CAAC,CAAC;gBACtF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,iDAAiD;YACjD,sDAAsD;YACtD,MAAM,YAAY,GAAG,IAAI,CAAC;YAC1B,MAAM,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;YAC7G,QAAQ,MAAM,EAAE,CAAC;gBACf,KAAK,KAAK,CAAC,CAAC,CAAC;oBACX,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC;oBAC/B,IAAI,CAAC,GAAG,EAAE,CAAC;wBACT,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAClB,CAAC;oBACD,OAAO,iBAAiB,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC;gBAC9C,CAAC;gBACD,KAAK,MAAM;oBACT,OAAO,kBAAkB,CAAC,YAAY,CAAC,CAAC;gBAC1C,KAAK,OAAO;oBACV,OAAO,mBAAmB,CAAC,
YAAY,CAAC,CAAC;gBAC3C;oBACE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oCAAoC,MAAM,KAAK,CAAC,GAAG,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;YAC7G,CAAC;YACD,MAAM;QACR,CAAC;QAED;YACE,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,wBAAwB,UAAU,KAAK,CAAC;gBAC1C,GAAG,CAAC,yEAAyE,CAAC,CACjF,CAAC;IACN,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,MAAc;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,mBAAmB,MAAM,sCAAsC,CAAC,CACrE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,sEAAsE;IACtE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvE,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,0CAA0C;IAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,IAAI,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,sEAAsE;IACtE,OAAO,UAAU,CAAC;AACpB,CAAC"}
@@ -0,0 +1,27 @@
1
+ import type { Assertion } from "./schema.js";
2
+ import type { AssertionResult } from "./judge.js";
3
+ import type { LlmClient } from "./llm.js";
4
+ import type { McpDependency } from "./mcp-detector.js";
5
/**
 * One entry in the `requests` array passed to `batchJudgeAssertions`.
 * NOTE(review): field meanings below are inferred from names and types only;
 * the implementation is not visible here — confirm against batch-judge.js.
 */
+ export interface BatchJudgeRequest {
6
/** Presumably the id of the eval case the assertion belongs to. */
+ evalId: string;
7
/** Presumably the position of the assertion within its eval case. */
+ assertionIdx: number;
8
+ assertion: Assertion;
9
/** Presumably the model output the assertion is judged against. */
+ output: string;
10
+ mcpDeps?: McpDependency[];
11
+ }
12
/**
 * Settings passed as the `options` argument of `batchJudgeAssertions`.
 * NOTE(review): defaults for the optional fields are not visible in this
 * declaration — confirm in batch-judge.js.
 */
+ export interface BatchJudgeOptions {
13
+ apiKey: string;
14
+ model?: string;
15
/** Presumably the request count below which batching is not used — TODO confirm. */
+ minBatchSize?: number;
16
+ }
17
/**
 * Maps an elapsed time in milliseconds to a number.
 * NOTE(review): presumably the delay before the next status poll, in ms;
 * the schedule itself is defined in batch-judge.js — confirm there.
 */
+ export declare function getPollInterval(elapsedMs: number): number;
18
/**
 * Cost figures returned by `calculateBatchCost` and, when available, by
 * `batchJudgeAssertions`.
 * NOTE(review): currency/units and how `savings` is derived are not visible
 * in this declaration — confirm in batch-judge.js.
 */
+ export interface BatchCostInfo {
19
+ batchCost: number;
20
+ sequentialCost: number;
21
+ savings: number;
22
+ }
23
/**
 * Computes a `BatchCostInfo` from input/output token counts for a model.
 * NOTE(review): pricing source and handling of unknown models are not visible
 * here — confirm in batch-judge.js.
 */
+ export declare function calculateBatchCost(inputTokens: number, outputTokens: number, model: string): BatchCostInfo;
24
/**
 * Judges a list of assertion requests and resolves with one result list plus
 * optional cost information (`costInfo` may be `null`).
 * NOTE(review): how `client` and `judgeClient` divide the work, and whether
 * `results` preserves the order of `requests`, is not visible in this
 * declaration — confirm in batch-judge.js.
 */
+ export declare function batchJudgeAssertions(requests: BatchJudgeRequest[], client: LlmClient, judgeClient: LlmClient, options: BatchJudgeOptions): Promise<{
25
+ results: AssertionResult[];
26
+ costInfo: BatchCostInfo | null;
27
+ }>;