vskill 0.5.21 → 0.5.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,5 +3,6 @@ export interface SweepOptions {
3
3
  judge: string;
4
4
  runs?: number;
5
5
  concurrency?: number;
6
+ baseline?: boolean;
6
7
  }
7
8
  export declare function runEvalSweep(skillDir: string, options: SweepOptions): Promise<void>;
@@ -6,7 +6,7 @@ import { join } from "node:path";
6
6
  import { loadAndValidateEvals, EvalValidationError } from "../../eval/schema.js";
7
7
  import { buildEvalSystemPrompt } from "../../eval/prompt-builder.js";
8
8
  import { runSweep } from "../../eval-server/sweep-runner.js";
9
- import { green, red, bold, dim, table } from "../../utils/output.js";
9
+ import { green, red, yellow, bold, dim, table } from "../../utils/output.js";
10
10
  export async function runEvalSweep(skillDir, options) {
11
11
  // Load and validate evals.json
12
12
  let evalsFile;
@@ -34,11 +34,19 @@ export async function runEvalSweep(skillDir, options) {
34
34
  }
35
35
  const runs = options.runs ?? 1;
36
36
  const concurrency = options.concurrency ?? 5;
37
+ const baseline = options.baseline ?? false;
37
38
  console.log(bold(`\nSweep: ${evalsFile.skill_name}`));
38
39
  console.log(dim(`Models: ${modelList.join(", ")}`));
39
40
  console.log(dim(`Judge: ${options.judge}`));
40
41
  console.log(dim(`Runs per model: ${runs}`));
41
- console.log(dim(`Cases: ${evalsFile.evals.length}\n`));
42
+ console.log(dim(`Cases: ${evalsFile.evals.length}`));
43
+ if (baseline)
44
+ console.log(dim(`Baseline: enabled (comparing with vs without skill)`));
45
+ console.log("");
46
+ // Warn about low run count
47
+ if (runs < 3) {
48
+ console.log(yellow(`Note: ${runs} run(s) may not produce statistically meaningful results. Use --runs 3+ for reliable ranking.\n`));
49
+ }
42
50
  let sweepResult = null;
43
51
  for await (const event of runSweep({
44
52
  skillDir,
@@ -49,17 +57,33 @@ export async function runEvalSweep(skillDir, options) {
49
57
  judge: options.judge,
50
58
  runs,
51
59
  concurrency,
60
+ baseline,
52
61
  })) {
53
62
  switch (event.type) {
63
+ case "sweep_judge_bias_warning":
64
+ console.log(yellow(`WARNING: ${event.data.warning}\n`));
65
+ break;
54
66
  case "sweep_model_start":
55
67
  process.stdout.write(dim(`[${event.data.modelIndex + 1}/${event.data.totalModels}] ${event.data.model} — `));
56
68
  break;
57
- case "sweep_model_progress":
58
- process.stdout.write(dim(`\r[${event.data.model}] run ${event.data.run}/${event.data.totalRuns} case ${event.data.currentCase}/${event.data.totalCases} (${event.data.percentComplete}%)`));
69
+ case "sweep_model_progress": {
70
+ const phaseLabel = event.data.phase === "baseline" ? " [baseline]" : "";
71
+ process.stdout.write(dim(`\r[${event.data.model}${phaseLabel}] run ${event.data.run}/${event.data.totalRuns} case ${event.data.currentCase}/${event.data.totalCases} (${event.data.percentComplete}%)`));
59
72
  break;
73
+ }
60
74
  case "sweep_model_complete":
61
75
  if (event.data.status === "complete" && event.data.passRate) {
62
- console.log(green(` done`) + dim(` (pass rate: ${(event.data.passRate.mean * 100).toFixed(1)}%)`));
76
+ let summary = ` done (pass rate: ${(event.data.passRate.mean * 100).toFixed(1)}%)`;
77
+ if (event.data.baselinePassRate && event.data.skillDelta) {
78
+ const delta = event.data.skillDelta.mean * 100;
79
+ const sign = delta >= 0 ? "+" : "";
80
+ summary += ` | baseline: ${(event.data.baselinePassRate.mean * 100).toFixed(1)}% | delta: ${sign}${delta.toFixed(1)}pp`;
81
+ if (event.data.amplificationPct != null && isFinite(event.data.amplificationPct)) {
82
+ const ampSign = event.data.amplificationPct >= 0 ? "+" : "";
83
+ summary += ` (${ampSign}${event.data.amplificationPct.toFixed(1)}%)`;
84
+ }
85
+ }
86
+ console.log(green(summary));
63
87
  }
64
88
  else {
65
89
  console.log(red(` error: ${event.data.errorMessage || "unknown"}`));
@@ -75,19 +99,55 @@ export async function runEvalSweep(skillDir, options) {
75
99
  process.exit(1);
76
100
  return;
77
101
  }
78
- // Print summary table
79
- const headers = ["RANK", "MODEL", "PASS RATE", "DURATION", "COST", "STATUS"];
80
- const sorted = [...sweepResult.models].sort((a, b) => b.passRate.mean - a.passRate.mean);
81
- const rows = sorted.map((m, i) => [
82
- String(i + 1),
83
- `${m.provider}/${m.model}`,
84
- m.status === "complete" ? formatStats(m.passRate, true) : "-",
85
- m.status === "complete" ? formatStats(m.duration, false, "ms") : "-",
86
- m.cost.total > 0 ? `$${m.cost.total.toFixed(4)}` : "-",
87
- m.status === "complete" ? green("OK") : red("ERR"),
88
- ]);
89
- console.log(bold("\nSweep Results\n"));
90
- console.log(table(headers, rows));
102
+ // Sort by composite score (if available) then by pass rate
103
+ const sorted = [...sweepResult.models].sort((a, b) => {
104
+ if (a.compositeScore != null && b.compositeScore != null) {
105
+ return b.compositeScore - a.compositeScore;
106
+ }
107
+ return b.passRate.mean - a.passRate.mean;
108
+ });
109
+ // Build table based on whether baseline was used
110
+ if (baseline) {
111
+ const headers = ["RANK", "MODEL", "WITH SKILL", "WITHOUT SKILL", "DELTA", "AMPLIFICATION", "STATUS"];
112
+ const rows = sorted.map((m, i) => [
113
+ String(i + 1),
114
+ `${m.provider}/${m.model}`,
115
+ m.status === "complete" ? formatStats(m.passRate, true) : "-",
116
+ m.status === "complete" && m.baselinePassRate ? formatStats(m.baselinePassRate, true) : "-",
117
+ m.status === "complete" && m.skillDelta
118
+ ? `${m.skillDelta.mean >= 0 ? "+" : ""}${(m.skillDelta.mean * 100).toFixed(1)}pp`
119
+ : "-",
120
+ m.status === "complete" && m.amplificationPct != null && isFinite(m.amplificationPct)
121
+ ? `${m.amplificationPct >= 0 ? "+" : ""}${m.amplificationPct.toFixed(1)}%`
122
+ : "-",
123
+ m.status === "complete" ? green("OK") : red("ERR"),
124
+ ]);
125
+ console.log(bold("\nSweep Results (Skill Amplification)\n"));
126
+ console.log(table(headers, rows));
127
+ // Skill quality badge
128
+ if (sweepResult.skillQualityScore != null && sweepResult.skillQualityRating) {
129
+ const ratingColors = {
130
+ excellent: green, good: green, marginal: yellow, minimal: yellow, harmful: red,
131
+ };
132
+ const colorFn = ratingColors[sweepResult.skillQualityRating] ?? dim;
133
+ const sign = sweepResult.skillQualityScore >= 0 ? "+" : "";
134
+ const label = `${sign}${sweepResult.skillQualityScore.toFixed(1)}% (${sweepResult.skillQualityRating.toUpperCase()})`;
135
+ console.log(`\nSkill Quality: ${colorFn(label)}`);
136
+ }
137
+ }
138
+ else {
139
+ const headers = ["RANK", "MODEL", "PASS RATE", "DURATION", "COST", "STATUS"];
140
+ const rows = sorted.map((m, i) => [
141
+ String(i + 1),
142
+ `${m.provider}/${m.model}`,
143
+ m.status === "complete" ? formatStats(m.passRate, true) : "-",
144
+ m.status === "complete" ? formatStats(m.duration, false, "ms") : "-",
145
+ m.cost.total > 0 ? `$${m.cost.total.toFixed(4)}` : "-",
146
+ m.status === "complete" ? green("OK") : red("ERR"),
147
+ ]);
148
+ console.log(bold("\nSweep Results\n"));
149
+ console.log(table(headers, rows));
150
+ }
91
151
  console.log(dim(`\nLeaderboard saved to ${skillDir}/evals/leaderboard/`));
92
152
  }
93
153
  function formatStats(stats, asPercent, suffix = "") {
@@ -1 +1 @@
1
- {"version":3,"file":"sweep.js","sourceRoot":"","sources":["../../../src/commands/eval/sweep.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,wDAAwD;AACxD,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,mCAAmC,CAAC;AAE7D,OAAO,EAAE,KAAK,EAAE,GAAG,EAAU,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAS7E,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB,EAAE,OAAqB;IACxE,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC3D,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,MAAM,YAAY,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvF,MAAM,YAAY,GAAG,qBAAqB,CAAC,YAAY,CAAC,CAAC;IACzD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEjF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,mEAAmE,CAAC,CAAC,CAAC;QACxF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAC/B,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAE7C,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,SAAS,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,SAAS,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;IAEvD,IAAI,WAAW,GAAuB
,IAAI,CAAC;IAE3C,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,QAAQ,CAAC;QACjC,QAAQ;QACR,SAAS,EAAE,SAAS,CAAC,UAAU;QAC/B,YAAY;QACZ,SAAS,EAAE,SAAS,CAAC,KAAK;QAC1B,MAAM,EAAE,SAAS;QACjB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,IAAI;QACJ,WAAW;KACZ,CAAC,EAAE,CAAC;QACH,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,mBAAmB;gBACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC7G,MAAM;YAER,KAAK,sBAAsB;gBACzB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,SAAS,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,SAAS,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,KAAK,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC,CAAC;gBAC5L,MAAM;YAER,KAAK,sBAAsB;gBACzB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,UAAU,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAC5D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,gBAAgB,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;gBACrG,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC;gBACtE,CAAC;gBACD,MAAM;YAER,KAAK,gBAAgB;gBACnB,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC;gBACzB,MAAM;QACV,CAAC;IACH,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC,CAAC;QACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IAC7E,MAAM,MAAM,GAAG,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACzF,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC;QACb,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE;QAC1B,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;QAC7D,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,E
AAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;QACpE,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG;QACtD,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC;KACnD,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,qBAAqB,CAAC,CAAC,CAAC;AAC5E,CAAC;AAED,SAAS,WAAW,CAAC,KAAiB,EAAE,SAAkB,EAAE,MAAM,GAAG,EAAE;IACrE,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IACjH,CAAC;IACD,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;AACnH,CAAC"}
1
+ {"version":3,"file":"sweep.js","sourceRoot":"","sources":["../../../src/commands/eval/sweep.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,wDAAwD;AACxD,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,QAAQ,EAAE,MAAM,mCAAmC,CAAC;AAE7D,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAU7E,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB,EAAE,OAAqB;IACxE,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QAC3D,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,MAAM,YAAY,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACvF,MAAM,YAAY,GAAG,qBAAqB,CAAC,YAAY,CAAC,CAAC;IACzD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEjF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,mEAAmE,CAAC,CAAC,CAAC;QACxF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,CAAC,CAAC;IAC/B,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,CAAC;IAC7C,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,KAAK,CAAC;IAE3C,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,YAAY,SAAS,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC,CAAC;IAC5C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,SAAS,
CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACrD,IAAI,QAAQ;QAAE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC,CAAC;IACtF,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAEhB,2BAA2B;IAC3B,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,iGAAiG,CAAC,CAAC,CAAC;IACtI,CAAC;IAED,IAAI,WAAW,GAAuB,IAAI,CAAC;IAE3C,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,QAAQ,CAAC;QACjC,QAAQ;QACR,SAAS,EAAE,SAAS,CAAC,UAAU;QAC/B,YAAY;QACZ,SAAS,EAAE,SAAS,CAAC,KAAK;QAC1B,MAAM,EAAE,SAAS;QACjB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,IAAI;QACJ,WAAW;QACX,QAAQ;KACT,CAAC,EAAE,CAAC;QACH,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,0BAA0B;gBAC7B,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,YAAY,KAAK,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,CAAC,CAAC;gBACxD,MAAM;YAER,KAAK,mBAAmB;gBACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC;gBAC7G,MAAM;YAER,KAAK,sBAAsB,CAAC,CAAC,CAAC;gBAC5B,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,CAAC;gBACxE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,UAAU,SAAS,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,SAAS,KAAK,CAAC,IAAI,CAAC,WAAW,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,KAAK,CAAC,IAAI,CAAC,eAAe,IAAI,CAAC,CAAC,CAAC;gBACzM,MAAM;YACR,CAAC;YAED,KAAK,sBAAsB;gBACzB,IAAI,KAAK,CAAC,IAAI,CAAC,MAAM,KAAK,UAAU,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAC5D,IAAI,OAAO,GAAG,qBAAqB,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;oBACnF,IAAI,KAAK,CAAC,IAAI,CAAC,gBAAgB,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;wBACzD,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,GAAG,GAAG,CAAC;wBAC/C,MAAM,IAAI,GAAG,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnC,OAAO,IAAI,gBAAgB,CAAC,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,cAAc,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;wBACxH,IAAI,KAAK,CAAC,IAAI,CAAC,gBAAgB,IAAI,IAAI,IAAI,QAAQ,CAAC,KAAK,CAAC,IAAI,
CAAC,gBAAgB,CAAC,EAAE,CAAC;4BACjF,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC5D,OAAO,IAAI,KAAK,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;wBACvE,CAAC;oBACH,CAAC;oBACD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC9B,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,KAAK,CAAC,IAAI,CAAC,YAAY,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC;gBACtE,CAAC;gBACD,MAAM;YAER,KAAK,gBAAgB;gBACnB,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC;gBACzB,MAAM;QACV,CAAC;IACH,CAAC;IAED,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC,CAAC;QACzD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,2DAA2D;IAC3D,MAAM,MAAM,GAAG,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACnD,IAAI,CAAC,CAAC,cAAc,IAAI,IAAI,IAAI,CAAC,CAAC,cAAc,IAAI,IAAI,EAAE,CAAC;YACzD,OAAO,CAAC,CAAC,cAAc,GAAG,CAAC,CAAC,cAAc,CAAC;QAC7C,CAAC;QACD,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,iDAAiD;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,QAAQ,CAAC,CAAC;QACrG,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC;YACb,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE;YAC1B,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;YAC7D,CAAC,CAAC,MAAM,KAAK,UAAU,IAAI,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;YAC3F,CAAC,CAAC,MAAM,KAAK,UAAU,IAAI,CAAC,CAAC,UAAU;gBACrC,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;gBACjF,CAAC,CAAC,GAAG;YACP,CAAC,CAAC,MAAM,KAAK,UAAU,IAAI,CAAC,CAAC,gBAAgB,IAAI,IAAI,IAAI,QAAQ,CAAC,CAAC,CAAC,gBAAgB,CAAC;gBACnF,CAAC,CAAC,GAAG,CAAC,CAAC,gBAAgB,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,CA
AC,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;gBAC1E,CAAC,CAAC,GAAG;YACP,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC;SACnD,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;QAElC,sBAAsB;QACtB,IAAI,WAAW,CAAC,iBAAiB,IAAI,IAAI,IAAI,WAAW,CAAC,kBAAkB,EAAE,CAAC;YAC5E,MAAM,YAAY,GAA0C;gBAC1D,SAAS,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,GAAG;aAC/E,CAAC;YACF,MAAM,OAAO,GAAG,YAAY,CAAC,WAAW,CAAC,kBAAkB,CAAC,IAAI,GAAG,CAAC;YACpE,MAAM,IAAI,GAAG,WAAW,CAAC,iBAAiB,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3D,MAAM,KAAK,GAAG,GAAG,IAAI,GAAG,WAAW,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,WAAW,CAAC,kBAAkB,CAAC,WAAW,EAAE,GAAG,CAAC;YACtH,OAAO,CAAC,GAAG,CAAC,oBAAoB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;SAAM,CAAC;QACN,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;QAC7E,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC;YACb,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE;YAC1B,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;YAC7D,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG;YACpE,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG;YACtD,CAAC,CAAC,MAAM,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC;SACnD,CAAC,CAAC;QAEH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IACpC,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,qBAAqB,CAAC,CAAC,CAAC;AAC5E,CAAC;AAED,SAAS,WAAW,CAAC,KAAiB,EAAE,SAAkB,EAAE,MAAM,GAAG,EAAE;IACrE,IAAI,SAAS,EAAE,CAAC;QACd,OAAO,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAA
C,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;IACjH,CAAC;IACD,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;AACnH,CAAC"}
@@ -12,4 +12,5 @@ export declare function evalCommand(subcommand: string, target?: string, opts?:
12
12
  judge?: string;
13
13
  runs?: string;
14
14
  batch?: boolean;
15
+ baseline?: boolean;
15
16
  }): Promise<void>;
@@ -67,6 +67,7 @@ export async function evalCommand(subcommand, target, opts = {}) {
67
67
  judge: opts.judge,
68
68
  runs: opts.runs ? parseInt(opts.runs, 10) : undefined,
69
69
  concurrency: opts.concurrency ? parseInt(opts.concurrency, 10) : undefined,
70
+ baseline: opts.baseline,
70
71
  });
71
72
  }
72
73
  case "credentials": {
@@ -1 +1 @@
1
- {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAAkB,EAClB,MAAe,EACf,OAAiP,EAAE;IAEnP,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAE3D,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACxD,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACvD,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,aAAa,IAAI,IAAI,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;YAC3F,OAAO,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACvD,CAAC;QAED,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC,CAAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACrD,sEAAsE;YACtE,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,KAAK,CAAC;YAC9D,OAAO,UAAU,CAAC,QAAQ,EAAE;gBAC1B,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBAC1E,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,OAAO;gBACP,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;QACL,CAAC;QAED,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CA
AC,CAAC;YAC/D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAED,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YACtE,MAAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACvF,OAAO,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QAClE,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wEAAwE,CAAC,CAAC,CAAC;gBAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oHAAoH,CAAC,CAAC,CAAC;gBACzI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;gBAChB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAC,CAAC;gBACnF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,QAAQ,EAAE;gBAC5B,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBACrD,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;aAC3E,CAAC,CAAC;QACL,CAAC;QAED,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,iEAAiE,CAAC,CAAC,CAAC;gBACtF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,iDAAiD;YACjD,sDAAsD;YACtD,MAAM,YAAY,GAAG,IAAI,CAAC;YAC1B,MAAM,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;YAC7G,QAAQ,MAAM,EAAE,CAAC;gBACf,KAAK,KAAK,CAAC,CAAC,CAAC;oBACX,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC;oBAC/B,IAAI,CAAC,GAAG,EAAE,CAAC;wBACT,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAClB,CAAC;oBACD,OAAO,iBAAiB,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC;gBAC9C,CAAC;gBACD,KAAK,MAAM;oBACT,OAAO,kBAAkB,CAAC,YAAY,CAAC,CAAC;gBAC1C,KAAK,OAAO;oBACV,OAAO,mBAAmB,CAAC,
YAAY,CAAC,CAAC;gBAC3C;oBACE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oCAAoC,MAAM,KAAK,CAAC,GAAG,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;YAC7G,CAAC;YACD,MAAM;QACR,CAAC;QAED;YACE,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,wBAAwB,UAAU,KAAK,CAAC;gBAC1C,GAAG,CAAC,yEAAyE,CAAC,CACjF,CAAC;IACN,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,MAAc;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,mBAAmB,MAAM,sCAAsC,CAAC,CACrE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,sEAAsE;IACtE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvE,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,0CAA0C;IAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,IAAI,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,sEAAsE;IACtE,OAAO,UAAU,CAAC;AACpB,CAAC"}
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAAkB,EAClB,MAAe,EACf,OAAqQ,EAAE;IAEvQ,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAE3D,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACxD,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACvD,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,aAAa,IAAI,IAAI,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC;YAC3F,OAAO,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QACvD,CAAC;QAED,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC,CAAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACrD,sEAAsE;YACtE,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,KAAK,IAAI,IAAI,IAAI,CAAC,KAAK,KAAK,KAAK,CAAC;YAC9D,OAAO,UAAU,CAAC,QAAQ,EAAE;gBAC1B,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBAC1E,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,OAAO;gBACP,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;QACL,CAAC;QAED,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CA
AC,CAAC;YAC/D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAED,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YACtE,MAAM,gBAAgB,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YACvF,OAAO,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QAClE,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wEAAwE,CAAC,CAAC,CAAC;gBAC7F,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oHAAoH,CAAC,CAAC,CAAC;gBACzI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;gBAChB,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,8DAA8D,CAAC,CAAC,CAAC;gBACnF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,QAAQ,EAAE;gBAC5B,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBACrD,WAAW,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS;gBAC1E,QAAQ,EAAE,IAAI,CAAC,QAAQ;aACxB,CAAC,CAAC;QACL,CAAC;QAED,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,iEAAiE,CAAC,CAAC,CAAC;gBACtF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,iDAAiD;YACjD,sDAAsD;YACtD,MAAM,YAAY,GAAG,IAAI,CAAC;YAC1B,MAAM,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;YAC7G,QAAQ,MAAM,EAAE,CAAC;gBACf,KAAK,KAAK,CAAC,CAAC,CAAC;oBACX,MAAM,GAAG,GAAG,IAAI,CAAC,aAAa,CAAC;oBAC/B,IAAI,CAAC,GAAG,EAAE,CAAC;wBACT,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC,CAAC;wBAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAClB,CAAC;oBACD,OAAO,iBAAiB,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC;gBAC9C,CAAC;gBACD,KAAK,MAAM;oBACT,OAAO,kBAAkB,CAAC,YAAY,CAAC,CAAC;gBAC1C,K
AAK,OAAO;oBACV,OAAO,mBAAmB,CAAC,YAAY,CAAC,CAAC;gBAC3C;oBACE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,oCAAoC,MAAM,KAAK,CAAC,GAAG,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;YAC7G,CAAC;YACD,MAAM;QACR,CAAC;QAED;YACE,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,wBAAwB,UAAU,KAAK,CAAC;gBAC1C,GAAG,CAAC,yEAAyE,CAAC,CACjF,CAAC;IACN,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,MAAc;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,mBAAmB,MAAM,sCAAsC,CAAC,CACrE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,qDAAqD;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,sEAAsE;IACtE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvE,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,0CAA0C;IAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,IAAI,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,sEAAsE;IACtE,OAAO,UAAU,CAAC;AACpB,CAAC"}
@@ -0,0 +1,21 @@
1
+ import type { ActivationResult } from "./activation-tester.js";
2
/**
 * One persisted activation-test run, as stored in a skill's activation history.
 */
export interface ActivationHistoryRun {
    /** Identifier of the run; lookups by run id compare against this value. */
    id: string;
    /** When the run happened. NOTE(review): string format is not fixed here — confirm with the writer. */
    timestamp: string;
    /** Model the run was executed against. */
    model: string;
    /** Provider serving the model. */
    provider: string;
    /** Number of prompts in the run. */
    promptCount: number;
    /**
     * Aggregate metrics for the run.
     * NOTE(review): `tp`/`tn`/`fp`/`fn` are presumably confusion-matrix counts and
     * `precision`/`recall`/`reliability` ratios derived from them — confirm the
     * scale (0..1 vs. percent) against the activation tester.
     */
    summary: {
        precision: number;
        recall: number;
        reliability: number;
        tp: number;
        tn: number;
        fp: number;
        fn: number;
    };
    /** Per-prompt results; omitted from the listing API to keep listings small. */
    results: ActivationResult[];
}
19
/**
 * Appends `run` to the history file kept inside `skillDir`, creating the
 * directory if needed and dropping the oldest runs once the cap is exceeded.
 */
export declare function writeActivationRun(skillDir: string, run: ActivationHistoryRun): Promise<void>;
/**
 * Lists the stored runs, most recently written first, without their `results` payload.
 * Resolves to an empty array when no readable history exists.
 */
export declare function listActivationRuns(skillDir: string): Promise<Array<Omit<ActivationHistoryRun, "results">>>;
/**
 * Resolves to the stored run whose `id` equals `runId`, or `null` when none matches.
 */
export declare function getActivationRun(skillDir: string, runId: string): Promise<ActivationHistoryRun | null>;
@@ -0,0 +1,41 @@
1
+ // ---------------------------------------------------------------------------
2
+ // activation-history.ts -- persistent activation test history per skill
3
+ // ---------------------------------------------------------------------------
4
+ import { readFile, writeFile, mkdir } from "node:fs/promises";
5
+ import { join } from "node:path";
6
/** Name of the per-skill history file, stored directly inside the skill directory. */
const HISTORY_FILENAME = "activation-history.json";
/** Maximum number of runs kept on disk; the oldest runs are pruned on write. */
const MAX_RUNS = 50;

/** Builds the path of the history file that belongs to `skillDir`. */
function historyPath(skillDir) {
    return join(skillDir, HISTORY_FILENAME);
}

/**
 * Loads the history file for `skillDir`.
 *
 * Best-effort by design: a missing, unreadable or unparsable file, or one
 * whose `runs` property is not an array, yields an empty history instead of
 * an error. Entries of `runs` that are not plain objects (for example `null`
 * left behind by a hand edit) are dropped here so that the exported readers
 * never fail while destructuring or reading `id` from them.
 *
 * @returns the parsed history object, always carrying a `runs` array.
 */
async function readHistoryFile(skillDir) {
    try {
        const content = await readFile(historyPath(skillDir), "utf-8");
        const parsed = JSON.parse(content);
        if (!parsed || !Array.isArray(parsed.runs)) {
            return { runs: [] };
        }
        parsed.runs = parsed.runs.filter(
            (entry) => entry !== null && typeof entry === "object" && !Array.isArray(entry),
        );
        return parsed;
    }
    catch {
        return { runs: [] };
    }
}

/**
 * Appends `run` to the history of `skillDir` and rewrites the history file.
 *
 * The directory is created when missing. When the history grows beyond
 * `MAX_RUNS`, only the most recent `MAX_RUNS` entries are kept.
 *
 * @param skillDir directory that holds the history file
 * @param run the run to append
 */
export async function writeActivationRun(skillDir, run) {
    await mkdir(skillDir, { recursive: true });
    const history = await readHistoryFile(skillDir);
    history.runs.push(run);
    // Prune oldest if over cap: runs are appended, so the tail is the newest.
    if (history.runs.length > MAX_RUNS) {
        history.runs = history.runs.slice(-MAX_RUNS);
    }
    await writeFile(historyPath(skillDir), JSON.stringify(history, null, 2));
}

/**
 * Lists the stored runs of `skillDir`, newest first, without their
 * per-prompt `results`.
 *
 * @returns an array of run summaries; empty when there is no usable history.
 */
export async function listActivationRuns(skillDir) {
    const history = await readHistoryFile(skillDir);
    return history.runs.map(({ results: _results, ...rest }) => rest).reverse();
}

/**
 * Looks up a single stored run by id.
 *
 * @returns the full run (including `results`), or `null` when no run has
 *          the given `runId`.
 */
export async function getActivationRun(skillDir, runId) {
    const history = await readHistoryFile(skillDir);
    return history.runs.find((r) => r.id === runId) ?? null;
}
41
+ //# sourceMappingURL=activation-history.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"activation-history.js","sourceRoot":"","sources":["../../src/eval/activation-history.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,wEAAwE;AACxE,8EAA8E;AAE9E,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAyBjC,MAAM,gBAAgB,GAAG,yBAAyB,CAAC;AACnD,MAAM,QAAQ,GAAG,EAAE,CAAC;AAEpB,SAAS,WAAW,CAAC,QAAgB;IACnC,OAAO,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;AAC1C,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,QAAgB;IAC7C,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,MAAM,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC;YAAE,OAAO,MAA+B,CAAC;QACjF,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;IACtB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,GAAyB;IAEzB,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,QAAQ,CAAC,CAAC;IAChD,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACvB,2BAA2B;IAC3B,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,QAAQ,EAAE,CAAC;QACnC,OAAO,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC,CAAC;IACpE,CAAC;IACD,MAAM,SAAS,CAAC,WAAW,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AAC3E,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB;IAEhB,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,QAAQ,CAAC,CAAC;IAChD,OAAO,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,CAAC;AAC9E,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,QAAgB,EAChB,KAAa;IAEb,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,QAAQ,CAAC,CAAC;IAChD,OAAO,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,KAAK,CAAC,IAAI,IAAI,CAAC;AAC1D,CAAC"}
@@ -22,6 +22,7 @@ import { generateActionItems } from "../eval/action-items.js";
22
22
  import { buildEvalInitPrompt, parseGeneratedEvals } from "../eval/prompt-builder.js";
23
23
  import { testActivation } from "../eval/activation-tester.js";
24
24
  import { detectMcpDependencies, detectSkillDependencies } from "../eval/mcp-detector.js";
25
+ import { writeActivationRun, listActivationRuns, getActivationRun } from "../eval/activation-history.js";
25
26
  // ---------------------------------------------------------------------------
26
27
  // In-memory config state — UI can change provider/model at runtime.
27
28
  //
@@ -1033,13 +1034,40 @@ export function registerRoutes(router, root, projectName) {
1033
1034
  name: nameMatch ? nameMatch[1] : params.skill,
1034
1035
  tags: tagsMatch ? tagsMatch[1].split(",").map((t) => t.trim()).filter(Boolean) : [],
1035
1036
  };
1036
- const client = getClient();
1037
+ // Use per-request model overrides if provided, fall back to global config
1038
+ const client = body.provider || body.model
1039
+ ? createLlmClient({ provider: body.provider, model: body.model })
1040
+ : getClient();
1037
1041
  const summary = await testActivation(description, body.prompts, client, (result) => {
1038
1042
  if (!aborted) {
1039
1043
  sendSSE(res, "prompt_result", result);
1040
1044
  }
1041
1045
  }, meta);
1042
1046
  if (!aborted) {
1047
+ // Write activation history entry
1048
+ const usedProvider = body.provider || currentOverrides.provider || "unknown";
1049
+ const usedModel = body.model || currentOverrides.model || "unknown";
1050
+ const run = {
1051
+ id: `run-${Date.now()}`,
1052
+ timestamp: new Date().toISOString(),
1053
+ model: usedModel,
1054
+ provider: usedProvider,
1055
+ promptCount: summary.total,
1056
+ summary: {
1057
+ precision: summary.precision,
1058
+ recall: summary.recall,
1059
+ reliability: summary.reliability,
1060
+ tp: summary.tp,
1061
+ tn: summary.tn,
1062
+ fp: summary.fp,
1063
+ fn: summary.fn,
1064
+ },
1065
+ results: summary.results,
1066
+ };
1067
+ try {
1068
+ await writeActivationRun(skillDir, run);
1069
+ }
1070
+ catch { /* non-blocking */ }
1043
1071
  sendSSEDone(res, { ...summary, description });
1044
1072
  }
1045
1073
  }
@@ -1047,6 +1075,81 @@ export function registerRoutes(router, root, projectName) {
1047
1075
  sendSSEDone(res, { error: err instanceof Error ? err.message : String(err) });
1048
1076
  }
1049
1077
  });
1078
// AI-generate activation test prompts (SSE).
// Optional request body fields: count, provider, model.
// Emits one "prompt_generated" event per accepted prompt and finishes with a
// done event carrying either { prompts } or { error }.
router.post("/api/skills/:plugin/:skill/activation-prompts", async (req, res, params) => {
    const skillDir = resolveSkillDir(root, params.plugin, params.skill);
    // Set once the client disconnects so nothing more is written to `res`.
    let aborted = false;
    res.on("close", () => { aborted = true; });
    try {
        const body = (await readBody(req));
        const skillMdPath = join(skillDir, "SKILL.md");
        if (!existsSync(skillMdPath)) {
            sendJson(res, { error: "SKILL.md not found" }, 404, req);
            return;
        }
        const skillContent = readFileSync(skillMdPath, "utf-8");
        // Only a double-quoted `description:` inside the front matter is recognised.
        const descMatch = skillContent.match(/^---[\s\S]*?description:\s*"([^"]+)"[\s\S]*?---/);
        const description = descMatch ? descMatch[1] : "";
        if (!description) {
            sendJson(res, { error: "No skill description available" }, 400, req);
            return;
        }
        initSSE(res, req);
        // `count` comes from the request and is interpolated into the system
        // prompt, so accept only a positive integer (capped) and fall back to
        // the default of 8 for anything else.
        const DEFAULT_PROMPT_COUNT = 8;
        const MAX_PROMPT_COUNT = 100;
        const requestedCount = Number(body.count);
        const count = Number.isInteger(requestedCount) && requestedCount > 0
            ? Math.min(requestedCount, MAX_PROMPT_COUNT)
            : DEFAULT_PROMPT_COUNT;
        const half = Math.ceil(count / 2);
        // Use per-request model overrides if provided, fall back to global config.
        const client = body.provider || body.model
            ? createLlmClient({ provider: body.provider, model: body.model })
            : getClient();
        const systemPrompt = [
            `Given this skill description, generate test prompts to evaluate activation quality.`,
            `Generate ${count} prompts: ${half} that SHOULD activate this skill, ${count - half} that should NOT.`,
            `For "should not" prompts, make them plausible but clearly outside this skill's domain.`,
            `Return one JSON object per line: {"prompt": "...", "expected": "should_activate"|"should_not_activate"}`,
            `Return ONLY the JSON lines, no other text.`,
        ].join("\n");
        const userPrompt = `Skill description: ${description}`;
        const { text } = await client.generate(systemPrompt, userPrompt);
        if (aborted)
            return;
        // The two labels the system prompt asks the model to use.
        const allowedExpected = new Set(["should_activate", "should_not_activate"]);
        const allPrompts = [];
        const lines = text.split("\n").filter((l) => l.trim());
        for (const line of lines) {
            try {
                // Tolerate a Markdown code fence wrapped around the JSON lines.
                const cleaned = line.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/i, "").trim();
                if (!cleaned.startsWith("{"))
                    continue;
                const parsed = JSON.parse(cleaned);
                // Accept only a non-empty string prompt with one of the two labels.
                if (typeof parsed.prompt !== "string" || !parsed.prompt.trim())
                    continue;
                if (!allowedExpected.has(parsed.expected))
                    continue;
                // Stream exactly what ends up in the final list, so the
                // incremental events and the done payload agree.
                const entry = { prompt: parsed.prompt, expected: parsed.expected };
                allPrompts.push(entry);
                if (!aborted)
                    sendSSE(res, "prompt_generated", entry);
            }
            catch { /* skip malformed lines */ }
        }
        if (!aborted)
            sendSSEDone(res, { prompts: allPrompts });
    }
    catch (err) {
        if (!aborted) {
            sendSSEDone(res, { error: err instanceof Error ? err.message : String(err) });
        }
    }
});
1137
// Activation test history listing: responds with { runs }, where each entry
// is a run summary as returned by listActivationRuns (no per-prompt results).
router.get("/api/skills/:plugin/:skill/activation-history", async (req, res, params) => {
    const historyDir = resolveSkillDir(root, params.plugin, params.skill);
    const summaries = await listActivationRuns(historyDir);
    sendJson(res, { runs: summaries }, 200, req);
});
1143
// Single activation test run by ID: responds with the full stored run, or
// with a 404 and { error: "Run not found" } when the ID is unknown.
router.get("/api/skills/:plugin/:skill/activation-history/:runId", async (req, res, params) => {
    const historyDir = resolveSkillDir(root, params.plugin, params.skill);
    const storedRun = await getActivationRun(historyDir, params.runId);
    if (storedRun) {
        sendJson(res, storedRun, 200, req);
    }
    else {
        sendJson(res, { error: "Run not found" }, 404, req);
    }
});
1050
1153
  // Get skill dependencies (MCP + skill-to-skill)
1051
1154
  router.get("/api/skills/:plugin/:skill/dependencies", async (req, res, params) => {
1052
1155
  const skillDir = resolveSkillDir(root, params.plugin, params.skill);