vskill 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/dist/eval/__tests__/benchmark.test.js +29 -0
  2. package/dist/eval/__tests__/benchmark.test.js.map +1 -1
  3. package/dist/eval/__tests__/comparator.test.js +80 -0
  4. package/dist/eval/__tests__/comparator.test.js.map +1 -1
  5. package/dist/eval/__tests__/judge.test.js +75 -1
  6. package/dist/eval/__tests__/judge.test.js.map +1 -1
  7. package/dist/eval/__tests__/mcp-detector.test.js +55 -0
  8. package/dist/eval/__tests__/mcp-detector.test.js.map +1 -1
  9. package/dist/eval/__tests__/prompt-builder.test.js +18 -0
  10. package/dist/eval/__tests__/prompt-builder.test.js.map +1 -1
  11. package/dist/eval/action-items.d.ts +21 -0
  12. package/dist/eval/action-items.js +97 -0
  13. package/dist/eval/action-items.js.map +1 -0
  14. package/dist/eval/benchmark-history.d.ts +4 -4
  15. package/dist/eval/benchmark-history.js.map +1 -1
  16. package/dist/eval/benchmark.d.ts +18 -1
  17. package/dist/eval/benchmark.js.map +1 -1
  18. package/dist/eval/comparator.d.ts +2 -1
  19. package/dist/eval/comparator.js +10 -3
  20. package/dist/eval/comparator.js.map +1 -1
  21. package/dist/eval/judge.d.ts +3 -1
  22. package/dist/eval/judge.js +18 -2
  23. package/dist/eval/judge.js.map +1 -1
  24. package/dist/eval/llm.js +5 -7
  25. package/dist/eval/llm.js.map +1 -1
  26. package/dist/eval/mcp-detector.js +30 -0
  27. package/dist/eval/mcp-detector.js.map +1 -1
  28. package/dist/eval/prompt-builder.js +22 -1
  29. package/dist/eval/prompt-builder.js.map +1 -1
  30. package/dist/eval/skill-scanner.d.ts +6 -0
  31. package/dist/eval/skill-scanner.js +55 -9
  32. package/dist/eval/skill-scanner.js.map +1 -1
  33. package/dist/eval-server/api-routes.js +37 -1
  34. package/dist/eval-server/api-routes.js.map +1 -1
  35. package/dist/eval-server/skill-create-routes.js +17 -2
  36. package/dist/eval-server/skill-create-routes.js.map +1 -1
  37. package/dist/eval-ui/assets/index-BHqTxODT.js +75 -0
  38. package/dist/eval-ui/index.html +1 -1
  39. package/dist/utils/__tests__/resolve-binary.integration.test.d.ts +1 -0
  40. package/dist/utils/__tests__/resolve-binary.integration.test.js +138 -0
  41. package/dist/utils/__tests__/resolve-binary.integration.test.js.map +1 -0
  42. package/dist/utils/resolve-binary.js +27 -8
  43. package/dist/utils/resolve-binary.js.map +1 -1
  44. package/package.json +1 -1
  45. package/dist/eval-ui/assets/index-CU39Pi0h.js +0 -75
@@ -61,5 +61,34 @@ describe("benchmark", () => {
61
61
  const result = await readBenchmark(testDir);
62
62
  expect(result).toBeNull();
63
63
  });
64
+ it("writes and reads BenchmarkResult with mcpSimulation", async () => {
65
+ const benchmarkWithMcp = {
66
+ ...SAMPLE_BENCHMARK,
67
+ mcpSimulation: {
68
+ active: true,
69
+ servers: ["Slack", "GitHub"],
70
+ },
71
+ };
72
+ await writeBenchmark(testDir, benchmarkWithMcp);
73
+ const result = await readBenchmark(testDir);
74
+ expect(result).not.toBeNull();
75
+ expect(result.mcpSimulation).toBeDefined();
76
+ expect(result.mcpSimulation.active).toBe(true);
77
+ expect(result.mcpSimulation.servers).toEqual(["Slack", "GitHub"]);
78
+ });
79
+ it("reads BenchmarkResult without mcpSimulation (backward compat)", async () => {
80
+ // Write a benchmark without mcpSimulation field
81
+ const rawBenchmark = {
82
+ timestamp: "2026-03-01T00:00:00.000Z",
83
+ model: "claude-sonnet-4-6",
84
+ skill_name: "old-skill",
85
+ cases: [],
86
+ };
87
+ writeFileSync(join(testDir, "evals", "benchmark.json"), JSON.stringify(rawBenchmark));
88
+ const result = await readBenchmark(testDir);
89
+ expect(result).not.toBeNull();
90
+ expect(result.mcpSimulation).toBeUndefined();
91
+ expect(result.skill_name).toBe("old-skill");
92
+ });
64
93
  });
65
94
  //# sourceMappingURL=benchmark.test.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"benchmark.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/benchmark.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAGhE,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,IAAI,OAAe,CAAC;AAEpB,MAAM,gBAAgB,GAAoB;IACxC,SAAS,EAAE,0BAA0B;IACrC,KAAK,EAAE,mBAAmB;IAC1B,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,YAAY;YACvB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,GAAG;YACd,UAAU,EAAE;gBACV;oBACE,EAAE,EAAE,IAAI;oBACR,IAAI,EAAE,cAAc;oBACpB,IAAI,EAAE,IAAI;oBACV,SAAS,EAAE,YAAY;iBACxB;aACF;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IACzB,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,gBAAgB,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACvD,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,cAAc,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAChD,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,aAAa,CACX,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,CACjC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,O
AAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAEjE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"benchmark.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/benchmark.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAGhE,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,IAAI,OAAe,CAAC;AAEpB,MAAM,gBAAgB,GAAoB;IACxC,SAAS,EAAE,0BAA0B;IACrC,KAAK,EAAE,mBAAmB;IAC1B,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,YAAY;YACvB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,GAAG;YACd,UAAU,EAAE;gBACV;oBACE,EAAE,EAAE,IAAI;oBACR,IAAI,EAAE,cAAc;oBACpB,IAAI,EAAE,IAAI;oBACV,SAAS,EAAE,YAAY;iBACxB;aACF;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IACzB,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,gBAAgB,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACvD,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,cAAc,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAChD,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,aAAa,CACX,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,CACjC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,O
AAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAEjE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,gBAAgB,GAAoB;YACxC,GAAG,gBAAgB;YACnB,aAAa,EAAE;gBACb,MAAM,EAAE,IAAI;gBACZ,OAAO,EAAE,CAAC,OAAO,EAAE,QAAQ,CAAC;aAC7B;SACF,CAAC;QAEF,MAAM,cAAc,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAE5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,CAAC,MAAO,CAAC,aAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,MAAM,CAAC,MAAO,CAAC,aAAc,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+DAA+D,EAAE,KAAK,IAAI,EAAE;QAC7E,gDAAgD;QAChD,MAAM,YAAY,GAAG;YACnB,SAAS,EAAE,0BAA0B;YACrC,KAAK,EAAE,mBAAmB;YAC1B,UAAU,EAAE,WAAW;YACvB,KAAK,EAAE,EAAE;SACV,CAAC;QACF,aAAa,CACX,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,CAC7B,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,aAAa,CAAC,CAAC,aAAa,EAAE,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -136,6 +136,86 @@ describe("runComparison", () => {
136
136
  vi.restoreAllMocks();
137
137
  });
138
138
  });
139
+ describe("scoreComparison with MCP deps", () => {
140
+ it("uses standard prompt when no MCP deps", async () => {
141
+ const client = mockClient([
142
+ JSON.stringify({
143
+ content_score_a: 3,
144
+ structure_score_a: 3,
145
+ content_score_b: 3,
146
+ structure_score_b: 3,
147
+ winner: "tie",
148
+ }),
149
+ ]);
150
+ await scoreComparison("A", "B", "prompt", client);
151
+ const systemPrompt = client.generate.mock.calls[0][0];
152
+ expect(systemPrompt).not.toContain("SIMULATED MCP");
153
+ });
154
+ it("augments prompt when MCP deps present", async () => {
155
+ const client = mockClient([
156
+ JSON.stringify({
157
+ content_score_a: 3,
158
+ structure_score_a: 3,
159
+ content_score_b: 3,
160
+ structure_score_b: 3,
161
+ winner: "tie",
162
+ }),
163
+ ]);
164
+ const mcpDeps = [
165
+ {
166
+ server: "Slack",
167
+ url: "https://mcp.slack.com/mcp",
168
+ transport: "http",
169
+ matchedTools: ["slack_send_message"],
170
+ configSnippet: "{}",
171
+ },
172
+ ];
173
+ await scoreComparison("A", "B", "prompt", client, mcpDeps);
174
+ const systemPrompt = client.generate.mock.calls[0][0];
175
+ expect(systemPrompt).toContain("SIMULATED MCP");
176
+ expect(systemPrompt).toContain("Slack");
177
+ });
178
+ });
179
+ describe("runComparison with MCP auto-detection", () => {
180
+ it("auto-detects MCP deps from skill content and augments comparison", async () => {
181
+ const client = mockClient([
182
+ "skill output",
183
+ "baseline output",
184
+ JSON.stringify({
185
+ content_score_a: 3,
186
+ structure_score_a: 3,
187
+ content_score_b: 3,
188
+ structure_score_b: 3,
189
+ winner: "tie",
190
+ }),
191
+ ]);
192
+ vi.spyOn(Math, "random").mockReturnValue(0.3);
193
+ await runComparison("prompt", "Use slack_send_message to send messages.", client);
194
+ // The third call is the scoring call - check its system prompt
195
+ const scoringCall = client.generate.mock.calls[2];
196
+ expect(scoringCall[0]).toContain("SIMULATED MCP");
197
+ expect(scoringCall[0]).toContain("Slack");
198
+ vi.restoreAllMocks();
199
+ });
200
+ it("does not augment comparison for non-MCP skills", async () => {
201
+ const client = mockClient([
202
+ "skill output",
203
+ "baseline output",
204
+ JSON.stringify({
205
+ content_score_a: 3,
206
+ structure_score_a: 3,
207
+ content_score_b: 3,
208
+ structure_score_b: 3,
209
+ winner: "tie",
210
+ }),
211
+ ]);
212
+ vi.spyOn(Math, "random").mockReturnValue(0.3);
213
+ await runComparison("prompt", "Plain text skill without MCP.", client);
214
+ const scoringCall = client.generate.mock.calls[2];
215
+ expect(scoringCall[0]).not.toContain("SIMULATED MCP");
216
+ vi.restoreAllMocks();
217
+ });
218
+ });
139
219
  describe("generateComparisonOutputs with onProgress", () => {
140
220
  it("calls onProgress with generating_skill before first LLM call", async () => {
141
221
  const client = mockClient(["skill response", "baseline response"]);
@@ -1 +1 @@
1
- {"version":3,"file":"comparator.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/comparator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EACL,yBAAyB,EACzB,eAAe,EACf,aAAa,GACd,MAAM,kBAAkB,CAAC;AAG1B,SAAS,UAAU,CAAC,SAAmB;IACrC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;YACzB,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,CAAC;YAC1C,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC;QACvE,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,iBAAiB,EAAE,MAAM,CAAC,CAAC;QAEzF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAEjD,0CAA0C;QAC1C,MAAM,SAAS,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAEhD,gCAAgC;QAChC,MAAM,UAAU,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;gBAChB,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC/E,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,M
AAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,6HAA6H;SAC9H,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,EAAE;gBACrB,eAAe,EAAE,CAAC,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;aAChB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,oEAAoE;QACpE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,mBAAmB;YACnB,sBAAsB;YACtB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;gBACf,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,wCAAwC;QACxC,EAAE,CAAC,KA
AK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,qBAAqB;QAEpE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,CAAC,CAAC;QAE3E,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACrD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QAC3D,qCAAqC;QACrC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,WAAW;YACX,cAAc;YACd,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,qBAAqB;QACrB,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACrD,6CAA6C;QAC7C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,8BAA8B;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,2CAA2C,EAAE,GAAG,EAAE;IACzD,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;QAC5E,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,kBAAkB,EAAE,4BAA4B,CAAC,CAAC;IAClG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,y
BAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,qBAAqB,EAAE,+BAA+B,CAAC,CAAC;IACxG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QAEnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAEjF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;IAC7C,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,aAAa,CAAC,QAAQ,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAEnE,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC7D,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAChE,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEpD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;QAClE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,OAAO;YACP,UAAU;YACV,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAA
E,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAE9D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEzC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"comparator.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/comparator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EACL,yBAAyB,EACzB,eAAe,EACf,aAAa,GACd,MAAM,kBAAkB,CAAC;AAI1B,SAAS,UAAU,CAAC,SAAmB;IACrC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;YACzB,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,CAAC;YAC1C,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC;QACvE,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,iBAAiB,EAAE,MAAM,CAAC,CAAC;QAEzF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAEjD,0CAA0C;QAC1C,MAAM,SAAS,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAEhD,gCAAgC;QAChC,MAAM,UAAU,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;gBAChB,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC/E,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,M
AAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,6HAA6H;SAC9H,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,EAAE;gBACrB,eAAe,EAAE,CAAC,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;aAChB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,oEAAoE;QACpE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,mBAAmB;YACnB,sBAAsB;YACtB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;gBACf,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,wCAAwC;QACxC,EAAE,CAAC,KA
AK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,qBAAqB;QAEpE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,CAAC,CAAC;QAE3E,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACrD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QAC3D,qCAAqC;QACrC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,WAAW;YACX,cAAc;YACd,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,qBAAqB;QACrB,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACrD,6CAA6C;QAC7C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,8BAA8B;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;IAC7C,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCA
AuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAE3D,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAChD,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,uCAAuC,EAAE,GAAG,EAAE;IACrD,EAAE,CAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,aAAa,CAAC,QAAQ,EAAE,0CAA0C,EAAE,MAAM,CAAC,CAAC;QAElF,+DAA+D;QAC/D,MAAM,WAAW,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAClD,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAE1C,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,aAAa,CAAC,QAAQ,EAAE,+BAA+B,EAAE,MAAM,CAAC,CAAC;QAEvE,MAAM,WAAW,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAEtD,EAAE,CAAC,eAAe,EAAE,CAAC
;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,2CAA2C,EAAE,GAAG,EAAE;IACzD,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;QAC5E,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,kBAAkB,EAAE,4BAA4B,CAAC,CAAC;IAClG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,qBAAqB,EAAE,+BAA+B,CAAC,CAAC;IACxG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QAEnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAEjF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;IAC7C,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,aAAa,CAAC,QAAQ,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAEnE,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,
CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC7D,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAChE,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEpD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;QAClE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,OAAO;YACP,UAAU;YACV,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAE9D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEzC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -1,5 +1,5 @@
1
1
  import { describe, it, expect, vi } from "vitest";
2
- import { judgeAssertion } from "../judge.js";
2
+ import { judgeAssertion, buildJudgeSystemPrompt } from "../judge.js";
3
3
  // ---------------------------------------------------------------------------
4
4
  // Helpers
5
5
  // ---------------------------------------------------------------------------
@@ -44,5 +44,79 @@ describe("judgeAssertion", () => {
44
44
  const result = await judgeAssertion("some output", ASSERTION, client);
45
45
  expect(result.pass).toBe(true);
46
46
  });
47
+ it("uses standard prompt when mcpDeps not provided", async () => {
48
+ const client = mockClient(JSON.stringify({ pass: true, reasoning: "ok" }));
49
+ await judgeAssertion("output", ASSERTION, client);
50
+ const systemPrompt = client.generate.mock.calls[0][0];
51
+ expect(systemPrompt).toContain("binary assertion evaluator");
52
+ expect(systemPrompt).not.toContain("SIMULATION MODE");
53
+ });
54
+ it("uses MCP-augmented prompt when mcpDeps provided", async () => {
55
+ const client = mockClient(JSON.stringify({ pass: true, reasoning: "simulation valid" }));
56
+ const mcpDeps = [
57
+ {
58
+ server: "Slack",
59
+ url: "https://mcp.slack.com/mcp",
60
+ transport: "http",
61
+ matchedTools: ["slack_send_message"],
62
+ configSnippet: "{}",
63
+ },
64
+ ];
65
+ await judgeAssertion("output", ASSERTION, client, mcpDeps);
66
+ const systemPrompt = client.generate.mock.calls[0][0];
67
+ expect(systemPrompt).toContain("SIMULATION MODE");
68
+ expect(systemPrompt).toContain("Slack");
69
+ });
70
+ });
71
+ // ---------------------------------------------------------------------------
72
+ // buildJudgeSystemPrompt
73
+ // ---------------------------------------------------------------------------
74
+ describe("buildJudgeSystemPrompt", () => {
75
+ it("returns standard prompt when no MCP deps", () => {
76
+ const prompt = buildJudgeSystemPrompt();
77
+ expect(prompt).toContain("binary assertion evaluator");
78
+ expect(prompt).not.toContain("SIMULATION MODE");
79
+ });
80
+ it("returns standard prompt when mcpDeps is empty", () => {
81
+ const prompt = buildJudgeSystemPrompt([]);
82
+ expect(prompt).toContain("binary assertion evaluator");
83
+ expect(prompt).not.toContain("SIMULATION MODE");
84
+ });
85
+ it("returns augmented prompt with MCP deps", () => {
86
+ const mcpDeps = [
87
+ {
88
+ server: "Slack",
89
+ url: "https://mcp.slack.com/mcp",
90
+ transport: "http",
91
+ matchedTools: ["slack_send_message"],
92
+ configSnippet: "{}",
93
+ },
94
+ ];
95
+ const prompt = buildJudgeSystemPrompt(mcpDeps);
96
+ expect(prompt).toContain("SIMULATION MODE");
97
+ expect(prompt).toContain("Slack");
98
+ expect(prompt).toContain("binary assertion evaluator");
99
+ });
100
+ it("lists all simulated servers", () => {
101
+ const mcpDeps = [
102
+ {
103
+ server: "Slack",
104
+ url: "https://mcp.slack.com/mcp",
105
+ transport: "http",
106
+ matchedTools: ["slack_send_message"],
107
+ configSnippet: "{}",
108
+ },
109
+ {
110
+ server: "GitHub",
111
+ url: "https://api.githubcopilot.com/mcp/",
112
+ transport: "http",
113
+ matchedTools: ["github_create_pr"],
114
+ configSnippet: "{}",
115
+ },
116
+ ];
117
+ const prompt = buildJudgeSystemPrompt(mcpDeps);
118
+ expect(prompt).toContain("Slack");
119
+ expect(prompt).toContain("GitHub");
120
+ });
47
121
  });
48
122
  //# sourceMappingURL=judge.test.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"judge.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/judge.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAc,MAAM,QAAQ,CAAC;AAG9D,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE7C,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,UAAU,CAAC,QAAgB;IAClC,OAAO,EAAE,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;AAC5F,CAAC;AAED,MAAM,SAAS,GAAc;IAC3B,EAAE,EAAE,UAAU;IACd,IAAI,EAAE,6BAA6B;IACnC,IAAI,EAAE,SAAS;CAChB,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,2BAA2B,EAAE,CAAC,CACvE,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CACjC,6CAA6C,EAC7C,SAAS,EACT,MAAM,CACP,CAAC;QAEF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,KAAK;YACX,SAAS,EAAE,8BAA8B;SAC1C,CAAC,CACH,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAEtE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,UAAU,CAAC,kBAAkB,CAAC,CAAC;QAE9C,MAAM,MAAM,CACV,cAAc,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CACjD,CAAC,OAAO,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,UAAU,CACvB,yDAAyD,CAC1D,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,aAAa,EAAE,SAAS,EAAE,
MAAM,CAAC,CAAC;QACtE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"judge.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/judge.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAc,MAAM,QAAQ,CAAC;AAG9D,OAAO,EAAE,cAAc,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAGrE,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,UAAU,CAAC,QAAgB;IAClC,OAAO,EAAE,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;AAC5F,CAAC;AAED,MAAM,SAAS,GAAc;IAC3B,EAAE,EAAE,UAAU;IACd,IAAI,EAAE,6BAA6B;IACnC,IAAI,EAAE,SAAS;CAChB,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,2BAA2B,EAAE,CAAC,CACvE,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CACjC,6CAA6C,EAC7C,SAAS,EACT,MAAM,CACP,CAAC;QAEF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,KAAK;YACX,SAAS,EAAE,8BAA8B;SAC1C,CAAC,CACH,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAEtE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,UAAU,CAAC,kBAAkB,CAAC,CAAC;QAE9C,MAAM,MAAM,CACV,cAAc,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CACjD,CAAC,OAAO,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,UAAU,CACvB,yDAAyD,CAC1D,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,aAAa,EAA
E,SAAS,EAAE,MAAM,CAAC,CAAC;QACtE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAChD,CAAC;QAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QAC7D,MAAM,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAC9D,CAAC;QAEF,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAE3D,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAClD,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;IACtC,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,MAAM,GAAG,sBAAsB,EAAE,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,MAAM,GAAG,sBAAsB,CAAC,EAAE,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,MAAM,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,C
AAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;YACD;gBACE,MAAM,EAAE,QAAQ;gBAChB,GAAG,EAAE,oCAAoC;gBACzC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,kBAAkB,CAAC;gBAClC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,MAAM,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -64,6 +64,61 @@ This skill does basic text processing.`;
64
64
  expect(parsed.mcpServers.slack).toBeDefined();
65
65
  expect(parsed.mcpServers.slack.url).toBe("https://mcp.slack.com/mcp");
66
66
  });
67
+ it("detects Notion tool patterns", () => {
68
+ const content = `Use notion_create_page and notion_search to manage Notion workspace.`;
69
+ const deps = detectMcpDependencies(content);
70
+ expect(deps).toHaveLength(1);
71
+ expect(deps[0].server).toBe("Notion");
72
+ expect(deps[0].url).toBe("https://mcp.notion.com/mcp");
73
+ expect(deps[0].transport).toBe("http");
74
+ expect(deps[0].matchedTools).toContain("notion_create_page");
75
+ expect(deps[0].matchedTools).toContain("notion_search");
76
+ });
77
+ it("detects Jira tool patterns", () => {
78
+ const content = `Use jira_create_issue to file bugs in Jira.`;
79
+ const deps = detectMcpDependencies(content);
80
+ expect(deps).toHaveLength(1);
81
+ expect(deps[0].server).toBe("Jira");
82
+ expect(deps[0].url).toBe("https://mcp.atlassian.com/jira/mcp");
83
+ expect(deps[0].transport).toBe("http");
84
+ expect(deps[0].matchedTools).toContain("jira_create_issue");
85
+ });
86
+ it("detects Confluence tool patterns", () => {
87
+ const content = `Use confluence_create_page to write documentation.`;
88
+ const deps = detectMcpDependencies(content);
89
+ expect(deps).toHaveLength(1);
90
+ expect(deps[0].server).toBe("Confluence");
91
+ expect(deps[0].url).toBe("https://mcp.atlassian.com/confluence/mcp");
92
+ expect(deps[0].transport).toBe("http");
93
+ expect(deps[0].matchedTools).toContain("confluence_create_page");
94
+ });
95
+ it("detects Figma tool patterns", () => {
96
+ const content = `Use figma_get_file to inspect design files.`;
97
+ const deps = detectMcpDependencies(content);
98
+ expect(deps).toHaveLength(1);
99
+ expect(deps[0].server).toBe("Figma");
100
+ expect(deps[0].url).toBe("https://mcp.figma.com/mcp");
101
+ expect(deps[0].transport).toBe("http");
102
+ expect(deps[0].matchedTools).toContain("figma_get_file");
103
+ });
104
+ it("detects Sentry tool patterns", () => {
105
+ const content = `Use sentry_list_issues to monitor errors.`;
106
+ const deps = detectMcpDependencies(content);
107
+ expect(deps).toHaveLength(1);
108
+ expect(deps[0].server).toBe("Sentry");
109
+ expect(deps[0].url).toBe("https://mcp.sentry.dev/mcp");
110
+ expect(deps[0].transport).toBe("http");
111
+ expect(deps[0].matchedTools).toContain("sentry_list_issues");
112
+ });
113
+ it("detects multiple new servers simultaneously", () => {
114
+ const content = `Use notion_create_page for docs, jira_create_issue for bugs, and figma_get_file for design.`;
115
+ const deps = detectMcpDependencies(content);
116
+ expect(deps).toHaveLength(3);
117
+ const servers = deps.map((d) => d.server);
118
+ expect(servers).toContain("Notion");
119
+ expect(servers).toContain("Jira");
120
+ expect(servers).toContain("Figma");
121
+ });
67
122
  it("deduplicates tools from frontmatter and body", () => {
68
123
  const content = `---
69
124
  allowed-tools: [slack_send_message]
@@ -1 +1 @@
1
- {"version":3,"file":"mcp-detector.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/mcp-detector.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,qBAAqB,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAEpF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;kEAG8C,CAAC;QAE/D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,iEAAiE,CAAC;QAClF,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,OAAO,GAAG,qDAAqD,CAAC;QACtE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC3D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,kDAAkD,CAAC;QACnE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CA
AC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;;gBAIJ,CAAC;QAEb,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;uCAGmB,CAAC;QAEpC,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,OAAO,GAAG,uCAAuC,CAAC;QACxD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;sDAGkC,CAAC;QAEnD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC;QACxC,6CAA6C;QAC7C,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,oBAAoB,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,OAAO,GAAG;;;8CAG0B,CAAC;QAE3C,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG,yDAAyD,CAAC;QAC1E,MAAM,IAAI,G
AAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,OAAO,GAAG;;;cAGN,CAAC;QAEX,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG;;;sBAGE,CAAC;QAEnB,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG;;;kCAGc,CAAC;QAE/B,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,6BAA6B;IAC3E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"mcp-detector.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/mcp-detector.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,qBAAqB,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAEpF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;kEAG8C,CAAC;QAE/D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,iEAAiE,CAAC;QAClF,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,OAAO,GAAG,qDAAqD,CAAC;QACtE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC3D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,kDAAkD,CAAC;QACnE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CA
AC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;;gBAIJ,CAAC;QAEb,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;uCAGmB,CAAC;QAEpC,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,OAAO,GAAG,uCAAuC,CAAC;QACxD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,sEAAsE,CAAC;QACvF,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,OAAO,GAAG,6CAA6C,CAAC;QAC9D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,
CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;QAC/D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,OAAO,GAAG,oDAAoD,CAAC;QACrE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;QACrE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAAG,6CAA6C,CAAC;QAC9D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,2CAA2C,CAAC;QAC5D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;IAC/D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,OAAO,GAAG,6FAA6F,CAAC;QAC9G,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,OAAO,GAAG
,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACpC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;sDAGkC,CAAC;QAEnD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC;QACxC,6CAA6C;QAC7C,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,oBAAoB,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,OAAO,GAAG;;;8CAG0B,CAAC;QAE3C,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG,yDAAyD,CAAC;QAC1E,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,OAAO,GAAG;;;cAGN,CAAC;QAEX,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG;;;sBAGE,CAAC;QAEnB,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EA
AE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG;;;kCAGc,CAAC;QAE/B,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,6BAA6B;IAC3E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -24,6 +24,24 @@ describe("buildEvalInitPrompt", () => {
24
24
  expect(prompt).toContain("Best Practices");
25
25
  expect(prompt).toContain("objectively verifiable");
26
26
  });
27
+ it("includes MCP context for Slack skill", () => {
28
+ const slackSkill = "Use slack_send_message to post messages to Slack channels.";
29
+ const prompt = buildEvalInitPrompt(slackSkill);
30
+ expect(prompt).toContain("MCP Simulation Context");
31
+ expect(prompt).toContain("Slack");
32
+ });
33
+ it("is unchanged for non-MCP skill", () => {
34
+ const plainSkill = "# My Skill\nThis skill does text processing.";
35
+ const prompt = buildEvalInitPrompt(plainSkill);
36
+ expect(prompt).not.toContain("MCP Simulation Context");
37
+ });
38
+ it("lists all detected MCP servers", () => {
39
+ const multiSkill = "Use slack_send_message for chat and github_create_pr for PRs.";
40
+ const prompt = buildEvalInitPrompt(multiSkill);
41
+ expect(prompt).toContain("Slack");
42
+ expect(prompt).toContain("GitHub");
43
+ expect(prompt).toContain("MCP Simulation Context");
44
+ });
27
45
  });
28
46
  // ---------------------------------------------------------------------------
29
47
  // buildEvalSystemPrompt (MCP-aware)
@@ -1 +1 @@
1
- {"version":3,"file":"prompt-builder.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/prompt-builder.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,mBAAmB,EACnB,qBAAqB,EACrB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,sBAAsB,CAAC;AAE9B,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,MAAM,YAAY,GAAG,6CAA6C,CAAC;IAEnE,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,MAAM,GAAG,qBAAqB,CAAC,mCAAmC,CAAC,CAAC;QAC1E,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,2DAA2D,CAAC,CAAC;QACtF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,UAAU,GAAG,uEAAuE,CAAC;QAC3F,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAA
S,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,WAAW,GAAG,+CAA+C,CAAC;QACpE,MAAM,MAAM,GAAG,qBAAqB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,UAAU,GAAG,6FAA6F,CAAC;QACjH,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0EAA0E,EAAE,GAAG,EAAE;QAClF,MAAM,KAAK,GAAG,0CAA0C,CAAC;QACzD,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,0CAA0C,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,CAAC,yBAAyB,EAAE,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC9E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG;;;;;;;;;;;;;;;;;;;;uBAoBO,CAAC;QAEpB,MAAM,MAAM,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,6CAA6C,CAAC;QAC1D,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0
CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,GAAG,GAAG,gCAAgC,CAAC;QAC7C,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,GAAG,GAAG,wCAAwC,CAAC;QACrD,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,sBAAsB;IAC1E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"prompt-builder.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/prompt-builder.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,mBAAmB,EACnB,qBAAqB,EACrB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,sBAAsB,CAAC;AAE9B,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,MAAM,YAAY,GAAG,6CAA6C,CAAC;IAEnE,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,UAAU,GAAG,4DAA4D,CAAC;QAChF,MAAM,MAAM,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;QACnD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,UAAU,GAAG,8CAA8C,CAAC;QAClE,MAAM,MAAM,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,UAAU,GAAG,+DAA+D,CAAC;QACnF,MAAM,MAAM,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;
AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,MAAM,GAAG,qBAAqB,CAAC,mCAAmC,CAAC,CAAC;QAC1E,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,2DAA2D,CAAC,CAAC;QACtF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,UAAU,GAAG,uEAAuE,CAAC;QAC3F,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,WAAW,GAAG,+CAA+C,CAAC;QACpE,MAAM,MAAM,GAAG,qBAAqB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,UAAU,GAAG,6FAA6F,CAAC;QACjH,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0EAA0E,EAAE,GAAG,EAAE;QAClF,MAAM,KAAK,GAAG,0CAA0C,CAAC;QACzD,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,0CAA0C,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,yBAAyB,EAAE,
GAAG,EAAE;QACjC,MAAM,CAAC,yBAAyB,EAAE,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC9E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG;;;;;;;;;;;;;;;;;;;;uBAoBO,CAAC;QAEpB,MAAM,MAAM,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,6CAA6C,CAAC;QAC1D,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,GAAG,GAAG,gCAAgC,CAAC;QAC7C,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,GAAG,GAAG,wCAAwC,CAAC;QACrD,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,sBAAsB;IAC1E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,21 @@
1
+ import type { LlmClient } from "./llm.js";
2
+ import type { EvalVerdict } from "./verdict.js";
3
+ import type { ActionItems, BenchmarkAssertionResult } from "./benchmark.js";
4
/**
 * Aggregate numbers for one skill-vs-baseline comparison run, passed to generateActionItems.
 * NOTE(review): the meanings below are inferred from how action-items.js formats these
 * fields into the prompt — confirm against the code that produces them.
 * - passRate: presumably a 0..1 fraction (it is multiplied by 100 and rounded for display).
 * - skillRubricAvg / baselineRubricAvg: rubric averages, printed as "x.x/5".
 * - delta: signed value printed with an explicit "+" when positive; presumably skill minus baseline.
 */
+ interface ComparisonStats {
5
+ passRate: number;
6
+ skillRubricAvg: number;
7
+ baselineRubricAvg: number;
8
+ delta: number;
9
+ }
10
/**
 * Outcome of a single eval case in a comparison run.
 * - eval_id / eval_name: identify the case; both appear in the per-case prompt heading.
 * - winner: which side won the case, or "tie".
 * - *ContentScore / *StructureScore: per-side scores, printed as "n/5" in the prompt.
 * - assertionResults: per-assertion outcomes; action-items.js reads `pass`, `text` and
 *   `reasoning` from each entry.
 */
+ interface CaseResult {
11
+ eval_id: number;
12
+ eval_name: string;
13
+ winner: "skill" | "baseline" | "tie";
14
+ skillContentScore: number;
15
+ skillStructureScore: number;
16
+ baselineContentScore: number;
17
+ baselineStructureScore: number;
18
+ assertionResults: BenchmarkAssertionResult[];
19
+ }
20
/**
 * Asks the LLM client for recommendations based on a finished comparison.
 * @param client LLM client; the implementation calls `client.generate(systemPrompt, userPrompt)`.
 * @param verdict overall verdict of the comparison, echoed into the prompt.
 * @param stats aggregate statistics for the run.
 * @param cases per-case results included in the prompt breakdown.
 * @param skillContent text of the skill file; the implementation truncates it to 2000 characters.
 * @returns the parsed action items; a response that is not valid JSON yields a default
 *          "improve" recommendation rather than a rejection.
 */
+ export declare function generateActionItems(client: LlmClient, verdict: EvalVerdict, stats: ComparisonStats, cases: CaseResult[], skillContent: string): Promise<ActionItems>;
21
+ export {};
@@ -0,0 +1,97 @@
1
+ // ---------------------------------------------------------------------------
2
+ // action-items.ts -- generate actionable recommendations from A/B comparison
3
+ // ---------------------------------------------------------------------------
4
/**
 * System prompt sent with every action-items request. It asks the model to reply with a
 * single JSON object whose keys (recommendation, summary, weaknesses, strengths,
 * suggestedFocus) are the ones parseActionItems reads back out of the response.
 * The text is runtime data: editing it changes what the model is told.
 */
+ const SYSTEM_PROMPT = `You are an expert skill evaluator analyzing A/B comparison results.
5
+
6
+ A skill file (SKILL.md) guides an AI assistant's behavior. An A/B comparison runs the same prompts with and without the skill, then blind-scores both outputs on content (1-5) and structure (1-5).
7
+
8
+ Analyze the results and produce concrete, actionable recommendations.
9
+
10
+ Respond with ONLY valid JSON (no code fences, no preamble):
11
+ {
12
+ "recommendation": "keep" | "improve" | "rewrite" | "remove",
13
+ "summary": "<1-2 sentences: what happened and what to do>",
14
+ "weaknesses": ["<specific weakness 1>", ...],
15
+ "strengths": ["<specific strength 1>", ...],
16
+ "suggestedFocus": "<the single most impactful change to make>"
17
+ }
18
+
19
+ Recommendation criteria:
20
+ - "keep": Skill clearly beats baseline — high pass rate (>=80%), consistent wins, delta > +1
21
+ - "improve": Skill shows promise but has fixable weaknesses — moderate pass rate, some wins
22
+ - "rewrite": Skill barely helps or is inconsistent — low pass rate, mixed wins/losses
23
+ - "remove": Skill actively degrades output — baseline consistently wins, negative delta
24
+
25
+ Be specific in weaknesses and strengths — reference actual eval cases and scores, not generic advice.
26
+ Keep suggestedFocus to one concrete, actionable sentence.`;
27
/**
 * Builds the user-message half of the action-items request.
 *
 * The result is a Markdown-style document with four parts: the verdict, the aggregate
 * statistics, one section per eval case, and the skill file text (cut to 2000 UTF-16
 * code units with a "[... truncated ...]" marker when longer).
 *
 * @param verdict interpolated as-is into the "## Verdict" heading.
 * @param stats object with passRate, skillRubricAvg, baselineRubricAvg and delta (all numbers;
 *              toFixed is called on the last three).
 * @param cases array of per-case results; each needs eval_name, eval_id, winner, the four
 *              score fields and an assertionResults array.
 * @param skillContent skill file text.
 * @returns the assembled prompt string.
 */
+ function buildUserPrompt(verdict, stats, cases, skillContent) {
28
+ const caseBreakdown = cases.map((c) => {
29
/* Only failing assertions are listed individually; passing ones are just counted below. */
+ const failed = c.assertionResults.filter((a) => !a.pass);
30
+ const failedSection = failed.length > 0
31
+ ? failed.map((a) => ` - FAIL: ${a.text} — ${a.reasoning}`).join("\n")
32
+ : " (all passed)";
33
+ return `### Case: "${c.eval_name}" (eval #${c.eval_id})
34
+ - Winner: ${c.winner}
35
+ - Skill scores: content=${c.skillContentScore}/5, structure=${c.skillStructureScore}/5
36
+ - Baseline scores: content=${c.baselineContentScore}/5, structure=${c.baselineStructureScore}/5
37
+ - Assertions: ${c.assertionResults.filter((a) => a.pass).length}/${c.assertionResults.length} passed
38
+ ${failedSection}`;
39
+ }).join("\n\n");
40
+ // Truncate skill content to avoid token bloat (keep first 2000 chars)
41
+ const truncatedSkill = skillContent.length > 2000
42
+ ? skillContent.slice(0, 2000) + "\n\n[... truncated ...]"
43
+ : skillContent;
44
+ return `## Verdict: ${verdict}
45
+
46
+ ## Comparison Statistics
47
+ - Assertion pass rate: ${Math.round(stats.passRate * 100)}%
48
+ - Skill rubric average: ${stats.skillRubricAvg.toFixed(1)}/5
49
+ - Baseline rubric average: ${stats.baselineRubricAvg.toFixed(1)}/5
50
+ - Delta: ${stats.delta > 0 ? "+" : ""}${stats.delta.toFixed(1)}
51
+
52
+ ## Per-Case Breakdown
53
+ ${caseBreakdown}
54
+
55
+ ## SKILL.md Content
56
+ ${truncatedSkill}
57
+
58
+ Analyze these results and provide your recommendation.`;
59
+ }
60
/** Recommendations the model may return; any other value is coerced to "improve". */
const VALID_RECOMMENDATIONS = new Set(["keep", "improve", "rewrite", "remove"]);

/** Result used whenever the LLM response cannot be read as a JSON object. */
function fallbackActionItems() {
    return {
        recommendation: "improve",
        summary: "Could not parse action items from LLM response.",
        weaknesses: [],
        strengths: [],
        suggestedFocus: "Re-run the comparison for actionable recommendations.",
    };
}

/** Parses `text` as JSON and returns it only if it is a non-array object; otherwise undefined. */
function tryParseObject(text) {
    try {
        const value = JSON.parse(text.trim());
        // JSON.parse happily returns null, numbers, strings and arrays; none of those
        // can carry the fields we need, so treat them the same as a syntax error.
        return value !== null && typeof value === "object" && !Array.isArray(value)
            ? value
            : undefined;
    }
    catch {
        return undefined;
    }
}

/** Keeps the string members of `value`; anything that is not an array becomes []. */
function stringItems(value) {
    return Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
}

/**
 * Converts the raw LLM response into a validated action-items object.
 *
 * Accepted shapes, tried in order:
 *   1. a fenced code block (``` or ```json) containing a JSON object,
 *   2. the whole response being a JSON object,
 *   3. a JSON object embedded in surrounding prose (first "{" to last "}").
 *
 * Never throws: a response that yields no JSON object (including `null`, arrays and
 * primitives, which previously crashed or slipped through) produces the fallback result.
 *
 * @param raw text returned by the LLM.
 * @returns an object with `recommendation` (one of keep/improve/rewrite/remove, defaulting
 *          to "improve"), `summary`, `weaknesses`, `strengths` and `suggestedFocus`;
 *          missing or wrongly-typed fields become "" or [].
 */
function parseActionItems(raw) {
    const text = typeof raw === "string" ? raw : "";
    const fenceMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
    let json = tryParseObject(fenceMatch ? fenceMatch[1] : text);
    if (json === undefined) {
        // Models often ignore "no preamble"; salvage the outermost {...} span if there is one.
        const start = text.indexOf("{");
        const end = text.lastIndexOf("}");
        if (start !== -1 && end > start) {
            json = tryParseObject(text.slice(start, end + 1));
        }
    }
    if (json === undefined) {
        return fallbackActionItems();
    }
    const rec = typeof json.recommendation === "string" && VALID_RECOMMENDATIONS.has(json.recommendation)
        ? json.recommendation
        : "improve";
    return {
        recommendation: rec,
        summary: typeof json.summary === "string" ? json.summary : "",
        weaknesses: stringItems(json.weaknesses),
        strengths: stringItems(json.strengths),
        suggestedFocus: typeof json.suggestedFocus === "string" ? json.suggestedFocus : "",
    };
}
92
/**
 * Produces action items for a finished comparison: builds the user prompt, sends it together
 * with SYSTEM_PROMPT through `client.generate`, and parses the `text` field of the response.
 *
 * @param client object exposing `generate(systemPrompt, userPrompt)` that resolves to `{ text }`.
 * @param verdict overall verdict, forwarded to buildUserPrompt.
 * @param stats aggregate statistics, forwarded to buildUserPrompt.
 * @param cases per-case results, forwarded to buildUserPrompt.
 * @param skillContent skill file text, forwarded to buildUserPrompt.
 * @returns a promise for the parsed action items. A rejection from `client.generate` is not
 *          caught here and propagates to the caller.
 */
+ export async function generateActionItems(client, verdict, stats, cases, skillContent) {
93
+ const userPrompt = buildUserPrompt(verdict, stats, cases, skillContent);
94
+ const { text } = await client.generate(SYSTEM_PROMPT, userPrompt);
95
+ return parseActionItems(text);
96
+ }
97
+ //# sourceMappingURL=action-items.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"action-items.js","sourceRoot":"","sources":["../../src/eval/action-items.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6EAA6E;AAC7E,8EAA8E;AAwB9E,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;0DAsBoC,CAAC;AAE3D,SAAS,eAAe,CACtB,OAAoB,EACpB,KAAsB,EACtB,KAAmB,EACnB,YAAoB;IAEpB,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACpC,MAAM,MAAM,GAAG,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACzD,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC;YACrC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YACtE,CAAC,CAAC,gBAAgB,CAAC;QACrB,OAAO,cAAc,CAAC,CAAC,SAAS,YAAY,CAAC,CAAC,OAAO;YAC7C,CAAC,CAAC,MAAM;0BACM,CAAC,CAAC,iBAAiB,iBAAiB,CAAC,CAAC,mBAAmB;6BACtD,CAAC,CAAC,oBAAoB,iBAAiB,CAAC,CAAC,sBAAsB;gBAC5E,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,gBAAgB,CAAC,MAAM;EAC1F,aAAa,EAAE,CAAC;IAChB,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,sEAAsE;IACtE,MAAM,cAAc,GAAG,YAAY,CAAC,MAAM,GAAG,IAAI;QAC/C,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,yBAAyB;QACzD,CAAC,CAAC,YAAY,CAAC;IAEjB,OAAO,eAAe,OAAO;;;yBAGN,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC;0BAC/B,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC;6BAC5B,KAAK,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC;WACpD,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;;;EAG5D,aAAa;;;EAGb,cAAc;;uDAEuC,CAAC;AACxD,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW;IACnC,IAAI,IAA6B,CAAC;IAClC,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QACjD,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,cAAc,EAAE,SAAS;YACzB,OAAO,EAAE,iDAAiD;YAC1D,UAAU,EAAE,EAAE;YACd,SAAS,EAAE,EAAE;YACb,cAAc,EAAE,uDAAuD;SACxE,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE
,SAAS,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAC;IACpE,MAAM,GAAG,GAAG,OAAO,IAAI,CAAC,cAAc,KAAK,QAAQ,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC;QACvF,CAAC,CAAC,IAAI,CAAC,cAA+C;QACtD,CAAC,CAAC,SAAS,CAAC;IAEd,OAAO;QACL,cAAc,EAAE,GAAG;QACnB,OAAO,EAAE,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;QAC7D,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC;YACxC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;YACnE,CAAC,CAAC,EAAE;QACN,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC;YACtC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;YAClE,CAAC,CAAC,EAAE;QACN,cAAc,EAAE,OAAO,IAAI,CAAC,cAAc,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE;KACnF,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,MAAiB,EACjB,OAAoB,EACpB,KAAsB,EACtB,KAAmB,EACnB,YAAoB;IAEpB,MAAM,UAAU,GAAG,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,CAAC,CAAC;IACxE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC;IAClE,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC"}
@@ -5,7 +5,7 @@ export interface HistorySummary {
5
5
  model: string;
6
6
  skillName: string;
7
7
  passRate: number;
8
- type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
8
+ type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
9
9
  caseCount: number;
10
10
  totalDurationMs: number;
11
11
  totalTokens: number | null;
@@ -14,14 +14,14 @@ export interface HistorySummary {
14
14
  }
15
15
  export interface HistoryFilter {
16
16
  model?: string;
17
- type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
17
+ type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
18
18
  from?: string;
19
19
  to?: string;
20
20
  }
21
21
  export interface CaseHistoryEntry {
22
22
  timestamp: string;
23
23
  model: string;
24
- type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
24
+ type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
25
25
  provider?: string;
26
26
  pass_rate: number;
27
27
  durationMs?: number;
@@ -39,7 +39,7 @@ export interface RegressionEntry {
39
39
  change: "regression" | "improvement";
40
40
  }
41
41
  export declare function writeHistoryEntry(skillDir: string, result: BenchmarkResult & {
42
- type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
42
+ type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
43
43
  }): Promise<string>;
44
44
  export declare function deleteHistoryEntry(skillDir: string, timestamp: string): Promise<boolean>;
45
45
  export declare function listHistory(skillDir: string, filter?: HistoryFilter): Promise<HistorySummary[]>;