vskill 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/eval/__tests__/benchmark.test.js +29 -0
- package/dist/eval/__tests__/benchmark.test.js.map +1 -1
- package/dist/eval/__tests__/comparator.test.js +80 -0
- package/dist/eval/__tests__/comparator.test.js.map +1 -1
- package/dist/eval/__tests__/judge.test.js +75 -1
- package/dist/eval/__tests__/judge.test.js.map +1 -1
- package/dist/eval/__tests__/mcp-detector.test.js +55 -0
- package/dist/eval/__tests__/mcp-detector.test.js.map +1 -1
- package/dist/eval/__tests__/prompt-builder.test.js +18 -0
- package/dist/eval/__tests__/prompt-builder.test.js.map +1 -1
- package/dist/eval/action-items.d.ts +21 -0
- package/dist/eval/action-items.js +97 -0
- package/dist/eval/action-items.js.map +1 -0
- package/dist/eval/benchmark-history.d.ts +4 -4
- package/dist/eval/benchmark-history.js.map +1 -1
- package/dist/eval/benchmark.d.ts +18 -1
- package/dist/eval/benchmark.js.map +1 -1
- package/dist/eval/comparator.d.ts +2 -1
- package/dist/eval/comparator.js +10 -3
- package/dist/eval/comparator.js.map +1 -1
- package/dist/eval/judge.d.ts +3 -1
- package/dist/eval/judge.js +18 -2
- package/dist/eval/judge.js.map +1 -1
- package/dist/eval/llm.js +5 -7
- package/dist/eval/llm.js.map +1 -1
- package/dist/eval/mcp-detector.js +30 -0
- package/dist/eval/mcp-detector.js.map +1 -1
- package/dist/eval/prompt-builder.js +22 -1
- package/dist/eval/prompt-builder.js.map +1 -1
- package/dist/eval/skill-scanner.d.ts +6 -0
- package/dist/eval/skill-scanner.js +55 -9
- package/dist/eval/skill-scanner.js.map +1 -1
- package/dist/eval-server/api-routes.js +37 -1
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-server/skill-create-routes.js +17 -2
- package/dist/eval-server/skill-create-routes.js.map +1 -1
- package/dist/eval-ui/assets/index-BHqTxODT.js +75 -0
- package/dist/eval-ui/index.html +1 -1
- package/dist/utils/__tests__/resolve-binary.integration.test.d.ts +1 -0
- package/dist/utils/__tests__/resolve-binary.integration.test.js +138 -0
- package/dist/utils/__tests__/resolve-binary.integration.test.js.map +1 -0
- package/dist/utils/resolve-binary.js +27 -8
- package/dist/utils/resolve-binary.js.map +1 -1
- package/package.json +1 -1
- package/dist/eval-ui/assets/index-CU39Pi0h.js +0 -75
|
@@ -61,5 +61,34 @@ describe("benchmark", () => {
|
|
|
61
61
|
const result = await readBenchmark(testDir);
|
|
62
62
|
expect(result).toBeNull();
|
|
63
63
|
});
|
|
64
|
+
it("writes and reads BenchmarkResult with mcpSimulation", async () => {
|
|
65
|
+
const benchmarkWithMcp = {
|
|
66
|
+
...SAMPLE_BENCHMARK,
|
|
67
|
+
mcpSimulation: {
|
|
68
|
+
active: true,
|
|
69
|
+
servers: ["Slack", "GitHub"],
|
|
70
|
+
},
|
|
71
|
+
};
|
|
72
|
+
await writeBenchmark(testDir, benchmarkWithMcp);
|
|
73
|
+
const result = await readBenchmark(testDir);
|
|
74
|
+
expect(result).not.toBeNull();
|
|
75
|
+
expect(result.mcpSimulation).toBeDefined();
|
|
76
|
+
expect(result.mcpSimulation.active).toBe(true);
|
|
77
|
+
expect(result.mcpSimulation.servers).toEqual(["Slack", "GitHub"]);
|
|
78
|
+
});
|
|
79
|
+
it("reads BenchmarkResult without mcpSimulation (backward compat)", async () => {
|
|
80
|
+
// Write a benchmark without mcpSimulation field
|
|
81
|
+
const rawBenchmark = {
|
|
82
|
+
timestamp: "2026-03-01T00:00:00.000Z",
|
|
83
|
+
model: "claude-sonnet-4-6",
|
|
84
|
+
skill_name: "old-skill",
|
|
85
|
+
cases: [],
|
|
86
|
+
};
|
|
87
|
+
writeFileSync(join(testDir, "evals", "benchmark.json"), JSON.stringify(rawBenchmark));
|
|
88
|
+
const result = await readBenchmark(testDir);
|
|
89
|
+
expect(result).not.toBeNull();
|
|
90
|
+
expect(result.mcpSimulation).toBeUndefined();
|
|
91
|
+
expect(result.skill_name).toBe("old-skill");
|
|
92
|
+
});
|
|
64
93
|
});
|
|
65
94
|
//# sourceMappingURL=benchmark.test.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"benchmark.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/benchmark.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAGhE,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,IAAI,OAAe,CAAC;AAEpB,MAAM,gBAAgB,GAAoB;IACxC,SAAS,EAAE,0BAA0B;IACrC,KAAK,EAAE,mBAAmB;IAC1B,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,YAAY;YACvB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,GAAG;YACd,UAAU,EAAE;gBACV;oBACE,EAAE,EAAE,IAAI;oBACR,IAAI,EAAE,cAAc;oBACpB,IAAI,EAAE,IAAI;oBACV,SAAS,EAAE,YAAY;iBACxB;aACF;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IACzB,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,gBAAgB,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACvD,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,cAAc,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAChD,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,aAAa,CACX,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,CACjC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAA
O,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAEjE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"benchmark.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/benchmark.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAGhE,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,IAAI,OAAe,CAAC;AAEpB,MAAM,gBAAgB,GAAoB;IACxC,SAAS,EAAE,0BAA0B;IACrC,KAAK,EAAE,mBAAmB;IAC1B,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,YAAY;YACvB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,GAAG;YACd,UAAU,EAAE;gBACV;oBACE,EAAE,EAAE,IAAI;oBACR,IAAI,EAAE,cAAc;oBACpB,IAAI,EAAE,IAAI;oBACV,SAAS,EAAE,YAAY;iBACxB;aACF;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;IACzB,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,gBAAgB,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACvD,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,cAAc,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAChD,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACtC,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,aAAa,CACX,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,CACjC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAA
O,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAEjE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,gBAAgB,GAAoB;YACxC,GAAG,gBAAgB;YACnB,aAAa,EAAE;gBACb,MAAM,EAAE,IAAI;gBACZ,OAAO,EAAE,CAAC,OAAO,EAAE,QAAQ,CAAC;aAC7B;SACF,CAAC;QAEF,MAAM,cAAc,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAE5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,CAAC;QAC5C,MAAM,CAAC,MAAO,CAAC,aAAc,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,MAAM,CAAC,MAAO,CAAC,aAAc,CAAC,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+DAA+D,EAAE,KAAK,IAAI,EAAE;QAC7E,gDAAgD;QAChD,MAAM,YAAY,GAAG;YACnB,SAAS,EAAE,0BAA0B;YACrC,KAAK,EAAE,mBAAmB;YAC1B,UAAU,EAAE,WAAW;YACvB,KAAK,EAAE,EAAE;SACV,CAAC;QACF,aAAa,CACX,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC,EACxC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,CAC7B,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAO,CAAC,aAAa,CAAC,CAAC,aAAa,EAAE,CAAC;QAC9C,MAAM,CAAC,MAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -136,6 +136,86 @@ describe("runComparison", () => {
|
|
|
136
136
|
vi.restoreAllMocks();
|
|
137
137
|
});
|
|
138
138
|
});
|
|
139
|
+
describe("scoreComparison with MCP deps", () => {
|
|
140
|
+
it("uses standard prompt when no MCP deps", async () => {
|
|
141
|
+
const client = mockClient([
|
|
142
|
+
JSON.stringify({
|
|
143
|
+
content_score_a: 3,
|
|
144
|
+
structure_score_a: 3,
|
|
145
|
+
content_score_b: 3,
|
|
146
|
+
structure_score_b: 3,
|
|
147
|
+
winner: "tie",
|
|
148
|
+
}),
|
|
149
|
+
]);
|
|
150
|
+
await scoreComparison("A", "B", "prompt", client);
|
|
151
|
+
const systemPrompt = client.generate.mock.calls[0][0];
|
|
152
|
+
expect(systemPrompt).not.toContain("SIMULATED MCP");
|
|
153
|
+
});
|
|
154
|
+
it("augments prompt when MCP deps present", async () => {
|
|
155
|
+
const client = mockClient([
|
|
156
|
+
JSON.stringify({
|
|
157
|
+
content_score_a: 3,
|
|
158
|
+
structure_score_a: 3,
|
|
159
|
+
content_score_b: 3,
|
|
160
|
+
structure_score_b: 3,
|
|
161
|
+
winner: "tie",
|
|
162
|
+
}),
|
|
163
|
+
]);
|
|
164
|
+
const mcpDeps = [
|
|
165
|
+
{
|
|
166
|
+
server: "Slack",
|
|
167
|
+
url: "https://mcp.slack.com/mcp",
|
|
168
|
+
transport: "http",
|
|
169
|
+
matchedTools: ["slack_send_message"],
|
|
170
|
+
configSnippet: "{}",
|
|
171
|
+
},
|
|
172
|
+
];
|
|
173
|
+
await scoreComparison("A", "B", "prompt", client, mcpDeps);
|
|
174
|
+
const systemPrompt = client.generate.mock.calls[0][0];
|
|
175
|
+
expect(systemPrompt).toContain("SIMULATED MCP");
|
|
176
|
+
expect(systemPrompt).toContain("Slack");
|
|
177
|
+
});
|
|
178
|
+
});
|
|
179
|
+
describe("runComparison with MCP auto-detection", () => {
|
|
180
|
+
it("auto-detects MCP deps from skill content and augments comparison", async () => {
|
|
181
|
+
const client = mockClient([
|
|
182
|
+
"skill output",
|
|
183
|
+
"baseline output",
|
|
184
|
+
JSON.stringify({
|
|
185
|
+
content_score_a: 3,
|
|
186
|
+
structure_score_a: 3,
|
|
187
|
+
content_score_b: 3,
|
|
188
|
+
structure_score_b: 3,
|
|
189
|
+
winner: "tie",
|
|
190
|
+
}),
|
|
191
|
+
]);
|
|
192
|
+
vi.spyOn(Math, "random").mockReturnValue(0.3);
|
|
193
|
+
await runComparison("prompt", "Use slack_send_message to send messages.", client);
|
|
194
|
+
// The third call is the scoring call - check its system prompt
|
|
195
|
+
const scoringCall = client.generate.mock.calls[2];
|
|
196
|
+
expect(scoringCall[0]).toContain("SIMULATED MCP");
|
|
197
|
+
expect(scoringCall[0]).toContain("Slack");
|
|
198
|
+
vi.restoreAllMocks();
|
|
199
|
+
});
|
|
200
|
+
it("does not augment comparison for non-MCP skills", async () => {
|
|
201
|
+
const client = mockClient([
|
|
202
|
+
"skill output",
|
|
203
|
+
"baseline output",
|
|
204
|
+
JSON.stringify({
|
|
205
|
+
content_score_a: 3,
|
|
206
|
+
structure_score_a: 3,
|
|
207
|
+
content_score_b: 3,
|
|
208
|
+
structure_score_b: 3,
|
|
209
|
+
winner: "tie",
|
|
210
|
+
}),
|
|
211
|
+
]);
|
|
212
|
+
vi.spyOn(Math, "random").mockReturnValue(0.3);
|
|
213
|
+
await runComparison("prompt", "Plain text skill without MCP.", client);
|
|
214
|
+
const scoringCall = client.generate.mock.calls[2];
|
|
215
|
+
expect(scoringCall[0]).not.toContain("SIMULATED MCP");
|
|
216
|
+
vi.restoreAllMocks();
|
|
217
|
+
});
|
|
218
|
+
});
|
|
139
219
|
describe("generateComparisonOutputs with onProgress", () => {
|
|
140
220
|
it("calls onProgress with generating_skill before first LLM call", async () => {
|
|
141
221
|
const client = mockClient(["skill response", "baseline response"]);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"comparator.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/comparator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EACL,yBAAyB,EACzB,eAAe,EACf,aAAa,GACd,MAAM,kBAAkB,CAAC;AAG1B,SAAS,UAAU,CAAC,SAAmB;IACrC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;YACzB,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,CAAC;YAC1C,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC;QACvE,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,iBAAiB,EAAE,MAAM,CAAC,CAAC;QAEzF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAEjD,0CAA0C;QAC1C,MAAM,SAAS,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAEhD,gCAAgC;QAChC,MAAM,UAAU,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;gBAChB,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC/E,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAA
M,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,6HAA6H;SAC9H,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,EAAE;gBACrB,eAAe,EAAE,CAAC,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;aAChB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,oEAAoE;QACpE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,mBAAmB;YACnB,sBAAsB;YACtB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;gBACf,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,wCAAwC;QACxC,EAAE,CAAC,KAAK
,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,qBAAqB;QAEpE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,CAAC,CAAC;QAE3E,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACrD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QAC3D,qCAAqC;QACrC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,WAAW;YACX,cAAc;YACd,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,qBAAqB;QACrB,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACrD,6CAA6C;QAC7C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,8BAA8B;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,2CAA2C,EAAE,GAAG,EAAE;IACzD,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;QAC5E,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,kBAAkB,EAAE,4BAA4B,CAAC,CAAC;IAClG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBA
AyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,qBAAqB,EAAE,+BAA+B,CAAC,CAAC;IACxG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QAEnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAEjF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;IAC7C,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,aAAa,CAAC,QAAQ,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAEnE,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC7D,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAChE,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEpD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;QAClE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,OAAO;YACP,UAAU;YACV,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,
CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAE9D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEzC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"comparator.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/comparator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EACL,yBAAyB,EACzB,eAAe,EACf,aAAa,GACd,MAAM,kBAAkB,CAAC;AAI1B,SAAS,UAAU,CAAC,SAAmB;IACrC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;YACzB,MAAM,IAAI,GAAG,SAAS,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,CAAC;YAC1C,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,GAAG,EAAE,CAAC;QACvE,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,iBAAiB,EAAE,MAAM,CAAC,CAAC;QAEzF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAEjD,0CAA0C;QAC1C,MAAM,SAAS,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAEhD,gCAAgC;QAChC,MAAM,UAAU,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;gBAChB,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC/E,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAA
M,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,6HAA6H;SAC9H,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,EAAE;gBACrB,eAAe,EAAE,CAAC,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;aAChB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,oEAAoE;QACpE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,mBAAmB;YACnB,sBAAsB;YACtB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;gBACf,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,wCAAwC;QACxC,EAAE,CAAC,KAAK
,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,qBAAqB;QAEpE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,CAAC,CAAC;QAE3E,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACrD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QAC3D,qCAAqC;QACrC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,WAAW;YACX,cAAc;YACd,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,qBAAqB;QACrB,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACrD,6CAA6C;QAC7C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,8BAA8B;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;IAC7C,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAu
C,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAE3D,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAChD,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,uCAAuC,EAAE,GAAG,EAAE;IACrD,EAAE,CAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,aAAa,CAAC,QAAQ,EAAE,0CAA0C,EAAE,MAAM,CAAC,CAAC;QAElF,+DAA+D;QAC/D,MAAM,WAAW,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAClD,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAE1C,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,aAAa,CAAC,QAAQ,EAAE,+BAA+B,EAAE,MAAM,CAAC,CAAC;QAEvE,MAAM,WAAW,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3D,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAEtD,EAAE,CAAC,eAAe,EAAE,CAAC;I
ACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,2CAA2C,EAAE,GAAG,EAAE;IACzD,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;QAC5E,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,kBAAkB,EAAE,4BAA4B,CAAC,CAAC;IAClG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kEAAkE,EAAE,KAAK,IAAI,EAAE;QAChF,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,uBAAuB,CAAC,CAAC,EAAE,qBAAqB,EAAE,+BAA+B,CAAC,CAAC;IACxG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAE9E,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QAEnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAEjF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,+BAA+B,EAAE,GAAG,EAAE;IAC7C,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,cAAc;YACd,iBAAiB;YACjB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAC9C,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAE3B,MAAM,aAAa,CAAC,QAAQ,EAAE,eAAe,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAEnE,MAAM,CAAC,UAAU,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CA
AC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC7D,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAChE,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAEpD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;QAClE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,OAAO;YACP,UAAU;YACV,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,eAAe,EAAE,CAAC,EAAE,iBAAiB,EAAE,CAAC;gBACxC,MAAM,EAAE,KAAK;aACd,CAAC;SACH,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAE9D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEzC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { describe, it, expect, vi } from "vitest";
|
|
2
|
-
import { judgeAssertion } from "../judge.js";
|
|
2
|
+
import { judgeAssertion, buildJudgeSystemPrompt } from "../judge.js";
|
|
3
3
|
// ---------------------------------------------------------------------------
|
|
4
4
|
// Helpers
|
|
5
5
|
// ---------------------------------------------------------------------------
|
|
@@ -44,5 +44,79 @@ describe("judgeAssertion", () => {
|
|
|
44
44
|
const result = await judgeAssertion("some output", ASSERTION, client);
|
|
45
45
|
expect(result.pass).toBe(true);
|
|
46
46
|
});
|
|
47
|
+
it("uses standard prompt when mcpDeps not provided", async () => {
|
|
48
|
+
const client = mockClient(JSON.stringify({ pass: true, reasoning: "ok" }));
|
|
49
|
+
await judgeAssertion("output", ASSERTION, client);
|
|
50
|
+
const systemPrompt = client.generate.mock.calls[0][0];
|
|
51
|
+
expect(systemPrompt).toContain("binary assertion evaluator");
|
|
52
|
+
expect(systemPrompt).not.toContain("SIMULATION MODE");
|
|
53
|
+
});
|
|
54
|
+
it("uses MCP-augmented prompt when mcpDeps provided", async () => {
|
|
55
|
+
const client = mockClient(JSON.stringify({ pass: true, reasoning: "simulation valid" }));
|
|
56
|
+
const mcpDeps = [
|
|
57
|
+
{
|
|
58
|
+
server: "Slack",
|
|
59
|
+
url: "https://mcp.slack.com/mcp",
|
|
60
|
+
transport: "http",
|
|
61
|
+
matchedTools: ["slack_send_message"],
|
|
62
|
+
configSnippet: "{}",
|
|
63
|
+
},
|
|
64
|
+
];
|
|
65
|
+
await judgeAssertion("output", ASSERTION, client, mcpDeps);
|
|
66
|
+
const systemPrompt = client.generate.mock.calls[0][0];
|
|
67
|
+
expect(systemPrompt).toContain("SIMULATION MODE");
|
|
68
|
+
expect(systemPrompt).toContain("Slack");
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// buildJudgeSystemPrompt
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
describe("buildJudgeSystemPrompt", () => {
|
|
75
|
+
it("returns standard prompt when no MCP deps", () => {
|
|
76
|
+
const prompt = buildJudgeSystemPrompt();
|
|
77
|
+
expect(prompt).toContain("binary assertion evaluator");
|
|
78
|
+
expect(prompt).not.toContain("SIMULATION MODE");
|
|
79
|
+
});
|
|
80
|
+
it("returns standard prompt when mcpDeps is empty", () => {
|
|
81
|
+
const prompt = buildJudgeSystemPrompt([]);
|
|
82
|
+
expect(prompt).toContain("binary assertion evaluator");
|
|
83
|
+
expect(prompt).not.toContain("SIMULATION MODE");
|
|
84
|
+
});
|
|
85
|
+
it("returns augmented prompt with MCP deps", () => {
|
|
86
|
+
const mcpDeps = [
|
|
87
|
+
{
|
|
88
|
+
server: "Slack",
|
|
89
|
+
url: "https://mcp.slack.com/mcp",
|
|
90
|
+
transport: "http",
|
|
91
|
+
matchedTools: ["slack_send_message"],
|
|
92
|
+
configSnippet: "{}",
|
|
93
|
+
},
|
|
94
|
+
];
|
|
95
|
+
const prompt = buildJudgeSystemPrompt(mcpDeps);
|
|
96
|
+
expect(prompt).toContain("SIMULATION MODE");
|
|
97
|
+
expect(prompt).toContain("Slack");
|
|
98
|
+
expect(prompt).toContain("binary assertion evaluator");
|
|
99
|
+
});
|
|
100
|
+
it("lists all simulated servers", () => {
|
|
101
|
+
const mcpDeps = [
|
|
102
|
+
{
|
|
103
|
+
server: "Slack",
|
|
104
|
+
url: "https://mcp.slack.com/mcp",
|
|
105
|
+
transport: "http",
|
|
106
|
+
matchedTools: ["slack_send_message"],
|
|
107
|
+
configSnippet: "{}",
|
|
108
|
+
},
|
|
109
|
+
{
|
|
110
|
+
server: "GitHub",
|
|
111
|
+
url: "https://api.githubcopilot.com/mcp/",
|
|
112
|
+
transport: "http",
|
|
113
|
+
matchedTools: ["github_create_pr"],
|
|
114
|
+
configSnippet: "{}",
|
|
115
|
+
},
|
|
116
|
+
];
|
|
117
|
+
const prompt = buildJudgeSystemPrompt(mcpDeps);
|
|
118
|
+
expect(prompt).toContain("Slack");
|
|
119
|
+
expect(prompt).toContain("GitHub");
|
|
120
|
+
});
|
|
47
121
|
});
|
|
48
122
|
//# sourceMappingURL=judge.test.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"judge.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/judge.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAc,MAAM,QAAQ,CAAC;AAG9D,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"judge.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/judge.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAc,MAAM,QAAQ,CAAC;AAG9D,OAAO,EAAE,cAAc,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAC;AAGrE,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,UAAU,CAAC,IAAY;IAC9B,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,UAAU,CAAC,QAAgB;IAClC,OAAO,EAAE,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;AAC5F,CAAC;AAED,MAAM,SAAS,GAAc;IAC3B,EAAE,EAAE,UAAU;IACd,IAAI,EAAE,6BAA6B;IACnC,IAAI,EAAE,SAAS;CAChB,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,2BAA2B,EAAE,CAAC,CACvE,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CACjC,6CAA6C,EAC7C,SAAS,EACT,MAAM,CACP,CAAC;QAEF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QAC3D,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,KAAK;YACX,SAAS,EAAE,8BAA8B;SAC1C,CAAC,CACH,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAEtE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,UAAU,CAAC,kBAAkB,CAAC,CAAC;QAE9C,MAAM,MAAM,CACV,cAAc,CAAC,aAAa,EAAE,SAAS,EAAE,MAAM,CAAC,CACjD,CAAC,OAAO,CAAC,OAAO,CAAC,uBAAuB,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,UAAU,CACvB,yDAAyD,CAC1D,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,aAAa,EAAE,
SAAS,EAAE,MAAM,CAAC,CAAC;QACtE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAChD,CAAC;QAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAElD,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QAC7D,MAAM,CAAC,YAAY,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,MAAM,GAAG,UAAU,CACvB,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAC9D,CAAC;QAEF,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAE3D,MAAM,YAAY,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAClD,MAAM,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;IACtC,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,MAAM,GAAG,sBAAsB,EAAE,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,MAAM,GAAG,sBAAsB,CAAC,EAAE,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,MAAM,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAA
C,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,4BAA4B,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAAoB;YAC/B;gBACE,MAAM,EAAE,OAAO;gBACf,GAAG,EAAE,2BAA2B;gBAChC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,oBAAoB,CAAC;gBACpC,aAAa,EAAE,IAAI;aACpB;YACD;gBACE,MAAM,EAAE,QAAQ;gBAChB,GAAG,EAAE,oCAAoC;gBACzC,SAAS,EAAE,MAAM;gBACjB,YAAY,EAAE,CAAC,kBAAkB,CAAC;gBAClC,aAAa,EAAE,IAAI;aACpB;SACF,CAAC;QAEF,MAAM,MAAM,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -64,6 +64,61 @@ This skill does basic text processing.`;
|
|
|
64
64
|
expect(parsed.mcpServers.slack).toBeDefined();
|
|
65
65
|
expect(parsed.mcpServers.slack.url).toBe("https://mcp.slack.com/mcp");
|
|
66
66
|
});
|
|
67
|
+
it("detects Notion tool patterns", () => {
|
|
68
|
+
const content = `Use notion_create_page and notion_search to manage Notion workspace.`;
|
|
69
|
+
const deps = detectMcpDependencies(content);
|
|
70
|
+
expect(deps).toHaveLength(1);
|
|
71
|
+
expect(deps[0].server).toBe("Notion");
|
|
72
|
+
expect(deps[0].url).toBe("https://mcp.notion.com/mcp");
|
|
73
|
+
expect(deps[0].transport).toBe("http");
|
|
74
|
+
expect(deps[0].matchedTools).toContain("notion_create_page");
|
|
75
|
+
expect(deps[0].matchedTools).toContain("notion_search");
|
|
76
|
+
});
|
|
77
|
+
it("detects Jira tool patterns", () => {
|
|
78
|
+
const content = `Use jira_create_issue to file bugs in Jira.`;
|
|
79
|
+
const deps = detectMcpDependencies(content);
|
|
80
|
+
expect(deps).toHaveLength(1);
|
|
81
|
+
expect(deps[0].server).toBe("Jira");
|
|
82
|
+
expect(deps[0].url).toBe("https://mcp.atlassian.com/jira/mcp");
|
|
83
|
+
expect(deps[0].transport).toBe("http");
|
|
84
|
+
expect(deps[0].matchedTools).toContain("jira_create_issue");
|
|
85
|
+
});
|
|
86
|
+
it("detects Confluence tool patterns", () => {
|
|
87
|
+
const content = `Use confluence_create_page to write documentation.`;
|
|
88
|
+
const deps = detectMcpDependencies(content);
|
|
89
|
+
expect(deps).toHaveLength(1);
|
|
90
|
+
expect(deps[0].server).toBe("Confluence");
|
|
91
|
+
expect(deps[0].url).toBe("https://mcp.atlassian.com/confluence/mcp");
|
|
92
|
+
expect(deps[0].transport).toBe("http");
|
|
93
|
+
expect(deps[0].matchedTools).toContain("confluence_create_page");
|
|
94
|
+
});
|
|
95
|
+
it("detects Figma tool patterns", () => {
|
|
96
|
+
const content = `Use figma_get_file to inspect design files.`;
|
|
97
|
+
const deps = detectMcpDependencies(content);
|
|
98
|
+
expect(deps).toHaveLength(1);
|
|
99
|
+
expect(deps[0].server).toBe("Figma");
|
|
100
|
+
expect(deps[0].url).toBe("https://mcp.figma.com/mcp");
|
|
101
|
+
expect(deps[0].transport).toBe("http");
|
|
102
|
+
expect(deps[0].matchedTools).toContain("figma_get_file");
|
|
103
|
+
});
|
|
104
|
+
it("detects Sentry tool patterns", () => {
|
|
105
|
+
const content = `Use sentry_list_issues to monitor errors.`;
|
|
106
|
+
const deps = detectMcpDependencies(content);
|
|
107
|
+
expect(deps).toHaveLength(1);
|
|
108
|
+
expect(deps[0].server).toBe("Sentry");
|
|
109
|
+
expect(deps[0].url).toBe("https://mcp.sentry.dev/mcp");
|
|
110
|
+
expect(deps[0].transport).toBe("http");
|
|
111
|
+
expect(deps[0].matchedTools).toContain("sentry_list_issues");
|
|
112
|
+
});
|
|
113
|
+
it("detects multiple new servers simultaneously", () => {
|
|
114
|
+
const content = `Use notion_create_page for docs, jira_create_issue for bugs, and figma_get_file for design.`;
|
|
115
|
+
const deps = detectMcpDependencies(content);
|
|
116
|
+
expect(deps).toHaveLength(3);
|
|
117
|
+
const servers = deps.map((d) => d.server);
|
|
118
|
+
expect(servers).toContain("Notion");
|
|
119
|
+
expect(servers).toContain("Jira");
|
|
120
|
+
expect(servers).toContain("Figma");
|
|
121
|
+
});
|
|
67
122
|
it("deduplicates tools from frontmatter and body", () => {
|
|
68
123
|
const content = `---
|
|
69
124
|
allowed-tools: [slack_send_message]
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mcp-detector.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/mcp-detector.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,qBAAqB,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAEpF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;kEAG8C,CAAC;QAE/D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,iEAAiE,CAAC;QAClF,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,OAAO,GAAG,qDAAqD,CAAC;QACtE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC3D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,kDAAkD,CAAC;QACnE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC
,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;;gBAIJ,CAAC;QAEb,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;uCAGmB,CAAC;QAEpC,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,OAAO,GAAG,uCAAuC,CAAC;QACxD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;sDAGkC,CAAC;QAEnD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC;QACxC,6CAA6C;QAC7C,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,oBAAoB,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,OAAO,GAAG;;;8CAG0B,CAAC;QAE3C,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG,yDAAyD,CAAC;QAC1E,MAAM,IAAI,GAA
G,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,OAAO,GAAG;;;cAGN,CAAC;QAEX,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG;;;sBAGE,CAAC;QAEnB,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG;;;kCAGc,CAAC;QAE/B,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,6BAA6B;IAC3E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"mcp-detector.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/mcp-detector.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,qBAAqB,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAEpF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;kEAG8C,CAAC;QAE/D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,qBAAqB,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,iEAAiE,CAAC;QAClF,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,OAAO,GAAG,qDAAqD,CAAC;QACtE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAChD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC3D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,kDAAkD,CAAC;QACnE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC
,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;;gBAIJ,CAAC;QAEb,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,OAAO,GAAG;;;uCAGmB,CAAC;QAEpC,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,OAAO,GAAG,uCAAuC,CAAC;QACxD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IACxE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,sEAAsE,CAAC;QACvF,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,MAAM,OAAO,GAAG,6CAA6C,CAAC;QAC9D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,CAAC,IAAI,CAAC,CAAC,CA
AC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;QAC/D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,mBAAmB,CAAC,CAAC;IAC9D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,OAAO,GAAG,oDAAoD,CAAC;QACrE,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,0CAA0C,CAAC,CAAC;QACrE,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAAG,6CAA6C,CAAC;QAC9D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACtD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG,2CAA2C,CAAC;QAC5D,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACtC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;IAC/D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;QACrD,MAAM,OAAO,GAAG,6FAA6F,CAAC;QAC9G,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,OAAO,GAAG,I
AAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAC1C,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACpC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAClC,MAAM,CAAC,OAAO,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,OAAO,GAAG;;;sDAGkC,CAAC;QAEnD,MAAM,IAAI,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC;QACxC,6CAA6C;QAC7C,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,oBAAoB,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,yBAAyB,EAAE,GAAG,EAAE;IACvC,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,OAAO,GAAG;;;8CAG0B,CAAC;QAE3C,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG,yDAAyD,CAAC;QAC1E,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,OAAO,GAAG;;;cAGN,CAAC;QAEX,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,OAAO,GAAG;;;sBAGE,CAAC;QAEnB,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8BAA8B,EAAE
,GAAG,EAAE;QACtC,MAAM,OAAO,GAAG;;;kCAGc,CAAC;QAE/B,MAAM,IAAI,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,6BAA6B;IAC3E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -24,6 +24,24 @@ describe("buildEvalInitPrompt", () => {
|
|
|
24
24
|
expect(prompt).toContain("Best Practices");
|
|
25
25
|
expect(prompt).toContain("objectively verifiable");
|
|
26
26
|
});
|
|
27
|
+
it("includes MCP context for Slack skill", () => {
|
|
28
|
+
const slackSkill = "Use slack_send_message to post messages to Slack channels.";
|
|
29
|
+
const prompt = buildEvalInitPrompt(slackSkill);
|
|
30
|
+
expect(prompt).toContain("MCP Simulation Context");
|
|
31
|
+
expect(prompt).toContain("Slack");
|
|
32
|
+
});
|
|
33
|
+
it("is unchanged for non-MCP skill", () => {
|
|
34
|
+
const plainSkill = "# My Skill\nThis skill does text processing.";
|
|
35
|
+
const prompt = buildEvalInitPrompt(plainSkill);
|
|
36
|
+
expect(prompt).not.toContain("MCP Simulation Context");
|
|
37
|
+
});
|
|
38
|
+
it("lists all detected MCP servers", () => {
|
|
39
|
+
const multiSkill = "Use slack_send_message for chat and github_create_pr for PRs.";
|
|
40
|
+
const prompt = buildEvalInitPrompt(multiSkill);
|
|
41
|
+
expect(prompt).toContain("Slack");
|
|
42
|
+
expect(prompt).toContain("GitHub");
|
|
43
|
+
expect(prompt).toContain("MCP Simulation Context");
|
|
44
|
+
});
|
|
27
45
|
});
|
|
28
46
|
// ---------------------------------------------------------------------------
|
|
29
47
|
// buildEvalSystemPrompt (MCP-aware)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt-builder.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/prompt-builder.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,mBAAmB,EACnB,qBAAqB,EACrB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,sBAAsB,CAAC;AAE9B,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,MAAM,YAAY,GAAG,6CAA6C,CAAC;IAEnE,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,MAAM,GAAG,qBAAqB,CAAC,mCAAmC,CAAC,CAAC;QAC1E,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,2DAA2D,CAAC,CAAC;QACtF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,UAAU,GAAG,uEAAuE,CAAC;QAC3F,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,
CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,WAAW,GAAG,+CAA+C,CAAC;QACpE,MAAM,MAAM,GAAG,qBAAqB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,UAAU,GAAG,6FAA6F,CAAC;QACjH,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0EAA0E,EAAE,GAAG,EAAE;QAClF,MAAM,KAAK,GAAG,0CAA0C,CAAC;QACzD,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,0CAA0C,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;QACjC,MAAM,CAAC,yBAAyB,EAAE,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC9E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG;;;;;;;;;;;;;;;;;;;;uBAoBO,CAAC;QAEpB,MAAM,MAAM,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,6CAA6C,CAAC;QAC1D,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CA
A0C,EAAE,GAAG,EAAE;QAClD,MAAM,GAAG,GAAG,gCAAgC,CAAC;QAC7C,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,GAAG,GAAG,wCAAwC,CAAC;QACrD,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,sBAAsB;IAC1E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
1
|
+
{"version":3,"file":"prompt-builder.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/prompt-builder.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EACL,mBAAmB,EACnB,qBAAqB,EACrB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,sBAAsB,CAAC;AAE9B,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,MAAM,YAAY,GAAG,6CAA6C,CAAC;IAEnE,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;QACzC,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,UAAU,GAAG,4DAA4D,CAAC;QAChF,MAAM,MAAM,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;QACnD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,UAAU,GAAG,8CAA8C,CAAC;QAClE,MAAM,MAAM,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;QACxC,MAAM,UAAU,GAAG,+DAA+D,CAAC;QACnF,MAAM,MAAM,GAAG,mBAAmB,CAAC,UAAU,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,wBAAwB,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;AA
CL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,MAAM,GAAG,qBAAqB,CAAC,mCAAmC,CAAC,CAAC;QAC1E,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,2DAA2D,CAAC,CAAC;QACtF,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,GAAG,EAAE;QACvD,MAAM,UAAU,GAAG,uEAAuE,CAAC;QAC3F,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,MAAM,WAAW,GAAG,+CAA+C,CAAC;QACpE,MAAM,MAAM,GAAG,qBAAqB,CAAC,WAAW,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;QAC5D,MAAM,UAAU,GAAG,6FAA6F,CAAC;QACjH,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,CAAC,qBAAqB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC5E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0EAA0E,EAAE,GAAG,EAAE;QAClF,MAAM,KAAK,GAAG,0CAA0C,CAAC;QACzD,MAAM,MAAM,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,0CAA0C,CAAC,CAAC;IACvE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAE9E,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,yBAAyB,EAAE,GA
AG,EAAE;QACjC,MAAM,CAAC,yBAAyB,EAAE,CAAC,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC9E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG;;;;;;;;;;;;;;;;;;;;uBAoBO,CAAC;QAEpB,MAAM,MAAM,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;QAC5C,MAAM,GAAG,GAAG,6CAA6C,CAAC;QAC1D,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,GAAG,GAAG,gCAAgC,CAAC;QAC7C,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,GAAG,GAAG,wCAAwC,CAAC;QACrD,MAAM,CAAC,GAAG,EAAE,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,sBAAsB;IAC1E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { LlmClient } from "./llm.js";
|
|
2
|
+
import type { EvalVerdict } from "./verdict.js";
|
|
3
|
+
import type { ActionItems, BenchmarkAssertionResult } from "./benchmark.js";
|
|
4
|
+
/**
 * Aggregate numbers describing one A/B comparison run.
 *
 * These are the values rendered into the "Comparison Statistics" section of
 * the prompt built in action-items.js.
 */
interface ComparisonStats {
    /** Assertion pass rate; rendered as a percentage via `Math.round(passRate * 100)`. */
    passRate: number;
    /** Average rubric score of the skill runs; rendered with one decimal as "x.y/5". */
    skillRubricAvg: number;
    /** Average rubric score of the baseline runs; rendered with one decimal as "x.y/5". */
    baselineRubricAvg: number;
    /**
     * Score difference shown with an explicit "+" when positive.
     * NOTE(review): presumably skillRubricAvg - baselineRubricAvg; confirm against the caller.
     */
    delta: number;
}
|
|
10
|
+
/**
 * Outcome of a single eval case in an A/B comparison.
 *
 * Each field is rendered into the "Per-Case Breakdown" section of the prompt
 * built in action-items.js.
 */
interface CaseResult {
    /** Numeric identifier of the eval case (printed as "eval #<id>"). */
    eval_id: number;
    /** Human-readable name of the eval case. */
    eval_name: string;
    /** Which side won this case. */
    winner: "skill" | "baseline" | "tie";
    /** Content score of the skill output (printed as "<score>/5"). */
    skillContentScore: number;
    /** Structure score of the skill output (printed as "<score>/5"). */
    skillStructureScore: number;
    /** Content score of the baseline output (printed as "<score>/5"). */
    baselineContentScore: number;
    /** Structure score of the baseline output (printed as "<score>/5"). */
    baselineStructureScore: number;
    /** Per-assertion results; failed ones are listed with their text and reasoning. */
    assertionResults: BenchmarkAssertionResult[];
}
|
|
20
|
+
/**
 * Ask the LLM for actionable recommendations based on an A/B comparison.
 *
 * @param client - LLM client used to generate the recommendation text.
 * @param verdict - Overall verdict of the comparison.
 * @param stats - Aggregate comparison statistics.
 * @param cases - Per-case comparison results.
 * @param skillContent - Text of the skill file being evaluated.
 * @returns The parsed and validated action items.
 */
export declare function generateActionItems(
    client: LlmClient,
    verdict: EvalVerdict,
    stats: ComparisonStats,
    cases: CaseResult[],
    skillContent: string
): Promise<ActionItems>;
|
|
21
|
+
export {};
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// action-items.ts -- generate actionable recommendations from A/B comparison
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// System prompt passed as the first argument to client.generate() in
// generateActionItems. It asks the model for a JSON-only reply with the keys
// recommendation, summary, weaknesses, strengths and suggestedFocus, which are
// the same keys parseActionItems reads and validates. The text is runtime
// data: changing any character changes what the model is asked to do.
const SYSTEM_PROMPT = `You are an expert skill evaluator analyzing A/B comparison results.
|
|
5
|
+
|
|
6
|
+
A skill file (SKILL.md) guides an AI assistant's behavior. An A/B comparison runs the same prompts with and without the skill, then blind-scores both outputs on content (1-5) and structure (1-5).
|
|
7
|
+
|
|
8
|
+
Analyze the results and produce concrete, actionable recommendations.
|
|
9
|
+
|
|
10
|
+
Respond with ONLY valid JSON (no code fences, no preamble):
|
|
11
|
+
{
|
|
12
|
+
"recommendation": "keep" | "improve" | "rewrite" | "remove",
|
|
13
|
+
"summary": "<1-2 sentences: what happened and what to do>",
|
|
14
|
+
"weaknesses": ["<specific weakness 1>", ...],
|
|
15
|
+
"strengths": ["<specific strength 1>", ...],
|
|
16
|
+
"suggestedFocus": "<the single most impactful change to make>"
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
Recommendation criteria:
|
|
20
|
+
- "keep": Skill clearly beats baseline — high pass rate (>=80%), consistent wins, delta > +1
|
|
21
|
+
- "improve": Skill shows promise but has fixable weaknesses — moderate pass rate, some wins
|
|
22
|
+
- "rewrite": Skill barely helps or is inconsistent — low pass rate, mixed wins/losses
|
|
23
|
+
- "remove": Skill actively degrades output — baseline consistently wins, negative delta
|
|
24
|
+
|
|
25
|
+
Be specific in weaknesses and strengths — reference actual eval cases and scores, not generic advice.
|
|
26
|
+
Keep suggestedFocus to one concrete, actionable sentence.`;
|
|
27
|
+
function buildUserPrompt(verdict, stats, cases, skillContent) {
|
|
28
|
+
const caseBreakdown = cases.map((c) => {
|
|
29
|
+
const failed = c.assertionResults.filter((a) => !a.pass);
|
|
30
|
+
const failedSection = failed.length > 0
|
|
31
|
+
? failed.map((a) => ` - FAIL: ${a.text} — ${a.reasoning}`).join("\n")
|
|
32
|
+
: " (all passed)";
|
|
33
|
+
return `### Case: "${c.eval_name}" (eval #${c.eval_id})
|
|
34
|
+
- Winner: ${c.winner}
|
|
35
|
+
- Skill scores: content=${c.skillContentScore}/5, structure=${c.skillStructureScore}/5
|
|
36
|
+
- Baseline scores: content=${c.baselineContentScore}/5, structure=${c.baselineStructureScore}/5
|
|
37
|
+
- Assertions: ${c.assertionResults.filter((a) => a.pass).length}/${c.assertionResults.length} passed
|
|
38
|
+
${failedSection}`;
|
|
39
|
+
}).join("\n\n");
|
|
40
|
+
// Truncate skill content to avoid token bloat (keep first 2000 chars)
|
|
41
|
+
const truncatedSkill = skillContent.length > 2000
|
|
42
|
+
? skillContent.slice(0, 2000) + "\n\n[... truncated ...]"
|
|
43
|
+
: skillContent;
|
|
44
|
+
return `## Verdict: ${verdict}
|
|
45
|
+
|
|
46
|
+
## Comparison Statistics
|
|
47
|
+
- Assertion pass rate: ${Math.round(stats.passRate * 100)}%
|
|
48
|
+
- Skill rubric average: ${stats.skillRubricAvg.toFixed(1)}/5
|
|
49
|
+
- Baseline rubric average: ${stats.baselineRubricAvg.toFixed(1)}/5
|
|
50
|
+
- Delta: ${stats.delta > 0 ? "+" : ""}${stats.delta.toFixed(1)}
|
|
51
|
+
|
|
52
|
+
## Per-Case Breakdown
|
|
53
|
+
${caseBreakdown}
|
|
54
|
+
|
|
55
|
+
## SKILL.md Content
|
|
56
|
+
${truncatedSkill}
|
|
57
|
+
|
|
58
|
+
Analyze these results and provide your recommendation.`;
|
|
59
|
+
}
|
|
60
|
+
/**
 * Parse the raw LLM reply into a validated action-items object.
 *
 * Parsing is attempted on, in order:
 *  1. the contents of the first ``` / ```json code fence, or the whole reply
 *     when there is no fence;
 *  2. the text between the first "{" and the last "}", which recovers replies
 *     that wrap the JSON in prose without a fence.
 *
 * Only a plain JSON object is accepted. `null`, arrays and primitives are
 * treated as unparseable: previously `null` crashed with a TypeError on the
 * property access below, and the others produced an empty result with no hint
 * that the reply was malformed.
 *
 * @param raw Text returned by the LLM.
 * @returns An object with `recommendation` (one of "keep", "improve",
 *   "rewrite", "remove"; defaults to "improve"), `summary`, `weaknesses`,
 *   `strengths` and `suggestedFocus`. Fields of the wrong type are replaced by
 *   "" or [], and non-string array entries are dropped. When nothing can be
 *   parsed, a fixed fallback asking the user to re-run is returned.
 */
function parseActionItems(raw) {
    const fallback = {
        recommendation: "improve",
        summary: "Could not parse action items from LLM response.",
        weaknesses: [],
        strengths: [],
        suggestedFocus: "Re-run the comparison for actionable recommendations.",
    };
    if (typeof raw !== "string") {
        return fallback;
    }
    const fenceMatch = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
    const candidates = [fenceMatch ? fenceMatch[1] : raw];
    const firstBrace = raw.indexOf("{");
    const lastBrace = raw.lastIndexOf("}");
    if (firstBrace !== -1 && lastBrace > firstBrace) {
        candidates.push(raw.slice(firstBrace, lastBrace + 1));
    }
    let json;
    for (const candidate of candidates) {
        try {
            const parsed = JSON.parse(candidate.trim());
            if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
                json = parsed;
                break;
            }
        }
        catch {
            // Not valid JSON; try the next candidate.
        }
    }
    if (json === undefined) {
        return fallback;
    }
    const validRecs = new Set(["keep", "improve", "rewrite", "remove"]);
    const rec = typeof json.recommendation === "string" && validRecs.has(json.recommendation)
        ? json.recommendation
        : "improve";
    return {
        recommendation: rec,
        summary: typeof json.summary === "string" ? json.summary : "",
        weaknesses: Array.isArray(json.weaknesses)
            ? json.weaknesses.filter((w) => typeof w === "string")
            : [],
        strengths: Array.isArray(json.strengths)
            ? json.strengths.filter((s) => typeof s === "string")
            : [],
        suggestedFocus: typeof json.suggestedFocus === "string" ? json.suggestedFocus : "",
    };
}
|
|
92
|
+
/**
 * Produce action items for an A/B comparison by querying the LLM.
 *
 * Builds the user prompt from the comparison data, sends it together with
 * SYSTEM_PROMPT through `client.generate`, and validates the `text` of the
 * reply with parseActionItems. Rejections from `client.generate` propagate to
 * the caller.
 */
export async function generateActionItems(client, verdict, stats, cases, skillContent) {
    const prompt = buildUserPrompt(verdict, stats, cases, skillContent);
    const response = await client.generate(SYSTEM_PROMPT, prompt);
    return parseActionItems(response.text);
}
|
|
97
|
+
//# sourceMappingURL=action-items.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"action-items.js","sourceRoot":"","sources":["../../src/eval/action-items.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6EAA6E;AAC7E,8EAA8E;AAwB9E,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;;;;;;;;0DAsBoC,CAAC;AAE3D,SAAS,eAAe,CACtB,OAAoB,EACpB,KAAsB,EACtB,KAAmB,EACnB,YAAoB;IAEpB,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACpC,MAAM,MAAM,GAAG,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACzD,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC;YACrC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;YACtE,CAAC,CAAC,gBAAgB,CAAC;QACrB,OAAO,cAAc,CAAC,CAAC,SAAS,YAAY,CAAC,CAAC,OAAO;YAC7C,CAAC,CAAC,MAAM;0BACM,CAAC,CAAC,iBAAiB,iBAAiB,CAAC,CAAC,mBAAmB;6BACtD,CAAC,CAAC,oBAAoB,iBAAiB,CAAC,CAAC,sBAAsB;gBAC5E,CAAC,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,gBAAgB,CAAC,MAAM;EAC1F,aAAa,EAAE,CAAC;IAChB,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,sEAAsE;IACtE,MAAM,cAAc,GAAG,YAAY,CAAC,MAAM,GAAG,IAAI;QAC/C,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,yBAAyB;QACzD,CAAC,CAAC,YAAY,CAAC;IAEjB,OAAO,eAAe,OAAO;;;yBAGN,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC;0BAC/B,KAAK,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC;6BAC5B,KAAK,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC;WACpD,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;;;EAG5D,aAAa;;;EAGb,cAAc;;uDAEuC,CAAC;AACxD,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW;IACnC,IAAI,IAA6B,CAAC;IAClC,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;QACjD,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACpC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,cAAc,EAAE,SAAS;YACzB,OAAO,EAAE,iDAAiD;YAC1D,UAAU,EAAE,EAAE;YACd,SAAS,EAAE,EAAE;YACb,cAAc,EAAE,uDAAuD;SACxE,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,S
AAS,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,CAAC;IACpE,MAAM,GAAG,GAAG,OAAO,IAAI,CAAC,cAAc,KAAK,QAAQ,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC;QACvF,CAAC,CAAC,IAAI,CAAC,cAA+C;QACtD,CAAC,CAAC,SAAS,CAAC;IAEd,OAAO;QACL,cAAc,EAAE,GAAG;QACnB,OAAO,EAAE,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;QAC7D,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC;YACxC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;YACnE,CAAC,CAAC,EAAE;QACN,SAAS,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC;YACtC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC;YAClE,CAAC,CAAC,EAAE;QACN,cAAc,EAAE,OAAO,IAAI,CAAC,cAAc,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE;KACnF,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,MAAiB,EACjB,OAAoB,EACpB,KAAsB,EACtB,KAAmB,EACnB,YAAoB;IAEpB,MAAM,UAAU,GAAG,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,CAAC,CAAC;IACxE,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,aAAa,EAAE,UAAU,CAAC,CAAC;IAClE,OAAO,gBAAgB,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC"}
|
|
@@ -5,7 +5,7 @@ export interface HistorySummary {
|
|
|
5
5
|
model: string;
|
|
6
6
|
skillName: string;
|
|
7
7
|
passRate: number;
|
|
8
|
-
type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
|
|
8
|
+
type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
|
|
9
9
|
caseCount: number;
|
|
10
10
|
totalDurationMs: number;
|
|
11
11
|
totalTokens: number | null;
|
|
@@ -14,14 +14,14 @@ export interface HistorySummary {
|
|
|
14
14
|
}
|
|
15
15
|
export interface HistoryFilter {
|
|
16
16
|
model?: string;
|
|
17
|
-
type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
|
|
17
|
+
type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
|
|
18
18
|
from?: string;
|
|
19
19
|
to?: string;
|
|
20
20
|
}
|
|
21
21
|
export interface CaseHistoryEntry {
|
|
22
22
|
timestamp: string;
|
|
23
23
|
model: string;
|
|
24
|
-
type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
|
|
24
|
+
type: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
|
|
25
25
|
provider?: string;
|
|
26
26
|
pass_rate: number;
|
|
27
27
|
durationMs?: number;
|
|
@@ -39,7 +39,7 @@ export interface RegressionEntry {
|
|
|
39
39
|
change: "regression" | "improvement";
|
|
40
40
|
}
|
|
41
41
|
export declare function writeHistoryEntry(skillDir: string, result: BenchmarkResult & {
|
|
42
|
-
type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct";
|
|
42
|
+
type?: "benchmark" | "comparison" | "baseline" | "model-compare" | "improve" | "instruct" | "ai-generate" | "eval-generate";
|
|
43
43
|
}): Promise<string>;
|
|
44
44
|
export declare function deleteHistoryEntry(skillDir: string, timestamp: string): Promise<boolean>;
|
|
45
45
|
export declare function listHistory(skillDir: string, filter?: HistoryFilter): Promise<HistorySummary[]>;
|