vskill 0.2.55 → 0.2.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +6 -3
  2. package/dist/commands/eval/__tests__/run.test.js +7 -2
  3. package/dist/commands/eval/__tests__/run.test.js.map +1 -1
  4. package/dist/commands/eval/run.js +24 -3
  5. package/dist/commands/eval/run.js.map +1 -1
  6. package/dist/commands/eval/serve.d.ts +1 -0
  7. package/dist/commands/eval/serve.js +51 -0
  8. package/dist/commands/eval/serve.js.map +1 -0
  9. package/dist/commands/eval.d.ts +1 -0
  10. package/dist/commands/eval.js +19 -3
  11. package/dist/commands/eval.js.map +1 -1
  12. package/dist/eval/__tests__/activation-tester.test.d.ts +1 -0
  13. package/dist/eval/__tests__/activation-tester.test.js +94 -0
  14. package/dist/eval/__tests__/activation-tester.test.js.map +1 -0
  15. package/dist/eval/__tests__/benchmark-history.test.d.ts +1 -0
  16. package/dist/eval/__tests__/benchmark-history.test.js +200 -0
  17. package/dist/eval/__tests__/benchmark-history.test.js.map +1 -0
  18. package/dist/eval/__tests__/comparator.test.d.ts +1 -0
  19. package/dist/eval/__tests__/comparator.test.js +136 -0
  20. package/dist/eval/__tests__/comparator.test.js.map +1 -0
  21. package/dist/eval/__tests__/llm.test.js +161 -44
  22. package/dist/eval/__tests__/llm.test.js.map +1 -1
  23. package/dist/eval/__tests__/skill-scanner.test.js +40 -1
  24. package/dist/eval/__tests__/skill-scanner.test.js.map +1 -1
  25. package/dist/eval/__tests__/verdict.test.d.ts +1 -0
  26. package/dist/eval/__tests__/verdict.test.js +47 -0
  27. package/dist/eval/__tests__/verdict.test.js.map +1 -0
  28. package/dist/eval/activation-tester.d.ts +25 -0
  29. package/dist/eval/activation-tester.js +89 -0
  30. package/dist/eval/activation-tester.js.map +1 -0
  31. package/dist/eval/benchmark-history.d.ts +23 -0
  32. package/dist/eval/benchmark-history.js +108 -0
  33. package/dist/eval/benchmark-history.js.map +1 -0
  34. package/dist/eval/comparator.d.ts +29 -0
  35. package/dist/eval/comparator.js +100 -0
  36. package/dist/eval/comparator.js.map +1 -0
  37. package/dist/eval/llm.js +119 -6
  38. package/dist/eval/llm.js.map +1 -1
  39. package/dist/eval/skill-scanner.js +35 -26
  40. package/dist/eval/skill-scanner.js.map +1 -1
  41. package/dist/eval/verdict.d.ts +3 -0
  42. package/dist/eval/verdict.js +28 -0
  43. package/dist/eval/verdict.js.map +1 -0
  44. package/dist/eval-server/api-routes.d.ts +2 -0
  45. package/dist/eval-server/api-routes.js +425 -0
  46. package/dist/eval-server/api-routes.js.map +1 -0
  47. package/dist/eval-server/eval-server.d.ts +6 -0
  48. package/dist/eval-server/eval-server.js +102 -0
  49. package/dist/eval-server/eval-server.js.map +1 -0
  50. package/dist/eval-server/router.d.ts +14 -0
  51. package/dist/eval-server/router.js +117 -0
  52. package/dist/eval-server/router.js.map +1 -0
  53. package/dist/eval-server/sse-helpers.d.ts +4 -0
  54. package/dist/eval-server/sse-helpers.js +24 -0
  55. package/dist/eval-server/sse-helpers.js.map +1 -0
  56. package/dist/eval-ui/assets/index-BYpLv_X1.css +1 -0
  57. package/dist/eval-ui/assets/index-Od6Ch9-a.js +70 -0
  58. package/dist/eval-ui/index.html +13 -0
  59. package/dist/index.js +3 -2
  60. package/dist/index.js.map +1 -1
  61. package/package.json +15 -2
@@ -0,0 +1,136 @@
1
+ import { describe, it, expect, vi } from "vitest";
2
+ import { generateComparisonOutputs, scoreComparison, runComparison, } from "../comparator.js";
3
+ function mockClient(responses) {
4
+ let callIndex = 0;
5
+ return {
6
+ model: "test-model",
7
+ generate: vi.fn(async () => responses[callIndex++] ?? ""),
8
+ };
9
+ }
10
+ describe("generateComparisonOutputs", () => {
11
+ it("generates skill and baseline outputs sequentially", async () => {
12
+ const client = mockClient(["skill response", "baseline response"]);
13
+ const result = await generateComparisonOutputs("test prompt", "# Skill Content", client);
14
+ expect(result.skillOutput).toBe("skill response");
15
+ expect(result.baselineOutput).toBe("baseline response");
16
+ expect(result.skillDurationMs).toBeGreaterThanOrEqual(0);
17
+ expect(result.baselineDurationMs).toBeGreaterThanOrEqual(0);
18
+ expect(client.generate).toHaveBeenCalledTimes(2);
19
+ // First call should include skill content
20
+ const firstCall = client.generate.mock.calls[0];
21
+ expect(firstCall[0]).toContain("Skill Content");
22
+ // Second call should be generic
23
+ const secondCall = client.generate.mock.calls[1];
24
+ expect(secondCall[0]).toContain("helpful AI assistant");
25
+ });
26
+ });
27
+ describe("scoreComparison", () => {
28
+ it("parses JSON scores from LLM response", async () => {
29
+ const client = mockClient([
30
+ JSON.stringify({
31
+ content_score_a: 4,
32
+ structure_score_a: 3,
33
+ content_score_b: 5,
34
+ structure_score_b: 4,
35
+ winner: "second",
36
+ reasoning: "B is better",
37
+ }),
38
+ ]);
39
+ const result = await scoreComparison("output A", "output B", "prompt", client);
40
+ expect(result.contentScoreA).toBe(4);
41
+ expect(result.structureScoreA).toBe(3);
42
+ expect(result.contentScoreB).toBe(5);
43
+ expect(result.structureScoreB).toBe(4);
44
+ expect(result.winner).toBe("second");
45
+ });
46
+ it("parses JSON from code fence", async () => {
47
+ const client = mockClient([
48
+ '```json\n{"content_score_a": 3, "structure_score_a": 3, "content_score_b": 3, "structure_score_b": 3, "winner": "tie"}\n```',
49
+ ]);
50
+ const result = await scoreComparison("A", "B", "p", client);
51
+ expect(result.winner).toBe("tie");
52
+ expect(result.contentScoreA).toBe(3);
53
+ });
54
+ it("clamps scores to 1-5 range", async () => {
55
+ const client = mockClient([
56
+ JSON.stringify({
57
+ content_score_a: 0,
58
+ structure_score_a: 10,
59
+ content_score_b: -1,
60
+ structure_score_b: 6,
61
+ winner: "first",
62
+ }),
63
+ ]);
64
+ const result = await scoreComparison("A", "B", "p", client);
65
+ expect(result.contentScoreA).toBe(1);
66
+ expect(result.structureScoreA).toBe(5);
67
+ expect(result.contentScoreB).toBe(1);
68
+ expect(result.structureScoreB).toBe(5);
69
+ });
70
+ it("defaults invalid winner to tie", async () => {
71
+ const client = mockClient([
72
+ JSON.stringify({
73
+ content_score_a: 3,
74
+ structure_score_a: 3,
75
+ content_score_b: 3,
76
+ structure_score_b: 3,
77
+ winner: "invalid",
78
+ }),
79
+ ]);
80
+ const result = await scoreComparison("A", "B", "p", client);
81
+ expect(result.winner).toBe("tie");
82
+ });
83
+ });
84
+ describe("runComparison", () => {
85
+ it("maps scores back to skill/baseline correctly", async () => {
86
+ // Mock: first two calls = skill + baseline outputs, third = scoring
87
+ const client = mockClient([
88
+ "skill output here",
89
+ "baseline output here",
90
+ JSON.stringify({
91
+ content_score_a: 4,
92
+ structure_score_a: 5,
93
+ content_score_b: 2,
94
+ structure_score_b: 3,
95
+ winner: "first",
96
+ reasoning: "A is better",
97
+ }),
98
+ ]);
99
+ // Fix randomness for deterministic test
100
+ vi.spyOn(Math, "random").mockReturnValue(0.3); // < 0.5 → skill is A
101
+ const result = await runComparison("test prompt", "skill content", client);
102
+ expect(result.prompt).toBe("test prompt");
103
+ expect(result.skillOutput).toBe("skill output here");
104
+ expect(result.baselineOutput).toBe("baseline output here");
105
+ // skill is A, so scores map directly
106
+ expect(result.skillContentScore).toBe(4);
107
+ expect(result.skillStructureScore).toBe(5);
108
+ expect(result.baselineContentScore).toBe(2);
109
+ expect(result.baselineStructureScore).toBe(3);
110
+ expect(result.winner).toBe("skill");
111
+ vi.restoreAllMocks();
112
+ });
113
+ it("maps scores correctly when baseline is A", async () => {
114
+ const client = mockClient([
115
+ "skill out",
116
+ "baseline out",
117
+ JSON.stringify({
118
+ content_score_a: 2,
119
+ structure_score_a: 2,
120
+ content_score_b: 4,
121
+ structure_score_b: 4,
122
+ winner: "second",
123
+ }),
124
+ ]);
125
+ // > 0.5 → skill is B
126
+ vi.spyOn(Math, "random").mockReturnValue(0.7);
127
+ const result = await runComparison("p", "s", client);
128
+ // skill is B → scores.contentScoreB is skill
129
+ expect(result.skillContentScore).toBe(4);
130
+ expect(result.baselineContentScore).toBe(2);
131
+ // winner "second" = B = skill
132
+ expect(result.winner).toBe("skill");
133
+ vi.restoreAllMocks();
134
+ });
135
+ });
136
+ //# sourceMappingURL=comparator.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"comparator.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/comparator.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EACL,yBAAyB,EACzB,eAAe,EACf,aAAa,GACd,MAAM,kBAAkB,CAAC;AAG1B,SAAS,UAAU,CAAC,SAAmB;IACrC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC,IAAI,EAAE,CAAC;KAC1D,CAAC;AACJ,CAAC;AAED,QAAQ,CAAC,2BAA2B,EAAE,GAAG,EAAE;IACzC,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,gBAAgB,EAAE,mBAAmB,CAAC,CAAC,CAAC;QACnE,MAAM,MAAM,GAAG,MAAM,yBAAyB,CAAC,aAAa,EAAE,iBAAiB,EAAE,MAAM,CAAC,CAAC;QAEzF,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QAClD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACxD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,CAAC;QAEjD,0CAA0C;QAC1C,MAAM,SAAS,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;QAEhD,gCAAgC;QAChC,MAAM,UAAU,GAAI,MAAM,CAAC,QAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1D,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;gBAChB,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;QAC/E,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,6HAA6H;SAC9H,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,KAAK,IAAI,EAAE;QAC1C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,EAAE;gBACrB,eAAe,EAAE,CAAC,CAAC;gBACnB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;aAChB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;SACH,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,oEAAoE;QACpE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,mBAAmB;YACnB,sBAAsB;YACtB,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO;gBACf,SAAS,EAAE,aAAa;aACzB,CAAC;SACH,CAAC,CAAC;QAEH,wCAAwC;QACxC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,qBAAqB;QAEpE,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,CAAC,CAAC;QAE3E,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC1C,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QACrD,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QAC3D,qCAAqC;QACrC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,CAAC,MAAM,CAAC,sBAAsB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9C,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,WAAW;YACX,cAAc;YACd,IAAI,CAAC,SAAS,CAAC;gBACb,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,eAAe,EAAE,CAAC;gBAClB,iBAAiB,EAAE,CAAC;gBACpB,MAAM,EAAE,QAAQ;aACjB,CAAC;SACH,CAAC,CAAC;QAEH,qBAAqB;QACrB,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC;QAE9C,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC;QACrD,6CAA6C;QAC7C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC5C,8BAA8B;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEpC,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -3,11 +3,18 @@ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
3
3
  // Mocks
4
4
  // ---------------------------------------------------------------------------
5
5
  const mockCreate = vi.hoisted(() => vi.fn());
6
+ const mockExecFile = vi.hoisted(() => vi.fn());
6
7
  vi.mock("@anthropic-ai/sdk", () => ({
7
8
  default: class MockAnthropic {
8
9
  messages = { create: mockCreate };
9
10
  },
10
11
  }));
12
+ vi.mock("node:child_process", () => ({
13
+ execFile: mockExecFile,
14
+ }));
15
+ vi.mock("node:util", () => ({
16
+ promisify: (fn) => fn,
17
+ }));
11
18
  // ---------------------------------------------------------------------------
12
19
  // Import module under test AFTER mocks
13
20
  // ---------------------------------------------------------------------------
@@ -19,67 +26,177 @@ describe("createLlmClient", () => {
19
26
  const origEnv = { ...process.env };
20
27
  beforeEach(() => {
21
28
  vi.resetAllMocks();
22
- process.env.ANTHROPIC_API_KEY = "test-key";
29
+ delete process.env.VSKILL_EVAL_PROVIDER;
23
30
  delete process.env.VSKILL_EVAL_MODEL;
31
+ delete process.env.ANTHROPIC_API_KEY;
32
+ delete process.env.OLLAMA_BASE_URL;
33
+ delete process.env.CLAUDECODE;
24
34
  });
25
35
  afterEach(() => {
26
36
  process.env = { ...origEnv };
27
37
  });
28
- it("returns text content on successful generate call", async () => {
29
- mockCreate.mockResolvedValue({
30
- content: [{ type: "text", text: "Generated response" }],
31
- });
38
+ // -------------------------------------------------------------------------
39
+ // Auto-detection
40
+ // -------------------------------------------------------------------------
41
+ it("defaults to claude-cli from a plain terminal", () => {
32
42
  const client = createLlmClient();
33
- const result = await client.generate("system prompt", "user prompt");
34
- expect(result).toBe("Generated response");
35
- expect(mockCreate).toHaveBeenCalledOnce();
43
+ expect(client.model).toBe("claude-sonnet");
36
44
  });
37
- it("uses default model claude-sonnet-4-20250514 when env not set", async () => {
38
- mockCreate.mockResolvedValue({
39
- content: [{ type: "text", text: "ok" }],
40
- });
45
+ it("auto-detects ollama inside Claude Code session", () => {
46
+ process.env.CLAUDECODE = "1";
41
47
  const client = createLlmClient();
42
- await client.generate("sys", "usr");
43
- expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ model: "claude-sonnet-4-20250514" }), expect.anything());
48
+ expect(client.model).toBe("llama3.1:8b");
44
49
  });
45
- it("uses custom model from VSKILL_EVAL_MODEL env var", async () => {
46
- process.env.VSKILL_EVAL_MODEL = "claude-opus-4-20250514";
47
- mockCreate.mockResolvedValue({
48
- content: [{ type: "text", text: "ok" }],
49
- });
50
+ it("auto-detects anthropic when ANTHROPIC_API_KEY is set", () => {
51
+ process.env.ANTHROPIC_API_KEY = "test-key";
50
52
  const client = createLlmClient();
51
- await client.generate("sys", "usr");
52
- expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ model: "claude-opus-4-20250514" }), expect.anything());
53
+ expect(client.model).toBe("claude-sonnet-4-20250514");
53
54
  });
54
- it("propagates network error from SDK", async () => {
55
- mockCreate.mockRejectedValue(new Error("Connection timeout"));
55
+ it("CLAUDECODE takes priority over ANTHROPIC_API_KEY for auto-detection", () => {
56
+ process.env.CLAUDECODE = "1";
57
+ process.env.ANTHROPIC_API_KEY = "test-key";
56
58
  const client = createLlmClient();
57
- await expect(client.generate("sys", "usr")).rejects.toThrow("Connection timeout");
59
+ expect(client.model).toBe("llama3.1:8b");
58
60
  });
59
- it("passes system and user prompts correctly", async () => {
60
- mockCreate.mockResolvedValue({
61
- content: [{ type: "text", text: "ok" }],
62
- });
61
+ it("explicit VSKILL_EVAL_PROVIDER overrides auto-detection", () => {
62
+ process.env.VSKILL_EVAL_PROVIDER = "ollama";
63
+ process.env.ANTHROPIC_API_KEY = "test-key";
63
64
  const client = createLlmClient();
64
- await client.generate("my system prompt", "my user prompt");
65
- expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
66
- system: "my system prompt",
67
- messages: [{ role: "user", content: "my user prompt" }],
68
- max_tokens: 4096,
69
- }), expect.anything());
65
+ expect(client.model).toBe("llama3.1:8b");
70
66
  });
71
- it("throws when ANTHROPIC_API_KEY is not set", () => {
72
- delete process.env.ANTHROPIC_API_KEY;
73
- expect(() => createLlmClient()).toThrow("ANTHROPIC_API_KEY is not set");
67
+ it("throws on unknown provider", () => {
68
+ process.env.VSKILL_EVAL_PROVIDER = "gpt-magic";
69
+ expect(() => createLlmClient()).toThrow('Unknown VSKILL_EVAL_PROVIDER: "gpt-magic"');
74
70
  });
75
- it("exposes model name on the client", () => {
76
- const client = createLlmClient();
77
- expect(client.model).toBe("claude-sonnet-4-20250514");
71
+ // -------------------------------------------------------------------------
72
+ // Anthropic provider
73
+ // -------------------------------------------------------------------------
74
+ describe("anthropic provider", () => {
75
+ beforeEach(() => {
76
+ process.env.VSKILL_EVAL_PROVIDER = "anthropic";
77
+ process.env.ANTHROPIC_API_KEY = "test-key";
78
+ });
79
+ it("returns text content on successful generate call", async () => {
80
+ mockCreate.mockResolvedValue({
81
+ content: [{ type: "text", text: "Generated response" }],
82
+ });
83
+ const client = createLlmClient();
84
+ const result = await client.generate("system prompt", "user prompt");
85
+ expect(result).toBe("Generated response");
86
+ expect(mockCreate).toHaveBeenCalledOnce();
87
+ });
88
+ it("uses default model claude-sonnet-4-20250514", async () => {
89
+ mockCreate.mockResolvedValue({
90
+ content: [{ type: "text", text: "ok" }],
91
+ });
92
+ const client = createLlmClient();
93
+ await client.generate("sys", "usr");
94
+ expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ model: "claude-sonnet-4-20250514" }), expect.anything());
95
+ });
96
+ it("uses custom model from VSKILL_EVAL_MODEL", async () => {
97
+ process.env.VSKILL_EVAL_MODEL = "claude-opus-4-20250514";
98
+ mockCreate.mockResolvedValue({
99
+ content: [{ type: "text", text: "ok" }],
100
+ });
101
+ const client = createLlmClient();
102
+ await client.generate("sys", "usr");
103
+ expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({ model: "claude-opus-4-20250514" }), expect.anything());
104
+ });
105
+ it("propagates network error from SDK", async () => {
106
+ mockCreate.mockRejectedValue(new Error("Connection timeout"));
107
+ const client = createLlmClient();
108
+ await expect(client.generate("sys", "usr")).rejects.toThrow("Connection timeout");
109
+ });
110
+ it("passes system and user prompts correctly", async () => {
111
+ mockCreate.mockResolvedValue({
112
+ content: [{ type: "text", text: "ok" }],
113
+ });
114
+ const client = createLlmClient();
115
+ await client.generate("my system prompt", "my user prompt");
116
+ expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
117
+ system: "my system prompt",
118
+ messages: [{ role: "user", content: "my user prompt" }],
119
+ max_tokens: 4096,
120
+ }), expect.anything());
121
+ });
122
+ it("throws when ANTHROPIC_API_KEY is not set", () => {
123
+ delete process.env.ANTHROPIC_API_KEY;
124
+ expect(() => createLlmClient()).toThrow("ANTHROPIC_API_KEY is not set");
125
+ });
78
126
  });
79
- it("exposes custom model name when VSKILL_EVAL_MODEL is set", () => {
80
- process.env.VSKILL_EVAL_MODEL = "claude-opus-4-20250514";
81
- const client = createLlmClient();
82
- expect(client.model).toBe("claude-opus-4-20250514");
127
+ // -------------------------------------------------------------------------
128
+ // Claude CLI provider
129
+ // -------------------------------------------------------------------------
130
+ describe("claude-cli provider", () => {
131
+ beforeEach(() => {
132
+ process.env.VSKILL_EVAL_PROVIDER = "claude-cli";
133
+ });
134
+ it("calls claude CLI with --model flag", async () => {
135
+ mockExecFile.mockResolvedValue({ stdout: "CLI response\n" });
136
+ const client = createLlmClient();
137
+ const result = await client.generate("system prompt", "user prompt");
138
+ expect(result).toBe("CLI response");
139
+ expect(mockExecFile).toHaveBeenCalledWith("claude", ["-p", "system prompt\n\nuser prompt", "--model", "sonnet", "--no-input"], expect.objectContaining({ timeout: 120_000 }));
140
+ });
141
+ it("defaults to sonnet model", () => {
142
+ const client = createLlmClient();
143
+ expect(client.model).toBe("claude-sonnet");
144
+ });
145
+ it("passes custom model from VSKILL_EVAL_MODEL", async () => {
146
+ process.env.VSKILL_EVAL_MODEL = "opus";
147
+ mockExecFile.mockResolvedValue({ stdout: "ok\n" });
148
+ const client = createLlmClient();
149
+ expect(client.model).toBe("claude-opus");
150
+ await client.generate("sys", "usr");
151
+ expect(mockExecFile).toHaveBeenCalledWith("claude", expect.arrayContaining(["--model", "opus"]), expect.anything());
152
+ });
153
+ it("throws helpful error when claude CLI not found", async () => {
154
+ const err = new Error("ENOENT");
155
+ err.code = "ENOENT";
156
+ mockExecFile.mockRejectedValue(err);
157
+ const client = createLlmClient();
158
+ await expect(client.generate("sys", "usr")).rejects.toThrow("Claude CLI not found");
159
+ });
160
+ it("throws when explicitly selected inside Claude Code session", () => {
161
+ process.env.CLAUDECODE = "1";
162
+ expect(() => createLlmClient()).toThrow("Cannot use claude-cli provider inside a Claude Code session");
163
+ });
164
+ });
165
+ // -------------------------------------------------------------------------
166
+ // Ollama provider
167
+ // -------------------------------------------------------------------------
168
+ describe("ollama provider", () => {
169
+ beforeEach(() => {
170
+ process.env.VSKILL_EVAL_PROVIDER = "ollama";
171
+ });
172
+ it("uses default model llama3.1:8b", () => {
173
+ const client = createLlmClient();
174
+ expect(client.model).toBe("llama3.1:8b");
175
+ });
176
+ it("uses custom model from VSKILL_EVAL_MODEL", () => {
177
+ process.env.VSKILL_EVAL_MODEL = "qwen2.5:32b";
178
+ const client = createLlmClient();
179
+ expect(client.model).toBe("qwen2.5:32b");
180
+ });
181
+ it("calls Ollama HTTP API with correct payload", async () => {
182
+ const mockFetch = vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response(JSON.stringify({ response: "Ollama reply" }), { status: 200 }));
183
+ const client = createLlmClient();
184
+ const result = await client.generate("system prompt", "user prompt");
185
+ expect(result).toBe("Ollama reply");
186
+ expect(mockFetch).toHaveBeenCalledWith("http://localhost:11434/api/generate", expect.objectContaining({
187
+ method: "POST",
188
+ body: expect.stringContaining('"model":"llama3.1:8b"'),
189
+ }));
190
+ mockFetch.mockRestore();
191
+ });
192
+ it("uses custom base URL from OLLAMA_BASE_URL", async () => {
193
+ process.env.OLLAMA_BASE_URL = "http://gpu-server:11434";
194
+ const mockFetch = vi.spyOn(globalThis, "fetch").mockResolvedValue(new Response(JSON.stringify({ response: "ok" }), { status: 200 }));
195
+ const client = createLlmClient();
196
+ await client.generate("sys", "usr");
197
+ expect(mockFetch).toHaveBeenCalledWith("http://gpu-server:11434/api/generate", expect.anything());
198
+ mockFetch.mockRestore();
199
+ });
83
200
  });
84
201
  });
85
202
  //# sourceMappingURL=llm.test.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"llm.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/llm.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAEzE,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;AAE7C,EAAE,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE,CAAC,CAAC;IAClC,OAAO,EAAE,MAAM,aAAa;QAC1B,QAAQ,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;KACnC;CACF,CAAC,CAAC,CAAC;AAEJ,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;AAEtD,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,MAAM,OAAO,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAEnC,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,UAAU,CAAC;QAC3C,OAAO,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,OAAO,CAAC,GAAG,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,UAAU,CAAC,iBAAiB,CAAC;YAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,oBAAoB,EAAE,CAAC;SACxD,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,eAAe,EAAE,aAAa,CAAC,CAAC;QAErE,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAC1C,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,EAAE,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;QAC5E,UAAU,CAAC,iBAAiB,CAAC;YAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACxC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAEpC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,EAAE,KAAK,EAAE,0BAA0B,EAAE,CAAC,EAC9D,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,wBAAwB,CAAC;QACzD,UAAU,CAAC,iBAAiB,CAAC;YAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACxC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAEpC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC,EAC5D,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,UAAU,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC;QAE9D,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CACzD,oBAAoB,CACrB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,UAAU,CAAC,iBAAiB,CAAC;YAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACxC,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,CAAC;QAE5D,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC;YACtB,MAAM,EAAE,kBAAkB;YAC1B,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,CAAC;YACvD,UAAU,EAAE,IAAI;SACjB,CAAC,EACF,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,OAAO,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;QACrC,MAAM,CAAC,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC,CAAC,OAAO,CAAC,8BAA8B,CAAC,CAAC;IAC1E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC1C,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QACjE,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,wBAAwB,CAAC;QACzD,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;IACtD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"llm.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/llm.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AAEzE,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;AAC7C,MAAM,YAAY,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;AAE/C,EAAE,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE,CAAC,CAAC;IAClC,OAAO,EAAE,MAAM,aAAa;QAC1B,QAAQ,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;KACnC;CACF,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,oBAAoB,EAAE,GAAG,EAAE,CAAC,CAAC;IACnC,QAAQ,EAAE,YAAY;CACvB,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,GAAG,EAAE,CAAC,CAAC;IAC1B,SAAS,EAAE,CAAC,EAAO,EAAE,EAAE,CAAC,EAAE;CAC3B,CAAC,CAAC,CAAC;AAEJ,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;AAEtD,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,MAAM,OAAO,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAEnC,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,OAAO,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC;QACxC,OAAO,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;QACrC,OAAO,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;QACrC,OAAO,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;QACnC,OAAO,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,OAAO,CAAC,GAAG,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAC/B,CAAC,CAAC,CAAC;IAEH,4EAA4E;IAC5E,iBAAiB;IACjB,4EAA4E;IAE5E,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACtD,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;QACxD,OAAO,CAAC,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC;QAC7B,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE;QAC9D,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,UAAU,CAAC;QAC3C,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACxD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qEAAqE,EAAE,GAAG,EAAE;QAC7E,OAAO,CAAC,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC;QAC7B,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,UAAU,CAAC;QAC3C,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAChE,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,QAAQ,CAAC;QAC5C,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,UAAU,CAAC;QAC3C,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACpC,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,WAAW,CAAC;QAC/C,MAAM,CAAC,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC,CAAC,OAAO,CAAC,2CAA2C,CAAC,CAAC;IACvF,CAAC,CAAC,CAAC;IAEH,4EAA4E;IAC5E,qBAAqB;IACrB,4EAA4E;IAE5E,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,UAAU,CAAC,GAAG,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,WAAW,CAAC;YAC/C,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,UAAU,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;YAChE,UAAU,CAAC,iBAAiB,CAAC;gBAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,oBAAoB,EAAE,CAAC;aACxD,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,eAAe,EAAE,aAAa,CAAC,CAAC;YAErE,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;YAC1C,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,EAAE,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;YAC3D,UAAU,CAAC,iBAAiB,CAAC;gBAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;aACxC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YAEpC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,EAAE,KAAK,EAAE,0BAA0B,EAAE,CAAC,EAC9D,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACxD,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,wBAAwB,CAAC;YACzD,UAAU,CAAC,iBAAiB,CAAC;gBAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;aACxC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YAEpC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC,EAC5D,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,UAAU,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC,CAAC;YAE9D,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CACzD,oBAAoB,CACrB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;YACxD,UAAU,CAAC,iBAAiB,CAAC;gBAC3B,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;aACxC,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,kBAAkB,EAAE,gBAAgB,CAAC,CAAC;YAE5D,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC;gBACtB,MAAM,EAAE,kBAAkB;gBAC1B,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,EAAE,CAAC;gBACvD,UAAU,EAAE,IAAI;aACjB,CAAC,EACF,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,OAAO,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;YACrC,MAAM,CAAC,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC,CAAC,OAAO,CAAC,8BAA8B,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,4EAA4E;IAC5E,sBAAsB;IACtB,4EAA4E;IAE5E,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACnC,UAAU,CAAC,GAAG,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,YAAY,CAAC;QAClD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,YAAY,CAAC,iBAAiB,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC,CAAC;YAE7D,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,eAAe,EAAE,aAAa,CAAC,CAAC;YAErE,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YACpC,MAAM,CAAC,YAAY,CAAC,CAAC,oBAAoB,CACvC,QAAQ,EACR,CAAC,IAAI,EAAE,8BAA8B,EAAE,SAAS,EAAE,QAAQ,EAAE,YAAY,CAAC,EACzE,MAAM,CAAC,gBAAgB,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC,CAC9C,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;YAClC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,MAAM,CAAC;YACvC,YAAY,CAAC,iBAAiB,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;YAEnD,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YACzC,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YAEpC,MAAM,CAAC,YAAY,CAAC,CAAC,oBAAoB,CACvC,QAAQ,EACR,MAAM,CAAC,eAAe,CAAC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,EAC3C,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,QAAQ,CAAQ,CAAC;YACvC,GAAG,CAAC,IAAI,GAAG,QAAQ,CAAC;YACpB,YAAY,CAAC,iBAAiB,CAAC,GAAG,CAAC,CAAC;YAEpC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CACzD,sBAAsB,CACvB,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4DAA4D,EAAE,GAAG,EAAE;YACpE,OAAO,CAAC,GAAG,CAAC,UAAU,GAAG,GAAG,CAAC;YAC7B,MAAM,CAAC,GAAG,EAAE,CAAC,eAAe,EAAE,CAAC,CAAC,OAAO,CACrC,6DAA6D,CAC9D,CAAC;QACJ,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,4EAA4E;IAC5E,kBAAkB;IAClB,4EAA4E;IAE5E,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,UAAU,CAAC,GAAG,EAAE;YACd,OAAO,CAAC,GAAG,CAAC,oBAAoB,GAAG,QAAQ,CAAC;QAC9C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;YACxC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,OAAO,CAAC,GAAG,CAAC,iBAAiB,GAAG,aAAa,CAAC;YAC9C,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,MAAM,SAAS,GAAG,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,iBAAiB,CAC/D,IAAI,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAC5E,CAAC;YAEF,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,eAAe,EAAE,aAAa,CAAC,CAAC;YAErE,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YACpC,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,CACpC,qCAAqC,EACrC,MAAM,CAAC,gBAAgB,CAAC;gBACtB,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,MAAM,CAAC,gBAAgB,CAAC,uBAAuB,CAAC;aACvD,CAAC,CACH,CAAC;YAEF,SAAS,CAAC,WAAW,EAAE,CAAC;QAC1B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,OAAO,CAAC,GAAG,CAAC,eAAe,GAAG,yBAAyB,CAAC;YAExD,MAAM,SAAS,GAAG,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,iBAAiB,CAC/D,IAAI,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAClE,CAAC;YAEF,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,MAAM,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YAEpC,MAAM,CAAC,SAAS,CAAC,CAAC,oBAAoB,CACpC,sCAAsC,EACtC,MAAM,CAAC,QAAQ,EAAE,CAClB,CAAC;YAEF,SAAS,CAAC,WAAW,EAAE,CAAC;QAC1B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -7,14 +7,25 @@ import { scanSkills } from "../skill-scanner.js";
7
7
  // Test helpers
8
8
  // ---------------------------------------------------------------------------
9
9
  let testDir;
10
+ /** Create a skill in plugin layout: {root}/{plugin}/skills/{skill}/ */
10
11
  function createSkill(plugin, skill, opts = {}) {
11
12
  const skillDir = join(testDir, plugin, "skills", skill);
12
13
  mkdirSync(skillDir, { recursive: true });
13
14
  writeFileSync(join(skillDir, "SKILL.md"), `# ${skill}`);
15
+ addEvalFiles(skillDir, opts);
16
+ }
17
+ /** Create a skill in root layout: {root}/skills/{skill}/ */
18
+ function createRootSkill(skill, opts = {}) {
19
+ const skillDir = join(testDir, "skills", skill);
20
+ mkdirSync(skillDir, { recursive: true });
21
+ writeFileSync(join(skillDir, "SKILL.md"), `# ${skill}`);
22
+ addEvalFiles(skillDir, opts);
23
+ }
24
+ function addEvalFiles(skillDir, opts) {
14
25
  if (opts.evals) {
15
26
  const evalsDir = join(skillDir, "evals");
16
27
  mkdirSync(evalsDir, { recursive: true });
17
- writeFileSync(join(evalsDir, "evals.json"), JSON.stringify({ skill_name: skill, evals: [] }));
28
+ writeFileSync(join(evalsDir, "evals.json"), JSON.stringify({ skill_name: "test", evals: [] }));
18
29
  }
19
30
  if (opts.benchmark) {
20
31
  const evalsDir = join(skillDir, "evals");
@@ -33,6 +44,7 @@ describe("scanSkills", () => {
33
44
  afterEach(() => {
34
45
  rmSync(testDir, { recursive: true, force: true });
35
46
  });
47
+ // --- Plugin layout (existing) ---
36
48
  it("discovers skills in plugins directory", async () => {
37
49
  createSkill("marketing", "social-media-posting");
38
50
  createSkill("devtools", "code-review");
@@ -74,5 +86,32 @@ describe("scanSkills", () => {
74
86
  const skills = await scanSkills(testDir);
75
87
  expect(skills).toEqual([]);
76
88
  });
89
+ // --- Root layout (new) ---
90
+ it("discovers root-level skills in skills/ directory", async () => {
91
+ createRootSkill("my-skill");
92
+ const skills = await scanSkills(testDir);
93
+ expect(skills).toHaveLength(1);
94
+ expect(skills[0].skill).toBe("my-skill");
95
+ });
96
+ it("uses root dirname as plugin name for root-level skills", async () => {
97
+ createRootSkill("my-skill");
98
+ const skills = await scanSkills(testDir);
99
+ // plugin name = basename of the root dir
100
+ expect(skills[0].plugin).toBe(testDir.split("/").pop());
101
+ });
102
+ it("discovers both plugin and root-level skills together", async () => {
103
+ createSkill("marketing", "social-media-posting");
104
+ createRootSkill("standalone-skill");
105
+ const skills = await scanSkills(testDir);
106
+ expect(skills).toHaveLength(2);
107
+ const names = skills.map((s) => s.skill).sort();
108
+ expect(names).toEqual(["social-media-posting", "standalone-skill"]);
109
+ });
110
+ it("handles root-level skills with evals", async () => {
111
+ createRootSkill("my-skill", { evals: true, benchmark: true });
112
+ const skills = await scanSkills(testDir);
113
+ expect(skills[0].hasEvals).toBe(true);
114
+ expect(skills[0].hasBenchmark).toBe(true);
115
+ });
77
116
  });
78
117
  //# sourceMappingURL=skill-scanner.test.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"skill-scanner.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/skill-scanner.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,8EAA8E;AAC9E,eAAe;AACf,8EAA8E;AAE9E,IAAI,OAAe,CAAC;AAEpB,SAAS,WAAW,CAClB,MAAc,EACd,KAAa,EACb,OAAiD,EAAE;IAEnD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;IACxD,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,KAAK,KAAK,EAAE,CAAC,CAAC;IAExD,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,aAAa,CACX,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,EAC5B,IAAI,CAAC,SAAS,CAAC,EAAE,UAAU,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CACjD,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;QACnB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,aAAa,CACX,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,EAChC,IAAI,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,sBAAsB,EAAE,CAAC,CACtD,CAAC;IACJ,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,eAAe,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACtD,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,WAAW,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC;QACjD,WAAW,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;QAEvC,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,MAAM,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,sBAAsB,CAAC,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,WAAW,CAAC,WAAW,EAAE,sBAAsB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAElE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,WAAW,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC;QAEjD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,WAAW,CAAC,WAAW,EAAE,sBAAsB,EAAE;YAC/C,KAAK,EAAE,IAAI;YACX,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACrE,WAAW,CAAC,WAAW,EAAE,sBAAsB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAElE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;QACtD,WAAW,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC;QAEjD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"skill-scanner.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/skill-scanner.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAC3D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,qBAAqB,CAAC;AAEjD,8EAA8E;AAC9E,eAAe;AACf,8EAA8E;AAE9E,IAAI,OAAe,CAAC;AAEpB,uEAAuE;AACvE,SAAS,WAAW,CAClB,MAAc,EACd,KAAa,EACb,OAAiD,EAAE;IAEnD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;IACxD,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,KAAK,KAAK,EAAE,CAAC,CAAC;IACxD,YAAY,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;AAC/B,CAAC;AAED,4DAA4D;AAC5D,SAAS,eAAe,CACtB,KAAa,EACb,OAAiD,EAAE;IAEnD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;IAChD,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,KAAK,KAAK,EAAE,CAAC,CAAC;IACxD,YAAY,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;AAC/B,CAAC;AAED,SAAS,YAAY,CACnB,QAAgB,EAChB,IAA8C;IAE9C,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,aAAa,CACX,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,EAC5B,IAAI,CAAC,SAAS,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAClD,CAAC;IACJ,CAAC;IAED,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;QACnB,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACzC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,aAAa,CACX,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,EAChC,IAAI,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,sBAAsB,EAAE,CAAC,CACtD,CAAC;IACJ,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,eAAe,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACtD,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1C,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,mCAAmC;IAEnC,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,WAAW,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC;QACjD,WAAW,CAAC,UAAU,EAAE,aAAa,CAAC,CAAC;QAEvC,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,MAAM,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,sBAAsB,CAAC,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,WAAW,CAAC,WAAW,EAAE,sBAAsB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAElE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,WAAW,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC;QAEjD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,WAAW,CAAC,WAAW,EAAE,sBAAsB,EAAE;YAC/C,KAAK,EAAE,IAAI;YACX,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACrE,WAAW,CAAC,WAAW,EAAE,sBAAsB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAElE,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;QACtD,WAAW,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC;QAEjD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,4BAA4B;IAE5B,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,eAAe,CAAC,UAAU,CAAC,CAAC;QAE5B,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wDAAwD,EAAE,KAAK,IAAI,EAAE;QACtE,eAAe,CAAC,UAAU,CAAC,CAAC;QAE5B,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,yCAAyC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,KAAK,IAAI,EAAE;QACpE,WAAW,CAAC,WAAW,EAAE,sBAAsB,CAAC,CAAC;QACjD,eAAe,CAAC,kBAAkB,CAAC,CAAC;QAEpC,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC;QAChD,MAAM,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,CAAC,sBAAsB,EAAE,kBAAkB,CAAC,CAAC,CAAC;IACtE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;QACpD,eAAe,CAAC,UAAU,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE9D,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,OAAO,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,47 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { computeVerdict, verdictColor } from "../verdict.js";
3
+ describe("computeVerdict", () => {
4
+ it("returns EFFECTIVE when passRate >= 0.8 and skill rubric > baseline + 1", () => {
5
+ expect(computeVerdict(0.85, 4.5, 3.0)).toBe("EFFECTIVE");
6
+ expect(computeVerdict(0.80, 4.0, 2.5)).toBe("EFFECTIVE");
7
+ expect(computeVerdict(1.0, 5.0, 1.0)).toBe("EFFECTIVE");
8
+ });
9
+ it("returns MARGINAL when passRate >= 0.6 and skill rubric > baseline (but not EFFECTIVE)", () => {
10
+ expect(computeVerdict(0.70, 3.5, 3.0)).toBe("MARGINAL");
11
+ expect(computeVerdict(0.60, 2.5, 2.0)).toBe("MARGINAL");
12
+ // High pass rate but rubric only slightly better → MARGINAL
13
+ expect(computeVerdict(0.85, 3.5, 3.0)).toBe("MARGINAL");
14
+ });
15
+ it("returns INEFFECTIVE when passRate >= 0.4 (but not MARGINAL)", () => {
16
+ expect(computeVerdict(0.50, 2.5, 3.0)).toBe("INEFFECTIVE");
17
+ expect(computeVerdict(0.45, 3.0, 3.0)).toBe("INEFFECTIVE");
18
+ expect(computeVerdict(0.40, 1.0, 5.0)).toBe("INEFFECTIVE");
19
+ });
20
+ it("returns DEGRADING when passRate < 0.4", () => {
21
+ expect(computeVerdict(0.30, 2.0, 3.0)).toBe("DEGRADING");
22
+ expect(computeVerdict(0.10, 1.0, 1.0)).toBe("DEGRADING");
23
+ expect(computeVerdict(0.0, 0.0, 0.0)).toBe("DEGRADING");
24
+ expect(computeVerdict(0.39, 5.0, 1.0)).toBe("DEGRADING");
25
+ });
26
+ it("handles boundary values correctly", () => {
27
+ // Exactly 0.8 pass rate, exactly +1 rubric → EFFECTIVE
28
+ expect(computeVerdict(0.8, 4.0, 2.9)).toBe("EFFECTIVE");
29
+ // 0.8 pass rate but rubric diff exactly 1 → NOT EFFECTIVE (needs >1)
30
+ expect(computeVerdict(0.8, 4.0, 3.0)).toBe("MARGINAL");
31
+ // Exactly 0.6 pass rate, skill > baseline → MARGINAL
32
+ expect(computeVerdict(0.6, 3.1, 3.0)).toBe("MARGINAL");
33
+ // Exactly 0.6 pass rate, skill = baseline → INEFFECTIVE
34
+ expect(computeVerdict(0.6, 3.0, 3.0)).toBe("INEFFECTIVE");
35
+ // Exactly 0.4 pass rate → INEFFECTIVE
36
+ expect(computeVerdict(0.4, 3.0, 3.0)).toBe("INEFFECTIVE");
37
+ });
38
+ });
39
+ describe("verdictColor", () => {
40
+ it("returns correct colors for each verdict", () => {
41
+ expect(verdictColor("EFFECTIVE")).toBe("green");
42
+ expect(verdictColor("MARGINAL")).toBe("yellow");
43
+ expect(verdictColor("INEFFECTIVE")).toBe("orange");
44
+ expect(verdictColor("DEGRADING")).toBe("red");
45
+ });
46
+ });
47
+ //# sourceMappingURL=verdict.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"verdict.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/verdict.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAE7D,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,wEAAwE,EAAE,GAAG,EAAE;QAChF,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzD,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzD,MAAM,CAAC,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uFAAuF,EAAE,GAAG,EAAE;QAC/F,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxD,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxD,4DAA4D;QAC5D,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6DAA6D,EAAE,GAAG,EAAE;QACrE,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3D,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3D,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;QAC/C,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzD,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACzD,MAAM,CAAC,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxD,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,uDAAuD;QACvD,MAAM,CAAC,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACxD,qEAAqE;QACrE,MAAM,CAAC,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvD,qDAAqD;QACrD,MAAM,CAAC,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACvD,wDAAwD;QACxD,MAAM,CAAC,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC1D,sCAAsC;QACtC,MAAM,CAAC,cAAc,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC5D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;IAC5B,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;QACjD,MAAM,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAChD,MAAM,CAAC,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACnD,MAAM,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1,25 @@
1
+ import type { LlmClient } from "./llm.js";
2
+ export interface ActivationPrompt {
3
+ prompt: string;
4
+ expected: "should_activate" | "should_not_activate";
5
+ }
6
+ export interface ActivationResult {
7
+ prompt: string;
8
+ expected: "should_activate" | "should_not_activate";
9
+ activate: boolean;
10
+ confidence: "high" | "medium" | "low";
11
+ reasoning: string;
12
+ classification: "TP" | "TN" | "FP" | "FN";
13
+ }
14
+ export interface ActivationSummary {
15
+ results: ActivationResult[];
16
+ precision: number;
17
+ recall: number;
18
+ reliability: number;
19
+ total: number;
20
+ tp: number;
21
+ tn: number;
22
+ fp: number;
23
+ fn: number;
24
+ }
25
+ export declare function testActivation(skillDescription: string, prompts: ActivationPrompt[], client: LlmClient, onResult?: (result: ActivationResult) => void): Promise<ActivationSummary>;