vskill 0.2.26 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/commands/__tests__/eval-router.test.d.ts +1 -0
  2. package/dist/commands/__tests__/eval-router.test.js +60 -0
  3. package/dist/commands/__tests__/eval-router.test.js.map +1 -0
  4. package/dist/commands/add.js +113 -19
  5. package/dist/commands/add.js.map +1 -1
  6. package/dist/commands/eval/__tests__/coverage.test.d.ts +1 -0
  7. package/dist/commands/eval/__tests__/coverage.test.js +122 -0
  8. package/dist/commands/eval/__tests__/coverage.test.js.map +1 -0
  9. package/dist/commands/eval/__tests__/generate-all.test.d.ts +1 -0
  10. package/dist/commands/eval/__tests__/generate-all.test.js +133 -0
  11. package/dist/commands/eval/__tests__/generate-all.test.js.map +1 -0
  12. package/dist/commands/eval/__tests__/init.test.d.ts +1 -0
  13. package/dist/commands/eval/__tests__/init.test.js +116 -0
  14. package/dist/commands/eval/__tests__/init.test.js.map +1 -0
  15. package/dist/commands/eval/__tests__/run.test.d.ts +1 -0
  16. package/dist/commands/eval/__tests__/run.test.js +149 -0
  17. package/dist/commands/eval/__tests__/run.test.js.map +1 -0
  18. package/dist/commands/eval/coverage.d.ts +1 -0
  19. package/dist/commands/eval/coverage.js +79 -0
  20. package/dist/commands/eval/coverage.js.map +1 -0
  21. package/dist/commands/eval/generate-all.d.ts +1 -0
  22. package/dist/commands/eval/generate-all.js +64 -0
  23. package/dist/commands/eval/generate-all.js.map +1 -0
  24. package/dist/commands/eval/init.d.ts +1 -0
  25. package/dist/commands/eval/init.js +38 -0
  26. package/dist/commands/eval/init.js.map +1 -0
  27. package/dist/commands/eval/run.d.ts +1 -0
  28. package/dist/commands/eval/run.js +107 -0
  29. package/dist/commands/eval/run.js.map +1 -0
  30. package/dist/commands/eval.d.ts +4 -0
  31. package/dist/commands/eval.js +48 -0
  32. package/dist/commands/eval.js.map +1 -0
  33. package/dist/eval/__tests__/benchmark.test.d.ts +1 -0
  34. package/dist/eval/__tests__/benchmark.test.js +65 -0
  35. package/dist/eval/__tests__/benchmark.test.js.map +1 -0
  36. package/dist/eval/__tests__/judge.test.d.ts +1 -0
  37. package/dist/eval/__tests__/judge.test.js +45 -0
  38. package/dist/eval/__tests__/judge.test.js.map +1 -0
  39. package/dist/eval/__tests__/llm.test.d.ts +1 -0
  40. package/dist/eval/__tests__/llm.test.js +85 -0
  41. package/dist/eval/__tests__/llm.test.js.map +1 -0
  42. package/dist/eval/__tests__/prompt-builder.test.d.ts +1 -0
  43. package/dist/eval/__tests__/prompt-builder.test.js +72 -0
  44. package/dist/eval/__tests__/prompt-builder.test.js.map +1 -0
  45. package/dist/eval/__tests__/schema.test.d.ts +1 -0
  46. package/dist/eval/__tests__/schema.test.js +209 -0
  47. package/dist/eval/__tests__/schema.test.js.map +1 -0
  48. package/dist/eval/__tests__/skill-scanner.test.d.ts +1 -0
  49. package/dist/eval/__tests__/skill-scanner.test.js +78 -0
  50. package/dist/eval/__tests__/skill-scanner.test.js.map +1 -0
  51. package/dist/eval/benchmark.d.ts +22 -0
  52. package/dist/eval/benchmark.js +24 -0
  53. package/dist/eval/benchmark.js.map +1 -0
  54. package/dist/eval/judge.d.ts +9 -0
  55. package/dist/eval/judge.js +40 -0
  56. package/dist/eval/judge.js.map +1 -0
  57. package/dist/eval/llm.d.ts +5 -0
  58. package/dist/eval/llm.js +34 -0
  59. package/dist/eval/llm.js.map +1 -0
  60. package/dist/eval/prompt-builder.d.ts +3 -0
  61. package/dist/eval/prompt-builder.js +155 -0
  62. package/dist/eval/prompt-builder.js.map +1 -0
  63. package/dist/eval/schema.d.ts +26 -0
  64. package/dist/eval/schema.js +128 -0
  65. package/dist/eval/schema.js.map +1 -0
  66. package/dist/eval/skill-scanner.d.ts +8 -0
  67. package/dist/eval/skill-scanner.js +44 -0
  68. package/dist/eval/skill-scanner.js.map +1 -0
  69. package/dist/index.js +9 -0
  70. package/dist/index.js.map +1 -1
  71. package/dist/marketplace/index.d.ts +2 -2
  72. package/dist/marketplace/index.js +1 -1
  73. package/dist/marketplace/index.js.map +1 -1
  74. package/dist/marketplace/marketplace.d.ts +13 -0
  75. package/dist/marketplace/marketplace.js +35 -0
  76. package/dist/marketplace/marketplace.js.map +1 -1
  77. package/package.json +2 -1
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generate-all.test.js","sourceRoot":"","sources":["../../../../src/commands/eval/__tests__/generate-all.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACzE,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACvE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAEjC,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,YAAY,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;AAE/C,EAAE,CAAC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE,CAAC,CAAC;IACrC,eAAe,EAAE,GAAG,EAAE,CAAC,CAAC;QACtB,QAAQ,EAAE,YAAY;QACtB,KAAK,EAAE,YAAY;KACpB,CAAC;CACH,CAAC,CAAC,CAAC;AAEJ,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;AAElE,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,IAAI,OAAe,CAAC;AAEpB,SAAS,WAAW,CAClB,MAAc,EACd,KAAa,EACb,OAA4B,EAAE;IAE9B,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;IACxD,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,KAAK,KAAK,UAAU,KAAK,UAAU,CAAC,CAAC;IAE/E,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QACf,SAAS,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,aAAa,CACX,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,YAAY,CAAC,EACrC,IAAI,CAAC,SAAS,CAAC;YACb,UAAU,EAAE,KAAK;YACjB,KAAK,EAAE;gBACL;oBACE,EAAE,EAAE,CAAC;oBACL,IAAI,EAAE,UAAU;oBAChB,MAAM,EAAE,MAAM;oBACd,eAAe,EAAE,QAAQ;oBACzB,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;iBAC3D;aACF;SACF,CAAC,CACH,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;OAgBjB,CAAC;AAER,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,iBAAiB,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACxD,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACxC,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,YAAY,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,
GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,WAAW,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QACpC,WAAW,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;QACpC,WAAW,CAAC,WAAW,EAAE,SAAS,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QACrD,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,kBAAkB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEzC,4CAA4C;QAC5C,MAAM,CACJ,UAAU,CACR,IAAI,CAAC,OAAO,EAAE,2CAA2C,CAAC,CAC3D,CACF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACb,MAAM,CACJ,UAAU,CACR,IAAI,CAAC,OAAO,EAAE,2CAA2C,CAAC,CAC3D,CACF,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAEb,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;QAChD,WAAW,CAAC,WAAW,EAAE,gBAAgB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5D,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,kBAAkB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEzC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;QAC1D,WAAW,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC;QACvC,WAAW,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;QACrC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,YAAY,CAAC,kBAAkB,CAAC,KAAK,IAAI,EAAE;YACzC,SAAS,EAAE,CAAC;YACZ,IAAI,SAAS,IAAI,CAAC;gBAAE,MAAM,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC;YACjD,OAAO,eAAe,CAAC;QACzB,CAAC,CAAC,CAAC;QACH,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QACzE,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC
,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAExD,MAAM,kBAAkB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAEzC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,KAAK,IAAI,EAAE;QAC7C,WAAW,CAAC,WAAW,EAAE,gBAAgB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC5D,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,kBAAkB,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAExC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,+BAA+B;QAC/B,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,EAAE,mBAAmB,CAAC,CAAC;QACtD,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC3C,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,wBAAwB,CAAC,CAAC;QAC5D,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,aAAa,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,sBAAsB,CAAC,CAAC;QAElE,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,kBAAkB,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAE5C,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACvC,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,116 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+ // ---------------------------------------------------------------------------
3
+ // Mocks
4
+ // ---------------------------------------------------------------------------
5
+ const mocks = vi.hoisted(() => ({
6
+ readFileSync: vi.fn(),
7
+ writeFileSync: vi.fn(),
8
+ existsSync: vi.fn(),
9
+ mkdirSync: vi.fn(),
10
+ generate: vi.fn(),
11
+ }));
12
+ vi.mock("node:fs", () => ({
13
+ readFileSync: mocks.readFileSync,
14
+ writeFileSync: mocks.writeFileSync,
15
+ existsSync: mocks.existsSync,
16
+ mkdirSync: mocks.mkdirSync,
17
+ }));
18
+ vi.mock("../../../eval/llm.js", () => ({
19
+ createLlmClient: () => ({
20
+ generate: mocks.generate,
21
+ model: "test-model",
22
+ }),
23
+ }));
24
+ // ---------------------------------------------------------------------------
25
+ // Import module under test AFTER mocks
26
+ // ---------------------------------------------------------------------------
27
+ const { runEvalInit } = await import("../init.js");
28
+ // ---------------------------------------------------------------------------
29
+ // Helpers
30
+ // ---------------------------------------------------------------------------
31
+ const VALID_GENERATED = `Here is the evals.json:
32
+
33
+ \`\`\`json
34
+ {
35
+ "skill_name": "test-skill",
36
+ "evals": [
37
+ {
38
+ "id": 1,
39
+ "name": "Basic test",
40
+ "prompt": "Test prompt",
41
+ "expected_output": "Expected output",
42
+ "files": [],
43
+ "assertions": [
44
+ { "id": "a1", "text": "Check result", "type": "boolean" }
45
+ ]
46
+ }
47
+ ]
48
+ }
49
+ \`\`\``;
50
+ // ---------------------------------------------------------------------------
51
+ // Tests
52
+ // ---------------------------------------------------------------------------
53
+ describe("runEvalInit", () => {
54
+ beforeEach(() => {
55
+ vi.resetAllMocks();
56
+ // Default: SKILL.md exists, evals.json does not
57
+ mocks.existsSync.mockImplementation((p) => {
58
+ if (p.includes("SKILL.md"))
59
+ return true;
60
+ if (p.includes("evals.json"))
61
+ return false;
62
+ return false;
63
+ });
64
+ mocks.readFileSync.mockReturnValue("# My Skill\nDoes things.");
65
+ mocks.generate.mockResolvedValue(VALID_GENERATED);
66
+ });
67
+ it("creates evals.json when absent", async () => {
68
+ await runEvalInit("/root/plugins/marketing/skills/social-media-posting", false);
69
+ expect(mocks.mkdirSync).toHaveBeenCalled();
70
+ expect(mocks.writeFileSync).toHaveBeenCalledOnce();
71
+ const writtenContent = JSON.parse(mocks.writeFileSync.mock.calls[0][1]);
72
+ expect(writtenContent.skill_name).toBe("test-skill");
73
+ expect(writtenContent.evals).toHaveLength(1);
74
+ });
75
+ it("exits with message when evals.json exists and no --force", async () => {
76
+ mocks.existsSync.mockImplementation((p) => {
77
+ if (p.includes("SKILL.md"))
78
+ return true;
79
+ if (p.includes("evals.json"))
80
+ return true;
81
+ return false;
82
+ });
83
+ const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => { });
84
+ await runEvalInit("/root/plugins/marketing/skills/smp", false);
85
+ expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining("already exists"));
86
+ expect(mocks.writeFileSync).not.toHaveBeenCalled();
87
+ consoleSpy.mockRestore();
88
+ });
89
+ it("overwrites when --force flag is passed", async () => {
90
+ mocks.existsSync.mockImplementation((p) => {
91
+ if (p.includes("SKILL.md"))
92
+ return true;
93
+ if (p.includes("evals.json"))
94
+ return true;
95
+ return false;
96
+ });
97
+ await runEvalInit("/root/plugins/marketing/skills/smp", true);
98
+ expect(mocks.writeFileSync).toHaveBeenCalledOnce();
99
+ });
100
+ it("handles LLM failure gracefully", async () => {
101
+ mocks.generate.mockRejectedValue(new Error("API rate limited"));
102
+ const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => { });
103
+ await runEvalInit("/root/plugins/marketing/skills/smp", false);
104
+ expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining("API rate limited"));
105
+ expect(mocks.writeFileSync).not.toHaveBeenCalled();
106
+ consoleSpy.mockRestore();
107
+ });
108
+ it("throws when SKILL.md does not exist", async () => {
109
+ mocks.existsSync.mockReturnValue(false);
110
+ const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => { });
111
+ await runEvalInit("/root/plugins/marketing/skills/smp", false);
112
+ expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining("SKILL.md"));
113
+ consoleSpy.mockRestore();
114
+ });
115
+ });
116
+ //# sourceMappingURL=init.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"init.test.js","sourceRoot":"","sources":["../../../../src/commands/eval/__tests__/init.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,KAAK,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAC9B,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;IACrB,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE;IACtB,UAAU,EAAE,EAAE,CAAC,EAAE,EAAE;IACnB,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE;IAClB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE;CAClB,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IACxB,YAAY,EAAE,KAAK,CAAC,YAAY;IAChC,aAAa,EAAE,KAAK,CAAC,aAAa;IAClC,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,SAAS,EAAE,KAAK,CAAC,SAAS;CAC3B,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE,CAAC,CAAC;IACrC,eAAe,EAAE,GAAG,EAAE,CAAC,CAAC;QACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,KAAK,EAAE,YAAY;KACpB,CAAC;CACH,CAAC,CAAC,CAAC;AAEJ,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;AAEnD,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,MAAM,eAAe,GAAG;;;;;;;;;;;;;;;;;;OAkBjB,CAAC;AAER,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;IAC3B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,gDAAgD;QAChD,KAAK,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAS,EAAE,EAAE;YAChD,IAAI,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAAE,OAAO,IAAI,CAAC;YACxC,IAAI,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,KAAK,CAAC;YAC3C,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QACH,KAAK,CAAC,YAAY,CAAC,eAAe,CAAC,0BAA0B,CAAC,CAAC;QAC/D,KAAK,CAAC,QAAQ,CAAC,iBAAiB,CAAC,eAAe,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,MAAM,WAAW,CAAC,qDAAqD,EAAE,KAAK,CAAC,CAAC;QAEhF,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,gBAAgB,EAAE,CAAC;QAC3C,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,oBAAoB,EAAE,CAAC;QACnD,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,
YAAY,CAAC,CAAC,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;QACxE,KAAK,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAS,EAAE,EAAE;YAChD,IAAI,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAAE,OAAO,IAAI,CAAC;YACxC,IAAI,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,IAAI,CAAC;YAC1C,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,WAAW,CAAC,oCAAoC,EAAE,KAAK,CAAC,CAAC;QAE/D,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,gBAAgB,CAAC,CAC1C,CAAC;QACF,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QACnD,UAAU,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;QACtD,KAAK,CAAC,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAS,EAAE,EAAE;YAChD,IAAI,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC;gBAAE,OAAO,IAAI,CAAC;YACxC,IAAI,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,IAAI,CAAC;YAC1C,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,CAAC;QAEH,MAAM,WAAW,CAAC,oCAAoC,EAAE,IAAI,CAAC,CAAC;QAE9D,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,oBAAoB,EAAE,CAAC;IACrD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gCAAgC,EAAE,KAAK,IAAI,EAAE;QAC9C,KAAK,CAAC,QAAQ,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC;QAChE,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAE3E,MAAM,WAAW,CAAC,oCAAoC,EAAE,KAAK,CAAC,CAAC;QAE/D,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAC5C,CAAC;QACF,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC;QACnD,UAAU,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;QACnD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAE3E,MAAM,WAAW,CAAC,oCAAoC,EAAE,KAAK,CAAC,CAAC;QAE/D,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,UAAU,CAAC,CACpC,CAAC;QACF,UAAU,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,149 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+ // ---------------------------------------------------------------------------
3
+ // Mocks
4
+ // ---------------------------------------------------------------------------
5
+ const mocks = vi.hoisted(() => ({
6
+ readFileSync: vi.fn(),
7
+ writeFileSync: vi.fn(),
8
+ existsSync: vi.fn(),
9
+ mkdirSync: vi.fn(),
10
+ generate: vi.fn(),
11
+ }));
12
+ vi.mock("node:fs", () => ({
13
+ readFileSync: mocks.readFileSync,
14
+ writeFileSync: mocks.writeFileSync,
15
+ existsSync: mocks.existsSync,
16
+ mkdirSync: mocks.mkdirSync,
17
+ }));
18
+ vi.mock("../../../eval/llm.js", () => ({
19
+ createLlmClient: () => ({
20
+ generate: mocks.generate,
21
+ model: "test-model",
22
+ }),
23
+ }));
24
+ // ---------------------------------------------------------------------------
25
+ // Import module under test AFTER mocks
26
+ // ---------------------------------------------------------------------------
27
+ const { runEvalRun } = await import("../run.js");
28
+ // ---------------------------------------------------------------------------
29
+ // Fixtures
30
+ // ---------------------------------------------------------------------------
31
+ const VALID_EVALS = {
32
+ skill_name: "test-skill",
33
+ evals: [
34
+ {
35
+ id: 1,
36
+ name: "Basic test",
37
+ prompt: "Test prompt 1",
38
+ expected_output: "Expected output 1",
39
+ files: [],
40
+ assertions: [
41
+ { id: "a1", text: "Output mentions AI", type: "boolean" },
42
+ { id: "a2", text: "Output is concise", type: "boolean" },
43
+ ],
44
+ },
45
+ {
46
+ id: 2,
47
+ name: "Edge case test",
48
+ prompt: "Test prompt 2",
49
+ expected_output: "Expected output 2",
50
+ files: [],
51
+ assertions: [
52
+ { id: "b1", text: "Output handles edge case", type: "boolean" },
53
+ ],
54
+ },
55
+ ],
56
+ };
57
+ // ---------------------------------------------------------------------------
58
+ // Tests
59
+ // ---------------------------------------------------------------------------
60
+ describe("runEvalRun", () => {
61
+ beforeEach(() => {
62
+ vi.resetAllMocks();
63
+ // Default: evals.json exists with valid content
64
+ mocks.existsSync.mockReturnValue(true);
65
+ mocks.readFileSync.mockReturnValue(JSON.stringify(VALID_EVALS));
66
+ });
67
+ it("prints results table on success", async () => {
68
+ // Mock LLM: first call returns output, subsequent calls judge assertions
69
+ let callCount = 0;
70
+ mocks.generate.mockImplementation(async () => {
71
+ callCount++;
72
+ // First call per case = generate output, remaining = judge assertions
73
+ if (callCount === 1)
74
+ return "AI is great";
75
+ if (callCount <= 3)
76
+ return JSON.stringify({ pass: true, reasoning: "ok" });
77
+ if (callCount === 4)
78
+ return "Edge case handled";
79
+ return JSON.stringify({ pass: false, reasoning: "not quite" });
80
+ });
81
+ const consoleSpy = vi.spyOn(console, "log").mockImplementation(() => { });
82
+ await runEvalRun("/skills/test-skill");
83
+ // Check table was printed
84
+ const output = consoleSpy.mock.calls.map((c) => c[0]).join("\n");
85
+ expect(output).toContain("a1");
86
+ expect(output).toContain("b1");
87
+ consoleSpy.mockRestore();
88
+ });
89
+ it("writes benchmark.json after run", async () => {
90
+ mocks.generate.mockImplementation(async (_sys, prompt) => {
91
+ if (prompt.includes("Test prompt"))
92
+ return "LLM output here";
93
+ return JSON.stringify({ pass: true, reasoning: "ok" });
94
+ });
95
+ vi.spyOn(console, "log").mockImplementation(() => { });
96
+ await runEvalRun("/skills/test-skill");
97
+ expect(mocks.writeFileSync).toHaveBeenCalled();
98
+ const writtenPath = mocks.writeFileSync.mock.calls[0][0];
99
+ expect(writtenPath).toContain("benchmark.json");
100
+ const writtenContent = JSON.parse(mocks.writeFileSync.mock.calls[0][1]);
101
+ expect(writtenContent.skill_name).toBe("test-skill");
102
+ expect(writtenContent.cases).toHaveLength(2);
103
+ vi.restoreAllMocks();
104
+ });
105
+ it("marks error case on LLM failure and continues", async () => {
106
+ let callCount = 0;
107
+ mocks.generate.mockImplementation(async () => {
108
+ callCount++;
109
+ // First case: output generation fails
110
+ if (callCount === 1)
111
+ throw new Error("API timeout");
112
+ // Second case: works fine
113
+ if (callCount === 2)
114
+ return "Edge case handled";
115
+ return JSON.stringify({ pass: true, reasoning: "ok" });
116
+ });
117
+ vi.spyOn(console, "log").mockImplementation(() => { });
118
+ await runEvalRun("/skills/test-skill");
119
+ const writtenContent = JSON.parse(mocks.writeFileSync.mock.calls[0][1]);
120
+ expect(writtenContent.cases[0].status).toBe("error");
121
+ expect(writtenContent.cases[0].error_message).toContain("API timeout");
122
+ expect(writtenContent.cases[1].status).toBe("pass");
123
+ vi.restoreAllMocks();
124
+ });
125
+ it("exits with error for missing evals.json", async () => {
126
+ mocks.existsSync.mockReturnValue(false);
127
+ const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => { });
128
+ const exitSpy = vi
129
+ .spyOn(process, "exit")
130
+ .mockImplementation((() => { }));
131
+ await runEvalRun("/skills/test-skill");
132
+ expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining("No evals.json found"));
133
+ consoleSpy.mockRestore();
134
+ exitSpy.mockRestore();
135
+ });
136
+ it("exits with error for invalid evals.json", async () => {
137
+ mocks.existsSync.mockReturnValue(true);
138
+ mocks.readFileSync.mockReturnValue("{ broken json");
139
+ const consoleSpy = vi.spyOn(console, "error").mockImplementation(() => { });
140
+ const exitSpy = vi
141
+ .spyOn(process, "exit")
142
+ .mockImplementation((() => { }));
143
+ await runEvalRun("/skills/test-skill");
144
+ expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining("Invalid evals.json"));
145
+ consoleSpy.mockRestore();
146
+ exitSpy.mockRestore();
147
+ });
148
+ });
149
+ //# sourceMappingURL=run.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run.test.js","sourceRoot":"","sources":["../../../../src/commands/eval/__tests__/run.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,KAAK,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAC9B,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;IACrB,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE;IACtB,UAAU,EAAE,EAAE,CAAC,EAAE,EAAE;IACnB,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE;IAClB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE;CAClB,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IACxB,YAAY,EAAE,KAAK,CAAC,YAAY;IAChC,aAAa,EAAE,KAAK,CAAC,aAAa;IAClC,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,SAAS,EAAE,KAAK,CAAC,SAAS;CAC3B,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE,CAAC,CAAC;IACrC,eAAe,EAAE,GAAG,EAAE,CAAC,CAAC;QACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,KAAK,EAAE,YAAY;KACpB,CAAC;CACH,CAAC,CAAC,CAAC;AAEJ,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;AAEjD,8EAA8E;AAC9E,WAAW;AACX,8EAA8E;AAE9E,MAAM,WAAW,GAAG;IAClB,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,EAAE,EAAE,CAAC;YACL,IAAI,EAAE,YAAY;YAClB,MAAM,EAAE,eAAe;YACvB,eAAe,EAAE,mBAAmB;YACpC,KAAK,EAAE,EAAE;YACT,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,SAAS,EAAE;gBACzD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,SAAS,EAAE;aACzD;SACF;QACD;YACE,EAAE,EAAE,CAAC;YACL,IAAI,EAAE,gBAAgB;YACtB,MAAM,EAAE,eAAe;YACvB,eAAe,EAAE,mBAAmB;YACpC,KAAK,EAAE,EAAE;YACT,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,0BAA0B,EAAE,IAAI,EAAE,SAAS,EAAE;aAChE;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,gDAAgD;QAChD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACvC,KAAK,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;QAC/C,yEAAyE;QACzE,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,CAAC,QAAQ,CAAC,kBA
AkB,CAAC,KAAK,IAAI,EAAE;YAC3C,SAAS,EAAE,CAAC;YACZ,sEAAsE;YACtE,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,aAAa,CAAC;YAC1C,IAAI,SAAS,IAAI,CAAC;gBAAE,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC3E,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,mBAAmB,CAAC;YAChD,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,0BAA0B;QAC1B,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC/B,UAAU,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;QAC/C,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,KAAK,EAAE,IAAY,EAAE,MAAc,EAAE,EAAE;YACvE,IAAI,MAAM,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAAE,OAAO,iBAAiB,CAAC;YAC7D,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEtD,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,gBAAgB,EAAE,CAAC;QAC/C,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAChD,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAE7C,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC3C,SAAS,EAAE,CAAC;YACZ,sCAAsC;YACtC,IAAI,SAAS,KAAK,CAAC;gBAAE,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;YACpD,0BAA0B;YAC
1B,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,mBAAmB,CAAC;YAChD,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEtD,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QACvE,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEpD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,EAAE;aACf,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;aACtB,kBAAkB,CAAC,CAAC,GAAG,EAAE,GAAE,CAAC,CAAQ,CAAC,CAAC;QAEzC,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,qBAAqB,CAAC,CAC/C,CAAC;QACF,UAAU,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACvC,KAAK,CAAC,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,EAAE;aACf,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;aACtB,kBAAkB,CAAC,CAAC,GAAG,EAAE,GAAE,CAAC,CAAQ,CAAC,CAAC;QAEzC,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,oBAAoB,CAAC,CAC9C,CAAC;QACF,UAAU,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function runEvalCoverage(root: string): Promise<void>;
@@ -0,0 +1,79 @@
1
+ // ---------------------------------------------------------------------------
2
+ // vskill eval coverage -- show eval coverage across all skills
3
+ // ---------------------------------------------------------------------------
4
+ import { scanSkills } from "../../eval/skill-scanner.js";
5
+ import { loadAndValidateEvals } from "../../eval/schema.js";
6
+ import { readBenchmark } from "../../eval/benchmark.js";
7
+ import { green, red, yellow, cyan, bold, dim, table } from "../../utils/output.js";
8
/**
 * Print an eval coverage report for every skill discovered under `root`.
 *
 * Each skill becomes one table row with one of these statuses:
 *   - MISSING: the scanner reports no evals for the skill
 *   - INVALID: `loadAndValidateEvals` threw (counted under "fail" in the summary)
 *   - PENDING: evals load, but `readBenchmark` returned nothing
 *   - FAIL:    at least one benchmark case has status "fail" or "error"
 *   - PASS:    a benchmark exists and no case failed or errored
 *
 * @param {string} root - Directory handed to `scanSkills`.
 * @returns {Promise<void>} Resolves once the report has been printed.
 */
export async function runEvalCoverage(root) {
    const discovered = await scanSkills(root);
    if (discovered.length === 0) {
        console.log(dim("No skills found in " + root));
        return;
    }

    const columnTitles = ["PLUGIN", "SKILL", "CASES", "ASSERTIONS", "LAST RUN", "STATUS"];
    const tableRows = [];
    const tally = { passing: 0, failing: 0, pending: 0, missing: 0 };

    // Appends one report row; the first two columns always identify the skill.
    const addRow = (entry, cases, assertions, lastRun, status) => {
        tableRows.push([entry.plugin, entry.skill, cases, assertions, lastRun, status]);
    };

    for (const entry of discovered) {
        if (!entry.hasEvals) {
            addRow(entry, "-", "-", "-", yellow("MISSING"));
            tally.missing += 1;
            continue;
        }

        let parsedEvals;
        try {
            parsedEvals = loadAndValidateEvals(entry.dir);
        }
        catch {
            // Any load/validation failure is reported as INVALID and tallied as failing.
            addRow(entry, "-", "-", "-", red("INVALID"));
            tally.failing += 1;
            continue;
        }

        const caseCount = String(parsedEvals.evals.length);
        let assertionTotal = 0;
        for (const evalCase of parsedEvals.evals) {
            assertionTotal += evalCase.assertions.length;
        }
        const assertionCount = String(assertionTotal);

        const benchmark = await readBenchmark(entry.dir);
        if (!benchmark) {
            // Evals exist but have never produced a benchmark.
            addRow(entry, caseCount, assertionCount, "-", cyan("PENDING"));
            tally.pending += 1;
            continue;
        }

        const hasFailure = benchmark.cases.some((c) => c.status === "fail" || c.status === "error");
        // Keep only the part of the timestamp before "T" (the date of an ISO string).
        const lastRun = benchmark.timestamp.split("T")[0];

        if (hasFailure) {
            addRow(entry, caseCount, assertionCount, lastRun, red("FAIL"));
            tally.failing += 1;
            continue;
        }
        addRow(entry, caseCount, assertionCount, lastRun, green("PASS"));
        tally.passing += 1;
    }

    console.log(bold(`\nEval Coverage Report\n`));
    console.log(table(columnTitles, tableRows));

    const summarySegments = [
        `${bold("Summary:")} ${discovered.length} skills`,
        green(`${tally.passing} pass`),
        red(`${tally.failing} fail`),
        cyan(`${tally.pending} pending`),
        yellow(`${tally.missing} missing`),
    ];
    console.log(`\n${summarySegments.join(" | ")}`);
}
79
+ //# sourceMappingURL=coverage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"coverage.js","sourceRoot":"","sources":["../../../src/commands/eval/coverage.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,+DAA+D;AAC/D,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,MAAM,6BAA6B,CAAC;AACzD,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AACxD,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAEnF,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAY;IAChD,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,CAAC;IAEtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,qBAAqB,GAAG,IAAI,CAAC,CAAC,CAAC;QAC/C,OAAO;IACT,CAAC;IAED,MAAM,OAAO,GAAG,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;IACjF,MAAM,IAAI,GAAe,EAAE,CAAC;IAE5B,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC;YACpB,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;YACzE,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,IAAI,SAAS,CAAC;QACd,IAAI,CAAC;YACH,SAAS,GAAG,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;YACtE,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACjD,MAAM,cAAc,GAAG,MAAM,CAC3B,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CACjE,CAAC;QAEF,MAAM,SAAS,GAAG,MAAM,aAAa,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACjD,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,IAAI,CAAC;gBACR,KAAK,CAAC,MAAM;gBACZ,KAAK,CAAC,KAAK;gBACX,SAAS;gBACT,cAAc;gBACd,GAAG;gBACH,IAAI,CAAC,SAAS,CAAC;aAChB,CAAC,CAAC;YACH,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,IAAI,CACpC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAA
C,MAAM,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,OAAO,CACnD,CAAC;QACF,MAAM,OAAO,GAAG,SAAS,CAAC,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAElD,IAAI,SAAS,EAAE,CAAC;YACd,IAAI,CAAC,IAAI,CAAC;gBACR,KAAK,CAAC,MAAM;gBACZ,KAAK,CAAC,KAAK;gBACX,SAAS;gBACT,cAAc;gBACd,OAAO;gBACP,GAAG,CAAC,MAAM,CAAC;aACZ,CAAC,CAAC;YACH,OAAO,EAAE,CAAC;QACZ,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,IAAI,CAAC;gBACR,KAAK,CAAC,MAAM;gBACZ,KAAK,CAAC,KAAK;gBACX,SAAS;gBACT,cAAc;gBACd,OAAO;gBACP,KAAK,CAAC,MAAM,CAAC;aACd,CAAC,CAAC;YACH,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC,CAAC;IAC9C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAClC,OAAO,CAAC,GAAG,CACT,KAAK,IAAI,CAAC,UAAU,CAAC,IAAI,MAAM,CAAC,MAAM,aAAa,KAAK,CAAC,GAAG,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,GAAG,OAAO,OAAO,CAAC,MAAM,IAAI,CAAC,GAAG,OAAO,UAAU,CAAC,MAAM,MAAM,CAAC,GAAG,OAAO,UAAU,CAAC,EAAE,CAC5K,CAAC;AACJ,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function runEvalGenerateAll(root: string, force: boolean): Promise<void>;
@@ -0,0 +1,64 @@
1
+ // ---------------------------------------------------------------------------
2
+ // vskill eval generate-all -- batch scaffold evals.json for all skills
3
+ // ---------------------------------------------------------------------------
4
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
5
+ import { join } from "node:path";
6
+ import { scanSkills } from "../../eval/skill-scanner.js";
7
+ import { createLlmClient } from "../../eval/llm.js";
8
+ import { buildEvalInitPrompt, parseGeneratedEvals } from "../../eval/prompt-builder.js";
9
+ import { green, red, yellow, bold, dim } from "../../utils/output.js";
10
/**
 * Batch-scaffold `evals/evals.json` for every skill found under `root`.
 *
 * For each skill returned by `scanSkills`:
 *   - skipped when it already has evals and `force` is false;
 *   - failed when its `SKILL.md` is missing, or when generation, parsing or
 *     writing throws;
 *   - otherwise the LLM output is parsed and written to `<dir>/evals/evals.json`.
 * One failing skill never stops the batch; a summary is printed at the end.
 *
 * @param {string} root - Directory handed to `scanSkills`.
 * @param {boolean} force - When true, regenerate even if the skill already has evals.
 * @returns {Promise<void>} Resolves once the summary has been printed.
 */
export async function runEvalGenerateAll(root, force) {
    const skills = await scanSkills(root);
    if (skills.length === 0) {
        console.log(dim("No skills found in " + root));
        return;
    }
    const client = createLlmClient();
    let generated = 0;
    let skipped = 0;
    let failed = 0;
    const failedPaths = [];
    for (const skill of skills) {
        const label = `${skill.plugin}/${skill.skill}`;
        const evalsPath = join(skill.dir, "evals", "evals.json");
        // Skip if evals already exist and not forcing
        if (skill.hasEvals && !force) {
            skipped++;
            continue;
        }
        const skillMdPath = join(skill.dir, "SKILL.md");
        if (!existsSync(skillMdPath)) {
            failed++;
            failedPaths.push(`${label} (no SKILL.md)`);
            continue;
        }
        try {
            const skillContent = readFileSync(skillMdPath, "utf-8");
            const prompt = buildEvalInitPrompt(skillContent);
            const raw = await client.generate("You generate eval test cases for AI skills. Output only valid JSON in a code fence.", prompt);
            const evalsFile = parseGeneratedEvals(raw);
            mkdirSync(join(skill.dir, "evals"), { recursive: true });
            writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), "utf-8");
            generated++;
            console.log(green(`  Generated: ${label}`));
        }
        catch (err) {
            // Anything can be thrown in JavaScript. Reading `.message` off a
            // string yields undefined, and off null/undefined it throws inside
            // this handler, which would abort the whole batch before the summary.
            const reason = err instanceof Error ? err.message : String(err);
            failed++;
            failedPaths.push(label);
            console.error(red(`  Failed: ${label} - `) + dim(reason));
        }
    }
    // Print summary
    console.log(bold(`\nBatch Generation Summary`));
    console.log(`  Scanned:   ${skills.length}`);
    console.log(`  ${green(`Generated: ${generated}`)}`);
    console.log(`  ${yellow(`Skipped:   ${skipped}`)}`);
    // The failure count is highlighted only when there is something to report.
    console.log(`  ${failed > 0 ? red(`Failed:    ${failed}`) : `Failed:    ${failed}`}`);
    if (failedPaths.length > 0) {
        console.log(red("\nFailed skills:"));
        for (const path of failedPaths) {
            console.log(red(`  - ${path}`));
        }
    }
}
64
+ //# sourceMappingURL=generate-all.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generate-all.js","sourceRoot":"","sources":["../../../src/commands/eval/generate-all.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,uEAAuE;AACvE,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,6BAA6B,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AACxF,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,uBAAuB,CAAC;AAEtE,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,KAAc;IAEd,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,CAAC;IAEtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,qBAAqB,GAAG,IAAI,CAAC,CAAC,CAAC;QAC/C,OAAO;IACT,CAAC;IAED,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,WAAW,GAAa,EAAE,CAAC;IAEjC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;QAEzD,8CAA8C;QAC9C,IAAI,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YAC7B,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC7B,MAAM,EAAE,CAAC;YACT,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,gBAAgB,CAAC,CAAC;YACjE,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YACxD,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;YAEjD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,QAAQ,CAC/B,qFAAqF,EACrF,MAAM,CACP,CAAC;YAEF,MAAM,SAAS,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;YAE3C,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACzD,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEtE,SAAS,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACpE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,EAAE,CAAC;YACT,WAAW,CAAC,IAAI,CAAC,GAA
G,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;YACnD,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,KAAK,CAAC;gBAChD,GAAG,CAAE,GAAa,CAAC,OAAO,CAAC,CAC9B,CAAC;QACJ,CAAC;IACH,CAAC;IAED,gBAAgB;IAChB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,cAAc,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,YAAY,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,MAAM,EAAE,EAAE,CAAC,CAAC;IAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACrC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function runEvalInit(skillDir: string, force: boolean): Promise<void>;
@@ -0,0 +1,38 @@
1
+ // ---------------------------------------------------------------------------
2
+ // vskill eval init -- scaffold evals.json for a skill using LLM
3
+ // ---------------------------------------------------------------------------
4
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
5
+ import { join } from "node:path";
6
+ import { createLlmClient } from "../../eval/llm.js";
7
+ import { buildEvalInitPrompt, parseGeneratedEvals } from "../../eval/prompt-builder.js";
8
+ import { green, red, dim, yellow } from "../../utils/output.js";
9
/**
 * Scaffold `evals/evals.json` for a single skill by asking the LLM to
 * generate eval cases from the skill's `SKILL.md`.
 *
 * Nothing is generated when `SKILL.md` is missing, or when `evals.json`
 * already exists and `force` is false; both cases print a message and return.
 * Any failure while reading the skill, calling the LLM, parsing its output or
 * writing the file is reported on stderr rather than thrown.
 *
 * @param {string} skillDir - Directory containing `SKILL.md`.
 * @param {boolean} force - When true, overwrite an existing `evals.json`.
 * @returns {Promise<void>} Resolves once a result or an error has been printed.
 */
export async function runEvalInit(skillDir, force) {
    const skillMdPath = join(skillDir, "SKILL.md");
    const evalsDir = join(skillDir, "evals");
    const evalsPath = join(evalsDir, "evals.json");
    // Check SKILL.md exists
    if (!existsSync(skillMdPath)) {
        console.error(red(`SKILL.md not found at ${skillMdPath}`));
        return;
    }
    // Check existing evals.json
    if (existsSync(evalsPath) && !force) {
        console.log(yellow("evals.json already exists, use --force to overwrite"));
        return;
    }
    try {
        // Reading happens inside the try: the file can still vanish or be
        // unreadable after the existsSync check, and that should be reported
        // like every other failure instead of escaping as a rejection.
        const skillContent = readFileSync(skillMdPath, "utf-8");
        const prompt = buildEvalInitPrompt(skillContent);
        const client = createLlmClient();
        const raw = await client.generate("You generate eval test cases for AI skills. Output only valid JSON in a code fence.", prompt);
        const evalsFile = parseGeneratedEvals(raw);
        mkdirSync(evalsDir, { recursive: true });
        writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), "utf-8");
        const assertionTotal = evalsFile.evals.reduce((sum, e) => sum + e.assertions.length, 0);
        console.log(green(`Created ${evalsPath}`));
        console.log(dim(`  ${evalsFile.evals.length} eval cases, ${assertionTotal} assertions`));
    }
    catch (err) {
        // Thrown values are not guaranteed to be Error instances.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(red("Failed to generate evals: ") + dim(reason));
    }
}
38
+ //# sourceMappingURL=init.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"init.js","sourceRoot":"","sources":["../../../src/commands/eval/init.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gEAAgE;AAChE,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AACxF,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAEhE,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAgB,EAChB,KAAc;IAEd,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAE/C,wBAAwB;IACxB,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yBAAyB,WAAW,EAAE,CAAC,CAAC,CAAC;QAC3D,OAAO;IACT,CAAC;IAED,4BAA4B;IAC5B,IAAI,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CACT,MAAM,CAAC,qDAAqD,CAAC,CAC9D,CAAC;QACF,OAAO;IACT,CAAC;IAED,MAAM,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACxD,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,QAAQ,CAC/B,qFAAqF,EACrF,MAAM,CACP,CAAC;QAEF,MAAM,SAAS,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC;QAE3C,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QAEtE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC,CAAC;QAC3C,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,KAAK,SAAS,CAAC,KAAK,CAAC,MAAM,gBAAgB,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,aAAa,CAAC,CAC9H,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,4BAA4B,CAAC,GAAG,GAAG,CAAE,GAAa,CAAC,OAAO,CAAC,CAChE,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function runEvalRun(skillDir: string): Promise<void>;
@@ -0,0 +1,107 @@
1
+ // ---------------------------------------------------------------------------
2
+ // vskill eval run -- execute eval cases and grade assertions
3
+ // ---------------------------------------------------------------------------
4
+ import { loadAndValidateEvals, EvalValidationError } from "../../eval/schema.js";
5
+ import { createLlmClient } from "../../eval/llm.js";
6
+ import { judgeAssertion } from "../../eval/judge.js";
7
+ import { writeBenchmark } from "../../eval/benchmark.js";
8
+ import { green, red, yellow, bold, dim, table } from "../../utils/output.js";
9
/**
 * Execute every eval case of a skill, grade its assertions, print a results
 * table with a summary, and persist the outcome via `writeBenchmark`.
 *
 * If the evals cannot be loaded, an error is printed and `process.exit(1)` is
 * called. A failure inside a single eval case is recorded as status "error"
 * for that case and the remaining cases still run.
 *
 * @param {string} skillDir - Skill directory passed to `loadAndValidateEvals`
 *   and `writeBenchmark`.
 * @returns {Promise<void>} Resolves after the benchmark has been written.
 */
export async function runEvalRun(skillDir) {
    // Thrown values are not guaranteed to be Error instances; calling
    // `.message.slice` on one would throw inside a catch handler and abort
    // the run before the benchmark is written.
    const describeError = (err) => (err instanceof Error ? err.message : String(err));

    // Load and validate evals.json
    let evalsFile;
    try {
        evalsFile = loadAndValidateEvals(skillDir);
    }
    catch (err) {
        if (err instanceof EvalValidationError) {
            const firstMsg = err.errors?.[0]?.message || "";
            if (firstMsg.includes("No evals.json")) {
                console.error(red(`No evals.json found at ${skillDir}/evals/evals.json`));
            }
            else {
                console.error(red(`Invalid evals.json: ${err.message}`));
            }
        }
        else {
            console.error(red(`Error loading evals: ${describeError(err)}`));
        }
        process.exit(1);
        // Reached only when process.exit is stubbed (e.g. in tests).
        return;
    }
    const client = createLlmClient();
    const model = client.model;
    const benchmarkCases = [];
    const tableRows = [];
    for (const evalCase of evalsFile.evals) {
        try {
            // Step 1: Send prompt to LLM
            const output = await client.generate("You are an AI skill being evaluated. Respond to the prompt as the skill would.", evalCase.prompt);
            // Step 2: Judge each assertion
            const assertionResults = [];
            let passCount = 0;
            for (const assertion of evalCase.assertions) {
                const result = await judgeAssertion(output, assertion, client);
                assertionResults.push(result);
                if (result.pass) {
                    passCount++;
                }
                // Keep the table narrow: at most 60 characters of assertion text.
                const truncatedText = assertion.text.length > 60
                    ? assertion.text.slice(0, 57) + "..."
                    : assertion.text;
                tableRows.push([
                    evalCase.name,
                    assertion.id,
                    truncatedText,
                    result.pass ? green("PASS") : red("FAIL"),
                ]);
            }
            const passRate = evalCase.assertions.length > 0
                ? passCount / evalCase.assertions.length
                : 0;
            // NOTE(review): a case with zero assertions ends up with status
            // "pass" but pass_rate 0 -- confirm whether that is intended.
            const allPassed = passCount === evalCase.assertions.length;
            benchmarkCases.push({
                eval_id: evalCase.id,
                eval_name: evalCase.name,
                status: allPassed ? "pass" : "fail",
                error_message: null,
                pass_rate: passRate,
                assertions: assertionResults,
            });
        }
        catch (err) {
            // Mark case as error, continue with remaining
            const reason = describeError(err);
            benchmarkCases.push({
                eval_id: evalCase.id,
                eval_name: evalCase.name,
                status: "error",
                error_message: reason,
                pass_rate: 0,
                assertions: [],
            });
            tableRows.push([
                evalCase.name,
                "-",
                dim("Error: " + reason.slice(0, 50)),
                yellow("ERROR"),
            ]);
        }
    }
    // Print results table
    const headers = ["EVAL", "ASSERTION", "TEXT", "STATUS"];
    console.log(bold(`\nEval Results: ${evalsFile.skill_name}\n`));
    console.log(table(headers, tableRows));
    // Compute summary
    const passed = benchmarkCases.filter((c) => c.status === "pass").length;
    const failed = benchmarkCases.filter((c) => c.status === "fail").length;
    const errors = benchmarkCases.filter((c) => c.status === "error").length;
    // Build the line from the parts that apply, so there are no doubled
    // spaces and the leading blank line is not trimmed away.
    const summaryParts = [green(`${passed} passed`)];
    if (failed > 0) {
        summaryParts.push(red(`${failed} failed`));
    }
    if (errors > 0) {
        summaryParts.push(yellow(`${errors} errors`));
    }
    console.log(`\n${summaryParts.join(" ")}`);
    // Write benchmark.json
    const benchmark = {
        timestamp: new Date().toISOString(),
        model,
        skill_name: evalsFile.skill_name,
        cases: benchmarkCases,
    };
    await writeBenchmark(skillDir, benchmark);
    console.log(dim(`\nBenchmark written to ${skillDir}/evals/benchmark.json`));
}
107
+ //# sourceMappingURL=run.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/commands/eval/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6DAA6D;AAC7D,8EAA8E;AAE9E,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAEzD,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAE7E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,QAAQ,mBAAmB,CAAC,CAAC,CAAC;YAC5E,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAC3B,MAAM,cAAc,GAAoB,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAe,EAAE,CAAC;IAEjC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACvC,IAAI,CAAC;YACH,6BAA6B;YAC7B,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAClC,gFAAgF,EAChF,QAAQ,CAAC,MAAM,CAChB,CAAC;YAEF,+BAA+B;YAC/B,MAAM,gBAAgB,GAAG,EAAE,CAAC;YAC5B,IAAI,SAAS,GAAG,CAAC,CAAC;YAElB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;gBAC/D,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC9B,IAAI,MAAM,CAAC,IAAI;oBAAE,SAAS,EAAE,CAAC;gBAE7B,MAAM,aAAa,GACjB,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE;oBACxB,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK;oBACrC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC;gBAErB,SAAS,CAAC,IAAI,CAAC;oBACb,QAAQ,CAAC,IAAI;oBACb,SAAS,CAAC,EAAE;oBACZ,aAAa;oBACb,MAAM,CAAC,IAAI,CAAC,CAAC,CAA
C,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;iBAC1C,CAAC,CAAC;YACL,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;gBAC7C,CAAC,CAAC,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM;gBACxC,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,SAAS,GAAG,SAAS,KAAK,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC;YAE3D,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;gBACnC,aAAa,EAAE,IAAI;gBACnB,SAAS,EAAE,QAAQ;gBACnB,UAAU,EAAE,gBAAgB;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,8CAA8C;YAC9C,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAG,GAAa,CAAC,OAAO;gBACrC,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YAEH,SAAS,CAAC,IAAI,CAAC;gBACb,QAAQ,CAAC,IAAI;gBACb,GAAG;gBACH,GAAG,CAAC,SAAS,GAAI,GAAa,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACpD,MAAM,CAAC,OAAO,CAAC;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,SAAS,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC;IAEvC,kBAAkB;IAClB,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IACzE,OAAO,CAAC,GAAG,CACT,KAAK,KAAK,CAAC,GAAG,MAAM,SAAS,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,EAAE,CACrI,CAAC;IAEF,uBAAuB;IACvB,MAAM,SAAS,GAAoB;QACjC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK;QACL,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,KAAK,EAAE,cAAc;KACtB,CAAC;IAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC1
C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,uBAAuB,CAAC,CAAC,CAAC;AAC9E,CAAC"}