vskill 0.2.55 → 0.2.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +6 -3
  2. package/dist/commands/eval/__tests__/run.test.js +7 -2
  3. package/dist/commands/eval/__tests__/run.test.js.map +1 -1
  4. package/dist/commands/eval/run.js +24 -3
  5. package/dist/commands/eval/run.js.map +1 -1
  6. package/dist/commands/eval/serve.d.ts +1 -0
  7. package/dist/commands/eval/serve.js +51 -0
  8. package/dist/commands/eval/serve.js.map +1 -0
  9. package/dist/commands/eval.d.ts +1 -0
  10. package/dist/commands/eval.js +19 -3
  11. package/dist/commands/eval.js.map +1 -1
  12. package/dist/eval/__tests__/activation-tester.test.d.ts +1 -0
  13. package/dist/eval/__tests__/activation-tester.test.js +94 -0
  14. package/dist/eval/__tests__/activation-tester.test.js.map +1 -0
  15. package/dist/eval/__tests__/benchmark-history.test.d.ts +1 -0
  16. package/dist/eval/__tests__/benchmark-history.test.js +200 -0
  17. package/dist/eval/__tests__/benchmark-history.test.js.map +1 -0
  18. package/dist/eval/__tests__/comparator.test.d.ts +1 -0
  19. package/dist/eval/__tests__/comparator.test.js +136 -0
  20. package/dist/eval/__tests__/comparator.test.js.map +1 -0
  21. package/dist/eval/__tests__/llm.test.js +161 -44
  22. package/dist/eval/__tests__/llm.test.js.map +1 -1
  23. package/dist/eval/__tests__/skill-scanner.test.js +40 -1
  24. package/dist/eval/__tests__/skill-scanner.test.js.map +1 -1
  25. package/dist/eval/__tests__/verdict.test.d.ts +1 -0
  26. package/dist/eval/__tests__/verdict.test.js +47 -0
  27. package/dist/eval/__tests__/verdict.test.js.map +1 -0
  28. package/dist/eval/activation-tester.d.ts +25 -0
  29. package/dist/eval/activation-tester.js +89 -0
  30. package/dist/eval/activation-tester.js.map +1 -0
  31. package/dist/eval/benchmark-history.d.ts +23 -0
  32. package/dist/eval/benchmark-history.js +108 -0
  33. package/dist/eval/benchmark-history.js.map +1 -0
  34. package/dist/eval/comparator.d.ts +29 -0
  35. package/dist/eval/comparator.js +100 -0
  36. package/dist/eval/comparator.js.map +1 -0
  37. package/dist/eval/llm.js +119 -6
  38. package/dist/eval/llm.js.map +1 -1
  39. package/dist/eval/skill-scanner.js +35 -26
  40. package/dist/eval/skill-scanner.js.map +1 -1
  41. package/dist/eval/verdict.d.ts +3 -0
  42. package/dist/eval/verdict.js +28 -0
  43. package/dist/eval/verdict.js.map +1 -0
  44. package/dist/eval-server/api-routes.d.ts +2 -0
  45. package/dist/eval-server/api-routes.js +425 -0
  46. package/dist/eval-server/api-routes.js.map +1 -0
  47. package/dist/eval-server/eval-server.d.ts +6 -0
  48. package/dist/eval-server/eval-server.js +102 -0
  49. package/dist/eval-server/eval-server.js.map +1 -0
  50. package/dist/eval-server/router.d.ts +14 -0
  51. package/dist/eval-server/router.js +117 -0
  52. package/dist/eval-server/router.js.map +1 -0
  53. package/dist/eval-server/sse-helpers.d.ts +4 -0
  54. package/dist/eval-server/sse-helpers.js +24 -0
  55. package/dist/eval-server/sse-helpers.js.map +1 -0
  56. package/dist/eval-ui/assets/index-BYpLv_X1.css +1 -0
  57. package/dist/eval-ui/assets/index-Od6Ch9-a.js +70 -0
  58. package/dist/eval-ui/index.html +13 -0
  59. package/dist/index.js +3 -2
  60. package/dist/index.js.map +1 -1
  61. package/package.json +15 -2
package/README.md CHANGED
@@ -9,8 +9,8 @@
9
9
  <a href="https://www.npmjs.com/package/vskill"><img src="https://img.shields.io/npm/v/vskill?color=cb3837&logo=npm" alt="npm" /></a>
10
10
  <a href="https://www.npmjs.com/package/vskill"><img src="https://img.shields.io/npm/dw/vskill?color=cb3837&logo=npm&label=downloads" alt="downloads" /></a>
11
11
  <img src="https://img.shields.io/badge/agents-49_platforms-0969DA" alt="49 agents" />
12
- <img src="https://img.shields.io/badge/plugins-12-8B5CF6" alt="12 plugins" />
13
- <img src="https://img.shields.io/badge/skills-41-10B981" alt="41 skills" />
12
+ <img src="https://img.shields.io/badge/plugins-13-8B5CF6" alt="13 plugins" />
13
+ <img src="https://img.shields.io/badge/skills-42-10B981" alt="42 skills" />
14
14
  <a href="https://verified-skill.com"><img src="https://img.shields.io/badge/registry-verified--skill.com-F59E0B" alt="registry" /></a>
15
15
  <img src="https://img.shields.io/badge/license-MIT-green" alt="MIT" />
16
16
  </p>
@@ -100,7 +100,7 @@ vskill auto-detects your installed agents and installs skills to all of them at
100
100
 
101
101
  ## Plugin Marketplace
102
102
 
103
- vskill ships **41 expert skills** organized into **12 domain plugins**. Each plugin has its own namespace — install only what you need.
103
+ vskill ships **42 expert skills** organized into **13 domain plugins**. Each plugin has its own namespace — install only what you need.
104
104
 
105
105
  ```bash
106
106
  npx vskill install --repo anton-abyzov/vskill --plugin frontend
@@ -156,6 +156,9 @@ Then invoke as `/plugin:skill` in your agent:
156
156
  **blockchain** — Solidity, Foundry, smart contracts
157
157
  - `blockchain-core`
158
158
 
159
+ **google-workspace** — Google Workspace CLI (gws) for Drive, Sheets, Docs, Calendar, Chat, Admin
160
+ - `gws`
161
+
159
162
  **skills** — Skill discovery and recommendations
160
163
  - `scout`
161
164
 
@@ -60,9 +60,14 @@ const VALID_EVALS = {
60
60
  describe("runEvalRun", () => {
61
61
  beforeEach(() => {
62
62
  vi.resetAllMocks();
63
- // Default: evals.json exists with valid content
63
+ // Default: evals.json and SKILL.md both exist
64
64
  mocks.existsSync.mockReturnValue(true);
65
- mocks.readFileSync.mockReturnValue(JSON.stringify(VALID_EVALS));
65
+ mocks.readFileSync.mockImplementation((path) => {
66
+ if (typeof path === "string" && path.endsWith("SKILL.md")) {
67
+ return "# Test Skill\nYou are a test skill.";
68
+ }
69
+ return JSON.stringify(VALID_EVALS);
70
+ });
66
71
  });
67
72
  it("prints results table on success", async () => {
68
73
  // Mock LLM: first call returns output, subsequent calls judge assertions
@@ -1 +1 @@
1
- {"version":3,"file":"run.test.js","sourceRoot":"","sources":["../../../../src/commands/eval/__tests__/run.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,KAAK,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAC9B,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;IACrB,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE;IACtB,UAAU,EAAE,EAAE,CAAC,EAAE,EAAE;IACnB,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE;IAClB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE;CAClB,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IACxB,YAAY,EAAE,KAAK,CAAC,YAAY;IAChC,aAAa,EAAE,KAAK,CAAC,aAAa;IAClC,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,SAAS,EAAE,KAAK,CAAC,SAAS;CAC3B,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE,CAAC,CAAC;IACrC,eAAe,EAAE,GAAG,EAAE,CAAC,CAAC;QACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,KAAK,EAAE,YAAY;KACpB,CAAC;CACH,CAAC,CAAC,CAAC;AAEJ,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;AAEjD,8EAA8E;AAC9E,WAAW;AACX,8EAA8E;AAE9E,MAAM,WAAW,GAAG;IAClB,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,EAAE,EAAE,CAAC;YACL,IAAI,EAAE,YAAY;YAClB,MAAM,EAAE,eAAe;YACvB,eAAe,EAAE,mBAAmB;YACpC,KAAK,EAAE,EAAE;YACT,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,SAAS,EAAE;gBACzD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,SAAS,EAAE;aACzD;SACF;QACD;YACE,EAAE,EAAE,CAAC;YACL,IAAI,EAAE,gBAAgB;YACtB,MAAM,EAAE,eAAe;YACvB,eAAe,EAAE,mBAAmB;YACpC,KAAK,EAAE,EAAE;YACT,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,0BAA0B,EAAE,IAAI,EAAE,SAAS,EAAE;aAChE;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,gDAAgD;QAChD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACvC,KAAK,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;QAC/C,yEAAyE;QACzE,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,CAAC,QAAQ,CAAC,kBA
AkB,CAAC,KAAK,IAAI,EAAE;YAC3C,SAAS,EAAE,CAAC;YACZ,sEAAsE;YACtE,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,aAAa,CAAC;YAC1C,IAAI,SAAS,IAAI,CAAC;gBAAE,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC3E,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,mBAAmB,CAAC;YAChD,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,0BAA0B;QAC1B,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC/B,UAAU,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;QAC/C,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,KAAK,EAAE,IAAY,EAAE,MAAc,EAAE,EAAE;YACvE,IAAI,MAAM,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAAE,OAAO,iBAAiB,CAAC;YAC7D,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEtD,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,gBAAgB,EAAE,CAAC;QAC/C,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAChD,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAE7C,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC3C,SAAS,EAAE,CAAC;YACZ,sCAAsC;YACtC,IAAI,SAAS,KAAK,CAAC;gBAAE,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;YACpD,0BAA0B;YAC
1B,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,mBAAmB,CAAC;YAChD,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEtD,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QACvE,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEpD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,EAAE;aACf,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;aACtB,kBAAkB,CAAC,CAAC,GAAG,EAAE,GAAE,CAAC,CAAQ,CAAC,CAAC;QAEzC,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,qBAAqB,CAAC,CAC/C,CAAC;QACF,UAAU,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACvC,KAAK,CAAC,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,EAAE;aACf,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;aACtB,kBAAkB,CAAC,CAAC,GAAG,EAAE,GAAE,CAAC,CAAQ,CAAC,CAAC;QAEzC,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,oBAAoB,CAAC,CAC9C,CAAC;QACF,UAAU,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"run.test.js","sourceRoot":"","sources":["../../../../src/commands/eval/__tests__/run.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAE9D,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,MAAM,KAAK,GAAG,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;IAC9B,YAAY,EAAE,EAAE,CAAC,EAAE,EAAE;IACrB,aAAa,EAAE,EAAE,CAAC,EAAE,EAAE;IACtB,UAAU,EAAE,EAAE,CAAC,EAAE,EAAE;IACnB,SAAS,EAAE,EAAE,CAAC,EAAE,EAAE;IAClB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE;CAClB,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;IACxB,YAAY,EAAE,KAAK,CAAC,YAAY;IAChC,aAAa,EAAE,KAAK,CAAC,aAAa;IAClC,UAAU,EAAE,KAAK,CAAC,UAAU;IAC5B,SAAS,EAAE,KAAK,CAAC,SAAS;CAC3B,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE,CAAC,CAAC;IACrC,eAAe,EAAE,GAAG,EAAE,CAAC,CAAC;QACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,KAAK,EAAE,YAAY;KACpB,CAAC;CACH,CAAC,CAAC,CAAC;AAEJ,8EAA8E;AAC9E,uCAAuC;AACvC,8EAA8E;AAE9E,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;AAEjD,8EAA8E;AAC9E,WAAW;AACX,8EAA8E;AAE9E,MAAM,WAAW,GAAG;IAClB,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,EAAE,EAAE,CAAC;YACL,IAAI,EAAE,YAAY;YAClB,MAAM,EAAE,eAAe;YACvB,eAAe,EAAE,mBAAmB;YACpC,KAAK,EAAE,EAAE;YACT,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,SAAS,EAAE;gBACzD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,SAAS,EAAE;aACzD;SACF;QACD;YACE,EAAE,EAAE,CAAC;YACL,IAAI,EAAE,gBAAgB;YACtB,MAAM,EAAE,eAAe;YACvB,eAAe,EAAE,mBAAmB;YACpC,KAAK,EAAE,EAAE;YACT,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,0BAA0B,EAAE,IAAI,EAAE,SAAS,EAAE;aAChE;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;IAC1B,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;QACnB,8CAA8C;QAC9C,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACvC,KAAK,CAAC,YAAY,CAAC,kBAAkB,CAAC,CAAC,IAAY,EAAE,EAAE;YACrD,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC;gBAC1D,OAAO,qCAAqC,CAAC;YAC/C,CAAC;YACD,OAAO,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAA
C;QACrC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;QAC/C,yEAAyE;QACzE,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC3C,SAAS,EAAE,CAAC;YACZ,sEAAsE;YACtE,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,aAAa,CAAC;YAC1C,IAAI,SAAS,IAAI,CAAC;gBAAE,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC3E,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,mBAAmB,CAAC;YAChD,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEzE,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,0BAA0B;QAC1B,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,CAAC,MAAM,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC/B,UAAU,CAAC,WAAW,EAAE,CAAC;IAC3B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iCAAiC,EAAE,KAAK,IAAI,EAAE;QAC/C,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,KAAK,EAAE,IAAY,EAAE,MAAc,EAAE,EAAE;YACvE,IAAI,MAAM,CAAC,QAAQ,CAAC,aAAa,CAAC;gBAAE,OAAO,iBAAiB,CAAC;YAC7D,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEtD,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC,gBAAgB,EAAE,CAAC;QAC/C,MAAM,WAAW,GAAG,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACzD,MAAM,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAChD,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,cAAc,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACrD,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAE7C,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,
CAAC,QAAQ,CAAC,kBAAkB,CAAC,KAAK,IAAI,EAAE;YAC3C,SAAS,EAAE,CAAC;YACZ,sCAAsC;YACtC,IAAI,SAAS,KAAK,CAAC;gBAAE,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;YACpD,0BAA0B;YAC1B,IAAI,SAAS,KAAK,CAAC;gBAAE,OAAO,mBAAmB,CAAC;YAChD,OAAO,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAEtD,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxE,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;QACvE,MAAM,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEpD,EAAE,CAAC,eAAe,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,EAAE;aACf,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;aACtB,kBAAkB,CAAC,CAAC,GAAG,EAAE,GAAE,CAAC,CAAQ,CAAC,CAAC;QAEzC,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,qBAAqB,CAAC,CAC/C,CAAC;QACF,UAAU,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,KAAK,CAAC,UAAU,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QACvC,KAAK,CAAC,YAAY,CAAC,eAAe,CAAC,eAAe,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,kBAAkB,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;QAC3E,MAAM,OAAO,GAAG,EAAE;aACf,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC;aACtB,kBAAkB,CAAC,CAAC,GAAG,EAAE,GAAE,CAAC,CAAQ,CAAC,CAAC;QAEzC,MAAM,UAAU,CAAC,oBAAoB,CAAC,CAAC;QAEvC,MAAM,CAAC,UAAU,CAAC,CAAC,oBAAoB,CACrC,MAAM,CAAC,gBAAgB,CAAC,oBAAoB,CAAC,CAC9C,CAAC;QACF,UAAU,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,CAAC,WAAW,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;AACL,CAAC
,CAAC,CAAC"}
@@ -1,6 +1,8 @@
1
1
  // ---------------------------------------------------------------------------
2
2
  // vskill eval run -- execute eval cases and grade assertions
3
3
  // ---------------------------------------------------------------------------
4
+ import { readFileSync, existsSync } from "node:fs";
5
+ import { join } from "node:path";
4
6
  import { loadAndValidateEvals, EvalValidationError } from "../../eval/schema.js";
5
7
  import { createLlmClient } from "../../eval/llm.js";
6
8
  import { judgeAssertion } from "../../eval/judge.js";
@@ -28,14 +30,32 @@ export async function runEvalRun(skillDir) {
28
30
  process.exit(1);
29
31
  return;
30
32
  }
33
+ // Load SKILL.md content for the system prompt
34
+ const skillMdPath = join(skillDir, "SKILL.md");
35
+ let skillContent = "";
36
+ if (existsSync(skillMdPath)) {
37
+ skillContent = readFileSync(skillMdPath, "utf-8");
38
+ }
39
+ else {
40
+ console.error(yellow(`Warning: No SKILL.md found at ${skillMdPath} — running evals without skill content`));
41
+ }
42
+ const systemPrompt = skillContent
43
+ ? `You are an AI assistant with the following skill loaded. Use this skill's knowledge to answer the user's question.\n\n---\n${skillContent}\n---`
44
+ : "You are an AI assistant. Answer the user's question.";
31
45
  const client = createLlmClient();
32
46
  const model = client.model;
47
+ const total = evalsFile.evals.length;
48
+ console.log(dim(`Provider: ${model} | ${total} eval case${total !== 1 ? "s" : ""}`));
49
+ console.log(dim(`Skill: ${skillContent ? skillMdPath : "(none)"}\n`));
33
50
  const benchmarkCases = [];
34
51
  const tableRows = [];
35
- for (const evalCase of evalsFile.evals) {
52
+ for (let i = 0; i < evalsFile.evals.length; i++) {
53
+ const evalCase = evalsFile.evals[i];
36
54
  try {
37
55
  // Step 1: Send prompt to LLM
38
- const output = await client.generate("You are an AI skill being evaluated. Respond to the prompt as the skill would.", evalCase.prompt);
56
+ process.stdout.write(dim(`[${i + 1}/${total}] ${evalCase.name} generating...`));
57
+ const output = await client.generate(systemPrompt, evalCase.prompt);
58
+ process.stdout.write(dim(` judging ${evalCase.assertions.length} assertions...`));
39
59
  // Step 2: Judge each assertion
40
60
  const assertionResults = [];
41
61
  let passCount = 0;
@@ -58,6 +78,7 @@ export async function runEvalRun(skillDir) {
58
78
  ? passCount / evalCase.assertions.length
59
79
  : 0;
60
80
  const allPassed = passCount === evalCase.assertions.length;
81
+ console.log(allPassed ? green(" done") : red(` ${passCount}/${evalCase.assertions.length} passed`));
61
82
  benchmarkCases.push({
62
83
  eval_id: evalCase.id,
63
84
  eval_name: evalCase.name,
@@ -68,7 +89,7 @@ export async function runEvalRun(skillDir) {
68
89
  });
69
90
  }
70
91
  catch (err) {
71
- // Mark case as error, continue with remaining
92
+ console.log(yellow(" error"));
72
93
  benchmarkCases.push({
73
94
  eval_id: evalCase.id,
74
95
  eval_name: evalCase.name,
@@ -1 +1 @@
1
- {"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/commands/eval/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6DAA6D;AAC7D,8EAA8E;AAE9E,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAEzD,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAE7E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,QAAQ,mBAAmB,CAAC,CAAC,CAAC;YAC5E,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAC3B,MAAM,cAAc,GAAoB,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAe,EAAE,CAAC;IAEjC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;QACvC,IAAI,CAAC;YACH,6BAA6B;YAC7B,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAClC,gFAAgF,EAChF,QAAQ,CAAC,MAAM,CAChB,CAAC;YAEF,+BAA+B;YAC/B,MAAM,gBAAgB,GAAG,EAAE,CAAC;YAC5B,IAAI,SAAS,GAAG,CAAC,CAAC;YAElB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;gBAC/D,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC9B,IAAI,MAAM,CAAC,IAAI;oBAAE,SAAS,EAAE,CAAC;gBAE7B,MAAM,aAAa,GACjB,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE;oBACxB,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK;oBACrC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC;gBAErB,SAAS,CAAC,IAAI,CAAC;oBACb,QAAQ,CAAC,IAAI;oBACb,SAAS,CAAC,EAAE;oBACZ,aAAa;oBACb,MAAM,CAAC,IAAI,CAAC,CAAC,CAA
C,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;iBAC1C,CAAC,CAAC;YACL,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;gBAC7C,CAAC,CAAC,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM;gBACxC,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,SAAS,GAAG,SAAS,KAAK,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC;YAE3D,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;gBACnC,aAAa,EAAE,IAAI;gBACnB,SAAS,EAAE,QAAQ;gBACnB,UAAU,EAAE,gBAAgB;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,8CAA8C;YAC9C,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAG,GAAa,CAAC,OAAO;gBACrC,SAAS,EAAE,CAAC;gBACZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YAEH,SAAS,CAAC,IAAI,CAAC;gBACb,QAAQ,CAAC,IAAI;gBACb,GAAG;gBACH,GAAG,CAAC,SAAS,GAAI,GAAa,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACpD,MAAM,CAAC,OAAO,CAAC;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,SAAS,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC;IAEvC,kBAAkB;IAClB,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IACzE,OAAO,CAAC,GAAG,CACT,KAAK,KAAK,CAAC,GAAG,MAAM,SAAS,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,EAAE,CACrI,CAAC;IAEF,uBAAuB;IACvB,MAAM,SAAS,GAAoB;QACjC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK;QACL,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,KAAK,EAAE,cAAc;KACtB,CAAC;IAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC1
C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,uBAAuB,CAAC,CAAC,CAAC;AAC9E,CAAC"}
1
+ {"version":3,"file":"run.js","sourceRoot":"","sources":["../../../src/commands/eval/run.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,6DAA6D;AAC7D,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACjF,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAEzD,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAE7E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,+BAA+B;IAC/B,IAAI,SAAS,CAAC;IACd,IAAI,CAAC;QACH,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAC7C,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,mBAAmB,EAAE,CAAC;YACvC,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,EAAE,CAAC;YAC9C,IAAI,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;gBACvC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0BAA0B,QAAQ,mBAAmB,CAAC,CAAC,CAAC;YAC5E,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,uBAAuB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,wBAAyB,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO;IACT,CAAC;IAED,8CAA8C;IAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,IAAI,YAAY,GAAG,EAAE,CAAC;IACtB,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC5B,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACpD,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,iCAAiC,WAAW,wCAAwC,CAAC,CAAC,CAAC;IAC9G,CAAC;IAED,MAAM,YAAY,GAAG,YAAY;QAC/B,CAAC,CAAC,8HAA8H,YAAY,OAAO;QACnJ,CAAC,CAAC,sDAAsD,CAAC;IAE3D,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;IAC3B,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC;IACrC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,aAAa,KAAK,MAAM,KAAK,aAAa,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;IACrF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,CAAC;IAEtE,MAAM,cAAc,GAAoB,EAAE,CAAC;IAC3C,MAA
M,SAAS,GAAe,EAAE,CAAC;IAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACpC,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,KAAK,QAAQ,CAAC,IAAI,kBAAkB,CAAC,CAAC,CAAC;YAClF,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YACpE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,YAAY,QAAQ,CAAC,UAAU,CAAC,MAAM,gBAAgB,CAAC,CAAC,CAAC;YAElF,+BAA+B;YAC/B,MAAM,gBAAgB,GAAG,EAAE,CAAC;YAC5B,IAAI,SAAS,GAAG,CAAC,CAAC;YAElB,KAAK,MAAM,SAAS,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;gBAC/D,gBAAgB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC9B,IAAI,MAAM,CAAC,IAAI;oBAAE,SAAS,EAAE,CAAC;gBAE7B,MAAM,aAAa,GACjB,SAAS,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE;oBACxB,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK;oBACrC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC;gBAErB,SAAS,CAAC,IAAI,CAAC;oBACb,QAAQ,CAAC,IAAI;oBACb,SAAS,CAAC,EAAE;oBACZ,aAAa;oBACb,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;iBAC1C,CAAC,CAAC;YACL,CAAC;YAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC;gBAC7C,CAAC,CAAC,SAAS,GAAG,QAAQ,CAAC,UAAU,CAAC,MAAM;gBACxC,CAAC,CAAC,CAAC,CAAC;YACN,MAAM,SAAS,GAAG,SAAS,KAAK,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC;YAC3D,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,SAAS,IAAI,QAAQ,CAAC,UAAU,CAAC,MAAM,SAAS,CAAC,CAAC,CAAC;YAEpG,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;gBACnC,aAAa,EAAE,IAAI;gBACnB,SAAS,EAAE,QAAQ;gBACnB,UAAU,EAAE,gBAAgB;aAC7B,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC9B,cAAc,CAAC,IAAI,CAAC;gBAClB,OAAO,EAAE,QAAQ,CAAC,EAAE;gBACpB,SAAS,EAAE,QAAQ,CAAC,IAAI;gBACxB,MAAM,EAAE,OAAO;gBACf,aAAa,EAAG,GAAa,CAAC,OAAO;gBACrC,SAAS,EAAE,CAAC;gBA
CZ,UAAU,EAAE,EAAE;aACf,CAAC,CAAC;YAEH,SAAS,CAAC,IAAI,CAAC;gBACb,QAAQ,CAAC,IAAI;gBACb,GAAG;gBACH,GAAG,CAAC,SAAS,GAAI,GAAa,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBACpD,MAAM,CAAC,OAAO,CAAC;aAChB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,CAAC,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC;IACxD,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,mBAAmB,SAAS,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC;IAEvC,kBAAkB;IAClB,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IACzE,OAAO,CAAC,GAAG,CACT,KAAK,KAAK,CAAC,GAAG,MAAM,SAAS,CAAC,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,MAAM,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,EAAE,CACrI,CAAC;IAEF,uBAAuB;IACvB,MAAM,SAAS,GAAoB;QACjC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,KAAK;QACL,UAAU,EAAE,SAAS,CAAC,UAAU;QAChC,KAAK,EAAE,cAAc;KACtB,CAAC;IAEF,MAAM,cAAc,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAC1C,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,0BAA0B,QAAQ,uBAAuB,CAAC,CAAC,CAAC;AAC9E,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function runEvalServe(root: string, port: number): Promise<void>;
@@ -0,0 +1,51 @@
1
+ // ---------------------------------------------------------------------------
2
+ // vskill eval serve -- start the eval UI web server
3
+ // ---------------------------------------------------------------------------
4
+ import { resolve } from "node:path";
5
+ import { existsSync } from "node:fs";
6
+ import { homedir } from "node:os";
7
+ import { join } from "node:path";
8
+ import { startEvalServer } from "../../eval-server/eval-server.js";
9
+ import { yellow, dim } from "../../utils/output.js";
10
/**
 * Print an installation hint when no Skill-Creator install can be found.
 *
 * Probes a fixed list of candidate paths under the user's home directory.
 * As soon as one exists the function returns silently; otherwise it logs a
 * warning followed by installation instructions to stdout.
 */
function checkSkillCreator() {
    const claudeDir = join(homedir(), ".claude");
    const pluginCache = join(claudeDir, "plugins", "cache");
    const candidates = [
        join(pluginCache, "claude-plugins-official", "skill-creator"),
        join(claudeDir, "skills", "skill-creator.md"),
        join(pluginCache, "specweave", "sw", "1.0.0", "skills", "skill-creator"),
    ];
    for (const candidate of candidates) {
        if (existsSync(candidate)) {
            // At least one known location is present: nothing to report.
            return;
        }
    }
    // Each fragment is styled on its own, exactly as emitted before.
    const fragments = [
        yellow("\n ⚠ Skill-Creator not detected."),
        "\n\n",
        dim(" The Skill-Creator skill provides the gold-standard evaluation\n"),
        dim(" methodology (grading, blind A/B comparison, analysis).\n"),
        dim(" The eval UI uses the same methodology natively, but for best\n"),
        dim(" results, install the Skill-Creator skill:\n\n"),
        " 1. In Claude Code, run: ",
        "/skill-creator:skill-creator",
        "\n",
        " 2. Or install via vskill: ",
        "vskill install --repo claude-plugins-official/skill-creator",
        "\n",
        " 3. Then reload plugins: ",
        "Restart Claude Code or run a new session",
        "\n",
    ];
    console.log(fragments.join(""));
}
37
/**
 * Start the eval UI web server and install shutdown handlers.
 *
 * @param {string} root - Directory handed to the server; resolved to an
 *   absolute path first.
 * @param {number} port - TCP port to listen on; must be an integer in the
 *   0-65535 range.
 * @returns {Promise<void>} Settles after `startEvalServer` has resolved and
 *   the SIGINT/SIGTERM handlers are registered.
 * @throws {RangeError} If `port` is not an integer between 0 and 65535.
 */
export async function runEvalServe(root, port) {
    // The caller derives `port` with parseInt, which produces NaN for
    // non-numeric input. Reject that (and out-of-range values) up front,
    // before printing anything or touching the network.
    if (!Number.isInteger(port) || port < 0 || port > 65535) {
        throw new RangeError(`Invalid port "${port}". Expected an integer between 0 and 65535.`);
    }
    checkSkillCreator();
    const resolvedRoot = resolve(root);
    const server = await startEvalServer({ port, root: resolvedRoot });
    // Graceful shutdown: stop accepting connections, then exit.
    const shutdown = () => {
        console.log("\nShutting down eval server...");
        server.close(() => process.exit(0));
        // Force exit after 5s if open connections keep close() from finishing.
        // unref() so this timer alone never keeps the event loop alive.
        setTimeout(() => process.exit(0), 5000).unref();
    };
    process.on("SIGINT", shutdown);
    process.on("SIGTERM", shutdown);
}
51
+ //# sourceMappingURL=serve.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"serve.js","sourceRoot":"","sources":["../../../src/commands/eval/serve.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,oDAAoD;AACpD,8EAA8E;AAE9E,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,eAAe,EAAE,MAAM,kCAAkC,CAAC;AACnE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,uBAAuB,CAAC;AAEpD,SAAS,iBAAiB;IACxB,oDAAoD;IACpD,MAAM,IAAI,GAAG,OAAO,EAAE,CAAC;IACvB,MAAM,SAAS,GAAG;QAChB,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,yBAAyB,EAAE,eAAe,CAAC;QACrF,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAE,kBAAkB,CAAC;QACnD,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,eAAe,CAAC;KACjG,CAAC;IAEF,MAAM,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IAEvD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,CAAC,GAAG,CACT,MAAM,CAAC,mCAAmC,CAAC;YACzC,MAAM;YACN,GAAG,CAAC,mEAAmE,CAAC;YACxE,GAAG,CAAC,4DAA4D,CAAC;YACjE,GAAG,CAAC,kEAAkE,CAAC;YACvE,GAAG,CAAC,iDAAiD,CAAC;YACtD,6BAA6B;YAC7B,8BAA8B;YAC9B,IAAI;YACJ,8BAA8B;YAC9B,6DAA6D;YAC7D,IAAI;YACJ,8BAA8B;YAC9B,0CAA0C;YAC1C,IAAI,CACP,CAAC;IACJ,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,IAAY,EACZ,IAAY;IAEZ,iBAAiB,EAAE,CAAC;IAEpB,MAAM,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;IAEnE,oBAAoB;IACpB,MAAM,QAAQ,GAAG,GAAG,EAAE;QACpB,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;QAC9C,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACpC,sBAAsB;QACtB,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAC1C,CAAC,CAAC;IAEF,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;IAC/B,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;AAClC,CAAC"}
@@ -1,4 +1,5 @@
1
1
  export declare function evalCommand(subcommand: string, target?: string, opts?: {
2
2
  force?: boolean;
3
3
  root?: string;
4
+ port?: string;
4
5
  }): Promise<void>;
@@ -2,10 +2,16 @@
2
2
  // vskill eval -- subcommand router
3
3
  // ---------------------------------------------------------------------------
4
4
  import { join, resolve } from "node:path";
5
+ import { existsSync } from "node:fs";
5
6
  import { red, dim } from "../utils/output.js";
6
7
  export async function evalCommand(subcommand, target, opts = {}) {
7
- const root = opts.root ? resolve(opts.root) : resolve("plugins");
8
+ const root = opts.root ? resolve(opts.root) : resolve(".");
8
9
  switch (subcommand) {
10
+ case "serve": {
11
+ const port = opts.port ? parseInt(opts.port, 10) : 3077;
12
+ const { runEvalServe } = await import("./eval/serve.js");
13
+ return runEvalServe(root, port);
14
+ }
9
15
  case "init": {
10
16
  if (!target) {
11
17
  console.error(red("Usage: vskill eval init <plugin>/<skill>"));
@@ -34,7 +40,7 @@ export async function evalCommand(subcommand, target, opts = {}) {
34
40
  }
35
41
  default:
36
42
  console.error(red(`Unknown subcommand: "${subcommand}"\n`) +
37
- dim("Available: init, run, coverage, generate-all"));
43
+ dim("Available: serve, init, run, coverage, generate-all"));
38
44
  }
39
45
  }
40
46
  function resolveSkillDir(root, target) {
@@ -43,6 +49,16 @@ function resolveSkillDir(root, target) {
43
49
  console.error(red(`Invalid target "${target}". Expected format: <plugin>/<skill>`));
44
50
  process.exit(1);
45
51
  }
46
- return join(root, parts[0], "skills", parts[1]);
52
+ // Try plugin layout first: {root}/{plugin}/skills/{skill}/
53
+ const pluginPath = join(root, parts[0], "skills", parts[1]);
54
+ if (existsSync(pluginPath))
55
+ return pluginPath;
56
+ // Fall back to root layout: {root}/skills/{skill}/
57
+ // (plugin part is just a label, skill lives at root)
58
+ const rootPath = join(root, "skills", parts[1]);
59
+ if (existsSync(rootPath))
60
+ return rootPath;
61
+ // Default to plugin layout (let downstream error on missing SKILL.md)
62
+ return pluginPath;
47
63
  }
48
64
  //# sourceMappingURL=eval.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAAkB,EAClB,MAAe,EACf,OAA2C,EAAE;IAE7C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAEjE,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACvD,OAAO,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7C,CAAC;QAED,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC,CAAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACrD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC;QAED,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC/D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAED,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YACtE,OAAO,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChD,CAAC;QAED;YACE,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,wBAAwB,UAAU,KAAK,CAAC;gBAC1C,GAAG,CAAC,8CAA8C,CAAC,CACtD,CAAC;IACN,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,MAAc;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,mBAAmB,MAAM,sCAAsC,CAAC,CACrE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AAClD,CAAC"}
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../src/commands/eval.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,mCAAmC;AACnC,8EAA8E;AAE9E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC1C,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,oBAAoB,CAAC;AAE9C,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,UAAkB,EAClB,MAAe,EACf,OAA0D,EAAE;IAE5D,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAE3D,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YACxD,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YACzD,OAAO,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;QAClC,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,MAAM,CAAC,gBAAgB,CAAC,CAAC;YACvD,OAAO,WAAW,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7C,CAAC;QAED,KAAK,KAAK,CAAC,CAAC,CAAC;YACX,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC,CAAC;gBAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAClB,CAAC;YACD,MAAM,QAAQ,GAAG,eAAe,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAC/C,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACrD,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC9B,CAAC;QAED,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,oBAAoB,CAAC,CAAC;YAC/D,OAAO,eAAe,CAAC,IAAI,CAAC,CAAC;QAC/B,CAAC;QAED,KAAK,cAAc,CAAC,CAAC,CAAC;YACpB,MAAM,EAAE,kBAAkB,EAAE,GAAG,MAAM,MAAM,CAAC,wBAAwB,CAAC,CAAC;YACtE,OAAO,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChD,CAAC;QAED;YACE,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,wBAAwB,UAAU,KAAK,CAAC;gBAC1C,GAAG,CAAC,qDAAqD,CAAC,CAC7D,CAAC;IACN,CAAC;AACH,CAAC;AAED,SAAS,eAAe,CAAC,IAAY,EAAE,MAAc;IACnD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,
CAAC;IAChC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,mBAAmB,MAAM,sCAAsC,CAAC,CACrE,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,2DAA2D;IAC3D,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5D,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,mDAAmD;IACnD,qDAAqD;IACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IAChD,IAAI,UAAU,CAAC,QAAQ,CAAC;QAAE,OAAO,QAAQ,CAAC;IAE1C,sEAAsE;IACtE,OAAO,UAAU,CAAC;AACpB,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,94 @@
1
+ import { describe, it, expect, vi } from "vitest";
2
+ import { testActivation } from "../activation-tester.js";
3
/**
 * Build a fake LLM client whose `generate` mock replays canned responses.
 *
 * Each call returns the next entry of `responses`; once the list is
 * exhausted (or an entry is null/undefined) it returns an empty string.
 */
function mockClient(responses) {
    let cursor = 0;
    const generate = vi.fn(async () => {
        const reply = responses[cursor];
        cursor += 1;
        return reply ?? "";
    });
    return { model: "test-model", generate };
}
10
// One prompt that should trigger the skill and one that should not.
const PROMPTS = [
    { prompt: "How do I write a test?", expected: "should_activate" },
    { prompt: "What is the weather?", expected: "should_not_activate" },
];
describe("testActivation", () => {
    // Serialise a canned LLM verdict in the JSON shape the tests feed back.
    const verdict = (activate, confidence, reasoning) => JSON.stringify({ activate, confidence, reasoning });

    it("classifies TP correctly (should activate, does activate)", async () => {
        const replies = [
            verdict(true, "high", "Test-related"),
            verdict(false, "high", "Not related"),
        ];
        const summary = await testActivation("Test skill description", PROMPTS, mockClient(replies));
        expect(summary.tp).toBe(1);
        expect(summary.tn).toBe(1);
        expect(summary.fp).toBe(0);
        expect(summary.fn).toBe(0);
        expect(summary.precision).toBe(1);
        expect(summary.recall).toBe(1);
        expect(summary.reliability).toBe(1);
        expect(summary.total).toBe(2);
    });

    it("classifies FP correctly (should not activate, does activate)", async () => {
        const replies = [
            verdict(true, "medium", "Yes"),
            verdict(true, "low", "Wrongly activated"),
        ];
        const summary = await testActivation("desc", PROMPTS, mockClient(replies));
        expect(summary.tp).toBe(1);
        expect(summary.fp).toBe(1);
        expect(summary.precision).toBe(0.5);
        expect(summary.recall).toBe(1);
    });

    it("classifies FN correctly (should activate, does not)", async () => {
        const replies = [
            verdict(false, "high", "Missed"),
            verdict(false, "high", "Correct"),
        ];
        const summary = await testActivation("desc", PROMPTS, mockClient(replies));
        expect(summary.fn).toBe(1);
        expect(summary.tn).toBe(1);
        expect(summary.recall).toBe(0);
    });

    it("handles LLM errors gracefully", async () => {
        // Every generate() call rejects.
        const failingClient = {
            model: "test",
            generate: vi.fn(() => Promise.reject(new Error("LLM timeout"))),
        };
        const summary = await testActivation("desc", PROMPTS, failingClient);
        expect(summary.total).toBe(2);
        // On error: activate=false, so should_activate → FN, should_not_activate → TN
        expect(summary.fn).toBe(1);
        expect(summary.tn).toBe(1);
        expect(summary.results[0].reasoning).toContain("LLM timeout");
    });

    it("calls onResult callback for each prompt", async () => {
        const replies = [
            verdict(true, "high", "Yes"),
            verdict(false, "high", "No"),
        ];
        const seen = [];
        await testActivation("desc", PROMPTS, mockClient(replies), (r) => seen.push(r));
        expect(seen).toHaveLength(2);
        expect(seen[0].classification).toBe("TP");
        expect(seen[1].classification).toBe("TN");
    });

    it("parses JSON from code fence responses", async () => {
        const fencedWithLang = '```json\n{"activate": true, "confidence": "medium", "reasoning": "Looks relevant"}\n```';
        const fencedPlain = '```\n{"activate": false, "confidence": "low", "reasoning": "Not relevant"}\n```';
        const summary = await testActivation("desc", PROMPTS, mockClient([fencedWithLang, fencedPlain]));
        expect(summary.tp).toBe(1);
        expect(summary.tn).toBe(1);
    });

    it("handles empty prompts array", async () => {
        const summary = await testActivation("desc", [], mockClient([]));
        expect(summary.total).toBe(0);
        expect(summary.precision).toBe(0);
        expect(summary.recall).toBe(0);
        expect(summary.reliability).toBe(0);
    });
});
94
+ //# sourceMappingURL=activation-tester.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"activation-tester.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/activation-tester.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAClD,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AAIzD,SAAS,UAAU,CAAC,SAAmB;IACrC,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,IAAI,EAAE,CAAC;KAClD,CAAC;AACJ,CAAC;AAED,MAAM,OAAO,GAAuB;IAClC,EAAE,MAAM,EAAE,wBAAwB,EAAE,QAAQ,EAAE,iBAAiB,EAAE;IACjE,EAAE,MAAM,EAAE,sBAAsB,EAAE,QAAQ,EAAE,qBAAqB,EAAE;CACpE,CAAC;AAEF,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;QACxE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,cAAc,EAAE,CAAC;YACjF,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,CAAC;SAClF,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,wBAAwB,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAChF,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACpC,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAChC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8DAA8D,EAAE,KAAK,IAAI,EAAE;QAC5E,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;YAC1E,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAAC;SACtF,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAC9D,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAA
C,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACpC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;YAC5E,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;SAC9E,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAC9D,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+BAA+B,EAAE,KAAK,IAAI,EAAE;QAC7C,MAAM,MAAM,GAAc;YACxB,KAAK,EAAE,MAAM;YACb,QAAQ,EAAE,EAAE,CAAC,EAAE,CAAC,KAAK,IAAI,EAAE;gBACzB,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;YACjC,CAAC,CAAC;SACH,CAAC;QAEF,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAC9D,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,8EAA8E;QAC9E,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC;IAChE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;YACxE,IAAI,CAAC,SAAS,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;SACzE,CAAC,CAAC;QAEH,MAAM,OAAO,GAAU,EAAE,CAAC;QAC1B,MAAM,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACtE,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,
CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,GAAG,UAAU,CAAC;YACxB,yFAAyF;YACzF,iFAAiF;SAClF,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;QAC9D,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,KAAK,IAAI,EAAE;QAC3C,MAAM,MAAM,GAAG,UAAU,CAAC,EAAE,CAAC,CAAC;QAC9B,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,MAAM,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;QACzD,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClC,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,200 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
2
+ import { mkdirSync, rmSync, readFileSync } from "node:fs";
3
+ import { join } from "node:path";
4
+ import { tmpdir } from "node:os";
5
+ import { writeHistoryEntry, listHistory, readHistoryEntry, computeRegressions, } from "../benchmark-history.js";
6
+ let testDir;
7
/**
 * Build a benchmark-result fixture: one passing case with two passing
 * assertions. Top-level fields in `overrides` replace the defaults
 * (shallow merge, so a supplied `cases` array replaces the default one).
 */
const mkResult = (overrides = {}) => {
    const passing = (id, text) => ({ id, text, pass: true, reasoning: "OK" });
    const defaults = {
        timestamp: "2026-03-08T12:00:00.000Z",
        model: "test-model",
        skill_name: "test-skill",
        cases: [
            {
                eval_id: 1,
                eval_name: "test-case",
                status: "pass",
                error_message: null,
                pass_rate: 1.0,
                assertions: [passing("a1", "Check 1"), passing("a2", "Check 2")],
            },
        ],
    };
    return { ...defaults, ...overrides };
};
26
describe("benchmark-history", () => {
    // Assertion fixture; the reasoning text mirrors the pass flag.
    const check = (id, text, pass) => ({ id, text, pass, reasoning: pass ? "OK" : "Fail" });
    // Result fixture holding exactly one case (eval_id 1, name "test").
    const resultWithCase = (status, pass_rate, assertions) => mkResult({
        cases: [
            {
                eval_id: 1,
                eval_name: "test",
                status,
                error_message: null,
                pass_rate,
                assertions,
            },
        ],
    });

    beforeEach(() => {
        // Date.now() alone can repeat across concurrent runs sharing the OS
        // temp dir, and afterEach would then delete another run's fixtures.
        // Add the pid and a random suffix to keep each directory private.
        const unique = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2)}`;
        testDir = join(tmpdir(), `vskill-history-${unique}`);
        mkdirSync(join(testDir, "evals"), { recursive: true });
    });
    afterEach(() => {
        rmSync(testDir, { recursive: true, force: true });
    });

    describe("writeHistoryEntry", () => {
        it("writes history file with filesystem-safe timestamp", async () => {
            const filename = await writeHistoryEntry(testDir, mkResult());
            expect(filename).toBe("2026-03-08T12-00-00.000Z.json");
            const content = readFileSync(join(testDir, "evals", "history", filename), "utf-8");
            expect(JSON.parse(content).skill_name).toBe("test-skill");
        });
        it("also writes benchmark.json for backward compat", async () => {
            await writeHistoryEntry(testDir, mkResult());
            const bm = readFileSync(join(testDir, "evals", "benchmark.json"), "utf-8");
            expect(JSON.parse(bm).skill_name).toBe("test-skill");
        });
        it("creates history directory if missing", async () => {
            rmSync(join(testDir, "evals"), { recursive: true, force: true });
            const filename = await writeHistoryEntry(testDir, mkResult());
            expect(filename).toBeTruthy();
        });
    });

    describe("listHistory", () => {
        it("returns empty array when no history directory", async () => {
            const list = await listHistory(join(testDir, "nonexistent"));
            expect(list).toEqual([]);
        });
        it("lists entries sorted reverse-chronologically", async () => {
            const older = mkResult({ timestamp: "2026-03-01T10:00:00.000Z" });
            const newer = mkResult({ timestamp: "2026-03-02T10:00:00.000Z" });
            await writeHistoryEntry(testDir, older);
            await writeHistoryEntry(testDir, newer);
            const list = await listHistory(testDir);
            expect(list).toHaveLength(2);
            expect(list[0].timestamp).toBe("2026-03-02T10:00:00.000Z");
            expect(list[1].timestamp).toBe("2026-03-01T10:00:00.000Z");
        });
        it("computes pass rate from assertion results", async () => {
            const halfPassing = resultWithCase("fail", 0.5, [
                check("a1", "Check 1", true),
                check("a2", "Check 2", false),
            ]);
            await writeHistoryEntry(testDir, halfPassing);
            const list = await listHistory(testDir);
            expect(list[0].passRate).toBe(0.5);
        });
    });

    describe("readHistoryEntry", () => {
        it("reads a specific history entry by timestamp", async () => {
            await writeHistoryEntry(testDir, mkResult());
            const entry = await readHistoryEntry(testDir, "2026-03-08T12:00:00.000Z");
            expect(entry).not.toBeNull();
            expect(entry.skill_name).toBe("test-skill");
        });
        it("returns null for nonexistent entry", async () => {
            const entry = await readHistoryEntry(testDir, "1999-01-01T00:00:00.000Z");
            expect(entry).toBeNull();
        });
    });

    describe("computeRegressions", () => {
        it("detects regression (pass → fail)", () => {
            const prev = resultWithCase("pass", 1.0, [check("a1", "Check", true)]);
            const curr = resultWithCase("fail", 0, [check("a1", "Check", false)]);
            const regressions = computeRegressions(curr, prev);
            expect(regressions).toHaveLength(1);
            expect(regressions[0].change).toBe("regression");
            expect(regressions[0].assertionId).toBe("a1");
        });
        it("detects improvement (fail → pass)", () => {
            const prev = resultWithCase("fail", 0, [check("a1", "Check", false)]);
            const curr = resultWithCase("pass", 1, [check("a1", "Check", true)]);
            const regressions = computeRegressions(curr, prev);
            expect(regressions).toHaveLength(1);
            expect(regressions[0].change).toBe("improvement");
        });
        it("returns empty array when no changes", () => {
            const result = mkResult();
            expect(computeRegressions(result, result)).toEqual([]);
        });
        it("skips new assertions not present in previous run", () => {
            const prev = resultWithCase("pass", 1, [check("a1", "Check", true)]);
            const curr = resultWithCase("pass", 1, [
                check("a1", "Check", true),
                check("a2", "New", false),
            ]);
            const regressions = computeRegressions(curr, prev);
            expect(regressions).toEqual([]);
        });
    });
});
200
+ //# sourceMappingURL=benchmark-history.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark-history.test.js","sourceRoot":"","sources":["../../../src/eval/__tests__/benchmark-history.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,QAAQ,CAAC;AACrE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,YAAY,EAAiB,MAAM,SAAS,CAAC;AACzE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AACjC,OAAO,EACL,iBAAiB,EACjB,WAAW,EACX,gBAAgB,EAChB,kBAAkB,GACnB,MAAM,yBAAyB,CAAC;AAGjC,IAAI,OAAe,CAAC;AAEpB,MAAM,QAAQ,GAAG,CAAC,YAAsC,EAAE,EAAmB,EAAE,CAAC,CAAC;IAC/E,SAAS,EAAE,0BAA0B;IACrC,KAAK,EAAE,YAAY;IACnB,UAAU,EAAE,YAAY;IACxB,KAAK,EAAE;QACL;YACE,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,WAAW;YACtB,MAAM,EAAE,MAAM;YACd,aAAa,EAAE,IAAI;YACnB,SAAS,EAAE,GAAG;YACd,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE;gBAC1D,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE;aAC3D;SACF;KACF;IACD,GAAG,SAAS;CACb,CAAC,CAAC;AAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,UAAU,CAAC,GAAG,EAAE;QACd,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,kBAAkB,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACzD,SAAS,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC,CAAC,CAAC;IAEH,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;YAClE,MAAM,MAAM,GAAG,QAAQ,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAE1D,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;YACvD,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC,EAAE,OAAO,CAAC,CAAC;YACnF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;YAC9D,MAAM,iBAAiB,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC7C,MAAM,EAAE,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,EAAE,gBAAgB,CAAC
,EAAE,OAAO,CAAC,CAAC;YAC3E,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;YACpD,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YACjE,MAAM,MAAM,GAAG,QAAQ,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,MAAM,iBAAiB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAC1D,MAAM,CAAC,QAAQ,CAAC,CAAC,UAAU,EAAE,CAAC;QAChC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;QAC3B,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;YAC7D,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAC3B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;YAC5D,MAAM,EAAE,GAAG,QAAQ,CAAC,EAAE,SAAS,EAAE,0BAA0B,EAAE,CAAC,CAAC;YAC/D,MAAM,EAAE,GAAG,QAAQ,CAAC,EAAE,SAAS,EAAE,0BAA0B,EAAE,CAAC,CAAC;YAC/D,MAAM,iBAAiB,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YACrC,MAAM,iBAAiB,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;YAErC,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;YAC3D,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,0BAA0B,CAAC,CAAC;QAC7D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,MAAM,GAAG,QAAQ,CAAC;gBACtB,KAAK,EAAE;oBACL;wBACE,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,MAAM;wBACjB,MAAM,EAAE,MAAM;wBACd,aAAa,EAAE,IAAI;wBACnB,SAAS,EAAE,GAAG;wBACd,UAAU,EAAE;4BACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE;4BAC1D,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE;yBAC9D;qBACF;iBACF;aACF,CAAC,CAAC;YACH,MAAM,iBAAiB,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YACzC,MAAM,IAAI,GAAG,MAAM,WAAW,CAAC,OAAO,CAAC,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAA
E;YAC3D,MAAM,iBAAiB,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC;YAC7C,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,OAAO,EAAE,0BAA0B,CAAC,CAAC;YAC1E,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;YAC7B,MAAM,CAAC,KAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;YAClD,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,OAAO,EAAE,0BAA0B,CAAC,CAAC;YAC1E,MAAM,CAAC,KAAK,CAAC,CAAC,QAAQ,EAAE,CAAC;QAC3B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAClC,EAAE,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC1C,MAAM,IAAI,GAAG,QAAQ,CAAC;gBACpB,KAAK,EAAE;oBACL;wBACE,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,MAAM;wBACjB,MAAM,EAAE,MAAM;wBACd,aAAa,EAAE,IAAI;wBACnB,SAAS,EAAE,GAAG;wBACd,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;qBACvE;iBACF;aACF,CAAC,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC;gBACpB,KAAK,EAAE;oBACL;wBACE,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,MAAM;wBACjB,MAAM,EAAE,MAAM;wBACd,aAAa,EAAE,IAAI;wBACnB,SAAS,EAAE,CAAC;wBACZ,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;qBAC1E;iBACF;aACF,CAAC,CAAC;YAEH,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YACnD,MAAM,CAAC,WAAW,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACpC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YACjD,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,IAAI,GAAG,QAAQ,CAAC;gBACpB,KAAK,EAAE;oBACL;wBACE,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,MAAM;wBACjB,MAAM,EAAE,MAAM;wBACd,aAAa,EAAE,IAAI;wBACnB,SAAS,EAAE,CAAC;wBACZ,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC;qBAC1E;iBACF;aACF,CAAC,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC;gBACpB,KAAK,EAAE;oBACL;wBACE,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,MAAM;wBACjB,MAAM,EAAE,MAAM;wBACd,aAAa,EAAE,IAAI;wBACnB,SAAS,EAAE,CAAC;wBACZ,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,
IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;qBACvE;iBACF;aACF,CAAC,CAAC;YAEH,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YACnD,MAAM,CAAC,WAAW,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACpC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACpD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,MAAM,GAAG,QAAQ,EAAE,CAAC;YAC1B,MAAM,CAAC,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;YAC1D,MAAM,IAAI,GAAG,QAAQ,CAAC;gBACpB,KAAK,EAAE;oBACL;wBACE,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,MAAM;wBACjB,MAAM,EAAE,MAAM;wBACd,aAAa,EAAE,IAAI;wBACnB,SAAS,EAAE,CAAC;wBACZ,UAAU,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;qBACvE;iBACF;aACF,CAAC,CAAC;YACH,MAAM,IAAI,GAAG,QAAQ,CAAC;gBACpB,KAAK,EAAE;oBACL;wBACE,OAAO,EAAE,CAAC;wBACV,SAAS,EAAE,MAAM;wBACjB,MAAM,EAAE,MAAM;wBACd,aAAa,EAAE,IAAI;wBACnB,SAAS,EAAE,CAAC;wBACZ,UAAU,EAAE;4BACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE;4BACxD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE;yBAC1D;qBACF;iBACF;aACF,CAAC,CAAC;YAEH,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YACnD,MAAM,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QAClC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ export {};