vskill 0.5.11 → 0.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/commands/eval/credentials.d.ts +12 -0
  2. package/dist/commands/eval/credentials.js +140 -0
  3. package/dist/commands/eval/credentials.js.map +1 -0
  4. package/dist/commands/eval/generate-all.d.ts +1 -1
  5. package/dist/commands/eval/generate-all.js +57 -12
  6. package/dist/commands/eval/generate-all.js.map +1 -1
  7. package/dist/commands/eval/init.d.ts +2 -1
  8. package/dist/commands/eval/init.js +76 -10
  9. package/dist/commands/eval/init.js.map +1 -1
  10. package/dist/commands/eval/run.d.ts +7 -1
  11. package/dist/commands/eval/run.js +207 -26
  12. package/dist/commands/eval/run.js.map +1 -1
  13. package/dist/commands/eval/sweep.d.ts +7 -0
  14. package/dist/commands/eval/sweep.js +99 -0
  15. package/dist/commands/eval/sweep.js.map +1 -0
  16. package/dist/commands/eval.d.ts +10 -0
  17. package/dist/commands/eval.js +62 -4
  18. package/dist/commands/eval.js.map +1 -1
  19. package/dist/eval/batch-judge.d.ts +27 -0
  20. package/dist/eval/batch-judge.js +242 -0
  21. package/dist/eval/batch-judge.js.map +1 -0
  22. package/dist/eval/chrome-profile.d.ts +16 -0
  23. package/dist/eval/chrome-profile.js +65 -0
  24. package/dist/eval/chrome-profile.js.map +1 -0
  25. package/dist/eval/comparator.d.ts +3 -1
  26. package/dist/eval/comparator.js +19 -3
  27. package/dist/eval/comparator.js.map +1 -1
  28. package/dist/eval/concurrency.d.ts +13 -0
  29. package/dist/eval/concurrency.js +53 -0
  30. package/dist/eval/concurrency.js.map +1 -0
  31. package/dist/eval/credential-resolver.d.ts +31 -0
  32. package/dist/eval/credential-resolver.js +111 -0
  33. package/dist/eval/credential-resolver.js.map +1 -0
  34. package/dist/eval/integration-runner.d.ts +12 -0
  35. package/dist/eval/integration-runner.js +303 -0
  36. package/dist/eval/integration-runner.js.map +1 -0
  37. package/dist/eval/integration-types.d.ts +65 -0
  38. package/dist/eval/integration-types.js +18 -0
  39. package/dist/eval/integration-types.js.map +1 -0
  40. package/dist/eval/judge-cache.d.ts +29 -0
  41. package/dist/eval/judge-cache.js +109 -0
  42. package/dist/eval/judge-cache.js.map +1 -0
  43. package/dist/eval/judge.d.ts +1 -1
  44. package/dist/eval/judge.js +20 -3
  45. package/dist/eval/judge.js.map +1 -1
  46. package/dist/eval/llm.d.ts +2 -1
  47. package/dist/eval/llm.js +54 -2
  48. package/dist/eval/llm.js.map +1 -1
  49. package/dist/eval/prompt-builder.d.ts +10 -0
  50. package/dist/eval/prompt-builder.js +167 -0
  51. package/dist/eval/prompt-builder.js.map +1 -1
  52. package/dist/eval/rate-limiter.d.ts +20 -0
  53. package/dist/eval/rate-limiter.js +62 -0
  54. package/dist/eval/rate-limiter.js.map +1 -0
  55. package/dist/eval/schema.d.ts +16 -0
  56. package/dist/eval/schema.js +58 -6
  57. package/dist/eval/schema.js.map +1 -1
  58. package/dist/eval/verdict.d.ts +9 -0
  59. package/dist/eval/verdict.js +50 -0
  60. package/dist/eval/verdict.js.map +1 -1
  61. package/dist/eval-server/api-routes.js +99 -3
  62. package/dist/eval-server/api-routes.js.map +1 -1
  63. package/dist/eval-server/benchmark-runner.d.ts +7 -0
  64. package/dist/eval-server/benchmark-runner.js +158 -42
  65. package/dist/eval-server/benchmark-runner.js.map +1 -1
  66. package/dist/eval-server/concurrency.d.ts +1 -13
  67. package/dist/eval-server/concurrency.js +3 -49
  68. package/dist/eval-server/concurrency.js.map +1 -1
  69. package/dist/eval-server/eval-server.js +4 -0
  70. package/dist/eval-server/eval-server.js.map +1 -1
  71. package/dist/eval-server/integration-routes.d.ts +2 -0
  72. package/dist/eval-server/integration-routes.js +100 -0
  73. package/dist/eval-server/integration-routes.js.map +1 -0
  74. package/dist/eval-server/skill-create-routes.js +151 -22
  75. package/dist/eval-server/skill-create-routes.js.map +1 -1
  76. package/dist/eval-server/sweep-routes.d.ts +2 -0
  77. package/dist/eval-server/sweep-routes.js +93 -0
  78. package/dist/eval-server/sweep-routes.js.map +1 -0
  79. package/dist/eval-server/sweep-runner.d.ts +93 -0
  80. package/dist/eval-server/sweep-runner.js +275 -0
  81. package/dist/eval-server/sweep-runner.js.map +1 -0
  82. package/dist/eval-ui/assets/index-C9_Pey9T.css +1 -0
  83. package/dist/eval-ui/assets/index-KfkLPyh3.js +74 -0
  84. package/dist/eval-ui/index.html +2 -2
  85. package/dist/index.js +8 -0
  86. package/dist/index.js.map +1 -1
  87. package/package.json +1 -1
  88. package/dist/eval-ui/assets/index-CxHCKEhf.js +0 -74
  89. package/dist/eval-ui/assets/index-D2UkOol1.css +0 -1
@@ -0,0 +1,12 @@
1
+ /**
2
+ * vskill credentials set <KEY> -- prompt for value and store in .env.local
3
+ */
4
+ export declare function runCredentialsSet(skillDir: string, key: string): Promise<void>;
5
+ /**
6
+ * vskill credentials list -- show all credentials referenced by integration tests
7
+ */
8
+ export declare function runCredentialsList(skillDir: string): Promise<void>;
9
+ /**
10
+ * vskill credentials check -- resolve each credential and report source
11
+ */
12
+ export declare function runCredentialsCheck(skillDir: string): Promise<void>;
@@ -0,0 +1,140 @@
1
+ // ---------------------------------------------------------------------------
2
+ // vskill credentials -- manage credentials for integration tests
3
+ // ---------------------------------------------------------------------------
4
+ import { createInterface } from "node:readline";
5
+ import { resolveCredential, resolveAllCredentials, writeCredential } from "../../eval/credential-resolver.js";
6
+ import { loadAndValidateEvals } from "../../eval/schema.js";
7
+ import { green, red, yellow, dim, bold, table } from "../../utils/output.js";
8
+ /**
9
+ * vskill credentials set <KEY> -- prompt for value and store in .env.local
10
+ */
11
+ export async function runCredentialsSet(skillDir, key) {
12
+ const value = await promptHidden(`Enter value for ${key}: `);
13
+ if (!value) {
14
+ console.error(red("No value provided. Aborted."));
15
+ return;
16
+ }
17
+ writeCredential(skillDir, key, value);
18
+ console.log(green(`${key} saved to .env.local`));
19
+ // Verify it resolves
20
+ const result = resolveCredential(key, skillDir);
21
+ if (result) {
22
+ console.log(dim(`Verified: resolves from ${result.source}`));
23
+ }
24
+ }
25
+ /**
26
+ * vskill credentials list -- show all credentials referenced by integration tests
27
+ */
28
+ export async function runCredentialsList(skillDir) {
29
+ const names = collectRequiredCredentials(skillDir);
30
+ if (names.length === 0) {
31
+ console.log(dim("No integration test credentials found in evals."));
32
+ return;
33
+ }
34
+ const statuses = resolveAllCredentials(names, skillDir);
35
+ const rows = statuses.map((s) => [
36
+ s.name,
37
+ s.status === "ready" ? green("Ready") : red("Missing"),
38
+ s.source ?? "-",
39
+ ]);
40
+ console.log(bold("\nCredential Status\n"));
41
+ console.log(table(["NAME", "STATUS", "SOURCE"], rows));
42
+ }
43
+ /**
44
+ * vskill credentials check -- resolve each credential and report source
45
+ */
46
+ export async function runCredentialsCheck(skillDir) {
47
+ const names = collectRequiredCredentials(skillDir);
48
+ if (names.length === 0) {
49
+ console.log(dim("No integration test credentials found in evals."));
50
+ return;
51
+ }
52
+ const statuses = resolveAllCredentials(names, skillDir);
53
+ const rows = statuses.map((s) => [
54
+ s.name,
55
+ s.status === "ready" ? green("Ready") : red("Missing"),
56
+ s.source === "env"
57
+ ? "Environment variable"
58
+ : s.source === "dotenv"
59
+ ? ".env.local"
60
+ : "-",
61
+ ]);
62
+ console.log(bold("\nCredential Resolution Check\n"));
63
+ console.log(table(["NAME", "STATUS", "SOURCE"], rows));
64
+ const missing = statuses.filter((s) => s.status === "missing");
65
+ if (missing.length > 0) {
66
+ console.log(yellow(`\n${missing.length} credential(s) missing. Set them with:`));
67
+ for (const m of missing) {
68
+ console.log(dim(` vskill credentials set ${m.name}`));
69
+ }
70
+ }
71
+ }
72
+ // ---------------------------------------------------------------------------
73
+ // Helpers
74
+ // ---------------------------------------------------------------------------
75
+ function collectRequiredCredentials(skillDir) {
76
+ try {
77
+ const evalsFile = loadAndValidateEvals(skillDir);
78
+ const allCreds = new Set();
79
+ for (const evalCase of evalsFile.evals) {
80
+ if (evalCase.testType === "integration" && evalCase.requiredCredentials) {
81
+ for (const cred of evalCase.requiredCredentials) {
82
+ allCreds.add(cred);
83
+ }
84
+ }
85
+ }
86
+ return [...allCreds].sort();
87
+ }
88
+ catch {
89
+ return [];
90
+ }
91
+ }
92
+ function promptHidden(question) {
93
+ return new Promise((resolve) => {
94
+ const rl = createInterface({
95
+ input: process.stdin,
96
+ output: process.stdout,
97
+ });
98
+ // Attempt to hide input (works in TTY mode)
99
+ if (process.stdin.isTTY) {
100
+ process.stdout.write(question);
101
+ const stdin = process.stdin;
102
+ const wasRaw = stdin.isRaw;
103
+ stdin.setRawMode(true);
104
+ let input = "";
105
+ const onData = (char) => {
106
+ const c = char.toString();
107
+ if (c === "\n" || c === "\r") {
108
+ stdin.setRawMode(wasRaw ?? false);
109
+ stdin.removeListener("data", onData);
110
+ process.stdout.write("\n");
111
+ rl.close();
112
+ resolve(input);
113
+ }
114
+ else if (c === "\u0003") {
115
+ // Ctrl+C
116
+ stdin.setRawMode(wasRaw ?? false);
117
+ stdin.removeListener("data", onData);
118
+ rl.close();
119
+ resolve("");
120
+ }
121
+ else if (c === "\u007F" || c === "\b") {
122
+ // Backspace
123
+ input = input.slice(0, -1);
124
+ }
125
+ else {
126
+ input += c;
127
+ }
128
+ };
129
+ stdin.on("data", onData);
130
+ }
131
+ else {
132
+ // Non-TTY: just use readline normally
133
+ rl.question(question, (answer) => {
134
+ rl.close();
135
+ resolve(answer);
136
+ });
137
+ }
138
+ });
139
+ }
140
+ //# sourceMappingURL=credentials.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"credentials.js","sourceRoot":"","sources":["../../../src/commands/eval/credentials.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iEAAiE;AACjE,8EAA8E;AAE9E,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAGhD,OAAO,EAAE,iBAAiB,EAAE,qBAAqB,EAAE,eAAe,EAAE,MAAM,mCAAmC,CAAC;AAC9G,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAC;AAC5D,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,uBAAuB,CAAC;AAE7E;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,QAAgB,EAAE,GAAW;IACnE,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,mBAAmB,GAAG,IAAI,CAAC,CAAC;IAC7D,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,6BAA6B,CAAC,CAAC,CAAC;QAClD,OAAO;IACT,CAAC;IAED,eAAe,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,sBAAsB,CAAC,CAAC,CAAC;IAEjD,qBAAqB;IACrB,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAChD,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,2BAA2B,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IAC/D,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,KAAK,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;IACnD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC,CAAC;QACpE,OAAO;IACT,CAAC;IAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IACxD,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC/B,CAAC,CAAC,IAAI;QACN,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC;QACtD,CAAC,CAAC,MAAM,IAAI,GAAG;KAChB,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;AACzD,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,QAAgB;IACxD,MAAM,KAAK,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;IACnD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC,CAAC;QACpE,OAAO;IACT,CAAC;IAED,MAAM,QAAQ,GAAG,qBAAqB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;IACxD,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QAC/B,CAAC,CAAC,IAAI;QACN,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC;QACtD,CAAC,CAAC,MAAM,KAAK,KAAK;YAChB,CAAC,CAAC,sBAAsB;YACxB,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ;gBACrB,CAAC,CAAC,YAAY;gBACd,CAAC,CAAC,GAAG;KACV,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;IAEvD,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;IAC/D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,OAAO,CAAC,MAAM,wCAAwC,CAAC,CAAC,CAAC;QACjF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,0BAA0B,CAAC,QAAgB;IAClD,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QACjD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;QACnC,KAAK,MAAM,QAAQ,IAAI,SAAS,CAAC,KAAK,EAAE,CAAC;YACvC,IAAI,QAAQ,CAAC,QAAQ,KAAK,aAAa,IAAI,QAAQ,CAAC,mBAAmB,EAAE,CAAC;gBACxE,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,mBAAmB,EAAE,CAAC;oBAChD,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;gBACrB,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,CAAC;IACZ,CAAC;AACH,CAAC;AAED,SAAS,YAAY,CAAC,QAAgB;IACpC,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,EAAE,GAAG,eAAe,CAAC;YACzB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QAEH,4CAA4C;QAC5C,IAAI,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACxB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;YAC/B,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;YAC5B,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC;YAC3B,KAAK,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;YACvB,IAAI,KAAK,GAAG,EAAE,CAAC;YACf,MAAM,MAAM,GAAG,CAAC,IAAY,EAAE,EAAE;gBAC9B,MAAM,CAAC,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAC1B,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;oBAC7B,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,KAAK,CAAC,CAAC;oBAClC,KAAK,CAAC,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;oBACrC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAC3B,EAAE,CAAC,KAAK,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CAAC,CAAC;gBACjB,CAAC;qBAAM,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;oBAC1B,SAAS;oBACT,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,KAAK,CAAC,CAAC;oBAClC,KAAK,CAAC,cAAc,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;oBACrC,EAAE,CAAC,KAAK,EAAE,CAAC;oBACX,OAAO,CAAC,EAAE,CAAC,CAAC;gBACd,CAAC;qBAAM,IAAI,CAAC,KAAK,QAAQ,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;oBACxC,YAAY;oBACZ,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;gBAC7B,CAAC;qBAAM,CAAC;oBACN,KAAK,IAAI,CAAC,CAAC;gBACb,CAAC;YACH,CAAC,CAAC;YACF,KAAK,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,sCAAsC;YACtC,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,EAAE;gBAC/B,EAAE,CAAC,KAAK,EAAE,CAAC;gBACX,OAAO,CAAC,MAAM,CAAC,CAAC;YAClB,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -1 +1 @@
1
- export declare function runEvalGenerateAll(root: string, force: boolean): Promise<void>;
1
+ export declare function runEvalGenerateAll(root: string, force: boolean, concurrency?: number): Promise<void>;
@@ -6,48 +6,93 @@ import { join } from "node:path";
6
6
  import { scanSkills } from "../../eval/skill-scanner.js";
7
7
  import { createLlmClient } from "../../eval/llm.js";
8
8
  import { buildEvalInitPrompt, parseGeneratedEvals } from "../../eval/prompt-builder.js";
9
+ import { Semaphore } from "../../eval/concurrency.js";
9
10
  import { green, red, yellow, bold, dim } from "../../utils/output.js";
10
- export async function runEvalGenerateAll(root, force) {
11
+ const CLI_PROVIDERS = new Set(["claude-cli", "codex-cli", "gemini-cli"]);
12
+ function resolveProvider() {
13
+ const explicit = process.env.VSKILL_EVAL_PROVIDER;
14
+ if (explicit)
15
+ return { provider: explicit, autoSelected: false };
16
+ // Auto-select anthropic for batch ops when API key is available
17
+ if (process.env.ANTHROPIC_API_KEY) {
18
+ return { provider: "anthropic", autoSelected: true };
19
+ }
20
+ return { provider: "claude-cli", autoSelected: false };
21
+ }
22
+ function resolveConcurrency(explicitConcurrency, provider) {
23
+ if (explicitConcurrency !== undefined)
24
+ return Math.max(1, explicitConcurrency);
25
+ return CLI_PROVIDERS.has(provider) ? 1 : 3;
26
+ }
27
+ export async function runEvalGenerateAll(root, force, concurrency) {
11
28
  const skills = await scanSkills(root);
12
29
  if (skills.length === 0) {
13
30
  console.log(dim("No skills found in " + root));
14
31
  return;
15
32
  }
16
- const client = createLlmClient();
33
+ const { provider, autoSelected } = resolveProvider();
34
+ const effectiveConcurrency = resolveConcurrency(concurrency, provider);
35
+ if (autoSelected) {
36
+ console.log(dim("Auto-selected anthropic provider for batch operation"));
37
+ }
38
+ console.log(dim(`Provider: ${provider} | Concurrency: ${effectiveConcurrency}`));
39
+ const client = createLlmClient({ provider });
40
+ const sem = new Semaphore(effectiveConcurrency);
17
41
  let generated = 0;
18
42
  let skipped = 0;
19
43
  let failed = 0;
20
44
  const failedPaths = [];
21
- for (const skill of skills) {
22
- const evalsPath = join(skill.dir, "evals", "evals.json");
23
- // Skip if evals already exist and not forcing
45
+ // Filter skills that need generation
46
+ const toGenerate = skills.filter((skill) => {
24
47
  if (skill.hasEvals && !force) {
25
48
  skipped++;
26
- continue;
49
+ return false;
27
50
  }
28
51
  const skillMdPath = join(skill.dir, "SKILL.md");
29
52
  if (!existsSync(skillMdPath)) {
30
53
  failed++;
31
54
  failedPaths.push(`${skill.plugin}/${skill.skill} (no SKILL.md)`);
32
- continue;
55
+ return false;
33
56
  }
57
+ return true;
58
+ });
59
+ // Process all skills concurrently with semaphore gating
60
+ const results = await Promise.allSettled(toGenerate.map(async (skill) => {
61
+ await sem.acquire();
34
62
  try {
63
+ const skillMdPath = join(skill.dir, "SKILL.md");
64
+ const evalsPath = join(skill.dir, "evals", "evals.json");
35
65
  const skillContent = readFileSync(skillMdPath, "utf-8");
36
66
  const prompt = buildEvalInitPrompt(skillContent);
37
67
  const genResult = await client.generate("You generate eval test cases for AI skills. Output only valid JSON in a code fence.", prompt);
38
68
  const evalsFile = parseGeneratedEvals(genResult.text);
39
69
  mkdirSync(join(skill.dir, "evals"), { recursive: true });
40
70
  writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), "utf-8");
41
- generated++;
42
71
  console.log(green(` Generated: ${skill.plugin}/${skill.skill}`));
43
- // Rate limit: 2s delay between LLM calls to avoid API throttling
44
- await new Promise((r) => setTimeout(r, 2000));
72
+ return { skill, success: true };
45
73
  }
46
74
  catch (err) {
47
- failed++;
48
- failedPaths.push(`${skill.plugin}/${skill.skill}`);
49
75
  console.error(red(` Failed: ${skill.plugin}/${skill.skill} - `) +
50
76
  dim(err.message));
77
+ return { skill, success: false, error: err };
78
+ }
79
+ finally {
80
+ sem.release();
81
+ }
82
+ }));
83
+ // Tally results
84
+ for (const r of results) {
85
+ if (r.status === "fulfilled") {
86
+ if (r.value.success) {
87
+ generated++;
88
+ }
89
+ else {
90
+ failed++;
91
+ failedPaths.push(`${r.value.skill.plugin}/${r.value.skill.skill}`);
92
+ }
93
+ }
94
+ else {
95
+ failed++;
51
96
  }
52
97
  }
53
98
  // Print summary
@@ -1 +1 @@
1
- {"version":3,"file":"generate-all.js","sourceRoot":"","sources":["../../../src/commands/eval/generate-all.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,uEAAuE;AACvE,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,6BAA6B,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AACxF,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,uBAAuB,CAAC;AAEtE,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,KAAc;IAEd,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,CAAC;IAEtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,qBAAqB,GAAG,IAAI,CAAC,CAAC,CAAC;QAC/C,OAAO;IACT,CAAC;IAED,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,WAAW,GAAa,EAAE,CAAC;IAEjC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;QAEzD,8CAA8C;QAC9C,IAAI,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YAC7B,OAAO,EAAE,CAAC;YACV,SAAS;QACX,CAAC;QAED,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC7B,MAAM,EAAE,CAAC;YACT,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,gBAAgB,CAAC,CAAC;YACjE,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YACxD,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;YAEjD,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CACrC,qFAAqF,EACrF,MAAM,CACP,CAAC;YAEF,MAAM,SAAS,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAEtD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACzD,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEtE,SAAS,EAAE,CAAC;YACZ,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAElE,iEAAiE;YACjE,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;QAChD,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,EAAE,CAAC;YACT,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;YACnD,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,KAAK,CAAC;gBAChD,GAAG,CAAE,GAAa,CAAC,OAAO,CAAC,CAC9B,CAAC;QACJ,CAAC;IACH,CAAC;IAED,gBAAgB;IAChB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,cAAc,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,YAAY,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,MAAM,EAAE,EAAE,CAAC,CAAC;IAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACrC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"generate-all.js","sourceRoot":"","sources":["../../../src/commands/eval/generate-all.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,uEAAuE;AACvE,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,6BAA6B,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AACxF,OAAO,EAAE,SAAS,EAAE,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,uBAAuB,CAAC;AAEtE,MAAM,aAAa,GAAG,IAAI,GAAG,CAAe,CAAC,YAAY,EAAE,WAAW,EAAE,YAAY,CAAC,CAAC,CAAC;AAEvF,SAAS,eAAe;IACtB,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,oBAAgD,CAAC;IAC9E,IAAI,QAAQ;QAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC;IAEjE,gEAAgE;IAChE,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE,CAAC;QAClC,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;IACvD,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC;AACzD,CAAC;AAED,SAAS,kBAAkB,CACzB,mBAAuC,EACvC,QAAsB;IAEtB,IAAI,mBAAmB,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,mBAAmB,CAAC,CAAC;IAC/E,OAAO,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAC7C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,KAAc,EACd,WAAoB;IAEpB,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,IAAI,CAAC,CAAC;IAEtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,qBAAqB,GAAG,IAAI,CAAC,CAAC,CAAC;QAC/C,OAAO;IACT,CAAC;IAED,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,eAAe,EAAE,CAAC;IACrD,MAAM,oBAAoB,GAAG,kBAAkB,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;IAEvE,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,sDAAsD,CAAC,CAAC,CAAC;IAC3E,CAAC;IACD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,aAAa,QAAQ,mBAAmB,oBAAoB,EAAE,CAAC,CAAC,CAAC;IAEjF,MAAM,MAAM,GAAG,eAAe,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC7C,MAAM,GAAG,GAAG,IAAI,SAAS,CAAC,oBAAoB,CAAC,CAAC;IAChD,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,WAAW,GAAa,EAAE,CAAC;IAEjC,qCAAqC;IACrC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QACzC,IAAI,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YAC7B,OAAO,EAAE,CAAC;YACV,OAAO,KAAK,CAAC;QACf,CAAC;QACD,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;QAChD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC7B,MAAM,EAAE,CAAC;YACT,WAAW,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,gBAAgB,CAAC,CAAC;YACjE,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,wDAAwD;IACxD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,CACtC,UAAU,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC7B,MAAM,GAAG,CAAC,OAAO,EAAE,CAAC;QACpB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;YAChD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;YACzD,MAAM,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YACxD,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;YAEjD,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CACrC,qFAAqF,EACrF,MAAM,CACP,CAAC;YAEF,MAAM,SAAS,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAEtD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACzD,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEtE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAClE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,IAAa,EAAE,CAAC;QAC3C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,KAAK,KAAK,CAAC;gBAChD,GAAG,CAAE,GAAa,CAAC,OAAO,CAAC,CAC9B,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,KAAc,EAAE,KAAK,EAAE,GAAY,EAAE,CAAC;QACjE,CAAC;gBAAS,CAAC;YACT,GAAG,CAAC,OAAO,EAAE,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CACH,CAAC;IAEF,gBAAgB;IAChB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YAC7B,IAAI,CAAC,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;gBACpB,SAAS,EAAE,CAAC;YACd,CAAC;iBAAM,CAAC;gBACN,MAAM,EAAE,CAAC;gBACT,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;YACrE,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,EAAE,CAAC;QACX,CAAC;IACH,CAAC;IAED,gBAAgB;IAChB,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,cAAc,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,KAAK,CAAC,cAAc,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,CAAC,YAAY,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,KAAK,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,MAAM,EAAE,EAAE,CAAC,CAAC;IAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACrC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;AACH,CAAC"}
@@ -1 +1,2 @@
1
- export declare function runEvalInit(skillDir: string, force: boolean): Promise<void>;
1
+ export type EvalInitType = "unit" | "integration" | "all";
2
+ export declare function runEvalInit(skillDir: string, force: boolean, type?: EvalInitType): Promise<void>;
@@ -4,9 +4,10 @@
4
4
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs";
5
5
  import { join } from "node:path";
6
6
  import { createLlmClient } from "../../eval/llm.js";
7
- import { buildEvalInitPrompt, parseGeneratedEvals } from "../../eval/prompt-builder.js";
7
+ import { buildEvalInitPrompt, buildIntegrationEvalPrompt, parseGeneratedEvals, parseGeneratedIntegrationEvals, detectBrowserRequirements, detectPlatformTargets, } from "../../eval/prompt-builder.js";
8
+ import { detectMcpDependencies } from "../../eval/mcp-detector.js";
8
9
  import { green, red, dim, yellow } from "../../utils/output.js";
9
- export async function runEvalInit(skillDir, force) {
10
+ export async function runEvalInit(skillDir, force, type = "unit") {
10
11
  const skillMdPath = join(skillDir, "SKILL.md");
11
12
  const evalsDir = join(skillDir, "evals");
12
13
  const evalsPath = join(evalsDir, "evals.json");
@@ -21,15 +22,80 @@ export async function runEvalInit(skillDir, force) {
21
22
  return;
22
23
  }
23
24
  const skillContent = readFileSync(skillMdPath, "utf-8");
24
- const prompt = buildEvalInitPrompt(skillContent);
25
+ // Detect integration capabilities
26
+ const mcpDeps = detectMcpDependencies(skillContent);
27
+ const browserReqs = detectBrowserRequirements(skillContent);
28
+ const platforms = detectPlatformTargets(skillContent);
29
+ const hasIntegrationTargets = mcpDeps.length > 0 || browserReqs.hasBrowser;
30
+ // AC-US3-05: No integration targets + --type integration → skip
31
+ if (type === "integration" && !hasIntegrationTargets) {
32
+ console.log(dim("No integration targets detected, generating unit tests only"));
33
+ return;
34
+ }
25
35
  try {
26
- const client = createLlmClient();
27
- const genResult = await client.generate("You generate eval test cases for AI skills. Output only valid JSON in a code fence.", prompt);
28
- const evalsFile = parseGeneratedEvals(genResult.text);
29
- mkdirSync(evalsDir, { recursive: true });
30
- writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), "utf-8");
31
- console.log(green(`Created ${evalsPath}`));
32
- console.log(dim(` ${evalsFile.evals.length} eval cases, ${evalsFile.evals.reduce((sum, e) => sum + e.assertions.length, 0)} assertions`));
36
+ if (type === "all" && hasIntegrationTargets) {
37
+ // AC-US3-04: Parallel dispatch unit (Haiku) + integration (Sonnet)
38
+ const unitPrompt = buildEvalInitPrompt(skillContent);
39
+ const integrationPrompt = buildIntegrationEvalPrompt(skillContent, mcpDeps, browserReqs, platforms);
40
+ const unitClient = createLlmClient({ model: "haiku" });
41
+ const integrationClient = createLlmClient({ model: "sonnet" });
42
+ const [unitResult, integrationResult] = await Promise.allSettled([
43
+ unitClient.generate("You generate eval test cases for AI skills. Output only valid JSON in a code fence.", unitPrompt),
44
+ integrationClient.generate("You generate integration eval test cases for AI skills. Output only valid JSON in a code fence.", integrationPrompt),
45
+ ]);
46
+ const unitCases = unitResult.status === "fulfilled"
47
+ ? parseGeneratedEvals(unitResult.value.text).evals
48
+ : [];
49
+ const integrationCases = integrationResult.status === "fulfilled"
50
+ ? parseGeneratedIntegrationEvals(integrationResult.value.text)
51
+ : [];
52
+ if (unitResult.status === "rejected") {
53
+ console.log(yellow(`Unit eval generation failed: ${unitResult.reason}`));
54
+ }
55
+ if (integrationResult.status === "rejected") {
56
+ console.log(yellow(`Integration eval generation failed: ${integrationResult.reason}`));
57
+ }
58
+ const allCases = [...unitCases, ...integrationCases];
59
+ if (allCases.length === 0) {
60
+ console.error(red("Both unit and integration eval generation failed"));
61
+ return;
62
+ }
63
+ // Extract skill name from unit result or derive from directory
64
+ const skillName = unitResult.status === "fulfilled"
65
+ ? parseGeneratedEvals(unitResult.value.text).skill_name
66
+ : skillDir.split("/").pop() || "unknown";
67
+ const evalsFile = { skill_name: skillName, evals: allCases };
68
+ mkdirSync(evalsDir, { recursive: true });
69
+ writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), "utf-8");
70
+ const unitCount = unitCases.length;
71
+ const intgCount = integrationCases.length;
72
+ console.log(green(`Created ${evalsPath}`));
73
+ console.log(dim(` ${unitCount} unit + ${intgCount} integration cases, ${allCases.reduce((sum, e) => sum + (e.assertions?.length || 0), 0)} assertions`));
74
+ }
75
+ else if (type === "integration") {
76
+ // Integration only
77
+ const integrationPrompt = buildIntegrationEvalPrompt(skillContent, mcpDeps, browserReqs, platforms);
78
+ const client = createLlmClient({ model: "sonnet" });
79
+ const genResult = await client.generate("You generate integration eval test cases for AI skills. Output only valid JSON in a code fence.", integrationPrompt);
80
+ const integrationCases = parseGeneratedIntegrationEvals(genResult.text);
81
+ const skillName = skillDir.split("/").pop() || "unknown";
82
+ const evalsFile = { skill_name: skillName, evals: integrationCases };
83
+ mkdirSync(evalsDir, { recursive: true });
84
+ writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), "utf-8");
85
+ console.log(green(`Created ${evalsPath}`));
86
+ console.log(dim(` ${integrationCases.length} integration cases, ${integrationCases.reduce((sum, e) => sum + (e.assertions?.length || 0), 0)} assertions`));
87
+ }
88
+ else {
89
+ // Unit only (default, existing behavior)
90
+ const prompt = buildEvalInitPrompt(skillContent);
91
+ const client = createLlmClient();
92
+ const genResult = await client.generate("You generate eval test cases for AI skills. Output only valid JSON in a code fence.", prompt);
93
+ const evalsFile = parseGeneratedEvals(genResult.text);
94
+ mkdirSync(evalsDir, { recursive: true });
95
+ writeFileSync(evalsPath, JSON.stringify(evalsFile, null, 2), "utf-8");
96
+ console.log(green(`Created ${evalsPath}`));
97
+ console.log(dim(` ${evalsFile.evals.length} eval cases, ${evalsFile.evals.reduce((sum, e) => sum + e.assertions.length, 0)} assertions`));
98
+ }
33
99
  }
34
100
  catch (err) {
35
101
  console.error(red("Failed to generate evals: ") + dim(err.message));
@@ -1 +1 @@
1
- {"version":3,"file":"init.js","sourceRoot":"","sources":["../../../src/commands/eval/init.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gEAAgE;AAChE,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,MAAM,8BAA8B,CAAC;AACxF,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAEhE,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAgB,EAChB,KAAc;IAEd,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAE/C,wBAAwB;IACxB,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yBAAyB,WAAW,EAAE,CAAC,CAAC,CAAC;QAC3D,OAAO;IACT,CAAC;IAED,4BAA4B;IAC5B,IAAI,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CACT,MAAM,CAAC,qDAAqD,CAAC,CAC9D,CAAC;QACF,OAAO;IACT,CAAC;IAED,MAAM,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IACxD,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;QACjC,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CACrC,qFAAqF,EACrF,MAAM,CACP,CAAC;QAEF,MAAM,SAAS,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAEtD,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QAEtE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC,CAAC;QAC3C,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,KAAK,SAAS,CAAC,KAAK,CAAC,MAAM,gBAAgB,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,aAAa,CAAC,CAC9H,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,4BAA4B,CAAC,GAAG,GAAG,CAAE,GAAa,CAAC,OAAO,CAAC,CAChE,CAAC;IACJ,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"init.js","sourceRoot":"","sources":["../../../src/commands/eval/init.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gEAAgE;AAChE,8EAA8E;AAE9E,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EACL,mBAAmB,EACnB,0BAA0B,EAC1B,mBAAmB,EACnB,8BAA8B,EAC9B,yBAAyB,EACzB,qBAAqB,GACtB,MAAM,8BAA8B,CAAC;AACtC,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAIhE,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAgB,EAChB,KAAc,EACd,OAAqB,MAAM;IAE3B,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAE/C,wBAAwB;IACxB,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,yBAAyB,WAAW,EAAE,CAAC,CAAC,CAAC;QAC3D,OAAO;IACT,CAAC;IAED,4BAA4B;IAC5B,IAAI,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;QACpC,OAAO,CAAC,GAAG,CACT,MAAM,CAAC,qDAAqD,CAAC,CAC9D,CAAC;QACF,OAAO;IACT,CAAC;IAED,MAAM,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;IAExD,kCAAkC;IAClC,MAAM,OAAO,GAAG,qBAAqB,CAAC,YAAY,CAAC,CAAC;IACpD,MAAM,WAAW,GAAG,yBAAyB,CAAC,YAAY,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,qBAAqB,CAAC,YAAY,CAAC,CAAC;IACtD,MAAM,qBAAqB,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC;IAE3E,gEAAgE;IAChE,IAAI,IAAI,KAAK,aAAa,IAAI,CAAC,qBAAqB,EAAE,CAAC;QACrD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,6DAA6D,CAAC,CAAC,CAAC;QAChF,OAAO;IACT,CAAC;IAED,IAAI,CAAC;QACH,IAAI,IAAI,KAAK,KAAK,IAAI,qBAAqB,EAAE,CAAC;YAC5C,qEAAqE;YACrE,MAAM,UAAU,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;YACrD,MAAM,iBAAiB,GAAG,0BAA0B,CAAC,YAAY,EAAE,OAAO,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;YAEpG,MAAM,UAAU,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;YACvD,MAAM,iBAAiB,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;YAE/D,MAAM,CAAC,UAAU,EAAE,iBAAiB,CAAC,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC;gBAC/D,UAAU,CAAC,QAAQ,CACjB,qFAAqF,EACrF,UAAU,CACX;gBACD,iBAAiB,CAAC,QAAQ,CACxB,iGAAiG,EACjG,iBAAiB,CAClB;aACF,CAAC,CAAC;YAEH,MAAM,SAAS,GAAG,UAAU,CAAC,MAAM,KAAK,WAAW;gBACjD,CAAC,CAAC,mBAAmB,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK;gBAClD,CAAC,CAAC,EAAE,CAAC;YACP,MAAM,gBAAgB,GAAG,iBAAiB,CAAC,MAAM,KAAK,WAAW;gBAC/D,CAAC,CAAC,8BAA8B,CAAC,iBAAiB,CAAC,KAAK,CAAC,IAAI,CAAC;gBAC9D,CAAC,CAAC,EAAE,CAAC;YAEP,IAAI,UAAU,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,gCAAgC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YAC3E,CAAC;YACD,IAAI,iBAAiB,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;gBAC5C,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,uCAAuC,iBAAiB,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YACzF,CAAC;YAED,MAAM,QAAQ,GAAG,CAAC,GAAG,SAAS,EAAE,GAAG,gBAAgB,CAAC,CAAC;YACrD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,kDAAkD,CAAC,CAAC,CAAC;gBACvE,OAAO;YACT,CAAC;YAED,+DAA+D;YAC/D,MAAM,SAAS,GAAG,UAAU,CAAC,MAAM,KAAK,WAAW;gBACjD,CAAC,CAAC,mBAAmB,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,UAAU;gBACvD,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,SAAS,CAAC;YAE3C,MAAM,SAAS,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;YAC7D,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACzC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEtE,MAAM,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC;YACnC,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC;YAC1C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,KAAK,SAAS,WAAW,SAAS,uBAAuB,QAAQ,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,aAAa,CAAC,CAC7I,CAAC;QACJ,CAAC;aAAM,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;YAClC,mBAAmB;YACnB,MAAM,iBAAiB,GAAG,0BAA0B,CAAC,YAAY,EAAE,OAAO,EAAE,WAAW,EAAE,SAAS,CAAC,CAAC;YACpG,MAAM,MAAM,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;YACpD,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CACrC,iGAAiG,EACjG,iBAAiB,CAClB,CAAC;YAEF,MAAM,gBAAgB,GAAG,8BAA8B,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACxE,MAAM,SAAS,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,SAAS,CAAC;YACzD,MAAM,SAAS,GAAG,EAAE,UAAU,EAAE,SAAS,EAAE,KAAK,EAAE,gBAAgB,EAAE,CAAC;YAErE,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACzC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEtE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,KAAK,gBAAgB,CAAC,MAAM,uBAAuB,gBAAgB,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,UAAU,EAAE,MAAM,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,aAAa,CAAC,CAC/I,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,yCAAyC;YACzC,MAAM,MAAM,GAAG,mBAAmB,CAAC,YAAY,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,SAAS,GAAG,MAAM,MAAM,CAAC,QAAQ,CACrC,qFAAqF,EACrF,MAAM,CACP,CAAC;YAEF,MAAM,SAAS,GAAG,mBAAmB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YAEtD,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACzC,aAAa,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YAEtE,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,WAAW,SAAS,EAAE,CAAC,CAAC,CAAC;YAC3C,OAAO,CAAC,GAAG,CACT,GAAG,CAAC,KAAK,SAAS,CAAC,KAAK,CAAC,MAAM,gBAAgB,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC,aAAa,CAAC,CAC9H,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CACX,GAAG,CAAC,4BAA4B,CAAC,GAAG,GAAG,CAAE,GAAa,CAAC,OAAO,CAAC,CAChE,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -1 +1,7 @@
1
- export declare function runEvalRun(skillDir: string): Promise<void>;
1
+ export interface EvalRunOptions {
2
+ concurrency?: number;
3
+ judgeModel?: string;
4
+ noCache?: boolean;
5
+ batch?: boolean;
6
+ }
7
+ export declare function runEvalRun(skillDir: string, options?: EvalRunOptions): Promise<void>;