vskill 0.5.11 → 0.5.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/commands/eval/credentials.d.ts +12 -0
  2. package/dist/commands/eval/credentials.js +140 -0
  3. package/dist/commands/eval/credentials.js.map +1 -0
  4. package/dist/commands/eval/generate-all.d.ts +1 -1
  5. package/dist/commands/eval/generate-all.js +57 -12
  6. package/dist/commands/eval/generate-all.js.map +1 -1
  7. package/dist/commands/eval/init.d.ts +2 -1
  8. package/dist/commands/eval/init.js +76 -10
  9. package/dist/commands/eval/init.js.map +1 -1
  10. package/dist/commands/eval/run.d.ts +7 -1
  11. package/dist/commands/eval/run.js +207 -26
  12. package/dist/commands/eval/run.js.map +1 -1
  13. package/dist/commands/eval/sweep.d.ts +7 -0
  14. package/dist/commands/eval/sweep.js +99 -0
  15. package/dist/commands/eval/sweep.js.map +1 -0
  16. package/dist/commands/eval.d.ts +10 -0
  17. package/dist/commands/eval.js +62 -4
  18. package/dist/commands/eval.js.map +1 -1
  19. package/dist/eval/batch-judge.d.ts +27 -0
  20. package/dist/eval/batch-judge.js +242 -0
  21. package/dist/eval/batch-judge.js.map +1 -0
  22. package/dist/eval/chrome-profile.d.ts +16 -0
  23. package/dist/eval/chrome-profile.js +65 -0
  24. package/dist/eval/chrome-profile.js.map +1 -0
  25. package/dist/eval/comparator.d.ts +3 -1
  26. package/dist/eval/comparator.js +19 -3
  27. package/dist/eval/comparator.js.map +1 -1
  28. package/dist/eval/concurrency.d.ts +13 -0
  29. package/dist/eval/concurrency.js +53 -0
  30. package/dist/eval/concurrency.js.map +1 -0
  31. package/dist/eval/credential-resolver.d.ts +31 -0
  32. package/dist/eval/credential-resolver.js +111 -0
  33. package/dist/eval/credential-resolver.js.map +1 -0
  34. package/dist/eval/integration-runner.d.ts +12 -0
  35. package/dist/eval/integration-runner.js +303 -0
  36. package/dist/eval/integration-runner.js.map +1 -0
  37. package/dist/eval/integration-types.d.ts +65 -0
  38. package/dist/eval/integration-types.js +18 -0
  39. package/dist/eval/integration-types.js.map +1 -0
  40. package/dist/eval/judge-cache.d.ts +29 -0
  41. package/dist/eval/judge-cache.js +109 -0
  42. package/dist/eval/judge-cache.js.map +1 -0
  43. package/dist/eval/judge.d.ts +1 -1
  44. package/dist/eval/judge.js +20 -3
  45. package/dist/eval/judge.js.map +1 -1
  46. package/dist/eval/llm.d.ts +2 -1
  47. package/dist/eval/llm.js +54 -2
  48. package/dist/eval/llm.js.map +1 -1
  49. package/dist/eval/prompt-builder.d.ts +10 -0
  50. package/dist/eval/prompt-builder.js +167 -0
  51. package/dist/eval/prompt-builder.js.map +1 -1
  52. package/dist/eval/rate-limiter.d.ts +20 -0
  53. package/dist/eval/rate-limiter.js +62 -0
  54. package/dist/eval/rate-limiter.js.map +1 -0
  55. package/dist/eval/schema.d.ts +16 -0
  56. package/dist/eval/schema.js +58 -6
  57. package/dist/eval/schema.js.map +1 -1
  58. package/dist/eval/verdict.d.ts +9 -0
  59. package/dist/eval/verdict.js +50 -0
  60. package/dist/eval/verdict.js.map +1 -1
  61. package/dist/eval-server/api-routes.js +99 -3
  62. package/dist/eval-server/api-routes.js.map +1 -1
  63. package/dist/eval-server/benchmark-runner.d.ts +7 -0
  64. package/dist/eval-server/benchmark-runner.js +158 -42
  65. package/dist/eval-server/benchmark-runner.js.map +1 -1
  66. package/dist/eval-server/concurrency.d.ts +1 -13
  67. package/dist/eval-server/concurrency.js +3 -49
  68. package/dist/eval-server/concurrency.js.map +1 -1
  69. package/dist/eval-server/eval-server.js +4 -0
  70. package/dist/eval-server/eval-server.js.map +1 -1
  71. package/dist/eval-server/integration-routes.d.ts +2 -0
  72. package/dist/eval-server/integration-routes.js +100 -0
  73. package/dist/eval-server/integration-routes.js.map +1 -0
  74. package/dist/eval-server/skill-create-routes.js +151 -22
  75. package/dist/eval-server/skill-create-routes.js.map +1 -1
  76. package/dist/eval-server/sweep-routes.d.ts +2 -0
  77. package/dist/eval-server/sweep-routes.js +93 -0
  78. package/dist/eval-server/sweep-routes.js.map +1 -0
  79. package/dist/eval-server/sweep-runner.d.ts +93 -0
  80. package/dist/eval-server/sweep-runner.js +275 -0
  81. package/dist/eval-server/sweep-runner.js.map +1 -0
  82. package/dist/eval-ui/assets/index-C9_Pey9T.css +1 -0
  83. package/dist/eval-ui/assets/index-KfkLPyh3.js +74 -0
  84. package/dist/eval-ui/index.html +2 -2
  85. package/dist/index.js +8 -0
  86. package/dist/index.js.map +1 -1
  87. package/package.json +1 -1
  88. package/dist/eval-ui/assets/index-CxHCKEhf.js +0 -74
  89. package/dist/eval-ui/assets/index-D2UkOol1.css +0 -1
@@ -0,0 +1,12 @@
1
+ import type { IntegrationRunResult, IntegrationEvalCase, IntegrationRunOpts } from "./integration-types.js";
2
+ export declare function checkPlaywright(): void; // throws an Error with install instructions when the "playwright" package cannot be resolved
3
+ export declare function runIntegrationCase(evalCase: IntegrationEvalCase, opts: IntegrationRunOpts): Promise<IntegrationRunResult>; // runs preflight -> connect -> execute -> verify -> cleanup and reports one PhaseResult per phase
4
+ export declare function promptConfirmation(platform: string, actions: string[]): Promise<boolean>; // resolves true without prompting when CI === "true"; otherwise true only for a "y"/"yes" answer
5
+ /**
6
+ * Report whether the skill has no integration history yet: true when <skillDir>/evals/.integration-history.json does not exist.
7
+ */
8
+ export declare function isFirstRun(skillDir: string): boolean;
9
+ /**
10
+ * Append a run result to the JSON array stored in <skillDir>/evals/.integration-history.json; a history file that cannot be parsed is replaced by a fresh array.
11
+ */
12
+ export declare function recordRun(skillDir: string, result: IntegrationRunResult): void;
@@ -0,0 +1,303 @@
1
+ // ---------------------------------------------------------------------------
2
+ // integration-runner.ts -- 5-phase browser-based integration test runner
3
+ //
4
+ // Phases: Preflight -> Connect -> Execute -> Verify -> Cleanup
5
+ // ---------------------------------------------------------------------------
6
+ import { randomUUID } from "node:crypto";
7
+ import { existsSync, readFileSync, writeFileSync } from "node:fs";
8
+ import { join } from "node:path";
9
+ import { createRequire } from "node:module";
10
+ import { resolveAllCredentials } from "./credential-resolver.js";
11
+ import { resolveProfile } from "./chrome-profile.js";
12
+ import { PlatformRateLimiter } from "./rate-limiter.js";
13
+ import { judgeAssertion } from "./judge.js";
14
+ import { createLlmClient } from "./llm.js";
15
+ // ---------------------------------------------------------------------------
16
+ // SIGINT cleanup state
17
+ // ---------------------------------------------------------------------------
18
/** True while sigintHandler is attached to the process "SIGINT" event. */
let cleanupRegistered = false;
/** Cleanup callback of the run in progress, or null when no run is active. */
let cleanupFn = null;
/** Set as soon as sigintHandler starts so a repeated SIGINT is ignored. */
let cleanupDone = false;

/**
 * Make `fn` the callback that sigintHandler awaits on SIGINT.
 *
 * The process-level listener is attached at most once; later calls only
 * swap the callback and re-arm the "already handled" guard.
 */
function registerSigintHandler(fn) {
    cleanupDone = false;
    cleanupFn = fn;
    if (cleanupRegistered) {
        return;
    }
    cleanupRegistered = true;
    process.on("SIGINT", sigintHandler);
}
29
/**
 * Detach sigintHandler from the process and forget the current cleanup
 * callback, so a later registerSigintHandler call re-attaches the listener.
 */
function deregisterSigintHandler() {
    cleanupFn = null;
    cleanupRegistered = false;
    // process.off is the documented alias of removeListener.
    process.off("SIGINT", sigintHandler);
}
34
/**
 * SIGINT listener: awaits the registered cleanup callback (if any), logs the
 * outcome, and then exits the process with code 0.
 *
 * Only the first invocation does any work; later signals received while the
 * cleanup is still running return immediately.
 */
async function sigintHandler() {
    if (!cleanupDone) {
        cleanupDone = true;
        console.log("\nSIGINT received — running cleanup...");
        const pendingCleanup = cleanupFn;
        if (pendingCleanup) {
            try {
                await pendingCleanup();
                console.log("Cleanup complete, exiting.");
            }
            catch (err) {
                // A failing cleanup must not prevent the process from exiting.
                console.error("Cleanup failed:", err.message);
            }
        }
        process.exit(0);
    }
}
50
+ // ---------------------------------------------------------------------------
51
+ // Playwright lazy check
52
+ // ---------------------------------------------------------------------------
53
/**
 * Verify that the optional "playwright" package can be resolved from this
 * module's location, without loading it.
 *
 * @throws {Error} with installation instructions when resolution fails.
 */
export function checkPlaywright() {
    let resolvable = true;
    try {
        // createRequire gives this ES module a CommonJS-style resolver.
        createRequire(import.meta.url).resolve("playwright");
    }
    catch {
        resolvable = false;
    }
    if (!resolvable) {
        throw new Error("Playwright is required for integration tests. Install it with:\n" +
            " npm install --save-dev playwright && npx playwright install chromium");
    }
}
63
+ // ---------------------------------------------------------------------------
64
+ // Main runner
65
+ // ---------------------------------------------------------------------------
66
/**
 * Run one integration eval case through the five phases
 * Preflight -> Connect -> Execute -> Verify -> Cleanup.
 *
 * Each phase is executed through runPhase, so a phase error is recorded as a
 * "fail" PhaseResult instead of being thrown. When preflight fails, all later
 * phases (including cleanup) are marked "skipped". When connect or execute
 * fails, the remaining work phases are marked "skipped" but cleanup still runs.
 * A verify failure does not stop cleanup.
 *
 * @param evalCase The integration case: prompt, assertions, credentials,
 *   browser requirements and cleanup actions.
 * @param opts Run options. `opts.skillDir` is used for credential lookup and
 *   to find SKILL.md; `opts.dryRun` skips the Playwright check, the browser
 *   launch, rate limiting and the LLM generation; `opts.runId` overrides the
 *   random 8-character run id.
 * @returns The aggregated run result built by buildResult.
 */
export async function runIntegrationCase(evalCase, opts) {
    const runId = opts.runId ?? randomUUID().slice(0, 8).toUpperCase();
    const testPrefix = `[VSKILL-TEST-${runId}]`;
    const phases = [];
    const testArtifactIds = [runId];
    let browser = null;
    let context = null;
    // Register cleanup for SIGINT. The handle is read when the signal arrives;
    // in the persistent-profile path `browser` stays null and only `context`
    // is set, so fall back to it exactly like the regular cleanup phase does.
    registerSigintHandler(async () => {
        await runCleanup(evalCase, browser ?? context, testArtifactIds);
    });
    try {
        // -----------------------------------------------------------------------
        // Phase 1: PREFLIGHT
        // -----------------------------------------------------------------------
        const preflightResult = await runPhase("preflight", async () => {
            // Check credentials
            if (evalCase.requiredCredentials?.length) {
                const statuses = resolveAllCredentials(evalCase.requiredCredentials, opts.skillDir);
                const missing = statuses.filter((s) => s.status === "missing");
                if (missing.length > 0) {
                    throw new Error(`Missing credentials: ${missing.map((m) => m.name).join(", ")}. ` +
                        `Set them with: vskill credentials set <KEY>`);
                }
            }
            // Check Chrome profile: resolving a named profile here surfaces a
            // bad profile name before any browser is launched.
            const profileName = evalCase.requirements?.chromeProfile;
            const profilePath = evalCase.requirements?.chromeProfilePath;
            if (profileName && !profilePath) {
                resolveProfile(profileName);
            }
            // Check Playwright
            if (!opts.dryRun) {
                checkPlaywright();
            }
        });
        phases.push(preflightResult);
        if (preflightResult.status === "fail") {
            // Abort remaining phases
            for (const p of ["connect", "execute", "verify", "cleanup"]) {
                phases.push({ phase: p, status: "skipped" });
            }
            return buildResult(evalCase, runId, phases, testArtifactIds, !!opts.dryRun);
        }
        // -----------------------------------------------------------------------
        // Phase 2: CONNECT
        // -----------------------------------------------------------------------
        const connectResult = await runPhase("connect", async () => {
            if (opts.dryRun) {
                console.log(`[DRY RUN] Would launch browser with profile: ${evalCase.requirements?.chromeProfile ?? "default"}`);
                return;
            }
            const profileName = evalCase.requirements?.chromeProfile;
            // An explicit profile path wins over a profile name.
            const profilePath = evalCase.requirements?.chromeProfilePath ?? (profileName ? resolveProfile(profileName) : undefined);
            const pw = await import("playwright");
            if (profilePath) {
                context = await pw.chromium.launchPersistentContext(profilePath, {
                    headless: false,
                    args: ["--disable-blink-features=AutomationControlled"],
                });
                browser = null; // persistent context manages its own browser
            }
            else {
                browser = await pw.chromium.launch({ headless: false });
                context = await browser.newContext();
            }
        });
        phases.push(connectResult);
        if (connectResult.status === "fail") {
            phases.push({ phase: "execute", status: "skipped" });
            phases.push({ phase: "verify", status: "skipped" });
            phases.push(await runPhase("cleanup", () => runCleanup(evalCase, browser ?? context, testArtifactIds)));
            return buildResult(evalCase, runId, phases, testArtifactIds, !!opts.dryRun);
        }
        // -----------------------------------------------------------------------
        // Phase 3: EXECUTE
        // -----------------------------------------------------------------------
        let generatedOutput = "";
        const executeResult = await runPhase("execute", async () => {
            const platform = evalCase.requirements?.platform;
            // Rate limiting
            if (platform && !opts.dryRun) {
                const rateLimiter = new PlatformRateLimiter(evalCase.requirements?.rateLimit
                    ? { [platform]: evalCase.requirements.rateLimit }
                    : undefined);
                await rateLimiter.acquire(platform);
            }
            // Build prompt with test prefix
            const promptWithPrefix = `${testPrefix}\n\nIMPORTANT: All content you create or post MUST include the prefix "${testPrefix}" for identification and cleanup.\n\n${evalCase.prompt}`;
            if (opts.dryRun) {
                console.log(`[DRY RUN] Would execute LLM with prompt:\n${promptWithPrefix.slice(0, 200)}...`);
                generatedOutput = `[DRY RUN] Simulated output for: ${evalCase.name}`;
                return;
            }
            // Create LLM client and generate; SKILL.md, when present, replaces
            // the generic system prompt.
            const client = createLlmClient();
            const skillMdPath = join(opts.skillDir, "SKILL.md");
            let systemPrompt = "You are executing an integration test. Follow the instructions precisely.";
            if (existsSync(skillMdPath)) {
                systemPrompt = readFileSync(skillMdPath, "utf-8");
            }
            const result = await client.generate(systemPrompt, promptWithPrefix);
            generatedOutput = result.text;
        });
        phases.push(executeResult);
        if (executeResult.status === "fail") {
            phases.push({ phase: "verify", status: "skipped" });
            phases.push(await runPhase("cleanup", () => runCleanup(evalCase, browser ?? context, testArtifactIds)));
            return buildResult(evalCase, runId, phases, testArtifactIds, !!opts.dryRun);
        }
        // -----------------------------------------------------------------------
        // Phase 4: VERIFY
        // -----------------------------------------------------------------------
        // NOTE(review): this phase also runs in dry-run mode and judges the
        // simulated output with a real LLM client — confirm that is intended.
        const verifyResult = await runPhase("verify", async () => {
            if (!evalCase.assertions?.length)
                return;
            const client = createLlmClient();
            const results = await Promise.all(evalCase.assertions.map((assertion) => judgeAssertion(generatedOutput, assertion, client)));
            const failed = results.filter((r) => !r.pass);
            if (failed.length > 0) {
                throw new Error(`${failed.length} assertion(s) failed:\n` +
                    failed.map((f) => ` - ${f.text}: ${f.reasoning}`).join("\n"));
            }
        });
        phases.push(verifyResult);
        // -----------------------------------------------------------------------
        // Phase 5: CLEANUP
        // -----------------------------------------------------------------------
        const cleanupResult = await runPhase("cleanup", () => runCleanup(evalCase, browser ?? context, testArtifactIds));
        phases.push(cleanupResult);
        return buildResult(evalCase, runId, phases, testArtifactIds, !!opts.dryRun);
    }
    finally {
        deregisterSigintHandler();
    }
}
202
+ // ---------------------------------------------------------------------------
203
+ // Phase executor
204
+ // ---------------------------------------------------------------------------
205
+ async function runPhase(phase, fn) {
206
+ const start = Date.now();
207
+ try {
208
+ await fn();
209
+ return { phase, status: "pass", durationMs: Date.now() - start };
210
+ }
211
+ catch (err) {
212
+ return {
213
+ phase,
214
+ status: "fail",
215
+ durationMs: Date.now() - start,
216
+ errorMessage: err.message,
217
+ };
218
+ }
219
+ }
220
+ // ---------------------------------------------------------------------------
221
+ // Cleanup
222
+ // ---------------------------------------------------------------------------
223
+ async function runCleanup(evalCase, browserOrContext, _testArtifactIds) {
224
+ // Run cleanup actions defined in the eval case
225
+ if (evalCase.cleanup?.length) {
226
+ for (const action of evalCase.cleanup) {
227
+ try {
228
+ if (action.execute) {
229
+ await action.execute();
230
+ }
231
+ }
232
+ catch (err) {
233
+ // Log but do not throw — test result stands independently
234
+ console.error(`Cleanup action "${action.description}" failed:`, err.message);
235
+ }
236
+ }
237
+ }
238
+ // Close browser
239
+ if (browserOrContext) {
240
+ try {
241
+ await browserOrContext.close();
242
+ }
243
+ catch {
244
+ // Browser may already be closed
245
+ }
246
+ }
247
+ }
248
+ // ---------------------------------------------------------------------------
249
+ // Confirmation prompt
250
+ // ---------------------------------------------------------------------------
251
/**
 * Ask the user on the terminal to confirm the listed actions.
 *
 * @param platform Platform name shown in the question.
 * @param actions Human-readable actions, printed one per line.
 * @returns true when running in CI (`CI === "true"`, no prompt is shown) or
 *   when the answer is "y"/"yes" (case-insensitive, surrounding whitespace
 *   ignored); false for any other answer or when stdin closes before an
 *   answer is given.
 */
export async function promptConfirmation(platform, actions) {
    // Skip in CI
    if (process.env.CI === "true")
        return true;
    const { createInterface } = await import("node:readline");
    const rl = createInterface({ input: process.stdin, output: process.stdout });
    const actionList = actions.map((a) => ` - ${a}`).join("\n");
    const question = `\nThis will perform the following actions on ${platform}:\n${actionList}\n\nProceed? (y/N) `;
    return new Promise((resolve) => {
        // If stdin ends before an answer arrives the question callback never
        // fires; treat that as "no" instead of leaving the promise pending.
        rl.once("close", () => resolve(false));
        rl.question(question, (answer) => {
            const normalized = answer.trim().toLowerCase();
            // Resolve before closing so the "close" fallback above is a no-op.
            resolve(normalized === "y" || normalized === "yes");
            rl.close();
        });
    });
}
266
+ /**
267
+ * Check if this is the first run against a platform (no history file).
268
+ */
269
/**
 * Tell whether the skill has never recorded an integration run, i.e. whether
 * `<skillDir>/evals/.integration-history.json` is absent.
 */
export function isFirstRun(skillDir) {
    const historyFile = join(skillDir, "evals", ".integration-history.json");
    const hasHistory = existsSync(historyFile);
    return !hasHistory;
}
272
+ /**
273
+ * Record a run in the integration history file.
274
+ */
275
/**
 * Append a run result to `<skillDir>/evals/.integration-history.json`.
 *
 * The file holds a JSON array of results. A missing file, unparseable
 * content, or content that is valid JSON but not an array all start a fresh
 * history instead of failing the run.
 *
 * @param skillDir Skill directory containing the `evals` folder.
 * @param result Run result to append.
 */
export function recordRun(skillDir, result) {
    const historyPath = join(skillDir, "evals", ".integration-history.json");
    let history = [];
    if (existsSync(historyPath)) {
        try {
            const parsed = JSON.parse(readFileSync(historyPath, "utf-8"));
            // Guard against e.g. `{}` or `null`, which parse fine but have no push().
            if (Array.isArray(parsed)) {
                history = parsed;
            }
        }
        catch {
            history = [];
        }
    }
    history.push(result);
    writeFileSync(historyPath, JSON.stringify(history, null, 2), "utf-8");
}
289
+ // ---------------------------------------------------------------------------
290
+ // Helpers
291
+ // ---------------------------------------------------------------------------
292
+ function buildResult(evalCase, runId, phases, testArtifactIds, dryRun) {
293
+ const overallPass = phases.every((p) => p.status === "pass" || p.status === "skipped");
294
+ return {
295
+ evalId: String(evalCase.id),
296
+ runId,
297
+ phases,
298
+ overallPass,
299
+ testArtifactIds,
300
+ dryRun,
301
+ };
302
+ }
303
+ //# sourceMappingURL=integration-runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"integration-runner.js","sourceRoot":"","sources":["../../src/eval/integration-runner.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,yEAAyE;AACzE,EAAE;AACF,+DAA+D;AAC/D,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAqB,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE5C,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAU3C,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAC9E,IAAI,iBAAiB,GAAG,KAAK,CAAC;AAC9B,IAAI,SAAS,GAAiC,IAAI,CAAC;AACnD,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,SAAS,qBAAqB,CAAC,EAAuB;IACpD,SAAS,GAAG,EAAE,CAAC;IACf,WAAW,GAAG,KAAK,CAAC;IACpB,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACvB,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpC,iBAAiB,GAAG,IAAI,CAAC;IAC3B,CAAC;AACH,CAAC;AAED,SAAS,uBAAuB;IAC9B,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IAChD,iBAAiB,GAAG,KAAK,CAAC;IAC1B,SAAS,GAAG,IAAI,CAAC;AACnB,CAAC;AAED,KAAK,UAAU,aAAa;IAC1B,IAAI,WAAW;QAAE,OAAO;IACxB,WAAW,GAAG,IAAI,CAAC;IACnB,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IACtD,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAM,SAAS,EAAE,CAAC;YAClB,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,iBAAiB,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E,MAAM,UAAU,eAAe;IAC7B,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,kEAAkE;YAClE,wEAAwE,CACzE,CAAC;IACJ,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAA6B,EAC7B,IAAwB;IAExB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IACnE,MAAM,UAAU,GAAG,gBAAgB,KAAK,GAAG,CAAC;IAC5
C,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,MAAM,eAAe,GAAa,CAAC,KAAK,CAAC,CAAC;IAC1C,IAAI,OAAO,GAAQ,IAAI,CAAC;IACxB,IAAI,OAAO,GAAQ,IAAI,CAAC;IAExB,8BAA8B;IAC9B,qBAAqB,CAAC,KAAK,IAAI,EAAE;QAC/B,MAAM,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE,eAAe,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,0EAA0E;QAC1E,qBAAqB;QACrB,0EAA0E;QAC1E,MAAM,eAAe,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,KAAK,IAAI,EAAE;YAC7D,oBAAoB;YACpB,IAAI,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAAC;gBACzC,MAAM,QAAQ,GAAG,qBAAqB,CAAC,QAAQ,CAAC,mBAAmB,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACpF,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;gBAC/D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACvB,MAAM,IAAI,KAAK,CACb,wBAAwB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;wBACjE,6CAA6C,CAC9C,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,uBAAuB;YACvB,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,aAAa,CAAC;YACzD,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,iBAAiB,CAAC;YAC7D,IAAI,WAAW,IAAI,CAAC,WAAW,EAAE,CAAC;gBAChC,cAAc,CAAC,WAAW,CAAC,CAAC;YAC9B,CAAC;YAED,mBAAmB;YACnB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,eAAe,EAAE,CAAC;YACpB,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAE7B,IAAI,eAAe,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACtC,yBAAyB;YACzB,KAAK,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAuB,EAAE,CAAC;gBAClF,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YAC/C,CAAC;YACD,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9E,CAAC;QAED,0EAA0E;QAC1E,mBAAmB;QACnB,0EAA0E;QAC1E,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,KAAK,IAAI,EAAE;YACzD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,gDAAgD,QAAQ,CAAC,YAAY,EAAE,aAAa,IAAI,SAAS,EAAE,CAAC,CAAC;gBACjH,OAAO;YACT,CAAC;YAED,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,aAAa,CAAC;YACzD,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,iBAAiB,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;YAExH,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;YACtC,IAAI
,WAAW,EAAE,CAAC;gBAChB,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC,WAAW,EAAE;oBAC/D,QAAQ,EAAE,KAAK;oBACf,IAAI,EAAE,CAAC,+CAA+C,CAAC;iBACxD,CAAC,CAAC;gBACH,OAAO,GAAG,IAAI,CAAC,CAAC,6CAA6C;YAC/D,CAAC;iBAAM,CAAC;gBACN,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;gBACxD,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;YACvC,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE3B,IAAI,aAAa,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YACrD,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,IAAI,OAAO,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC;YACxG,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9E,CAAC;QAED,0EAA0E;QAC1E,mBAAmB;QACnB,0EAA0E;QAC1E,IAAI,eAAe,GAAG,EAAE,CAAC;QACzB,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC;YAEjD,gBAAgB;YAChB,IAAI,QAAQ,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC7B,MAAM,WAAW,GAAG,IAAI,mBAAmB,CACzC,QAAQ,CAAC,YAAY,EAAE,SAAS;oBAC9B,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,SAAS,EAAE;oBACjD,CAAC,CAAC,SAAS,CACd,CAAC;gBACF,MAAM,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACtC,CAAC;YAED,gCAAgC;YAChC,MAAM,gBAAgB,GAAG,GAAG,UAAU,0EAA0E,UAAU,wCAAwC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAEpL,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,6CAA6C,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;gBAC9F,eAAe,GAAG,mCAAmC,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,iCAAiC;YACjC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YACpD,IAAI,YAAY,GAAG,2EAA2E,CAAC;YAC/F,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC5B,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YACpD,CAAC;YAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,gBAAgB,CAAC,CAAC;YACrE,eAAe,GAAG,MAAM,CAAC,IAAI,CAAC;QAChC,CAAC,CAAC,CAAC;
QACH,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE3B,IAAI,aAAa,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,IAAI,OAAO,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC;YACxG,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9E,CAAC;QAED,0EAA0E;QAC1E,kBAAkB;QAClB,0EAA0E;QAC1E,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,KAAK,IAAI,EAAE;YACvD,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM;gBAAE,OAAO;YAEzC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CACpC,cAAc,CAAC,eAAe,EAAE,SAAsB,EAAE,MAAM,CAAC,CAChE,CACF,CAAC;YAEF,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC9C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,IAAI,KAAK,CACb,GAAG,MAAM,CAAC,MAAM,yBAAyB;oBACzC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAC9D,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAE1B,0EAA0E;QAC1E,mBAAmB;QACnB,0EAA0E;QAC1E,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE,CACnD,UAAU,CAAC,QAAQ,EAAE,OAAO,IAAI,OAAO,EAAE,eAAe,CAAC,CAC1D,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE3B,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9E,CAAC;YAAS,CAAC;QACT,uBAAuB,EAAE,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E,KAAK,UAAU,QAAQ,CACrB,KAAuB,EACvB,EAAuB;IAEvB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,EAAE,EAAE,CAAC;QACX,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;IACnE,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,KAAK;YACL,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;YAC9B,YAAY,EAAG,GAAa,CAAC,OAAO;SACrC,CAAC;IACJ,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;
AAE9E,KAAK,UAAU,UAAU,CACvB,QAA6B,EAC7B,gBAAqB,EACrB,gBAA0B;IAE1B,+CAA+C;IAC/C,IAAI,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAC7B,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACtC,IAAI,CAAC;gBACH,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;oBACnB,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;gBACzB,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,0DAA0D;gBAC1D,OAAO,CAAC,KAAK,CAAC,mBAAmB,MAAM,CAAC,WAAW,WAAW,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;YAC1F,CAAC;QACH,CAAC;IACH,CAAC;IAED,gBAAgB;IAChB,IAAI,gBAAgB,EAAE,CAAC;QACrB,IAAI,CAAC;YACH,MAAM,gBAAgB,CAAC,KAAK,EAAE,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,gCAAgC;QAClC,CAAC;IACH,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,OAAiB;IAEjB,aAAa;IACb,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IAE3C,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;IAC1D,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAE7E,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAG,gDAAgD,QAAQ,MAAM,UAAU,qBAAqB,CAAC;IAE/G,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,EAAE;YAC/B,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,GAAG,IAAI,MAAM,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,2BAA2B,CAAC,CAAC,CAAC;AAC3E,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,QAAgB,EAAE,MAA4B;IACtE,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,2BAA2B,CAAC,CAAC;IACzE,IAAI,OAAO,GAA2B,EAAE,CAAC;IACzC,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC5B,IAAI,CAAC;YACH,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACrB,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC
;AACxE,CAAC;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,WAAW,CAClB,QAA6B,EAC7B,KAAa,EACb,MAAqB,EACrB,eAAyB,EACzB,MAAe;IAEf,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;IACvF,OAAO;QACL,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3B,KAAK;QACL,MAAM;QACN,WAAW;QACX,eAAe;QACf,MAAM;KACP,CAAC;AACJ,CAAC"}
@@ -0,0 +1,65 @@
1
+ export type IntegrationPhase = "preflight" | "connect" | "execute" | "verify" | "cleanup"; // the five runner phases, listed in the order the runner executes them
2
+ export interface PhaseResult { // outcome of a single phase of one integration run
3
+ phase: IntegrationPhase;
4
+ status: "pass" | "fail" | "skipped"; // "skipped" marks a phase that was not run because an earlier phase failed
5
+ durationMs?: number; // elapsed time of the phase; the runner omits it on skipped entries
6
+ errorMessage?: string; // message of the error that failed the phase; the runner sets it only when status is "fail"
7
+ }
8
+ export interface IntegrationRunResult { // aggregated result of one runIntegrationCase call
9
+ evalId: string; // the eval case id converted with String()
10
+ runId: string; // caller-supplied run id, or a random 8-character upper-case id
11
+ phases: PhaseResult[]; // one entry per phase, in execution order
12
+ overallPass: boolean; // true when every phase status is "pass" or "skipped"
13
+ testArtifactIds: string[]; // the runner seeds this list with the run id
14
+ dryRun: boolean; // whether the run was executed with opts.dryRun
15
+ }
16
+ export interface PlatformRateLimit { // request budget for one platform, handed to PlatformRateLimiter by the runner
17
+ requestsPerMinute: number;
18
+ }
19
+ export interface IntegrationRequirements { // browser and platform requirements of an integration eval case
20
+ chromeProfile?: string; // profile name; resolved to a path via resolveProfile when chromeProfilePath is not set
21
+ chromeProfilePath?: string; // explicit profile directory for launchPersistentContext; takes precedence over chromeProfile
22
+ platform?: string; // platform key used for rate limiting during the execute phase
23
+ rateLimit?: PlatformRateLimit; // per-case rate limit applied to `platform` instead of the limiter's defaults
24
+ }
25
+ export interface CleanupAction { // one teardown step executed by the runner's cleanup phase
26
+ type: "delete_post" | "remove_artifact" | "custom";
27
+ description: string; // used in the log line when the action fails
28
+ execute?: () => Promise<void>; // actions without an execute callback are skipped by the runner
29
+ }
30
+ export interface IntegrationEvalCase { // definition of one browser-based integration test
31
+ id: number | string;
32
+ name: string; // shown in the simulated output of a dry run
33
+ prompt: string; // sent to the LLM after the runner prepends its [VSKILL-TEST-<runId>] prefix and instructions
34
+ expected_output: string;
35
+ assertions: Array<{ // each assertion is judged against the generated output in the verify phase
36
+ id: string;
37
+ text: string;
38
+ type: string;
39
+ }>;
40
+ testType: "integration";
41
+ requiredCredentials?: string[]; // credential names that must resolve during preflight, otherwise the run aborts
42
+ requirements?: IntegrationRequirements;
43
+ cleanup?: CleanupAction[]; // executed in order during cleanup; individual failures are logged, not thrown
44
+ }
45
+ export interface IntegrationRunOpts { // options for runIntegrationCase
46
+ dryRun?: boolean; // when true the runner skips the Playwright check, the browser launch, rate limiting and LLM generation
47
+ confirm?: boolean; // NOTE(review): not read by integration-runner.js; presumably consumed by the CLI command — confirm
48
+ skillDir: string; // skill directory; the runner reads SKILL.md from it and passes it to credential resolution
49
+ runId?: string; // overrides the randomly generated run id
50
+ }
51
+ /** Default rate limits per platform (requests per minute), keyed by lower-case platform name. */
52
+ export declare const DEFAULT_RATE_LIMITS: Record<string, PlatformRateLimit>;
53
+ export declare const DEFAULT_RATE_LIMIT: PlatformRateLimit; // NOTE(review): presumably the fallback for platforms missing from DEFAULT_RATE_LIMITS — confirm in rate-limiter
54
+ export declare const VALID_CLEANUP_ACTIONS: readonly ["delete_post", "remove_artifact", "custom"]; // the cleanup action names accepted in eval schemas
55
+ export type CleanupActionType = typeof VALID_CLEANUP_ACTIONS[number]; // union of the entries of VALID_CLEANUP_ACTIONS
56
+ export interface EvalCleanupSchema { // serialisable description of a cleanup step, used for eval generation validation
57
+ action: CleanupActionType; // must be one of VALID_CLEANUP_ACTIONS
58
+ platform?: string;
59
+ identifier?: string;
60
+ description?: string;
61
+ }
62
+ export interface EvalRequirementsSchema { // serialisable subset of IntegrationRequirements, used for eval generation validation
63
+ chromeProfile?: string;
64
+ platform?: string;
65
+ }
@@ -0,0 +1,18 @@
1
+ // ---------------------------------------------------------------------------
2
+ // integration-types.ts -- types for the integration test runner
3
+ // ---------------------------------------------------------------------------
4
+ /** Default rate limits per platform (requests per minute), keyed by lower-case platform name. */
5
+ export const DEFAULT_RATE_LIMITS = {
6
+ x: { requestsPerMinute: 3 }, // "x" and "twitter" carry the same limit
7
+ twitter: { requestsPerMinute: 3 },
8
+ linkedin: { requestsPerMinute: 2 },
9
+ slack: { requestsPerMinute: 10 },
10
+ instagram: { requestsPerMinute: 5 },
11
+ facebook: { requestsPerMinute: 5 },
12
+ };
13
+ export const DEFAULT_RATE_LIMIT = { requestsPerMinute: 10 }; // NOTE(review): presumably the fallback for platforms not listed above — confirm in rate-limiter.js
14
+ // ---------------------------------------------------------------------------
15
+ // Schema types for eval generation validation (US-003, US-004)
16
+ // ---------------------------------------------------------------------------
17
+ export const VALID_CLEANUP_ACTIONS = ["delete_post", "remove_artifact", "custom"]; // the accepted cleanup action names; the declaration file types this as a readonly tuple
18
+ //# sourceMappingURL=integration-types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"integration-types.js","sourceRoot":"","sources":["../../src/eval/integration-types.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gEAAgE;AAChE,8EAA8E;AAwD9E,8DAA8D;AAC9D,MAAM,CAAC,MAAM,mBAAmB,GAAsC;IACpE,CAAC,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IAC3B,OAAO,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IACjC,QAAQ,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IAClC,KAAK,EAAE,EAAE,iBAAiB,EAAE,EAAE,EAAE;IAChC,SAAS,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IACnC,QAAQ,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;CACnC,CAAC;AAEF,MAAM,CAAC,MAAM,kBAAkB,GAAsB,EAAE,iBAAiB,EAAE,EAAE,EAAE,CAAC;AAE/E,8EAA8E;AAC9E,+DAA+D;AAC/D,8EAA8E;AAE9E,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,aAAa,EAAE,iBAAiB,EAAE,QAAQ,CAAU,CAAC"}
@@ -0,0 +1,29 @@
1
+ import type { AssertionResult } from "./judge.js";
2
+ export interface CacheEntry { // One stored judge verdict, as written by JudgeCache.getOrCompute.
3
+ pass: boolean; // verdict copied from the computed AssertionResult
4
+ reasoning: string; // reasoning copied from the computed AssertionResult
5
+ cachedAt: string; // ISO-8601 timestamp (new Date().toISOString()) taken when the entry was stored
6
+ judgeModel: string; // judge model string that was passed to getOrCompute
7
+ }
8
+ export interface CacheData { // On-disk layout of evals/.judge-cache.json.
9
+ version: number; // JudgeCache's loader only accepts files whose version is exactly 1
10
+ entries: Record<string, CacheEntry>; // keyed by the hex digest returned from JudgeCache.computeKey
11
+ }
12
+ export declare class JudgeCache { // Content-hash cache of judge results, stored at <skillDir>/evals/.judge-cache.json.
13
+ private readonly skillDir; // directory passed to the constructor; also where .gitignore is maintained
14
+ private data; // in-memory CacheData loaded by the constructor
15
+ private dirty; // set when getOrCompute stores a new entry; cleared by flush()
16
+ private readonly cachePath; // join(skillDir, "evals", ".judge-cache.json")
17
+ constructor(skillDir: string); // loads the existing cache file synchronously, if any
18
+ private load; // returns the parsed file, or an empty version-1 cache when the file is missing or not accepted; an unparseable file is deleted
19
+ static computeKey(assertionText: string, output: string, judgeModel: string): string; // SHA-256 hex digest of `${assertionText}||${output}||${judgeModel}`
20
+ getOrCompute(assertionText: string, output: string, judgeModel: string, compute: () => Promise<AssertionResult>): Promise<AssertionResult>; // on a hit the returned result has id "" (the caller must set it); on a miss compute() is awaited and its pass/reasoning are stored
21
+ has(assertionText: string, output: string, judgeModel: string): boolean; // true when an entry exists for the computed key
22
+ get size(): number; // number of entries currently held in memory
23
+ /**
24
+ * Persist cache to disk. Call after all operations are complete.
25
+ * Also ensures .judge-cache.json is in .gitignore.
26
+ */
27
+ flush(): void; // no-op unless a new entry was stored since the last flush
28
+ private ensureGitignore; // best-effort: adds "evals/.judge-cache.json" to <skillDir>/.gitignore, ignoring any I/O error
29
+ }
@@ -0,0 +1,109 @@
1
+ // ---------------------------------------------------------------------------
2
+ // judge-cache.ts -- SHA-256 content-hash cache for judge results
3
+ // ---------------------------------------------------------------------------
4
+ import { createHash } from "node:crypto";
5
+ import { readFileSync, writeFileSync, mkdirSync, existsSync, unlinkSync, appendFileSync } from "node:fs";
6
+ import { join, dirname } from "node:path";
7
/**
 * SHA-256 content-hash cache for judge results.
 *
 * Results are keyed by (assertion text, LLM output, judge model) and kept in
 * `<skillDir>/evals/.judge-cache.json`. The file is read once in the
 * constructor and only written back by `flush()`.
 */
export class JudgeCache {
    /** Directory of the skill; the cache file and `.gitignore` live under it. */
    skillDir;
    /** In-memory cache contents: `{ version: 1, entries: { [key]: CacheEntry } }`. */
    data;
    /** True while `data` holds entries that have not been flushed to disk. */
    dirty = false;
    /** Path of the JSON cache file. */
    cachePath;
    /**
     * @param skillDir Directory of the skill whose judge results are cached.
     */
    constructor(skillDir) {
        this.skillDir = skillDir;
        this.cachePath = join(skillDir, "evals", ".judge-cache.json");
        this.data = this.load();
    }
    /**
     * Read the cache file from disk.
     *
     * @returns The parsed cache when it is a version-1 object whose `entries`
     *   is a plain (non-null, non-array) object; otherwise a fresh empty cache.
     *   A file that cannot be read or parsed is deleted (best effort).
     */
    load() {
        try {
            if (existsSync(this.cachePath)) {
                const parsed = JSON.parse(readFileSync(this.cachePath, "utf-8"));
                // `typeof null` and `typeof []` are both "object", so they must be
                // rejected explicitly: a null map would make every lookup throw, and
                // an array would silently drop its string-keyed entries when
                // serialized by flush().
                const isUsable = parsed !== null &&
                    typeof parsed === "object" &&
                    parsed.version === 1 &&
                    parsed.entries !== null &&
                    typeof parsed.entries === "object" &&
                    !Array.isArray(parsed.entries);
                if (isUsable) {
                    return parsed;
                }
            }
        }
        catch {
            // Corruption recovery: delete corrupted file and start fresh
            try {
                if (existsSync(this.cachePath)) {
                    unlinkSync(this.cachePath);
                    console.warn(`[judge-cache] Corrupted cache file deleted: ${this.cachePath}`);
                }
            }
            catch {
                // ignore deletion failure
            }
        }
        return { version: 1, entries: {} };
    }
    /**
     * Compute the cache key for one judged assertion.
     *
     * NOTE(review): the three parts are joined with a plain `||` delimiter, so
     * inputs that themselves contain `||` can in principle produce the same key.
     * The format is kept as-is so keys in existing cache files stay valid.
     *
     * @returns SHA-256 hex digest of `${assertionText}||${output}||${judgeModel}`.
     */
    static computeKey(assertionText, output, judgeModel) {
        return createHash("sha256")
            .update(`${assertionText}||${output}||${judgeModel}`)
            .digest("hex");
    }
    /**
     * Return the cached verdict for this input, or run `compute` and cache it.
     *
     * @param compute Called only on a cache miss; its `pass` and `reasoning`
     *   are stored under the computed key.
     * @returns On a hit, a result rebuilt from the cache with `id` set to ""
     *   (the caller is expected to overwrite it). On a miss, the value
     *   produced by `compute`, unchanged. A rejection from `compute`
     *   propagates and nothing is cached.
     */
    async getOrCompute(assertionText, output, judgeModel, compute) {
        const key = JudgeCache.computeKey(assertionText, output, judgeModel);
        const cached = this.data.entries[key];
        if (cached) {
            // Return cached result, reconstructing the AssertionResult shape
            return {
                id: "", // caller overwrites this
                text: assertionText,
                pass: cached.pass,
                reasoning: cached.reasoning,
            };
        }
        const result = await compute();
        // Store in cache
        this.data.entries[key] = {
            pass: result.pass,
            reasoning: result.reasoning,
            cachedAt: new Date().toISOString(),
            judgeModel,
        };
        this.dirty = true;
        return result;
    }
    /** @returns Whether an entry exists for this (assertion, output, model) triple. */
    has(assertionText, output, judgeModel) {
        const key = JudgeCache.computeKey(assertionText, output, judgeModel);
        return key in this.data.entries;
    }
    /** Number of entries currently held in memory. */
    get size() {
        return Object.keys(this.data.entries).length;
    }
    /**
     * Persist cache to disk. Call after all operations are complete.
     * Also ensures .judge-cache.json is in .gitignore.
     * Does nothing when no new entry was stored since the last flush.
     */
    flush() {
        if (!this.dirty)
            return;
        const dir = dirname(this.cachePath);
        mkdirSync(dir, { recursive: true });
        writeFileSync(this.cachePath, JSON.stringify(this.data, null, 2), "utf-8");
        this.dirty = false;
        // T-009: Ensure .judge-cache.json is in .gitignore
        this.ensureGitignore();
    }
    /**
     * Make sure `<skillDir>/.gitignore` mentions the cache file, creating the
     * file if it does not exist. Any I/O error is swallowed on purpose.
     */
    ensureGitignore() {
        const gitignorePath = join(this.skillDir, ".gitignore");
        const pattern = "evals/.judge-cache.json";
        try {
            if (existsSync(gitignorePath)) {
                const content = readFileSync(gitignorePath, "utf-8");
                if (content.includes(pattern))
                    return;
                appendFileSync(gitignorePath, `\n${pattern}\n`);
            }
            else {
                writeFileSync(gitignorePath, `${pattern}\n`, "utf-8");
            }
        }
        catch {
            // Non-critical — don't fail the run for gitignore issues
        }
    }
}
109
+ //# sourceMappingURL=judge-cache.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"judge-cache.js","sourceRoot":"","sources":["../../src/eval/judge-cache.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iEAAiE;AACjE,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzG,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAe1C,MAAM,OAAO,UAAU;IAKQ;IAJrB,IAAI,CAAY;IAChB,KAAK,GAAG,KAAK,CAAC;IACL,SAAS,CAAS;IAEnC,YAA6B,QAAgB;QAAhB,aAAQ,GAAR,QAAQ,CAAQ;QAC3C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,mBAAmB,CAAC,CAAC;QAC9D,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAEO,IAAI;QACV,IAAI,CAAC;YACH,IAAI,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC/B,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;gBAClD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAC/B,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,OAAO,KAAK,CAAC,IAAI,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;oBACvG,OAAO,MAAmB,CAAC;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,6DAA6D;YAC7D,IAAI,CAAC;gBACH,IAAI,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;oBAC/B,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBAC3B,OAAO,CAAC,IAAI,CAAC,+CAA+C,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,0BAA0B;YAC5B,CAAC;QACH,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,CAAC,UAAU,CAAC,aAAqB,EAAE,MAAc,EAAE,UAAkB;QACzE,OAAO,UAAU,CAAC,QAAQ,CAAC;aACxB,MAAM,CAAC,GAAG,aAAa,KAAK,MAAM,KAAK,UAAU,EAAE,CAAC;aACpD,MAAM,CAAC,KAAK,CAAC,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,aAAqB,EACrB,MAAc,EACd,UAAkB,EAClB,OAAuC;QAEvC,MAAM,GAAG,GAAG,UAAU,CAAC,UAAU,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAErE,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACtC,IAAI,MAAM,EAAE,CAAC;YACX,iEAAiE;YACjE,OAAO;gBACL,EAAE,EAAE,EAAE,EAAE,yBAAyB;gBACjC,IAAI,EAAE,aAAa;gBACnB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,EAAE,CAAC;QAE/B,iBAAiB;QACjB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG;
YACvB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAClC,UAAU;SACX,CAAC;QACF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,GAAG,CAAC,aAAqB,EAAE,MAAc,EAAE,UAAkB;QAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,UAAU,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QACrE,OAAO,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC;IAClC,CAAC;IAED,IAAI,IAAI;QACN,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC/C,CAAC;IAED;;;OAGG;IACH,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,KAAK;YAAE,OAAO;QAExB,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACpC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACpC,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QAC3E,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QAEnB,mDAAmD;QACnD,IAAI,CAAC,eAAe,EAAE,CAAC;IACzB,CAAC;IAEO,eAAe;QACrB,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QACxD,MAAM,OAAO,GAAG,yBAAyB,CAAC;QAE1C,IAAI,CAAC;YACH,IAAI,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBACrD,IAAI,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;oBAAE,OAAO;gBACtC,cAAc,CAAC,aAAa,EAAE,KAAK,OAAO,IAAI,CAAC,CAAC;YAClD,CAAC;iBAAM,CAAC;gBACN,aAAa,CAAC,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE,OAAO,CAAC,CAAC;YACxD,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,yDAAyD;QAC3D,CAAC;IACH,CAAC;CACF"}
@@ -8,4 +8,4 @@ export interface AssertionResult {
8
8
  reasoning: string;
9
9
  }
10
10
  export declare function buildJudgeSystemPrompt(mcpDeps?: McpDependency[]): string;
11
- export declare function judgeAssertion(output: string, assertion: Assertion, client: LlmClient, mcpDeps?: McpDependency[]): Promise<AssertionResult>;
11
+ export declare function judgeAssertion(output: string, assertion: Assertion, client: LlmClient, judgeClientOrMcpDeps?: LlmClient | McpDependency[], mcpDeps?: McpDependency[]): Promise<AssertionResult>;
@@ -17,8 +17,25 @@ When evaluating assertions:
17
17
 
18
18
  Respond with ONLY a JSON object: { "pass": boolean, "reasoning": "brief explanation" }`;
19
19
  }
20
- export async function judgeAssertion(output, assertion, client, mcpDeps) {
21
- const systemPrompt = buildJudgeSystemPrompt(mcpDeps);
20
+ export async function judgeAssertion(output, assertion, client, judgeClientOrMcpDeps, mcpDeps) {
21
+ // Support both old signature (client, mcpDeps?) and new (client, judgeClient?, mcpDeps?)
22
+ let effectiveJudgeClient;
23
+ let effectiveMcpDeps;
24
+ if (Array.isArray(judgeClientOrMcpDeps)) {
25
+ // Old-style call: judgeAssertion(output, assertion, client, mcpDeps)
26
+ effectiveJudgeClient = client;
27
+ effectiveMcpDeps = judgeClientOrMcpDeps;
28
+ }
29
+ else if (judgeClientOrMcpDeps && typeof judgeClientOrMcpDeps === "object" && "generate" in judgeClientOrMcpDeps) {
30
+ // New-style call: judgeAssertion(output, assertion, client, judgeClient, mcpDeps?)
31
+ effectiveJudgeClient = judgeClientOrMcpDeps;
32
+ effectiveMcpDeps = mcpDeps;
33
+ }
34
+ else {
35
+ effectiveJudgeClient = client;
36
+ effectiveMcpDeps = mcpDeps;
37
+ }
38
+ const systemPrompt = buildJudgeSystemPrompt(effectiveMcpDeps);
22
39
  const userPrompt = `## LLM Output
23
40
  ${output}
24
41
 
@@ -26,7 +43,7 @@ ${output}
26
43
  ${assertion.text}
27
44
 
28
45
  Does the LLM output satisfy this assertion? Respond with JSON only: { "pass": boolean, "reasoning": "..." }`;
29
- const { text: raw } = await client.generate(systemPrompt, userPrompt);
46
+ const { text: raw } = await effectiveJudgeClient.generate(systemPrompt, userPrompt);
30
47
  const parsed = parseJudgeResponse(raw);
31
48
  return {
32
49
  id: assertion.id,
@@ -1 +1 @@
1
- {"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/eval/judge.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAa9E,MAAM,YAAY,GAAG,qNAAqN,CAAC;AAE3O,MAAM,UAAU,sBAAsB,CAAC,OAAyB;IAC9D,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3D,OAAO;;oIAE2H,UAAU;;;;;;uFAMvD,CAAC;AACxF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAc,EACd,SAAoB,EACpB,MAAiB,EACjB,OAAyB;IAEzB,MAAM,YAAY,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;IAErD,MAAM,UAAU,GAAG;EACnB,MAAM;;;EAGN,SAAS,CAAC,IAAI;;4GAE4F,CAAC;IAE3G,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAEtE,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;IAEvC,OAAO;QACL,EAAE,EAAE,SAAS,CAAC,EAAE;QAChB,IAAI,EAAE,SAAS,CAAC,IAAI;QACpB,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,4CAA4C;IAC5C,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACxC,CAAC;QACD,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,OAAO,MAAM,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;SACxE,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,sEAAsE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAC1F,CAAC;IACJ,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/eval/judge.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAa9E,MAAM,YAAY,GAAG,qNAAqN,CAAC;AAE3O,MAAM,UAAU,sBAAsB,CAAC,OAAyB;IAC9D,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3D,OAAO;;oIAE2H,UAAU;;;;;;uFAMvD,CAAC;AACxF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAc,EACd,SAAoB,EACpB,MAAiB,EACjB,oBAAkD,EAClD,OAAyB;IAEzB,yFAAyF;IACzF,IAAI,oBAA+B,CAAC;IACpC,IAAI,gBAA6C,CAAC;IAElD,IAAI,KAAK,CAAC,OAAO,CAAC,oBAAoB,CAAC,EAAE,CAAC;QACxC,qEAAqE;QACrE,oBAAoB,GAAG,MAAM,CAAC;QAC9B,gBAAgB,GAAG,oBAAoB,CAAC;IAC1C,CAAC;SAAM,IAAI,oBAAoB,IAAI,OAAO,oBAAoB,KAAK,QAAQ,IAAI,UAAU,IAAI,oBAAoB,EAAE,CAAC;QAClH,mFAAmF;QACnF,oBAAoB,GAAG,oBAAoB,CAAC;QAC5C,gBAAgB,GAAG,OAAO,CAAC;IAC7B,CAAC;SAAM,CAAC;QACN,oBAAoB,GAAG,MAAM,CAAC;QAC9B,gBAAgB,GAAG,OAAO,CAAC;IAC7B,CAAC;IAED,MAAM,YAAY,GAAG,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAE9D,MAAM,UAAU,GAAG;EACnB,MAAM;;;EAGN,SAAS,CAAC,IAAI;;4GAE4F,CAAC;IAE3G,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,oBAAoB,CAAC,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAEpF,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;IAEvC,OAAO;QACL,EAAE,EAAE,SAAS,CAAC,EAAE;QAChB,IAAI,EAAE,SAAS,CAAC,IAAI;QACpB,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,4CAA4C;IAC5C,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACxC,CAAC;QACD,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,OAAO,MAAM,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;SACxE,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,sEAAsE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GA
AG,CAAC,EAAE,CAC1F,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -3,12 +3,13 @@ export interface GenerateResult {
3
3
  durationMs: number;
4
4
  inputTokens: number | null;
5
5
  outputTokens: number | null;
6
+ cost: number | null;
6
7
  }
7
8
  export interface LlmClient {
8
9
  generate(systemPrompt: string, userPrompt: string): Promise<GenerateResult>;
9
10
  readonly model: string;
10
11
  }
11
- export type ProviderName = "anthropic" | "claude-cli" | "codex-cli" | "gemini-cli" | "ollama";
12
+ export type ProviderName = "anthropic" | "claude-cli" | "codex-cli" | "gemini-cli" | "ollama" | "openrouter";
12
13
  export interface LlmOverrides {
13
14
  provider?: ProviderName;
14
15
  model?: string;