vskill 0.5.11 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/eval/credentials.d.ts +12 -0
- package/dist/commands/eval/credentials.js +140 -0
- package/dist/commands/eval/credentials.js.map +1 -0
- package/dist/commands/eval/generate-all.d.ts +1 -1
- package/dist/commands/eval/generate-all.js +57 -12
- package/dist/commands/eval/generate-all.js.map +1 -1
- package/dist/commands/eval/init.d.ts +2 -1
- package/dist/commands/eval/init.js +76 -10
- package/dist/commands/eval/init.js.map +1 -1
- package/dist/commands/eval/run.d.ts +7 -1
- package/dist/commands/eval/run.js +207 -26
- package/dist/commands/eval/run.js.map +1 -1
- package/dist/commands/eval/sweep.d.ts +7 -0
- package/dist/commands/eval/sweep.js +99 -0
- package/dist/commands/eval/sweep.js.map +1 -0
- package/dist/commands/eval.d.ts +10 -0
- package/dist/commands/eval.js +62 -4
- package/dist/commands/eval.js.map +1 -1
- package/dist/eval/batch-judge.d.ts +27 -0
- package/dist/eval/batch-judge.js +242 -0
- package/dist/eval/batch-judge.js.map +1 -0
- package/dist/eval/chrome-profile.d.ts +16 -0
- package/dist/eval/chrome-profile.js +65 -0
- package/dist/eval/chrome-profile.js.map +1 -0
- package/dist/eval/comparator.d.ts +3 -1
- package/dist/eval/comparator.js +19 -3
- package/dist/eval/comparator.js.map +1 -1
- package/dist/eval/concurrency.d.ts +13 -0
- package/dist/eval/concurrency.js +53 -0
- package/dist/eval/concurrency.js.map +1 -0
- package/dist/eval/credential-resolver.d.ts +31 -0
- package/dist/eval/credential-resolver.js +111 -0
- package/dist/eval/credential-resolver.js.map +1 -0
- package/dist/eval/integration-runner.d.ts +12 -0
- package/dist/eval/integration-runner.js +303 -0
- package/dist/eval/integration-runner.js.map +1 -0
- package/dist/eval/integration-types.d.ts +65 -0
- package/dist/eval/integration-types.js +18 -0
- package/dist/eval/integration-types.js.map +1 -0
- package/dist/eval/judge-cache.d.ts +29 -0
- package/dist/eval/judge-cache.js +109 -0
- package/dist/eval/judge-cache.js.map +1 -0
- package/dist/eval/judge.d.ts +1 -1
- package/dist/eval/judge.js +20 -3
- package/dist/eval/judge.js.map +1 -1
- package/dist/eval/llm.d.ts +2 -1
- package/dist/eval/llm.js +54 -2
- package/dist/eval/llm.js.map +1 -1
- package/dist/eval/prompt-builder.d.ts +10 -0
- package/dist/eval/prompt-builder.js +167 -0
- package/dist/eval/prompt-builder.js.map +1 -1
- package/dist/eval/rate-limiter.d.ts +20 -0
- package/dist/eval/rate-limiter.js +62 -0
- package/dist/eval/rate-limiter.js.map +1 -0
- package/dist/eval/schema.d.ts +16 -0
- package/dist/eval/schema.js +58 -6
- package/dist/eval/schema.js.map +1 -1
- package/dist/eval/verdict.d.ts +9 -0
- package/dist/eval/verdict.js +50 -0
- package/dist/eval/verdict.js.map +1 -1
- package/dist/eval-server/api-routes.js +99 -3
- package/dist/eval-server/api-routes.js.map +1 -1
- package/dist/eval-server/benchmark-runner.d.ts +7 -0
- package/dist/eval-server/benchmark-runner.js +158 -42
- package/dist/eval-server/benchmark-runner.js.map +1 -1
- package/dist/eval-server/concurrency.d.ts +1 -13
- package/dist/eval-server/concurrency.js +3 -49
- package/dist/eval-server/concurrency.js.map +1 -1
- package/dist/eval-server/eval-server.js +4 -0
- package/dist/eval-server/eval-server.js.map +1 -1
- package/dist/eval-server/integration-routes.d.ts +2 -0
- package/dist/eval-server/integration-routes.js +100 -0
- package/dist/eval-server/integration-routes.js.map +1 -0
- package/dist/eval-server/skill-create-routes.js +151 -22
- package/dist/eval-server/skill-create-routes.js.map +1 -1
- package/dist/eval-server/sweep-routes.d.ts +2 -0
- package/dist/eval-server/sweep-routes.js +93 -0
- package/dist/eval-server/sweep-routes.js.map +1 -0
- package/dist/eval-server/sweep-runner.d.ts +93 -0
- package/dist/eval-server/sweep-runner.js +275 -0
- package/dist/eval-server/sweep-runner.js.map +1 -0
- package/dist/eval-ui/assets/index-C9_Pey9T.css +1 -0
- package/dist/eval-ui/assets/index-KfkLPyh3.js +74 -0
- package/dist/eval-ui/index.html +2 -2
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/eval-ui/assets/index-CxHCKEhf.js +0 -74
- package/dist/eval-ui/assets/index-D2UkOol1.css +0 -1
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { IntegrationRunResult, IntegrationEvalCase, IntegrationRunOpts } from "./integration-types.js";
|
|
2
|
+
/**
 * Verify that the "playwright" package can be resolved from this module.
 *
 * @throws Error containing install instructions when resolution fails.
 */
export declare function checkPlaywright(): void;
|
|
3
|
+
/**
 * Run one integration eval case through the phases
 * preflight -> connect -> execute -> verify -> cleanup.
 *
 * Phase failures are recorded in the returned result's `phases` list rather
 * than thrown by the phase executor.
 *
 * @param evalCase The integration case to execute.
 * @param opts Run options (skill directory, dry-run flag, optional run id).
 * @returns The per-phase outcomes and the overall verdict for this run.
 */
export declare function runIntegrationCase(evalCase: IntegrationEvalCase, opts: IntegrationRunOpts): Promise<IntegrationRunResult>;
|
|
4
|
+
/**
 * Ask the user on the terminal whether the listed actions may be performed.
 *
 * Resolves to `true` without prompting when the `CI` environment variable is
 * exactly "true". Otherwise resolves to `true` only for a "y" / "yes" answer
 * (case-insensitive).
 *
 * @param platform Platform name shown in the question.
 * @param actions Human-readable action descriptions, listed one per line.
 */
export declare function promptConfirmation(platform: string, actions: string[]): Promise<boolean>;
|
|
5
|
+
/**
|
|
6
|
+
* Check whether no integration run has been recorded yet for this skill, i.e. "evals/.integration-history.json" does not exist under skillDir.
|
|
7
|
+
*/
|
|
8
|
+
export declare function isFirstRun(skillDir: string): boolean;
|
|
9
|
+
/**
|
|
10
|
+
* Append a run result to the skill's integration history file ("evals/.integration-history.json" under skillDir).
|
|
11
|
+
*/
|
|
12
|
+
export declare function recordRun(skillDir: string, result: IntegrationRunResult): void;
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// integration-runner.ts -- 5-phase browser-based integration test runner
|
|
3
|
+
//
|
|
4
|
+
// Phases: Preflight -> Connect -> Execute -> Verify -> Cleanup
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
import { randomUUID } from "node:crypto";
|
|
7
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
import { createRequire } from "node:module";
|
|
10
|
+
import { resolveAllCredentials } from "./credential-resolver.js";
|
|
11
|
+
import { resolveProfile } from "./chrome-profile.js";
|
|
12
|
+
import { PlatformRateLimiter } from "./rate-limiter.js";
|
|
13
|
+
import { judgeAssertion } from "./judge.js";
|
|
14
|
+
import { createLlmClient } from "./llm.js";
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// SIGINT cleanup state
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Module-level SIGINT bookkeeping shared by the three handler functions below.
let cleanupRegistered = false, // true while sigintHandler is attached to "SIGINT"
    cleanupFn = null, // cleanup callback of the run in progress, or null
    cleanupDone = false; // set once the handler has started, so repeat signals are ignored
|
|
21
|
+
/**
 * Install `fn` as the cleanup callback to run on SIGINT.
 *
 * The callback slot is always overwritten and the "already ran" flag reset;
 * the process-level listener itself is attached only once.
 *
 * @param fn Async cleanup callback invoked by the SIGINT handler.
 */
function registerSigintHandler(fn) {
    cleanupFn = fn;
    cleanupDone = false;
    // Listener already attached: only the callback needed swapping.
    if (cleanupRegistered) {
        return;
    }
    process.on("SIGINT", sigintHandler);
    cleanupRegistered = true;
}
|
|
29
|
+
/**
 * Undo registerSigintHandler: drop the cleanup callback and detach the
 * SIGINT listener so a later run can register again.
 */
function deregisterSigintHandler() {
    cleanupFn = null;
    cleanupRegistered = false;
    process.removeListener("SIGINT", sigintHandler);
}
|
|
34
|
+
/**
 * "SIGINT" listener: run the registered cleanup callback once, then exit.
 *
 * Repeated signals while cleanup is in flight are ignored via `cleanupDone`.
 * A failing cleanup is logged and does not prevent the exit.
 *
 * The process exits with 130 (128 + signal number 2), the conventional status
 * for termination by SIGINT, so shells and CI do not mistake an interrupted
 * run for a successful one.
 */
async function sigintHandler() {
    if (cleanupDone)
        return;
    cleanupDone = true;
    console.log("\nSIGINT received — running cleanup...");
    if (cleanupFn) {
        try {
            await cleanupFn();
            console.log("Cleanup complete, exiting.");
        }
        catch (err) {
            // A rejection value need not be an Error (it may even be null);
            // never dereference it blindly inside the catch.
            const reason = err instanceof Error ? err.message : String(err);
            console.error("Cleanup failed:", reason);
        }
    }
    process.exit(130);
}
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Playwright lazy check
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
export function checkPlaywright() {
|
|
54
|
+
try {
|
|
55
|
+
const require = createRequire(import.meta.url);
|
|
56
|
+
require.resolve("playwright");
|
|
57
|
+
}
|
|
58
|
+
catch {
|
|
59
|
+
throw new Error("Playwright is required for integration tests. Install it with:\n" +
|
|
60
|
+
" npm install --save-dev playwright && npx playwright install chromium");
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
// ---------------------------------------------------------------------------
|
|
64
|
+
// Main runner
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
/**
 * Run one integration eval case through five phases:
 * preflight -> connect -> execute -> verify -> cleanup.
 *
 * - A failed preflight marks every later phase "skipped" and returns.
 * - A failed connect or execute skips the remaining work phases but still
 *   runs cleanup.
 * - A failed verify is recorded and cleanup runs as usual.
 *
 * While the run is in flight a SIGINT handler is registered that performs the
 * same cleanup; it is always deregistered before returning.
 *
 * With `opts.dryRun` set, the Playwright check, browser launch, rate limiting
 * and LLM generation are skipped and a simulated output is used instead.
 * NOTE(review): the verify phase has no dry-run guard, so assertions are still
 * judged (via createLlmClient/judgeAssertion) against the simulated output —
 * confirm this is intended.
 *
 * @param evalCase Integration case: prompt, assertions, requirements, cleanup actions.
 * @param opts Run options: `skillDir` (required), `dryRun`, optional `runId`.
 * @returns The result object produced by buildResult for this run.
 */
export async function runIntegrationCase(evalCase, opts) {
    const runId = opts.runId ?? randomUUID().slice(0, 8).toUpperCase();
    const testPrefix = `[VSKILL-TEST-${runId}]`;
    const phases = [];
    const testArtifactIds = [runId];
    let browser = null;
    let context = null;
    // Single cleanup entry point shared by the SIGINT handler and the cleanup
    // phase. A persistent-context launch leaves `browser` null, so the context
    // must be the fallback close target in BOTH paths.
    const cleanup = () => runCleanup(evalCase, browser ?? context, testArtifactIds);
    const finish = () => buildResult(evalCase, runId, phases, testArtifactIds, !!opts.dryRun);
    // Register cleanup for SIGINT
    registerSigintHandler(async () => {
        await cleanup();
    });
    try {
        // -----------------------------------------------------------------------
        // Phase 1: PREFLIGHT
        // -----------------------------------------------------------------------
        const preflightResult = await runPhase("preflight", async () => {
            // Check credentials
            if (evalCase.requiredCredentials?.length) {
                const statuses = resolveAllCredentials(evalCase.requiredCredentials, opts.skillDir);
                const missing = statuses.filter((s) => s.status === "missing");
                if (missing.length > 0) {
                    throw new Error(`Missing credentials: ${missing.map((m) => m.name).join(", ")}. ` +
                        `Set them with: vskill credentials set <KEY>`);
                }
            }
            // Check Chrome profile: a named profile without an explicit path
            // must be resolvable (resolveProfile is expected to throw otherwise).
            const profileName = evalCase.requirements?.chromeProfile;
            const profilePath = evalCase.requirements?.chromeProfilePath;
            if (profileName && !profilePath) {
                resolveProfile(profileName);
            }
            // Check Playwright
            if (!opts.dryRun) {
                checkPlaywright();
            }
        });
        phases.push(preflightResult);
        if (preflightResult.status === "fail") {
            // Abort remaining phases
            for (const p of ["connect", "execute", "verify", "cleanup"]) {
                phases.push({ phase: p, status: "skipped" });
            }
            return finish();
        }
        // -----------------------------------------------------------------------
        // Phase 2: CONNECT
        // -----------------------------------------------------------------------
        const connectResult = await runPhase("connect", async () => {
            if (opts.dryRun) {
                console.log(`[DRY RUN] Would launch browser with profile: ${evalCase.requirements?.chromeProfile ?? "default"}`);
                return;
            }
            const profileName = evalCase.requirements?.chromeProfile;
            const profilePath = evalCase.requirements?.chromeProfilePath ?? (profileName ? resolveProfile(profileName) : undefined);
            const pw = await import("playwright");
            if (profilePath) {
                context = await pw.chromium.launchPersistentContext(profilePath, {
                    headless: false,
                    args: ["--disable-blink-features=AutomationControlled"],
                });
                browser = null; // persistent context manages its own browser
            }
            else {
                browser = await pw.chromium.launch({ headless: false });
                context = await browser.newContext();
            }
        });
        phases.push(connectResult);
        if (connectResult.status === "fail") {
            phases.push({ phase: "execute", status: "skipped" });
            phases.push({ phase: "verify", status: "skipped" });
            phases.push(await runPhase("cleanup", cleanup));
            return finish();
        }
        // -----------------------------------------------------------------------
        // Phase 3: EXECUTE
        // -----------------------------------------------------------------------
        let generatedOutput = "";
        const executeResult = await runPhase("execute", async () => {
            const platform = evalCase.requirements?.platform;
            // Rate limiting
            if (platform && !opts.dryRun) {
                const rateLimiter = new PlatformRateLimiter(evalCase.requirements?.rateLimit
                    ? { [platform]: evalCase.requirements.rateLimit }
                    : undefined);
                await rateLimiter.acquire(platform);
            }
            // Build prompt with test prefix
            const promptWithPrefix = `${testPrefix}\n\nIMPORTANT: All content you create or post MUST include the prefix "${testPrefix}" for identification and cleanup.\n\n${evalCase.prompt}`;
            if (opts.dryRun) {
                console.log(`[DRY RUN] Would execute LLM with prompt:\n${promptWithPrefix.slice(0, 200)}...`);
                generatedOutput = `[DRY RUN] Simulated output for: ${evalCase.name}`;
                return;
            }
            // Create LLM client and generate; SKILL.md, when present, replaces
            // the generic system prompt.
            const client = createLlmClient();
            const skillMdPath = join(opts.skillDir, "SKILL.md");
            let systemPrompt = "You are executing an integration test. Follow the instructions precisely.";
            if (existsSync(skillMdPath)) {
                systemPrompt = readFileSync(skillMdPath, "utf-8");
            }
            const result = await client.generate(systemPrompt, promptWithPrefix);
            generatedOutput = result.text;
        });
        phases.push(executeResult);
        if (executeResult.status === "fail") {
            phases.push({ phase: "verify", status: "skipped" });
            phases.push(await runPhase("cleanup", cleanup));
            return finish();
        }
        // -----------------------------------------------------------------------
        // Phase 4: VERIFY
        // -----------------------------------------------------------------------
        const verifyResult = await runPhase("verify", async () => {
            if (!evalCase.assertions?.length)
                return;
            const client = createLlmClient();
            const results = await Promise.all(evalCase.assertions.map((assertion) => judgeAssertion(generatedOutput, assertion, client)));
            const failed = results.filter((r) => !r.pass);
            if (failed.length > 0) {
                throw new Error(`${failed.length} assertion(s) failed:\n` +
                    failed.map((f) => ` - ${f.text}: ${f.reasoning}`).join("\n"));
            }
        });
        phases.push(verifyResult);
        // -----------------------------------------------------------------------
        // Phase 5: CLEANUP
        // -----------------------------------------------------------------------
        const cleanupResult = await runPhase("cleanup", cleanup);
        phases.push(cleanupResult);
        return finish();
    }
    finally {
        deregisterSigintHandler();
    }
}
|
|
202
|
+
// ---------------------------------------------------------------------------
|
|
203
|
+
// Phase executor
|
|
204
|
+
// ---------------------------------------------------------------------------
|
|
205
|
+
async function runPhase(phase, fn) {
|
|
206
|
+
const start = Date.now();
|
|
207
|
+
try {
|
|
208
|
+
await fn();
|
|
209
|
+
return { phase, status: "pass", durationMs: Date.now() - start };
|
|
210
|
+
}
|
|
211
|
+
catch (err) {
|
|
212
|
+
return {
|
|
213
|
+
phase,
|
|
214
|
+
status: "fail",
|
|
215
|
+
durationMs: Date.now() - start,
|
|
216
|
+
errorMessage: err.message,
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
// ---------------------------------------------------------------------------
|
|
221
|
+
// Cleanup
|
|
222
|
+
// ---------------------------------------------------------------------------
|
|
223
|
+
async function runCleanup(evalCase, browserOrContext, _testArtifactIds) {
|
|
224
|
+
// Run cleanup actions defined in the eval case
|
|
225
|
+
if (evalCase.cleanup?.length) {
|
|
226
|
+
for (const action of evalCase.cleanup) {
|
|
227
|
+
try {
|
|
228
|
+
if (action.execute) {
|
|
229
|
+
await action.execute();
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
catch (err) {
|
|
233
|
+
// Log but do not throw — test result stands independently
|
|
234
|
+
console.error(`Cleanup action "${action.description}" failed:`, err.message);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
// Close browser
|
|
239
|
+
if (browserOrContext) {
|
|
240
|
+
try {
|
|
241
|
+
await browserOrContext.close();
|
|
242
|
+
}
|
|
243
|
+
catch {
|
|
244
|
+
// Browser may already be closed
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
// Confirmation prompt
|
|
250
|
+
// ---------------------------------------------------------------------------
|
|
251
|
+
export async function promptConfirmation(platform, actions) {
|
|
252
|
+
// Skip in CI
|
|
253
|
+
if (process.env.CI === "true")
|
|
254
|
+
return true;
|
|
255
|
+
const { createInterface } = await import("node:readline");
|
|
256
|
+
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
257
|
+
const actionList = actions.map((a) => ` - ${a}`).join("\n");
|
|
258
|
+
const question = `\nThis will perform the following actions on ${platform}:\n${actionList}\n\nProceed? (y/N) `;
|
|
259
|
+
return new Promise((resolve) => {
|
|
260
|
+
rl.question(question, (answer) => {
|
|
261
|
+
rl.close();
|
|
262
|
+
resolve(answer.toLowerCase() === "y" || answer.toLowerCase() === "yes");
|
|
263
|
+
});
|
|
264
|
+
});
|
|
265
|
+
}
|
|
266
|
+
/**
 * Report whether no integration run has been recorded yet for a skill.
 *
 * @param skillDir Skill directory that holds the "evals" folder.
 * @returns `true` when "evals/.integration-history.json" does not exist there.
 */
export function isFirstRun(skillDir) {
    const historyFile = join(skillDir, "evals", ".integration-history.json");
    const hasHistory = existsSync(historyFile);
    return !hasHistory;
}
|
|
272
|
+
/**
|
|
273
|
+
* Record a run in the integration history file.
|
|
274
|
+
*/
|
|
275
|
+
export function recordRun(skillDir, result) {
|
|
276
|
+
const historyPath = join(skillDir, "evals", ".integration-history.json");
|
|
277
|
+
let history = [];
|
|
278
|
+
if (existsSync(historyPath)) {
|
|
279
|
+
try {
|
|
280
|
+
history = JSON.parse(readFileSync(historyPath, "utf-8"));
|
|
281
|
+
}
|
|
282
|
+
catch {
|
|
283
|
+
history = [];
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
history.push(result);
|
|
287
|
+
writeFileSync(historyPath, JSON.stringify(history, null, 2), "utf-8");
|
|
288
|
+
}
|
|
289
|
+
// ---------------------------------------------------------------------------
|
|
290
|
+
// Helpers
|
|
291
|
+
// ---------------------------------------------------------------------------
|
|
292
|
+
function buildResult(evalCase, runId, phases, testArtifactIds, dryRun) {
|
|
293
|
+
const overallPass = phases.every((p) => p.status === "pass" || p.status === "skipped");
|
|
294
|
+
return {
|
|
295
|
+
evalId: String(evalCase.id),
|
|
296
|
+
runId,
|
|
297
|
+
phases,
|
|
298
|
+
overallPass,
|
|
299
|
+
testArtifactIds,
|
|
300
|
+
dryRun,
|
|
301
|
+
};
|
|
302
|
+
}
|
|
303
|
+
//# sourceMappingURL=integration-runner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"integration-runner.js","sourceRoot":"","sources":["../../src/eval/integration-runner.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,yEAAyE;AACzE,EAAE;AACF,+DAA+D;AAC/D,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,SAAS,CAAC;AAClE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,EAAqB,qBAAqB,EAAE,MAAM,0BAA0B,CAAC;AACpF,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AACxD,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE5C,OAAO,EAAE,eAAe,EAAE,MAAM,UAAU,CAAC;AAU3C,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAC9E,IAAI,iBAAiB,GAAG,KAAK,CAAC;AAC9B,IAAI,SAAS,GAAiC,IAAI,CAAC;AACnD,IAAI,WAAW,GAAG,KAAK,CAAC;AAExB,SAAS,qBAAqB,CAAC,EAAuB;IACpD,SAAS,GAAG,EAAE,CAAC;IACf,WAAW,GAAG,KAAK,CAAC;IACpB,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACvB,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpC,iBAAiB,GAAG,IAAI,CAAC;IAC3B,CAAC;AACH,CAAC;AAED,SAAS,uBAAuB;IAC9B,OAAO,CAAC,cAAc,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;IAChD,iBAAiB,GAAG,KAAK,CAAC;IAC1B,SAAS,GAAG,IAAI,CAAC;AACnB,CAAC;AAED,KAAK,UAAU,aAAa;IAC1B,IAAI,WAAW;QAAE,OAAO;IACxB,WAAW,GAAG,IAAI,CAAC;IACnB,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IACtD,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAM,SAAS,EAAE,CAAC;YAClB,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,iBAAiB,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC;AAED,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E,MAAM,UAAU,eAAe;IAC7B,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,OAAO,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,kEAAkE;YAClE,wEAAwE,CACzE,CAAC;IACJ,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAA6B,EAC7B,IAAwB;IAExB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;IACnE,MAAM,UAAU,GAAG,gBAAgB,KAAK,GAAG,CAAC;IAC5C,
MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,MAAM,eAAe,GAAa,CAAC,KAAK,CAAC,CAAC;IAC1C,IAAI,OAAO,GAAQ,IAAI,CAAC;IACxB,IAAI,OAAO,GAAQ,IAAI,CAAC;IAExB,8BAA8B;IAC9B,qBAAqB,CAAC,KAAK,IAAI,EAAE;QAC/B,MAAM,UAAU,CAAC,QAAQ,EAAE,OAAO,EAAE,eAAe,CAAC,CAAC;IACvD,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC;QACH,0EAA0E;QAC1E,qBAAqB;QACrB,0EAA0E;QAC1E,MAAM,eAAe,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,KAAK,IAAI,EAAE;YAC7D,oBAAoB;YACpB,IAAI,QAAQ,CAAC,mBAAmB,EAAE,MAAM,EAAE,CAAC;gBACzC,MAAM,QAAQ,GAAG,qBAAqB,CAAC,QAAQ,CAAC,mBAAmB,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACpF,MAAM,OAAO,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;gBAC/D,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACvB,MAAM,IAAI,KAAK,CACb,wBAAwB,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI;wBACjE,6CAA6C,CAC9C,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,uBAAuB;YACvB,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,aAAa,CAAC;YACzD,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,iBAAiB,CAAC;YAC7D,IAAI,WAAW,IAAI,CAAC,WAAW,EAAE,CAAC;gBAChC,cAAc,CAAC,WAAW,CAAC,CAAC;YAC9B,CAAC;YAED,mBAAmB;YACnB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,eAAe,EAAE,CAAC;YACpB,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAE7B,IAAI,eAAe,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACtC,yBAAyB;YACzB,KAAK,MAAM,CAAC,IAAI,CAAC,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAuB,EAAE,CAAC;gBAClF,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YAC/C,CAAC;YACD,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9E,CAAC;QAED,0EAA0E;QAC1E,mBAAmB;QACnB,0EAA0E;QAC1E,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,KAAK,IAAI,EAAE;YACzD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,gDAAgD,QAAQ,CAAC,YAAY,EAAE,aAAa,IAAI,SAAS,EAAE,CAAC,CAAC;gBACjH,OAAO;YACT,CAAC;YAED,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,aAAa,CAAC;YACzD,MAAM,WAAW,GAAG,QAAQ,CAAC,YAAY,EAAE,iBAAiB,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;YAExH,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,YAAY,CAAC,CAAC;YACtC,IAAI,W
AAW,EAAE,CAAC;gBAChB,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC,WAAW,EAAE;oBAC/D,QAAQ,EAAE,KAAK;oBACf,IAAI,EAAE,CAAC,+CAA+C,CAAC;iBACxD,CAAC,CAAC;gBACH,OAAO,GAAG,IAAI,CAAC,CAAC,6CAA6C;YAC/D,CAAC;iBAAM,CAAC;gBACN,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;gBACxD,OAAO,GAAG,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;YACvC,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE3B,IAAI,aAAa,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YACrD,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,IAAI,OAAO,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC;YACxG,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9E,CAAC;QAED,0EAA0E;QAC1E,mBAAmB;QACnB,0EAA0E;QAC1E,IAAI,eAAe,GAAG,EAAE,CAAC;QACzB,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,EAAE,QAAQ,CAAC;YAEjD,gBAAgB;YAChB,IAAI,QAAQ,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBAC7B,MAAM,WAAW,GAAG,IAAI,mBAAmB,CACzC,QAAQ,CAAC,YAAY,EAAE,SAAS;oBAC9B,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,EAAE,QAAQ,CAAC,YAAY,CAAC,SAAS,EAAE;oBACjD,CAAC,CAAC,SAAS,CACd,CAAC;gBACF,MAAM,WAAW,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACtC,CAAC;YAED,gCAAgC;YAChC,MAAM,gBAAgB,GAAG,GAAG,UAAU,0EAA0E,UAAU,wCAAwC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAEpL,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,6CAA6C,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC;gBAC9F,eAAe,GAAG,mCAAmC,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,iCAAiC;YACjC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YACpD,IAAI,YAAY,GAAG,2EAA2E,CAAC;YAC/F,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC5B,YAAY,GAAG,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;YACpD,CAAC;YAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,YAAY,EAAE,gBAAgB,CAAC,CAAC;YACrE,eAAe,GAAG,MAAM,CAAC,IAAI,CAAC;QAChC,CAAC,CAAC,CAAC;QA
CH,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE3B,IAAI,aAAa,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,UAAU,CAAC,QAAQ,EAAE,OAAO,IAAI,OAAO,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC;YACxG,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9E,CAAC;QAED,0EAA0E;QAC1E,kBAAkB;QAClB,0EAA0E;QAC1E,MAAM,YAAY,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,KAAK,IAAI,EAAE;YACvD,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM;gBAAE,OAAO;YAEzC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CACpC,cAAc,CAAC,eAAe,EAAE,SAAsB,EAAE,MAAM,CAAC,CAChE,CACF,CAAC;YAEF,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC9C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,IAAI,KAAK,CACb,GAAG,MAAM,CAAC,MAAM,yBAAyB;oBACzC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAC9D,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAE1B,0EAA0E;QAC1E,mBAAmB;QACnB,0EAA0E;QAC1E,MAAM,aAAa,GAAG,MAAM,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE,CACnD,UAAU,CAAC,QAAQ,EAAE,OAAO,IAAI,OAAO,EAAE,eAAe,CAAC,CAC1D,CAAC;QACF,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAE3B,OAAO,WAAW,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9E,CAAC;YAAS,CAAC;QACT,uBAAuB,EAAE,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E,KAAK,UAAU,QAAQ,CACrB,KAAuB,EACvB,EAAuB;IAEvB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,EAAE,EAAE,CAAC;QACX,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,CAAC;IACnE,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,KAAK;YACL,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;YAC9B,YAAY,EAAG,GAAa,CAAC,OAAO;SACrC,CAAC;IACJ,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AA
E9E,KAAK,UAAU,UAAU,CACvB,QAA6B,EAC7B,gBAAqB,EACrB,gBAA0B;IAE1B,+CAA+C;IAC/C,IAAI,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC;QAC7B,KAAK,MAAM,MAAM,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;YACtC,IAAI,CAAC;gBACH,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;oBACnB,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;gBACzB,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,0DAA0D;gBAC1D,OAAO,CAAC,KAAK,CAAC,mBAAmB,MAAM,CAAC,WAAW,WAAW,EAAG,GAAa,CAAC,OAAO,CAAC,CAAC;YAC1F,CAAC;QACH,CAAC;IACH,CAAC;IAED,gBAAgB;IAChB,IAAI,gBAAgB,EAAE,CAAC;QACrB,IAAI,CAAC;YACH,MAAM,gBAAgB,CAAC,KAAK,EAAE,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,gCAAgC;QAClC,CAAC;IACH,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,sBAAsB;AACtB,8EAA8E;AAE9E,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,QAAgB,EAChB,OAAiB;IAEjB,aAAa;IACb,IAAI,OAAO,CAAC,GAAG,CAAC,EAAE,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IAE3C,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;IAC1D,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAE7E,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAG,gDAAgD,QAAQ,MAAM,UAAU,qBAAqB,CAAC;IAE/G,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,EAAE;YAC/B,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,GAAG,IAAI,MAAM,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,2BAA2B,CAAC,CAAC,CAAC;AAC3E,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,QAAgB,EAAE,MAA4B;IACtE,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,2BAA2B,CAAC,CAAC;IACzE,IAAI,OAAO,GAA2B,EAAE,CAAC;IACzC,IAAI,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;QAC5B,IAAI,CAAC;YACH,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,EAAE,CAAC;QACf,CAAC;IACH,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACrB,aAAa,CAAC,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;A
ACxE,CAAC;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,WAAW,CAClB,QAA6B,EAC7B,KAAa,EACb,MAAqB,EACrB,eAAyB,EACzB,MAAe;IAEf,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC;IACvF,OAAO;QACL,MAAM,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3B,KAAK;QACL,MAAM;QACN,WAAW;QACX,eAAe;QACf,MAAM;KACP,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/** Names of the five integration-runner phases, listed in execution order. */
export type IntegrationPhase = "preflight" | "connect" | "execute" | "verify" | "cleanup";
|
|
2
|
+
/** Outcome of a single runner phase. */
export interface PhaseResult {
    /** Which phase this record describes. */
    phase: IntegrationPhase;
    /** "skipped" marks a phase that was not attempted because an earlier phase failed. */
    status: "pass" | "fail" | "skipped";
    /** Elapsed wall-clock time in milliseconds; omitted on skipped phases. */
    durationMs?: number;
    /** Failure description; set only when `status` is "fail". */
    errorMessage?: string;
}
|
|
8
|
+
/** Result of running one integration eval case. */
export interface IntegrationRunResult {
    /** The eval case's `id`, converted to a string. */
    evalId: string;
    /** Identifier of this run (caller-supplied or generated by the runner). */
    runId: string;
    /** One record per phase, in the order the phases were processed. */
    phases: PhaseResult[];
    /** True when every phase is either "pass" or "skipped". */
    overallPass: boolean;
    /** Identifiers of test artifacts; the runner seeds this list with `runId`. */
    testArtifactIds: string[];
    /** Whether the run was executed in dry-run mode. */
    dryRun: boolean;
}
|
|
16
|
+
/** Rate limit applied to a single platform. */
export interface PlatformRateLimit {
    /** Maximum number of requests allowed per minute. */
    requestsPerMinute: number;
}
|
|
19
|
+
/** Environment an integration eval case needs in order to run. */
export interface IntegrationRequirements {
    /** Chrome profile name; resolved to a path by the runner when `chromeProfilePath` is not set. */
    chromeProfile?: string;
    /** Explicit profile directory; takes precedence over `chromeProfile`. */
    chromeProfilePath?: string;
    /** Platform key used for rate limiting. */
    platform?: string;
    /** Rate limit to use for `platform` instead of the limiter's defaults. */
    rateLimit?: PlatformRateLimit;
}
|
|
25
|
+
/** A cleanup step attached to an integration eval case. */
export interface CleanupAction {
    /** Category of the cleanup step. */
    type: "delete_post" | "remove_artifact" | "custom";
    /** Human-readable description; included in the log message when the step fails. */
    description: string;
    /** Callback performing the step; steps without one are not executed by the runner. */
    execute?: () => Promise<void>;
}
|
|
30
|
+
/** An eval case executed by the integration runner. */
export interface IntegrationEvalCase {
    /** Case identifier; stringified into the run result's `evalId`. */
    id: number | string;
    /** Display name; used in the simulated dry-run output. */
    name: string;
    /** Prompt sent to the LLM (the runner prepends a test-prefix banner). */
    prompt: string;
    /** Expected output description. NOTE(review): not read by the runner — confirm where it is consumed. */
    expected_output: string;
    /** Assertions judged against the generated output in the verify phase. */
    assertions: {
        id: string;
        text: string;
        type: string;
    }[];
    /** Discriminator marking this case as an integration test. */
    testType: "integration";
    /** Credential names that must resolve during preflight. */
    requiredCredentials?: string[];
    /** Browser profile, platform and rate-limit requirements. */
    requirements?: IntegrationRequirements;
    /** Cleanup steps run, in order, during the cleanup phase. */
    cleanup?: CleanupAction[];
}
|
|
45
|
+
/** Options for a single integration run. */
export interface IntegrationRunOpts {
    /** When true, the runner skips the Playwright check, browser launch, rate limiting and LLM generation. */
    dryRun?: boolean;
    /** NOTE(review): not read by the runner itself — presumably consumed by the CLI to decide whether to prompt; confirm. */
    confirm?: boolean;
    /** Skill directory; credentials are resolved against it and "SKILL.md" is read from it. */
    skillDir: string;
    /** Run identifier to use instead of a generated one. */
    runId?: string;
}
|
|
51
|
+
/** Default rate limits per platform (requests per minute), keyed by platform name. */
export declare const DEFAULT_RATE_LIMITS: Record<string, PlatformRateLimit>;

/** A single default rate limit; presumably the fallback for platforms without their own entry — confirm against the runner. */
export declare const DEFAULT_RATE_LIMIT: PlatformRateLimit;

/** The accepted cleanup action names, as a readonly tuple. */
export declare const VALID_CLEANUP_ACTIONS: readonly ["delete_post", "remove_artifact", "custom"];

/** Union of the string literals contained in `VALID_CLEANUP_ACTIONS`. */
export type CleanupActionType = (typeof VALID_CLEANUP_ACTIONS)[number];
|
|
56
|
+
/**
 * Cleanup entry as it appears in an eval definition. Only `action` is required.
 */
export interface EvalCleanupSchema {
    /** Cleanup action name; must be one of the valid cleanup action literals. */
    action: CleanupActionType;
    /** Optional platform name. */
    platform?: string;
    /** Optional identifier; presumably of the thing to clean up — confirm. */
    identifier?: string;
    /** Optional human-readable description. */
    description?: string;
}
|
|
62
|
+
/**
 * Requirements entry as it appears in an eval definition. Both fields are optional.
 */
export interface EvalRequirementsSchema {
    /** Optional Chrome profile identifier. */
    chromeProfile?: string;
    /** Optional platform name. */
    platform?: string;
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// integration-types.ts -- types for the integration test runner
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
/**
 * Default rate limits per platform (requests per minute).
 * "x" and "twitter" are separate keys carrying the same limit.
 */
export const DEFAULT_RATE_LIMITS = {
    x: { requestsPerMinute: 3 },
    twitter: { requestsPerMinute: 3 },
    linkedin: { requestsPerMinute: 2 },
    slack: { requestsPerMinute: 10 },
    instagram: { requestsPerMinute: 5 },
    facebook: { requestsPerMinute: 5 },
};

/** Standalone default limit of 10 requests per minute. */
export const DEFAULT_RATE_LIMIT = { requestsPerMinute: 10 };

// ---------------------------------------------------------------------------
// Schema types for eval generation validation (US-003, US-004)
// ---------------------------------------------------------------------------

/** Names accepted as cleanup actions. */
export const VALID_CLEANUP_ACTIONS = [
    "delete_post",
    "remove_artifact",
    "custom",
];
|
|
18
|
+
//# sourceMappingURL=integration-types.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"integration-types.js","sourceRoot":"","sources":["../../src/eval/integration-types.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gEAAgE;AAChE,8EAA8E;AAwD9E,8DAA8D;AAC9D,MAAM,CAAC,MAAM,mBAAmB,GAAsC;IACpE,CAAC,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IAC3B,OAAO,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IACjC,QAAQ,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IAClC,KAAK,EAAE,EAAE,iBAAiB,EAAE,EAAE,EAAE;IAChC,SAAS,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;IACnC,QAAQ,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE;CACnC,CAAC;AAEF,MAAM,CAAC,MAAM,kBAAkB,GAAsB,EAAE,iBAAiB,EAAE,EAAE,EAAE,CAAC;AAE/E,8EAA8E;AAC9E,+DAA+D;AAC/D,8EAA8E;AAE9E,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,aAAa,EAAE,iBAAiB,EAAE,QAAQ,CAAU,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import type { AssertionResult } from "./judge.js";
|
|
2
|
+
/** One stored judge verdict. */
export interface CacheEntry {
    /** Whether the assertion passed. */
    pass: boolean;
    /** The judge's reasoning text. */
    reasoning: string;
    /** Timestamp string recording when the entry was stored. */
    cachedAt: string;
    /** Model name of the judge that produced the verdict. */
    judgeModel: string;
}
|
|
8
|
+
/** Top-level shape of the cache document. */
export interface CacheData {
    /** Format version number of the document. */
    version: number;
    /** Stored verdicts, keyed by a string cache key. */
    entries: Record<string, CacheEntry>;
}
|
|
12
|
+
/**
 * Cache of judge verdicts for a skill directory.
 */
export declare class JudgeCache {
    private readonly skillDir;
    private data;
    private dirty;
    private readonly cachePath;

    /**
     * @param skillDir Skill directory the cache belongs to.
     */
    constructor(skillDir: string);

    private load;

    /**
     * Derive the string cache key for an (assertion text, output, judge model) triple.
     */
    static computeKey(assertionText: string, output: string, judgeModel: string): string;

    /**
     * Resolve a verdict for the given triple, invoking `compute` when needed.
     *
     * @param compute Async producer of the verdict.
     */
    getOrCompute(
        assertionText: string,
        output: string,
        judgeModel: string,
        compute: () => Promise<AssertionResult>,
    ): Promise<AssertionResult>;

    /** Whether the cache holds an entry for the given triple. */
    has(assertionText: string, output: string, judgeModel: string): boolean;

    /** Number of entries in the cache. */
    get size(): number;

    /**
     * Persist cache to disk. Call after all operations are complete.
     * Also ensures .judge-cache.json is in .gitignore.
     */
    flush(): void;

    private ensureGitignore;
}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// judge-cache.ts -- SHA-256 content-hash cache for judge results
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
import { createHash } from "node:crypto";
|
|
5
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync, unlinkSync, appendFileSync } from "node:fs";
|
|
6
|
+
import { join, dirname } from "node:path";
|
|
7
|
+
/**
 * Disk-backed cache of judge verdicts, stored as JSON at
 * `<skillDir>/evals/.judge-cache.json`.
 *
 * Entries are keyed by a SHA-256 hex digest of the assertion text, the judged
 * output and the judge model. The file is read once in the constructor and is
 * only written back by `flush()`.
 */
export class JudgeCache {
    skillDir;
    data;
    dirty = false;
    cachePath;

    /**
     * @param skillDir Skill directory; the cache file lives under its `evals/` folder.
     */
    constructor(skillDir) {
        this.skillDir = skillDir;
        this.cachePath = join(skillDir, "evals", ".judge-cache.json");
        this.data = this.load();
    }

    /**
     * Read the cache file, falling back to an empty version-1 cache.
     *
     * A file that cannot be read or parsed is deleted (best effort) and a
     * warning is logged. A file that parses but does not have the expected
     * shape is ignored and left on disk.
     *
     * @returns The parsed cache document, or a fresh empty one.
     */
    load() {
        try {
            if (existsSync(this.cachePath)) {
                const raw = readFileSync(this.cachePath, "utf-8");
                const parsed = JSON.parse(raw);
                // `typeof null` and `typeof []` are both "object", so they are
                // rejected explicitly; a null `entries` would otherwise make
                // every later lookup throw.
                const entriesOk = parsed
                    && typeof parsed === "object"
                    && parsed.entries !== null
                    && typeof parsed.entries === "object"
                    && !Array.isArray(parsed.entries);
                if (entriesOk && parsed.version === 1) {
                    return parsed;
                }
            }
        }
        catch {
            // Corruption recovery: delete corrupted file and start fresh
            try {
                if (existsSync(this.cachePath)) {
                    unlinkSync(this.cachePath);
                    console.warn(`[judge-cache] Corrupted cache file deleted: ${this.cachePath}`);
                }
            }
            catch {
                // ignore deletion failure
            }
        }
        return { version: 1, entries: {} };
    }

    /**
     * Derive the cache key for an (assertion text, output, judge model) triple.
     *
     * The three fields are serialized as a JSON array before hashing so that
     * field boundaries are unambiguous. Joining them with a plain delimiter
     * would let ("a||b", "c") and ("a", "b||c") hash to the same key.
     *
     * @returns A 64-character lowercase hex SHA-256 digest.
     */
    static computeKey(assertionText, output, judgeModel) {
        return createHash("sha256")
            .update(JSON.stringify([assertionText, output, judgeModel]))
            .digest("hex");
    }

    /**
     * Return the cached verdict for the triple, or run `compute`, store its
     * verdict and return it.
     *
     * On a cache hit the returned object has an empty `id`; the caller is
     * expected to overwrite it. On a miss the result of `compute` is returned
     * unchanged and the cache is marked dirty. A rejection from `compute`
     * propagates and nothing is stored.
     *
     * @param compute Async producer of the verdict, called only on a miss.
     */
    async getOrCompute(assertionText, output, judgeModel, compute) {
        const key = JudgeCache.computeKey(assertionText, output, judgeModel);
        const cached = this.data.entries[key];
        if (cached) {
            // Return cached result, reconstructing the AssertionResult shape
            return {
                id: "", // caller overwrites this
                text: assertionText,
                pass: cached.pass,
                reasoning: cached.reasoning,
            };
        }
        const result = await compute();
        // Store in cache
        this.data.entries[key] = {
            pass: result.pass,
            reasoning: result.reasoning,
            cachedAt: new Date().toISOString(),
            judgeModel,
        };
        this.dirty = true;
        return result;
    }

    /** Whether an entry exists for the given triple. */
    has(assertionText, output, judgeModel) {
        const key = JudgeCache.computeKey(assertionText, output, judgeModel);
        return key in this.data.entries;
    }

    /** Number of entries currently held in memory. */
    get size() {
        return Object.keys(this.data.entries).length;
    }

    /**
     * Persist cache to disk. Call after all operations are complete.
     * Also ensures .judge-cache.json is in .gitignore.
     *
     * Does nothing when no entry was added since the last load or flush.
     */
    flush() {
        if (!this.dirty)
            return;
        const dir = dirname(this.cachePath);
        mkdirSync(dir, { recursive: true });
        writeFileSync(this.cachePath, JSON.stringify(this.data, null, 2), "utf-8");
        this.dirty = false;
        // T-009: Ensure .judge-cache.json is in .gitignore
        this.ensureGitignore();
    }

    /**
     * Make sure `<skillDir>/.gitignore` mentions the cache file, appending to
     * an existing file or creating one. Failures are ignored.
     */
    ensureGitignore() {
        const gitignorePath = join(this.skillDir, ".gitignore");
        const pattern = "evals/.judge-cache.json";
        try {
            if (existsSync(gitignorePath)) {
                const content = readFileSync(gitignorePath, "utf-8");
                if (content.includes(pattern))
                    return;
                appendFileSync(gitignorePath, `\n${pattern}\n`);
            }
            else {
                writeFileSync(gitignorePath, `${pattern}\n`, "utf-8");
            }
        }
        catch {
            // Non-critical — don't fail the run for gitignore issues
        }
    }
}
|
|
109
|
+
//# sourceMappingURL=judge-cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"judge-cache.js","sourceRoot":"","sources":["../../src/eval/judge-cache.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,iEAAiE;AACjE,8EAA8E;AAE9E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,aAAa,EAAE,SAAS,EAAE,UAAU,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzG,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAe1C,MAAM,OAAO,UAAU;IAKQ;IAJrB,IAAI,CAAY;IAChB,KAAK,GAAG,KAAK,CAAC;IACL,SAAS,CAAS;IAEnC,YAA6B,QAAgB;QAAhB,aAAQ,GAAR,QAAQ,CAAQ;QAC3C,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,mBAAmB,CAAC,CAAC;QAC9D,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAEO,IAAI;QACV,IAAI,CAAC;YACH,IAAI,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC/B,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;gBAClD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBAC/B,IAAI,MAAM,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,OAAO,KAAK,CAAC,IAAI,OAAO,MAAM,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;oBACvG,OAAO,MAAmB,CAAC;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,6DAA6D;YAC7D,IAAI,CAAC;gBACH,IAAI,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;oBAC/B,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBAC3B,OAAO,CAAC,IAAI,CAAC,+CAA+C,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,0BAA0B;YAC5B,CAAC;QACH,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,CAAC,UAAU,CAAC,aAAqB,EAAE,MAAc,EAAE,UAAkB;QACzE,OAAO,UAAU,CAAC,QAAQ,CAAC;aACxB,MAAM,CAAC,GAAG,aAAa,KAAK,MAAM,KAAK,UAAU,EAAE,CAAC;aACpD,MAAM,CAAC,KAAK,CAAC,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,aAAqB,EACrB,MAAc,EACd,UAAkB,EAClB,OAAuC;QAEvC,MAAM,GAAG,GAAG,UAAU,CAAC,UAAU,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QAErE,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACtC,IAAI,MAAM,EAAE,CAAC;YACX,iEAAiE;YACjE,OAAO;gBACL,EAAE,EAAE,EAAE,EAAE,yBAAyB;gBACjC,IAAI,EAAE,aAAa;gBACnB,IAAI,EAAE,MAAM,CAAC,IAAI;gBACjB,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,EAAE,CAAC;QAE/B,iBAAiB;QACjB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG;YA
CvB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YAClC,UAAU;SACX,CAAC;QACF,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,GAAG,CAAC,aAAqB,EAAE,MAAc,EAAE,UAAkB;QAC3D,MAAM,GAAG,GAAG,UAAU,CAAC,UAAU,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC;QACrE,OAAO,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC;IAClC,CAAC;IAED,IAAI,IAAI;QACN,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAC/C,CAAC;IAED;;;OAGG;IACH,KAAK;QACH,IAAI,CAAC,IAAI,CAAC,KAAK;YAAE,OAAO;QAExB,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACpC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACpC,aAAa,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QAC3E,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QAEnB,mDAAmD;QACnD,IAAI,CAAC,eAAe,EAAE,CAAC;IACzB,CAAC;IAEO,eAAe;QACrB,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QACxD,MAAM,OAAO,GAAG,yBAAyB,CAAC;QAE1C,IAAI,CAAC;YACH,IAAI,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;gBAC9B,MAAM,OAAO,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBACrD,IAAI,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;oBAAE,OAAO;gBACtC,cAAc,CAAC,aAAa,EAAE,KAAK,OAAO,IAAI,CAAC,CAAC;YAClD,CAAC;iBAAM,CAAC;gBACN,aAAa,CAAC,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE,OAAO,CAAC,CAAC;YACxD,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,yDAAyD;QAC3D,CAAC;IACH,CAAC;CACF"}
|
package/dist/eval/judge.d.ts
CHANGED
|
@@ -8,4 +8,4 @@ export interface AssertionResult {
|
|
|
8
8
|
reasoning: string;
|
|
9
9
|
}
|
|
10
10
|
export declare function buildJudgeSystemPrompt(mcpDeps?: McpDependency[]): string;
|
|
11
|
-
export declare function judgeAssertion(output: string, assertion: Assertion, client: LlmClient, mcpDeps?: McpDependency[]): Promise<AssertionResult>;
|
|
11
|
+
export declare function judgeAssertion(output: string, assertion: Assertion, client: LlmClient, judgeClientOrMcpDeps?: LlmClient | McpDependency[], mcpDeps?: McpDependency[]): Promise<AssertionResult>;
|
package/dist/eval/judge.js
CHANGED
|
@@ -17,8 +17,25 @@ When evaluating assertions:
|
|
|
17
17
|
|
|
18
18
|
Respond with ONLY a JSON object: { "pass": boolean, "reasoning": "brief explanation" }`;
|
|
19
19
|
}
|
|
20
|
-
export async function judgeAssertion(output, assertion, client, mcpDeps) {
|
|
21
|
-
|
|
20
|
+
export async function judgeAssertion(output, assertion, client, judgeClientOrMcpDeps, mcpDeps) {
|
|
21
|
+
// Support both old signature (client, mcpDeps?) and new (client, judgeClient?, mcpDeps?)
|
|
22
|
+
let effectiveJudgeClient;
|
|
23
|
+
let effectiveMcpDeps;
|
|
24
|
+
if (Array.isArray(judgeClientOrMcpDeps)) {
|
|
25
|
+
// Old-style call: judgeAssertion(output, assertion, client, mcpDeps)
|
|
26
|
+
effectiveJudgeClient = client;
|
|
27
|
+
effectiveMcpDeps = judgeClientOrMcpDeps;
|
|
28
|
+
}
|
|
29
|
+
else if (judgeClientOrMcpDeps && typeof judgeClientOrMcpDeps === "object" && "generate" in judgeClientOrMcpDeps) {
|
|
30
|
+
// New-style call: judgeAssertion(output, assertion, client, judgeClient, mcpDeps?)
|
|
31
|
+
effectiveJudgeClient = judgeClientOrMcpDeps;
|
|
32
|
+
effectiveMcpDeps = mcpDeps;
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
effectiveJudgeClient = client;
|
|
36
|
+
effectiveMcpDeps = mcpDeps;
|
|
37
|
+
}
|
|
38
|
+
const systemPrompt = buildJudgeSystemPrompt(effectiveMcpDeps);
|
|
22
39
|
const userPrompt = `## LLM Output
|
|
23
40
|
${output}
|
|
24
41
|
|
|
@@ -26,7 +43,7 @@ ${output}
|
|
|
26
43
|
${assertion.text}
|
|
27
44
|
|
|
28
45
|
Does the LLM output satisfy this assertion? Respond with JSON only: { "pass": boolean, "reasoning": "..." }`;
|
|
29
|
-
const { text: raw } = await
|
|
46
|
+
const { text: raw } = await effectiveJudgeClient.generate(systemPrompt, userPrompt);
|
|
30
47
|
const parsed = parseJudgeResponse(raw);
|
|
31
48
|
return {
|
|
32
49
|
id: assertion.id,
|
package/dist/eval/judge.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/eval/judge.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAa9E,MAAM,YAAY,GAAG,qNAAqN,CAAC;AAE3O,MAAM,UAAU,sBAAsB,CAAC,OAAyB;IAC9D,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3D,OAAO;;oIAE2H,UAAU;;;;;;uFAMvD,CAAC;AACxF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAc,EACd,SAAoB,EACpB,MAAiB,EACjB,OAAyB;IAEzB,MAAM,YAAY,GAAG,sBAAsB,CAAC,
|
|
1
|
+
{"version":3,"file":"judge.js","sourceRoot":"","sources":["../../src/eval/judge.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4BAA4B;AAC5B,8EAA8E;AAa9E,MAAM,YAAY,GAAG,qNAAqN,CAAC;AAE3O,MAAM,UAAU,sBAAsB,CAAC,OAAyB;IAC9D,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3D,OAAO;;oIAE2H,UAAU;;;;;;uFAMvD,CAAC;AACxF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAc,EACd,SAAoB,EACpB,MAAiB,EACjB,oBAAkD,EAClD,OAAyB;IAEzB,yFAAyF;IACzF,IAAI,oBAA+B,CAAC;IACpC,IAAI,gBAA6C,CAAC;IAElD,IAAI,KAAK,CAAC,OAAO,CAAC,oBAAoB,CAAC,EAAE,CAAC;QACxC,qEAAqE;QACrE,oBAAoB,GAAG,MAAM,CAAC;QAC9B,gBAAgB,GAAG,oBAAoB,CAAC;IAC1C,CAAC;SAAM,IAAI,oBAAoB,IAAI,OAAO,oBAAoB,KAAK,QAAQ,IAAI,UAAU,IAAI,oBAAoB,EAAE,CAAC;QAClH,mFAAmF;QACnF,oBAAoB,GAAG,oBAAoB,CAAC;QAC5C,gBAAgB,GAAG,OAAO,CAAC;IAC7B,CAAC;SAAM,CAAC;QACN,oBAAoB,GAAG,MAAM,CAAC;QAC9B,gBAAgB,GAAG,OAAO,CAAC;IAC7B,CAAC;IAED,MAAM,YAAY,GAAG,sBAAsB,CAAC,gBAAgB,CAAC,CAAC;IAE9D,MAAM,UAAU,GAAG;EACnB,MAAM;;;EAGN,SAAS,CAAC,IAAI;;4GAE4F,CAAC;IAE3G,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,MAAM,oBAAoB,CAAC,QAAQ,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;IAEpF,MAAM,MAAM,GAAG,kBAAkB,CAAC,GAAG,CAAC,CAAC;IAEvC,OAAO;QACL,EAAE,EAAE,SAAS,CAAC,EAAE;QAChB,IAAI,EAAE,SAAS,CAAC,IAAI;QACpB,IAAI,EAAE,MAAM,CAAC,IAAI;QACjB,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC;AACJ,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,4CAA4C;IAC5C,MAAM,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAEjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACnC,IAAI,OAAO,MAAM,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YACrC,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACxC,CAAC;QACD,OAAO;YACL,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,SAAS,EAAE,OAAO,MAAM,CAAC,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;SACxE,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,sEAAsE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG
,CAAC,EAAE,CAC1F,CAAC;IACJ,CAAC;AACH,CAAC"}
|
package/dist/eval/llm.d.ts
CHANGED
|
@@ -3,12 +3,13 @@ export interface GenerateResult {
|
|
|
3
3
|
durationMs: number;
|
|
4
4
|
inputTokens: number | null;
|
|
5
5
|
outputTokens: number | null;
|
|
6
|
+
cost: number | null;
|
|
6
7
|
}
|
|
7
8
|
export interface LlmClient {
|
|
8
9
|
generate(systemPrompt: string, userPrompt: string): Promise<GenerateResult>;
|
|
9
10
|
readonly model: string;
|
|
10
11
|
}
|
|
11
|
-
export type ProviderName = "anthropic" | "claude-cli" | "codex-cli" | "gemini-cli" | "ollama";
|
|
12
|
+
export type ProviderName = "anthropic" | "claude-cli" | "codex-cli" | "gemini-cli" | "ollama" | "openrouter";
|
|
12
13
|
export interface LlmOverrides {
|
|
13
14
|
provider?: ProviderName;
|
|
14
15
|
model?: string;
|