vskill 0.2.91 → 0.2.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/eval/__tests__/benchmark-history.test.js +130 -1
- package/dist/eval/__tests__/benchmark-history.test.js.map +1 -1
- package/dist/eval/__tests__/llm.test.js +267 -0
- package/dist/eval/__tests__/llm.test.js.map +1 -1
- package/dist/eval/llm.d.ts +1 -1
- package/dist/eval/llm.js +69 -26
- package/dist/eval/llm.js.map +1 -1
- package/dist/eval-server/__tests__/benchmark-runner.test.d.ts +1 -0
- package/dist/eval-server/__tests__/benchmark-runner.test.js +301 -0
- package/dist/eval-server/__tests__/benchmark-runner.test.js.map +1 -0
- package/dist/eval-server/api-routes.js +8 -0
- package/dist/eval-server/api-routes.js.map +1 -1
- package/package.json +1 -1
package/dist/eval/llm.js
CHANGED
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
// ---------------------------------------------------------------------------
|
|
2
|
-
// LLM client for eval commands — supports
|
|
2
|
+
// LLM client for eval commands — supports multiple CLI tools and API providers
|
|
3
3
|
//
|
|
4
4
|
// Provider selection via VSKILL_EVAL_PROVIDER env var:
|
|
5
5
|
// "claude-cli" — Claude Code CLI (uses your Max/Pro plan, no API key)
|
|
6
|
+
// "codex-cli" — OpenAI Codex CLI (uses ChatGPT subscription or CODEX_API_KEY)
|
|
7
|
+
// "gemini-cli" — Google Gemini CLI (free tier or GOOGLE_API_KEY)
|
|
6
8
|
// "anthropic" — Anthropic API (requires ANTHROPIC_API_KEY)
|
|
7
9
|
// "ollama" — Local Ollama server (free, requires ollama running)
|
|
8
10
|
//
|
|
9
11
|
// Auto-detection when VSKILL_EVAL_PROVIDER is not set:
|
|
10
12
|
// 1. claude-cli (default — works everywhere, even inside Claude Code sessions)
|
|
11
|
-
//
|
|
13
|
+
// Other providers are used only when explicitly set via VSKILL_EVAL_PROVIDER
|
|
12
14
|
//
|
|
13
15
|
// Model selection via VSKILL_EVAL_MODEL env var:
|
|
14
16
|
// claude-cli: "sonnet" | "opus" | "haiku" (default: sonnet)
|
|
17
|
+
// codex-cli: "o4-mini" | "codex-1" | "gpt-5.3-codex" (default: o4-mini)
|
|
18
|
+
// gemini-cli: "gemini-2.5-pro" | "gemini-2.5-flash" (default: gemini-2.5-pro)
|
|
15
19
|
// anthropic: full model ID (default: claude-sonnet-4-6)
|
|
16
20
|
// ollama: model name (default: llama3.1:8b)
|
|
17
21
|
// ---------------------------------------------------------------------------
|
|
@@ -27,10 +31,14 @@ export function createLlmClient(overrides) {
|
|
|
27
31
|
return createAnthropicClient(modelOverride);
|
|
28
32
|
case "claude-cli":
|
|
29
33
|
return createClaudeCliClient(modelOverride);
|
|
34
|
+
case "codex-cli":
|
|
35
|
+
return createCodexCliClient(modelOverride);
|
|
36
|
+
case "gemini-cli":
|
|
37
|
+
return createGeminiCliClient(modelOverride);
|
|
30
38
|
case "ollama":
|
|
31
39
|
return createOllamaClient(modelOverride);
|
|
32
40
|
default:
|
|
33
|
-
throw new Error(`Unknown VSKILL_EVAL_PROVIDER: "${provider}". Use "claude-cli", "anthropic", or "ollama".`);
|
|
41
|
+
throw new Error(`Unknown VSKILL_EVAL_PROVIDER: "${provider}". Use "claude-cli", "codex-cli", "gemini-cli", "anthropic", or "ollama".`);
|
|
34
42
|
}
|
|
35
43
|
}
|
|
36
44
|
// ---------------------------------------------------------------------------
|
|
@@ -77,31 +85,25 @@ function createAnthropicClient(modelOverride) {
|
|
|
77
85
|
},
|
|
78
86
|
};
|
|
79
87
|
}
|
|
80
|
-
|
|
81
|
-
// Provider: Claude CLI (uses your Max/Pro subscription — no API key needed)
|
|
82
|
-
//
|
|
83
|
-
// Pipes prompt via stdin to avoid OS argument-length limits (ARG_MAX).
|
|
84
|
-
//
|
|
85
|
-
// From a plain terminal: npx vskill eval run mobile/appstore
|
|
86
|
-
// Select model: VSKILL_EVAL_MODEL=opus npx vskill eval run mobile/appstore
|
|
87
|
-
// ---------------------------------------------------------------------------
|
|
88
|
-
function createClaudeCliClient(modelOverride) {
|
|
89
|
-
const model = modelOverride || process.env.VSKILL_EVAL_MODEL || "sonnet";
|
|
88
|
+
function createCliClient(config) {
|
|
90
89
|
return {
|
|
91
|
-
model:
|
|
90
|
+
model: config.displayModel,
|
|
92
91
|
async generate(systemPrompt, userPrompt) {
|
|
93
92
|
const combinedPrompt = `${systemPrompt}\n\n${userPrompt}`;
|
|
94
93
|
const start = Date.now();
|
|
95
94
|
const text = await new Promise((resolve, reject) => {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
let env;
|
|
96
|
+
if (config.stripEnvPrefix) {
|
|
97
|
+
env = {};
|
|
98
|
+
const prefix = config.stripEnvPrefix;
|
|
99
|
+
for (const [k, v] of Object.entries(process.env)) {
|
|
100
|
+
if (v !== undefined && !k.startsWith(prefix))
|
|
101
|
+
env[k] = v;
|
|
102
|
+
}
|
|
101
103
|
}
|
|
102
|
-
const proc = spawn(
|
|
104
|
+
const proc = spawn(config.binary, config.args, {
|
|
103
105
|
stdio: ["pipe", "pipe", "pipe"],
|
|
104
|
-
env:
|
|
106
|
+
...(env ? { env } : {}),
|
|
105
107
|
});
|
|
106
108
|
let stdout = "";
|
|
107
109
|
let stderr = "";
|
|
@@ -109,15 +111,15 @@ function createClaudeCliClient(modelOverride) {
|
|
|
109
111
|
proc.stderr.on("data", (d) => { stderr += d.toString(); });
|
|
110
112
|
const timer = setTimeout(() => {
|
|
111
113
|
proc.kill("SIGTERM");
|
|
112
|
-
reject(new Error(
|
|
114
|
+
reject(new Error(`${config.name} CLI timed out after 120s`));
|
|
113
115
|
}, 120_000);
|
|
114
116
|
proc.on("error", (err) => {
|
|
115
117
|
clearTimeout(timer);
|
|
116
118
|
if (err.code === "ENOENT") {
|
|
117
|
-
reject(new Error(
|
|
119
|
+
reject(new Error(config.notFoundMsg));
|
|
118
120
|
}
|
|
119
121
|
else {
|
|
120
|
-
reject(new Error(
|
|
122
|
+
reject(new Error(`${config.name} CLI failed: ${err.message}`));
|
|
121
123
|
}
|
|
122
124
|
});
|
|
123
125
|
proc.on("close", (code) => {
|
|
@@ -127,10 +129,9 @@ function createClaudeCliClient(modelOverride) {
|
|
|
127
129
|
}
|
|
128
130
|
else {
|
|
129
131
|
const errMsg = (stderr || stdout).slice(0, 300);
|
|
130
|
-
reject(new Error(
|
|
132
|
+
reject(new Error(`${config.name} CLI exited with code ${code}${errMsg ? ": " + errMsg : ""}`));
|
|
131
133
|
}
|
|
132
134
|
});
|
|
133
|
-
// Pipe prompt via stdin — avoids ARG_MAX limits for large SKILL.md files
|
|
134
135
|
proc.stdin.end(combinedPrompt);
|
|
135
136
|
});
|
|
136
137
|
return { text, durationMs: Date.now() - start, inputTokens: null, outputTokens: null };
|
|
@@ -138,6 +139,48 @@ function createClaudeCliClient(modelOverride) {
|
|
|
138
139
|
};
|
|
139
140
|
}
|
|
140
141
|
// ---------------------------------------------------------------------------
|
|
142
|
+
// Provider: Claude CLI (uses your Max/Pro subscription — no API key needed)
|
|
143
|
+
// Strips CLAUDE* env vars so the child process doesn't detect nesting.
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
function createClaudeCliClient(modelOverride) {
|
|
146
|
+
const model = modelOverride || process.env.VSKILL_EVAL_MODEL || "sonnet";
|
|
147
|
+
return createCliClient({
|
|
148
|
+
binary: "claude",
|
|
149
|
+
name: "Claude",
|
|
150
|
+
args: ["-p", "--model", model],
|
|
151
|
+
displayModel: `claude-${model}`,
|
|
152
|
+
stripEnvPrefix: "CLAUDE",
|
|
153
|
+
notFoundMsg: "Claude CLI not found. Install it:\n npm install -g @anthropic-ai/claude-code\n\nOr use a different provider:\n export VSKILL_EVAL_PROVIDER=ollama",
|
|
154
|
+
});
|
|
155
|
+
}
|
|
156
|
+
// ---------------------------------------------------------------------------
|
|
157
|
+
// Provider: Codex CLI (uses your ChatGPT subscription — or CODEX_API_KEY for CI)
|
|
158
|
+
// ---------------------------------------------------------------------------
|
|
159
|
+
function createCodexCliClient(modelOverride) {
|
|
160
|
+
const model = modelOverride || process.env.VSKILL_EVAL_MODEL || "o4-mini";
|
|
161
|
+
return createCliClient({
|
|
162
|
+
binary: "codex",
|
|
163
|
+
name: "Codex",
|
|
164
|
+
args: ["exec", "--model", model],
|
|
165
|
+
displayModel: `codex-${model}`,
|
|
166
|
+
notFoundMsg: "Codex CLI not found. Install it:\n npm install -g @openai/codex\n\nOr use a different provider:\n export VSKILL_EVAL_PROVIDER=claude-cli",
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
// Provider: Gemini CLI (free tier — 60 req/min, 1000 req/day, or GOOGLE_API_KEY)
|
|
171
|
+
// NOTE: Gemini CLI headless flags are provisional — verify against actual binary.
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
function createGeminiCliClient(modelOverride) {
|
|
174
|
+
const model = modelOverride || process.env.VSKILL_EVAL_MODEL || "gemini-2.5-pro";
|
|
175
|
+
return createCliClient({
|
|
176
|
+
binary: "gemini",
|
|
177
|
+
name: "Gemini",
|
|
178
|
+
args: ["-p", "--model", model],
|
|
179
|
+
displayModel: model,
|
|
180
|
+
notFoundMsg: "Gemini CLI not found. Install it:\n npm install -g @google/gemini-cli\n\nOr use a different provider:\n export VSKILL_EVAL_PROVIDER=claude-cli",
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
// ---------------------------------------------------------------------------
|
|
141
184
|
// Provider: Ollama (local models — free, no API key)
|
|
142
185
|
// ---------------------------------------------------------------------------
|
|
143
186
|
function createOllamaClient(modelOverride) {
|
package/dist/eval/llm.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.js","sourceRoot":"","sources":["../../src/eval/llm.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E
|
|
1
|
+
{"version":3,"file":"llm.js","sourceRoot":"","sources":["../../src/eval/llm.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,+EAA+E;AAC/E,EAAE;AACF,uDAAuD;AACvD,wEAAwE;AACxE,iFAAiF;AACjF,mEAAmE;AACnE,8DAA8D;AAC9D,uEAAuE;AACvE,EAAE;AACF,uDAAuD;AACvD,iFAAiF;AACjF,2EAA2E;AAC3E,EAAE;AACF,iDAAiD;AACjD,+DAA+D;AAC/D,4EAA4E;AAC5E,iFAAiF;AACjF,4DAA4D;AAC5D,mDAAmD;AACnD,8EAA8E;AAE9E,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAgB3C,SAAS,cAAc;IACrB,OAAO,YAAY,CAAC;AACtB,CAAC;AAOD,MAAM,UAAU,eAAe,CAAC,SAAwB;IACtD,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,cAAc,EAAE,CAAiB,CAAC;IAC/G,MAAM,aAAa,GAAG,SAAS,EAAE,KAAK,CAAC;IACvC,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,WAAW;YACd,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,YAAY;YACf,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,WAAW;YACd,OAAO,oBAAoB,CAAC,aAAa,CAAC,CAAC;QAC7C,KAAK,YAAY;YACf,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,QAAQ;YACX,OAAO,kBAAkB,CAAC,aAAa,CAAC,CAAC;QAC3C;YACE,MAAM,IAAI,KAAK,CACb,kCAAkC,QAAQ,2EAA2E,CACtH,CAAC;IACN,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,aAAa,GAAG,mBAAmB,CAAC;IAE1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;IAC7C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CACb,sRAAsR,CACvR,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,aAAa,CAAC;IAC9E,IAAI,cAAc,GAAQ,IAAI,CAAC;IAE/B,OAAO;QACL,KAAK;QACL,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;gBACjE,cAAc,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;YAC7C,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;YAC9D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,MAAM,CACnD;oBACE,KAAK;oBACL,MAAM,EAAE,YAAY;oBACpB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;oBACjD,UAAU,EAAE,IAAI;iBACjB,EACD,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAC9
B,CAAC;gBACF,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;gBAEtC,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;gBACvE,MAAM,IAAI,GAAG,SAAS,IAAI,MAAM,IAAI,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpE,OAAO;oBACL,IAAI;oBACJ,UAAU;oBACV,WAAW,EAAE,QAAQ,CAAC,KAAK,EAAE,YAAY,IAAI,IAAI;oBACjD,YAAY,EAAE,QAAQ,CAAC,KAAK,EAAE,aAAa,IAAI,IAAI;iBACpD,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,YAAY,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC;AAiBD,SAAS,eAAe,CAAC,MAAiB;IACxC,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,YAAY;QAC1B,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,MAAM,cAAc,GAAG,GAAG,YAAY,OAAO,UAAU,EAAE,CAAC;YAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEzB,MAAM,IAAI,GAAG,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACzD,IAAI,GAAuC,CAAC;gBAC5C,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;oBAC1B,GAAG,GAAG,EAAE,CAAC;oBACT,MAAM,MAAM,GAAG,MAAM,CAAC,cAAc,CAAC;oBACrC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;wBACjD,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;4BAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;oBAC3D,CAAC;gBACH,CAAC;gBAED,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE;oBAC7C,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;oBAC/B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBACxB,CAAC,CAAC;gBAEH,IAAI,MAAM,GAAG,EAAE,CAAC;gBAChB,IAAI,MAAM,GAAG,EAAE,CAAC;gBAChB,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEnE,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;oBAC5B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACrB,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,MAAM,CAAC,IAAI,2BAA2B,CAAC,CAAC,CAAC;gBAC/D,CAAC,EAAE,OAAO,CAAC,CAAC;gBAEZ,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAA0B,EAAE,EAAE;oBAC9C,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,IAAI,GAAG,CAAC,IAAI,KAAK,Q
AAQ,EAAE,CAAC;wBAC1B,MAAM,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;oBACxC,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,MAAM,CAAC,IAAI,gBAAgB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;oBACjE,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;oBACxB,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;wBACf,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;oBACzB,CAAC;yBAAM,CAAC;wBACN,MAAM,MAAM,GAAG,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;wBAChD,MAAM,CAAC,IAAI,KAAK,CACd,GAAG,MAAM,CAAC,IAAI,yBAAyB,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC5E,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;YACjC,CAAC,CAAC,CAAC;YAEH,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;QACzF,CAAC;KACF,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,4EAA4E;AAC5E,uEAAuE;AACvE,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,QAAQ,CAAC;IACzE,OAAO,eAAe,CAAC;QACrB,MAAM,EAAE,QAAQ;QAChB,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC;QAC9B,YAAY,EAAE,UAAU,KAAK,EAAE;QAC/B,cAAc,EAAE,QAAQ;QACxB,WAAW,EACT,qJAAqJ;KACxJ,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,iFAAiF;AACjF,8EAA8E;AAC9E,SAAS,oBAAoB,CAAC,aAAsB;IAClD,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,SAAS,CAAC;IAC1E,OAAO,eAAe,CAAC;QACrB,MAAM,EAAE,OAAO;QACf,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,CAAC,MAAM,EAAE,SAAS,EAAE,KAAK,CAAC;QAChC,YAAY,EAAE,SAAS,KAAK,EAAE;QAC9B,WAAW,EACT,4IAA4I;KAC/I,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,iFAAiF;AACjF,kFAAkF;AAClF,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,gBAAgB,CAAC;IACjF,OAAO,eAAe,CAAC;QACrB,MAAM,EAAE,QAAQ;QAChB,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC;QAC9B,YAAY,EAAE,KAAK;QACnB,WAAW,EACT,kJAAkJ;KACrJ,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,qDAAqD;AACrD,8EAA8E;AAC9E,SAAS,kBAAkB,CAAC,aA
AsB;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,wBAAwB,CAAC;IACxE,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,aAAa,CAAC;IAE9E,OAAO;QACL,KAAK;QACL,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,MAAM,UAAU,GAAG,GAAG,YAAY,OAAO,UAAU,EAAE,CAAC;YACtD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEzB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,eAAe,EAAE;gBACtD,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;gBAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK;oBACL,MAAM,EAAE,UAAU;oBAClB,MAAM,EAAE,KAAK;oBACb,OAAO,EAAE;wBACP,WAAW,EAAE,IAAI;wBACjB,WAAW,EAAE,GAAG;qBACjB;iBACF,CAAC;gBACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC;aACrC,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACpC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;oBAC3D,MAAM,IAAI,KAAK,CACb,iBAAiB,KAAK,8CAA8C,KAAK,EAAE,CAC5E,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAIlC,CAAC;YACF,OAAO;gBACL,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,EAAE;gBACzB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;gBAC9B,WAAW,EAAE,IAAI,CAAC,iBAAiB,IAAI,IAAI;gBAC3C,YAAY,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;aACtC,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Integration tests for benchmark-runner.ts
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Mocks
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
vi.mock("../../eval/judge.js", () => ({
|
|
9
|
+
judgeAssertion: vi.fn(),
|
|
10
|
+
}));
|
|
11
|
+
vi.mock("../sse-helpers.js", () => ({
|
|
12
|
+
sendSSE: vi.fn(),
|
|
13
|
+
sendSSEDone: vi.fn(),
|
|
14
|
+
withHeartbeat: vi.fn((_res, _id, _phase, _msg, fn) => fn()),
|
|
15
|
+
}));
|
|
16
|
+
vi.mock("../../eval/benchmark-history.js", () => ({
|
|
17
|
+
writeHistoryEntry: vi.fn(),
|
|
18
|
+
}));
|
|
19
|
+
import { assembleBulkResult, runSingleCaseSSE } from "../benchmark-runner.js";
|
|
20
|
+
import { judgeAssertion } from "../../eval/judge.js";
|
|
21
|
+
import { sendSSE } from "../sse-helpers.js";
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Helpers
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
function makeCase(overrides = {}) {
|
|
26
|
+
return {
|
|
27
|
+
eval_id: 1,
|
|
28
|
+
eval_name: "test-case",
|
|
29
|
+
status: "pass",
|
|
30
|
+
error_message: null,
|
|
31
|
+
pass_rate: 1,
|
|
32
|
+
durationMs: 100,
|
|
33
|
+
tokens: 50,
|
|
34
|
+
inputTokens: 20,
|
|
35
|
+
outputTokens: 30,
|
|
36
|
+
output: "some output",
|
|
37
|
+
assertions: [
|
|
38
|
+
{ id: "a1", text: "checks something", pass: true, reasoning: "ok" },
|
|
39
|
+
],
|
|
40
|
+
...overrides,
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
function makeEvalCase(overrides = {}) {
|
|
44
|
+
return {
|
|
45
|
+
id: 1,
|
|
46
|
+
name: "test eval",
|
|
47
|
+
prompt: "do something",
|
|
48
|
+
expected_output: "something done",
|
|
49
|
+
files: [],
|
|
50
|
+
assertions: [
|
|
51
|
+
{ id: "a1", text: "output is correct", type: "boolean" },
|
|
52
|
+
],
|
|
53
|
+
...overrides,
|
|
54
|
+
};
|
|
55
|
+
}
|
|
56
|
+
function makeMockClient(overrides = {}) {
|
|
57
|
+
return {
|
|
58
|
+
model: "test-model",
|
|
59
|
+
generate: vi.fn().mockResolvedValue({
|
|
60
|
+
text: "generated output",
|
|
61
|
+
durationMs: 150,
|
|
62
|
+
inputTokens: 10,
|
|
63
|
+
outputTokens: 20,
|
|
64
|
+
}),
|
|
65
|
+
...overrides,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
function makeMockRes() {
|
|
69
|
+
return { write: vi.fn() };
|
|
70
|
+
}
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// assembleBulkResult
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
describe("assembleBulkResult", () => {
|
|
75
|
+
const defaultMeta = {
|
|
76
|
+
model: "claude-sonnet",
|
|
77
|
+
skillName: "my-skill",
|
|
78
|
+
runType: "benchmark",
|
|
79
|
+
provider: "anthropic",
|
|
80
|
+
};
|
|
81
|
+
it("computes overall_pass_rate correctly from cases", () => {
|
|
82
|
+
const cases = [
|
|
83
|
+
makeCase({
|
|
84
|
+
assertions: [
|
|
85
|
+
{ id: "a1", text: "x", pass: true, reasoning: "" },
|
|
86
|
+
{ id: "a2", text: "y", pass: false, reasoning: "" },
|
|
87
|
+
],
|
|
88
|
+
}),
|
|
89
|
+
makeCase({
|
|
90
|
+
eval_id: 2,
|
|
91
|
+
assertions: [
|
|
92
|
+
{ id: "a3", text: "z", pass: true, reasoning: "" },
|
|
93
|
+
{ id: "a4", text: "w", pass: true, reasoning: "" },
|
|
94
|
+
],
|
|
95
|
+
}),
|
|
96
|
+
];
|
|
97
|
+
const result = assembleBulkResult(cases, defaultMeta);
|
|
98
|
+
// 3 passed out of 4 total
|
|
99
|
+
expect(result.overall_pass_rate).toBe(0.75);
|
|
100
|
+
});
|
|
101
|
+
it("sets type from meta.runType", () => {
|
|
102
|
+
const result = assembleBulkResult([makeCase()], { ...defaultMeta, runType: "baseline" });
|
|
103
|
+
expect(result.type).toBe("baseline");
|
|
104
|
+
});
|
|
105
|
+
it("computes totalDurationMs from cases", () => {
|
|
106
|
+
const cases = [
|
|
107
|
+
makeCase({ durationMs: 200 }),
|
|
108
|
+
makeCase({ eval_id: 2, durationMs: 350 }),
|
|
109
|
+
];
|
|
110
|
+
const result = assembleBulkResult(cases, defaultMeta);
|
|
111
|
+
expect(result.totalDurationMs).toBe(550);
|
|
112
|
+
});
|
|
113
|
+
it("computes totalInputTokens and totalOutputTokens when present", () => {
|
|
114
|
+
const cases = [
|
|
115
|
+
makeCase({ inputTokens: 10, outputTokens: 20 }),
|
|
116
|
+
makeCase({ eval_id: 2, inputTokens: 30, outputTokens: 40 }),
|
|
117
|
+
];
|
|
118
|
+
const result = assembleBulkResult(cases, defaultMeta);
|
|
119
|
+
expect(result.totalInputTokens).toBe(40);
|
|
120
|
+
expect(result.totalOutputTokens).toBe(60);
|
|
121
|
+
});
|
|
122
|
+
it("sets totalInputTokens/totalOutputTokens to null when no cases have them", () => {
|
|
123
|
+
const cases = [
|
|
124
|
+
makeCase({ inputTokens: undefined, outputTokens: undefined }),
|
|
125
|
+
];
|
|
126
|
+
const result = assembleBulkResult(cases, defaultMeta);
|
|
127
|
+
expect(result.totalInputTokens).toBeNull();
|
|
128
|
+
expect(result.totalOutputTokens).toBeNull();
|
|
129
|
+
});
|
|
130
|
+
it("returns 0 overall_pass_rate when there are no assertions", () => {
|
|
131
|
+
const cases = [makeCase({ assertions: [] })];
|
|
132
|
+
const result = assembleBulkResult(cases, defaultMeta);
|
|
133
|
+
expect(result.overall_pass_rate).toBe(0);
|
|
134
|
+
});
|
|
135
|
+
it("sets scope to bulk", () => {
|
|
136
|
+
const result = assembleBulkResult([makeCase()], defaultMeta);
|
|
137
|
+
expect(result.scope).toBe("bulk");
|
|
138
|
+
});
|
|
139
|
+
it("preserves model, skill_name, and provider from meta", () => {
|
|
140
|
+
const result = assembleBulkResult([makeCase()], defaultMeta);
|
|
141
|
+
expect(result.model).toBe("claude-sonnet");
|
|
142
|
+
expect(result.skill_name).toBe("my-skill");
|
|
143
|
+
expect(result.provider).toBe("anthropic");
|
|
144
|
+
});
|
|
145
|
+
});
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
// runSingleCaseSSE
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
describe("runSingleCaseSSE", () => {
|
|
150
|
+
beforeEach(() => {
|
|
151
|
+
vi.clearAllMocks();
|
|
152
|
+
});
|
|
153
|
+
it("emits case_start, output_ready, assertion_result, case_complete SSE events", async () => {
|
|
154
|
+
const res = makeMockRes();
|
|
155
|
+
const client = makeMockClient();
|
|
156
|
+
const evalCase = makeEvalCase();
|
|
157
|
+
vi.mocked(judgeAssertion).mockResolvedValue({
|
|
158
|
+
id: "a1",
|
|
159
|
+
text: "output is correct",
|
|
160
|
+
pass: true,
|
|
161
|
+
reasoning: "looks good",
|
|
162
|
+
});
|
|
163
|
+
await runSingleCaseSSE({
|
|
164
|
+
res,
|
|
165
|
+
evalCase,
|
|
166
|
+
systemPrompt: "you are a helper",
|
|
167
|
+
client,
|
|
168
|
+
isAborted: () => false,
|
|
169
|
+
});
|
|
170
|
+
const sseEvents = vi.mocked(sendSSE).mock.calls.map((c) => c[1]);
|
|
171
|
+
expect(sseEvents).toContain("case_start");
|
|
172
|
+
expect(sseEvents).toContain("output_ready");
|
|
173
|
+
expect(sseEvents).toContain("assertion_result");
|
|
174
|
+
expect(sseEvents).toContain("case_complete");
|
|
175
|
+
});
|
|
176
|
+
it("maps inputTokens and outputTokens from LLM result to BenchmarkCase", async () => {
|
|
177
|
+
const res = makeMockRes();
|
|
178
|
+
const client = makeMockClient({
|
|
179
|
+
model: "test-model",
|
|
180
|
+
generate: vi.fn().mockResolvedValue({
|
|
181
|
+
text: "output text",
|
|
182
|
+
durationMs: 200,
|
|
183
|
+
inputTokens: 42,
|
|
184
|
+
outputTokens: 58,
|
|
185
|
+
}),
|
|
186
|
+
});
|
|
187
|
+
const evalCase = makeEvalCase();
|
|
188
|
+
vi.mocked(judgeAssertion).mockResolvedValue({
|
|
189
|
+
id: "a1",
|
|
190
|
+
text: "output is correct",
|
|
191
|
+
pass: true,
|
|
192
|
+
reasoning: "ok",
|
|
193
|
+
});
|
|
194
|
+
const result = await runSingleCaseSSE({
|
|
195
|
+
res,
|
|
196
|
+
evalCase,
|
|
197
|
+
systemPrompt: "system",
|
|
198
|
+
client,
|
|
199
|
+
isAborted: () => false,
|
|
200
|
+
});
|
|
201
|
+
expect(result.inputTokens).toBe(42);
|
|
202
|
+
expect(result.outputTokens).toBe(58);
|
|
203
|
+
expect(result.tokens).toBe(100); // 42 + 58
|
|
204
|
+
});
|
|
205
|
+
it("handles LLM error gracefully (returns error status case)", async () => {
|
|
206
|
+
const res = makeMockRes();
|
|
207
|
+
const client = makeMockClient({
|
|
208
|
+
model: "test-model",
|
|
209
|
+
generate: vi.fn().mockRejectedValue(new Error("LLM timeout")),
|
|
210
|
+
});
|
|
211
|
+
const evalCase = makeEvalCase();
|
|
212
|
+
const result = await runSingleCaseSSE({
|
|
213
|
+
res,
|
|
214
|
+
evalCase,
|
|
215
|
+
systemPrompt: "system",
|
|
216
|
+
client,
|
|
217
|
+
isAborted: () => false,
|
|
218
|
+
});
|
|
219
|
+
expect(result.status).toBe("error");
|
|
220
|
+
expect(result.error_message).toBe("LLM timeout");
|
|
221
|
+
expect(result.pass_rate).toBe(0);
|
|
222
|
+
expect(result.assertions).toEqual([]);
|
|
223
|
+
});
|
|
224
|
+
it("sets status to fail when an assertion fails", async () => {
|
|
225
|
+
const res = makeMockRes();
|
|
226
|
+
const client = makeMockClient();
|
|
227
|
+
const evalCase = makeEvalCase({
|
|
228
|
+
assertions: [
|
|
229
|
+
{ id: "a1", text: "first check", type: "boolean" },
|
|
230
|
+
{ id: "a2", text: "second check", type: "boolean" },
|
|
231
|
+
],
|
|
232
|
+
});
|
|
233
|
+
vi.mocked(judgeAssertion)
|
|
234
|
+
.mockResolvedValueOnce({ id: "a1", text: "first check", pass: true, reasoning: "ok" })
|
|
235
|
+
.mockResolvedValueOnce({ id: "a2", text: "second check", pass: false, reasoning: "nope" });
|
|
236
|
+
const result = await runSingleCaseSSE({
|
|
237
|
+
res,
|
|
238
|
+
evalCase,
|
|
239
|
+
systemPrompt: "system",
|
|
240
|
+
client,
|
|
241
|
+
isAborted: () => false,
|
|
242
|
+
});
|
|
243
|
+
expect(result.status).toBe("fail");
|
|
244
|
+
expect(result.pass_rate).toBe(0.5);
|
|
245
|
+
expect(result.assertions).toHaveLength(2);
|
|
246
|
+
});
|
|
247
|
+
it("stops evaluating assertions when aborted", async () => {
|
|
248
|
+
const res = makeMockRes();
|
|
249
|
+
const client = makeMockClient();
|
|
250
|
+
const evalCase = makeEvalCase({
|
|
251
|
+
assertions: [
|
|
252
|
+
{ id: "a1", text: "first", type: "boolean" },
|
|
253
|
+
{ id: "a2", text: "second", type: "boolean" },
|
|
254
|
+
],
|
|
255
|
+
});
|
|
256
|
+
// Abort after the first assertion loop check
|
|
257
|
+
let callCount = 0;
|
|
258
|
+
const isAborted = () => {
|
|
259
|
+
callCount++;
|
|
260
|
+
return callCount > 1; // first call returns false, second returns true
|
|
261
|
+
};
|
|
262
|
+
vi.mocked(judgeAssertion).mockResolvedValue({
|
|
263
|
+
id: "a1",
|
|
264
|
+
text: "first",
|
|
265
|
+
pass: true,
|
|
266
|
+
reasoning: "ok",
|
|
267
|
+
});
|
|
268
|
+
const result = await runSingleCaseSSE({
|
|
269
|
+
res,
|
|
270
|
+
evalCase,
|
|
271
|
+
systemPrompt: "system",
|
|
272
|
+
client,
|
|
273
|
+
isAborted,
|
|
274
|
+
});
|
|
275
|
+
// Only one assertion should have been evaluated
|
|
276
|
+
expect(result.assertions).toHaveLength(1);
|
|
277
|
+
});
|
|
278
|
+
it("passes totalCases through to case_start event", async () => {
|
|
279
|
+
const res = makeMockRes();
|
|
280
|
+
const client = makeMockClient();
|
|
281
|
+
const evalCase = makeEvalCase();
|
|
282
|
+
vi.mocked(judgeAssertion).mockResolvedValue({
|
|
283
|
+
id: "a1",
|
|
284
|
+
text: "output is correct",
|
|
285
|
+
pass: true,
|
|
286
|
+
reasoning: "ok",
|
|
287
|
+
});
|
|
288
|
+
await runSingleCaseSSE({
|
|
289
|
+
res,
|
|
290
|
+
evalCase,
|
|
291
|
+
systemPrompt: "system",
|
|
292
|
+
client,
|
|
293
|
+
isAborted: () => false,
|
|
294
|
+
totalCases: 5,
|
|
295
|
+
});
|
|
296
|
+
const caseStartCall = vi.mocked(sendSSE).mock.calls.find((c) => c[1] === "case_start");
|
|
297
|
+
expect(caseStartCall).toBeDefined();
|
|
298
|
+
expect(caseStartCall[2].total).toBe(5);
|
|
299
|
+
});
|
|
300
|
+
});
|
|
301
|
+
//# sourceMappingURL=benchmark-runner.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmark-runner.test.js","sourceRoot":"","sources":["../../../src/eval-server/__tests__/benchmark-runner.test.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4CAA4C;AAC5C,8EAA8E;AAE9E,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAK9D,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,EAAE,CAAC,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE,CAAC,CAAC;IACpC,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE;CACxB,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE,CAAC,CAAC;IAClC,OAAO,EAAE,EAAE,CAAC,EAAE,EAAE;IAChB,WAAW,EAAE,EAAE,CAAC,EAAE,EAAE;IACpB,aAAa,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,IAAa,EAAE,GAAY,EAAE,MAAe,EAAE,IAAa,EAAE,EAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;CAC/G,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,iCAAiC,EAAE,GAAG,EAAE,CAAC,CAAC;IAChD,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE;CAC3B,CAAC,CAAC,CAAC;AAEJ,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAE5C,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,QAAQ,CAAC,YAAoC,EAAE;IACtD,OAAO;QACL,OAAO,EAAE,CAAC;QACV,SAAS,EAAE,WAAW;QACtB,MAAM,EAAE,MAAM;QACd,aAAa,EAAE,IAAI;QACnB,SAAS,EAAE,CAAC;QACZ,UAAU,EAAE,GAAG;QACf,MAAM,EAAE,EAAE;QACV,WAAW,EAAE,EAAE;QACf,YAAY,EAAE,EAAE;QAChB,MAAM,EAAE,aAAa;QACrB,UAAU,EAAE;YACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE;SACpE;QACD,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CAAC,YAA+B,EAAE;IACrD,OAAO;QACL,EAAE,EAAE,CAAC;QACL,IAAI,EAAE,WAAW;QACjB,MAAM,EAAE,cAAc;QACtB,eAAe,EAAE,gBAAgB;QACjC,KAAK,EAAE,EAAE;QACT,UAAU,EAAE;YACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,SAAS,EAAE;SACzD;QACD,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,YAAgC,EAAE;IACxD,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YAClC,IAAI,EAAE,kBAAkB;YACxB,UAAU,EAAE,GAAG;YACf,WAAW,EAAE,EAAE;YACf,YAAY,EAAE,EAAE;SACjB,CAAC;QACF,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,SAAS,WAAW;IAClB,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE,EAAS,CAAC;AACnC,CAAC;AAED,8EAA8E;
AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,MAAM,WAAW,GAAG;QAClB,KAAK,EAAE,eAAe;QACtB,SAAS,EAAE,UAAU;QACrB,OAAO,EAAE,WAAoB;QAC7B,QAAQ,EAAE,WAAW;KACtB,CAAC;IAEF,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,KAAK,GAAoB;YAC7B,QAAQ,CAAC;gBACP,UAAU,EAAE;oBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE;oBAClD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE;iBACpD;aACF,CAAC;YACF,QAAQ,CAAC;gBACP,OAAO,EAAE,CAAC;gBACV,UAAU,EAAE;oBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE;oBAClD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE;iBACnD;aACF,CAAC;SACH,CAAC;QAEF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,0BAA0B;QAC1B,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,MAAM,GAAG,kBAAkB,CAC/B,CAAC,QAAQ,EAAE,CAAC,EACZ,EAAE,GAAG,WAAW,EAAE,OAAO,EAAE,UAAU,EAAE,CACxC,CAAC;QACF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,KAAK,GAAG;YACZ,QAAQ,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;YAC7B,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;SAC1C,CAAC;QACF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8DAA8D,EAAE,GAAG,EAAE;QACtE,MAAM,KAAK,GAAG;YACZ,QAAQ,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;YAC/C,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;SAC5D,CAAC;QACF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yEAAyE,EAAE,GAAG,EAAE;QACjF,MAAM,KAAK,GAAG;YACZ,QAAQ,CAAC,EAAE,WAAW,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,EAA
E,CAAC;SAC9D,CAAC;QACF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,QAAQ,EAAE,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QAClE,MAAM,KAAK,GAAG,CAAC,QAAQ,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,MAAM,GAAG,kBAAkB,CAAC,CAAC,QAAQ,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,MAAM,GAAG,kBAAkB,CAAC,CAAC,QAAQ,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;IACrB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4EAA4E,EAAE,KAAK,IAAI,EAAE;QAC1F,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,mBAAmB;YACzB,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,YAAY;SACxB,CAAC,CAAC;QAEH,MAAM,gBAAgB,CAAC;YACrB,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,kBAAkB;YAChC,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QAC1C,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QAC5C,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAChD,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,CA
AC,oEAAoE,EAAE,KAAK,IAAI,EAAE;QAClF,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,KAAK,EAAE,YAAY;YACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;gBAClC,IAAI,EAAE,aAAa;gBACnB,UAAU,EAAE,GAAG;gBACf,WAAW,EAAE,EAAE;gBACf,YAAY,EAAE,EAAE;aACjB,CAAC;SACH,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,mBAAmB;YACzB,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;QACxE,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,KAAK,EAAE,YAAY;YACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;SAC9D,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,CAAC;YAC5B,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE;gBAClD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,SAAS,EAAE;aACpD;SACF,CAAC,CAAC;QAEH,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC;aACtB,qBAAqB,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;aACr
F,qBAAqB,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC;QAE7F,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,CAAC;YAC5B,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE;gBAC5C,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE;aAC9C;SACF,CAAC,CAAC;QAEH,6CAA6C;QAC7C,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,SAAS,GAAG,GAAG,EAAE;YACrB,SAAS,EAAE,CAAC;YACZ,OAAO,SAAS,GAAG,CAAC,CAAC,CAAC,gDAAgD;QACxE,CAAC,CAAC;QAEF,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,OAAO;YACb,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS;SACV,CAAC,CAAC;QAEH,gDAAgD;QAChD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,mBAAmB;YACzB,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,gBAAgB,CAAC;YACrB,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;YACtB,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QAEH,MAAM,aAAa,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,YAAY,CAAC,CAAC;QACvF,MAAM,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAE,aAAc,CAAC,CAAC,CAAS,CAAC,KAAK,CA
AC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -48,6 +48,14 @@ const PROVIDER_MODELS = {
|
|
|
48
48
|
{ id: "gemma2:9b", label: "Gemma 2 9B" },
|
|
49
49
|
{ id: "mistral:7b", label: "Mistral 7B" },
|
|
50
50
|
],
|
|
51
|
+
"gemini-cli": [
|
|
52
|
+
{ id: "gemini-2.5-pro", label: "Gemini 2.5 Pro" },
|
|
53
|
+
{ id: "gemini-2.5-flash", label: "Gemini 2.5 Flash" },
|
|
54
|
+
],
|
|
55
|
+
"codex-cli": [
|
|
56
|
+
{ id: "o3", label: "OpenAI o3" },
|
|
57
|
+
{ id: "o4-mini", label: "OpenAI o4-mini" },
|
|
58
|
+
],
|
|
51
59
|
};
|
|
52
60
|
// ---------------------------------------------------------------------------
|
|
53
61
|
// Ollama detection cache — avoids 500ms+ probe on every /api/config request.
|