vskill 0.2.91 → 0.2.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/eval/llm.js CHANGED
@@ -1,17 +1,21 @@
1
1
  // ---------------------------------------------------------------------------
2
- // LLM client for eval commands — supports Claude CLI, Anthropic API, and Ollama
2
+ // LLM client for eval commands — supports multiple CLI tools and API providers
3
3
  //
4
4
  // Provider selection via VSKILL_EVAL_PROVIDER env var:
5
5
  // "claude-cli" — Claude Code CLI (uses your Max/Pro plan, no API key)
6
+ // "codex-cli" — OpenAI Codex CLI (uses ChatGPT subscription or CODEX_API_KEY)
7
+ // "gemini-cli" — Google Gemini CLI (free tier or GOOGLE_API_KEY)
6
8
  // "anthropic" — Anthropic API (requires ANTHROPIC_API_KEY)
7
9
  // "ollama" — Local Ollama server (free, requires ollama running)
8
10
  //
9
11
  // Auto-detection when VSKILL_EVAL_PROVIDER is not set:
10
12
  // 1. claude-cli (default — works everywhere, even inside Claude Code sessions)
11
- // Ollama/Anthropic only used when explicitly set via VSKILL_EVAL_PROVIDER
13
+ // Other providers only used when explicitly set via VSKILL_EVAL_PROVIDER
12
14
  //
13
15
  // Model selection via VSKILL_EVAL_MODEL env var:
14
16
  // claude-cli: "sonnet" | "opus" | "haiku" (default: sonnet)
17
+ // codex-cli: "o4-mini" | "codex-1" | "gpt-5.3-codex" (default: o4-mini)
18
+ // gemini-cli: "gemini-2.5-pro" | "gemini-2.5-flash" (default: gemini-2.5-pro)
15
19
  // anthropic: full model ID (default: claude-sonnet-4-6)
16
20
  // ollama: model name (default: llama3.1:8b)
17
21
  // ---------------------------------------------------------------------------
@@ -27,10 +31,14 @@ export function createLlmClient(overrides) {
27
31
  return createAnthropicClient(modelOverride);
28
32
  case "claude-cli":
29
33
  return createClaudeCliClient(modelOverride);
34
+ case "codex-cli":
35
+ return createCodexCliClient(modelOverride);
36
+ case "gemini-cli":
37
+ return createGeminiCliClient(modelOverride);
30
38
  case "ollama":
31
39
  return createOllamaClient(modelOverride);
32
40
  default:
33
- throw new Error(`Unknown VSKILL_EVAL_PROVIDER: "${provider}". Use "claude-cli", "anthropic", or "ollama".`);
41
+ throw new Error(`Unknown VSKILL_EVAL_PROVIDER: "${provider}". Use "claude-cli", "codex-cli", "gemini-cli", "anthropic", or "ollama".`);
34
42
  }
35
43
  }
36
44
  // ---------------------------------------------------------------------------
@@ -77,31 +85,25 @@ function createAnthropicClient(modelOverride) {
77
85
  },
78
86
  };
79
87
  }
80
- // ---------------------------------------------------------------------------
81
- // Provider: Claude CLI (uses your Max/Pro subscription — no API key needed)
82
- //
83
- // Pipes prompt via stdin to avoid OS argument-length limits (ARG_MAX).
84
- //
85
- // From a plain terminal: npx vskill eval run mobile/appstore
86
- // Select model: VSKILL_EVAL_MODEL=opus npx vskill eval run mobile/appstore
87
- // ---------------------------------------------------------------------------
88
- function createClaudeCliClient(modelOverride) {
89
- const model = modelOverride || process.env.VSKILL_EVAL_MODEL || "sonnet";
88
+ function createCliClient(config) {
90
89
  return {
91
- model: `claude-${model}`,
90
+ model: config.displayModel,
92
91
  async generate(systemPrompt, userPrompt) {
93
92
  const combinedPrompt = `${systemPrompt}\n\n${userPrompt}`;
94
93
  const start = Date.now();
95
94
  const text = await new Promise((resolve, reject) => {
96
- // Strip all CLAUDE* env vars so the child process doesn't detect nesting
97
- const cleanEnv = {};
98
- for (const [k, v] of Object.entries(process.env)) {
99
- if (v !== undefined && !k.startsWith("CLAUDE"))
100
- cleanEnv[k] = v;
95
+ let env;
96
+ if (config.stripEnvPrefix) {
97
+ env = {};
98
+ const prefix = config.stripEnvPrefix;
99
+ for (const [k, v] of Object.entries(process.env)) {
100
+ if (v !== undefined && !k.startsWith(prefix))
101
+ env[k] = v;
102
+ }
101
103
  }
102
- const proc = spawn("claude", ["-p", "--model", model], {
104
+ const proc = spawn(config.binary, config.args, {
103
105
  stdio: ["pipe", "pipe", "pipe"],
104
- env: cleanEnv,
106
+ ...(env ? { env } : {}),
105
107
  });
106
108
  let stdout = "";
107
109
  let stderr = "";
@@ -109,15 +111,15 @@ function createClaudeCliClient(modelOverride) {
109
111
  proc.stderr.on("data", (d) => { stderr += d.toString(); });
110
112
  const timer = setTimeout(() => {
111
113
  proc.kill("SIGTERM");
112
- reject(new Error("Claude CLI timed out after 120s"));
114
+ reject(new Error(`${config.name} CLI timed out after 120s`));
113
115
  }, 120_000);
114
116
  proc.on("error", (err) => {
115
117
  clearTimeout(timer);
116
118
  if (err.code === "ENOENT") {
117
- reject(new Error("Claude CLI not found. Install it:\n npm install -g @anthropic-ai/claude-code\n\nOr use a different provider:\n export VSKILL_EVAL_PROVIDER=ollama"));
119
+ reject(new Error(config.notFoundMsg));
118
120
  }
119
121
  else {
120
- reject(new Error(`Claude CLI failed: ${err.message}`));
122
+ reject(new Error(`${config.name} CLI failed: ${err.message}`));
121
123
  }
122
124
  });
123
125
  proc.on("close", (code) => {
@@ -127,10 +129,9 @@ function createClaudeCliClient(modelOverride) {
127
129
  }
128
130
  else {
129
131
  const errMsg = (stderr || stdout).slice(0, 300);
130
- reject(new Error(`Claude CLI exited with code ${code}${errMsg ? ": " + errMsg : ""}`));
132
+ reject(new Error(`${config.name} CLI exited with code ${code}${errMsg ? ": " + errMsg : ""}`));
131
133
  }
132
134
  });
133
- // Pipe prompt via stdin — avoids ARG_MAX limits for large SKILL.md files
134
135
  proc.stdin.end(combinedPrompt);
135
136
  });
136
137
  return { text, durationMs: Date.now() - start, inputTokens: null, outputTokens: null };
@@ -138,6 +139,48 @@ function createClaudeCliClient(modelOverride) {
138
139
  };
139
140
  }
140
141
  // ---------------------------------------------------------------------------
142
+ // Provider: Claude CLI (uses your Max/Pro subscription — no API key needed)
143
+ // Strips CLAUDE* env vars so the child process doesn't detect nesting.
144
+ // ---------------------------------------------------------------------------
145
/**
 * Builds the eval client that shells out to the Claude Code CLI (`claude`).
 *
 * Model precedence: `modelOverride`, then the VSKILL_EVAL_MODEL environment
 * variable, then "sonnet".
 *
 * @param {string} [modelOverride] - Model alias that wins over the environment.
 * @returns Whatever createCliClient returns for this configuration.
 */
function createClaudeCliClient(modelOverride) {
    const chosenModel = modelOverride || process.env.VSKILL_EVAL_MODEL || "sonnet";
    const cliArgs = ["-p", "--model", chosenModel];
    const missingBinaryHelp = "Claude CLI not found. Install it:\n npm install -g @anthropic-ai/claude-code\n\nOr use a different provider:\n export VSKILL_EVAL_PROVIDER=ollama";
    const cliConfig = {
        binary: "claude",
        name: "Claude",
        args: cliArgs,
        displayModel: `claude-${chosenModel}`,
        // Environment variables whose names start with this prefix are withheld from the child process.
        stripEnvPrefix: "CLAUDE",
        notFoundMsg: missingBinaryHelp,
    };
    return createCliClient(cliConfig);
}
156
+ // ---------------------------------------------------------------------------
157
+ // Provider: Codex CLI (uses your ChatGPT subscription — or CODEX_API_KEY for CI)
158
+ // ---------------------------------------------------------------------------
159
/**
 * Builds the eval client that shells out to the Codex CLI (`codex exec`).
 *
 * Model precedence: `modelOverride`, then the VSKILL_EVAL_MODEL environment
 * variable, then "o4-mini". No environment variables are stripped for this
 * provider (no `stripEnvPrefix` is supplied).
 *
 * @param {string} [modelOverride] - Model name that wins over the environment.
 * @returns Whatever createCliClient returns for this configuration.
 */
function createCodexCliClient(modelOverride) {
    const chosenModel = modelOverride || process.env.VSKILL_EVAL_MODEL || "o4-mini";
    const cliArgs = ["exec", "--model", chosenModel];
    const missingBinaryHelp = "Codex CLI not found. Install it:\n npm install -g @openai/codex\n\nOr use a different provider:\n export VSKILL_EVAL_PROVIDER=claude-cli";
    const cliConfig = {
        binary: "codex",
        name: "Codex",
        args: cliArgs,
        displayModel: `codex-${chosenModel}`,
        notFoundMsg: missingBinaryHelp,
    };
    return createCliClient(cliConfig);
}
169
+ // ---------------------------------------------------------------------------
170
+ // Provider: Gemini CLI (free tier — 60 req/min, 1000 req/day, or GOOGLE_API_KEY)
171
+ // NOTE: Gemini CLI headless flags are provisional — verify against actual binary.
172
+ // ---------------------------------------------------------------------------
173
/**
 * Builds the eval client that shells out to the Gemini CLI (`gemini`).
 *
 * Model precedence: `modelOverride`, then the VSKILL_EVAL_MODEL environment
 * variable, then "gemini-2.5-pro". Unlike the other CLI providers, the
 * display model is the bare model name with no provider prefix.
 *
 * @param {string} [modelOverride] - Model name that wins over the environment.
 * @returns Whatever createCliClient returns for this configuration.
 */
function createGeminiCliClient(modelOverride) {
    const chosenModel = modelOverride || process.env.VSKILL_EVAL_MODEL || "gemini-2.5-pro";
    // NOTE(review): these headless flags are marked provisional in this file — verify against the real binary.
    const cliArgs = ["-p", "--model", chosenModel];
    const missingBinaryHelp = "Gemini CLI not found. Install it:\n npm install -g @google/gemini-cli\n\nOr use a different provider:\n export VSKILL_EVAL_PROVIDER=claude-cli";
    const cliConfig = {
        binary: "gemini",
        name: "Gemini",
        args: cliArgs,
        displayModel: chosenModel,
        notFoundMsg: missingBinaryHelp,
    };
    return createCliClient(cliConfig);
}
183
+ // ---------------------------------------------------------------------------
141
184
  // Provider: Ollama (local models — free, no API key)
142
185
  // ---------------------------------------------------------------------------
143
186
  function createOllamaClient(modelOverride) {
@@ -1 +1 @@
1
- {"version":3,"file":"llm.js","sourceRoot":"","sources":["../../src/eval/llm.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,gFAAgF;AAChF,EAAE;AACF,uDAAuD;AACvD,wEAAwE;AACxE,8DAA8D;AAC9D,uEAAuE;AACvE,EAAE;AACF,uDAAuD;AACvD,iFAAiF;AACjF,4EAA4E;AAC5E,EAAE;AACF,iDAAiD;AACjD,+DAA+D;AAC/D,4DAA4D;AAC5D,mDAAmD;AACnD,8EAA8E;AAE9E,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAgB3C,SAAS,cAAc;IACrB,OAAO,YAAY,CAAC;AACtB,CAAC;AAOD,MAAM,UAAU,eAAe,CAAC,SAAwB;IACtD,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,cAAc,EAAE,CAAiB,CAAC;IAC/G,MAAM,aAAa,GAAG,SAAS,EAAE,KAAK,CAAC;IACvC,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,WAAW;YACd,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,YAAY;YACf,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,QAAQ;YACX,OAAO,kBAAkB,CAAC,aAAa,CAAC,CAAC;QAC3C;YACE,MAAM,IAAI,KAAK,CACb,kCAAkC,QAAQ,gDAAgD,CAC3F,CAAC;IACN,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,aAAa,GAAG,mBAAmB,CAAC;IAE1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;IAC7C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CACb,sRAAsR,CACvR,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,aAAa,CAAC;IAC9E,IAAI,cAAc,GAAQ,IAAI,CAAC;IAE/B,OAAO;QACL,KAAK;QACL,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;gBACjE,cAAc,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;YAC7C,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;YAC9D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,MAAM,CACnD;oBACE,KAAK;oBACL,MAAM,EAAE,YAAY;oBACpB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;oBACjD,UAAU,EAAE,IAAI;iBACjB,EACD,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAC9B,CAAC;gBACF,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;gBAEtC,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,K
AAK,MAAM,CAAC,CAAC;gBACvE,MAAM,IAAI,GAAG,SAAS,IAAI,MAAM,IAAI,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpE,OAAO;oBACL,IAAI;oBACJ,UAAU;oBACV,WAAW,EAAE,QAAQ,CAAC,KAAK,EAAE,YAAY,IAAI,IAAI;oBACjD,YAAY,EAAE,QAAQ,CAAC,KAAK,EAAE,aAAa,IAAI,IAAI;iBACpD,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,YAAY,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,4EAA4E;AAC5E,EAAE;AACF,uEAAuE;AACvE,EAAE;AACF,6DAA6D;AAC7D,oFAAoF;AACpF,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,QAAQ,CAAC;IAEzE,OAAO;QACL,KAAK,EAAE,UAAU,KAAK,EAAE;QACxB,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,MAAM,cAAc,GAAG,GAAG,YAAY,OAAO,UAAU,EAAE,CAAC;YAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEzB,MAAM,IAAI,GAAG,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACzD,yEAAyE;gBACzE,MAAM,QAAQ,GAA2B,EAAE,CAAC;gBAC5C,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;oBACjD,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC;wBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBAClE,CAAC;gBACD,MAAM,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC,EAAE;oBACrD,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;oBAC/B,GAAG,EAAE,QAAQ;iBACd,CAAC,CAAC;gBAEH,IAAI,MAAM,GAAG,EAAE,CAAC;gBAChB,IAAI,MAAM,GAAG,EAAE,CAAC;gBAChB,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEnE,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;oBAC5B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACrB,MAAM,CAAC,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC,CAAC;gBACvD,CAAC,EAAE,OAAO,CAAC,CAAC;gBAEZ,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAA0B,EAAE,EAAE;oBAC9C,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;wBAC1B,MAAM,CAAC,IAAI,KAAK,CACd,qJAAqJ,CACtJ,CAAC,CAAC;oBACL,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,IAAI,KAAK,CAAC,sBAAsB,GAAG,CAAC,OAA
O,EAAE,CAAC,CAAC,CAAC;oBACzD,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;oBACxB,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;wBACf,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;oBACzB,CAAC;yBAAM,CAAC;wBACN,MAAM,MAAM,GAAG,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;wBAChD,MAAM,CAAC,IAAI,KAAK,CAAC,+BAA+B,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;oBACzF,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,yEAAyE;gBACzE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;YACjC,CAAC,CAAC,CAAC;YAEH,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;QACzF,CAAC;KACF,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,qDAAqD;AACrD,8EAA8E;AAC9E,SAAS,kBAAkB,CAAC,aAAsB;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,wBAAwB,CAAC;IACxE,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,aAAa,CAAC;IAE9E,OAAO;QACL,KAAK;QACL,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,MAAM,UAAU,GAAG,GAAG,YAAY,OAAO,UAAU,EAAE,CAAC;YACtD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEzB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,eAAe,EAAE;gBACtD,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;gBAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK;oBACL,MAAM,EAAE,UAAU;oBAClB,MAAM,EAAE,KAAK;oBACb,OAAO,EAAE;wBACP,WAAW,EAAE,IAAI;wBACjB,WAAW,EAAE,GAAG;qBACjB;iBACF,CAAC;gBACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC;aACrC,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACpC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;oBAC3D,MAAM,IAAI,KAAK,CACb,iBAAiB,KAAK,8CAA8C,KAAK,EAAE,CAC5E,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAIlC,CAAC;YACF,OAAO;gBACL,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,EAAE;gBACzB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;gBAC9B,WAAW,EAAE,IAAI,CAAC,iBAAiB,IAAI,IAAI;gBAC3C,YAAY,EAAE,IAAI,CAAC,UAAU,IAAI,
IAAI;aACtC,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"llm.js","sourceRoot":"","sources":["../../src/eval/llm.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,+EAA+E;AAC/E,EAAE;AACF,uDAAuD;AACvD,wEAAwE;AACxE,iFAAiF;AACjF,mEAAmE;AACnE,8DAA8D;AAC9D,uEAAuE;AACvE,EAAE;AACF,uDAAuD;AACvD,iFAAiF;AACjF,2EAA2E;AAC3E,EAAE;AACF,iDAAiD;AACjD,+DAA+D;AAC/D,4EAA4E;AAC5E,iFAAiF;AACjF,4DAA4D;AAC5D,mDAAmD;AACnD,8EAA8E;AAE9E,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAgB3C,SAAS,cAAc;IACrB,OAAO,YAAY,CAAC;AACtB,CAAC;AAOD,MAAM,UAAU,eAAe,CAAC,SAAwB;IACtD,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,cAAc,EAAE,CAAiB,CAAC;IAC/G,MAAM,aAAa,GAAG,SAAS,EAAE,KAAK,CAAC;IACvC,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,WAAW;YACd,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,YAAY;YACf,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,WAAW;YACd,OAAO,oBAAoB,CAAC,aAAa,CAAC,CAAC;QAC7C,KAAK,YAAY;YACf,OAAO,qBAAqB,CAAC,aAAa,CAAC,CAAC;QAC9C,KAAK,QAAQ;YACX,OAAO,kBAAkB,CAAC,aAAa,CAAC,CAAC;QAC3C;YACE,MAAM,IAAI,KAAK,CACb,kCAAkC,QAAQ,2EAA2E,CACtH,CAAC;IACN,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,aAAa,GAAG,mBAAmB,CAAC;IAE1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;IAC7C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CACb,sRAAsR,CACvR,CAAC;IACJ,CAAC;IAED,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,aAAa,CAAC;IAC9E,IAAI,cAAc,GAAQ,IAAI,CAAC;IAE/B,OAAO;QACL,KAAK;QACL,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,CAAC;gBACjE,cAAc,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;YAC7C,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAC;YACzC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,OAAO,CAAC,CAAC;YAC9D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,MAAM,CACnD;oBACE,KAAK;oBACL,MAAM,EAAE,YAAY;oBACpB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;oBACjD,UAAU,EAAE,IAAI;iBACjB,EACD,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CA
C9B,CAAC;gBACF,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC;gBAEtC,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;gBACvE,MAAM,IAAI,GAAG,SAAS,IAAI,MAAM,IAAI,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpE,OAAO;oBACL,IAAI;oBACJ,UAAU;oBACV,WAAW,EAAE,QAAQ,CAAC,KAAK,EAAE,YAAY,IAAI,IAAI;oBACjD,YAAY,EAAE,QAAQ,CAAC,KAAK,EAAE,aAAa,IAAI,IAAI;iBACpD,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,YAAY,CAAC,OAAO,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC;AAiBD,SAAS,eAAe,CAAC,MAAiB;IACxC,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,YAAY;QAC1B,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,MAAM,cAAc,GAAG,GAAG,YAAY,OAAO,UAAU,EAAE,CAAC;YAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEzB,MAAM,IAAI,GAAG,MAAM,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;gBACzD,IAAI,GAAuC,CAAC;gBAC5C,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;oBAC1B,GAAG,GAAG,EAAE,CAAC;oBACT,MAAM,MAAM,GAAG,MAAM,CAAC,cAAc,CAAC;oBACrC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;wBACjD,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;4BAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;oBAC3D,CAAC;gBACH,CAAC;gBAED,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,IAAI,EAAE;oBAC7C,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;oBAC/B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBACxB,CAAC,CAAC;gBAEH,IAAI,MAAM,GAAG,EAAE,CAAC;gBAChB,IAAI,MAAM,GAAG,EAAE,CAAC;gBAChB,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBACnE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAS,EAAE,EAAE,GAAG,MAAM,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEnE,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE;oBAC5B,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBACrB,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,MAAM,CAAC,IAAI,2BAA2B,CAAC,CAAC,CAAC;gBAC/D,CAAC,EAAE,OAAO,CAAC,CAAC;gBAEZ,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAA0B,EAAE,EAAE;oBAC9C,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,IAAI,GAAG,CAAC,IAAI,KAAK
,QAAQ,EAAE,CAAC;wBAC1B,MAAM,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC;oBACxC,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,IAAI,KAAK,CAAC,GAAG,MAAM,CAAC,IAAI,gBAAgB,GAAG,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;oBACjE,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,EAAE;oBACxB,YAAY,CAAC,KAAK,CAAC,CAAC;oBACpB,IAAI,IAAI,KAAK,CAAC,EAAE,CAAC;wBACf,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;oBACzB,CAAC;yBAAM,CAAC;wBACN,MAAM,MAAM,GAAG,CAAC,MAAM,IAAI,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;wBAChD,MAAM,CAAC,IAAI,KAAK,CACd,GAAG,MAAM,CAAC,IAAI,yBAAyB,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC5E,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;YACjC,CAAC,CAAC,CAAC;YAEH,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,EAAE,WAAW,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC;QACzF,CAAC;KACF,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,4EAA4E;AAC5E,uEAAuE;AACvE,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,QAAQ,CAAC;IACzE,OAAO,eAAe,CAAC;QACrB,MAAM,EAAE,QAAQ;QAChB,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC;QAC9B,YAAY,EAAE,UAAU,KAAK,EAAE;QAC/B,cAAc,EAAE,QAAQ;QACxB,WAAW,EACT,qJAAqJ;KACxJ,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,iFAAiF;AACjF,8EAA8E;AAC9E,SAAS,oBAAoB,CAAC,aAAsB;IAClD,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,SAAS,CAAC;IAC1E,OAAO,eAAe,CAAC;QACrB,MAAM,EAAE,OAAO;QACf,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,CAAC,MAAM,EAAE,SAAS,EAAE,KAAK,CAAC;QAChC,YAAY,EAAE,SAAS,KAAK,EAAE;QAC9B,WAAW,EACT,4IAA4I;KAC/I,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,iFAAiF;AACjF,kFAAkF;AAClF,8EAA8E;AAC9E,SAAS,qBAAqB,CAAC,aAAsB;IACnD,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,gBAAgB,CAAC;IACjF,OAAO,eAAe,CAAC;QACrB,MAAM,EAAE,QAAQ;QAChB,IAAI,EAAE,QAAQ;QACd,IAAI,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,KAAK,CAAC;QAC9B,YAAY,EAAE,KAAK;QACnB,WAAW,EACT,kJAAkJ;KACrJ,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,qDAAqD;AACrD,8EAA8E;AAC9E,SAAS,kBAAkB,CAAC,
aAAsB;IAChD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,wBAAwB,CAAC;IACxE,MAAM,KAAK,GAAG,aAAa,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,aAAa,CAAC;IAE9E,OAAO;QACL,KAAK;QACL,KAAK,CAAC,QAAQ,CAAC,YAAoB,EAAE,UAAkB;YACrD,MAAM,UAAU,GAAG,GAAG,YAAY,OAAO,UAAU,EAAE,CAAC;YACtD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEzB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,eAAe,EAAE;gBACtD,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;gBAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK;oBACL,MAAM,EAAE,UAAU;oBAClB,MAAM,EAAE,KAAK;oBACb,OAAO,EAAE;wBACP,WAAW,EAAE,IAAI;wBACjB,WAAW,EAAE,GAAG;qBACjB;iBACF,CAAC;gBACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC;aACrC,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,KAAK,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACpC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;oBAC3D,MAAM,IAAI,KAAK,CACb,iBAAiB,KAAK,8CAA8C,KAAK,EAAE,CAC5E,CAAC;gBACJ,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC;YAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAIlC,CAAC;YACF,OAAO;gBACL,IAAI,EAAE,IAAI,CAAC,QAAQ,IAAI,EAAE;gBACzB,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;gBAC9B,WAAW,EAAE,IAAI,CAAC,iBAAiB,IAAI,IAAI;gBAC3C,YAAY,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;aACtC,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,301 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Integration tests for benchmark-runner.ts
3
+ // ---------------------------------------------------------------------------
4
+ import { describe, it, expect, vi, beforeEach } from "vitest";
5
+ // ---------------------------------------------------------------------------
6
+ // Mocks
7
+ // ---------------------------------------------------------------------------
8
// The judge is replaced by a bare mock so each test can script its verdicts.
vi.mock("../../eval/judge.js", () => {
    return { judgeAssertion: vi.fn() };
});
// SSE helpers become spies; the withHeartbeat mock simply invokes the callback
// it receives as its fifth argument and returns that callback's result.
vi.mock("../sse-helpers.js", () => {
    const runTaskDirectly = (_res, _id, _phase, _msg, task) => task();
    return {
        sendSSE: vi.fn(),
        sendSSEDone: vi.fn(),
        withHeartbeat: vi.fn(runTaskDirectly),
    };
});
// History writes are replaced by a no-op mock.
vi.mock("../../eval/benchmark-history.js", () => {
    return { writeHistoryEntry: vi.fn() };
});
19
+ import { assembleBulkResult, runSingleCaseSSE } from "../benchmark-runner.js";
20
+ import { judgeAssertion } from "../../eval/judge.js";
21
+ import { sendSSE } from "../sse-helpers.js";
22
+ // ---------------------------------------------------------------------------
23
+ // Helpers
24
+ // ---------------------------------------------------------------------------
25
/**
 * Builds a benchmark-case fixture: a passing case with one passing assertion.
 *
 * @param {object} [overrides] - Properties that replace the defaults (shallow merge).
 * @returns {object} A fresh fixture object on every call.
 */
function makeCase(overrides = {}) {
    const passingAssertion = { id: "a1", text: "checks something", pass: true, reasoning: "ok" };
    const defaults = {
        eval_id: 1,
        eval_name: "test-case",
        status: "pass",
        error_message: null,
        pass_rate: 1,
        durationMs: 100,
        tokens: 50,
        inputTokens: 20,
        outputTokens: 30,
        output: "some output",
        assertions: [passingAssertion],
    };
    return { ...defaults, ...overrides };
}
43
/**
 * Builds an eval-case fixture: one prompt with a single boolean assertion.
 *
 * @param {object} [overrides] - Properties that replace the defaults (shallow merge).
 * @returns {object} A fresh fixture object on every call.
 */
function makeEvalCase(overrides = {}) {
    const booleanAssertion = { id: "a1", text: "output is correct", type: "boolean" };
    const defaults = {
        id: 1,
        name: "test eval",
        prompt: "do something",
        expected_output: "something done",
        files: [],
        assertions: [booleanAssertion],
    };
    return { ...defaults, ...overrides };
}
56
/**
 * Builds a stand-in LLM client whose `generate` mock resolves to a canned result.
 *
 * @param {object} [overrides] - Properties that replace `model` and/or `generate`.
 * @returns {object} An object with `model` and `generate`, plus any overrides.
 */
function makeMockClient(overrides = {}) {
    const cannedResult = {
        text: "generated output",
        durationMs: 150,
        inputTokens: 10,
        outputTokens: 20,
    };
    const generate = vi.fn().mockResolvedValue(cannedResult);
    return { model: "test-model", generate, ...overrides };
}
68
/**
 * Builds a minimal response stand-in exposing only a mocked `write` method.
 *
 * @returns {object} An object whose `write` property is a fresh mock function.
 */
function makeMockRes() {
    const write = vi.fn();
    return { write };
}
71
+ // ---------------------------------------------------------------------------
72
+ // assembleBulkResult
73
+ // ---------------------------------------------------------------------------
74
// Checks how assembleBulkResult aggregates per-case results and copies run metadata.
describe("assembleBulkResult", () => {
    // Metadata shared by every test; a test that needs a variation spreads and overrides it.
    const baseMeta = {
        model: "claude-sonnet",
        skillName: "my-skill",
        runType: "benchmark",
        provider: "anthropic",
    };
    // Shorthand for a judged assertion record with empty reasoning.
    const verdict = (id, text, pass) => ({ id, text, pass, reasoning: "" });

    it("computes overall_pass_rate correctly from cases", () => {
        const mixedCase = makeCase({
            assertions: [verdict("a1", "x", true), verdict("a2", "y", false)],
        });
        const allPassingCase = makeCase({
            eval_id: 2,
            assertions: [verdict("a3", "z", true), verdict("a4", "w", true)],
        });
        const bulk = assembleBulkResult([mixedCase, allPassingCase], baseMeta);
        // Three of the four assertions passed.
        expect(bulk.overall_pass_rate).toBe(0.75);
    });

    it("sets type from meta.runType", () => {
        const baselineMeta = { ...baseMeta, runType: "baseline" };
        const bulk = assembleBulkResult([makeCase()], baselineMeta);
        expect(bulk.type).toBe("baseline");
    });

    it("computes totalDurationMs from cases", () => {
        const first = makeCase({ durationMs: 200 });
        const second = makeCase({ eval_id: 2, durationMs: 350 });
        const bulk = assembleBulkResult([first, second], baseMeta);
        expect(bulk.totalDurationMs).toBe(550);
    });

    it("computes totalInputTokens and totalOutputTokens when present", () => {
        const first = makeCase({ inputTokens: 10, outputTokens: 20 });
        const second = makeCase({ eval_id: 2, inputTokens: 30, outputTokens: 40 });
        const bulk = assembleBulkResult([first, second], baseMeta);
        expect(bulk.totalInputTokens).toBe(40);
        expect(bulk.totalOutputTokens).toBe(60);
    });

    it("sets totalInputTokens/totalOutputTokens to null when no cases have them", () => {
        const tokenless = makeCase({ inputTokens: undefined, outputTokens: undefined });
        const bulk = assembleBulkResult([tokenless], baseMeta);
        expect(bulk.totalInputTokens).toBeNull();
        expect(bulk.totalOutputTokens).toBeNull();
    });

    it("returns 0 overall_pass_rate when there are no assertions", () => {
        const withoutAssertions = makeCase({ assertions: [] });
        const bulk = assembleBulkResult([withoutAssertions], baseMeta);
        expect(bulk.overall_pass_rate).toBe(0);
    });

    it("sets scope to bulk", () => {
        const bulk = assembleBulkResult([makeCase()], baseMeta);
        expect(bulk.scope).toBe("bulk");
    });

    it("preserves model, skill_name, and provider from meta", () => {
        const bulk = assembleBulkResult([makeCase()], baseMeta);
        expect(bulk.model).toBe("claude-sonnet");
        expect(bulk.skill_name).toBe("my-skill");
        expect(bulk.provider).toBe("anthropic");
    });
});
146
+ // ---------------------------------------------------------------------------
147
+ // runSingleCaseSSE
148
+ // ---------------------------------------------------------------------------
149
// Exercises runSingleCaseSSE against a mocked judge, mocked SSE helpers and a mocked LLM client.
describe("runSingleCaseSSE", () => {
    // Reset recorded calls and scripted return values so tests do not leak into each other.
    beforeEach(() => {
        vi.clearAllMocks();
    });

    // Abort probe used by every test that runs to completion.
    const neverAborted = () => false;

    it("emits case_start, output_ready, assertion_result, case_complete SSE events", async () => {
        vi.mocked(judgeAssertion).mockResolvedValue({
            id: "a1",
            text: "output is correct",
            pass: true,
            reasoning: "looks good",
        });
        await runSingleCaseSSE({
            res: makeMockRes(),
            evalCase: makeEvalCase(),
            systemPrompt: "you are a helper",
            client: makeMockClient(),
            isAborted: neverAborted,
        });
        // The event name is the second argument of each sendSSE call.
        const emittedEvents = vi.mocked(sendSSE).mock.calls.map(([, eventName]) => eventName);
        for (const expectedEvent of ["case_start", "output_ready", "assertion_result", "case_complete"]) {
            expect(emittedEvents).toContain(expectedEvent);
        }
    });

    it("maps inputTokens and outputTokens from LLM result to BenchmarkCase", async () => {
        const llmResult = {
            text: "output text",
            durationMs: 200,
            inputTokens: 42,
            outputTokens: 58,
        };
        const client = makeMockClient({
            model: "test-model",
            generate: vi.fn().mockResolvedValue(llmResult),
        });
        vi.mocked(judgeAssertion).mockResolvedValue({
            id: "a1",
            text: "output is correct",
            pass: true,
            reasoning: "ok",
        });
        const outcome = await runSingleCaseSSE({
            res: makeMockRes(),
            evalCase: makeEvalCase(),
            systemPrompt: "system",
            client,
            isAborted: neverAborted,
        });
        expect(outcome.inputTokens).toBe(42);
        expect(outcome.outputTokens).toBe(58);
        // Total is the sum of both directions: 42 + 58.
        expect(outcome.tokens).toBe(100);
    });

    it("handles LLM error gracefully (returns error status case)", async () => {
        const failingGenerate = vi.fn().mockRejectedValue(new Error("LLM timeout"));
        const client = makeMockClient({
            model: "test-model",
            generate: failingGenerate,
        });
        const outcome = await runSingleCaseSSE({
            res: makeMockRes(),
            evalCase: makeEvalCase(),
            systemPrompt: "system",
            client,
            isAborted: neverAborted,
        });
        expect(outcome.status).toBe("error");
        expect(outcome.error_message).toBe("LLM timeout");
        expect(outcome.pass_rate).toBe(0);
        expect(outcome.assertions).toEqual([]);
    });

    it("sets status to fail when an assertion fails", async () => {
        const evalCase = makeEvalCase({
            assertions: [
                { id: "a1", text: "first check", type: "boolean" },
                { id: "a2", text: "second check", type: "boolean" },
            ],
        });
        // The judge passes the first assertion and fails the second.
        const judge = vi.mocked(judgeAssertion);
        judge.mockResolvedValueOnce({ id: "a1", text: "first check", pass: true, reasoning: "ok" });
        judge.mockResolvedValueOnce({ id: "a2", text: "second check", pass: false, reasoning: "nope" });
        const outcome = await runSingleCaseSSE({
            res: makeMockRes(),
            evalCase,
            systemPrompt: "system",
            client: makeMockClient(),
            isAborted: neverAborted,
        });
        expect(outcome.status).toBe("fail");
        expect(outcome.pass_rate).toBe(0.5);
        expect(outcome.assertions).toHaveLength(2);
    });

    it("stops evaluating assertions when aborted", async () => {
        const evalCase = makeEvalCase({
            assertions: [
                { id: "a1", text: "first", type: "boolean" },
                { id: "a2", text: "second", type: "boolean" },
            ],
        });
        // Reports "not aborted" on the first probe only; every later probe reports an abort.
        let probeCount = 0;
        const isAborted = () => {
            probeCount += 1;
            return probeCount > 1;
        };
        vi.mocked(judgeAssertion).mockResolvedValue({
            id: "a1",
            text: "first",
            pass: true,
            reasoning: "ok",
        });
        const outcome = await runSingleCaseSSE({
            res: makeMockRes(),
            evalCase,
            systemPrompt: "system",
            client: makeMockClient(),
            isAborted,
        });
        // Only the first assertion is expected to have been judged.
        expect(outcome.assertions).toHaveLength(1);
    });

    it("passes totalCases through to case_start event", async () => {
        vi.mocked(judgeAssertion).mockResolvedValue({
            id: "a1",
            text: "output is correct",
            pass: true,
            reasoning: "ok",
        });
        await runSingleCaseSSE({
            res: makeMockRes(),
            evalCase: makeEvalCase(),
            systemPrompt: "system",
            client: makeMockClient(),
            isAborted: neverAborted,
            totalCases: 5,
        });
        const isCaseStart = (callArgs) => callArgs[1] === "case_start";
        const caseStartCall = vi.mocked(sendSSE).mock.calls.find(isCaseStart);
        expect(caseStartCall).toBeDefined();
        // The event payload is the third sendSSE argument.
        const [, , payload] = caseStartCall;
        expect(payload.total).toBe(5);
    });
});
301
+ //# sourceMappingURL=benchmark-runner.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"benchmark-runner.test.js","sourceRoot":"","sources":["../../../src/eval-server/__tests__/benchmark-runner.test.ts"],"names":[],"mappings":"AAAA,8EAA8E;AAC9E,4CAA4C;AAC5C,8EAA8E;AAE9E,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAK9D,8EAA8E;AAC9E,QAAQ;AACR,8EAA8E;AAE9E,EAAE,CAAC,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE,CAAC,CAAC;IACpC,cAAc,EAAE,EAAE,CAAC,EAAE,EAAE;CACxB,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE,CAAC,CAAC;IAClC,OAAO,EAAE,EAAE,CAAC,EAAE,EAAE;IAChB,WAAW,EAAE,EAAE,CAAC,EAAE,EAAE;IACpB,aAAa,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,IAAa,EAAE,GAAY,EAAE,MAAe,EAAE,IAAa,EAAE,EAAiB,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;CAC/G,CAAC,CAAC,CAAC;AAEJ,EAAE,CAAC,IAAI,CAAC,iCAAiC,EAAE,GAAG,EAAE,CAAC,CAAC;IAChD,iBAAiB,EAAE,EAAE,CAAC,EAAE,EAAE;CAC3B,CAAC,CAAC,CAAC;AAEJ,OAAO,EAAE,kBAAkB,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAC;AAE5C,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,SAAS,QAAQ,CAAC,YAAoC,EAAE;IACtD,OAAO;QACL,OAAO,EAAE,CAAC;QACV,SAAS,EAAE,WAAW;QACtB,MAAM,EAAE,MAAM;QACd,aAAa,EAAE,IAAI;QACnB,SAAS,EAAE,CAAC;QACZ,UAAU,EAAE,GAAG;QACf,MAAM,EAAE,EAAE;QACV,WAAW,EAAE,EAAE;QACf,YAAY,EAAE,EAAE;QAChB,MAAM,EAAE,aAAa;QACrB,UAAU,EAAE;YACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE;SACpE;QACD,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,SAAS,YAAY,CAAC,YAA+B,EAAE;IACrD,OAAO;QACL,EAAE,EAAE,CAAC;QACL,IAAI,EAAE,WAAW;QACjB,MAAM,EAAE,cAAc;QACtB,eAAe,EAAE,gBAAgB;QACjC,KAAK,EAAE,EAAE;QACT,UAAU,EAAE;YACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,mBAAmB,EAAE,IAAI,EAAE,SAAS,EAAE;SACzD;QACD,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,SAAS,cAAc,CAAC,YAAgC,EAAE;IACxD,OAAO;QACL,KAAK,EAAE,YAAY;QACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;YAClC,IAAI,EAAE,kBAAkB;YACxB,UAAU,EAAE,GAAG;YACf,WAAW,EAAE,EAAE;YACf,YAAY,EAAE,EAAE;SACjB,CAAC;QACF,GAAG,SAAS;KACb,CAAC;AACJ,CAAC;AAED,SAAS,WAAW;IAClB,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,EAAE,EAAE,EAAS,CAAC;AACnC,CAAC;AAED,8EAA8
E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,MAAM,WAAW,GAAG;QAClB,KAAK,EAAE,eAAe;QACtB,SAAS,EAAE,UAAU;QACrB,OAAO,EAAE,WAAoB;QAC7B,QAAQ,EAAE,WAAW;KACtB,CAAC;IAEF,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,KAAK,GAAoB;YAC7B,QAAQ,CAAC;gBACP,UAAU,EAAE;oBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE;oBAClD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,EAAE,EAAE;iBACpD;aACF,CAAC;YACF,QAAQ,CAAC;gBACP,OAAO,EAAE,CAAC;gBACV,UAAU,EAAE;oBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE;oBAClD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE;iBACnD;aACF,CAAC;SACH,CAAC;QAEF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,0BAA0B;QAC1B,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,MAAM,GAAG,kBAAkB,CAC/B,CAAC,QAAQ,EAAE,CAAC,EACZ,EAAE,GAAG,WAAW,EAAE,OAAO,EAAE,UAAU,EAAE,CACxC,CAAC;QACF,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,KAAK,GAAG;YACZ,QAAQ,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;YAC7B,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC;SAC1C,CAAC;QACF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8DAA8D,EAAE,GAAG,EAAE;QACtE,MAAM,KAAK,GAAG;YACZ,QAAQ,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;YAC/C,QAAQ,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,CAAC;SAC5D,CAAC;QACF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yEAAyE,EAAE,GAAG,EAAE;QACjF,MAAM,KAAK,GAAG;YACZ,QAAQ,CAAC,EAAE,WAAW,EAAE,SAAS,EAAE,YAAY,EAAE,SAAS,E
AAE,CAAC;SAC9D,CAAC;QACF,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,gBAAgB,CAAC,CAAC,QAAQ,EAAE,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QAClE,MAAM,KAAK,GAAG,CAAC,QAAQ,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;QAC7C,MAAM,MAAM,GAAG,kBAAkB,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;QACtD,MAAM,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC5B,MAAM,MAAM,GAAG,kBAAkB,CAAC,CAAC,QAAQ,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACpC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;QAC7D,MAAM,MAAM,GAAG,kBAAkB,CAAC,CAAC,QAAQ,EAAE,CAAC,EAAE,WAAW,CAAC,CAAC;QAC7D,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAC3C,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAChC,UAAU,CAAC,GAAG,EAAE;QACd,EAAE,CAAC,aAAa,EAAE,CAAC;IACrB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4EAA4E,EAAE,KAAK,IAAI,EAAE;QAC1F,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,mBAAmB;YACzB,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,YAAY;SACxB,CAAC,CAAC;QAEH,MAAM,gBAAgB,CAAC;YACrB,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,kBAAkB;YAChC,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QAC1C,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAC;QAC5C,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QAChD,MAAM,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;IAEH,EAAE,
CAAC,oEAAoE,EAAE,KAAK,IAAI,EAAE;QAClF,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,KAAK,EAAE,YAAY;YACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC;gBAClC,IAAI,EAAE,aAAa;gBACnB,UAAU,EAAE,GAAG;gBACf,WAAW,EAAE,EAAE;gBACf,YAAY,EAAE,EAAE;aACjB,CAAC;SACH,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,mBAAmB;YACzB,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU;IAC7C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,KAAK,IAAI,EAAE;QACxE,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,KAAK,EAAE,YAAY;YACnB,QAAQ,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,iBAAiB,CAAC,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;SAC9D,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACpC,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QACjD,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACjC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6CAA6C,EAAE,KAAK,IAAI,EAAE;QAC3D,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,CAAC;YAC5B,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,SAAS,EAAE;gBAClD,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,SAAS,EAAE;aACpD;SACF,CAAC,CAAC;QAEH,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC;aACtB,qBAAqB,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;aA
CrF,qBAAqB,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,cAAc,EAAE,IAAI,EAAE,KAAK,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC;QAE7F,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;SACvB,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,CAAC;YAC5B,UAAU,EAAE;gBACV,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE;gBAC5C,EAAE,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,SAAS,EAAE;aAC9C;SACF,CAAC,CAAC;QAEH,6CAA6C;QAC7C,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,MAAM,SAAS,GAAG,GAAG,EAAE;YACrB,SAAS,EAAE,CAAC;YACZ,OAAO,SAAS,GAAG,CAAC,CAAC,CAAC,gDAAgD;QACxE,CAAC,CAAC;QAEF,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,OAAO;YACb,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,gBAAgB,CAAC;YACpC,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS;SACV,CAAC,CAAC;QAEH,gDAAgD;QAChD,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+CAA+C,EAAE,KAAK,IAAI,EAAE;QAC7D,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,cAAc,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC;QAEhC,EAAE,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,iBAAiB,CAAC;YAC1C,EAAE,EAAE,IAAI;YACR,IAAI,EAAE,mBAAmB;YACzB,IAAI,EAAE,IAAI;YACV,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,MAAM,gBAAgB,CAAC;YACrB,GAAG;YACH,QAAQ;YACR,YAAY,EAAE,QAAQ;YACtB,MAAM;YACN,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK;YACtB,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QAEH,MAAM,aAAa,GAAG,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,YAAY,CAAC,CAAC;QACvF,MAAM,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,CAAE,aAAc,CAAC,CAAC,CAAS,CAAC,KAAK,
CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -48,6 +48,14 @@ const PROVIDER_MODELS = {
48
48
  { id: "gemma2:9b", label: "Gemma 2 9B" },
49
49
  { id: "mistral:7b", label: "Mistral 7B" },
50
50
  ],
51
+ "gemini-cli": [
52
+ { id: "gemini-2.5-pro", label: "Gemini 2.5 Pro" },
53
+ { id: "gemini-2.5-flash", label: "Gemini 2.5 Flash" },
54
+ ],
55
+ "codex-cli": [
56
+ { id: "o3", label: "OpenAI o3" },
57
+ { id: "o4-mini", label: "OpenAI o4-mini" },
58
+ ],
51
59
  };
52
60
  // ---------------------------------------------------------------------------
53
61
  // Ollama detection cache — avoids 500ms+ probe on every /api/config request.