vieval 0.0.10 → 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +31 -31
  2. package/dist/bin/vieval.mjs +1 -1
  3. package/dist/bin/vieval.mjs.map +1 -1
  4. package/dist/cli/index.d.mts +1 -1
  5. package/dist/cli/index.mjs +1 -1
  6. package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
  7. package/dist/cli-uzS81IPd.mjs.map +1 -0
  8. package/dist/config.d.mts +1 -1
  9. package/dist/config.mjs +1 -1
  10. package/dist/config.mjs.map +1 -1
  11. package/dist/core/assertions/index.d.mts +156 -156
  12. package/dist/core/assertions/index.mjs +82 -82
  13. package/dist/core/assertions/index.mjs.map +1 -1
  14. package/dist/core/inference-executors/index.d.mts +37 -37
  15. package/dist/core/inference-executors/index.mjs +54 -53
  16. package/dist/core/inference-executors/index.mjs.map +1 -1
  17. package/dist/core/processors/results/index.d.mts +18 -18
  18. package/dist/core/processors/results/index.mjs.map +1 -1
  19. package/dist/core/runner/index.d.mts +2 -2
  20. package/dist/core/runner/index.mjs +259 -259
  21. package/dist/core/runner/index.mjs.map +1 -1
  22. package/dist/core/scheduler/index.d.mts +1 -1
  23. package/dist/core/scheduler/index.mjs +65 -65
  24. package/dist/core/scheduler/index.mjs.map +1 -1
  25. package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
  26. package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
  27. package/dist/env-egxaJtNn.mjs.map +1 -0
  28. package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
  29. package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
  30. package/dist/expect.d.mts +1 -3
  31. package/dist/expect.mjs +1 -1
  32. package/dist/expect.mjs.map +1 -1
  33. package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
  34. package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
  35. package/dist/index.d.mts +208 -197
  36. package/dist/index.mjs +148 -148
  37. package/dist/index.mjs.map +1 -1
  38. package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
  39. package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
  40. package/dist/plugins/chat-models/index.d.mts +279 -279
  41. package/dist/plugins/chat-models/index.mjs +360 -360
  42. package/dist/plugins/chat-models/index.mjs.map +1 -1
  43. package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
  44. package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
  45. package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
  46. package/dist/registry-BK7k6X81.mjs.map +1 -0
  47. package/dist/testing/expect-extensions.d.mts +27 -27
  48. package/dist/testing/expect-extensions.mjs +1 -1
  49. package/package.json +12 -12
  50. package/dist/cli-DTDgaqeI.mjs.map +0 -1
  51. package/dist/env-nV5rVErX.mjs.map +0 -1
  52. package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
  53. package/dist/registry-DMnwE_mY.mjs.map +0 -1
@@ -1,84 +1,25 @@
1
- import { n as requiredEnvFrom, t as envFrom } from "../../env-nV5rVErX.mjs";
2
- import { t as resolveModelByName } from "../../models-pBSRUZhY.mjs";
1
+ import { t as resolveModelByName } from "../../models-CaCOUPZw.mjs";
2
+ import { n as requiredEnvFrom, t as envFrom } from "../../env-egxaJtNn.mjs";
3
3
  import process from "node:process";
4
4
  import { errorMessageFrom } from "@moeru/std";
5
5
  //#region src/plugins/chat-models/runtime-config.ts
6
- function getParameters(model) {
7
- return model.parameters ?? {};
8
- }
9
- function parseOptionalStringParameter(parameters, key, modelId) {
10
- const value = parameters[key];
11
- const normalized = value == null ? void 0 : String(value);
12
- const name = `${modelId}.parameters.${key}`;
13
- return envFrom({ [name]: normalized }, {
14
- name,
15
- type: "string"
16
- });
17
- }
18
- function parseRequiredStringParameter(parameters, key, modelId) {
19
- const value = parameters[key];
20
- const normalized = value == null ? void 0 : String(value);
21
- const name = `${modelId}.parameters.${key}`;
22
- return requiredEnvFrom({ [name]: normalized }, {
23
- name,
24
- type: "string"
25
- });
26
- }
27
- function parseHeadersParameter(parameters, modelId) {
28
- const headers = parameters.headers;
29
- if (headers == null) return;
30
- if (typeof headers !== "object" || Array.isArray(headers)) throw new TypeError(`Invalid ${modelId}.parameters.headers: expected an object.`);
31
- const normalized = {};
32
- for (const [key, value] of Object.entries(headers)) {
33
- if (typeof value === "string") {
34
- normalized[key] = value;
35
- continue;
36
- }
37
- if (Array.isArray(value) && value.every((item) => typeof item === "string")) {
38
- normalized[key] = value;
39
- continue;
40
- }
41
- throw new Error(`Invalid ${modelId}.parameters.headers.${key}: expected string or string[].`);
42
- }
43
- return normalized;
44
- }
45
6
  /**
46
- * Normalizes one configured chat model into runtime executor config.
7
+ * Resolves Ollama runtime config from one resolved run-context model.
47
8
  *
48
9
  * Use when:
49
- * - eval code needs typed provider constructor options from a resolved model
50
- * - model parameters should be validated once with clear error messages
10
+ * - task execution already has a model resolved through chat-model helpers
11
+ * - eval code wants typed Ollama provider options with a concise helper name
51
12
  *
52
13
  * Expects:
53
- * - `model.inferenceExecutorId` to be one of the supported executor ids
54
- * - required OpenAI fields (apiKey) to exist in `model.parameters`
14
+ * - `model` to resolve to an Ollama-backed chat model
55
15
  *
56
16
  * Returns:
57
- * - validated runtime config union for OpenAI or Ollama
17
+ * - validated Ollama runtime config
58
18
  */
59
- function toChatModelRuntimeConfig(model) {
60
- const parameters = getParameters(model);
61
- if (model.inferenceExecutorId === "openai") return {
62
- apiKey: parseRequiredStringParameter(parameters, "apiKey", model.id),
63
- baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
64
- headers: parseHeadersParameter(parameters, model.id),
65
- inferenceExecutor: "openai",
66
- model: model.model
67
- };
68
- if (model.inferenceExecutorId === "ollama") return {
69
- baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
70
- headers: parseHeadersParameter(parameters, model.id),
71
- inferenceExecutor: "ollama",
72
- model: model.model
73
- };
74
- if (model.inferenceExecutorId === "openrouter") return {
75
- apiKey: parseRequiredStringParameter(parameters, "apiKey", model.id),
76
- baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
77
- headers: parseHeadersParameter(parameters, model.id),
78
- inferenceExecutor: "openrouter",
79
- model: model.model
80
- };
81
- throw new Error(`Unsupported chat inference executor "${model.inferenceExecutorId}" for model "${model.id}".`);
19
+ function ollamaFromRunContext(model) {
20
+ const runtimeConfig = toChatModelRuntimeConfig(model);
21
+ if (runtimeConfig.inferenceExecutor !== "ollama") throw new Error(`Expected ollama model, got "${runtimeConfig.inferenceExecutor}" for "${model.id}".`);
22
+ return runtimeConfig;
82
23
  }
83
24
  /**
84
25
  * Resolves OpenAI runtime config from one resolved run-context model.
@@ -99,104 +40,141 @@ function openaiFromRunContext(model) {
99
40
  return runtimeConfig;
100
41
  }
101
42
  /**
102
- * Resolves Ollama runtime config from one resolved run-context model.
43
+ * Resolves OpenRouter runtime config from one resolved run-context model.
103
44
  *
104
45
  * Use when:
105
46
  * - task execution already has a model resolved through chat-model helpers
106
- * - eval code wants typed Ollama provider options with a concise helper name
47
+ * - eval code wants typed OpenRouter provider options with a concise helper name
107
48
  *
108
49
  * Expects:
109
- * - `model` to resolve to an Ollama-backed chat model
50
+ * - `model` to resolve to an OpenRouter-backed chat model
110
51
  *
111
52
  * Returns:
112
- * - validated Ollama runtime config
53
+ * - validated OpenRouter runtime config
113
54
  */
114
- function ollamaFromRunContext(model) {
55
+ function openrouterFromRunContext(model) {
115
56
  const runtimeConfig = toChatModelRuntimeConfig(model);
116
- if (runtimeConfig.inferenceExecutor !== "ollama") throw new Error(`Expected ollama model, got "${runtimeConfig.inferenceExecutor}" for "${model.id}".`);
57
+ if (runtimeConfig.inferenceExecutor !== "openrouter") throw new Error(`Expected openrouter model, got "${runtimeConfig.inferenceExecutor}" for "${model.id}".`);
117
58
  return runtimeConfig;
118
59
  }
119
60
  /**
120
- * Resolves OpenRouter runtime config from one resolved run-context model.
61
+ * Normalizes one configured chat model into runtime executor config.
121
62
  *
122
63
  * Use when:
123
- * - task execution already has a model resolved through chat-model helpers
124
- * - eval code wants typed OpenRouter provider options with a concise helper name
64
+ * - eval code needs typed provider constructor options from a resolved model
65
+ * - model parameters should be validated once with clear error messages
125
66
  *
126
67
  * Expects:
127
- * - `model` to resolve to an OpenRouter-backed chat model
68
+ * - `model.inferenceExecutorId` to be one of the supported executor ids
69
+ * - required OpenAI fields (apiKey) to exist in `model.parameters`
128
70
  *
129
71
  * Returns:
130
- * - validated OpenRouter runtime config
72
+ * - validated runtime config union for OpenAI or Ollama
131
73
  */
132
- function openrouterFromRunContext(model) {
133
- const runtimeConfig = toChatModelRuntimeConfig(model);
134
- if (runtimeConfig.inferenceExecutor !== "openrouter") throw new Error(`Expected openrouter model, got "${runtimeConfig.inferenceExecutor}" for "${model.id}".`);
135
- return runtimeConfig;
74
+ function toChatModelRuntimeConfig(model) {
75
+ const parameters = getParameters(model);
76
+ if (model.inferenceExecutorId === "openai") return {
77
+ apiKey: parseRequiredStringParameter(parameters, "apiKey", model.id),
78
+ baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
79
+ headers: parseHeadersParameter(parameters, model.id),
80
+ inferenceExecutor: "openai",
81
+ model: model.model
82
+ };
83
+ if (model.inferenceExecutorId === "ollama") return {
84
+ baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
85
+ headers: parseHeadersParameter(parameters, model.id),
86
+ inferenceExecutor: "ollama",
87
+ model: model.model
88
+ };
89
+ if (model.inferenceExecutorId === "openrouter") return {
90
+ apiKey: parseRequiredStringParameter(parameters, "apiKey", model.id),
91
+ baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
92
+ headers: parseHeadersParameter(parameters, model.id),
93
+ inferenceExecutor: "openrouter",
94
+ model: model.model
95
+ };
96
+ throw new Error(`Unsupported chat inference executor "${model.inferenceExecutorId}" for model "${model.id}".`);
136
97
  }
137
- //#endregion
138
- //#region src/plugins/chat-models/telemetry.ts
139
- function asRecord(value) {
140
- if (value == null || typeof value !== "object") return;
141
- return value;
98
+ function getParameters(model) {
99
+ return model.parameters ?? {};
142
100
  }
143
- function parseMaybeJson(value) {
144
- if (typeof value !== "string") return value;
145
- try {
146
- return JSON.parse(value);
147
- } catch {
148
- return value;
101
+ function parseHeadersParameter(parameters, modelId) {
102
+ const headers = parameters.headers;
103
+ if (headers == null) return;
104
+ if (typeof headers !== "object" || Array.isArray(headers)) throw new TypeError(`Invalid ${modelId}.parameters.headers: expected an object.`);
105
+ const normalized = {};
106
+ for (const [key, value] of Object.entries(headers)) {
107
+ if (typeof value === "string") {
108
+ normalized[key] = value;
109
+ continue;
110
+ }
111
+ if (Array.isArray(value) && value.every((item) => typeof item === "string")) {
112
+ normalized[key] = value;
113
+ continue;
114
+ }
115
+ throw new Error(`Invalid ${modelId}.parameters.headers.${key}: expected string or string[].`);
149
116
  }
117
+ return normalized;
118
+ }
119
+ function parseOptionalStringParameter(parameters, key, modelId) {
120
+ const value = parameters[key];
121
+ const normalized = value == null ? void 0 : String(value);
122
+ const name = `${modelId}.parameters.${key}`;
123
+ return envFrom({ [name]: normalized }, {
124
+ name,
125
+ type: "string"
126
+ });
127
+ }
128
+ function parseRequiredStringParameter(parameters, key, modelId) {
129
+ const value = parameters[key];
130
+ const normalized = value == null ? void 0 : String(value);
131
+ const name = `${modelId}.parameters.${key}`;
132
+ return requiredEnvFrom({ [name]: normalized }, {
133
+ name,
134
+ type: "string"
135
+ });
150
136
  }
137
+ //#endregion
138
+ //#region src/plugins/chat-models/telemetry.ts
151
139
  /**
152
- * Extracts normalized tool calls from one chat-model response shape.
140
+ * Emits chat-model failure telemetry as a reportable task event.
153
141
  *
154
142
  * Use when:
155
- * - downstream scoring, reporting, or analysis should inspect tool call usage
156
- * - provider payload differences should stay hidden behind one stable shape
143
+ * - one inference call fails and report artifacts should include normalized error context
157
144
  *
158
- * Returns:
159
- * - normalized list of `{ id?, name, args }` tool calls
145
+ * Expects:
146
+ * - `context.reporterHooks?.onEvent` to be available in CLI execution paths
160
147
  */
161
- function extractChatModelToolCalls(response) {
162
- const responseRecord = asRecord(response);
163
- if (responseRecord == null) return [];
164
- const rawToolCalls = responseRecord.toolCalls ?? responseRecord.tool_calls;
165
- if (!Array.isArray(rawToolCalls)) return [];
166
- const toolCalls = [];
167
- for (const rawToolCall of rawToolCalls) {
168
- const toolCallRecord = asRecord(rawToolCall);
169
- if (toolCallRecord == null) continue;
170
- const functionPayload = asRecord(toolCallRecord.function);
171
- const name = typeof toolCallRecord.name === "string" ? toolCallRecord.name : typeof functionPayload?.name === "string" ? functionPayload.name : void 0;
172
- if (name == null || name.length === 0) continue;
173
- const rawArgs = toolCallRecord.args ?? toolCallRecord.arguments ?? functionPayload?.args ?? functionPayload?.arguments;
174
- toolCalls.push({
175
- args: parseMaybeJson(rawArgs),
176
- id: typeof toolCallRecord.id === "string" ? toolCallRecord.id : void 0,
177
- name
178
- });
179
- }
180
- return toolCalls;
148
+ function emitChatModelErrorTelemetry(context, options) {
149
+ context.reporterHooks?.onEvent?.({
150
+ caseId: options.caseId,
151
+ data: {
152
+ error: errorMessageFrom(options.error) ?? "Unknown inference error.",
153
+ modality: "chat",
154
+ provider: options.provider
155
+ },
156
+ event: "InferenceError"
157
+ });
181
158
  }
182
159
  /**
183
- * Extracts numeric metering dimensions from one chat-model response usage block.
160
+ * Emits chat-model request telemetry as a reportable task event.
184
161
  *
185
162
  * Use when:
186
- * - report events should capture usage dimensions in a modality-neutral map
163
+ * - task code submits one model request and wants request-side traceability
187
164
  *
188
- * Returns:
189
- * - numeric dimensions keyed by provider usage field names
165
+ * Expects:
166
+ * - `context.reporterHooks?.onEvent` to be available in CLI execution paths
190
167
  */
191
- function extractMeteringDimensions(response) {
192
- const usage = asRecord(asRecord(response)?.usage);
193
- if (usage == null) return {};
194
- const dimensions = {};
195
- for (const [key, value] of Object.entries(usage)) {
196
- if (typeof value !== "number" || Number.isNaN(value)) continue;
197
- dimensions[key] = value;
198
- }
199
- return dimensions;
168
+ function emitChatModelRequestTelemetry(context, options) {
169
+ context.reporterHooks?.onEvent?.({
170
+ caseId: options.caseId,
171
+ data: {
172
+ data: options.data,
173
+ modality: "chat",
174
+ provider: options.provider
175
+ },
176
+ event: "InferenceRequest"
177
+ });
200
178
  }
201
179
  /**
202
180
  * Emits chat-model response telemetry as reportable task events.
@@ -252,184 +230,69 @@ function emitChatModelResponseTelemetry(context, options) {
252
230
  }
253
231
  }
254
232
  /**
255
- * Emits chat-model request telemetry as a reportable task event.
233
+ * Extracts normalized tool calls from one chat-model response shape.
256
234
  *
257
235
  * Use when:
258
- * - task code submits one model request and wants request-side traceability
236
+ * - downstream scoring, reporting, or analysis should inspect tool call usage
237
+ * - provider payload differences should stay hidden behind one stable shape
259
238
  *
260
- * Expects:
261
- * - `context.reporterHooks?.onEvent` to be available in CLI execution paths
239
+ * Returns:
240
+ * - normalized list of `{ id?, name, args }` tool calls
262
241
  */
263
- function emitChatModelRequestTelemetry(context, options) {
264
- context.reporterHooks?.onEvent?.({
265
- caseId: options.caseId,
266
- data: {
267
- data: options.data,
268
- modality: "chat",
269
- provider: options.provider
270
- },
271
- event: "InferenceRequest"
272
- });
242
+ function extractChatModelToolCalls(response) {
243
+ const responseRecord = asRecord(response);
244
+ if (responseRecord == null) return [];
245
+ const rawToolCalls = responseRecord.toolCalls ?? responseRecord.tool_calls;
246
+ if (!Array.isArray(rawToolCalls)) return [];
247
+ const toolCalls = [];
248
+ for (const rawToolCall of rawToolCalls) {
249
+ const toolCallRecord = asRecord(rawToolCall);
250
+ if (toolCallRecord == null) continue;
251
+ const functionPayload = asRecord(toolCallRecord.function);
252
+ const name = typeof toolCallRecord.name === "string" ? toolCallRecord.name : typeof functionPayload?.name === "string" ? functionPayload.name : void 0;
253
+ if (name == null || name.length === 0) continue;
254
+ const rawArgs = toolCallRecord.args ?? toolCallRecord.arguments ?? functionPayload?.args ?? functionPayload?.arguments;
255
+ toolCalls.push({
256
+ args: parseMaybeJson(rawArgs),
257
+ id: typeof toolCallRecord.id === "string" ? toolCallRecord.id : void 0,
258
+ name
259
+ });
260
+ }
261
+ return toolCalls;
273
262
  }
274
263
  /**
275
- * Emits chat-model failure telemetry as a reportable task event.
264
+ * Extracts numeric metering dimensions from one chat-model response usage block.
276
265
  *
277
266
  * Use when:
278
- * - one inference call fails and report artifacts should include normalized error context
267
+ * - report events should capture usage dimensions in a modality-neutral map
279
268
  *
280
- * Expects:
281
- * - `context.reporterHooks?.onEvent` to be available in CLI execution paths
269
+ * Returns:
270
+ * - numeric dimensions keyed by provider usage field names
282
271
  */
283
- function emitChatModelErrorTelemetry(context, options) {
284
- context.reporterHooks?.onEvent?.({
285
- caseId: options.caseId,
286
- data: {
287
- error: errorMessageFrom(options.error) ?? "Unknown inference error.",
288
- modality: "chat",
289
- provider: options.provider
290
- },
291
- event: "InferenceError"
292
- });
293
- }
294
- //#endregion
295
- //#region src/plugins/chat-models/index.ts
296
- function normalizeExecutionPolicy(policy) {
297
- if (policy == null) return;
298
- const normalized = {
299
- autoAttempt: policy.autoAttempt,
300
- autoRetry: policy.autoRetry,
301
- autoRetryDelay: policy.autoRetryDelay,
302
- timeout: policy.timeout
303
- };
304
- return Object.values(normalized).some((value) => value != null) ? normalized : void 0;
305
- }
306
- function hasJudgeAlias(model) {
307
- return [
308
- ...model.aliases ?? [],
309
- ...model.id == null ? [] : [model.id],
310
- model.model
311
- ].some((value) => value.toLowerCase().includes("judge"));
312
- }
313
- function resolveModelExecutionPolicy(options) {
314
- const explicitPolicy = normalizeExecutionPolicy({
315
- autoAttempt: options.autoAttempt ?? options.executionPolicy?.autoAttempt,
316
- autoRetry: options.autoRetry ?? options.executionPolicy?.autoRetry,
317
- autoRetryDelay: options.autoRetryDelay ?? options.executionPolicy?.autoRetryDelay,
318
- timeout: options.timeout ?? options.executionPolicy?.timeout
319
- });
320
- if (explicitPolicy != null && Object.keys(explicitPolicy).length > 0) return explicitPolicy;
321
- if (hasJudgeAlias(options)) return { autoRetry: 3 };
322
- }
323
- function normalizeInferenceExecutorId(inferenceExecutor, inferenceExecutorId) {
324
- if (typeof inferenceExecutor === "string") return inferenceExecutor;
325
- return inferenceExecutorId ?? "custom";
326
- }
327
- function createDefaultModelId(inferenceExecutorId, model) {
328
- return `${inferenceExecutorId}:${model}`;
329
- }
330
- function normalizeEnvRecord(env) {
331
- const normalized = {};
332
- for (const [key, value] of Object.entries(env)) if (typeof value === "string") normalized[key] = value;
333
- return normalized;
334
- }
335
- async function resolveChatModelResolverValue(value, context) {
336
- if (typeof value === "function") return await value(context);
337
- return value;
338
- }
339
- function resolveRequiredStringValue(value, name) {
340
- return requiredEnvFrom({ [name]: value }, {
341
- name,
342
- type: "string"
343
- });
344
- }
345
- function resolveOptionalStringValue(value, name) {
346
- return envFrom({ [name]: value }, {
347
- name,
348
- type: "string"
349
- });
350
- }
351
- function resolveOptionalEnvValue(env, envKey) {
352
- return envFrom(env, {
353
- name: envKey,
354
- type: "string"
355
- });
356
- }
357
- function resolveRequiredEnvValue(env, envKey) {
358
- return requiredEnvFrom(env, {
359
- name: envKey,
360
- type: "string"
361
- });
362
- }
363
- function resolveProviderParameters(provider, env) {
364
- const parameters = { ...provider.parameters };
365
- for (const [parameterName, envKey] of Object.entries(provider.optionalEnv ?? {})) {
366
- const resolved = resolveOptionalEnvValue(env, envKey);
367
- if (resolved != null) parameters[parameterName] = resolved;
272
+ function extractMeteringDimensions(response) {
273
+ const usage = asRecord(asRecord(response)?.usage);
274
+ if (usage == null) return {};
275
+ const dimensions = {};
276
+ for (const [key, value] of Object.entries(usage)) {
277
+ if (typeof value !== "number" || Number.isNaN(value)) continue;
278
+ dimensions[key] = value;
368
279
  }
369
- for (const [parameterName, envKey] of Object.entries(provider.requiredEnv ?? {})) parameters[parameterName] = resolveRequiredEnvValue(env, envKey);
370
- return Object.keys(parameters).length > 0 ? parameters : void 0;
371
- }
372
- function normalizeChatProviderDefinition(provider, env) {
373
- return {
374
- id: provider.id,
375
- inferenceExecutor: provider.inferenceExecutor,
376
- inferenceExecutorId: normalizeInferenceExecutorId(provider.inferenceExecutor, provider.inferenceExecutorId),
377
- optionalEnv: provider.optionalEnv,
378
- parameters: resolveProviderParameters(provider, env),
379
- requiredEnv: provider.requiredEnv
380
- };
381
- }
382
- function createProviderMap(config) {
383
- const providerMap = /* @__PURE__ */ new Map();
384
- for (const provider of config.chatProviders ?? []) providerMap.set(provider.id, provider);
385
- return providerMap;
280
+ return dimensions;
386
281
  }
387
- function resolveModelProvider(model, providerMap) {
388
- if (model.provider == null) return model;
389
- const provider = providerMap.get(model.provider);
390
- if (provider == null) throw new Error(`Unknown chat provider "${model.provider}" referenced by model "${model.id}".`);
391
- return {
392
- ...model,
393
- inferenceExecutor: provider.inferenceExecutor,
394
- inferenceExecutorId: provider.inferenceExecutorId ?? normalizeInferenceExecutorId(provider.inferenceExecutor, provider.inferenceExecutorId),
395
- parameters: {
396
- ...provider.parameters,
397
- ...model.parameters
398
- }
399
- };
282
+ function asRecord(value) {
283
+ if (value == null || typeof value !== "object") return;
284
+ return value;
400
285
  }
401
- async function resolveModelRuntimeResolvers(model, context) {
402
- if (model.runtimeResolvers == null) return;
403
- const resolvedParameters = {};
404
- if (model.runtimeResolvers.apiKey != null) resolvedParameters.apiKey = resolveRequiredStringValue(await resolveChatModelResolverValue(model.runtimeResolvers.apiKey, context), `${model.id}.apiKey`);
405
- if (model.runtimeResolvers.baseURL != null) {
406
- const normalizedBaseURL = resolveOptionalStringValue(await resolveChatModelResolverValue(model.runtimeResolvers.baseURL, context), `${model.id}.baseURL`);
407
- if (normalizedBaseURL != null) resolvedParameters.baseURL = normalizedBaseURL;
286
+ function parseMaybeJson(value) {
287
+ if (typeof value !== "string") return value;
288
+ try {
289
+ return JSON.parse(value);
290
+ } catch {
291
+ return value;
408
292
  }
409
- if (model.runtimeResolvers.headers != null) resolvedParameters.headers = await resolveChatModelResolverValue(model.runtimeResolvers.headers, context);
410
- return Object.keys(resolvedParameters).length > 0 ? resolvedParameters : void 0;
411
- }
412
- async function resolveChatModelDefinition(model, config) {
413
- const providerResolvedModel = resolveModelProvider(model, createProviderMap(config));
414
- const resolvedRuntimeParameters = await resolveModelRuntimeResolvers(providerResolvedModel, { env: normalizeEnvRecord(config.env ?? process.env) });
415
- if (resolvedRuntimeParameters == null) return providerResolvedModel;
416
- return {
417
- ...providerResolvedModel,
418
- parameters: {
419
- ...providerResolvedModel.parameters,
420
- ...resolvedRuntimeParameters
421
- }
422
- };
423
- }
424
- function isOpenAIChatModelInferenceExecutor(options) {
425
- return options.inferenceExecutor === "openai";
426
- }
427
- function isOllamaChatModelInferenceExecutor(options) {
428
- return options.inferenceExecutor === "ollama";
429
- }
430
- function isOpenRouterChatModelInferenceExecutor(options) {
431
- return options.inferenceExecutor === "openrouter";
432
293
  }
294
+ //#endregion
295
+ //#region src/plugins/chat-models/index.ts
433
296
  /**
434
297
  * Builds one normalized chat model definition.
435
298
  *
@@ -465,6 +328,41 @@ function chatModelFrom(options) {
465
328
  };
466
329
  }
467
330
  /**
331
+ * Creates a run-matrix `model` axis from configured chat model names.
332
+ *
333
+ * Use when:
334
+ * - run matrix should iterate over explicit chat model ids/aliases
335
+ * - project configs want a concise model-axis helper
336
+ *
337
+ * Expects:
338
+ * - each provided name to match a configured model id or alias at runtime
339
+ *
340
+ * Returns:
341
+ * - matrix axis object compatible with `runMatrix.extend/override`
342
+ */
343
+ function chatModelMatrix(...names) {
344
+ return { model: Array.from(new Set(names)) };
345
+ }
346
+ /**
347
+ * Built-in chat models plugin that contributes model definitions to vieval config.
348
+ *
349
+ * Use when:
350
+ * - chat-model registration should stay in config-level plugin setup
351
+ * - tasks and assertions resolve models by name or alias at runtime
352
+ */
353
+ function ChatModels(options) {
354
+ return {
355
+ async configVieval(config) {
356
+ const resolvedModels = await Promise.all(options.models.map(async (model) => resolveChatModelDefinition(model, config)));
357
+ return {
358
+ ...config,
359
+ models: [...config.models ?? [], ...resolvedModels]
360
+ };
361
+ },
362
+ name: "vieval:chat-models"
363
+ };
364
+ }
365
+ /**
468
366
  * Builds one normalized chat provider definition.
469
367
  *
470
368
  * Use when:
@@ -482,6 +380,42 @@ function chatProviderFrom(options) {
482
380
  };
483
381
  }
484
382
  /**
383
+ * Built-in chat providers plugin that contributes provider presets to config.
384
+ *
385
+ * Use when:
386
+ * - provider runtime config should be centralized and reusable
387
+ * - provider parameters should be resolved from env via `envFrom`/`requiredEnvFrom`
388
+ */
389
+ function ChatProviders(options) {
390
+ return {
391
+ configVieval(config) {
392
+ const env = config.env ?? options.env ?? process.env;
393
+ const normalizedProviders = options.providers.map((provider) => normalizeChatProviderDefinition(provider, env));
394
+ return {
395
+ ...config,
396
+ chatProviders: [...config.chatProviders ?? [], ...normalizedProviders]
397
+ };
398
+ },
399
+ name: "vieval:chat-providers"
400
+ };
401
+ }
402
+ /**
403
+ * Resolves a configured chat model from one eval-matrix axis.
404
+ *
405
+ * Use when:
406
+ * - eval matrix selects a judge, rubric, or evaluator model
407
+ * - callers want the scoped helper instead of passing `scope: 'eval'`
408
+ *
409
+ * Expects:
410
+ * - `options.axis` to exist in `context.task.matrix.eval`
411
+ *
412
+ * Returns:
413
+ * - the configured model matching the selected eval-matrix value
414
+ */
415
+ function modelFromEval(context, options) {
416
+ return modelFromMatrix(context, "eval", options);
417
+ }
418
+ /**
485
419
  * Resolves a configured chat model from one scoped matrix axis.
486
420
  *
487
421
  * Use when:
@@ -518,77 +452,143 @@ function modelFromMatrix(context, scope, options) {
518
452
/**
 * Run-scoped shortcut: forwards to `modelFromMatrix` with the scope fixed
 * to `"run"`, so callers only supply the context and axis options.
 */
function modelFromRun(context, options) {
  const scope = "run";
  return modelFromMatrix(context, scope, options);
}
521
- /**
522
- * Resolves a configured chat model from one eval-matrix axis.
523
- *
524
- * Use when:
525
- * - eval matrix selects a judge, rubric, or evaluator model
526
- * - callers want the scoped helper instead of passing `scope: 'eval'`
527
- *
528
- * Expects:
529
- * - `options.axis` to exist in `context.task.matrix.eval`
530
- *
531
- * Returns:
532
- * - the configured model matching the selected eval-matrix value
533
- */
534
- function modelFromEval(context, options) {
535
- return modelFromMatrix(context, "eval", options);
455
/**
 * Builds the default model id by joining the inference executor id and the
 * model name with a colon (`<executorId>:<model>`).
 */
function createDefaultModelId(inferenceExecutorId, model) {
  const separator = ":";
  return `${inferenceExecutorId}${separator}${model}`;
}
537
- /**
538
- * Creates a run-matrix `model` axis from configured chat model names.
539
- *
540
- * Use when:
541
- * - run matrix should iterate over explicit chat model ids/aliases
542
- * - project configs want a concise model-axis helper
543
- *
544
- * Expects:
545
- * - each provided name to match a configured model id or alias at runtime
546
- *
547
- * Returns:
548
- * - matrix axis object compatible with `runMatrix.extend/override`
549
- */
550
- function chatModelMatrix(...names) {
551
- return { model: Array.from(new Set(names)) };
458
/**
 * Indexes `config.chatProviders` by provider id.
 *
 * Returns a `Map` from `provider.id` to the provider object; when two
 * providers share an id, the later one replaces the earlier one. A missing
 * `chatProviders` list yields an empty map.
 */
function createProviderMap(config) {
  const providerEntries = Array.from(config.chatProviders ?? [], (provider) => [provider.id, provider]);
  return /* @__PURE__ */ new Map(providerEntries);
}
553
- /**
554
- * Built-in chat providers plugin that contributes provider presets to config.
555
- *
556
- * Use when:
557
- * - provider runtime config should be centralized and reusable
558
- * - provider parameters should be resolved from env via `envFrom`/`requiredEnvFrom`
559
- */
560
- function ChatProviders(options) {
463
/**
 * Reports whether any of a model's names marks it as a judge model.
 *
 * Checks every alias, the model id, and the model name for the substring
 * "judge", case-insensitively.
 *
 * @param {{ aliases?: unknown[], id?: unknown, model?: unknown }} model
 *   Model definition; any of the three fields may be absent.
 * @returns {boolean} `true` when at least one string candidate contains "judge".
 */
function hasJudgeAlias(model) {
  const candidates = [
    ...model.aliases ?? [],
    model.id,
    model.model
  ];
  // Skip non-string candidates (missing id/model, stray alias values) rather
  // than letting `.toLowerCase()` throw a TypeError on them.
  return candidates.some((value) => typeof value === "string" && value.toLowerCase().includes("judge"));
}
470
/** True when the options select the `"ollama"` inference executor. */
function isOllamaChatModelInferenceExecutor(options) {
  const { inferenceExecutor } = options;
  return inferenceExecutor === "ollama";
}
473
/** True when the options select the `"openai"` inference executor. */
function isOpenAIChatModelInferenceExecutor(options) {
  const { inferenceExecutor } = options;
  return inferenceExecutor === "openai";
}
476
/** True when the options select the `"openrouter"` inference executor. */
function isOpenRouterChatModelInferenceExecutor(options) {
  const { inferenceExecutor } = options;
  return inferenceExecutor === "openrouter";
}
479
/**
 * Produces the normalized form of one chat provider definition.
 *
 * Copies `id`, `inferenceExecutor`, `optionalEnv` and `requiredEnv` through
 * unchanged, derives `inferenceExecutorId` via `normalizeInferenceExecutorId`,
 * and fills `parameters` from `resolveProviderParameters(provider, env)`.
 */
function normalizeChatProviderDefinition(provider, env) {
  const { id, inferenceExecutor, optionalEnv, requiredEnv } = provider;
  const inferenceExecutorId = normalizeInferenceExecutorId(inferenceExecutor, provider.inferenceExecutorId);
  const parameters = resolveProviderParameters(provider, env);
  return {
    id,
    inferenceExecutor,
    inferenceExecutorId,
    optionalEnv,
    parameters,
    requiredEnv
  };
}
573
- /**
574
- * Built-in chat models plugin that contributes model definitions to vieval config.
575
- *
576
- * Use when:
577
- * - chat-model registration should stay in config-level plugin setup
578
- * - tasks and assertions resolve models by name or alias at runtime
579
- */
580
- function ChatModels(options) {
489
/**
 * Copies an env-like object into a fresh record, keeping only the entries
 * whose value is a string (empty strings included).
 */
function normalizeEnvRecord(env) {
  const stringsOnly = {};
  for (const key of Object.keys(env)) {
    const value = env[key];
    if (typeof value !== "string") continue;
    stringsOnly[key] = value;
  }
  return stringsOnly;
}
494
/**
 * Reduces a policy object to the four recognized execution-policy fields.
 *
 * Returns `undefined` when `policy` is nullish or when none of
 * `autoAttempt`, `autoRetry`, `autoRetryDelay`, `timeout` is set; otherwise
 * returns an object carrying exactly those four keys (unset ones stay
 * `undefined`).
 */
function normalizeExecutionPolicy(policy) {
  if (policy == null) return undefined;
  const { autoAttempt, autoRetry, autoRetryDelay, timeout } = policy;
  const normalized = {
    autoAttempt,
    autoRetry,
    autoRetryDelay,
    timeout
  };
  const hasAnyValue = Object.values(normalized).some((value) => value != null);
  if (!hasAnyValue) return undefined;
  return normalized;
}
504
/**
 * Picks the id to use for an inference executor.
 *
 * A string executor is its own id. For any other executor value, the
 * explicit `inferenceExecutorId` is used, defaulting to `"custom"` when that
 * is nullish.
 */
function normalizeInferenceExecutorId(inferenceExecutor, inferenceExecutorId) {
  const isNamedExecutor = typeof inferenceExecutor === "string";
  return isNamedExecutor ? inferenceExecutor : inferenceExecutorId ?? "custom";
}
508
/**
 * Resolves one chat model definition against the current config.
 *
 * First merges in the referenced provider (if any), then evaluates the
 * model's runtime resolvers with an env record built from `config.env`
 * (or `process.env` when the config has none). Resolved runtime parameters
 * override the provider-resolved ones; when there are none, the
 * provider-resolved model is returned as is.
 */
async function resolveChatModelDefinition(model, config) {
  const providerMap = createProviderMap(config);
  const baseModel = resolveModelProvider(model, providerMap);
  const env = normalizeEnvRecord(config.env ?? process.env);
  const runtimeParameters = await resolveModelRuntimeResolvers(baseModel, { env });
  if (runtimeParameters == null) return baseModel;
  const parameters = {
    ...baseModel.parameters,
    ...runtimeParameters
  };
  return {
    ...baseModel,
    parameters
  };
}
520
/**
 * Resolves a value that may be given directly or as a resolver function.
 *
 * A function is called with `context` and its (possibly async) result is
 * awaited; anything else is returned unchanged.
 */
async function resolveChatModelResolverValue(value, context) {
  const isResolver = typeof value === "function";
  return isResolver ? await value(context) : value;
}
524
/**
 * Determines the execution policy for a chat model definition.
 *
 * Top-level `autoAttempt` / `autoRetry` / `autoRetryDelay` / `timeout`
 * options take precedence over the same fields on `options.executionPolicy`.
 * When no explicit policy results, models recognized by `hasJudgeAlias`
 * get `{ autoRetry: 3 }`; all others get `undefined`.
 */
function resolveModelExecutionPolicy(options) {
  const nestedPolicy = options.executionPolicy;
  const explicitPolicy = normalizeExecutionPolicy({
    autoAttempt: options.autoAttempt ?? nestedPolicy?.autoAttempt,
    autoRetry: options.autoRetry ?? nestedPolicy?.autoRetry,
    autoRetryDelay: options.autoRetryDelay ?? nestedPolicy?.autoRetryDelay,
    timeout: options.timeout ?? nestedPolicy?.timeout
  });
  const hasExplicitPolicy = explicitPolicy != null && Object.keys(explicitPolicy).length > 0;
  if (hasExplicitPolicy) return explicitPolicy;
  return hasJudgeAlias(options) ? { autoRetry: 3 } : undefined;
}
534
/**
 * Merges a model definition with the chat provider it references.
 *
 * @param {object} model Model definition; returned untouched when it has no `provider`.
 * @param {Map<string, object>} providerMap Providers indexed by id.
 * @returns {object} A copy of `model` carrying the provider's
 *   `inferenceExecutor`, an `inferenceExecutorId`, and `parameters` in which
 *   the model's own values override the provider's.
 * @throws {Error} When `model.provider` is not a key of `providerMap`.
 */
function resolveModelProvider(model, providerMap) {
  if (model.provider == null) return model;
  const provider = providerMap.get(model.provider);
  if (provider == null) {
    // `id` is optional on model definitions; fall back to the model name so
    // the message never reads `model "undefined"`.
    const modelLabel = model.id ?? model.model;
    throw new Error(`Unknown chat provider "${model.provider}" referenced by model "${modelLabel}".`);
  }
  return {
    ...model,
    inferenceExecutor: provider.inferenceExecutor,
    // The fallback only runs when the provider has no explicit id, so there
    // is no id to forward to the normalizer.
    inferenceExecutorId: provider.inferenceExecutorId ?? normalizeInferenceExecutorId(provider.inferenceExecutor),
    parameters: {
      ...provider.parameters,
      ...model.parameters
    }
  };
}
548
/**
 * Evaluates a model's `runtimeResolvers` (`apiKey`, `baseURL`, `headers`)
 * one after another and collects the results as parameters.
 *
 * `apiKey` goes through the required-string check and `baseURL` through the
 * optional-string one (a nullish result is dropped); `headers` is stored as
 * resolved. Returns `undefined` when the model has no resolvers or nothing
 * was collected.
 */
async function resolveModelRuntimeResolvers(model, context) {
  const resolvers = model.runtimeResolvers;
  if (resolvers == null) return undefined;
  const collected = {};
  if (resolvers.apiKey != null) {
    const rawApiKey = await resolveChatModelResolverValue(resolvers.apiKey, context);
    collected.apiKey = resolveRequiredStringValue(rawApiKey, `${model.id}.apiKey`);
  }
  if (resolvers.baseURL != null) {
    const rawBaseURL = await resolveChatModelResolverValue(resolvers.baseURL, context);
    const normalizedBaseURL = resolveOptionalStringValue(rawBaseURL, `${model.id}.baseURL`);
    if (normalizedBaseURL != null) collected.baseURL = normalizedBaseURL;
  }
  if (resolvers.headers != null) {
    collected.headers = await resolveChatModelResolverValue(resolvers.headers, context);
  }
  if (Object.keys(collected).length === 0) return undefined;
  return collected;
}
559
/**
 * Looks up `envKey` in `env` as a string by delegating to `envFrom`.
 * NOTE(review): presumably yields a nullish value when the key is unset —
 * confirm against `envFrom`.
 */
function resolveOptionalEnvValue(env, envKey) {
  const lookup = {
    name: envKey,
    type: "string"
  };
  return envFrom(env, lookup);
}
565
/**
 * Runs an already-resolved value through `envFrom` as a string, by wrapping
 * it in a one-entry record keyed by `name`.
 */
function resolveOptionalStringValue(value, name) {
  const singleEntryRecord = { [name]: value };
  const lookup = {
    name,
    type: "string"
  };
  return envFrom(singleEntryRecord, lookup);
}
571
/**
 * Builds a provider's parameters from its static `parameters` plus
 * env-backed values.
 *
 * Starts from a shallow copy of `provider.parameters`, then applies
 * `optionalEnv` bindings (only those resolving to a non-nullish value) and
 * finally `requiredEnv` bindings, which therefore win on conflicts.
 * Returns `undefined` when the resulting object is empty.
 */
function resolveProviderParameters(provider, env) {
  const parameters = { ...provider.parameters };
  Object.entries(provider.optionalEnv ?? {}).forEach(([parameterName, envKey]) => {
    const resolved = resolveOptionalEnvValue(env, envKey);
    if (resolved != null) parameters[parameterName] = resolved;
  });
  Object.entries(provider.requiredEnv ?? {}).forEach(([parameterName, envKey]) => {
    parameters[parameterName] = resolveRequiredEnvValue(env, envKey);
  });
  if (Object.keys(parameters).length === 0) return undefined;
  return parameters;
}
580
/**
 * Looks up `envKey` in `env` as a string by delegating to `requiredEnvFrom`.
 * NOTE(review): presumably throws when the key is unset — confirm against
 * `requiredEnvFrom`.
 */
function resolveRequiredEnvValue(env, envKey) {
  const lookup = {
    name: envKey,
    type: "string"
  };
  return requiredEnvFrom(env, lookup);
}
586
/**
 * Runs an already-resolved value through `requiredEnvFrom` as a string, by
 * wrapping it in a one-entry record keyed by `name`.
 */
function resolveRequiredStringValue(value, name) {
  const singleEntryRecord = { [name]: value };
  const lookup = {
    name,
    type: "string"
  };
  return requiredEnvFrom(singleEntryRecord, lookup);
}
592
592
  //#endregion
593
593
  export { ChatModels, ChatProviders, chatModelFrom, chatModelMatrix, chatProviderFrom, emitChatModelErrorTelemetry, emitChatModelRequestTelemetry, emitChatModelResponseTelemetry, extractChatModelToolCalls, extractMeteringDimensions, modelFromEval, modelFromMatrix, modelFromRun, ollamaFromRunContext, openaiFromRunContext, openrouterFromRunContext, toChatModelRuntimeConfig };
594
594