vieval 0.0.10 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -31
- package/dist/bin/vieval.mjs +1 -1
- package/dist/bin/vieval.mjs.map +1 -1
- package/dist/cli/index.d.mts +1 -1
- package/dist/cli/index.mjs +1 -1
- package/dist/{cli-DTDgaqeI.mjs → cli-uzS81IPd.mjs} +1483 -1483
- package/dist/cli-uzS81IPd.mjs.map +1 -0
- package/dist/config.d.mts +1 -1
- package/dist/config.mjs +1 -1
- package/dist/config.mjs.map +1 -1
- package/dist/core/assertions/index.d.mts +156 -156
- package/dist/core/assertions/index.mjs +82 -82
- package/dist/core/assertions/index.mjs.map +1 -1
- package/dist/core/inference-executors/index.d.mts +37 -37
- package/dist/core/inference-executors/index.mjs +54 -53
- package/dist/core/inference-executors/index.mjs.map +1 -1
- package/dist/core/processors/results/index.d.mts +18 -18
- package/dist/core/processors/results/index.mjs.map +1 -1
- package/dist/core/runner/index.d.mts +2 -2
- package/dist/core/runner/index.mjs +259 -259
- package/dist/core/runner/index.mjs.map +1 -1
- package/dist/core/scheduler/index.d.mts +1 -1
- package/dist/core/scheduler/index.mjs +65 -65
- package/dist/core/scheduler/index.mjs.map +1 -1
- package/dist/{env-DfWZy_n4.d.mts → env-Br6jaWGL.d.mts} +9 -9
- package/dist/{env-nV5rVErX.mjs → env-egxaJtNn.mjs} +8 -8
- package/dist/env-egxaJtNn.mjs.map +1 -0
- package/dist/{expect-extensions-DCSqlneN.mjs → expect-extensions-BKdEPt3h.mjs} +46 -46
- package/dist/expect-extensions-BKdEPt3h.mjs.map +1 -0
- package/dist/expect.d.mts +1 -3
- package/dist/expect.mjs +1 -1
- package/dist/expect.mjs.map +1 -1
- package/dist/{index-D_aMeWqO.d.mts → index-BLIlhiWT.d.mts} +565 -565
- package/dist/{index-Bg0atWBF.d.mts → index-CIaJClcC.d.mts} +48 -48
- package/dist/index.d.mts +208 -197
- package/dist/index.mjs +148 -148
- package/dist/index.mjs.map +1 -1
- package/dist/{models-pBSRUZhY.mjs → models-CaCOUPZw.mjs} +1 -1
- package/dist/{models-pBSRUZhY.mjs.map → models-CaCOUPZw.mjs.map} +1 -1
- package/dist/plugins/chat-models/index.d.mts +279 -279
- package/dist/plugins/chat-models/index.mjs +360 -360
- package/dist/plugins/chat-models/index.mjs.map +1 -1
- package/dist/{queue-DsZQkZO_.mjs → queue-BL86z2W_.mjs} +1 -1
- package/dist/{queue-DsZQkZO_.mjs.map → queue-BL86z2W_.mjs.map} +1 -1
- package/dist/{registry-DMnwE_mY.mjs → registry-BK7k6X81.mjs} +294 -294
- package/dist/registry-BK7k6X81.mjs.map +1 -0
- package/dist/testing/expect-extensions.d.mts +27 -27
- package/dist/testing/expect-extensions.mjs +1 -1
- package/package.json +12 -12
- package/dist/cli-DTDgaqeI.mjs.map +0 -1
- package/dist/env-nV5rVErX.mjs.map +0 -1
- package/dist/expect-extensions-DCSqlneN.mjs.map +0 -1
- package/dist/registry-DMnwE_mY.mjs.map +0 -1
|
@@ -1,84 +1,25 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { t as
|
|
1
|
+
import { t as resolveModelByName } from "../../models-CaCOUPZw.mjs";
|
|
2
|
+
import { n as requiredEnvFrom, t as envFrom } from "../../env-egxaJtNn.mjs";
|
|
3
3
|
import process from "node:process";
|
|
4
4
|
import { errorMessageFrom } from "@moeru/std";
|
|
5
5
|
//#region src/plugins/chat-models/runtime-config.ts
|
|
6
|
-
function getParameters(model) {
|
|
7
|
-
return model.parameters ?? {};
|
|
8
|
-
}
|
|
9
|
-
function parseOptionalStringParameter(parameters, key, modelId) {
|
|
10
|
-
const value = parameters[key];
|
|
11
|
-
const normalized = value == null ? void 0 : String(value);
|
|
12
|
-
const name = `${modelId}.parameters.${key}`;
|
|
13
|
-
return envFrom({ [name]: normalized }, {
|
|
14
|
-
name,
|
|
15
|
-
type: "string"
|
|
16
|
-
});
|
|
17
|
-
}
|
|
18
|
-
function parseRequiredStringParameter(parameters, key, modelId) {
|
|
19
|
-
const value = parameters[key];
|
|
20
|
-
const normalized = value == null ? void 0 : String(value);
|
|
21
|
-
const name = `${modelId}.parameters.${key}`;
|
|
22
|
-
return requiredEnvFrom({ [name]: normalized }, {
|
|
23
|
-
name,
|
|
24
|
-
type: "string"
|
|
25
|
-
});
|
|
26
|
-
}
|
|
27
|
-
function parseHeadersParameter(parameters, modelId) {
|
|
28
|
-
const headers = parameters.headers;
|
|
29
|
-
if (headers == null) return;
|
|
30
|
-
if (typeof headers !== "object" || Array.isArray(headers)) throw new TypeError(`Invalid ${modelId}.parameters.headers: expected an object.`);
|
|
31
|
-
const normalized = {};
|
|
32
|
-
for (const [key, value] of Object.entries(headers)) {
|
|
33
|
-
if (typeof value === "string") {
|
|
34
|
-
normalized[key] = value;
|
|
35
|
-
continue;
|
|
36
|
-
}
|
|
37
|
-
if (Array.isArray(value) && value.every((item) => typeof item === "string")) {
|
|
38
|
-
normalized[key] = value;
|
|
39
|
-
continue;
|
|
40
|
-
}
|
|
41
|
-
throw new Error(`Invalid ${modelId}.parameters.headers.${key}: expected string or string[].`);
|
|
42
|
-
}
|
|
43
|
-
return normalized;
|
|
44
|
-
}
|
|
45
6
|
/**
|
|
46
|
-
*
|
|
7
|
+
* Resolves Ollama runtime config from one resolved run-context model.
|
|
47
8
|
*
|
|
48
9
|
* Use when:
|
|
49
|
-
* -
|
|
50
|
-
* -
|
|
10
|
+
* - task execution already has a model resolved through chat-model helpers
|
|
11
|
+
* - eval code wants typed Ollama provider options with a concise helper name
|
|
51
12
|
*
|
|
52
13
|
* Expects:
|
|
53
|
-
* - `model
|
|
54
|
-
* - required OpenAI fields (apiKey) to exist in `model.parameters`
|
|
14
|
+
* - `model` to resolve to an Ollama-backed chat model
|
|
55
15
|
*
|
|
56
16
|
* Returns:
|
|
57
|
-
* - validated runtime config
|
|
17
|
+
* - validated Ollama runtime config
|
|
58
18
|
*/
|
|
59
|
-
function
|
|
60
|
-
const
|
|
61
|
-
if (
|
|
62
|
-
|
|
63
|
-
baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
|
|
64
|
-
headers: parseHeadersParameter(parameters, model.id),
|
|
65
|
-
inferenceExecutor: "openai",
|
|
66
|
-
model: model.model
|
|
67
|
-
};
|
|
68
|
-
if (model.inferenceExecutorId === "ollama") return {
|
|
69
|
-
baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
|
|
70
|
-
headers: parseHeadersParameter(parameters, model.id),
|
|
71
|
-
inferenceExecutor: "ollama",
|
|
72
|
-
model: model.model
|
|
73
|
-
};
|
|
74
|
-
if (model.inferenceExecutorId === "openrouter") return {
|
|
75
|
-
apiKey: parseRequiredStringParameter(parameters, "apiKey", model.id),
|
|
76
|
-
baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
|
|
77
|
-
headers: parseHeadersParameter(parameters, model.id),
|
|
78
|
-
inferenceExecutor: "openrouter",
|
|
79
|
-
model: model.model
|
|
80
|
-
};
|
|
81
|
-
throw new Error(`Unsupported chat inference executor "${model.inferenceExecutorId}" for model "${model.id}".`);
|
|
19
|
+
function ollamaFromRunContext(model) {
|
|
20
|
+
const runtimeConfig = toChatModelRuntimeConfig(model);
|
|
21
|
+
if (runtimeConfig.inferenceExecutor !== "ollama") throw new Error(`Expected ollama model, got "${runtimeConfig.inferenceExecutor}" for "${model.id}".`);
|
|
22
|
+
return runtimeConfig;
|
|
82
23
|
}
|
|
83
24
|
/**
|
|
84
25
|
* Resolves OpenAI runtime config from one resolved run-context model.
|
|
@@ -99,104 +40,141 @@ function openaiFromRunContext(model) {
|
|
|
99
40
|
return runtimeConfig;
|
|
100
41
|
}
|
|
101
42
|
/**
|
|
102
|
-
* Resolves
|
|
43
|
+
* Resolves OpenRouter runtime config from one resolved run-context model.
|
|
103
44
|
*
|
|
104
45
|
* Use when:
|
|
105
46
|
* - task execution already has a model resolved through chat-model helpers
|
|
106
|
-
* - eval code wants typed
|
|
47
|
+
* - eval code wants typed OpenRouter provider options with a concise helper name
|
|
107
48
|
*
|
|
108
49
|
* Expects:
|
|
109
|
-
* - `model` to resolve to an
|
|
50
|
+
* - `model` to resolve to an OpenRouter-backed chat model
|
|
110
51
|
*
|
|
111
52
|
* Returns:
|
|
112
|
-
* - validated
|
|
53
|
+
* - validated OpenRouter runtime config
|
|
113
54
|
*/
|
|
114
|
-
function
|
|
55
|
+
function openrouterFromRunContext(model) {
|
|
115
56
|
const runtimeConfig = toChatModelRuntimeConfig(model);
|
|
116
|
-
if (runtimeConfig.inferenceExecutor !== "
|
|
57
|
+
if (runtimeConfig.inferenceExecutor !== "openrouter") throw new Error(`Expected openrouter model, got "${runtimeConfig.inferenceExecutor}" for "${model.id}".`);
|
|
117
58
|
return runtimeConfig;
|
|
118
59
|
}
|
|
119
60
|
/**
|
|
120
|
-
*
|
|
61
|
+
* Normalizes one configured chat model into runtime executor config.
|
|
121
62
|
*
|
|
122
63
|
* Use when:
|
|
123
|
-
* -
|
|
124
|
-
* -
|
|
64
|
+
* - eval code needs typed provider constructor options from a resolved model
|
|
65
|
+
* - model parameters should be validated once with clear error messages
|
|
125
66
|
*
|
|
126
67
|
* Expects:
|
|
127
|
-
* - `model` to
|
|
68
|
+
* - `model.inferenceExecutorId` to be one of the supported executor ids
|
|
69
|
+
* - required OpenAI fields (apiKey) to exist in `model.parameters`
|
|
128
70
|
*
|
|
129
71
|
* Returns:
|
|
130
|
-
* - validated
|
|
72
|
+
* - validated runtime config union for OpenAI or Ollama
|
|
131
73
|
*/
|
|
132
|
-
function
|
|
133
|
-
const
|
|
134
|
-
if (
|
|
135
|
-
|
|
74
|
+
function toChatModelRuntimeConfig(model) {
|
|
75
|
+
const parameters = getParameters(model);
|
|
76
|
+
if (model.inferenceExecutorId === "openai") return {
|
|
77
|
+
apiKey: parseRequiredStringParameter(parameters, "apiKey", model.id),
|
|
78
|
+
baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
|
|
79
|
+
headers: parseHeadersParameter(parameters, model.id),
|
|
80
|
+
inferenceExecutor: "openai",
|
|
81
|
+
model: model.model
|
|
82
|
+
};
|
|
83
|
+
if (model.inferenceExecutorId === "ollama") return {
|
|
84
|
+
baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
|
|
85
|
+
headers: parseHeadersParameter(parameters, model.id),
|
|
86
|
+
inferenceExecutor: "ollama",
|
|
87
|
+
model: model.model
|
|
88
|
+
};
|
|
89
|
+
if (model.inferenceExecutorId === "openrouter") return {
|
|
90
|
+
apiKey: parseRequiredStringParameter(parameters, "apiKey", model.id),
|
|
91
|
+
baseURL: parseOptionalStringParameter(parameters, "baseURL", model.id),
|
|
92
|
+
headers: parseHeadersParameter(parameters, model.id),
|
|
93
|
+
inferenceExecutor: "openrouter",
|
|
94
|
+
model: model.model
|
|
95
|
+
};
|
|
96
|
+
throw new Error(`Unsupported chat inference executor "${model.inferenceExecutorId}" for model "${model.id}".`);
|
|
136
97
|
}
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
function asRecord(value) {
|
|
140
|
-
if (value == null || typeof value !== "object") return;
|
|
141
|
-
return value;
|
|
98
|
+
function getParameters(model) {
|
|
99
|
+
return model.parameters ?? {};
|
|
142
100
|
}
|
|
143
|
-
function
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
101
|
+
function parseHeadersParameter(parameters, modelId) {
|
|
102
|
+
const headers = parameters.headers;
|
|
103
|
+
if (headers == null) return;
|
|
104
|
+
if (typeof headers !== "object" || Array.isArray(headers)) throw new TypeError(`Invalid ${modelId}.parameters.headers: expected an object.`);
|
|
105
|
+
const normalized = {};
|
|
106
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
107
|
+
if (typeof value === "string") {
|
|
108
|
+
normalized[key] = value;
|
|
109
|
+
continue;
|
|
110
|
+
}
|
|
111
|
+
if (Array.isArray(value) && value.every((item) => typeof item === "string")) {
|
|
112
|
+
normalized[key] = value;
|
|
113
|
+
continue;
|
|
114
|
+
}
|
|
115
|
+
throw new Error(`Invalid ${modelId}.parameters.headers.${key}: expected string or string[].`);
|
|
149
116
|
}
|
|
117
|
+
return normalized;
|
|
118
|
+
}
|
|
119
|
+
function parseOptionalStringParameter(parameters, key, modelId) {
|
|
120
|
+
const value = parameters[key];
|
|
121
|
+
const normalized = value == null ? void 0 : String(value);
|
|
122
|
+
const name = `${modelId}.parameters.${key}`;
|
|
123
|
+
return envFrom({ [name]: normalized }, {
|
|
124
|
+
name,
|
|
125
|
+
type: "string"
|
|
126
|
+
});
|
|
127
|
+
}
|
|
128
|
+
function parseRequiredStringParameter(parameters, key, modelId) {
|
|
129
|
+
const value = parameters[key];
|
|
130
|
+
const normalized = value == null ? void 0 : String(value);
|
|
131
|
+
const name = `${modelId}.parameters.${key}`;
|
|
132
|
+
return requiredEnvFrom({ [name]: normalized }, {
|
|
133
|
+
name,
|
|
134
|
+
type: "string"
|
|
135
|
+
});
|
|
150
136
|
}
|
|
137
|
+
//#endregion
|
|
138
|
+
//#region src/plugins/chat-models/telemetry.ts
|
|
151
139
|
/**
|
|
152
|
-
*
|
|
140
|
+
* Emits chat-model failure telemetry as a reportable task event.
|
|
153
141
|
*
|
|
154
142
|
* Use when:
|
|
155
|
-
* -
|
|
156
|
-
* - provider payload differences should stay hidden behind one stable shape
|
|
143
|
+
* - one inference call fails and report artifacts should include normalized error context
|
|
157
144
|
*
|
|
158
|
-
*
|
|
159
|
-
* -
|
|
145
|
+
* Expects:
|
|
146
|
+
* - `context.reporterHooks?.onEvent` to be available in CLI execution paths
|
|
160
147
|
*/
|
|
161
|
-
function
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
const name = typeof toolCallRecord.name === "string" ? toolCallRecord.name : typeof functionPayload?.name === "string" ? functionPayload.name : void 0;
|
|
172
|
-
if (name == null || name.length === 0) continue;
|
|
173
|
-
const rawArgs = toolCallRecord.args ?? toolCallRecord.arguments ?? functionPayload?.args ?? functionPayload?.arguments;
|
|
174
|
-
toolCalls.push({
|
|
175
|
-
args: parseMaybeJson(rawArgs),
|
|
176
|
-
id: typeof toolCallRecord.id === "string" ? toolCallRecord.id : void 0,
|
|
177
|
-
name
|
|
178
|
-
});
|
|
179
|
-
}
|
|
180
|
-
return toolCalls;
|
|
148
|
+
function emitChatModelErrorTelemetry(context, options) {
|
|
149
|
+
context.reporterHooks?.onEvent?.({
|
|
150
|
+
caseId: options.caseId,
|
|
151
|
+
data: {
|
|
152
|
+
error: errorMessageFrom(options.error) ?? "Unknown inference error.",
|
|
153
|
+
modality: "chat",
|
|
154
|
+
provider: options.provider
|
|
155
|
+
},
|
|
156
|
+
event: "InferenceError"
|
|
157
|
+
});
|
|
181
158
|
}
|
|
182
159
|
/**
|
|
183
|
-
*
|
|
160
|
+
* Emits chat-model request telemetry as a reportable task event.
|
|
184
161
|
*
|
|
185
162
|
* Use when:
|
|
186
|
-
* -
|
|
163
|
+
* - task code submits one model request and wants request-side traceability
|
|
187
164
|
*
|
|
188
|
-
*
|
|
189
|
-
* -
|
|
165
|
+
* Expects:
|
|
166
|
+
* - `context.reporterHooks?.onEvent` to be available in CLI execution paths
|
|
190
167
|
*/
|
|
191
|
-
function
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
168
|
+
function emitChatModelRequestTelemetry(context, options) {
|
|
169
|
+
context.reporterHooks?.onEvent?.({
|
|
170
|
+
caseId: options.caseId,
|
|
171
|
+
data: {
|
|
172
|
+
data: options.data,
|
|
173
|
+
modality: "chat",
|
|
174
|
+
provider: options.provider
|
|
175
|
+
},
|
|
176
|
+
event: "InferenceRequest"
|
|
177
|
+
});
|
|
200
178
|
}
|
|
201
179
|
/**
|
|
202
180
|
* Emits chat-model response telemetry as reportable task events.
|
|
@@ -252,184 +230,69 @@ function emitChatModelResponseTelemetry(context, options) {
|
|
|
252
230
|
}
|
|
253
231
|
}
|
|
254
232
|
/**
|
|
255
|
-
*
|
|
233
|
+
* Extracts normalized tool calls from one chat-model response shape.
|
|
256
234
|
*
|
|
257
235
|
* Use when:
|
|
258
|
-
* -
|
|
236
|
+
* - downstream scoring, reporting, or analysis should inspect tool call usage
|
|
237
|
+
* - provider payload differences should stay hidden behind one stable shape
|
|
259
238
|
*
|
|
260
|
-
*
|
|
261
|
-
* - `
|
|
239
|
+
* Returns:
|
|
240
|
+
* - normalized list of `{ id?, name, args }` tool calls
|
|
262
241
|
*/
|
|
263
|
-
function
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
242
|
+
function extractChatModelToolCalls(response) {
|
|
243
|
+
const responseRecord = asRecord(response);
|
|
244
|
+
if (responseRecord == null) return [];
|
|
245
|
+
const rawToolCalls = responseRecord.toolCalls ?? responseRecord.tool_calls;
|
|
246
|
+
if (!Array.isArray(rawToolCalls)) return [];
|
|
247
|
+
const toolCalls = [];
|
|
248
|
+
for (const rawToolCall of rawToolCalls) {
|
|
249
|
+
const toolCallRecord = asRecord(rawToolCall);
|
|
250
|
+
if (toolCallRecord == null) continue;
|
|
251
|
+
const functionPayload = asRecord(toolCallRecord.function);
|
|
252
|
+
const name = typeof toolCallRecord.name === "string" ? toolCallRecord.name : typeof functionPayload?.name === "string" ? functionPayload.name : void 0;
|
|
253
|
+
if (name == null || name.length === 0) continue;
|
|
254
|
+
const rawArgs = toolCallRecord.args ?? toolCallRecord.arguments ?? functionPayload?.args ?? functionPayload?.arguments;
|
|
255
|
+
toolCalls.push({
|
|
256
|
+
args: parseMaybeJson(rawArgs),
|
|
257
|
+
id: typeof toolCallRecord.id === "string" ? toolCallRecord.id : void 0,
|
|
258
|
+
name
|
|
259
|
+
});
|
|
260
|
+
}
|
|
261
|
+
return toolCalls;
|
|
273
262
|
}
|
|
274
263
|
/**
|
|
275
|
-
*
|
|
264
|
+
* Extracts numeric metering dimensions from one chat-model response usage block.
|
|
276
265
|
*
|
|
277
266
|
* Use when:
|
|
278
|
-
* -
|
|
267
|
+
* - report events should capture usage dimensions in a modality-neutral map
|
|
279
268
|
*
|
|
280
|
-
*
|
|
281
|
-
* -
|
|
269
|
+
* Returns:
|
|
270
|
+
* - numeric dimensions keyed by provider usage field names
|
|
282
271
|
*/
|
|
283
|
-
function
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
},
|
|
291
|
-
event: "InferenceError"
|
|
292
|
-
});
|
|
293
|
-
}
|
|
294
|
-
//#endregion
|
|
295
|
-
//#region src/plugins/chat-models/index.ts
|
|
296
|
-
function normalizeExecutionPolicy(policy) {
|
|
297
|
-
if (policy == null) return;
|
|
298
|
-
const normalized = {
|
|
299
|
-
autoAttempt: policy.autoAttempt,
|
|
300
|
-
autoRetry: policy.autoRetry,
|
|
301
|
-
autoRetryDelay: policy.autoRetryDelay,
|
|
302
|
-
timeout: policy.timeout
|
|
303
|
-
};
|
|
304
|
-
return Object.values(normalized).some((value) => value != null) ? normalized : void 0;
|
|
305
|
-
}
|
|
306
|
-
function hasJudgeAlias(model) {
|
|
307
|
-
return [
|
|
308
|
-
...model.aliases ?? [],
|
|
309
|
-
...model.id == null ? [] : [model.id],
|
|
310
|
-
model.model
|
|
311
|
-
].some((value) => value.toLowerCase().includes("judge"));
|
|
312
|
-
}
|
|
313
|
-
function resolveModelExecutionPolicy(options) {
|
|
314
|
-
const explicitPolicy = normalizeExecutionPolicy({
|
|
315
|
-
autoAttempt: options.autoAttempt ?? options.executionPolicy?.autoAttempt,
|
|
316
|
-
autoRetry: options.autoRetry ?? options.executionPolicy?.autoRetry,
|
|
317
|
-
autoRetryDelay: options.autoRetryDelay ?? options.executionPolicy?.autoRetryDelay,
|
|
318
|
-
timeout: options.timeout ?? options.executionPolicy?.timeout
|
|
319
|
-
});
|
|
320
|
-
if (explicitPolicy != null && Object.keys(explicitPolicy).length > 0) return explicitPolicy;
|
|
321
|
-
if (hasJudgeAlias(options)) return { autoRetry: 3 };
|
|
322
|
-
}
|
|
323
|
-
function normalizeInferenceExecutorId(inferenceExecutor, inferenceExecutorId) {
|
|
324
|
-
if (typeof inferenceExecutor === "string") return inferenceExecutor;
|
|
325
|
-
return inferenceExecutorId ?? "custom";
|
|
326
|
-
}
|
|
327
|
-
function createDefaultModelId(inferenceExecutorId, model) {
|
|
328
|
-
return `${inferenceExecutorId}:${model}`;
|
|
329
|
-
}
|
|
330
|
-
function normalizeEnvRecord(env) {
|
|
331
|
-
const normalized = {};
|
|
332
|
-
for (const [key, value] of Object.entries(env)) if (typeof value === "string") normalized[key] = value;
|
|
333
|
-
return normalized;
|
|
334
|
-
}
|
|
335
|
-
async function resolveChatModelResolverValue(value, context) {
|
|
336
|
-
if (typeof value === "function") return await value(context);
|
|
337
|
-
return value;
|
|
338
|
-
}
|
|
339
|
-
function resolveRequiredStringValue(value, name) {
|
|
340
|
-
return requiredEnvFrom({ [name]: value }, {
|
|
341
|
-
name,
|
|
342
|
-
type: "string"
|
|
343
|
-
});
|
|
344
|
-
}
|
|
345
|
-
function resolveOptionalStringValue(value, name) {
|
|
346
|
-
return envFrom({ [name]: value }, {
|
|
347
|
-
name,
|
|
348
|
-
type: "string"
|
|
349
|
-
});
|
|
350
|
-
}
|
|
351
|
-
function resolveOptionalEnvValue(env, envKey) {
|
|
352
|
-
return envFrom(env, {
|
|
353
|
-
name: envKey,
|
|
354
|
-
type: "string"
|
|
355
|
-
});
|
|
356
|
-
}
|
|
357
|
-
function resolveRequiredEnvValue(env, envKey) {
|
|
358
|
-
return requiredEnvFrom(env, {
|
|
359
|
-
name: envKey,
|
|
360
|
-
type: "string"
|
|
361
|
-
});
|
|
362
|
-
}
|
|
363
|
-
function resolveProviderParameters(provider, env) {
|
|
364
|
-
const parameters = { ...provider.parameters };
|
|
365
|
-
for (const [parameterName, envKey] of Object.entries(provider.optionalEnv ?? {})) {
|
|
366
|
-
const resolved = resolveOptionalEnvValue(env, envKey);
|
|
367
|
-
if (resolved != null) parameters[parameterName] = resolved;
|
|
272
|
+
function extractMeteringDimensions(response) {
|
|
273
|
+
const usage = asRecord(asRecord(response)?.usage);
|
|
274
|
+
if (usage == null) return {};
|
|
275
|
+
const dimensions = {};
|
|
276
|
+
for (const [key, value] of Object.entries(usage)) {
|
|
277
|
+
if (typeof value !== "number" || Number.isNaN(value)) continue;
|
|
278
|
+
dimensions[key] = value;
|
|
368
279
|
}
|
|
369
|
-
|
|
370
|
-
return Object.keys(parameters).length > 0 ? parameters : void 0;
|
|
371
|
-
}
|
|
372
|
-
function normalizeChatProviderDefinition(provider, env) {
|
|
373
|
-
return {
|
|
374
|
-
id: provider.id,
|
|
375
|
-
inferenceExecutor: provider.inferenceExecutor,
|
|
376
|
-
inferenceExecutorId: normalizeInferenceExecutorId(provider.inferenceExecutor, provider.inferenceExecutorId),
|
|
377
|
-
optionalEnv: provider.optionalEnv,
|
|
378
|
-
parameters: resolveProviderParameters(provider, env),
|
|
379
|
-
requiredEnv: provider.requiredEnv
|
|
380
|
-
};
|
|
381
|
-
}
|
|
382
|
-
function createProviderMap(config) {
|
|
383
|
-
const providerMap = /* @__PURE__ */ new Map();
|
|
384
|
-
for (const provider of config.chatProviders ?? []) providerMap.set(provider.id, provider);
|
|
385
|
-
return providerMap;
|
|
280
|
+
return dimensions;
|
|
386
281
|
}
|
|
387
|
-
function
|
|
388
|
-
if (
|
|
389
|
-
|
|
390
|
-
if (provider == null) throw new Error(`Unknown chat provider "${model.provider}" referenced by model "${model.id}".`);
|
|
391
|
-
return {
|
|
392
|
-
...model,
|
|
393
|
-
inferenceExecutor: provider.inferenceExecutor,
|
|
394
|
-
inferenceExecutorId: provider.inferenceExecutorId ?? normalizeInferenceExecutorId(provider.inferenceExecutor, provider.inferenceExecutorId),
|
|
395
|
-
parameters: {
|
|
396
|
-
...provider.parameters,
|
|
397
|
-
...model.parameters
|
|
398
|
-
}
|
|
399
|
-
};
|
|
282
|
+
function asRecord(value) {
|
|
283
|
+
if (value == null || typeof value !== "object") return;
|
|
284
|
+
return value;
|
|
400
285
|
}
|
|
401
|
-
|
|
402
|
-
if (
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
if (normalizedBaseURL != null) resolvedParameters.baseURL = normalizedBaseURL;
|
|
286
|
+
function parseMaybeJson(value) {
|
|
287
|
+
if (typeof value !== "string") return value;
|
|
288
|
+
try {
|
|
289
|
+
return JSON.parse(value);
|
|
290
|
+
} catch {
|
|
291
|
+
return value;
|
|
408
292
|
}
|
|
409
|
-
if (model.runtimeResolvers.headers != null) resolvedParameters.headers = await resolveChatModelResolverValue(model.runtimeResolvers.headers, context);
|
|
410
|
-
return Object.keys(resolvedParameters).length > 0 ? resolvedParameters : void 0;
|
|
411
|
-
}
|
|
412
|
-
async function resolveChatModelDefinition(model, config) {
|
|
413
|
-
const providerResolvedModel = resolveModelProvider(model, createProviderMap(config));
|
|
414
|
-
const resolvedRuntimeParameters = await resolveModelRuntimeResolvers(providerResolvedModel, { env: normalizeEnvRecord(config.env ?? process.env) });
|
|
415
|
-
if (resolvedRuntimeParameters == null) return providerResolvedModel;
|
|
416
|
-
return {
|
|
417
|
-
...providerResolvedModel,
|
|
418
|
-
parameters: {
|
|
419
|
-
...providerResolvedModel.parameters,
|
|
420
|
-
...resolvedRuntimeParameters
|
|
421
|
-
}
|
|
422
|
-
};
|
|
423
|
-
}
|
|
424
|
-
function isOpenAIChatModelInferenceExecutor(options) {
|
|
425
|
-
return options.inferenceExecutor === "openai";
|
|
426
|
-
}
|
|
427
|
-
function isOllamaChatModelInferenceExecutor(options) {
|
|
428
|
-
return options.inferenceExecutor === "ollama";
|
|
429
|
-
}
|
|
430
|
-
function isOpenRouterChatModelInferenceExecutor(options) {
|
|
431
|
-
return options.inferenceExecutor === "openrouter";
|
|
432
293
|
}
|
|
294
|
+
//#endregion
|
|
295
|
+
//#region src/plugins/chat-models/index.ts
|
|
433
296
|
/**
|
|
434
297
|
* Builds one normalized chat model definition.
|
|
435
298
|
*
|
|
@@ -465,6 +328,41 @@ function chatModelFrom(options) {
|
|
|
465
328
|
};
|
|
466
329
|
}
|
|
467
330
|
/**
|
|
331
|
+
* Creates a run-matrix `model` axis from configured chat model names.
|
|
332
|
+
*
|
|
333
|
+
* Use when:
|
|
334
|
+
* - run matrix should iterate over explicit chat model ids/aliases
|
|
335
|
+
* - project configs want a concise model-axis helper
|
|
336
|
+
*
|
|
337
|
+
* Expects:
|
|
338
|
+
* - each provided name to match a configured model id or alias at runtime
|
|
339
|
+
*
|
|
340
|
+
* Returns:
|
|
341
|
+
* - matrix axis object compatible with `runMatrix.extend/override`
|
|
342
|
+
*/
|
|
343
|
+
function chatModelMatrix(...names) {
|
|
344
|
+
return { model: Array.from(new Set(names)) };
|
|
345
|
+
}
|
|
346
|
+
/**
|
|
347
|
+
* Built-in chat models plugin that contributes model definitions to vieval config.
|
|
348
|
+
*
|
|
349
|
+
* Use when:
|
|
350
|
+
* - chat-model registration should stay in config-level plugin setup
|
|
351
|
+
* - tasks and assertions resolve models by name or alias at runtime
|
|
352
|
+
*/
|
|
353
|
+
function ChatModels(options) {
|
|
354
|
+
return {
|
|
355
|
+
async configVieval(config) {
|
|
356
|
+
const resolvedModels = await Promise.all(options.models.map(async (model) => resolveChatModelDefinition(model, config)));
|
|
357
|
+
return {
|
|
358
|
+
...config,
|
|
359
|
+
models: [...config.models ?? [], ...resolvedModels]
|
|
360
|
+
};
|
|
361
|
+
},
|
|
362
|
+
name: "vieval:chat-models"
|
|
363
|
+
};
|
|
364
|
+
}
|
|
365
|
+
/**
|
|
468
366
|
* Builds one normalized chat provider definition.
|
|
469
367
|
*
|
|
470
368
|
* Use when:
|
|
@@ -482,6 +380,42 @@ function chatProviderFrom(options) {
|
|
|
482
380
|
};
|
|
483
381
|
}
|
|
484
382
|
/**
|
|
383
|
+
* Built-in chat providers plugin that contributes provider presets to config.
|
|
384
|
+
*
|
|
385
|
+
* Use when:
|
|
386
|
+
* - provider runtime config should be centralized and reusable
|
|
387
|
+
* - provider parameters should be resolved from env via `envFrom`/`requiredEnvFrom`
|
|
388
|
+
*/
|
|
389
|
+
function ChatProviders(options) {
|
|
390
|
+
return {
|
|
391
|
+
configVieval(config) {
|
|
392
|
+
const env = config.env ?? options.env ?? process.env;
|
|
393
|
+
const normalizedProviders = options.providers.map((provider) => normalizeChatProviderDefinition(provider, env));
|
|
394
|
+
return {
|
|
395
|
+
...config,
|
|
396
|
+
chatProviders: [...config.chatProviders ?? [], ...normalizedProviders]
|
|
397
|
+
};
|
|
398
|
+
},
|
|
399
|
+
name: "vieval:chat-providers"
|
|
400
|
+
};
|
|
401
|
+
}
|
|
402
|
+
/**
|
|
403
|
+
* Resolves a configured chat model from one eval-matrix axis.
|
|
404
|
+
*
|
|
405
|
+
* Use when:
|
|
406
|
+
* - eval matrix selects a judge, rubric, or evaluator model
|
|
407
|
+
* - callers want the scoped helper instead of passing `scope: 'eval'`
|
|
408
|
+
*
|
|
409
|
+
* Expects:
|
|
410
|
+
* - `options.axis` to exist in `context.task.matrix.eval`
|
|
411
|
+
*
|
|
412
|
+
* Returns:
|
|
413
|
+
* - the configured model matching the selected eval-matrix value
|
|
414
|
+
*/
|
|
415
|
+
function modelFromEval(context, options) {
|
|
416
|
+
return modelFromMatrix(context, "eval", options);
|
|
417
|
+
}
|
|
418
|
+
/**
|
|
485
419
|
* Resolves a configured chat model from one scoped matrix axis.
|
|
486
420
|
*
|
|
487
421
|
* Use when:
|
|
@@ -518,77 +452,143 @@ function modelFromMatrix(context, scope, options) {
|
|
|
518
452
|
function modelFromRun(context, options) {
|
|
519
453
|
return modelFromMatrix(context, "run", options);
|
|
520
454
|
}
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
*
|
|
524
|
-
* Use when:
|
|
525
|
-
* - eval matrix selects a judge, rubric, or evaluator model
|
|
526
|
-
* - callers want the scoped helper instead of passing `scope: 'eval'`
|
|
527
|
-
*
|
|
528
|
-
* Expects:
|
|
529
|
-
* - `options.axis` to exist in `context.task.matrix.eval`
|
|
530
|
-
*
|
|
531
|
-
* Returns:
|
|
532
|
-
* - the configured model matching the selected eval-matrix value
|
|
533
|
-
*/
|
|
534
|
-
function modelFromEval(context, options) {
|
|
535
|
-
return modelFromMatrix(context, "eval", options);
|
|
455
|
+
function createDefaultModelId(inferenceExecutorId, model) {
|
|
456
|
+
return `${inferenceExecutorId}:${model}`;
|
|
536
457
|
}
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
* - run matrix should iterate over explicit chat model ids/aliases
|
|
542
|
-
* - project configs want a concise model-axis helper
|
|
543
|
-
*
|
|
544
|
-
* Expects:
|
|
545
|
-
* - each provided name to match a configured model id or alias at runtime
|
|
546
|
-
*
|
|
547
|
-
* Returns:
|
|
548
|
-
* - matrix axis object compatible with `runMatrix.extend/override`
|
|
549
|
-
*/
|
|
550
|
-
function chatModelMatrix(...names) {
|
|
551
|
-
return { model: Array.from(new Set(names)) };
|
|
458
|
+
function createProviderMap(config) {
|
|
459
|
+
const providerMap = /* @__PURE__ */ new Map();
|
|
460
|
+
for (const provider of config.chatProviders ?? []) providerMap.set(provider.id, provider);
|
|
461
|
+
return providerMap;
|
|
552
462
|
}
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
function
|
|
463
|
+
function hasJudgeAlias(model) {
|
|
464
|
+
return [
|
|
465
|
+
...model.aliases ?? [],
|
|
466
|
+
...model.id == null ? [] : [model.id],
|
|
467
|
+
model.model
|
|
468
|
+
].some((value) => value.toLowerCase().includes("judge"));
|
|
469
|
+
}
|
|
470
|
+
function isOllamaChatModelInferenceExecutor(options) {
|
|
471
|
+
return options.inferenceExecutor === "ollama";
|
|
472
|
+
}
|
|
473
|
+
function isOpenAIChatModelInferenceExecutor(options) {
|
|
474
|
+
return options.inferenceExecutor === "openai";
|
|
475
|
+
}
|
|
476
|
+
function isOpenRouterChatModelInferenceExecutor(options) {
|
|
477
|
+
return options.inferenceExecutor === "openrouter";
|
|
478
|
+
}
|
|
479
|
+
function normalizeChatProviderDefinition(provider, env) {
|
|
561
480
|
return {
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
};
|
|
569
|
-
},
|
|
570
|
-
name: "vieval:chat-providers"
|
|
481
|
+
id: provider.id,
|
|
482
|
+
inferenceExecutor: provider.inferenceExecutor,
|
|
483
|
+
inferenceExecutorId: normalizeInferenceExecutorId(provider.inferenceExecutor, provider.inferenceExecutorId),
|
|
484
|
+
optionalEnv: provider.optionalEnv,
|
|
485
|
+
parameters: resolveProviderParameters(provider, env),
|
|
486
|
+
requiredEnv: provider.requiredEnv
|
|
571
487
|
};
|
|
572
488
|
}
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
489
|
+
function normalizeEnvRecord(env) {
|
|
490
|
+
const normalized = {};
|
|
491
|
+
for (const [key, value] of Object.entries(env)) if (typeof value === "string") normalized[key] = value;
|
|
492
|
+
return normalized;
|
|
493
|
+
}
|
|
494
|
+
/**
 * Reduces an execution policy to its four known fields.
 *
 * @param {object | null | undefined} policy - Policy that may carry `autoAttempt`,
 *   `autoRetry`, `autoRetryDelay` and `timeout`.
 * @returns {object | undefined} An object with exactly those four keys, or
 *   `undefined` when `policy` is nullish or all four values are nullish.
 */
function normalizeExecutionPolicy(policy) {
	if (policy == null) return undefined;
	const { autoAttempt, autoRetry, autoRetryDelay, timeout } = policy;
	const picked = {
		autoAttempt,
		autoRetry,
		autoRetryDelay,
		timeout
	};
	// A policy whose fields are all unset counts as "no policy".
	const isEmpty = Object.values(picked).every((entry) => entry == null);
	return isEmpty ? undefined : picked;
}
|
|
504
|
+
/**
 * Chooses the identifier used for an inference executor.
 *
 * @param {*} inferenceExecutor - Executor reference; when it is a string it is the identifier.
 * @param {string | null | undefined} inferenceExecutorId - Identifier used when the executor is not a string.
 * @returns {string} The executor string, else `inferenceExecutorId`, else "custom" when that is nullish.
 */
function normalizeInferenceExecutorId(inferenceExecutor, inferenceExecutorId) {
	return typeof inferenceExecutor === "string"
		? inferenceExecutor
		: inferenceExecutorId ?? "custom";
}
|
|
508
|
+
/**
 * Resolves a chat model definition against its provider and runtime resolvers.
 *
 * Steps, in order:
 * 1. `resolveModelProvider` is applied using the map from `createProviderMap(config)`.
 * 2. `resolveModelRuntimeResolvers` runs with an `env` built by
 *    `normalizeEnvRecord` from `config.env`, or `process.env` when that is nullish.
 * 3. Any resolved runtime parameters are merged over the model's `parameters`.
 *
 * @param {object} model - Model definition to resolve.
 * @param {object} config - Configuration supplying providers and, optionally, `env`.
 * @returns {Promise<object>} The provider-resolved model, with runtime parameters merged in when there are any.
 */
async function resolveChatModelDefinition(model, config) {
	const providers = createProviderMap(config);
	const baseModel = resolveModelProvider(model, providers);
	const env = normalizeEnvRecord(config.env ?? process.env);
	const runtimeParameters = await resolveModelRuntimeResolvers(baseModel, { env });
	// Nothing resolved at runtime: hand back the provider-resolved model untouched.
	if (runtimeParameters == null) return baseModel;
	const parameters = {
		...baseModel.parameters,
		...runtimeParameters
	};
	return {
		...baseModel,
		parameters
	};
}
|
|
520
|
+
/**
 * Resolves a value that is either given directly or produced by a resolver function.
 *
 * @param {*} value - A literal value, or a function called with `context` whose result is awaited.
 * @param {*} context - Argument passed to `value` when it is a function.
 * @returns {Promise<*>} The awaited function result, or `value` itself when it is not a function.
 */
async function resolveChatModelResolverValue(value, context) {
	if (typeof value !== "function") return value;
	const produced = await value(context);
	return produced;
}
|
|
524
|
+
/**
 * Determines the execution policy for a chat model.
 *
 * Each policy field is read from `options` first, then from
 * `options.executionPolicy`. If `normalizeExecutionPolicy` yields a non-empty
 * object from those values, that object is returned. Otherwise models for
 * which `hasJudgeAlias` is true get `{ autoRetry: 3 }`.
 *
 * @param {object} options - Chat model options, optionally carrying `executionPolicy`.
 * @returns {object | undefined} The explicit policy, the judge default, or `undefined`.
 */
function resolveModelExecutionPolicy(options) {
	// A top-level field wins over the same field nested in `executionPolicy`.
	const pick = (field) => options[field] ?? options.executionPolicy?.[field];
	const explicitPolicy = normalizeExecutionPolicy({
		autoAttempt: pick("autoAttempt"),
		autoRetry: pick("autoRetry"),
		autoRetryDelay: pick("autoRetryDelay"),
		timeout: pick("timeout")
	});
	const hasExplicitPolicy = explicitPolicy != null && Object.keys(explicitPolicy).length > 0;
	if (hasExplicitPolicy) return explicitPolicy;
	return hasJudgeAlias(options) ? { autoRetry: 3 } : undefined;
}
|
|
534
|
+
/**
 * Applies a provider's executor and parameters to a model that references it.
 *
 * A model without a `provider` is returned unchanged (same object). Otherwise
 * the provider is looked up in `providerMap`, and a copy of the model is
 * returned with the provider's `inferenceExecutor`, an `inferenceExecutorId`,
 * and the provider's `parameters` overlaid by the model's own `parameters`.
 *
 * @param {object} model - Model definition, optionally naming a `provider`.
 * @param {object} providerMap - Lookup exposing `get(providerId)`.
 * @returns {object} The original model, or a provider-resolved copy.
 * @throws {Error} When `model.provider` is set but `providerMap` has no entry for it.
 */
function resolveModelProvider(model, providerMap) {
	const providerId = model.provider;
	if (providerId == null) return model;
	const provider = providerMap.get(providerId);
	if (provider == null) throw new Error(`Unknown chat provider "${model.provider}" referenced by model "${model.id}".`);
	const { inferenceExecutor, inferenceExecutorId } = provider;
	// Model-level parameters take precedence over provider-level ones.
	const parameters = {
		...provider.parameters,
		...model.parameters
	};
	return {
		...model,
		inferenceExecutor,
		inferenceExecutorId: inferenceExecutorId ?? normalizeInferenceExecutorId(inferenceExecutor, inferenceExecutorId),
		parameters
	};
}
|
|
548
|
+
/**
 * Evaluates a model's `runtimeResolvers` into concrete parameters.
 *
 * The resolvers are processed one after another: `apiKey`, then `baseURL`,
 * then `headers`. Each present resolver is evaluated through
 * `resolveChatModelResolverValue` with `context`. `apiKey` is passed through
 * `resolveRequiredStringValue`; `baseURL` through `resolveOptionalStringValue`
 * and is kept only when the result is not nullish; `headers` is stored as resolved.
 *
 * @param {object} model - Model definition, optionally carrying `runtimeResolvers`.
 * @param {object} context - Context handed to every resolver.
 * @returns {Promise<object | undefined>} The resolved parameters, or `undefined`
 *   when there are no resolvers or nothing was resolved.
 */
async function resolveModelRuntimeResolvers(model, context) {
	const resolvers = model.runtimeResolvers;
	if (resolvers == null) return undefined;
	const resolved = {};
	if (resolvers.apiKey != null) {
		const rawApiKey = await resolveChatModelResolverValue(resolvers.apiKey, context);
		resolved.apiKey = resolveRequiredStringValue(rawApiKey, `${model.id}.apiKey`);
	}
	if (resolvers.baseURL != null) {
		const rawBaseURL = await resolveChatModelResolverValue(resolvers.baseURL, context);
		const baseURL = resolveOptionalStringValue(rawBaseURL, `${model.id}.baseURL`);
		if (baseURL != null) resolved.baseURL = baseURL;
	}
	if (resolvers.headers != null) {
		resolved.headers = await resolveChatModelResolverValue(resolvers.headers, context);
	}
	return Object.keys(resolved).length === 0 ? undefined : resolved;
}
|
|
559
|
+
/**
 * Looks up an environment variable as a string via `envFrom`.
 *
 * NOTE(review): presumably yields a nullish value when the variable is absent,
 * since callers check the result with `!= null`; confirm against `envFrom`.
 *
 * @param {object} env - Environment record passed to `envFrom`.
 * @param {string} envKey - Name of the variable to read.
 * @returns {*} Whatever `envFrom` returns for a `{ name: envKey, type: "string" }` request.
 */
function resolveOptionalEnvValue(env, envKey) {
	const descriptor = {
		name: envKey,
		type: "string"
	};
	return envFrom(env, descriptor);
}
|
|
565
|
+
/**
 * Runs a single named value through `envFrom` as a string.
 *
 * The value is wrapped in a one-entry record keyed by `name`, so the same
 * `envFrom` handling used for environment variables applies to it.
 *
 * @param {*} value - Value to process.
 * @param {string} name - Key under which `value` is presented to `envFrom`.
 * @returns {*} Whatever `envFrom` returns for that entry.
 */
function resolveOptionalStringValue(value, name) {
	const source = { [name]: value };
	const descriptor = {
		name,
		type: "string"
	};
	return envFrom(source, descriptor);
}
|
|
571
|
+
/**
 * Builds a provider's parameters from its static values and environment mappings.
 *
 * Starts from a shallow copy of `provider.parameters`. Each `optionalEnv`
 * entry (parameter name -> env key) sets its parameter only when
 * `resolveOptionalEnvValue` returns a non-nullish value. Each `requiredEnv`
 * entry then always sets its parameter from `resolveRequiredEnvValue`.
 *
 * @param {object} provider - Provider definition with optional `parameters`, `optionalEnv`, `requiredEnv`.
 * @param {object} env - Environment record used for the lookups.
 * @returns {object | undefined} The merged parameters, or `undefined` when none were collected.
 */
function resolveProviderParameters(provider, env) {
	const merged = { ...provider.parameters };
	Object.entries(provider.optionalEnv ?? {}).forEach(([parameterName, envKey]) => {
		const found = resolveOptionalEnvValue(env, envKey);
		if (found != null) merged[parameterName] = found;
	});
	// Required mappings run last, so they override static and optional values.
	Object.entries(provider.requiredEnv ?? {}).forEach(([parameterName, envKey]) => {
		merged[parameterName] = resolveRequiredEnvValue(env, envKey);
	});
	return Object.keys(merged).length > 0 ? merged : undefined;
}
|
|
580
|
+
/**
 * Reads an environment variable as a string via `requiredEnvFrom`.
 *
 * NOTE(review): presumably `requiredEnvFrom` rejects a missing variable; confirm
 * against its definition, which is not visible here.
 *
 * @param {object} env - Environment record passed to `requiredEnvFrom`.
 * @param {string} envKey - Name of the variable to read.
 * @returns {*} Whatever `requiredEnvFrom` returns for a `{ name: envKey, type: "string" }` request.
 */
function resolveRequiredEnvValue(env, envKey) {
	const descriptor = {
		name: envKey,
		type: "string"
	};
	return requiredEnvFrom(env, descriptor);
}
|
|
586
|
+
/**
 * Runs a single named value through `requiredEnvFrom` as a string.
 *
 * The value is wrapped in a one-entry record keyed by `name`, so the same
 * `requiredEnvFrom` handling used for environment variables applies to it.
 *
 * @param {*} value - Value to process.
 * @param {string} name - Key under which `value` is presented to `requiredEnvFrom`.
 * @returns {*} Whatever `requiredEnvFrom` returns for that entry.
 */
function resolveRequiredStringValue(value, name) {
	const source = { [name]: value };
	const descriptor = {
		name,
		type: "string"
	};
	return requiredEnvFrom(source, descriptor);
}
|
|
592
592
|
//#endregion
|
|
593
593
|
export { ChatModels, ChatProviders, chatModelFrom, chatModelMatrix, chatProviderFrom, emitChatModelErrorTelemetry, emitChatModelRequestTelemetry, emitChatModelResponseTelemetry, extractChatModelToolCalls, extractMeteringDimensions, modelFromEval, modelFromMatrix, modelFromRun, ollamaFromRunContext, openaiFromRunContext, openrouterFromRunContext, toChatModelRuntimeConfig };
|
|
594
594
|
|