@wix/evalforge-evaluator 0.58.0 → 0.60.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js
CHANGED
|
@@ -194,6 +194,16 @@ function createApiClient(serverUrl, options = "") {
|
|
|
194
194
|
|
|
195
195
|
// src/fetch-evaluation-data.ts
|
|
196
196
|
var import_evalforge_types = require("@wix/evalforge-types");
|
|
197
|
+
/**
 * Normalizes the `skillNames` assertion parameter into an array of strings.
 *
 * Accepted inputs:
 *  - a JSON-encoded array string, e.g. '["skill-a","skill-b"]';
 *  - an already-materialized array (passed through instead of being dropped).
 * Every element is coerced with `String`. Any other input (undefined, null,
 * numbers, plain objects, or JSON that does not decode to an array) yields
 * an empty array.
 *
 * @param {unknown} value - Raw `skillNames` value taken from assertion params.
 * @returns {string[]} The skill names, or `[]` when none can be derived.
 * @throws {SyntaxError} When `value` is a string that is not valid JSON. The
 *   original parser error is attached as `cause`.
 */
function parseSkillNamesFromParams(value) {
  // An array needs no decoding; returning [] here would silently lose names.
  if (Array.isArray(value)) {
    return value.map(String);
  }
  if (typeof value !== "string") {
    return [];
  }
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch (err) {
    // Keep the SyntaxError type callers would already see, but say which
    // parameter was malformed so the failure is diagnosable.
    throw new SyntaxError(
      `Invalid "skillNames" param: expected a JSON array string (${err.message})`,
      { cause: err }
    );
  }
  return Array.isArray(parsed) ? parsed.map(String) : [];
}
|
|
197
207
|
function applyParamsToAssertion(assertion, params) {
|
|
198
208
|
if (!params || Object.keys(params).length === 0) {
|
|
199
209
|
return assertion;
|
|
@@ -215,6 +225,12 @@ function applyParamsToAssertion(assertion, params) {
|
|
|
215
225
|
}
|
|
216
226
|
return { ...assertion, prompt, systemPrompt };
|
|
217
227
|
}
|
|
228
|
+
if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
|
|
229
|
+
return {
|
|
230
|
+
...assertion,
|
|
231
|
+
skillNames: parseSkillNamesFromParams(params.skillNames)
|
|
232
|
+
};
|
|
233
|
+
}
|
|
218
234
|
return { ...assertion, ...params };
|
|
219
235
|
}
|
|
220
236
|
function resolveSystemAssertion(assertionId, params) {
|
|
@@ -224,7 +240,7 @@ function resolveSystemAssertion(assertionId, params) {
|
|
|
224
240
|
case "skill_was_called":
|
|
225
241
|
baseAssertion = {
|
|
226
242
|
type: "skill_was_called",
|
|
227
|
-
|
|
243
|
+
skillNames: parseSkillNamesFromParams(params?.skillNames)
|
|
228
244
|
};
|
|
229
245
|
break;
|
|
230
246
|
case "build_passed":
|
|
@@ -249,38 +265,15 @@ function resolveSystemAssertion(assertionId, params) {
|
|
|
249
265
|
}
|
|
250
266
|
function customAssertionToAssertion(ca, params) {
|
|
251
267
|
const config = ca.config;
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
baseAssertion = {
|
|
262
|
-
type: "build_passed",
|
|
263
|
-
command: config?.command,
|
|
264
|
-
expectedExitCode: config?.expectedExitCode
|
|
265
|
-
};
|
|
266
|
-
break;
|
|
267
|
-
case "llm_judge":
|
|
268
|
-
baseAssertion = {
|
|
269
|
-
type: "llm_judge",
|
|
270
|
-
prompt: config?.prompt ?? "",
|
|
271
|
-
systemPrompt: config?.systemPrompt,
|
|
272
|
-
minScore: config?.minScore,
|
|
273
|
-
model: config?.model,
|
|
274
|
-
maxTokens: config?.maxTokens,
|
|
275
|
-
temperature: config?.temperature
|
|
276
|
-
};
|
|
277
|
-
break;
|
|
278
|
-
default:
|
|
279
|
-
baseAssertion = {
|
|
280
|
-
type: "llm_judge",
|
|
281
|
-
prompt: ""
|
|
282
|
-
};
|
|
283
|
-
}
|
|
268
|
+
const baseAssertion = {
|
|
269
|
+
type: "llm_judge",
|
|
270
|
+
prompt: config?.prompt ?? "",
|
|
271
|
+
systemPrompt: config?.systemPrompt,
|
|
272
|
+
minScore: config?.minScore,
|
|
273
|
+
model: config?.model,
|
|
274
|
+
maxTokens: config?.maxTokens,
|
|
275
|
+
temperature: config?.temperature
|
|
276
|
+
};
|
|
284
277
|
return applyParamsToAssertion(baseAssertion, params);
|
|
285
278
|
}
|
|
286
279
|
async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
@@ -6376,7 +6369,15 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6376
6369
|
if (mcps.length === 0) return;
|
|
6377
6370
|
const mcpServers = {};
|
|
6378
6371
|
for (const mcp of mcps) {
|
|
6379
|
-
|
|
6372
|
+
const config = mcp.config;
|
|
6373
|
+
for (const [key, value] of Object.entries(config)) {
|
|
6374
|
+
if (typeof value !== "object" || value === null || Array.isArray(value)) {
|
|
6375
|
+
throw new Error(
|
|
6376
|
+
`MCP "${mcp.name}" has invalid config: value for key "${key}" must be an object (got ${typeof value}). Config must use keyed format, e.g. { "server-name": { "command": "npx", ... } }`
|
|
6377
|
+
);
|
|
6378
|
+
}
|
|
6379
|
+
mcpServers[key] = value;
|
|
6380
|
+
}
|
|
6380
6381
|
}
|
|
6381
6382
|
const content = JSON.stringify(
|
|
6382
6383
|
{ [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: mcpServers },
|
|
@@ -6732,9 +6733,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
6732
6733
|
})
|
|
6733
6734
|
);
|
|
6734
6735
|
let messageCount = 0;
|
|
6735
|
-
const canUseTool = async () => {
|
|
6736
|
-
return { behavior: "allow" };
|
|
6737
|
-
};
|
|
6736
|
+
const canUseTool = async (_toolName, input) => ({ behavior: "allow", updatedInput: input });
|
|
6738
6737
|
const baseAllowedTools = [
|
|
6739
6738
|
"Skill",
|
|
6740
6739
|
"Read",
|