@wix/evalforge-evaluator 0.58.0 → 0.60.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -194,6 +194,16 @@ function createApiClient(serverUrl, options = "") {
194
194
 
195
195
  // src/fetch-evaluation-data.ts
196
196
  var import_evalforge_types = require("@wix/evalforge-types");
197
+ function parseSkillNamesFromParams(value) {
198
+ if (typeof value !== "string") {
199
+ return [];
200
+ }
201
+ const parsed = JSON.parse(value);
202
+ if (Array.isArray(parsed)) {
203
+ return parsed.map(String);
204
+ }
205
+ return [];
206
+ }
197
207
  function applyParamsToAssertion(assertion, params) {
198
208
  if (!params || Object.keys(params).length === 0) {
199
209
  return assertion;
@@ -215,6 +225,12 @@ function applyParamsToAssertion(assertion, params) {
215
225
  }
216
226
  return { ...assertion, prompt, systemPrompt };
217
227
  }
228
+ if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
229
+ return {
230
+ ...assertion,
231
+ skillNames: parseSkillNamesFromParams(params.skillNames)
232
+ };
233
+ }
218
234
  return { ...assertion, ...params };
219
235
  }
220
236
  function resolveSystemAssertion(assertionId, params) {
@@ -224,7 +240,7 @@ function resolveSystemAssertion(assertionId, params) {
224
240
  case "skill_was_called":
225
241
  baseAssertion = {
226
242
  type: "skill_was_called",
227
- skillName: params?.skillName ?? ""
243
+ skillNames: parseSkillNamesFromParams(params?.skillNames)
228
244
  };
229
245
  break;
230
246
  case "build_passed":
@@ -249,38 +265,15 @@ function resolveSystemAssertion(assertionId, params) {
249
265
  }
250
266
  function customAssertionToAssertion(ca, params) {
251
267
  const config = ca.config;
252
- let baseAssertion;
253
- switch (ca.type) {
254
- case "skill_was_called":
255
- baseAssertion = {
256
- type: "skill_was_called",
257
- skillName: config?.skillName ?? ""
258
- };
259
- break;
260
- case "build_passed":
261
- baseAssertion = {
262
- type: "build_passed",
263
- command: config?.command,
264
- expectedExitCode: config?.expectedExitCode
265
- };
266
- break;
267
- case "llm_judge":
268
- baseAssertion = {
269
- type: "llm_judge",
270
- prompt: config?.prompt ?? "",
271
- systemPrompt: config?.systemPrompt,
272
- minScore: config?.minScore,
273
- model: config?.model,
274
- maxTokens: config?.maxTokens,
275
- temperature: config?.temperature
276
- };
277
- break;
278
- default:
279
- baseAssertion = {
280
- type: "llm_judge",
281
- prompt: ""
282
- };
283
- }
268
+ const baseAssertion = {
269
+ type: "llm_judge",
270
+ prompt: config?.prompt ?? "",
271
+ systemPrompt: config?.systemPrompt,
272
+ minScore: config?.minScore,
273
+ model: config?.model,
274
+ maxTokens: config?.maxTokens,
275
+ temperature: config?.temperature
276
+ };
284
277
  return applyParamsToAssertion(baseAssertion, params);
285
278
  }
286
279
  async function fetchEvaluationData(api, projectId2, evalRunId2) {
@@ -6376,7 +6369,15 @@ async function writeMcpToFilesystem(cwd, mcps) {
6376
6369
  if (mcps.length === 0) return;
6377
6370
  const mcpServers = {};
6378
6371
  for (const mcp of mcps) {
6379
- mcpServers[mcp.name] = mcp.config;
6372
+ const config = mcp.config;
6373
+ for (const [key, value] of Object.entries(config)) {
6374
+ if (typeof value !== "object" || value === null || Array.isArray(value)) {
6375
+ throw new Error(
6376
+ `MCP "${mcp.name}" has invalid config: value for key "${key}" must be an object (got ${typeof value}). Config must use keyed format, e.g. { "server-name": { "command": "npx", ... } }`
6377
+ );
6378
+ }
6379
+ mcpServers[key] = value;
6380
+ }
6380
6381
  }
6381
6382
  const content = JSON.stringify(
6382
6383
  { [import_evalforge_types2.MCP_SERVERS_JSON_KEY]: mcpServers },
@@ -6732,9 +6733,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
6732
6733
  })
6733
6734
  );
6734
6735
  let messageCount = 0;
6735
- const canUseTool = async () => {
6736
- return { behavior: "allow" };
6737
- };
6736
+ const canUseTool = async (_toolName, input) => ({ behavior: "allow", updatedInput: input });
6738
6737
  const baseAllowedTools = [
6739
6738
  "Skill",
6740
6739
  "Read",