@wix/evalforge-evaluator 0.139.0 → 0.140.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -972,6 +972,26 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
972
972
 
973
973
  // src/run-scenario/agents/claude-code/execute.ts
974
974
  var DEFAULT_MODEL = import_evalforge_types4.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
975
/**
 * Async generator that yields exactly one user message combining the text
 * prompt with its attached images.
 *
 * The message content is the text block first, followed by one base64 image
 * block per entry of `images`, in the same order.
 *
 * @param {string} triggerPrompt - Text placed in the leading "text" block.
 * @param {Array<{base64: string, mediaType: string}>} images - Images to
 *   attach; `base64` becomes `source.data` and `mediaType` becomes
 *   `source.media_type`.
 * @yields {object} A single message with `type: "user"` and
 *   `parent_tool_use_id: null`.
 */
async function* buildPromptStream(triggerPrompt, images) {
  const contentBlocks = [{ type: "text", text: triggerPrompt }];
  for (const image of images) {
    const source = {
      type: "base64",
      data: image.base64,
      media_type: image.mediaType
    };
    contentBlocks.push({ type: "image", source });
  }
  const userMessage = {
    type: "user",
    message: {
      role: "user",
      content: contentBlocks
    },
    parent_tool_use_id: null
  };
  yield userMessage;
}
975
995
  function extractToolActionDescription(toolName, toolArgs) {
976
996
  if (!toolName) {
977
997
  return "Using tool...";
@@ -1347,8 +1367,13 @@ async function executeWithClaudeCode(skills, scenario, options) {
1347
1367
  }, HEARTBEAT_INTERVAL_MS);
1348
1368
  }
1349
1369
  const sdkPromise = (async () => {
1370
+ const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
1371
+ const prompt = hasImages ? buildPromptStream(
1372
+ scenario.triggerPrompt,
1373
+ scenario.triggerPromptImages
1374
+ ) : scenario.triggerPrompt;
1350
1375
  for await (const message of query({
1351
- prompt: scenario.triggerPrompt,
1376
+ prompt,
1352
1377
  options: queryOptions
1353
1378
  })) {
1354
1379
  messageCount++;
@@ -2651,6 +2676,21 @@ function extractToolAction(toolName, args) {
2651
2676
  }
2652
2677
  return `Using ${toolName}...`;
2653
2678
  }
2679
/**
 * Decodes base64 prompt images and writes them to `<cwd>/prompt-images`,
 * creating the directory if needed.
 *
 * Files are named `image-<index>.<ext>`, where `<ext>` is derived from the
 * media type's subtype. Media-type parameters (`; charset=...`) and
 * structured-syntax suffixes (`+xml`) are dropped so that e.g.
 * `image/svg+xml` produces `.svg`. A missing or unusable media type falls
 * back to `png`.
 *
 * @param {string} cwd - Directory under which `prompt-images` is created.
 * @param {Array<{base64: string, mediaType?: string}>} images - Images to write.
 * @returns {Promise<string[]>} Paths of the written files, in input order.
 */
async function writePromptImages(cwd, images) {
  const imagesDir = (0, import_path10.join)(cwd, "prompt-images");
  await (0, import_promises9.mkdir)(imagesDir, { recursive: true });
  const filePaths = [];
  for (let i = 0; i < images.length; i++) {
    const img = images[i];
    // Guard against a missing mediaType: calling .split on undefined would
    // throw before the "png" fallback could ever apply.
    const mediaType = typeof img.mediaType === "string" ? img.mediaType : "";
    const subtype = mediaType.split(";")[0].split("/")[1] ?? "";
    // Keep only the base subtype and strip characters that do not belong in
    // a file extension (whitespace, "+", etc.).
    const ext = subtype.split("+")[0].replace(/[^\w.-]/g, "") || "png";
    const filepath = (0, import_path10.join)(imagesDir, `image-${i}.${ext}`);
    const buffer = Buffer.from(img.base64, "base64");
    // Writes stay sequential so a failure stops before later files are created.
    await (0, import_promises9.writeFile)(filepath, buffer);
    filePaths.push(filepath);
  }
  return filePaths;
}
2654
2694
  function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
2655
2695
  const base = {
2656
2696
  evalRunId: context.evalRunId,
@@ -3080,6 +3120,17 @@ async function executeWithOpenCode(skills, scenario, options) {
3080
3120
  "--dir",
3081
3121
  options.cwd
3082
3122
  ];
3123
+ const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
3124
+ const imageFileArgs = [];
3125
+ if (hasImages) {
3126
+ const imagePaths = await writePromptImages(
3127
+ options.cwd,
3128
+ scenario.triggerPromptImages
3129
+ );
3130
+ for (const imgPath of imagePaths) {
3131
+ imageFileArgs.push("-f", imgPath);
3132
+ }
3133
+ }
3083
3134
  const accumulatedEvents = [];
3084
3135
  let traceStepNumber = 0;
3085
3136
  let lastAttemptResult;
@@ -3114,7 +3165,7 @@ async function executeWithOpenCode(skills, scenario, options) {
3114
3165
  );
3115
3166
  }
3116
3167
  }
3117
- const args = [...baseArgs, prompt];
3168
+ const args = [...baseArgs, ...imageFileArgs, prompt];
3118
3169
  console.log(
3119
3170
  `[executeWithOpenCode] Spawning attempt ${attempt}: opencode`,
3120
3171
  args.slice(0, 5)
@@ -3597,10 +3648,26 @@ async function executeWithAiSdk(context) {
3597
3648
  }
3598
3649
  };
3599
3650
  const stepTimestamps = [];
3600
- const result = await (0, import_ai.generateText)({
3651
+ const { triggerPromptImages } = context;
3652
+ const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
3653
+ const generateTextParams = {
3601
3654
  model,
3602
3655
  system: systemPrompt,
3603
- prompt: scenario.triggerPrompt,
3656
+ ...hasImages ? {
3657
+ messages: [
3658
+ {
3659
+ role: "user",
3660
+ content: [
3661
+ { type: "text", text: scenario.triggerPrompt },
3662
+ ...triggerPromptImages.map((img) => ({
3663
+ type: "image",
3664
+ image: img.base64,
3665
+ mediaType: img.mediaType
3666
+ }))
3667
+ ]
3668
+ }
3669
+ ]
3670
+ } : { prompt: scenario.triggerPrompt },
3604
3671
  temperature: supportsThinking ? void 0 : modelConfig.temperature,
3605
3672
  maxOutputTokens: modelConfig.maxTokens,
3606
3673
  tools: mcpTools,
@@ -3633,7 +3700,8 @@ async function executeWithAiSdk(context) {
3633
3700
  );
3634
3701
  }
3635
3702
  }
3636
- });
3703
+ };
3704
+ const result = await (0, import_ai.generateText)(generateTextParams);
3637
3705
  const durationMs = Date.now() - startTime;
3638
3706
  const usage = {
3639
3707
  inputTokens: result.usage.inputTokens ?? 0,
@@ -4577,7 +4645,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
4577
4645
  mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
4578
4646
  subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
4579
4647
  rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
4580
- systemPrompt: agent?.systemPrompt
4648
+ systemPrompt: agent?.systemPrompt,
4649
+ triggerPromptImages: scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0 ? scenario.triggerPromptImages : void 0
4581
4650
  };
4582
4651
  const hasPrepare = !!adapter.prepareEnvironment;
4583
4652
  const prePrepSnapshot = hasPrepare && workDir ? snapshotDirectory(workDir) : {};