@wix/evalforge-evaluator 0.139.0 → 0.140.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -964,6 +964,26 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
964
964
 
965
965
  // src/run-scenario/agents/claude-code/execute.ts
966
966
  var DEFAULT_MODEL = ClaudeModel.CLAUDE_4_5_SONNET_1_0;
967
/**
 * Wraps a text prompt and optional images in a one-message async stream.
 *
 * The generator yields exactly one "user" message whose content is the text
 * block followed by one base64 image block per entry in `images`, then ends.
 * NOTE(review): the message shape looks like the Claude Agent SDK
 * streaming-input user message — confirm against the SDK version in use.
 *
 * @param {string} triggerPrompt - Text placed in the leading text block.
 * @param {Array<{base64: string, mediaType: string}>} [images] - Images to
 *   attach; a null/undefined list is treated as "no images".
 * @yields {object} The single user message described above.
 */
async function* buildPromptStream(triggerPrompt, images) {
  // A missing list must not crash the stream: fall back to text-only content.
  const imageBlocks = (images ?? []).map(({ base64, mediaType }) => ({
    type: "image",
    source: {
      type: "base64",
      data: base64,
      media_type: mediaType
    }
  }));

  yield {
    type: "user",
    message: {
      role: "user",
      content: [{ type: "text", text: triggerPrompt }, ...imageBlocks]
    },
    parent_tool_use_id: null
  };
}
967
987
  function extractToolActionDescription(toolName, toolArgs) {
968
988
  if (!toolName) {
969
989
  return "Using tool...";
@@ -1339,8 +1359,13 @@ async function executeWithClaudeCode(skills, scenario, options) {
1339
1359
  }, HEARTBEAT_INTERVAL_MS);
1340
1360
  }
1341
1361
  const sdkPromise = (async () => {
1362
+ const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
1363
+ const prompt = hasImages ? buildPromptStream(
1364
+ scenario.triggerPrompt,
1365
+ scenario.triggerPromptImages
1366
+ ) : scenario.triggerPrompt;
1342
1367
  for await (const message of query({
1343
- prompt: scenario.triggerPrompt,
1368
+ prompt,
1344
1369
  options: queryOptions
1345
1370
  })) {
1346
1371
  messageCount++;
@@ -2651,6 +2676,21 @@ function extractToolAction(toolName, args) {
2651
2676
  }
2652
2677
  return `Using ${toolName}...`;
2653
2678
  }
2679
/**
 * Decodes base64 prompt images and writes them under `<cwd>/prompt-images`.
 *
 * Files are named `image-<index>.<ext>`, where `<ext>` is derived from the
 * image's media type. The directory is created if it does not exist.
 * NOTE(review): `join8`, `mkdir7` and `writeFile6` are bundler-renamed
 * imports, presumably `path.join`, `fs/promises.mkdir` and
 * `fs/promises.writeFile` — confirm against the import section.
 *
 * @param {string} cwd - Directory that will contain the `prompt-images` folder.
 * @param {Array<{base64: string, mediaType: string}>} images - Images to write;
 *   a null/undefined list is treated as empty.
 * @returns {Promise<string[]>} Paths of the written files, in input order.
 */
async function writePromptImages(cwd, images) {
  // Kept local so the bundle's module scope gains no new top-level name.
  const extensionFor = (mediaType) => {
    if (typeof mediaType !== "string") {
      return "png";
    }
    // "image/svg+xml; charset=utf-8" -> "svg": drop parameters, then the
    // structured-syntax suffix, so the result is a plain file extension.
    const subtype = mediaType.split(";")[0].split("/")[1] ?? "";
    const candidate = subtype.split("+")[0].trim().toLowerCase();
    // Anything that is not a simple token (empty, separators, "..") falls
    // back to the same default the empty-subtype case has always used.
    return /^[a-z0-9][a-z0-9.-]*$/.test(candidate) ? candidate : "png";
  };

  const imagesDir = join8(cwd, "prompt-images");
  await mkdir7(imagesDir, { recursive: true });

  // The writes are independent, so run them concurrently; map() keeps the
  // returned paths in the same order as the input images.
  return Promise.all(
    (images ?? []).map(async (img, index) => {
      const filepath = join8(imagesDir, `image-${index}.${extensionFor(img.mediaType)}`);
      await writeFile6(filepath, Buffer.from(img.base64, "base64"));
      return filepath;
    })
  );
}
2654
2694
  function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
2655
2695
  const base = {
2656
2696
  evalRunId: context.evalRunId,
@@ -3080,6 +3120,17 @@ async function executeWithOpenCode(skills, scenario, options) {
3080
3120
  "--dir",
3081
3121
  options.cwd
3082
3122
  ];
3123
+ const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
3124
+ const imageFileArgs = [];
3125
+ if (hasImages) {
3126
+ const imagePaths = await writePromptImages(
3127
+ options.cwd,
3128
+ scenario.triggerPromptImages
3129
+ );
3130
+ for (const imgPath of imagePaths) {
3131
+ imageFileArgs.push("-f", imgPath);
3132
+ }
3133
+ }
3083
3134
  const accumulatedEvents = [];
3084
3135
  let traceStepNumber = 0;
3085
3136
  let lastAttemptResult;
@@ -3114,7 +3165,7 @@ async function executeWithOpenCode(skills, scenario, options) {
3114
3165
  );
3115
3166
  }
3116
3167
  }
3117
- const args = [...baseArgs, prompt];
3168
+ const args = [...baseArgs, ...imageFileArgs, prompt];
3118
3169
  console.log(
3119
3170
  `[executeWithOpenCode] Spawning attempt ${attempt}: opencode`,
3120
3171
  args.slice(0, 5)
@@ -3605,10 +3656,26 @@ async function executeWithAiSdk(context) {
3605
3656
  }
3606
3657
  };
3607
3658
  const stepTimestamps = [];
3608
- const result = await generateText({
3659
+ const { triggerPromptImages } = context;
3660
+ const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
3661
+ const generateTextParams = {
3609
3662
  model,
3610
3663
  system: systemPrompt,
3611
- prompt: scenario.triggerPrompt,
3664
+ ...hasImages ? {
3665
+ messages: [
3666
+ {
3667
+ role: "user",
3668
+ content: [
3669
+ { type: "text", text: scenario.triggerPrompt },
3670
+ ...triggerPromptImages.map((img) => ({
3671
+ type: "image",
3672
+ image: img.base64,
3673
+ mediaType: img.mediaType
3674
+ }))
3675
+ ]
3676
+ }
3677
+ ]
3678
+ } : { prompt: scenario.triggerPrompt },
3612
3679
  temperature: supportsThinking ? void 0 : modelConfig.temperature,
3613
3680
  maxOutputTokens: modelConfig.maxTokens,
3614
3681
  tools: mcpTools,
@@ -3641,7 +3708,8 @@ async function executeWithAiSdk(context) {
3641
3708
  );
3642
3709
  }
3643
3710
  }
3644
- });
3711
+ };
3712
+ const result = await generateText(generateTextParams);
3645
3713
  const durationMs = Date.now() - startTime;
3646
3714
  const usage = {
3647
3715
  inputTokens: result.usage.inputTokens ?? 0,
@@ -4585,7 +4653,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
4585
4653
  mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
4586
4654
  subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
4587
4655
  rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
4588
- systemPrompt: agent?.systemPrompt
4656
+ systemPrompt: agent?.systemPrompt,
4657
+ triggerPromptImages: scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0 ? scenario.triggerPromptImages : void 0
4589
4658
  };
4590
4659
  const hasPrepare = !!adapter.prepareEnvironment;
4591
4660
  const prePrepSnapshot = hasPrepare && workDir ? snapshotDirectory(workDir) : {};