@wix/evalforge-evaluator 0.139.0 → 0.140.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs
CHANGED
|
@@ -964,6 +964,26 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
|
|
|
964
964
|
|
|
965
965
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
966
966
|
// Model constant for the Claude Code executor, taken from the ClaudeModel enum.
// NOTE(review): presumably the fallback when no model is configured — confirm at the use sites.
var DEFAULT_MODEL = ClaudeModel.CLAUDE_4_5_SONNET_1_0;
|
|
967
|
+
/**
 * Builds the streaming-input form of a prompt: an async generator that yields
 * exactly one user message containing the trigger text followed by any
 * attached images as base64 image blocks.
 *
 * @param {string} triggerPrompt - Text placed in the first content block.
 * @param {Array<{base64: string, mediaType: string}>} [images] - Images to
 *   attach after the text. A missing or null value is treated as "no images"
 *   so the generator still yields a valid text-only message instead of
 *   throwing on `.map`.
 * @yields {{type: "user", message: {role: "user", content: object[]}, parent_tool_use_id: null}}
 */
async function* buildPromptStream(triggerPrompt, images) {
  const imageBlocks = (images ?? []).map((img) => ({
    type: "image",
    source: {
      type: "base64",
      data: img.base64,
      media_type: img.mediaType
    }
  }));
  yield {
    type: "user",
    message: {
      role: "user",
      // Text first, then images, in the order they were supplied.
      content: [{ type: "text", text: triggerPrompt }, ...imageBlocks]
    },
    parent_tool_use_id: null
  };
}
|
|
967
987
|
function extractToolActionDescription(toolName, toolArgs) {
|
|
968
988
|
if (!toolName) {
|
|
969
989
|
return "Using tool...";
|
|
@@ -1339,8 +1359,13 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
1339
1359
|
}, HEARTBEAT_INTERVAL_MS);
|
|
1340
1360
|
}
|
|
1341
1361
|
const sdkPromise = (async () => {
|
|
1362
|
+
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
1363
|
+
const prompt = hasImages ? buildPromptStream(
|
|
1364
|
+
scenario.triggerPrompt,
|
|
1365
|
+
scenario.triggerPromptImages
|
|
1366
|
+
) : scenario.triggerPrompt;
|
|
1342
1367
|
for await (const message of query({
|
|
1343
|
-
prompt
|
|
1368
|
+
prompt,
|
|
1344
1369
|
options: queryOptions
|
|
1345
1370
|
})) {
|
|
1346
1371
|
messageCount++;
|
|
@@ -2651,6 +2676,21 @@ function extractToolAction(toolName, args) {
|
|
|
2651
2676
|
}
|
|
2652
2677
|
return `Using ${toolName}...`;
|
|
2653
2678
|
}
|
|
2679
|
+
/**
 * Decodes base64 prompt images and writes them to `<cwd>/prompt-images/`,
 * one file per image named `image-<index>.<ext>`.
 *
 * The extension is derived from the media type's subtype with any structured
 * suffix or parameters removed (`image/svg+xml; charset=utf-8` -> `svg`) and
 * restricted to `[a-z0-9-]`, so the media type can never inject path
 * separators or odd characters into the filename. Falls back to `png` when no
 * usable subtype is present.
 *
 * @param {string} cwd - Directory under which `prompt-images` is created.
 * @param {Array<{base64: string, mediaType?: string}>} images - Images to
 *   write; a missing or null value is treated as an empty list.
 * @returns {Promise<string[]>} Paths of the written files, in input order.
 */
async function writePromptImages(cwd, images) {
  const imagesDir = join8(cwd, "prompt-images");
  await mkdir7(imagesDir, { recursive: true });
  // Files are independent of each other, so write them concurrently;
  // Promise.all keeps the result in input order.
  return Promise.all(
    (images ?? []).map(async (img, index) => {
      const subtype = String(img.mediaType ?? "").split("/")[1] ?? "";
      const ext = subtype
        .split(/[+;]/)[0]
        .trim()
        .toLowerCase()
        .replace(/[^a-z0-9-]/g, "") || "png";
      const filepath = join8(imagesDir, `image-${index}.${ext}`);
      await writeFile6(filepath, Buffer.from(img.base64, "base64"));
      return filepath;
    })
  );
}
|
|
2654
2694
|
function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
|
|
2655
2695
|
const base = {
|
|
2656
2696
|
evalRunId: context.evalRunId,
|
|
@@ -3080,6 +3120,17 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
3080
3120
|
"--dir",
|
|
3081
3121
|
options.cwd
|
|
3082
3122
|
];
|
|
3123
|
+
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
3124
|
+
const imageFileArgs = [];
|
|
3125
|
+
if (hasImages) {
|
|
3126
|
+
const imagePaths = await writePromptImages(
|
|
3127
|
+
options.cwd,
|
|
3128
|
+
scenario.triggerPromptImages
|
|
3129
|
+
);
|
|
3130
|
+
for (const imgPath of imagePaths) {
|
|
3131
|
+
imageFileArgs.push("-f", imgPath);
|
|
3132
|
+
}
|
|
3133
|
+
}
|
|
3083
3134
|
const accumulatedEvents = [];
|
|
3084
3135
|
let traceStepNumber = 0;
|
|
3085
3136
|
let lastAttemptResult;
|
|
@@ -3114,7 +3165,7 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
3114
3165
|
);
|
|
3115
3166
|
}
|
|
3116
3167
|
}
|
|
3117
|
-
const args = [...baseArgs, prompt];
|
|
3168
|
+
const args = [...baseArgs, ...imageFileArgs, prompt];
|
|
3118
3169
|
console.log(
|
|
3119
3170
|
`[executeWithOpenCode] Spawning attempt ${attempt}: opencode`,
|
|
3120
3171
|
args.slice(0, 5)
|
|
@@ -3605,10 +3656,26 @@ async function executeWithAiSdk(context) {
|
|
|
3605
3656
|
}
|
|
3606
3657
|
};
|
|
3607
3658
|
const stepTimestamps = [];
|
|
3608
|
-
const
|
|
3659
|
+
const { triggerPromptImages } = context;
|
|
3660
|
+
const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
|
|
3661
|
+
const generateTextParams = {
|
|
3609
3662
|
model,
|
|
3610
3663
|
system: systemPrompt,
|
|
3611
|
-
|
|
3664
|
+
...hasImages ? {
|
|
3665
|
+
messages: [
|
|
3666
|
+
{
|
|
3667
|
+
role: "user",
|
|
3668
|
+
content: [
|
|
3669
|
+
{ type: "text", text: scenario.triggerPrompt },
|
|
3670
|
+
...triggerPromptImages.map((img) => ({
|
|
3671
|
+
type: "image",
|
|
3672
|
+
image: img.base64,
|
|
3673
|
+
mediaType: img.mediaType
|
|
3674
|
+
}))
|
|
3675
|
+
]
|
|
3676
|
+
}
|
|
3677
|
+
]
|
|
3678
|
+
} : { prompt: scenario.triggerPrompt },
|
|
3612
3679
|
temperature: supportsThinking ? void 0 : modelConfig.temperature,
|
|
3613
3680
|
maxOutputTokens: modelConfig.maxTokens,
|
|
3614
3681
|
tools: mcpTools,
|
|
@@ -3641,7 +3708,8 @@ async function executeWithAiSdk(context) {
|
|
|
3641
3708
|
);
|
|
3642
3709
|
}
|
|
3643
3710
|
}
|
|
3644
|
-
}
|
|
3711
|
+
};
|
|
3712
|
+
const result = await generateText(generateTextParams);
|
|
3645
3713
|
const durationMs = Date.now() - startTime;
|
|
3646
3714
|
const usage = {
|
|
3647
3715
|
inputTokens: result.usage.inputTokens ?? 0,
|
|
@@ -4585,7 +4653,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
4585
4653
|
mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
|
|
4586
4654
|
subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
|
|
4587
4655
|
rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
|
|
4588
|
-
systemPrompt: agent?.systemPrompt
|
|
4656
|
+
systemPrompt: agent?.systemPrompt,
|
|
4657
|
+
triggerPromptImages: scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0 ? scenario.triggerPromptImages : void 0
|
|
4589
4658
|
};
|
|
4590
4659
|
const hasPrepare = !!adapter.prepareEnvironment;
|
|
4591
4660
|
const prePrepSnapshot = hasPrepare && workDir ? snapshotDirectory(workDir) : {};
|