@wix/evalforge-evaluator 0.139.0 → 0.140.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js
CHANGED
|
@@ -972,6 +972,26 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
|
|
|
972
972
|
|
|
973
973
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
974
974
|
var DEFAULT_MODEL = import_evalforge_types4.ClaudeModel.CLAUDE_4_5_SONNET_1_0;
|
|
975
|
+
/**
 * Async generator that yields exactly one "user" message combining the
 * trigger prompt text with the supplied images.
 *
 * The yielded message's content starts with a text block holding
 * `triggerPrompt`, followed by one base64 image block per entry of `images`,
 * in input order.
 *
 * @param {string} triggerPrompt - Text placed in the leading text block.
 * @param {Array<{base64: string, mediaType: string}>} images - Images to
 *   attach; each entry's `base64` and `mediaType` are copied into the block.
 * @yields {object} A single message with `type: "user"` and
 *   `parent_tool_use_id: null`.
 */
async function* buildPromptStream(triggerPrompt, images) {
  // Converts one image descriptor into a base64 image content block.
  const toImageBlock = ({ base64, mediaType }) => ({
    type: "image",
    source: {
      type: "base64",
      data: base64,
      media_type: mediaType
    }
  });

  const textBlock = { type: "text", text: triggerPrompt };
  const imageBlocks = images.map(toImageBlock);

  const userMessage = {
    role: "user",
    content: [textBlock, ...imageBlocks]
  };

  yield {
    type: "user",
    message: userMessage,
    parent_tool_use_id: null
  };
}
|
|
975
995
|
function extractToolActionDescription(toolName, toolArgs) {
|
|
976
996
|
if (!toolName) {
|
|
977
997
|
return "Using tool...";
|
|
@@ -1347,8 +1367,13 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
1347
1367
|
}, HEARTBEAT_INTERVAL_MS);
|
|
1348
1368
|
}
|
|
1349
1369
|
const sdkPromise = (async () => {
|
|
1370
|
+
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
1371
|
+
const prompt = hasImages ? buildPromptStream(
|
|
1372
|
+
scenario.triggerPrompt,
|
|
1373
|
+
scenario.triggerPromptImages
|
|
1374
|
+
) : scenario.triggerPrompt;
|
|
1350
1375
|
for await (const message of query({
|
|
1351
|
-
prompt
|
|
1376
|
+
prompt,
|
|
1352
1377
|
options: queryOptions
|
|
1353
1378
|
})) {
|
|
1354
1379
|
messageCount++;
|
|
@@ -2651,6 +2676,21 @@ function extractToolAction(toolName, args) {
|
|
|
2651
2676
|
}
|
|
2652
2677
|
return `Using ${toolName}...`;
|
|
2653
2678
|
}
|
|
2679
|
+
/**
 * Derives a filesystem-friendly extension from an image media type.
 *
 * Takes the subtype (the part after "/"), drops any parameters (";...") and
 * any structured-syntax suffix ("+..."), so "image/svg+xml" becomes "svg" and
 * "image/png; charset=binary" becomes "png". Falls back to "png" when the
 * media type is not a string, has no subtype, or the subtype contains
 * characters outside letters, digits, "." and "-".
 *
 * @param {unknown} mediaType - Media type string such as "image/jpeg".
 * @returns {string} Extension without a leading dot.
 */
function promptImageExtension(mediaType) {
  if (typeof mediaType !== "string") {
    return "png";
  }
  const subtype = mediaType.split("/")[1] ?? "";
  const bare = subtype.split(/[;+]/)[0].trim();
  return /^[A-Za-z0-9][A-Za-z0-9.-]*$/.test(bare) ? bare : "png";
}

/**
 * Decodes each base64 image and writes it to `<cwd>/prompt-images`.
 *
 * The directory is created if missing (recursively). Files are named
 * `image-<index>.<ext>`, where `<index>` is the position in `images` and
 * `<ext>` comes from the image's media type (see `promptImageExtension`).
 * Files are written one at a time, in input order.
 *
 * @param {string} cwd - Directory under which `prompt-images` is created.
 * @param {Array<{base64: string, mediaType: string}>} images - Images to write.
 * @returns {Promise<string[]>} Paths of the written files, in input order.
 */
async function writePromptImages(cwd, images) {
  const imagesDir = (0, import_path10.join)(cwd, "prompt-images");
  await (0, import_promises9.mkdir)(imagesDir, { recursive: true });
  const filePaths = [];
  for (const [index, img] of images.entries()) {
    const ext = promptImageExtension(img.mediaType);
    const filepath = (0, import_path10.join)(imagesDir, `image-${index}.${ext}`);
    const buffer = Buffer.from(img.base64, "base64");
    await (0, import_promises9.writeFile)(filepath, buffer);
    filePaths.push(filepath);
  }
  return filePaths;
}
|
|
2654
2694
|
function createTraceEventFromNdjson(evt, context, stepNumber, isComplete) {
|
|
2655
2695
|
const base = {
|
|
2656
2696
|
evalRunId: context.evalRunId,
|
|
@@ -3080,6 +3120,17 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
3080
3120
|
"--dir",
|
|
3081
3121
|
options.cwd
|
|
3082
3122
|
];
|
|
3123
|
+
const hasImages = scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0;
|
|
3124
|
+
const imageFileArgs = [];
|
|
3125
|
+
if (hasImages) {
|
|
3126
|
+
const imagePaths = await writePromptImages(
|
|
3127
|
+
options.cwd,
|
|
3128
|
+
scenario.triggerPromptImages
|
|
3129
|
+
);
|
|
3130
|
+
for (const imgPath of imagePaths) {
|
|
3131
|
+
imageFileArgs.push("-f", imgPath);
|
|
3132
|
+
}
|
|
3133
|
+
}
|
|
3083
3134
|
const accumulatedEvents = [];
|
|
3084
3135
|
let traceStepNumber = 0;
|
|
3085
3136
|
let lastAttemptResult;
|
|
@@ -3114,7 +3165,7 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
3114
3165
|
);
|
|
3115
3166
|
}
|
|
3116
3167
|
}
|
|
3117
|
-
const args = [...baseArgs, prompt];
|
|
3168
|
+
const args = [...baseArgs, ...imageFileArgs, prompt];
|
|
3118
3169
|
console.log(
|
|
3119
3170
|
`[executeWithOpenCode] Spawning attempt ${attempt}: opencode`,
|
|
3120
3171
|
args.slice(0, 5)
|
|
@@ -3597,10 +3648,26 @@ async function executeWithAiSdk(context) {
|
|
|
3597
3648
|
}
|
|
3598
3649
|
};
|
|
3599
3650
|
const stepTimestamps = [];
|
|
3600
|
-
const
|
|
3651
|
+
const { triggerPromptImages } = context;
|
|
3652
|
+
const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
|
|
3653
|
+
const generateTextParams = {
|
|
3601
3654
|
model,
|
|
3602
3655
|
system: systemPrompt,
|
|
3603
|
-
|
|
3656
|
+
...hasImages ? {
|
|
3657
|
+
messages: [
|
|
3658
|
+
{
|
|
3659
|
+
role: "user",
|
|
3660
|
+
content: [
|
|
3661
|
+
{ type: "text", text: scenario.triggerPrompt },
|
|
3662
|
+
...triggerPromptImages.map((img) => ({
|
|
3663
|
+
type: "image",
|
|
3664
|
+
image: img.base64,
|
|
3665
|
+
mediaType: img.mediaType
|
|
3666
|
+
}))
|
|
3667
|
+
]
|
|
3668
|
+
}
|
|
3669
|
+
]
|
|
3670
|
+
} : { prompt: scenario.triggerPrompt },
|
|
3604
3671
|
temperature: supportsThinking ? void 0 : modelConfig.temperature,
|
|
3605
3672
|
maxOutputTokens: modelConfig.maxTokens,
|
|
3606
3673
|
tools: mcpTools,
|
|
@@ -3633,7 +3700,8 @@ async function executeWithAiSdk(context) {
|
|
|
3633
3700
|
);
|
|
3634
3701
|
}
|
|
3635
3702
|
}
|
|
3636
|
-
}
|
|
3703
|
+
};
|
|
3704
|
+
const result = await (0, import_ai.generateText)(generateTextParams);
|
|
3637
3705
|
const durationMs = Date.now() - startTime;
|
|
3638
3706
|
const usage = {
|
|
3639
3707
|
inputTokens: result.usage.inputTokens ?? 0,
|
|
@@ -4577,7 +4645,8 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
4577
4645
|
mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
|
|
4578
4646
|
subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0,
|
|
4579
4647
|
rules: evalData.rules?.length > 0 ? evalData.rules : void 0,
|
|
4580
|
-
systemPrompt: agent?.systemPrompt
|
|
4648
|
+
systemPrompt: agent?.systemPrompt,
|
|
4649
|
+
triggerPromptImages: scenario.triggerPromptImages && scenario.triggerPromptImages.length > 0 ? scenario.triggerPromptImages : void 0
|
|
4581
4650
|
};
|
|
4582
4651
|
const hasPrepare = !!adapter.prepareEnvironment;
|
|
4583
4652
|
const prePrepSnapshot = hasPrepare && workDir ? snapshotDirectory(workDir) : {};
|