@wix/evalforge-evaluator 0.198.0 → 0.200.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +72 -15
- package/build/index.js.map +2 -2
- package/build/index.mjs +72 -15
- package/build/index.mjs.map +2 -2
- package/build/types/run-scenario/install-dependencies.d.ts +7 -0
- package/package.json +3 -3
package/build/index.mjs
CHANGED
|
@@ -7453,6 +7453,46 @@ function defaultExec(cmd, args, opts) {
|
|
|
7453
7453
|
});
|
|
7454
7454
|
});
|
|
7455
7455
|
}
|
|
7456
|
+
/**
 * Builds an `exec` implementation that runs a command with `spawn`, captures
 * everything the child writes to stdout and stderr, and reports the captured
 * text through `onProgress` once the child's streams have closed.
 *
 * @param {(message: string) => void} onProgress - Called at most once per
 *   command, on "close", with `[install:output]\n<captured text>`. It is not
 *   called when the captured text is empty after trimming trailing whitespace.
 * @returns {(cmd: string, args: string[], opts: object) => Promise<void>}
 *   Function that spawns `cmd` with `args`. `opts.cwd` and `opts.env` are
 *   forwarded to `spawn`; `opts.timeoutMs` is the delay handed to `setTimeout`
 *   before the child is killed. The promise resolves when the child closes
 *   with exit code 0 and rejects with an `Error` on a spawn error, a non-zero
 *   (or null) exit code, or a timeout.
 */
function createLoggingInstallExec(onProgress) {
  return (cmd, args, opts) => new Promise((resolve3, reject) => {
    const child = spawn(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      stdio: ["ignore", "pipe", "pipe"]
    });
    let output = "";
    const collect = (chunk) => {
      output += chunk;
    };
    for (const stream of [child.stdout, child.stderr]) {
      // Decode per stream instead of per chunk: a multi-byte UTF-8 character
      // can be split across two "data" events, and decoding each chunk on its
      // own would turn both halves into U+FFFD. setEncoding gives each stream
      // its own decoder that holds back incomplete sequences.
      stream?.setEncoding("utf8");
      stream?.on("data", collect);
    }
    const timer = setTimeout(() => {
      child.kill("SIGKILL");
      // Reject right away; the "close" handler below still runs afterwards and
      // reports whatever output was captured (its reject call is then a no-op).
      reject(
        new Error(
          `${cmd} ${args.join(" ")} timed out after ${opts.timeoutMs}ms`
        )
      );
    }, opts.timeoutMs);
    child.on("error", (err) => {
      clearTimeout(timer);
      reject(err);
    });
    child.on("close", (code) => {
      clearTimeout(timer);
      const trimmed = output.trimEnd();
      if (trimmed.length > 0) {
        onProgress(`[install:output]\n${trimmed}`);
      }
      if (code === 0) {
        resolve3();
      } else {
        reject(new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
|
|
7456
7496
|
function detectPackageManager(workDir) {
|
|
7457
7497
|
if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
|
|
7458
7498
|
return {
|
|
@@ -7741,7 +7781,8 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7741
7781
|
writeWixEnvFile(workDir2);
|
|
7742
7782
|
onProgress("[diag] entering installDependencies");
|
|
7743
7783
|
await installDependencies(workDir2, onProgress, {
|
|
7744
|
-
cacheBase: nodeModulesCacheDir
|
|
7784
|
+
cacheBase: nodeModulesCacheDir,
|
|
7785
|
+
exec: createLoggingInstallExec(onProgress)
|
|
7745
7786
|
});
|
|
7746
7787
|
onProgress("[diag] installDependencies returned");
|
|
7747
7788
|
onProgress("Environment ready");
|
|
@@ -11039,22 +11080,38 @@ async function executeWithAiSdk(context) {
|
|
|
11039
11080
|
...topLevelExtras,
|
|
11040
11081
|
model,
|
|
11041
11082
|
abortSignal: abortController.signal,
|
|
11042
|
-
|
|
11043
|
-
|
|
11044
|
-
|
|
11083
|
+
// Anthropic prompt caching: the Vercel AI SDK can't attach cache_control
|
|
11084
|
+
// to the top-level `system` string, so the system prompt is sent as a
|
|
11085
|
+
// `system` message carrying an ephemeral cache breakpoint (Anthropic only).
|
|
11086
|
+
// Anthropic renders tools -> system -> messages, so a breakpoint on the
|
|
11087
|
+
// system block caches the tool definitions too — high value for MCP
|
|
11088
|
+
// scenarios that share a large tool set + system prompt across runs.
|
|
11089
|
+
messages: [
|
|
11090
|
+
...systemPrompt ? [
|
|
11045
11091
|
{
|
|
11046
|
-
role: "
|
|
11047
|
-
content:
|
|
11048
|
-
|
|
11049
|
-
|
|
11050
|
-
|
|
11051
|
-
|
|
11052
|
-
|
|
11053
|
-
}
|
|
11054
|
-
|
|
11092
|
+
role: "system",
|
|
11093
|
+
content: systemPrompt,
|
|
11094
|
+
...isAnthropic ? {
|
|
11095
|
+
providerOptions: {
|
|
11096
|
+
anthropic: {
|
|
11097
|
+
cacheControl: { type: "ephemeral" }
|
|
11098
|
+
}
|
|
11099
|
+
}
|
|
11100
|
+
} : {}
|
|
11055
11101
|
}
|
|
11056
|
-
]
|
|
11057
|
-
|
|
11102
|
+
] : [],
|
|
11103
|
+
{
|
|
11104
|
+
role: "user",
|
|
11105
|
+
content: hasImages ? [
|
|
11106
|
+
{ type: "text", text: scenario.triggerPrompt },
|
|
11107
|
+
...triggerPromptImages.map((img) => ({
|
|
11108
|
+
type: "image",
|
|
11109
|
+
image: img.base64,
|
|
11110
|
+
mediaType: img.mediaType
|
|
11111
|
+
}))
|
|
11112
|
+
] : scenario.triggerPrompt
|
|
11113
|
+
}
|
|
11114
|
+
],
|
|
11058
11115
|
temperature: supportsThinking ? void 0 : cfg.temperature,
|
|
11059
11116
|
topP: supportsThinking ? void 0 : cfg.topP,
|
|
11060
11117
|
frequencyPenalty: cfg.frequencyPenalty,
|