@wix/evalforge-evaluator 0.198.0 → 0.200.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +72 -15
- package/build/index.js.map +2 -2
- package/build/index.mjs +72 -15
- package/build/index.mjs.map +2 -2
- package/build/types/run-scenario/install-dependencies.d.ts +7 -0
- package/package.json +3 -3
package/build/index.js
CHANGED
|
@@ -7424,6 +7424,46 @@ function defaultExec(cmd, args, opts) {
|
|
|
7424
7424
|
});
|
|
7425
7425
|
});
|
|
7426
7426
|
}
|
|
7427
|
+
/**
 * Builds an `exec` function for dependency installation that captures the
 * child's stdout and stderr into one buffer and reports it via `onProgress`.
 *
 * @param {(message: string) => void} onProgress - Called once, when the child
 *   closes, with `[install:output]\n<output>`; skipped when the collected
 *   output is empty after trimming trailing whitespace.
 * @returns {(cmd: string, args: string[], opts: object) => Promise<void>}
 *   Runner that reads `opts.cwd`, `opts.env` and `opts.timeoutMs`. The promise
 *   resolves on exit code 0 and rejects on a spawn error, a non-zero exit
 *   code, or when `opts.timeoutMs` elapses (the child is then SIGKILLed).
 */
function createLoggingInstallExec(onProgress) {
  return (cmd, args, opts) => new Promise((resolve3, reject) => {
    const child = (0, import_child_process.spawn)(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      stdio: ["ignore", "pipe", "pipe"]
    });
    let output = "";
    const collect = (chunk) => {
      output += chunk;
    };
    // Let each stream decode with its own stateful UTF-8 decoder. Decoding
    // every Buffer chunk independently corrupts a multi-byte character whose
    // bytes are split across two reads (it becomes U+FFFD characters).
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", collect);
    child.stderr?.on("data", collect);
    const timer = setTimeout(() => {
      child.kill("SIGKILL");
      reject(
        new Error(
          `${cmd} ${args.join(" ")} timed out after ${opts.timeoutMs}ms`
        )
      );
    }, opts.timeoutMs);
    child.on("error", (err) => {
      clearTimeout(timer);
      reject(err);
    });
    child.on("close", (code) => {
      clearTimeout(timer);
      // Emit whatever was captured, on success and on failure alike, so a
      // failed install still surfaces its diagnostics.
      const trimmed = output.trimEnd();
      if (trimmed.length > 0) {
        onProgress(`[install:output]\n${trimmed}`);
      }
      if (code === 0) {
        resolve3();
      } else {
        // After a timeout the promise is already rejected; this is a no-op.
        reject(new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
|
|
7427
7467
|
function detectPackageManager(workDir) {
|
|
7428
7468
|
if ((0, import_fs.existsSync)(import_path2.default.join(workDir, "pnpm-lock.yaml"))) {
|
|
7429
7469
|
return {
|
|
@@ -7712,7 +7752,8 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
|
|
|
7712
7752
|
writeWixEnvFile(workDir2);
|
|
7713
7753
|
onProgress("[diag] entering installDependencies");
|
|
7714
7754
|
await installDependencies(workDir2, onProgress, {
|
|
7715
|
-
cacheBase: nodeModulesCacheDir
|
|
7755
|
+
cacheBase: nodeModulesCacheDir,
|
|
7756
|
+
exec: createLoggingInstallExec(onProgress)
|
|
7716
7757
|
});
|
|
7717
7758
|
onProgress("[diag] installDependencies returned");
|
|
7718
7759
|
onProgress("Environment ready");
|
|
@@ -10980,22 +11021,38 @@ async function executeWithAiSdk(context) {
|
|
|
10980
11021
|
...topLevelExtras,
|
|
10981
11022
|
model,
|
|
10982
11023
|
abortSignal: abortController.signal,
|
|
10983
|
-
|
|
10984
|
-
|
|
10985
|
-
|
|
11024
|
+
// Anthropic prompt caching: the Vercel AI SDK can't attach cache_control
|
|
11025
|
+
// to the top-level `system` string, so the system prompt is sent as a
|
|
11026
|
+
// `system` message carrying an ephemeral cache breakpoint (Anthropic only).
|
|
11027
|
+
// Anthropic renders tools -> system -> messages, so a breakpoint on the
|
|
11028
|
+
// system block caches the tool definitions too — high value for MCP
|
|
11029
|
+
// scenarios that share a large tool set + system prompt across runs.
|
|
11030
|
+
messages: [
|
|
11031
|
+
...systemPrompt ? [
|
|
10986
11032
|
{
|
|
10987
|
-
role: "
|
|
10988
|
-
content:
|
|
10989
|
-
|
|
10990
|
-
|
|
10991
|
-
|
|
10992
|
-
|
|
10993
|
-
|
|
10994
|
-
}
|
|
10995
|
-
|
|
11033
|
+
role: "system",
|
|
11034
|
+
content: systemPrompt,
|
|
11035
|
+
...isAnthropic ? {
|
|
11036
|
+
providerOptions: {
|
|
11037
|
+
anthropic: {
|
|
11038
|
+
cacheControl: { type: "ephemeral" }
|
|
11039
|
+
}
|
|
11040
|
+
}
|
|
11041
|
+
} : {}
|
|
10996
11042
|
}
|
|
10997
|
-
]
|
|
10998
|
-
|
|
11043
|
+
] : [],
|
|
11044
|
+
{
|
|
11045
|
+
role: "user",
|
|
11046
|
+
content: hasImages ? [
|
|
11047
|
+
{ type: "text", text: scenario.triggerPrompt },
|
|
11048
|
+
...triggerPromptImages.map((img) => ({
|
|
11049
|
+
type: "image",
|
|
11050
|
+
image: img.base64,
|
|
11051
|
+
mediaType: img.mediaType
|
|
11052
|
+
}))
|
|
11053
|
+
] : scenario.triggerPrompt
|
|
11054
|
+
}
|
|
11055
|
+
],
|
|
10999
11056
|
temperature: supportsThinking ? void 0 : cfg.temperature,
|
|
11000
11057
|
topP: supportsThinking ? void 0 : cfg.topP,
|
|
11001
11058
|
frequencyPenalty: cfg.frequencyPenalty,
|