@wix/evalforge-evaluator 0.198.0 → 0.200.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -7424,6 +7424,46 @@ function defaultExec(cmd, args, opts) {
7424
7424
  });
7425
7425
  });
7426
7426
  }
7427
/**
 * Builds an `exec`-style function that runs a command as a child process,
 * captures everything it prints to stdout and stderr, and forwards the
 * captured text to `onProgress` once the process has closed.
 *
 * @param {(message: string) => void} onProgress - Receives a single
 *   `[install:output]\n<text>` message per command when the command printed
 *   anything (trailing whitespace is trimmed).
 * @returns {(cmd: string, args: string[], opts: {cwd?: string, env?: object, timeoutMs?: number}) => Promise<void>}
 *   Resolves when the command exits with code 0. Rejects with an `Error` when
 *   the process cannot be spawned, exits non-zero, is terminated by a signal,
 *   or runs longer than `opts.timeoutMs` (in which case it is SIGKILLed).
 */
function createLoggingInstallExec(onProgress) {
  return (cmd, args, opts) => new Promise((resolve3, reject) => {
    const { cwd, env, timeoutMs } = opts;
    const child = (0, import_child_process.spawn)(cmd, args, {
      cwd,
      env,
      stdio: ["ignore", "pipe", "pipe"]
    });

    let output = "";
    const collect = (chunk) => {
      output += chunk;
    };
    for (const stream of [child.stdout, child.stderr]) {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is reassembled instead of being corrupted.
      stream?.setEncoding("utf8");
      stream?.on("data", collect);
    }

    let timer;
    let settled = false;
    // "error", "close" and the timeout can each fire for the same child;
    // only the first outcome is reported to the caller.
    const settle = (error) => {
      clearTimeout(timer);
      if (settled) {
        return;
      }
      settled = true;
      if (error) {
        reject(error);
      } else {
        resolve3();
      }
    };

    // Without this guard a missing timeout would reach setTimeout as
    // `undefined`, which fires almost immediately and kills the install.
    if (timeoutMs != null) {
      timer = setTimeout(() => {
        child.kill("SIGKILL");
        settle(
          new Error(
            `${cmd} ${args.join(" ")} timed out after ${timeoutMs}ms`
          )
        );
      }, timeoutMs);
    }

    child.on("error", (err) => {
      settle(err);
    });

    child.on("close", (code, signal) => {
      clearTimeout(timer);
      // The captured output is reported even when the command failed or
      // timed out, since that is when it is most useful.
      const trimmed = output.trimEnd();
      if (trimmed.length > 0) {
        onProgress(`[install:output]\n${trimmed}`);
      }
      if (code === 0) {
        settle();
      } else if (code === null) {
        // A null exit code means the child was terminated by a signal.
        settle(new Error(`${cmd} was terminated by signal ${signal}`));
      } else {
        settle(new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
7427
7467
  function detectPackageManager(workDir) {
7428
7468
  if ((0, import_fs.existsSync)(import_path2.default.join(workDir, "pnpm-lock.yaml"))) {
7429
7469
  return {
@@ -7712,7 +7752,8 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
7712
7752
  writeWixEnvFile(workDir2);
7713
7753
  onProgress("[diag] entering installDependencies");
7714
7754
  await installDependencies(workDir2, onProgress, {
7715
- cacheBase: nodeModulesCacheDir
7755
+ cacheBase: nodeModulesCacheDir,
7756
+ exec: createLoggingInstallExec(onProgress)
7716
7757
  });
7717
7758
  onProgress("[diag] installDependencies returned");
7718
7759
  onProgress("Environment ready");
@@ -10980,22 +11021,38 @@ async function executeWithAiSdk(context) {
10980
11021
  ...topLevelExtras,
10981
11022
  model,
10982
11023
  abortSignal: abortController.signal,
10983
- system: systemPrompt,
10984
- ...hasImages ? {
10985
- messages: [
11024
+ // Anthropic prompt caching: the Vercel AI SDK can't attach cache_control
11025
+ // to the top-level `system` string, so the system prompt is sent as a
11026
+ // `system` message carrying an ephemeral cache breakpoint (Anthropic only).
11027
+ // Anthropic renders tools -> system -> messages, so a breakpoint on the
11028
+ // system block caches the tool definitions too — high value for MCP
11029
+ // scenarios that share a large tool set + system prompt across runs.
11030
+ messages: [
11031
+ ...systemPrompt ? [
10986
11032
  {
10987
- role: "user",
10988
- content: [
10989
- { type: "text", text: scenario.triggerPrompt },
10990
- ...triggerPromptImages.map((img) => ({
10991
- type: "image",
10992
- image: img.base64,
10993
- mediaType: img.mediaType
10994
- }))
10995
- ]
11033
+ role: "system",
11034
+ content: systemPrompt,
11035
+ ...isAnthropic ? {
11036
+ providerOptions: {
11037
+ anthropic: {
11038
+ cacheControl: { type: "ephemeral" }
11039
+ }
11040
+ }
11041
+ } : {}
10996
11042
  }
10997
- ]
10998
- } : { prompt: scenario.triggerPrompt },
11043
+ ] : [],
11044
+ {
11045
+ role: "user",
11046
+ content: hasImages ? [
11047
+ { type: "text", text: scenario.triggerPrompt },
11048
+ ...triggerPromptImages.map((img) => ({
11049
+ type: "image",
11050
+ image: img.base64,
11051
+ mediaType: img.mediaType
11052
+ }))
11053
+ ] : scenario.triggerPrompt
11054
+ }
11055
+ ],
10999
11056
  temperature: supportsThinking ? void 0 : cfg.temperature,
11000
11057
  topP: supportsThinking ? void 0 : cfg.topP,
11001
11058
  frequencyPenalty: cfg.frequencyPenalty,