@wix/evalforge-evaluator 0.198.0 → 0.200.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -7453,6 +7453,46 @@ function defaultExec(cmd, args, opts) {
7453
7453
  });
7454
7454
  });
7455
7455
  }
7456
/**
 * Builds an `exec` function for dependency installation that captures the
 * child's combined stdout/stderr and forwards it to `onProgress` once the
 * child has closed.
 *
 * @param {(message: string) => void} onProgress - Called at most once per
 *   command with `[install:output]\n<output>` when the trimmed output is
 *   non-empty.
 * @returns {(cmd: string, args: string[], opts: { cwd?: string, env?: object, timeoutMs?: number }) => Promise<void>}
 *   Resolves when the child exits with code 0. Rejects when the child cannot
 *   be spawned, exits with a non-zero code, or exceeds `opts.timeoutMs`.
 */
function createLoggingInstallExec(onProgress) {
  return (cmd, args, opts) => new Promise((resolve3, reject) => {
    const child = spawn(cmd, args, {
      cwd: opts.cwd,
      env: opts.env,
      stdio: ["ignore", "pipe", "pipe"]
    });

    let output = "";
    const collect = (chunk) => {
      output += chunk;
    };
    for (const stream of [child.stdout, child.stderr]) {
      // Decode per stream: a multi-byte UTF-8 character split across two
      // chunks would otherwise be turned into replacement characters.
      stream?.setEncoding("utf8");
      stream?.on("data", collect);
    }

    // Settle exactly once; later events (e.g. `close` after a timeout) only
    // get to flush the collected output.
    let settled = false;
    const settle = (fn, value) => {
      if (settled) {
        return;
      }
      settled = true;
      fn(value);
    };

    // Only arm the timer for a real timeout. `setTimeout(fn, undefined)` fires
    // almost immediately and would kill every install right after spawn.
    const hasTimeout = Number.isFinite(opts.timeoutMs) && opts.timeoutMs > 0;
    const timer = hasTimeout ? setTimeout(() => {
      child.kill("SIGKILL");
      settle(
        reject,
        new Error(
          `${cmd} ${args.join(" ")} timed out after ${opts.timeoutMs}ms`
        )
      );
    }, opts.timeoutMs) : void 0;

    child.on("error", (err) => {
      clearTimeout(timer);
      settle(reject, err);
    });

    child.on("close", (code) => {
      clearTimeout(timer);
      const trimmed = output.trimEnd();
      if (trimmed.length > 0) {
        onProgress(`[install:output]
${trimmed}`);
      }
      if (code === 0) {
        settle(resolve3);
      } else {
        settle(reject, new Error(`${cmd} exited with code ${code}`));
      }
    });
  });
}
7456
7496
  function detectPackageManager(workDir) {
7457
7497
  if (existsSync(path.join(workDir, "pnpm-lock.yaml"))) {
7458
7498
  return {
@@ -7741,7 +7781,8 @@ async function prepareWorkingDirectory(config, evalRunId2, targetId, scenarioId,
7741
7781
  writeWixEnvFile(workDir2);
7742
7782
  onProgress("[diag] entering installDependencies");
7743
7783
  await installDependencies(workDir2, onProgress, {
7744
- cacheBase: nodeModulesCacheDir
7784
+ cacheBase: nodeModulesCacheDir,
7785
+ exec: createLoggingInstallExec(onProgress)
7745
7786
  });
7746
7787
  onProgress("[diag] installDependencies returned");
7747
7788
  onProgress("Environment ready");
@@ -11039,22 +11080,38 @@ async function executeWithAiSdk(context) {
11039
11080
  ...topLevelExtras,
11040
11081
  model,
11041
11082
  abortSignal: abortController.signal,
11042
- system: systemPrompt,
11043
- ...hasImages ? {
11044
- messages: [
11083
+ // Anthropic prompt caching: the Vercel AI SDK can't attach cache_control
11084
+ // to the top-level `system` string, so the system prompt is sent as a
11085
+ // `system` message carrying an ephemeral cache breakpoint (Anthropic only).
11086
+ // Anthropic renders tools -> system -> messages, so a breakpoint on the
11087
+ // system block caches the tool definitions too — high value for MCP
11088
+ // scenarios that share a large tool set + system prompt across runs.
11089
+ messages: [
11090
+ ...systemPrompt ? [
11045
11091
  {
11046
- role: "user",
11047
- content: [
11048
- { type: "text", text: scenario.triggerPrompt },
11049
- ...triggerPromptImages.map((img) => ({
11050
- type: "image",
11051
- image: img.base64,
11052
- mediaType: img.mediaType
11053
- }))
11054
- ]
11092
+ role: "system",
11093
+ content: systemPrompt,
11094
+ ...isAnthropic ? {
11095
+ providerOptions: {
11096
+ anthropic: {
11097
+ cacheControl: { type: "ephemeral" }
11098
+ }
11099
+ }
11100
+ } : {}
11055
11101
  }
11056
- ]
11057
- } : { prompt: scenario.triggerPrompt },
11102
+ ] : [],
11103
+ {
11104
+ role: "user",
11105
+ content: hasImages ? [
11106
+ { type: "text", text: scenario.triggerPrompt },
11107
+ ...triggerPromptImages.map((img) => ({
11108
+ type: "image",
11109
+ image: img.base64,
11110
+ mediaType: img.mediaType
11111
+ }))
11112
+ ] : scenario.triggerPrompt
11113
+ }
11114
+ ],
11058
11115
  temperature: supportsThinking ? void 0 : cfg.temperature,
11059
11116
  topP: supportsThinking ? void 0 : cfg.topP,
11060
11117
  frequencyPenalty: cfg.frequencyPenalty,