@wix/evalforge-evaluator 0.155.0 → 0.157.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -664,6 +664,14 @@ async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_git
664
664
  }
665
665
  }
666
666
 
667
+ // src/run-scenario/agents/timeout.ts
668
+ var UNLIMITED_RUN_TIMEOUT_MS = 60 * 6e4;
669
+ function resolveTimeoutMs(maxTurns, maxDurationMs) {
670
+ if (maxDurationMs !== void 0) return maxDurationMs;
671
+ if (maxTurns === void 0) return UNLIMITED_RUN_TIMEOUT_MS;
672
+ return Math.max(3e5, maxTurns * 6e4);
673
+ }
674
+
667
675
  // src/run-scenario/agents/claude-code/execute.ts
668
676
  var import_crypto = require("crypto");
669
677
 
@@ -1294,7 +1302,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
1294
1302
  traceContext.authToken
1295
1303
  );
1296
1304
  }
1297
- const SDK_TIMEOUT_MS = options.maxDurationMs ?? Math.max(3e5, (maxTurns ?? 10) * 6e4);
1305
+ const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
1298
1306
  let timeoutHandle;
1299
1307
  let timedOut = false;
1300
1308
  const HEARTBEAT_INTERVAL_MS = 1e4;
@@ -3097,7 +3105,7 @@ async function executeWithOpenCode(skills, scenario, options) {
3097
3105
  model: options.model
3098
3106
  });
3099
3107
  const maxTurns = options.maxTurns || void 0;
3100
- const sdkTimeoutMs = options.maxDurationMs ?? Math.max(3e5, (maxTurns ?? 10) * 6e4);
3108
+ const sdkTimeoutMs = resolveTimeoutMs(maxTurns, options.maxDurationMs);
3101
3109
  const { env, providerID, modelID } = await buildOpenCodeEnv({
3102
3110
  model: options.model,
3103
3111
  temperature: options.temperature,
@@ -3707,12 +3715,15 @@ async function executeWithAiSdk(context) {
3707
3715
  emitStartEvent(traceContext, startTime);
3708
3716
  }
3709
3717
  const effectiveMaxTurns = cfg.maxTurns === 0 ? void 0 : cfg.maxTurns ?? DEFAULT_MAX_TOOL_STEPS;
3710
- const SDK_TIMEOUT_MS = cfg.maxDurationMs ?? Math.max(3e5, (effectiveMaxTurns ?? 25) * 6e4);
3718
+ const SDK_TIMEOUT_MS = resolveTimeoutMs(
3719
+ effectiveMaxTurns,
3720
+ cfg.maxDurationMs
3721
+ );
3711
3722
  const abortController = new AbortController();
3712
3723
  const timeoutHandle = setTimeout(() => {
3713
3724
  abortController.abort(
3714
3725
  new Error(
3715
- `Simple Agent generateText timed out after ${SDK_TIMEOUT_MS}ms (model=${modelId}, scenario=${scenario.name})`
3726
+ `Simple Agent streamText timed out after ${SDK_TIMEOUT_MS}ms (model=${modelId}, scenario=${scenario.name})`
3716
3727
  )
3717
3728
  );
3718
3729
  }, SDK_TIMEOUT_MS);
@@ -3758,7 +3769,7 @@ async function executeWithAiSdk(context) {
3758
3769
  const stepTimestamps = [];
3759
3770
  const { triggerPromptImages } = context;
3760
3771
  const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
3761
- const result = await (0, import_ai.generateText)({
3772
+ const result = (0, import_ai.streamText)({
3762
3773
  ...topLevelExtras,
3763
3774
  model,
3764
3775
  abortSignal: abortController.signal,
@@ -3816,14 +3827,19 @@ async function executeWithAiSdk(context) {
3816
3827
  }
3817
3828
  }
3818
3829
  });
3830
+ const [resolvedText, resolvedSteps, resolvedUsage] = await Promise.all([
3831
+ result.text,
3832
+ result.steps,
3833
+ result.usage
3834
+ ]);
3819
3835
  const durationMs = Date.now() - startTime;
3820
3836
  const usage = {
3821
- inputTokens: result.usage.inputTokens ?? 0,
3822
- outputTokens: result.usage.outputTokens ?? 0,
3823
- totalTokens: result.usage.totalTokens ?? 0
3837
+ inputTokens: resolvedUsage.inputTokens ?? 0,
3838
+ outputTokens: resolvedUsage.outputTokens ?? 0,
3839
+ totalTokens: resolvedUsage.totalTokens ?? 0
3824
3840
  };
3825
3841
  const llmTrace = buildLLMTrace2(
3826
- result.steps,
3842
+ resolvedSteps,
3827
3843
  durationMs,
3828
3844
  usage,
3829
3845
  modelId,
@@ -3836,12 +3852,12 @@ async function executeWithAiSdk(context) {
3836
3852
  }
3837
3853
  const conversation = buildConversation3(
3838
3854
  scenario.triggerPrompt,
3839
- result.steps,
3855
+ resolvedSteps,
3840
3856
  startTime,
3841
3857
  stepTimestamps
3842
3858
  );
3843
3859
  return {
3844
- outputText: result.text,
3860
+ outputText: resolvedText,
3845
3861
  durationMs,
3846
3862
  usage,
3847
3863
  llmTrace,