@wix/evalforge-evaluator 0.155.0 → 0.157.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -659,6 +659,14 @@ async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder2)
659
659
  }
660
660
  }
661
661
 
662
+ // src/run-scenario/agents/timeout.ts
663
/**
 * Timeout applied when neither a duration nor a turn limit is configured:
 * 60 minutes, expressed in milliseconds.
 */
var UNLIMITED_RUN_TIMEOUT_MS = 60 * 6e4;

/**
 * Resolve the wall-clock timeout (in milliseconds) for an agent run.
 *
 * Precedence:
 *   1. An explicit `maxDurationMs` is returned as-is.
 *   2. With no turn limit, the run gets `UNLIMITED_RUN_TIMEOUT_MS`.
 *   3. Otherwise one minute per turn, with a five-minute floor.
 *
 * `null` is treated the same as `undefined` for both arguments. The call
 * sites previously used `maxDurationMs ?? ...`, which ignored `null`; a strict
 * `!== undefined` check would instead hand `null` to `setTimeout`, where it is
 * coerced to a 0 ms delay and aborts the run immediately.
 *
 * @param maxTurns Turn limit for the run, or `undefined`/`null` when unlimited.
 * @param maxDurationMs Explicit timeout override in milliseconds, if any.
 * @returns The timeout to use, in milliseconds.
 */
function resolveTimeoutMs(maxTurns, maxDurationMs) {
  // `!= null` deliberately matches both null and undefined.
  if (maxDurationMs != null) return maxDurationMs;
  if (maxTurns == null) return UNLIMITED_RUN_TIMEOUT_MS;
  // 3e5 ms = 5 minute floor; 6e4 ms = 1 minute budget per turn.
  return Math.max(3e5, maxTurns * 6e4);
}
669
+
662
670
  // src/run-scenario/agents/claude-code/execute.ts
663
671
  import { randomUUID } from "crypto";
664
672
 
@@ -1291,7 +1299,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
1291
1299
  traceContext.authToken
1292
1300
  );
1293
1301
  }
1294
- const SDK_TIMEOUT_MS = options.maxDurationMs ?? Math.max(3e5, (maxTurns ?? 10) * 6e4);
1302
+ const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
1295
1303
  let timeoutHandle;
1296
1304
  let timedOut = false;
1297
1305
  const HEARTBEAT_INTERVAL_MS = 1e4;
@@ -3104,7 +3112,7 @@ async function executeWithOpenCode(skills, scenario, options) {
3104
3112
  model: options.model
3105
3113
  });
3106
3114
  const maxTurns = options.maxTurns || void 0;
3107
- const sdkTimeoutMs = options.maxDurationMs ?? Math.max(3e5, (maxTurns ?? 10) * 6e4);
3115
+ const sdkTimeoutMs = resolveTimeoutMs(maxTurns, options.maxDurationMs);
3108
3116
  const { env, providerID, modelID } = await buildOpenCodeEnv({
3109
3117
  model: options.model,
3110
3118
  temperature: options.temperature,
@@ -3396,8 +3404,8 @@ defaultRegistry.register(openCodeAdapter);
3396
3404
  // src/run-scenario/agents/simple-agent/execute.ts
3397
3405
  import {
3398
3406
  APICallError,
3399
- generateText,
3400
- stepCountIs
3407
+ stepCountIs,
3408
+ streamText
3401
3409
  } from "ai";
3402
3410
  import { createAnthropic } from "@ai-sdk/anthropic";
3403
3411
  import { createGoogleGenerativeAI } from "@ai-sdk/google";
@@ -3726,12 +3734,15 @@ async function executeWithAiSdk(context) {
3726
3734
  emitStartEvent(traceContext, startTime);
3727
3735
  }
3728
3736
  const effectiveMaxTurns = cfg.maxTurns === 0 ? void 0 : cfg.maxTurns ?? DEFAULT_MAX_TOOL_STEPS;
3729
- const SDK_TIMEOUT_MS = cfg.maxDurationMs ?? Math.max(3e5, (effectiveMaxTurns ?? 25) * 6e4);
3737
+ const SDK_TIMEOUT_MS = resolveTimeoutMs(
3738
+ effectiveMaxTurns,
3739
+ cfg.maxDurationMs
3740
+ );
3730
3741
  const abortController = new AbortController();
3731
3742
  const timeoutHandle = setTimeout(() => {
3732
3743
  abortController.abort(
3733
3744
  new Error(
3734
- `Simple Agent generateText timed out after ${SDK_TIMEOUT_MS}ms (model=${modelId}, scenario=${scenario.name})`
3745
+ `Simple Agent streamText timed out after ${SDK_TIMEOUT_MS}ms (model=${modelId}, scenario=${scenario.name})`
3735
3746
  )
3736
3747
  );
3737
3748
  }, SDK_TIMEOUT_MS);
@@ -3777,7 +3788,7 @@ async function executeWithAiSdk(context) {
3777
3788
  const stepTimestamps = [];
3778
3789
  const { triggerPromptImages } = context;
3779
3790
  const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
3780
- const result = await generateText({
3791
+ const result = streamText({
3781
3792
  ...topLevelExtras,
3782
3793
  model,
3783
3794
  abortSignal: abortController.signal,
@@ -3835,14 +3846,19 @@ async function executeWithAiSdk(context) {
3835
3846
  }
3836
3847
  }
3837
3848
  });
3849
+ const [resolvedText, resolvedSteps, resolvedUsage] = await Promise.all([
3850
+ result.text,
3851
+ result.steps,
3852
+ result.usage
3853
+ ]);
3838
3854
  const durationMs = Date.now() - startTime;
3839
3855
  const usage = {
3840
- inputTokens: result.usage.inputTokens ?? 0,
3841
- outputTokens: result.usage.outputTokens ?? 0,
3842
- totalTokens: result.usage.totalTokens ?? 0
3856
+ inputTokens: resolvedUsage.inputTokens ?? 0,
3857
+ outputTokens: resolvedUsage.outputTokens ?? 0,
3858
+ totalTokens: resolvedUsage.totalTokens ?? 0
3843
3859
  };
3844
3860
  const llmTrace = buildLLMTrace2(
3845
- result.steps,
3861
+ resolvedSteps,
3846
3862
  durationMs,
3847
3863
  usage,
3848
3864
  modelId,
@@ -3855,12 +3871,12 @@ async function executeWithAiSdk(context) {
3855
3871
  }
3856
3872
  const conversation = buildConversation3(
3857
3873
  scenario.triggerPrompt,
3858
- result.steps,
3874
+ resolvedSteps,
3859
3875
  startTime,
3860
3876
  stepTimestamps
3861
3877
  );
3862
3878
  return {
3863
- outputText: result.text,
3879
+ outputText: resolvedText,
3864
3880
  durationMs,
3865
3881
  usage,
3866
3882
  llmTrace,