@wix/evalforge-evaluator 0.155.0 → 0.157.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +27 -11
- package/build/index.js.map +3 -3
- package/build/index.mjs +29 -13
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/agents/timeout.d.ts +9 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -664,6 +664,14 @@ async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_git
|
|
|
664
664
|
}
|
|
665
665
|
}
|
|
666
666
|
|
|
667
|
+
// src/run-scenario/agents/timeout.ts
|
|
668
|
+
/**
 * Timeout applied when neither an explicit duration nor a turn limit is
 * available: 60 minutes, expressed in milliseconds.
 */
var UNLIMITED_RUN_TIMEOUT_MS = 60 * 6e4;

/**
 * Resolve the timeout (in milliseconds) for an agent run.
 *
 * Resolution order:
 *   1. An explicit `maxDurationMs` is returned unchanged (including `0`).
 *   2. With no usable `maxTurns`, `UNLIMITED_RUN_TIMEOUT_MS` is returned.
 *   3. Otherwise one minute is budgeted per turn, with a five-minute floor.
 *
 * `null` and `NaN` are treated the same as `undefined` ("not provided").
 * Without that guard a `null`/`NaN` result would reach `setTimeout`, which
 * coerces such delays to 0 and would fire the timeout almost immediately.
 *
 * @param {number | undefined | null} maxTurns - Turn limit for the run, if any.
 * @param {number | undefined | null} maxDurationMs - Explicit timeout override, if any.
 * @returns {number} Timeout in milliseconds.
 */
function resolveTimeoutMs(maxTurns, maxDurationMs) {
  const MIN_TURN_BASED_TIMEOUT_MS = 3e5; // five-minute floor
  const PER_TURN_TIMEOUT_MS = 6e4; // one minute per turn
  const isProvided = (value) => value != null && !Number.isNaN(value);

  if (isProvided(maxDurationMs)) return maxDurationMs;
  if (!isProvided(maxTurns)) return UNLIMITED_RUN_TIMEOUT_MS;
  return Math.max(MIN_TURN_BASED_TIMEOUT_MS, maxTurns * PER_TURN_TIMEOUT_MS);
}
|
|
674
|
+
|
|
667
675
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
668
676
|
var import_crypto = require("crypto");
|
|
669
677
|
|
|
@@ -1294,7 +1302,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
1294
1302
|
traceContext.authToken
|
|
1295
1303
|
);
|
|
1296
1304
|
}
|
|
1297
|
-
const SDK_TIMEOUT_MS = options.maxDurationMs
|
|
1305
|
+
const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
1298
1306
|
let timeoutHandle;
|
|
1299
1307
|
let timedOut = false;
|
|
1300
1308
|
const HEARTBEAT_INTERVAL_MS = 1e4;
|
|
@@ -3097,7 +3105,7 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
3097
3105
|
model: options.model
|
|
3098
3106
|
});
|
|
3099
3107
|
const maxTurns = options.maxTurns || void 0;
|
|
3100
|
-
const sdkTimeoutMs = options.maxDurationMs
|
|
3108
|
+
const sdkTimeoutMs = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
3101
3109
|
const { env, providerID, modelID } = await buildOpenCodeEnv({
|
|
3102
3110
|
model: options.model,
|
|
3103
3111
|
temperature: options.temperature,
|
|
@@ -3707,12 +3715,15 @@ async function executeWithAiSdk(context) {
|
|
|
3707
3715
|
emitStartEvent(traceContext, startTime);
|
|
3708
3716
|
}
|
|
3709
3717
|
const effectiveMaxTurns = cfg.maxTurns === 0 ? void 0 : cfg.maxTurns ?? DEFAULT_MAX_TOOL_STEPS;
|
|
3710
|
-
const SDK_TIMEOUT_MS =
|
|
3718
|
+
const SDK_TIMEOUT_MS = resolveTimeoutMs(
|
|
3719
|
+
effectiveMaxTurns,
|
|
3720
|
+
cfg.maxDurationMs
|
|
3721
|
+
);
|
|
3711
3722
|
const abortController = new AbortController();
|
|
3712
3723
|
const timeoutHandle = setTimeout(() => {
|
|
3713
3724
|
abortController.abort(
|
|
3714
3725
|
new Error(
|
|
3715
|
-
`Simple Agent
|
|
3726
|
+
`Simple Agent streamText timed out after ${SDK_TIMEOUT_MS}ms (model=${modelId}, scenario=${scenario.name})`
|
|
3716
3727
|
)
|
|
3717
3728
|
);
|
|
3718
3729
|
}, SDK_TIMEOUT_MS);
|
|
@@ -3758,7 +3769,7 @@ async function executeWithAiSdk(context) {
|
|
|
3758
3769
|
const stepTimestamps = [];
|
|
3759
3770
|
const { triggerPromptImages } = context;
|
|
3760
3771
|
const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
|
|
3761
|
-
const result =
|
|
3772
|
+
const result = (0, import_ai.streamText)({
|
|
3762
3773
|
...topLevelExtras,
|
|
3763
3774
|
model,
|
|
3764
3775
|
abortSignal: abortController.signal,
|
|
@@ -3816,14 +3827,19 @@ async function executeWithAiSdk(context) {
|
|
|
3816
3827
|
}
|
|
3817
3828
|
}
|
|
3818
3829
|
});
|
|
3830
|
+
const [resolvedText, resolvedSteps, resolvedUsage] = await Promise.all([
|
|
3831
|
+
result.text,
|
|
3832
|
+
result.steps,
|
|
3833
|
+
result.usage
|
|
3834
|
+
]);
|
|
3819
3835
|
const durationMs = Date.now() - startTime;
|
|
3820
3836
|
const usage = {
|
|
3821
|
-
inputTokens:
|
|
3822
|
-
outputTokens:
|
|
3823
|
-
totalTokens:
|
|
3837
|
+
inputTokens: resolvedUsage.inputTokens ?? 0,
|
|
3838
|
+
outputTokens: resolvedUsage.outputTokens ?? 0,
|
|
3839
|
+
totalTokens: resolvedUsage.totalTokens ?? 0
|
|
3824
3840
|
};
|
|
3825
3841
|
const llmTrace = buildLLMTrace2(
|
|
3826
|
-
|
|
3842
|
+
resolvedSteps,
|
|
3827
3843
|
durationMs,
|
|
3828
3844
|
usage,
|
|
3829
3845
|
modelId,
|
|
@@ -3836,12 +3852,12 @@ async function executeWithAiSdk(context) {
|
|
|
3836
3852
|
}
|
|
3837
3853
|
const conversation = buildConversation3(
|
|
3838
3854
|
scenario.triggerPrompt,
|
|
3839
|
-
|
|
3855
|
+
resolvedSteps,
|
|
3840
3856
|
startTime,
|
|
3841
3857
|
stepTimestamps
|
|
3842
3858
|
);
|
|
3843
3859
|
return {
|
|
3844
|
-
outputText:
|
|
3860
|
+
outputText: resolvedText,
|
|
3845
3861
|
durationMs,
|
|
3846
3862
|
usage,
|
|
3847
3863
|
llmTrace,
|