@wix/evalforge-evaluator 0.155.0 → 0.157.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +27 -11
- package/build/index.js.map +3 -3
- package/build/index.mjs +29 -13
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/agents/timeout.d.ts +9 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -659,6 +659,14 @@ async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchGitHubFolder2)
|
|
|
659
659
|
}
|
|
660
660
|
}
|
|
661
661
|
|
|
662
|
+
// src/run-scenario/agents/timeout.ts
|
|
663
|
+
/** Timeout (ms) applied when a run has neither an explicit duration nor a turn limit: 60 minutes. */
var UNLIMITED_RUN_TIMEOUT_MS = 60 * 6e4;

/**
 * Resolve the overall timeout, in milliseconds, for an agent run.
 *
 * Precedence:
 *   1. An explicit `maxDurationMs` is returned unchanged.
 *   2. With no turn limit, `UNLIMITED_RUN_TIMEOUT_MS` is used.
 *   3. Otherwise one minute is budgeted per turn, with a five-minute floor.
 *
 * `null` is treated the same as `undefined` for both arguments, so a caller
 * passing an explicit `null` gets the fallback instead of a `null`/zero-based
 * timeout (a timer scheduled with a `null` delay fires almost immediately).
 *
 * @param {number | null | undefined} maxTurns - Turn limit for the run, or nullish when unlimited.
 * @param {number | null | undefined} maxDurationMs - Explicit timeout override in milliseconds.
 * @returns {number} Timeout in milliseconds.
 */
function resolveTimeoutMs(maxTurns, maxDurationMs) {
  const perTurnMs = 6e4;
  const minTurnBasedMs = 3e5;
  // `== null` intentionally matches both null and undefined.
  if (maxDurationMs != null) return maxDurationMs;
  if (maxTurns == null) return UNLIMITED_RUN_TIMEOUT_MS;
  return Math.max(minTurnBasedMs, maxTurns * perTurnMs);
}
|
|
669
|
+
|
|
662
670
|
// src/run-scenario/agents/claude-code/execute.ts
|
|
663
671
|
import { randomUUID } from "crypto";
|
|
664
672
|
|
|
@@ -1291,7 +1299,7 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
1291
1299
|
traceContext.authToken
|
|
1292
1300
|
);
|
|
1293
1301
|
}
|
|
1294
|
-
const SDK_TIMEOUT_MS = options.maxDurationMs
|
|
1302
|
+
const SDK_TIMEOUT_MS = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
1295
1303
|
let timeoutHandle;
|
|
1296
1304
|
let timedOut = false;
|
|
1297
1305
|
const HEARTBEAT_INTERVAL_MS = 1e4;
|
|
@@ -3104,7 +3112,7 @@ async function executeWithOpenCode(skills, scenario, options) {
|
|
|
3104
3112
|
model: options.model
|
|
3105
3113
|
});
|
|
3106
3114
|
const maxTurns = options.maxTurns || void 0;
|
|
3107
|
-
const sdkTimeoutMs = options.maxDurationMs
|
|
3115
|
+
const sdkTimeoutMs = resolveTimeoutMs(maxTurns, options.maxDurationMs);
|
|
3108
3116
|
const { env, providerID, modelID } = await buildOpenCodeEnv({
|
|
3109
3117
|
model: options.model,
|
|
3110
3118
|
temperature: options.temperature,
|
|
@@ -3396,8 +3404,8 @@ defaultRegistry.register(openCodeAdapter);
|
|
|
3396
3404
|
// src/run-scenario/agents/simple-agent/execute.ts
|
|
3397
3405
|
import {
|
|
3398
3406
|
APICallError,
|
|
3399
|
-
|
|
3400
|
-
|
|
3407
|
+
stepCountIs,
|
|
3408
|
+
streamText
|
|
3401
3409
|
} from "ai";
|
|
3402
3410
|
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
3403
3411
|
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
@@ -3726,12 +3734,15 @@ async function executeWithAiSdk(context) {
|
|
|
3726
3734
|
emitStartEvent(traceContext, startTime);
|
|
3727
3735
|
}
|
|
3728
3736
|
const effectiveMaxTurns = cfg.maxTurns === 0 ? void 0 : cfg.maxTurns ?? DEFAULT_MAX_TOOL_STEPS;
|
|
3729
|
-
const SDK_TIMEOUT_MS =
|
|
3737
|
+
const SDK_TIMEOUT_MS = resolveTimeoutMs(
|
|
3738
|
+
effectiveMaxTurns,
|
|
3739
|
+
cfg.maxDurationMs
|
|
3740
|
+
);
|
|
3730
3741
|
const abortController = new AbortController();
|
|
3731
3742
|
const timeoutHandle = setTimeout(() => {
|
|
3732
3743
|
abortController.abort(
|
|
3733
3744
|
new Error(
|
|
3734
|
-
`Simple Agent
|
|
3745
|
+
`Simple Agent streamText timed out after ${SDK_TIMEOUT_MS}ms (model=${modelId}, scenario=${scenario.name})`
|
|
3735
3746
|
)
|
|
3736
3747
|
);
|
|
3737
3748
|
}, SDK_TIMEOUT_MS);
|
|
@@ -3777,7 +3788,7 @@ async function executeWithAiSdk(context) {
|
|
|
3777
3788
|
const stepTimestamps = [];
|
|
3778
3789
|
const { triggerPromptImages } = context;
|
|
3779
3790
|
const hasImages = triggerPromptImages && triggerPromptImages.length > 0;
|
|
3780
|
-
const result =
|
|
3791
|
+
const result = streamText({
|
|
3781
3792
|
...topLevelExtras,
|
|
3782
3793
|
model,
|
|
3783
3794
|
abortSignal: abortController.signal,
|
|
@@ -3835,14 +3846,19 @@ async function executeWithAiSdk(context) {
|
|
|
3835
3846
|
}
|
|
3836
3847
|
}
|
|
3837
3848
|
});
|
|
3849
|
+
const [resolvedText, resolvedSteps, resolvedUsage] = await Promise.all([
|
|
3850
|
+
result.text,
|
|
3851
|
+
result.steps,
|
|
3852
|
+
result.usage
|
|
3853
|
+
]);
|
|
3838
3854
|
const durationMs = Date.now() - startTime;
|
|
3839
3855
|
const usage = {
|
|
3840
|
-
inputTokens:
|
|
3841
|
-
outputTokens:
|
|
3842
|
-
totalTokens:
|
|
3856
|
+
inputTokens: resolvedUsage.inputTokens ?? 0,
|
|
3857
|
+
outputTokens: resolvedUsage.outputTokens ?? 0,
|
|
3858
|
+
totalTokens: resolvedUsage.totalTokens ?? 0
|
|
3843
3859
|
};
|
|
3844
3860
|
const llmTrace = buildLLMTrace2(
|
|
3845
|
-
|
|
3861
|
+
resolvedSteps,
|
|
3846
3862
|
durationMs,
|
|
3847
3863
|
usage,
|
|
3848
3864
|
modelId,
|
|
@@ -3855,12 +3871,12 @@ async function executeWithAiSdk(context) {
|
|
|
3855
3871
|
}
|
|
3856
3872
|
const conversation = buildConversation3(
|
|
3857
3873
|
scenario.triggerPrompt,
|
|
3858
|
-
|
|
3874
|
+
resolvedSteps,
|
|
3859
3875
|
startTime,
|
|
3860
3876
|
stepTimestamps
|
|
3861
3877
|
);
|
|
3862
3878
|
return {
|
|
3863
|
-
outputText:
|
|
3879
|
+
outputText: resolvedText,
|
|
3864
3880
|
durationMs,
|
|
3865
3881
|
usage,
|
|
3866
3882
|
llmTrace,
|