@agentv/core 2.7.1-next.5 → 2.8.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-6W5E3VR6.js → chunk-P2465XAH.js} +24 -49
- package/dist/chunk-P2465XAH.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +28 -58
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +21 -44
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +295 -220
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +44 -42
- package/dist/index.d.ts +44 -42
- package/dist/index.js +273 -173
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-6W5E3VR6.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -2141,6 +2141,24 @@ function extractCacheConfig(suite) {
|
|
|
2141
2141
|
const resolvedCachePath = typeof cachePath === "string" && cachePath.trim().length > 0 ? cachePath.trim() : void 0;
|
|
2142
2142
|
return { enabled: cache, cachePath: resolvedCachePath };
|
|
2143
2143
|
}
|
|
2144
|
+
/**
 * Reads the optional total budget (in USD) from a parsed eval suite.
 *
 * The value is taken from `suite.execution.total_budget_usd`, falling back to
 * the camelCase `suite.execution.totalBudgetUsd` when the snake_case key is
 * null or undefined.
 *
 * @param {object} suite - Parsed suite; only its `execution` property is read.
 * @returns {number | undefined} The budget when it is a positive, finite
 *   number. Returns undefined when `execution` is missing or not a plain
 *   object, when no budget is set, or when the value is invalid (in which
 *   case a warning is logged and the value is ignored).
 */
function extractTotalBudgetUsd(suite) {
  const execution = suite.execution;
  if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
    return undefined;
  }
  const rawBudget = execution.total_budget_usd ?? execution.totalBudgetUsd;
  if (rawBudget === undefined || rawBudget === null) {
    return undefined;
  }
  // Number.isFinite is false for non-numbers, NaN and +/-Infinity, so an
  // infinite value can no longer slip through as a "positive" budget.
  if (Number.isFinite(rawBudget) && rawBudget > 0) {
    return rawBudget;
  }
  logWarning(
    `Invalid execution.total_budget_usd: ${rawBudget}. Must be a positive number. Ignoring.`
  );
  return undefined;
}
|
|
2144
2162
|
/**
 * Prints a warning line through `console.warn`.
 *
 * The text is "Warning: " followed by the message, wrapped between the
 * ANSI_YELLOW2 and ANSI_RESET2 constants.
 *
 * @param {string} message - Text to show after the "Warning: " prefix.
 * @returns {void}
 */
function logWarning(message) {
  const decorated = `${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET2}`;
  console.warn(decorated);
}
|
|
@@ -2273,24 +2291,24 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2273
2291
|
continue;
|
|
2274
2292
|
}
|
|
2275
2293
|
if (typeValue === "code_judge") {
|
|
2276
|
-
let
|
|
2277
|
-
const
|
|
2278
|
-
if (typeof
|
|
2279
|
-
const trimmed =
|
|
2294
|
+
let command;
|
|
2295
|
+
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
2296
|
+
if (typeof rawCommand === "string") {
|
|
2297
|
+
const trimmed = rawCommand.trim();
|
|
2280
2298
|
if (trimmed.length === 0) {
|
|
2281
2299
|
throw new Error(
|
|
2282
|
-
`Invalid code_judge
|
|
2300
|
+
`Invalid code_judge command for evaluator '${name}' in '${evalId}': command cannot be empty`
|
|
2283
2301
|
);
|
|
2284
2302
|
}
|
|
2285
|
-
|
|
2303
|
+
command = parseCommandToArgv(trimmed);
|
|
2286
2304
|
} else {
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
`code_judge
|
|
2305
|
+
command = asStringArray(
|
|
2306
|
+
rawCommand,
|
|
2307
|
+
`code_judge command for evaluator '${name}' in '${evalId}'`
|
|
2290
2308
|
);
|
|
2291
2309
|
}
|
|
2292
|
-
if (!
|
|
2293
|
-
logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing
|
|
2310
|
+
if (!command) {
|
|
2311
|
+
logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing command`);
|
|
2294
2312
|
continue;
|
|
2295
2313
|
}
|
|
2296
2314
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
@@ -2335,6 +2353,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2335
2353
|
const knownProps2 = /* @__PURE__ */ new Set([
|
|
2336
2354
|
"name",
|
|
2337
2355
|
"type",
|
|
2356
|
+
"command",
|
|
2338
2357
|
"script",
|
|
2339
2358
|
"cwd",
|
|
2340
2359
|
"weight",
|
|
@@ -2351,7 +2370,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2351
2370
|
evaluators.push({
|
|
2352
2371
|
name,
|
|
2353
2372
|
type: "code",
|
|
2354
|
-
|
|
2373
|
+
command,
|
|
2355
2374
|
cwd,
|
|
2356
2375
|
resolvedCwd,
|
|
2357
2376
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
@@ -2953,20 +2972,20 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2953
2972
|
let resolvedPromptScript;
|
|
2954
2973
|
let promptScriptConfig;
|
|
2955
2974
|
if (isJsonObject2(rawPrompt)) {
|
|
2956
|
-
const
|
|
2957
|
-
rawPrompt.script,
|
|
2958
|
-
`prompt.
|
|
2975
|
+
const commandArray = asStringArray(
|
|
2976
|
+
rawPrompt.command ?? rawPrompt.script,
|
|
2977
|
+
`prompt.command for evaluator '${name}' in '${evalId}'`
|
|
2959
2978
|
);
|
|
2960
|
-
if (!
|
|
2961
|
-
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires
|
|
2979
|
+
if (!commandArray) {
|
|
2980
|
+
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
|
|
2962
2981
|
}
|
|
2963
|
-
const
|
|
2964
|
-
const resolved = await resolveFileReference2(
|
|
2982
|
+
const commandPath = commandArray[commandArray.length - 1];
|
|
2983
|
+
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
2965
2984
|
if (resolved.resolvedPath) {
|
|
2966
|
-
resolvedPromptScript = [...
|
|
2985
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), import_node_path4.default.resolve(resolved.resolvedPath)];
|
|
2967
2986
|
} else {
|
|
2968
2987
|
throw new Error(
|
|
2969
|
-
`Evaluator '${name}' in '${evalId}': prompt
|
|
2988
|
+
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
2970
2989
|
);
|
|
2971
2990
|
}
|
|
2972
2991
|
if (isJsonObject2(rawPrompt.config)) {
|
|
@@ -4197,6 +4216,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
4197
4216
|
trials: extractTrialsConfig(parsed),
|
|
4198
4217
|
targets: extractTargetsFromSuite(parsed),
|
|
4199
4218
|
cacheConfig: extractCacheConfig(parsed),
|
|
4219
|
+
totalBudgetUsd: extractTotalBudgetUsd(parsed),
|
|
4200
4220
|
...metadata !== void 0 && { metadata }
|
|
4201
4221
|
};
|
|
4202
4222
|
}
|
|
@@ -4387,16 +4407,16 @@ var loadEvalCaseById = loadTestById;
|
|
|
4387
4407
|
function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
4388
4408
|
if (!isJsonObject(raw)) return void 0;
|
|
4389
4409
|
const obj = raw;
|
|
4390
|
-
const
|
|
4391
|
-
if (!Array.isArray(
|
|
4392
|
-
const
|
|
4393
|
-
if (
|
|
4410
|
+
const commandSource = obj.command ?? obj.script;
|
|
4411
|
+
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
4412
|
+
const commandArr = commandSource.filter((s) => typeof s === "string");
|
|
4413
|
+
if (commandArr.length === 0) return void 0;
|
|
4394
4414
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
4395
4415
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
4396
4416
|
if (cwd && !import_node_path8.default.isAbsolute(cwd)) {
|
|
4397
4417
|
cwd = import_node_path8.default.resolve(evalFileDir, cwd);
|
|
4398
4418
|
}
|
|
4399
|
-
const config = {
|
|
4419
|
+
const config = { command: commandArr };
|
|
4400
4420
|
if (timeoutMs !== void 0) {
|
|
4401
4421
|
return { ...config, timeout_ms: timeoutMs, ...cwd !== void 0 && { cwd } };
|
|
4402
4422
|
}
|
|
@@ -5589,50 +5609,58 @@ var CliProvider = class {
|
|
|
5589
5609
|
await this.ensureHealthy(request.signal);
|
|
5590
5610
|
const effectiveCwd = request.cwd ?? this.config.cwd;
|
|
5591
5611
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
5592
|
-
const templateValues = buildTemplateValues(
|
|
5593
|
-
|
|
5612
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5613
|
+
request,
|
|
5614
|
+
this.config,
|
|
5615
|
+
outputFilePath
|
|
5616
|
+
);
|
|
5617
|
+
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
5594
5618
|
if (this.verbose) {
|
|
5595
5619
|
console.log(
|
|
5596
5620
|
`[cli-provider:${this.targetName}] cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
|
|
5597
5621
|
);
|
|
5598
5622
|
}
|
|
5599
|
-
|
|
5600
|
-
|
|
5601
|
-
|
|
5602
|
-
env: process.env,
|
|
5603
|
-
timeoutMs: this.config.timeoutMs,
|
|
5604
|
-
signal: request.signal
|
|
5605
|
-
});
|
|
5606
|
-
const measuredDurationMs = Date.now() - startTime;
|
|
5607
|
-
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5608
|
-
if (request.signal?.aborted) {
|
|
5609
|
-
throw new Error("CLI provider request was aborted");
|
|
5610
|
-
}
|
|
5611
|
-
if (result.timedOut) {
|
|
5612
|
-
throw new Error(
|
|
5613
|
-
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
5614
|
-
);
|
|
5615
|
-
}
|
|
5616
|
-
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5617
|
-
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5618
|
-
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
5619
|
-
throw new Error(message);
|
|
5620
|
-
}
|
|
5621
|
-
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
5622
|
-
const parsed = this.parseOutputContent(responseContent);
|
|
5623
|
-
return {
|
|
5624
|
-
output: parsed.output,
|
|
5625
|
-
tokenUsage: parsed.tokenUsage,
|
|
5626
|
-
costUsd: parsed.costUsd,
|
|
5627
|
-
durationMs: parsed.durationMs ?? measuredDurationMs,
|
|
5628
|
-
raw: {
|
|
5629
|
-
command: renderedCommand,
|
|
5630
|
-
stderr: result.stderr,
|
|
5631
|
-
exitCode: result.exitCode ?? 0,
|
|
5623
|
+
try {
|
|
5624
|
+
const startTime = Date.now();
|
|
5625
|
+
const result = await this.runCommand(renderedCommand, {
|
|
5632
5626
|
cwd: effectiveCwd,
|
|
5633
|
-
|
|
5627
|
+
env: process.env,
|
|
5628
|
+
timeoutMs: this.config.timeoutMs,
|
|
5629
|
+
signal: request.signal
|
|
5630
|
+
});
|
|
5631
|
+
const measuredDurationMs = Date.now() - startTime;
|
|
5632
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5633
|
+
if (request.signal?.aborted) {
|
|
5634
|
+
throw new Error("CLI provider request was aborted");
|
|
5635
|
+
}
|
|
5636
|
+
if (result.timedOut) {
|
|
5637
|
+
throw new Error(
|
|
5638
|
+
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
5639
|
+
);
|
|
5640
|
+
}
|
|
5641
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5642
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5643
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
5644
|
+
throw new Error(message);
|
|
5634
5645
|
}
|
|
5635
|
-
|
|
5646
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
5647
|
+
const parsed = this.parseOutputContent(responseContent);
|
|
5648
|
+
return {
|
|
5649
|
+
output: parsed.output,
|
|
5650
|
+
tokenUsage: parsed.tokenUsage,
|
|
5651
|
+
costUsd: parsed.costUsd,
|
|
5652
|
+
durationMs: parsed.durationMs ?? measuredDurationMs,
|
|
5653
|
+
raw: {
|
|
5654
|
+
command: renderedCommand,
|
|
5655
|
+
stderr: result.stderr,
|
|
5656
|
+
exitCode: result.exitCode ?? 0,
|
|
5657
|
+
cwd: effectiveCwd,
|
|
5658
|
+
outputFile: outputFilePath
|
|
5659
|
+
}
|
|
5660
|
+
};
|
|
5661
|
+
} finally {
|
|
5662
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
5663
|
+
}
|
|
5636
5664
|
}
|
|
5637
5665
|
async invokeBatch(requests) {
|
|
5638
5666
|
if (requests.length === 0) {
|
|
@@ -5655,7 +5683,7 @@ var CliProvider = class {
|
|
|
5655
5683
|
batchInputFiles.push(...request.inputFiles);
|
|
5656
5684
|
}
|
|
5657
5685
|
}
|
|
5658
|
-
const templateValues = buildTemplateValues(
|
|
5686
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5659
5687
|
{
|
|
5660
5688
|
question: "",
|
|
5661
5689
|
guidelines: "",
|
|
@@ -5666,87 +5694,91 @@ var CliProvider = class {
|
|
|
5666
5694
|
this.config,
|
|
5667
5695
|
outputFilePath
|
|
5668
5696
|
);
|
|
5669
|
-
const renderedCommand = renderTemplate(this.config.
|
|
5697
|
+
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
5670
5698
|
if (this.verbose) {
|
|
5671
5699
|
console.log(
|
|
5672
5700
|
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
|
|
5673
5701
|
);
|
|
5674
5702
|
}
|
|
5675
|
-
|
|
5676
|
-
|
|
5677
|
-
|
|
5678
|
-
|
|
5679
|
-
|
|
5680
|
-
|
|
5681
|
-
|
|
5682
|
-
|
|
5683
|
-
|
|
5684
|
-
if (
|
|
5685
|
-
|
|
5686
|
-
|
|
5687
|
-
|
|
5688
|
-
|
|
5689
|
-
|
|
5690
|
-
|
|
5691
|
-
|
|
5692
|
-
|
|
5693
|
-
|
|
5694
|
-
|
|
5695
|
-
|
|
5696
|
-
|
|
5697
|
-
|
|
5698
|
-
|
|
5699
|
-
|
|
5700
|
-
|
|
5701
|
-
const
|
|
5702
|
-
|
|
5703
|
-
|
|
5704
|
-
|
|
5705
|
-
|
|
5706
|
-
|
|
5707
|
-
|
|
5708
|
-
|
|
5709
|
-
|
|
5710
|
-
|
|
5711
|
-
|
|
5703
|
+
try {
|
|
5704
|
+
const startTime = Date.now();
|
|
5705
|
+
const result = await this.runCommand(renderedCommand, {
|
|
5706
|
+
cwd: this.config.cwd,
|
|
5707
|
+
env: process.env,
|
|
5708
|
+
timeoutMs: this.config.timeoutMs,
|
|
5709
|
+
signal: controller.signal
|
|
5710
|
+
});
|
|
5711
|
+
const measuredDurationMs = Date.now() - startTime;
|
|
5712
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5713
|
+
if (controller.signal.aborted) {
|
|
5714
|
+
throw new Error("CLI provider request was aborted");
|
|
5715
|
+
}
|
|
5716
|
+
if (result.timedOut) {
|
|
5717
|
+
throw new Error(
|
|
5718
|
+
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
5719
|
+
);
|
|
5720
|
+
}
|
|
5721
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5722
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5723
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
5724
|
+
throw new Error(message);
|
|
5725
|
+
}
|
|
5726
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
5727
|
+
const recordsById = this.parseJsonlBatchOutput(responseContent);
|
|
5728
|
+
const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
|
|
5729
|
+
const responses = requests.map((request) => {
|
|
5730
|
+
const evalCaseId = request.evalCaseId;
|
|
5731
|
+
if (!evalCaseId) {
|
|
5732
|
+
return {
|
|
5733
|
+
output: [],
|
|
5734
|
+
durationMs: perRequestFallbackMs,
|
|
5735
|
+
raw: {
|
|
5736
|
+
command: renderedCommand,
|
|
5737
|
+
stderr: result.stderr,
|
|
5738
|
+
exitCode: result.exitCode ?? 0,
|
|
5739
|
+
cwd: this.config.cwd,
|
|
5740
|
+
outputFile: outputFilePath
|
|
5741
|
+
}
|
|
5742
|
+
};
|
|
5743
|
+
}
|
|
5744
|
+
const parsed = recordsById.get(evalCaseId);
|
|
5745
|
+
if (!parsed) {
|
|
5746
|
+
const errorMessage = `Batch output missing id '${evalCaseId}'`;
|
|
5747
|
+
if (this.verbose) {
|
|
5748
|
+
console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
|
|
5712
5749
|
}
|
|
5713
|
-
|
|
5714
|
-
|
|
5715
|
-
|
|
5716
|
-
|
|
5717
|
-
|
|
5718
|
-
|
|
5719
|
-
|
|
5750
|
+
return {
|
|
5751
|
+
output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
|
|
5752
|
+
durationMs: perRequestFallbackMs,
|
|
5753
|
+
raw: {
|
|
5754
|
+
command: renderedCommand,
|
|
5755
|
+
stderr: result.stderr,
|
|
5756
|
+
exitCode: result.exitCode ?? 0,
|
|
5757
|
+
cwd: this.config.cwd,
|
|
5758
|
+
outputFile: outputFilePath,
|
|
5759
|
+
error: errorMessage
|
|
5760
|
+
}
|
|
5761
|
+
};
|
|
5720
5762
|
}
|
|
5721
5763
|
return {
|
|
5722
|
-
output:
|
|
5723
|
-
|
|
5764
|
+
output: parsed.output,
|
|
5765
|
+
tokenUsage: parsed.tokenUsage,
|
|
5766
|
+
costUsd: parsed.costUsd,
|
|
5767
|
+
durationMs: parsed.durationMs ?? perRequestFallbackMs,
|
|
5724
5768
|
raw: {
|
|
5725
5769
|
command: renderedCommand,
|
|
5726
5770
|
stderr: result.stderr,
|
|
5727
5771
|
exitCode: result.exitCode ?? 0,
|
|
5728
5772
|
cwd: this.config.cwd,
|
|
5729
5773
|
outputFile: outputFilePath,
|
|
5730
|
-
|
|
5774
|
+
recordId: evalCaseId
|
|
5731
5775
|
}
|
|
5732
5776
|
};
|
|
5733
|
-
}
|
|
5734
|
-
return
|
|
5735
|
-
|
|
5736
|
-
|
|
5737
|
-
|
|
5738
|
-
durationMs: parsed.durationMs ?? perRequestFallbackMs,
|
|
5739
|
-
raw: {
|
|
5740
|
-
command: renderedCommand,
|
|
5741
|
-
stderr: result.stderr,
|
|
5742
|
-
exitCode: result.exitCode ?? 0,
|
|
5743
|
-
cwd: this.config.cwd,
|
|
5744
|
-
outputFile: outputFilePath,
|
|
5745
|
-
recordId: evalCaseId
|
|
5746
|
-
}
|
|
5747
|
-
};
|
|
5748
|
-
});
|
|
5749
|
-
return responses;
|
|
5777
|
+
});
|
|
5778
|
+
return responses;
|
|
5779
|
+
} finally {
|
|
5780
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
5781
|
+
}
|
|
5750
5782
|
}
|
|
5751
5783
|
/**
|
|
5752
5784
|
* Parse output content from CLI.
|
|
@@ -5861,7 +5893,7 @@ var CliProvider = class {
|
|
|
5861
5893
|
return;
|
|
5862
5894
|
}
|
|
5863
5895
|
const timeoutMs = healthcheck.timeoutMs ?? this.config.timeoutMs;
|
|
5864
|
-
if (healthcheck
|
|
5896
|
+
if ("url" in healthcheck && healthcheck.url) {
|
|
5865
5897
|
const controller = new AbortController();
|
|
5866
5898
|
const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
|
|
5867
5899
|
signal?.addEventListener("abort", () => controller.abort(), { once: true });
|
|
@@ -5880,50 +5912,70 @@ var CliProvider = class {
|
|
|
5880
5912
|
}
|
|
5881
5913
|
return;
|
|
5882
5914
|
}
|
|
5883
|
-
const
|
|
5884
|
-
|
|
5885
|
-
|
|
5886
|
-
|
|
5887
|
-
|
|
5888
|
-
|
|
5889
|
-
|
|
5890
|
-
|
|
5891
|
-
|
|
5892
|
-
|
|
5893
|
-
|
|
5894
|
-
|
|
5895
|
-
|
|
5915
|
+
const hcCommand = "command" in healthcheck ? healthcheck.command : void 0;
|
|
5916
|
+
if (!hcCommand) {
|
|
5917
|
+
throw new Error(`CLI healthcheck for '${this.targetName}': 'command' or 'url' is required`);
|
|
5918
|
+
}
|
|
5919
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5920
|
+
{
|
|
5921
|
+
question: "",
|
|
5922
|
+
guidelines: "",
|
|
5923
|
+
inputFiles: [],
|
|
5924
|
+
evalCaseId: "healthcheck",
|
|
5925
|
+
attempt: 0
|
|
5926
|
+
},
|
|
5927
|
+
this.config,
|
|
5928
|
+
generateOutputFilePath("healthcheck")
|
|
5896
5929
|
);
|
|
5930
|
+
const renderedCommand = renderTemplate(hcCommand, templateValues);
|
|
5931
|
+
const hcCwd = "cwd" in healthcheck ? healthcheck.cwd : void 0;
|
|
5897
5932
|
if (this.verbose) {
|
|
5898
5933
|
console.log(
|
|
5899
|
-
`[cli-provider:${this.targetName}] (healthcheck) cwd=${
|
|
5934
|
+
`[cli-provider:${this.targetName}] (healthcheck) cwd=${hcCwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
|
|
5900
5935
|
);
|
|
5901
5936
|
}
|
|
5902
|
-
|
|
5903
|
-
|
|
5904
|
-
|
|
5905
|
-
|
|
5906
|
-
|
|
5907
|
-
|
|
5908
|
-
|
|
5909
|
-
|
|
5910
|
-
|
|
5911
|
-
|
|
5912
|
-
|
|
5937
|
+
try {
|
|
5938
|
+
const result = await this.runCommand(renderedCommand, {
|
|
5939
|
+
cwd: hcCwd ?? this.config.cwd,
|
|
5940
|
+
env: process.env,
|
|
5941
|
+
timeoutMs,
|
|
5942
|
+
signal
|
|
5943
|
+
});
|
|
5944
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5945
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5946
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5947
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
|
|
5948
|
+
throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
|
|
5949
|
+
}
|
|
5950
|
+
} finally {
|
|
5951
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
5913
5952
|
}
|
|
5914
5953
|
}
|
|
5915
5954
|
};
|
|
5916
|
-
function buildTemplateValues(request, config, outputFilePath) {
|
|
5955
|
+
/**
 * Builds the placeholder values used to render a CLI command template and
 * writes the prompt text to a file.
 *
 * The question text (or an empty string) is written as UTF-8 to the path
 * returned by `generateOutputFilePath(request.evalCaseId, ".prompt.txt")`.
 * Every value except FILES is passed through `shellEscape`; FILES comes from
 * `formatFileList`.
 *
 * @param {object} request - Reads `question`, `guidelines`, `evalCaseId`,
 *   `attempt` and `inputFiles`.
 * @param {object} config - Reads `filesFormat`.
 * @param {string} outputFilePath - Path exposed as OUTPUT_FILE.
 * @returns {Promise<{values: object, promptFilePath: string}>} The
 *   placeholder map and the path of the prompt file that was written.
 */
async function buildTemplateValues(request, config, outputFilePath) {
  const normalizedFiles = normalizeInputFiles2(request.inputFiles);
  const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
  const promptText = request.question ?? "";
  await import_promises11.default.writeFile(promptFilePath, promptText, "utf8");

  // Key order is kept as PROMPT, PROMPT_FILE, GUIDELINES, EVAL_ID, ATTEMPT,
  // FILES, OUTPUT_FILE so the helper calls run in the same sequence.
  const values = {
    PROMPT: shellEscape(promptText),
    PROMPT_FILE: shellEscape(promptFilePath),
    GUIDELINES: shellEscape(request.guidelines ?? ""),
    EVAL_ID: shellEscape(request.evalCaseId ?? ""),
    ATTEMPT: shellEscape(String(request.attempt ?? 0)),
    FILES: formatFileList(normalizedFiles, config.filesFormat),
    OUTPUT_FILE: shellEscape(outputFilePath)
  };
  return { values, promptFilePath };
}
|
|
5972
|
+
/**
 * Deletes a temporary file on a best-effort basis.
 *
 * Does nothing when `filePath` is falsy or `keepTempFiles` is truthy. A
 * rejected unlink is ignored, so awaiting this function does not surface
 * deletion failures.
 *
 * @param {string | undefined} filePath - File to remove.
 * @param {boolean | undefined} keepTempFiles - When truthy, the file is kept.
 * @returns {Promise<void>}
 */
async function cleanupTempFile(filePath, keepTempFiles) {
  const shouldDelete = Boolean(filePath) && !keepTempFiles;
  if (!shouldDelete) {
    return;
  }
  const removal = import_promises11.default.unlink(filePath);
  // Failure to delete is deliberately ignored: this is cleanup only.
  await removal.catch(() => undefined);
}
|
|
5927
5979
|
function normalizeInputFiles2(inputFiles) {
|
|
5928
5980
|
if (!inputFiles || inputFiles.length === 0) {
|
|
5929
5981
|
return void 0;
|
|
@@ -8285,29 +8337,25 @@ var ProviderRegistry = class {
|
|
|
8285
8337
|
var import_node_path18 = __toESM(require("path"), 1);
|
|
8286
8338
|
var import_zod3 = require("zod");
|
|
8287
8339
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
8288
|
-
type: import_zod3.z.literal("http"),
|
|
8289
8340
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
8290
8341
|
timeout_seconds: import_zod3.z.number().positive().optional(),
|
|
8291
8342
|
timeoutSeconds: import_zod3.z.number().positive().optional()
|
|
8292
8343
|
});
|
|
8293
8344
|
var CliHealthcheckCommandInputSchema = import_zod3.z.object({
|
|
8294
|
-
|
|
8295
|
-
command_template: import_zod3.z.string().optional(),
|
|
8296
|
-
commandTemplate: import_zod3.z.string().optional(),
|
|
8345
|
+
command: import_zod3.z.string().min(1, "healthcheck command is required"),
|
|
8297
8346
|
cwd: import_zod3.z.string().optional(),
|
|
8298
8347
|
timeout_seconds: import_zod3.z.number().positive().optional(),
|
|
8299
8348
|
timeoutSeconds: import_zod3.z.number().positive().optional()
|
|
8300
8349
|
});
|
|
8301
|
-
var CliHealthcheckInputSchema = import_zod3.z.
|
|
8350
|
+
var CliHealthcheckInputSchema = import_zod3.z.union([
|
|
8302
8351
|
CliHealthcheckHttpInputSchema,
|
|
8303
8352
|
CliHealthcheckCommandInputSchema
|
|
8304
8353
|
]);
|
|
8305
8354
|
var CliTargetInputSchema = import_zod3.z.object({
|
|
8306
8355
|
name: import_zod3.z.string().min(1, "target name is required"),
|
|
8307
8356
|
provider: import_zod3.z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
|
|
8308
|
-
// Command
|
|
8309
|
-
|
|
8310
|
-
commandTemplate: import_zod3.z.string().optional(),
|
|
8357
|
+
// Command - required
|
|
8358
|
+
command: import_zod3.z.string(),
|
|
8311
8359
|
// Files format - optional
|
|
8312
8360
|
files_format: import_zod3.z.string().optional(),
|
|
8313
8361
|
filesFormat: import_zod3.z.string().optional(),
|
|
@@ -8337,26 +8385,22 @@ var CliTargetInputSchema = import_zod3.z.object({
|
|
|
8337
8385
|
workers: import_zod3.z.number().int().min(1).optional(),
|
|
8338
8386
|
provider_batching: import_zod3.z.boolean().optional(),
|
|
8339
8387
|
providerBatching: import_zod3.z.boolean().optional()
|
|
8340
|
-
}).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
|
|
8341
|
-
message: "Either command_template or commandTemplate is required"
|
|
8342
8388
|
});
|
|
8343
8389
|
var CliHealthcheckHttpSchema = import_zod3.z.object({
|
|
8344
|
-
type: import_zod3.z.literal("http"),
|
|
8345
8390
|
url: import_zod3.z.string().min(1),
|
|
8346
8391
|
timeoutMs: import_zod3.z.number().positive().optional()
|
|
8347
8392
|
}).strict();
|
|
8348
8393
|
var CliHealthcheckCommandSchema = import_zod3.z.object({
|
|
8349
|
-
|
|
8350
|
-
commandTemplate: import_zod3.z.string().min(1),
|
|
8394
|
+
command: import_zod3.z.string().min(1),
|
|
8351
8395
|
cwd: import_zod3.z.string().optional(),
|
|
8352
8396
|
timeoutMs: import_zod3.z.number().positive().optional()
|
|
8353
8397
|
}).strict();
|
|
8354
|
-
var CliHealthcheckSchema = import_zod3.z.
|
|
8398
|
+
var CliHealthcheckSchema = import_zod3.z.union([
|
|
8355
8399
|
CliHealthcheckHttpSchema,
|
|
8356
8400
|
CliHealthcheckCommandSchema
|
|
8357
8401
|
]);
|
|
8358
8402
|
var CliTargetConfigSchema = import_zod3.z.object({
|
|
8359
|
-
|
|
8403
|
+
command: import_zod3.z.string().min(1),
|
|
8360
8404
|
filesFormat: import_zod3.z.string().optional(),
|
|
8361
8405
|
cwd: import_zod3.z.string().optional(),
|
|
8362
8406
|
workspaceTemplate: import_zod3.z.string().optional(),
|
|
@@ -8368,26 +8412,19 @@ var CliTargetConfigSchema = import_zod3.z.object({
|
|
|
8368
8412
|
function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
8369
8413
|
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
8370
8414
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
8371
|
-
if (input
|
|
8415
|
+
if ("url" in input && input.url) {
|
|
8372
8416
|
const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
|
|
8373
8417
|
return {
|
|
8374
|
-
type: "http",
|
|
8375
8418
|
url,
|
|
8376
8419
|
timeoutMs
|
|
8377
8420
|
};
|
|
8378
8421
|
}
|
|
8379
|
-
|
|
8380
|
-
if (commandTemplateSource === void 0) {
|
|
8422
|
+
if (!("command" in input) || !input.command) {
|
|
8381
8423
|
throw new Error(
|
|
8382
|
-
`${targetName} healthcheck: Either
|
|
8424
|
+
`${targetName} healthcheck: Either 'command' or 'url' is required for healthcheck`
|
|
8383
8425
|
);
|
|
8384
8426
|
}
|
|
8385
|
-
const
|
|
8386
|
-
commandTemplateSource,
|
|
8387
|
-
env,
|
|
8388
|
-
`${targetName} healthcheck command template`,
|
|
8389
|
-
true
|
|
8390
|
-
);
|
|
8427
|
+
const command = resolveString(input.command, env, `${targetName} healthcheck command`, true);
|
|
8391
8428
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
|
|
8392
8429
|
allowLiteral: true,
|
|
8393
8430
|
optionalEnv: true
|
|
@@ -8399,24 +8436,14 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
8399
8436
|
cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
|
|
8400
8437
|
}
|
|
8401
8438
|
return {
|
|
8402
|
-
|
|
8403
|
-
commandTemplate,
|
|
8439
|
+
command,
|
|
8404
8440
|
cwd,
|
|
8405
8441
|
timeoutMs
|
|
8406
8442
|
};
|
|
8407
8443
|
}
|
|
8408
8444
|
function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
8409
8445
|
const targetName = input.name;
|
|
8410
|
-
const
|
|
8411
|
-
if (commandTemplateSource === void 0) {
|
|
8412
|
-
throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
|
|
8413
|
-
}
|
|
8414
|
-
const commandTemplate = resolveString(
|
|
8415
|
-
commandTemplateSource,
|
|
8416
|
-
env,
|
|
8417
|
-
`${targetName} CLI command template`,
|
|
8418
|
-
true
|
|
8419
|
-
);
|
|
8446
|
+
const command = resolveString(input.command, env, `${targetName} CLI command`, true);
|
|
8420
8447
|
const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
|
|
8421
8448
|
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
8422
8449
|
const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
|
|
@@ -8455,7 +8482,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
8455
8482
|
);
|
|
8456
8483
|
const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
|
|
8457
8484
|
return {
|
|
8458
|
-
|
|
8485
|
+
command,
|
|
8459
8486
|
filesFormat,
|
|
8460
8487
|
cwd,
|
|
8461
8488
|
workspaceTemplate,
|
|
@@ -8467,6 +8494,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
8467
8494
|
}
|
|
8468
8495
|
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
8469
8496
|
"PROMPT",
|
|
8497
|
+
"PROMPT_FILE",
|
|
8470
8498
|
"GUIDELINES",
|
|
8471
8499
|
"EVAL_ID",
|
|
8472
8500
|
"ATTEMPT",
|
|
@@ -9181,8 +9209,8 @@ var cliErrorMap = (issue, ctx) => {
|
|
|
9181
9209
|
if (issue.code === import_zod3.z.ZodIssueCode.unrecognized_keys) {
|
|
9182
9210
|
return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
|
|
9183
9211
|
}
|
|
9184
|
-
if (issue.code === import_zod3.z.ZodIssueCode.
|
|
9185
|
-
return { message: "healthcheck
|
|
9212
|
+
if (issue.code === import_zod3.z.ZodIssueCode.invalid_union) {
|
|
9213
|
+
return { message: "healthcheck must have either 'url' (HTTP) or 'command' (command)" };
|
|
9186
9214
|
}
|
|
9187
9215
|
if (issue.code === import_zod3.z.ZodIssueCode.invalid_type && issue.expected === "string") {
|
|
9188
9216
|
return { message: `${ctx.defaultError} (expected a string value)` };
|
|
@@ -9198,18 +9226,17 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
9198
9226
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
9199
9227
|
}
|
|
9200
9228
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
9201
|
-
assertSupportedCliPlaceholders(normalized.
|
|
9202
|
-
if (normalized.healthcheck
|
|
9229
|
+
assertSupportedCliPlaceholders(normalized.command, `${target.name} CLI command`);
|
|
9230
|
+
if ("command" in (normalized.healthcheck ?? {}) && normalized.healthcheck.command) {
|
|
9203
9231
|
assertSupportedCliPlaceholders(
|
|
9204
|
-
normalized.healthcheck.
|
|
9205
|
-
`${target.name} healthcheck command
|
|
9232
|
+
normalized.healthcheck.command,
|
|
9233
|
+
`${target.name} healthcheck command`
|
|
9206
9234
|
);
|
|
9207
9235
|
}
|
|
9208
9236
|
return normalized;
|
|
9209
9237
|
}
|
|
9210
9238
|
function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
|
|
9211
|
-
const
|
|
9212
|
-
const commandTemplate = commandTemplateSource ? resolveString(commandTemplateSource, env, `${target.name} command template`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
|
|
9239
|
+
const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
|
|
9213
9240
|
const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
|
|
9214
9241
|
const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
|
|
9215
9242
|
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
@@ -9223,7 +9250,7 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
|
|
|
9223
9250
|
cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
|
|
9224
9251
|
}
|
|
9225
9252
|
return {
|
|
9226
|
-
|
|
9253
|
+
command,
|
|
9227
9254
|
cwd,
|
|
9228
9255
|
timeoutMs
|
|
9229
9256
|
};
|
|
@@ -10926,7 +10953,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
10926
10953
|
}
|
|
10927
10954
|
registry.register(kindName, (target) => {
|
|
10928
10955
|
return new CliProvider(target.name, {
|
|
10929
|
-
|
|
10956
|
+
command: `bun run ${filePath} {PROMPT}`
|
|
10930
10957
|
});
|
|
10931
10958
|
});
|
|
10932
10959
|
discoveredKinds.push(kindName);
|
|
@@ -11439,13 +11466,13 @@ function toCamelCaseDeep(obj) {
|
|
|
11439
11466
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
11440
11467
|
var CodeEvaluator = class {
|
|
11441
11468
|
kind = "code";
|
|
11442
|
-
|
|
11469
|
+
command;
|
|
11443
11470
|
cwd;
|
|
11444
11471
|
agentTimeoutMs;
|
|
11445
11472
|
config;
|
|
11446
11473
|
target;
|
|
11447
11474
|
constructor(options) {
|
|
11448
|
-
this.
|
|
11475
|
+
this.command = options.command ?? options.script ?? [];
|
|
11449
11476
|
this.cwd = options.cwd;
|
|
11450
11477
|
this.agentTimeoutMs = options.agentTimeoutMs;
|
|
11451
11478
|
this.config = options.config;
|
|
@@ -11504,7 +11531,7 @@ var CodeEvaluator = class {
|
|
|
11504
11531
|
const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
|
|
11505
11532
|
try {
|
|
11506
11533
|
const stdout = await executeScript(
|
|
11507
|
-
this.
|
|
11534
|
+
this.command,
|
|
11508
11535
|
inputPayload,
|
|
11509
11536
|
this.agentTimeoutMs,
|
|
11510
11537
|
this.cwd,
|
|
@@ -11518,7 +11545,7 @@ var CodeEvaluator = class {
|
|
|
11518
11545
|
const details = parsed?.details && typeof parsed.details === "object" && !Array.isArray(parsed.details) ? parsed.details : void 0;
|
|
11519
11546
|
const proxyUsage = getProxyUsage?.();
|
|
11520
11547
|
const evaluatorRawRequest = {
|
|
11521
|
-
|
|
11548
|
+
command: this.command,
|
|
11522
11549
|
...this.cwd ? { cwd: this.cwd } : {},
|
|
11523
11550
|
...proxyUsage ? {
|
|
11524
11551
|
target_proxy: {
|
|
@@ -11548,7 +11575,7 @@ var CodeEvaluator = class {
|
|
|
11548
11575
|
expectedAspectCount: 1,
|
|
11549
11576
|
reasoning: message,
|
|
11550
11577
|
evaluatorRawRequest: {
|
|
11551
|
-
|
|
11578
|
+
command: this.command,
|
|
11552
11579
|
...this.cwd ? { cwd: this.cwd } : {},
|
|
11553
11580
|
...proxyUsage ? {
|
|
11554
11581
|
target_proxy: {
|
|
@@ -14507,7 +14534,7 @@ var llmJudgeFactory = (config, context2) => {
|
|
|
14507
14534
|
var codeFactory = (config, context2) => {
|
|
14508
14535
|
const c = config;
|
|
14509
14536
|
return new CodeEvaluator({
|
|
14510
|
-
|
|
14537
|
+
command: c.command ?? c.script ?? [],
|
|
14511
14538
|
cwd: c.resolvedCwd ?? c.cwd,
|
|
14512
14539
|
agentTimeoutMs: context2.agentTimeoutMs,
|
|
14513
14540
|
config: c.config,
|
|
@@ -14689,7 +14716,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
14689
14716
|
}
|
|
14690
14717
|
const factory = (_config, context2) => {
|
|
14691
14718
|
return new CodeEvaluator({
|
|
14692
|
-
|
|
14719
|
+
command: ["bun", "run", filePath],
|
|
14693
14720
|
agentTimeoutMs: context2.agentTimeoutMs
|
|
14694
14721
|
});
|
|
14695
14722
|
};
|
|
@@ -15043,7 +15070,8 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
|
|
|
15043
15070
|
});
|
|
15044
15071
|
const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
|
|
15045
15072
|
const cwd = config.cwd;
|
|
15046
|
-
const
|
|
15073
|
+
const commandArray = config.command ?? config.script ?? [];
|
|
15074
|
+
const result = await execFileWithStdin(commandArray, stdin, {
|
|
15047
15075
|
timeoutMs,
|
|
15048
15076
|
cwd
|
|
15049
15077
|
});
|
|
@@ -15090,7 +15118,8 @@ async function runEvaluation(options) {
|
|
|
15090
15118
|
keepWorkspaces,
|
|
15091
15119
|
cleanupWorkspaces,
|
|
15092
15120
|
trials,
|
|
15093
|
-
streamCallbacks
|
|
15121
|
+
streamCallbacks,
|
|
15122
|
+
totalBudgetUsd
|
|
15094
15123
|
} = options;
|
|
15095
15124
|
let useCache = options.useCache;
|
|
15096
15125
|
if (trials && trials.count > 1 && useCache) {
|
|
@@ -15263,10 +15292,39 @@ async function runEvaluation(options) {
|
|
|
15263
15292
|
let nextWorkerId = 1;
|
|
15264
15293
|
const workerIdByEvalId = /* @__PURE__ */ new Map();
|
|
15265
15294
|
let beforeAllOutputAttached = false;
|
|
15295
|
+
let cumulativeBudgetCost = 0;
|
|
15296
|
+
let budgetExhausted = false;
|
|
15266
15297
|
const promises = filteredEvalCases.map(
|
|
15267
15298
|
(evalCase) => limit(async () => {
|
|
15268
15299
|
const workerId = nextWorkerId++;
|
|
15269
15300
|
workerIdByEvalId.set(evalCase.id, workerId);
|
|
15301
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
15302
|
+
const budgetResult = {
|
|
15303
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
15304
|
+
testId: evalCase.id,
|
|
15305
|
+
dataset: evalCase.dataset,
|
|
15306
|
+
score: 0,
|
|
15307
|
+
hits: [],
|
|
15308
|
+
misses: [],
|
|
15309
|
+
answer: "",
|
|
15310
|
+
target: target.name,
|
|
15311
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
15312
|
+
budgetExceeded: true
|
|
15313
|
+
};
|
|
15314
|
+
if (onProgress) {
|
|
15315
|
+
await onProgress({
|
|
15316
|
+
workerId,
|
|
15317
|
+
testId: evalCase.id,
|
|
15318
|
+
status: "failed",
|
|
15319
|
+
completedAt: Date.now(),
|
|
15320
|
+
error: budgetResult.error
|
|
15321
|
+
});
|
|
15322
|
+
}
|
|
15323
|
+
if (onResult) {
|
|
15324
|
+
await onResult(budgetResult);
|
|
15325
|
+
}
|
|
15326
|
+
return budgetResult;
|
|
15327
|
+
}
|
|
15270
15328
|
if (onProgress) {
|
|
15271
15329
|
await onProgress({
|
|
15272
15330
|
workerId,
|
|
@@ -15300,6 +15358,23 @@ async function runEvaluation(options) {
|
|
|
15300
15358
|
typeRegistry
|
|
15301
15359
|
};
|
|
15302
15360
|
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
15361
|
+
if (totalBudgetUsd !== void 0) {
|
|
15362
|
+
let caseCost;
|
|
15363
|
+
if (result.trials && result.trials.length > 0) {
|
|
15364
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
15365
|
+
if (trialCostSum > 0) {
|
|
15366
|
+
caseCost = trialCostSum;
|
|
15367
|
+
}
|
|
15368
|
+
} else {
|
|
15369
|
+
caseCost = result.trace?.costUsd;
|
|
15370
|
+
}
|
|
15371
|
+
if (caseCost !== void 0) {
|
|
15372
|
+
cumulativeBudgetCost += caseCost;
|
|
15373
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
15374
|
+
budgetExhausted = true;
|
|
15375
|
+
}
|
|
15376
|
+
}
|
|
15377
|
+
}
|
|
15303
15378
|
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
15304
15379
|
result = { ...result, beforeAllOutput };
|
|
15305
15380
|
beforeAllOutputAttached = true;
|