@agentv/core 2.7.1-next.4 → 2.7.1-next.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-6W5E3VR6.js → chunk-5SV2QC6V.js} +34 -47
- package/dist/chunk-5SV2QC6V.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +48 -57
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +35 -44
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +244 -219
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +38 -40
- package/dist/index.d.ts +38 -40
- package/dist/index.js +212 -174
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-6W5E3VR6.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
readTextFile,
|
|
18
18
|
resolveFileReference,
|
|
19
19
|
resolveTargetDefinition
|
|
20
|
-
} from "./chunk-
|
|
20
|
+
} from "./chunk-5SV2QC6V.js";
|
|
21
21
|
import {
|
|
22
22
|
OtlpJsonFileExporter
|
|
23
23
|
} from "./chunk-HFSYZHGF.js";
|
|
@@ -670,24 +670,24 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
670
670
|
continue;
|
|
671
671
|
}
|
|
672
672
|
if (typeValue === "code_judge") {
|
|
673
|
-
let
|
|
674
|
-
const
|
|
675
|
-
if (typeof
|
|
676
|
-
const trimmed =
|
|
673
|
+
let command;
|
|
674
|
+
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
675
|
+
if (typeof rawCommand === "string") {
|
|
676
|
+
const trimmed = rawCommand.trim();
|
|
677
677
|
if (trimmed.length === 0) {
|
|
678
678
|
throw new Error(
|
|
679
|
-
`Invalid code_judge
|
|
679
|
+
`Invalid code_judge command for evaluator '${name}' in '${evalId}': command cannot be empty`
|
|
680
680
|
);
|
|
681
681
|
}
|
|
682
|
-
|
|
682
|
+
command = parseCommandToArgv(trimmed);
|
|
683
683
|
} else {
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
`code_judge
|
|
684
|
+
command = asStringArray(
|
|
685
|
+
rawCommand,
|
|
686
|
+
`code_judge command for evaluator '${name}' in '${evalId}'`
|
|
687
687
|
);
|
|
688
688
|
}
|
|
689
|
-
if (!
|
|
690
|
-
logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing
|
|
689
|
+
if (!command) {
|
|
690
|
+
logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing command`);
|
|
691
691
|
continue;
|
|
692
692
|
}
|
|
693
693
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
@@ -732,6 +732,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
732
732
|
const knownProps2 = /* @__PURE__ */ new Set([
|
|
733
733
|
"name",
|
|
734
734
|
"type",
|
|
735
|
+
"command",
|
|
735
736
|
"script",
|
|
736
737
|
"cwd",
|
|
737
738
|
"weight",
|
|
@@ -748,7 +749,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
748
749
|
evaluators.push({
|
|
749
750
|
name,
|
|
750
751
|
type: "code",
|
|
751
|
-
|
|
752
|
+
command,
|
|
752
753
|
cwd,
|
|
753
754
|
resolvedCwd,
|
|
754
755
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
@@ -1350,20 +1351,20 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1350
1351
|
let resolvedPromptScript;
|
|
1351
1352
|
let promptScriptConfig;
|
|
1352
1353
|
if (isJsonObject2(rawPrompt)) {
|
|
1353
|
-
const
|
|
1354
|
-
rawPrompt.script,
|
|
1355
|
-
`prompt.
|
|
1354
|
+
const commandArray = asStringArray(
|
|
1355
|
+
rawPrompt.command ?? rawPrompt.script,
|
|
1356
|
+
`prompt.command for evaluator '${name}' in '${evalId}'`
|
|
1356
1357
|
);
|
|
1357
|
-
if (!
|
|
1358
|
-
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires
|
|
1358
|
+
if (!commandArray) {
|
|
1359
|
+
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
|
|
1359
1360
|
}
|
|
1360
|
-
const
|
|
1361
|
-
const resolved = await resolveFileReference3(
|
|
1361
|
+
const commandPath = commandArray[commandArray.length - 1];
|
|
1362
|
+
const resolved = await resolveFileReference3(commandPath, searchRoots);
|
|
1362
1363
|
if (resolved.resolvedPath) {
|
|
1363
|
-
resolvedPromptScript = [...
|
|
1364
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
|
|
1364
1365
|
} else {
|
|
1365
1366
|
throw new Error(
|
|
1366
|
-
`Evaluator '${name}' in '${evalId}': prompt
|
|
1367
|
+
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
1367
1368
|
);
|
|
1368
1369
|
}
|
|
1369
1370
|
if (isJsonObject2(rawPrompt.config)) {
|
|
@@ -2637,6 +2638,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2637
2638
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
2638
2639
|
}
|
|
2639
2640
|
const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
|
|
2641
|
+
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
2640
2642
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
2641
2643
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
2642
2644
|
const globalExecution = suite.assert !== void 0 ? { ...rawGlobalExecution ?? {}, assert: suite.assert } : rawGlobalExecution;
|
|
@@ -2661,14 +2663,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
2661
2663
|
);
|
|
2662
2664
|
}
|
|
2663
2665
|
}
|
|
2664
|
-
const
|
|
2666
|
+
const testInputMessages = resolveInputMessages(evalcase);
|
|
2665
2667
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
2666
|
-
if (!id || !outcome || !
|
|
2668
|
+
if (!id || !outcome || !testInputMessages || testInputMessages.length === 0) {
|
|
2667
2669
|
logError2(
|
|
2668
2670
|
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
|
|
2669
2671
|
);
|
|
2670
2672
|
continue;
|
|
2671
2673
|
}
|
|
2674
|
+
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
2675
|
+
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
2676
|
+
const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
2672
2677
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
2673
2678
|
const guidelinePaths = [];
|
|
2674
2679
|
const inputTextParts = [];
|
|
@@ -2780,16 +2785,16 @@ var loadEvalCaseById = loadTestById;
|
|
|
2780
2785
|
function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
2781
2786
|
if (!isJsonObject(raw)) return void 0;
|
|
2782
2787
|
const obj = raw;
|
|
2783
|
-
const
|
|
2784
|
-
if (!Array.isArray(
|
|
2785
|
-
const
|
|
2786
|
-
if (
|
|
2788
|
+
const commandSource = obj.command ?? obj.script;
|
|
2789
|
+
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
2790
|
+
const commandArr = commandSource.filter((s) => typeof s === "string");
|
|
2791
|
+
if (commandArr.length === 0) return void 0;
|
|
2787
2792
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
2788
2793
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
2789
2794
|
if (cwd && !path8.isAbsolute(cwd)) {
|
|
2790
2795
|
cwd = path8.resolve(evalFileDir, cwd);
|
|
2791
2796
|
}
|
|
2792
|
-
const config = {
|
|
2797
|
+
const config = { command: commandArr };
|
|
2793
2798
|
if (timeoutMs !== void 0) {
|
|
2794
2799
|
return { ...config, timeout_ms: timeoutMs, ...cwd !== void 0 && { cwd } };
|
|
2795
2800
|
}
|
|
@@ -3867,50 +3872,58 @@ var CliProvider = class {
|
|
|
3867
3872
|
await this.ensureHealthy(request.signal);
|
|
3868
3873
|
const effectiveCwd = request.cwd ?? this.config.cwd;
|
|
3869
3874
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
3870
|
-
const templateValues = buildTemplateValues(
|
|
3871
|
-
|
|
3875
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
3876
|
+
request,
|
|
3877
|
+
this.config,
|
|
3878
|
+
outputFilePath
|
|
3879
|
+
);
|
|
3880
|
+
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
3872
3881
|
if (this.verbose) {
|
|
3873
3882
|
console.log(
|
|
3874
3883
|
`[cli-provider:${this.targetName}] cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
|
|
3875
3884
|
);
|
|
3876
3885
|
}
|
|
3877
|
-
|
|
3878
|
-
|
|
3879
|
-
|
|
3880
|
-
env: process.env,
|
|
3881
|
-
timeoutMs: this.config.timeoutMs,
|
|
3882
|
-
signal: request.signal
|
|
3883
|
-
});
|
|
3884
|
-
const measuredDurationMs = Date.now() - startTime;
|
|
3885
|
-
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
3886
|
-
if (request.signal?.aborted) {
|
|
3887
|
-
throw new Error("CLI provider request was aborted");
|
|
3888
|
-
}
|
|
3889
|
-
if (result.timedOut) {
|
|
3890
|
-
throw new Error(
|
|
3891
|
-
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
3892
|
-
);
|
|
3893
|
-
}
|
|
3894
|
-
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
3895
|
-
const detail = result.stderr.trim() || result.stdout.trim();
|
|
3896
|
-
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
3897
|
-
throw new Error(message);
|
|
3898
|
-
}
|
|
3899
|
-
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
3900
|
-
const parsed = this.parseOutputContent(responseContent);
|
|
3901
|
-
return {
|
|
3902
|
-
output: parsed.output,
|
|
3903
|
-
tokenUsage: parsed.tokenUsage,
|
|
3904
|
-
costUsd: parsed.costUsd,
|
|
3905
|
-
durationMs: parsed.durationMs ?? measuredDurationMs,
|
|
3906
|
-
raw: {
|
|
3907
|
-
command: renderedCommand,
|
|
3908
|
-
stderr: result.stderr,
|
|
3909
|
-
exitCode: result.exitCode ?? 0,
|
|
3886
|
+
try {
|
|
3887
|
+
const startTime = Date.now();
|
|
3888
|
+
const result = await this.runCommand(renderedCommand, {
|
|
3910
3889
|
cwd: effectiveCwd,
|
|
3911
|
-
|
|
3890
|
+
env: process.env,
|
|
3891
|
+
timeoutMs: this.config.timeoutMs,
|
|
3892
|
+
signal: request.signal
|
|
3893
|
+
});
|
|
3894
|
+
const measuredDurationMs = Date.now() - startTime;
|
|
3895
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
3896
|
+
if (request.signal?.aborted) {
|
|
3897
|
+
throw new Error("CLI provider request was aborted");
|
|
3898
|
+
}
|
|
3899
|
+
if (result.timedOut) {
|
|
3900
|
+
throw new Error(
|
|
3901
|
+
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
3902
|
+
);
|
|
3903
|
+
}
|
|
3904
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
3905
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
3906
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
3907
|
+
throw new Error(message);
|
|
3912
3908
|
}
|
|
3913
|
-
|
|
3909
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
3910
|
+
const parsed = this.parseOutputContent(responseContent);
|
|
3911
|
+
return {
|
|
3912
|
+
output: parsed.output,
|
|
3913
|
+
tokenUsage: parsed.tokenUsage,
|
|
3914
|
+
costUsd: parsed.costUsd,
|
|
3915
|
+
durationMs: parsed.durationMs ?? measuredDurationMs,
|
|
3916
|
+
raw: {
|
|
3917
|
+
command: renderedCommand,
|
|
3918
|
+
stderr: result.stderr,
|
|
3919
|
+
exitCode: result.exitCode ?? 0,
|
|
3920
|
+
cwd: effectiveCwd,
|
|
3921
|
+
outputFile: outputFilePath
|
|
3922
|
+
}
|
|
3923
|
+
};
|
|
3924
|
+
} finally {
|
|
3925
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
3926
|
+
}
|
|
3914
3927
|
}
|
|
3915
3928
|
async invokeBatch(requests) {
|
|
3916
3929
|
if (requests.length === 0) {
|
|
@@ -3933,7 +3946,7 @@ var CliProvider = class {
|
|
|
3933
3946
|
batchInputFiles.push(...request.inputFiles);
|
|
3934
3947
|
}
|
|
3935
3948
|
}
|
|
3936
|
-
const templateValues = buildTemplateValues(
|
|
3949
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
3937
3950
|
{
|
|
3938
3951
|
question: "",
|
|
3939
3952
|
guidelines: "",
|
|
@@ -3944,87 +3957,91 @@ var CliProvider = class {
|
|
|
3944
3957
|
this.config,
|
|
3945
3958
|
outputFilePath
|
|
3946
3959
|
);
|
|
3947
|
-
const renderedCommand = renderTemplate(this.config.
|
|
3960
|
+
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
3948
3961
|
if (this.verbose) {
|
|
3949
3962
|
console.log(
|
|
3950
3963
|
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
|
|
3951
3964
|
);
|
|
3952
3965
|
}
|
|
3953
|
-
|
|
3954
|
-
|
|
3955
|
-
|
|
3956
|
-
|
|
3957
|
-
|
|
3958
|
-
|
|
3959
|
-
|
|
3960
|
-
|
|
3961
|
-
|
|
3962
|
-
if (
|
|
3963
|
-
|
|
3964
|
-
|
|
3965
|
-
|
|
3966
|
-
|
|
3967
|
-
|
|
3968
|
-
|
|
3969
|
-
|
|
3970
|
-
|
|
3971
|
-
|
|
3972
|
-
|
|
3973
|
-
|
|
3974
|
-
|
|
3975
|
-
|
|
3976
|
-
|
|
3977
|
-
|
|
3978
|
-
|
|
3979
|
-
const
|
|
3980
|
-
|
|
3981
|
-
|
|
3982
|
-
|
|
3983
|
-
|
|
3984
|
-
|
|
3985
|
-
|
|
3986
|
-
|
|
3987
|
-
|
|
3988
|
-
|
|
3989
|
-
|
|
3966
|
+
try {
|
|
3967
|
+
const startTime = Date.now();
|
|
3968
|
+
const result = await this.runCommand(renderedCommand, {
|
|
3969
|
+
cwd: this.config.cwd,
|
|
3970
|
+
env: process.env,
|
|
3971
|
+
timeoutMs: this.config.timeoutMs,
|
|
3972
|
+
signal: controller.signal
|
|
3973
|
+
});
|
|
3974
|
+
const measuredDurationMs = Date.now() - startTime;
|
|
3975
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
3976
|
+
if (controller.signal.aborted) {
|
|
3977
|
+
throw new Error("CLI provider request was aborted");
|
|
3978
|
+
}
|
|
3979
|
+
if (result.timedOut) {
|
|
3980
|
+
throw new Error(
|
|
3981
|
+
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
3982
|
+
);
|
|
3983
|
+
}
|
|
3984
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
3985
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
3986
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
3987
|
+
throw new Error(message);
|
|
3988
|
+
}
|
|
3989
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
3990
|
+
const recordsById = this.parseJsonlBatchOutput(responseContent);
|
|
3991
|
+
const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
|
|
3992
|
+
const responses = requests.map((request) => {
|
|
3993
|
+
const evalCaseId = request.evalCaseId;
|
|
3994
|
+
if (!evalCaseId) {
|
|
3995
|
+
return {
|
|
3996
|
+
output: [],
|
|
3997
|
+
durationMs: perRequestFallbackMs,
|
|
3998
|
+
raw: {
|
|
3999
|
+
command: renderedCommand,
|
|
4000
|
+
stderr: result.stderr,
|
|
4001
|
+
exitCode: result.exitCode ?? 0,
|
|
4002
|
+
cwd: this.config.cwd,
|
|
4003
|
+
outputFile: outputFilePath
|
|
4004
|
+
}
|
|
4005
|
+
};
|
|
4006
|
+
}
|
|
4007
|
+
const parsed = recordsById.get(evalCaseId);
|
|
4008
|
+
if (!parsed) {
|
|
4009
|
+
const errorMessage = `Batch output missing id '${evalCaseId}'`;
|
|
4010
|
+
if (this.verbose) {
|
|
4011
|
+
console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
|
|
3990
4012
|
}
|
|
3991
|
-
|
|
3992
|
-
|
|
3993
|
-
|
|
3994
|
-
|
|
3995
|
-
|
|
3996
|
-
|
|
3997
|
-
|
|
4013
|
+
return {
|
|
4014
|
+
output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
|
|
4015
|
+
durationMs: perRequestFallbackMs,
|
|
4016
|
+
raw: {
|
|
4017
|
+
command: renderedCommand,
|
|
4018
|
+
stderr: result.stderr,
|
|
4019
|
+
exitCode: result.exitCode ?? 0,
|
|
4020
|
+
cwd: this.config.cwd,
|
|
4021
|
+
outputFile: outputFilePath,
|
|
4022
|
+
error: errorMessage
|
|
4023
|
+
}
|
|
4024
|
+
};
|
|
3998
4025
|
}
|
|
3999
4026
|
return {
|
|
4000
|
-
output:
|
|
4001
|
-
|
|
4027
|
+
output: parsed.output,
|
|
4028
|
+
tokenUsage: parsed.tokenUsage,
|
|
4029
|
+
costUsd: parsed.costUsd,
|
|
4030
|
+
durationMs: parsed.durationMs ?? perRequestFallbackMs,
|
|
4002
4031
|
raw: {
|
|
4003
4032
|
command: renderedCommand,
|
|
4004
4033
|
stderr: result.stderr,
|
|
4005
4034
|
exitCode: result.exitCode ?? 0,
|
|
4006
4035
|
cwd: this.config.cwd,
|
|
4007
4036
|
outputFile: outputFilePath,
|
|
4008
|
-
|
|
4037
|
+
recordId: evalCaseId
|
|
4009
4038
|
}
|
|
4010
4039
|
};
|
|
4011
|
-
}
|
|
4012
|
-
return
|
|
4013
|
-
|
|
4014
|
-
|
|
4015
|
-
|
|
4016
|
-
durationMs: parsed.durationMs ?? perRequestFallbackMs,
|
|
4017
|
-
raw: {
|
|
4018
|
-
command: renderedCommand,
|
|
4019
|
-
stderr: result.stderr,
|
|
4020
|
-
exitCode: result.exitCode ?? 0,
|
|
4021
|
-
cwd: this.config.cwd,
|
|
4022
|
-
outputFile: outputFilePath,
|
|
4023
|
-
recordId: evalCaseId
|
|
4024
|
-
}
|
|
4025
|
-
};
|
|
4026
|
-
});
|
|
4027
|
-
return responses;
|
|
4040
|
+
});
|
|
4041
|
+
return responses;
|
|
4042
|
+
} finally {
|
|
4043
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
4044
|
+
}
|
|
4028
4045
|
}
|
|
4029
4046
|
/**
|
|
4030
4047
|
* Parse output content from CLI.
|
|
@@ -4139,7 +4156,7 @@ var CliProvider = class {
|
|
|
4139
4156
|
return;
|
|
4140
4157
|
}
|
|
4141
4158
|
const timeoutMs = healthcheck.timeoutMs ?? this.config.timeoutMs;
|
|
4142
|
-
if (healthcheck
|
|
4159
|
+
if ("url" in healthcheck && healthcheck.url) {
|
|
4143
4160
|
const controller = new AbortController();
|
|
4144
4161
|
const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
|
|
4145
4162
|
signal?.addEventListener("abort", () => controller.abort(), { once: true });
|
|
@@ -4158,50 +4175,70 @@ var CliProvider = class {
|
|
|
4158
4175
|
}
|
|
4159
4176
|
return;
|
|
4160
4177
|
}
|
|
4161
|
-
const
|
|
4162
|
-
|
|
4163
|
-
|
|
4164
|
-
|
|
4165
|
-
|
|
4166
|
-
|
|
4167
|
-
|
|
4168
|
-
|
|
4169
|
-
|
|
4170
|
-
|
|
4171
|
-
|
|
4172
|
-
|
|
4173
|
-
|
|
4178
|
+
const hcCommand = "command" in healthcheck ? healthcheck.command : void 0;
|
|
4179
|
+
if (!hcCommand) {
|
|
4180
|
+
throw new Error(`CLI healthcheck for '${this.targetName}': 'command' or 'url' is required`);
|
|
4181
|
+
}
|
|
4182
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
4183
|
+
{
|
|
4184
|
+
question: "",
|
|
4185
|
+
guidelines: "",
|
|
4186
|
+
inputFiles: [],
|
|
4187
|
+
evalCaseId: "healthcheck",
|
|
4188
|
+
attempt: 0
|
|
4189
|
+
},
|
|
4190
|
+
this.config,
|
|
4191
|
+
generateOutputFilePath("healthcheck")
|
|
4174
4192
|
);
|
|
4193
|
+
const renderedCommand = renderTemplate(hcCommand, templateValues);
|
|
4194
|
+
const hcCwd = "cwd" in healthcheck ? healthcheck.cwd : void 0;
|
|
4175
4195
|
if (this.verbose) {
|
|
4176
4196
|
console.log(
|
|
4177
|
-
`[cli-provider:${this.targetName}] (healthcheck) cwd=${
|
|
4197
|
+
`[cli-provider:${this.targetName}] (healthcheck) cwd=${hcCwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
|
|
4178
4198
|
);
|
|
4179
4199
|
}
|
|
4180
|
-
|
|
4181
|
-
|
|
4182
|
-
|
|
4183
|
-
|
|
4184
|
-
|
|
4185
|
-
|
|
4186
|
-
|
|
4187
|
-
|
|
4188
|
-
|
|
4189
|
-
|
|
4190
|
-
|
|
4200
|
+
try {
|
|
4201
|
+
const result = await this.runCommand(renderedCommand, {
|
|
4202
|
+
cwd: hcCwd ?? this.config.cwd,
|
|
4203
|
+
env: process.env,
|
|
4204
|
+
timeoutMs,
|
|
4205
|
+
signal
|
|
4206
|
+
});
|
|
4207
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
4208
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
4209
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
4210
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
|
|
4211
|
+
throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
|
|
4212
|
+
}
|
|
4213
|
+
} finally {
|
|
4214
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
4191
4215
|
}
|
|
4192
4216
|
}
|
|
4193
4217
|
};
|
|
4194
|
-
function buildTemplateValues(request, config, outputFilePath) {
|
|
4218
|
+
async function buildTemplateValues(request, config, outputFilePath) {
|
|
4195
4219
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
4220
|
+
const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
|
|
4221
|
+
await fs.writeFile(promptFilePath, request.question ?? "", "utf8");
|
|
4196
4222
|
return {
|
|
4197
|
-
|
|
4198
|
-
|
|
4199
|
-
|
|
4200
|
-
|
|
4201
|
-
|
|
4202
|
-
|
|
4223
|
+
values: {
|
|
4224
|
+
PROMPT: shellEscape(request.question ?? ""),
|
|
4225
|
+
PROMPT_FILE: shellEscape(promptFilePath),
|
|
4226
|
+
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
4227
|
+
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
4228
|
+
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
4229
|
+
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
4230
|
+
OUTPUT_FILE: shellEscape(outputFilePath)
|
|
4231
|
+
},
|
|
4232
|
+
promptFilePath
|
|
4203
4233
|
};
|
|
4204
4234
|
}
|
|
4235
|
+
async function cleanupTempFile(filePath, keepTempFiles) {
|
|
4236
|
+
if (!filePath || keepTempFiles) {
|
|
4237
|
+
return;
|
|
4238
|
+
}
|
|
4239
|
+
await fs.unlink(filePath).catch(() => {
|
|
4240
|
+
});
|
|
4241
|
+
}
|
|
4205
4242
|
function normalizeInputFiles2(inputFiles) {
|
|
4206
4243
|
if (!inputFiles || inputFiles.length === 0) {
|
|
4207
4244
|
return void 0;
|
|
@@ -8082,7 +8119,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
8082
8119
|
}
|
|
8083
8120
|
registry.register(kindName, (target) => {
|
|
8084
8121
|
return new CliProvider(target.name, {
|
|
8085
|
-
|
|
8122
|
+
command: `bun run ${filePath} {PROMPT}`
|
|
8086
8123
|
});
|
|
8087
8124
|
});
|
|
8088
8125
|
discoveredKinds.push(kindName);
|
|
@@ -8595,13 +8632,13 @@ function toCamelCaseDeep(obj) {
|
|
|
8595
8632
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
8596
8633
|
var CodeEvaluator = class {
|
|
8597
8634
|
kind = "code";
|
|
8598
|
-
|
|
8635
|
+
command;
|
|
8599
8636
|
cwd;
|
|
8600
8637
|
agentTimeoutMs;
|
|
8601
8638
|
config;
|
|
8602
8639
|
target;
|
|
8603
8640
|
constructor(options) {
|
|
8604
|
-
this.
|
|
8641
|
+
this.command = options.command ?? options.script ?? [];
|
|
8605
8642
|
this.cwd = options.cwd;
|
|
8606
8643
|
this.agentTimeoutMs = options.agentTimeoutMs;
|
|
8607
8644
|
this.config = options.config;
|
|
@@ -8660,7 +8697,7 @@ var CodeEvaluator = class {
|
|
|
8660
8697
|
const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
|
|
8661
8698
|
try {
|
|
8662
8699
|
const stdout = await executeScript(
|
|
8663
|
-
this.
|
|
8700
|
+
this.command,
|
|
8664
8701
|
inputPayload,
|
|
8665
8702
|
this.agentTimeoutMs,
|
|
8666
8703
|
this.cwd,
|
|
@@ -8674,7 +8711,7 @@ var CodeEvaluator = class {
|
|
|
8674
8711
|
const details = parsed?.details && typeof parsed.details === "object" && !Array.isArray(parsed.details) ? parsed.details : void 0;
|
|
8675
8712
|
const proxyUsage = getProxyUsage?.();
|
|
8676
8713
|
const evaluatorRawRequest = {
|
|
8677
|
-
|
|
8714
|
+
command: this.command,
|
|
8678
8715
|
...this.cwd ? { cwd: this.cwd } : {},
|
|
8679
8716
|
...proxyUsage ? {
|
|
8680
8717
|
target_proxy: {
|
|
@@ -8704,7 +8741,7 @@ var CodeEvaluator = class {
|
|
|
8704
8741
|
expectedAspectCount: 1,
|
|
8705
8742
|
reasoning: message,
|
|
8706
8743
|
evaluatorRawRequest: {
|
|
8707
|
-
|
|
8744
|
+
command: this.command,
|
|
8708
8745
|
...this.cwd ? { cwd: this.cwd } : {},
|
|
8709
8746
|
...proxyUsage ? {
|
|
8710
8747
|
target_proxy: {
|
|
@@ -11634,7 +11671,7 @@ var llmJudgeFactory = (config, context) => {
|
|
|
11634
11671
|
var codeFactory = (config, context) => {
|
|
11635
11672
|
const c = config;
|
|
11636
11673
|
return new CodeEvaluator({
|
|
11637
|
-
|
|
11674
|
+
command: c.command ?? c.script ?? [],
|
|
11638
11675
|
cwd: c.resolvedCwd ?? c.cwd,
|
|
11639
11676
|
agentTimeoutMs: context.agentTimeoutMs,
|
|
11640
11677
|
config: c.config,
|
|
@@ -11816,7 +11853,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
11816
11853
|
}
|
|
11817
11854
|
const factory = (_config, context) => {
|
|
11818
11855
|
return new CodeEvaluator({
|
|
11819
|
-
|
|
11856
|
+
command: ["bun", "run", filePath],
|
|
11820
11857
|
agentTimeoutMs: context.agentTimeoutMs
|
|
11821
11858
|
});
|
|
11822
11859
|
};
|
|
@@ -12170,7 +12207,8 @@ async function executeWorkspaceScript(config, context, failureMode = "fatal") {
|
|
|
12170
12207
|
});
|
|
12171
12208
|
const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
|
|
12172
12209
|
const cwd = config.cwd;
|
|
12173
|
-
const
|
|
12210
|
+
const commandArray = config.command ?? config.script ?? [];
|
|
12211
|
+
const result = await execFileWithStdin(commandArray, stdin, {
|
|
12174
12212
|
timeoutMs,
|
|
12175
12213
|
cwd
|
|
12176
12214
|
});
|