@agentv/core 2.7.1-next.4 → 2.7.1-next.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-6W5E3VR6.js → chunk-5SV2QC6V.js} +34 -47
- package/dist/chunk-5SV2QC6V.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +48 -57
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +35 -44
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +244 -219
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +38 -40
- package/dist/index.d.ts +38 -40
- package/dist/index.js +212 -174
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-6W5E3VR6.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -2273,24 +2273,24 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2273
2273
|
continue;
|
|
2274
2274
|
}
|
|
2275
2275
|
if (typeValue === "code_judge") {
|
|
2276
|
-
let
|
|
2277
|
-
const
|
|
2278
|
-
if (typeof
|
|
2279
|
-
const trimmed =
|
|
2276
|
+
let command;
|
|
2277
|
+
const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
|
|
2278
|
+
if (typeof rawCommand === "string") {
|
|
2279
|
+
const trimmed = rawCommand.trim();
|
|
2280
2280
|
if (trimmed.length === 0) {
|
|
2281
2281
|
throw new Error(
|
|
2282
|
-
`Invalid code_judge
|
|
2282
|
+
`Invalid code_judge command for evaluator '${name}' in '${evalId}': command cannot be empty`
|
|
2283
2283
|
);
|
|
2284
2284
|
}
|
|
2285
|
-
|
|
2285
|
+
command = parseCommandToArgv(trimmed);
|
|
2286
2286
|
} else {
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
`code_judge
|
|
2287
|
+
command = asStringArray(
|
|
2288
|
+
rawCommand,
|
|
2289
|
+
`code_judge command for evaluator '${name}' in '${evalId}'`
|
|
2290
2290
|
);
|
|
2291
2291
|
}
|
|
2292
|
-
if (!
|
|
2293
|
-
logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing
|
|
2292
|
+
if (!command) {
|
|
2293
|
+
logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing command`);
|
|
2294
2294
|
continue;
|
|
2295
2295
|
}
|
|
2296
2296
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
@@ -2335,6 +2335,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2335
2335
|
const knownProps2 = /* @__PURE__ */ new Set([
|
|
2336
2336
|
"name",
|
|
2337
2337
|
"type",
|
|
2338
|
+
"command",
|
|
2338
2339
|
"script",
|
|
2339
2340
|
"cwd",
|
|
2340
2341
|
"weight",
|
|
@@ -2351,7 +2352,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2351
2352
|
evaluators.push({
|
|
2352
2353
|
name,
|
|
2353
2354
|
type: "code",
|
|
2354
|
-
|
|
2355
|
+
command,
|
|
2355
2356
|
cwd,
|
|
2356
2357
|
resolvedCwd,
|
|
2357
2358
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
@@ -2953,20 +2954,20 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
2953
2954
|
let resolvedPromptScript;
|
|
2954
2955
|
let promptScriptConfig;
|
|
2955
2956
|
if (isJsonObject2(rawPrompt)) {
|
|
2956
|
-
const
|
|
2957
|
-
rawPrompt.script,
|
|
2958
|
-
`prompt.
|
|
2957
|
+
const commandArray = asStringArray(
|
|
2958
|
+
rawPrompt.command ?? rawPrompt.script,
|
|
2959
|
+
`prompt.command for evaluator '${name}' in '${evalId}'`
|
|
2959
2960
|
);
|
|
2960
|
-
if (!
|
|
2961
|
-
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires
|
|
2961
|
+
if (!commandArray) {
|
|
2962
|
+
throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
|
|
2962
2963
|
}
|
|
2963
|
-
const
|
|
2964
|
-
const resolved = await resolveFileReference2(
|
|
2964
|
+
const commandPath = commandArray[commandArray.length - 1];
|
|
2965
|
+
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
2965
2966
|
if (resolved.resolvedPath) {
|
|
2966
|
-
resolvedPromptScript = [...
|
|
2967
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), import_node_path4.default.resolve(resolved.resolvedPath)];
|
|
2967
2968
|
} else {
|
|
2968
2969
|
throw new Error(
|
|
2969
|
-
`Evaluator '${name}' in '${evalId}': prompt
|
|
2970
|
+
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
2970
2971
|
);
|
|
2971
2972
|
}
|
|
2972
2973
|
if (isJsonObject2(rawPrompt.config)) {
|
|
@@ -4240,6 +4241,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4240
4241
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
4241
4242
|
}
|
|
4242
4243
|
const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
|
|
4244
|
+
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
4243
4245
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
4244
4246
|
const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
|
|
4245
4247
|
const globalExecution = suite.assert !== void 0 ? { ...rawGlobalExecution ?? {}, assert: suite.assert } : rawGlobalExecution;
|
|
@@ -4264,14 +4266,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
4264
4266
|
);
|
|
4265
4267
|
}
|
|
4266
4268
|
}
|
|
4267
|
-
const
|
|
4269
|
+
const testInputMessages = resolveInputMessages(evalcase);
|
|
4268
4270
|
const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
|
|
4269
|
-
if (!id || !outcome || !
|
|
4271
|
+
if (!id || !outcome || !testInputMessages || testInputMessages.length === 0) {
|
|
4270
4272
|
logError2(
|
|
4271
4273
|
`Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
|
|
4272
4274
|
);
|
|
4273
4275
|
continue;
|
|
4274
4276
|
}
|
|
4277
|
+
const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
|
|
4278
|
+
const skipDefaults = caseExecution?.skip_defaults === true;
|
|
4279
|
+
const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
|
|
4275
4280
|
const hasExpectedMessages = expectedMessages.length > 0;
|
|
4276
4281
|
const guidelinePaths = [];
|
|
4277
4282
|
const inputTextParts = [];
|
|
@@ -4383,16 +4388,16 @@ var loadEvalCaseById = loadTestById;
|
|
|
4383
4388
|
function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
4384
4389
|
if (!isJsonObject(raw)) return void 0;
|
|
4385
4390
|
const obj = raw;
|
|
4386
|
-
const
|
|
4387
|
-
if (!Array.isArray(
|
|
4388
|
-
const
|
|
4389
|
-
if (
|
|
4391
|
+
const commandSource = obj.command ?? obj.script;
|
|
4392
|
+
if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
|
|
4393
|
+
const commandArr = commandSource.filter((s) => typeof s === "string");
|
|
4394
|
+
if (commandArr.length === 0) return void 0;
|
|
4390
4395
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
4391
4396
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
4392
4397
|
if (cwd && !import_node_path8.default.isAbsolute(cwd)) {
|
|
4393
4398
|
cwd = import_node_path8.default.resolve(evalFileDir, cwd);
|
|
4394
4399
|
}
|
|
4395
|
-
const config = {
|
|
4400
|
+
const config = { command: commandArr };
|
|
4396
4401
|
if (timeoutMs !== void 0) {
|
|
4397
4402
|
return { ...config, timeout_ms: timeoutMs, ...cwd !== void 0 && { cwd } };
|
|
4398
4403
|
}
|
|
@@ -5585,50 +5590,58 @@ var CliProvider = class {
|
|
|
5585
5590
|
await this.ensureHealthy(request.signal);
|
|
5586
5591
|
const effectiveCwd = request.cwd ?? this.config.cwd;
|
|
5587
5592
|
const outputFilePath = generateOutputFilePath(request.evalCaseId);
|
|
5588
|
-
const templateValues = buildTemplateValues(
|
|
5589
|
-
|
|
5593
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5594
|
+
request,
|
|
5595
|
+
this.config,
|
|
5596
|
+
outputFilePath
|
|
5597
|
+
);
|
|
5598
|
+
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
5590
5599
|
if (this.verbose) {
|
|
5591
5600
|
console.log(
|
|
5592
5601
|
`[cli-provider:${this.targetName}] cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
|
|
5593
5602
|
);
|
|
5594
5603
|
}
|
|
5595
|
-
|
|
5596
|
-
|
|
5597
|
-
|
|
5598
|
-
env: process.env,
|
|
5599
|
-
timeoutMs: this.config.timeoutMs,
|
|
5600
|
-
signal: request.signal
|
|
5601
|
-
});
|
|
5602
|
-
const measuredDurationMs = Date.now() - startTime;
|
|
5603
|
-
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5604
|
-
if (request.signal?.aborted) {
|
|
5605
|
-
throw new Error("CLI provider request was aborted");
|
|
5606
|
-
}
|
|
5607
|
-
if (result.timedOut) {
|
|
5608
|
-
throw new Error(
|
|
5609
|
-
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
5610
|
-
);
|
|
5611
|
-
}
|
|
5612
|
-
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5613
|
-
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5614
|
-
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
5615
|
-
throw new Error(message);
|
|
5616
|
-
}
|
|
5617
|
-
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
5618
|
-
const parsed = this.parseOutputContent(responseContent);
|
|
5619
|
-
return {
|
|
5620
|
-
output: parsed.output,
|
|
5621
|
-
tokenUsage: parsed.tokenUsage,
|
|
5622
|
-
costUsd: parsed.costUsd,
|
|
5623
|
-
durationMs: parsed.durationMs ?? measuredDurationMs,
|
|
5624
|
-
raw: {
|
|
5625
|
-
command: renderedCommand,
|
|
5626
|
-
stderr: result.stderr,
|
|
5627
|
-
exitCode: result.exitCode ?? 0,
|
|
5604
|
+
try {
|
|
5605
|
+
const startTime = Date.now();
|
|
5606
|
+
const result = await this.runCommand(renderedCommand, {
|
|
5628
5607
|
cwd: effectiveCwd,
|
|
5629
|
-
|
|
5608
|
+
env: process.env,
|
|
5609
|
+
timeoutMs: this.config.timeoutMs,
|
|
5610
|
+
signal: request.signal
|
|
5611
|
+
});
|
|
5612
|
+
const measuredDurationMs = Date.now() - startTime;
|
|
5613
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5614
|
+
if (request.signal?.aborted) {
|
|
5615
|
+
throw new Error("CLI provider request was aborted");
|
|
5616
|
+
}
|
|
5617
|
+
if (result.timedOut) {
|
|
5618
|
+
throw new Error(
|
|
5619
|
+
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
5620
|
+
);
|
|
5621
|
+
}
|
|
5622
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5623
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5624
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
5625
|
+
throw new Error(message);
|
|
5630
5626
|
}
|
|
5631
|
-
|
|
5627
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
5628
|
+
const parsed = this.parseOutputContent(responseContent);
|
|
5629
|
+
return {
|
|
5630
|
+
output: parsed.output,
|
|
5631
|
+
tokenUsage: parsed.tokenUsage,
|
|
5632
|
+
costUsd: parsed.costUsd,
|
|
5633
|
+
durationMs: parsed.durationMs ?? measuredDurationMs,
|
|
5634
|
+
raw: {
|
|
5635
|
+
command: renderedCommand,
|
|
5636
|
+
stderr: result.stderr,
|
|
5637
|
+
exitCode: result.exitCode ?? 0,
|
|
5638
|
+
cwd: effectiveCwd,
|
|
5639
|
+
outputFile: outputFilePath
|
|
5640
|
+
}
|
|
5641
|
+
};
|
|
5642
|
+
} finally {
|
|
5643
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
5644
|
+
}
|
|
5632
5645
|
}
|
|
5633
5646
|
async invokeBatch(requests) {
|
|
5634
5647
|
if (requests.length === 0) {
|
|
@@ -5651,7 +5664,7 @@ var CliProvider = class {
|
|
|
5651
5664
|
batchInputFiles.push(...request.inputFiles);
|
|
5652
5665
|
}
|
|
5653
5666
|
}
|
|
5654
|
-
const templateValues = buildTemplateValues(
|
|
5667
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5655
5668
|
{
|
|
5656
5669
|
question: "",
|
|
5657
5670
|
guidelines: "",
|
|
@@ -5662,87 +5675,91 @@ var CliProvider = class {
|
|
|
5662
5675
|
this.config,
|
|
5663
5676
|
outputFilePath
|
|
5664
5677
|
);
|
|
5665
|
-
const renderedCommand = renderTemplate(this.config.
|
|
5678
|
+
const renderedCommand = renderTemplate(this.config.command, templateValues);
|
|
5666
5679
|
if (this.verbose) {
|
|
5667
5680
|
console.log(
|
|
5668
5681
|
`[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
|
|
5669
5682
|
);
|
|
5670
5683
|
}
|
|
5671
|
-
|
|
5672
|
-
|
|
5673
|
-
|
|
5674
|
-
|
|
5675
|
-
|
|
5676
|
-
|
|
5677
|
-
|
|
5678
|
-
|
|
5679
|
-
|
|
5680
|
-
if (
|
|
5681
|
-
|
|
5682
|
-
|
|
5683
|
-
|
|
5684
|
-
|
|
5685
|
-
|
|
5686
|
-
|
|
5687
|
-
|
|
5688
|
-
|
|
5689
|
-
|
|
5690
|
-
|
|
5691
|
-
|
|
5692
|
-
|
|
5693
|
-
|
|
5694
|
-
|
|
5695
|
-
|
|
5696
|
-
|
|
5697
|
-
const
|
|
5698
|
-
|
|
5699
|
-
|
|
5700
|
-
|
|
5701
|
-
|
|
5702
|
-
|
|
5703
|
-
|
|
5704
|
-
|
|
5705
|
-
|
|
5706
|
-
|
|
5707
|
-
|
|
5684
|
+
try {
|
|
5685
|
+
const startTime = Date.now();
|
|
5686
|
+
const result = await this.runCommand(renderedCommand, {
|
|
5687
|
+
cwd: this.config.cwd,
|
|
5688
|
+
env: process.env,
|
|
5689
|
+
timeoutMs: this.config.timeoutMs,
|
|
5690
|
+
signal: controller.signal
|
|
5691
|
+
});
|
|
5692
|
+
const measuredDurationMs = Date.now() - startTime;
|
|
5693
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5694
|
+
if (controller.signal.aborted) {
|
|
5695
|
+
throw new Error("CLI provider request was aborted");
|
|
5696
|
+
}
|
|
5697
|
+
if (result.timedOut) {
|
|
5698
|
+
throw new Error(
|
|
5699
|
+
`CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
|
|
5700
|
+
);
|
|
5701
|
+
}
|
|
5702
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5703
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5704
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
5705
|
+
throw new Error(message);
|
|
5706
|
+
}
|
|
5707
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
5708
|
+
const recordsById = this.parseJsonlBatchOutput(responseContent);
|
|
5709
|
+
const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
|
|
5710
|
+
const responses = requests.map((request) => {
|
|
5711
|
+
const evalCaseId = request.evalCaseId;
|
|
5712
|
+
if (!evalCaseId) {
|
|
5713
|
+
return {
|
|
5714
|
+
output: [],
|
|
5715
|
+
durationMs: perRequestFallbackMs,
|
|
5716
|
+
raw: {
|
|
5717
|
+
command: renderedCommand,
|
|
5718
|
+
stderr: result.stderr,
|
|
5719
|
+
exitCode: result.exitCode ?? 0,
|
|
5720
|
+
cwd: this.config.cwd,
|
|
5721
|
+
outputFile: outputFilePath
|
|
5722
|
+
}
|
|
5723
|
+
};
|
|
5724
|
+
}
|
|
5725
|
+
const parsed = recordsById.get(evalCaseId);
|
|
5726
|
+
if (!parsed) {
|
|
5727
|
+
const errorMessage = `Batch output missing id '${evalCaseId}'`;
|
|
5728
|
+
if (this.verbose) {
|
|
5729
|
+
console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
|
|
5708
5730
|
}
|
|
5709
|
-
|
|
5710
|
-
|
|
5711
|
-
|
|
5712
|
-
|
|
5713
|
-
|
|
5714
|
-
|
|
5715
|
-
|
|
5731
|
+
return {
|
|
5732
|
+
output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
|
|
5733
|
+
durationMs: perRequestFallbackMs,
|
|
5734
|
+
raw: {
|
|
5735
|
+
command: renderedCommand,
|
|
5736
|
+
stderr: result.stderr,
|
|
5737
|
+
exitCode: result.exitCode ?? 0,
|
|
5738
|
+
cwd: this.config.cwd,
|
|
5739
|
+
outputFile: outputFilePath,
|
|
5740
|
+
error: errorMessage
|
|
5741
|
+
}
|
|
5742
|
+
};
|
|
5716
5743
|
}
|
|
5717
5744
|
return {
|
|
5718
|
-
output:
|
|
5719
|
-
|
|
5745
|
+
output: parsed.output,
|
|
5746
|
+
tokenUsage: parsed.tokenUsage,
|
|
5747
|
+
costUsd: parsed.costUsd,
|
|
5748
|
+
durationMs: parsed.durationMs ?? perRequestFallbackMs,
|
|
5720
5749
|
raw: {
|
|
5721
5750
|
command: renderedCommand,
|
|
5722
5751
|
stderr: result.stderr,
|
|
5723
5752
|
exitCode: result.exitCode ?? 0,
|
|
5724
5753
|
cwd: this.config.cwd,
|
|
5725
5754
|
outputFile: outputFilePath,
|
|
5726
|
-
|
|
5755
|
+
recordId: evalCaseId
|
|
5727
5756
|
}
|
|
5728
5757
|
};
|
|
5729
|
-
}
|
|
5730
|
-
return
|
|
5731
|
-
|
|
5732
|
-
|
|
5733
|
-
|
|
5734
|
-
durationMs: parsed.durationMs ?? perRequestFallbackMs,
|
|
5735
|
-
raw: {
|
|
5736
|
-
command: renderedCommand,
|
|
5737
|
-
stderr: result.stderr,
|
|
5738
|
-
exitCode: result.exitCode ?? 0,
|
|
5739
|
-
cwd: this.config.cwd,
|
|
5740
|
-
outputFile: outputFilePath,
|
|
5741
|
-
recordId: evalCaseId
|
|
5742
|
-
}
|
|
5743
|
-
};
|
|
5744
|
-
});
|
|
5745
|
-
return responses;
|
|
5758
|
+
});
|
|
5759
|
+
return responses;
|
|
5760
|
+
} finally {
|
|
5761
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
5762
|
+
}
|
|
5746
5763
|
}
|
|
5747
5764
|
/**
|
|
5748
5765
|
* Parse output content from CLI.
|
|
@@ -5857,7 +5874,7 @@ var CliProvider = class {
|
|
|
5857
5874
|
return;
|
|
5858
5875
|
}
|
|
5859
5876
|
const timeoutMs = healthcheck.timeoutMs ?? this.config.timeoutMs;
|
|
5860
|
-
if (healthcheck
|
|
5877
|
+
if ("url" in healthcheck && healthcheck.url) {
|
|
5861
5878
|
const controller = new AbortController();
|
|
5862
5879
|
const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
|
|
5863
5880
|
signal?.addEventListener("abort", () => controller.abort(), { once: true });
|
|
@@ -5876,50 +5893,70 @@ var CliProvider = class {
|
|
|
5876
5893
|
}
|
|
5877
5894
|
return;
|
|
5878
5895
|
}
|
|
5879
|
-
const
|
|
5880
|
-
|
|
5881
|
-
|
|
5882
|
-
|
|
5883
|
-
|
|
5884
|
-
|
|
5885
|
-
|
|
5886
|
-
|
|
5887
|
-
|
|
5888
|
-
|
|
5889
|
-
|
|
5890
|
-
|
|
5891
|
-
|
|
5896
|
+
const hcCommand = "command" in healthcheck ? healthcheck.command : void 0;
|
|
5897
|
+
if (!hcCommand) {
|
|
5898
|
+
throw new Error(`CLI healthcheck for '${this.targetName}': 'command' or 'url' is required`);
|
|
5899
|
+
}
|
|
5900
|
+
const { values: templateValues, promptFilePath } = await buildTemplateValues(
|
|
5901
|
+
{
|
|
5902
|
+
question: "",
|
|
5903
|
+
guidelines: "",
|
|
5904
|
+
inputFiles: [],
|
|
5905
|
+
evalCaseId: "healthcheck",
|
|
5906
|
+
attempt: 0
|
|
5907
|
+
},
|
|
5908
|
+
this.config,
|
|
5909
|
+
generateOutputFilePath("healthcheck")
|
|
5892
5910
|
);
|
|
5911
|
+
const renderedCommand = renderTemplate(hcCommand, templateValues);
|
|
5912
|
+
const hcCwd = "cwd" in healthcheck ? healthcheck.cwd : void 0;
|
|
5893
5913
|
if (this.verbose) {
|
|
5894
5914
|
console.log(
|
|
5895
|
-
`[cli-provider:${this.targetName}] (healthcheck) cwd=${
|
|
5915
|
+
`[cli-provider:${this.targetName}] (healthcheck) cwd=${hcCwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
|
|
5896
5916
|
);
|
|
5897
5917
|
}
|
|
5898
|
-
|
|
5899
|
-
|
|
5900
|
-
|
|
5901
|
-
|
|
5902
|
-
|
|
5903
|
-
|
|
5904
|
-
|
|
5905
|
-
|
|
5906
|
-
|
|
5907
|
-
|
|
5908
|
-
|
|
5918
|
+
try {
|
|
5919
|
+
const result = await this.runCommand(renderedCommand, {
|
|
5920
|
+
cwd: hcCwd ?? this.config.cwd,
|
|
5921
|
+
env: process.env,
|
|
5922
|
+
timeoutMs,
|
|
5923
|
+
signal
|
|
5924
|
+
});
|
|
5925
|
+
if (result.failed || (result.exitCode ?? 0) !== 0) {
|
|
5926
|
+
const codeText = result.exitCode !== null ? result.exitCode : "unknown";
|
|
5927
|
+
const detail = result.stderr.trim() || result.stdout.trim();
|
|
5928
|
+
const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
|
|
5929
|
+
throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
|
|
5930
|
+
}
|
|
5931
|
+
} finally {
|
|
5932
|
+
await cleanupTempFile(promptFilePath, this.keepTempFiles);
|
|
5909
5933
|
}
|
|
5910
5934
|
}
|
|
5911
5935
|
};
|
|
5912
|
-
function buildTemplateValues(request, config, outputFilePath) {
|
|
5936
|
+
async function buildTemplateValues(request, config, outputFilePath) {
|
|
5913
5937
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
5938
|
+
const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
|
|
5939
|
+
await import_promises11.default.writeFile(promptFilePath, request.question ?? "", "utf8");
|
|
5914
5940
|
return {
|
|
5915
|
-
|
|
5916
|
-
|
|
5917
|
-
|
|
5918
|
-
|
|
5919
|
-
|
|
5920
|
-
|
|
5941
|
+
values: {
|
|
5942
|
+
PROMPT: shellEscape(request.question ?? ""),
|
|
5943
|
+
PROMPT_FILE: shellEscape(promptFilePath),
|
|
5944
|
+
GUIDELINES: shellEscape(request.guidelines ?? ""),
|
|
5945
|
+
EVAL_ID: shellEscape(request.evalCaseId ?? ""),
|
|
5946
|
+
ATTEMPT: shellEscape(String(request.attempt ?? 0)),
|
|
5947
|
+
FILES: formatFileList(inputFiles, config.filesFormat),
|
|
5948
|
+
OUTPUT_FILE: shellEscape(outputFilePath)
|
|
5949
|
+
},
|
|
5950
|
+
promptFilePath
|
|
5921
5951
|
};
|
|
5922
5952
|
}
|
|
5953
|
+
async function cleanupTempFile(filePath, keepTempFiles) {
|
|
5954
|
+
if (!filePath || keepTempFiles) {
|
|
5955
|
+
return;
|
|
5956
|
+
}
|
|
5957
|
+
await import_promises11.default.unlink(filePath).catch(() => {
|
|
5958
|
+
});
|
|
5959
|
+
}
|
|
5923
5960
|
function normalizeInputFiles2(inputFiles) {
|
|
5924
5961
|
if (!inputFiles || inputFiles.length === 0) {
|
|
5925
5962
|
return void 0;
|
|
@@ -8281,27 +8318,25 @@ var ProviderRegistry = class {
|
|
|
8281
8318
|
var import_node_path18 = __toESM(require("path"), 1);
|
|
8282
8319
|
var import_zod3 = require("zod");
|
|
8283
8320
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
8284
|
-
type: import_zod3.z.literal("http"),
|
|
8285
8321
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
8286
8322
|
timeout_seconds: import_zod3.z.number().positive().optional(),
|
|
8287
8323
|
timeoutSeconds: import_zod3.z.number().positive().optional()
|
|
8288
8324
|
});
|
|
8289
8325
|
var CliHealthcheckCommandInputSchema = import_zod3.z.object({
|
|
8290
|
-
|
|
8291
|
-
command_template: import_zod3.z.string().optional(),
|
|
8292
|
-
commandTemplate: import_zod3.z.string().optional(),
|
|
8326
|
+
command: import_zod3.z.string().min(1, "healthcheck command is required"),
|
|
8293
8327
|
cwd: import_zod3.z.string().optional(),
|
|
8294
8328
|
timeout_seconds: import_zod3.z.number().positive().optional(),
|
|
8295
8329
|
timeoutSeconds: import_zod3.z.number().positive().optional()
|
|
8296
8330
|
});
|
|
8297
|
-
var CliHealthcheckInputSchema = import_zod3.z.
|
|
8331
|
+
var CliHealthcheckInputSchema = import_zod3.z.union([
|
|
8298
8332
|
CliHealthcheckHttpInputSchema,
|
|
8299
8333
|
CliHealthcheckCommandInputSchema
|
|
8300
8334
|
]);
|
|
8301
8335
|
var CliTargetInputSchema = import_zod3.z.object({
|
|
8302
8336
|
name: import_zod3.z.string().min(1, "target name is required"),
|
|
8303
8337
|
provider: import_zod3.z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
|
|
8304
|
-
// Command
|
|
8338
|
+
// Command - required (accept both naming conventions)
|
|
8339
|
+
command: import_zod3.z.string().optional(),
|
|
8305
8340
|
command_template: import_zod3.z.string().optional(),
|
|
8306
8341
|
commandTemplate: import_zod3.z.string().optional(),
|
|
8307
8342
|
// Files format - optional
|
|
@@ -8333,26 +8368,27 @@ var CliTargetInputSchema = import_zod3.z.object({
|
|
|
8333
8368
|
workers: import_zod3.z.number().int().min(1).optional(),
|
|
8334
8369
|
provider_batching: import_zod3.z.boolean().optional(),
|
|
8335
8370
|
providerBatching: import_zod3.z.boolean().optional()
|
|
8336
|
-
}).refine(
|
|
8337
|
-
|
|
8338
|
-
|
|
8371
|
+
}).refine(
|
|
8372
|
+
(data) => data.command !== void 0 || data.command_template !== void 0 || data.commandTemplate !== void 0,
|
|
8373
|
+
{
|
|
8374
|
+
message: "'command' is required"
|
|
8375
|
+
}
|
|
8376
|
+
);
|
|
8339
8377
|
var CliHealthcheckHttpSchema = import_zod3.z.object({
|
|
8340
|
-
type: import_zod3.z.literal("http"),
|
|
8341
8378
|
url: import_zod3.z.string().min(1),
|
|
8342
8379
|
timeoutMs: import_zod3.z.number().positive().optional()
|
|
8343
8380
|
}).strict();
|
|
8344
8381
|
var CliHealthcheckCommandSchema = import_zod3.z.object({
|
|
8345
|
-
|
|
8346
|
-
commandTemplate: import_zod3.z.string().min(1),
|
|
8382
|
+
command: import_zod3.z.string().min(1),
|
|
8347
8383
|
cwd: import_zod3.z.string().optional(),
|
|
8348
8384
|
timeoutMs: import_zod3.z.number().positive().optional()
|
|
8349
8385
|
}).strict();
|
|
8350
|
-
var CliHealthcheckSchema = import_zod3.z.
|
|
8386
|
+
var CliHealthcheckSchema = import_zod3.z.union([
|
|
8351
8387
|
CliHealthcheckHttpSchema,
|
|
8352
8388
|
CliHealthcheckCommandSchema
|
|
8353
8389
|
]);
|
|
8354
8390
|
var CliTargetConfigSchema = import_zod3.z.object({
|
|
8355
|
-
|
|
8391
|
+
command: import_zod3.z.string().min(1),
|
|
8356
8392
|
filesFormat: import_zod3.z.string().optional(),
|
|
8357
8393
|
cwd: import_zod3.z.string().optional(),
|
|
8358
8394
|
workspaceTemplate: import_zod3.z.string().optional(),
|
|
@@ -8364,26 +8400,19 @@ var CliTargetConfigSchema = import_zod3.z.object({
|
|
|
8364
8400
|
function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
8365
8401
|
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
8366
8402
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
8367
|
-
if (input
|
|
8403
|
+
if ("url" in input && input.url) {
|
|
8368
8404
|
const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
|
|
8369
8405
|
return {
|
|
8370
|
-
type: "http",
|
|
8371
8406
|
url,
|
|
8372
8407
|
timeoutMs
|
|
8373
8408
|
};
|
|
8374
8409
|
}
|
|
8375
|
-
|
|
8376
|
-
if (commandTemplateSource === void 0) {
|
|
8410
|
+
if (!("command" in input) || !input.command) {
|
|
8377
8411
|
throw new Error(
|
|
8378
|
-
`${targetName} healthcheck: Either
|
|
8412
|
+
`${targetName} healthcheck: Either 'command' or 'url' is required for healthcheck`
|
|
8379
8413
|
);
|
|
8380
8414
|
}
|
|
8381
|
-
const
|
|
8382
|
-
commandTemplateSource,
|
|
8383
|
-
env,
|
|
8384
|
-
`${targetName} healthcheck command template`,
|
|
8385
|
-
true
|
|
8386
|
-
);
|
|
8415
|
+
const command = resolveString(input.command, env, `${targetName} healthcheck command`, true);
|
|
8387
8416
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
|
|
8388
8417
|
allowLiteral: true,
|
|
8389
8418
|
optionalEnv: true
|
|
@@ -8395,24 +8424,18 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
8395
8424
|
cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
|
|
8396
8425
|
}
|
|
8397
8426
|
return {
|
|
8398
|
-
|
|
8399
|
-
commandTemplate,
|
|
8427
|
+
command,
|
|
8400
8428
|
cwd,
|
|
8401
8429
|
timeoutMs
|
|
8402
8430
|
};
|
|
8403
8431
|
}
|
|
8404
8432
|
function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
8405
8433
|
const targetName = input.name;
|
|
8406
|
-
const
|
|
8407
|
-
if (
|
|
8408
|
-
throw new Error(`${targetName}:
|
|
8434
|
+
const commandSource = input.command ?? input.command_template ?? input.commandTemplate;
|
|
8435
|
+
if (commandSource === void 0) {
|
|
8436
|
+
throw new Error(`${targetName}: 'command' is required`);
|
|
8409
8437
|
}
|
|
8410
|
-
const
|
|
8411
|
-
commandTemplateSource,
|
|
8412
|
-
env,
|
|
8413
|
-
`${targetName} CLI command template`,
|
|
8414
|
-
true
|
|
8415
|
-
);
|
|
8438
|
+
const command = resolveString(commandSource, env, `${targetName} CLI command`, true);
|
|
8416
8439
|
const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
|
|
8417
8440
|
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
8418
8441
|
const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
|
|
@@ -8451,7 +8474,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
8451
8474
|
);
|
|
8452
8475
|
const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
|
|
8453
8476
|
return {
|
|
8454
|
-
|
|
8477
|
+
command,
|
|
8455
8478
|
filesFormat,
|
|
8456
8479
|
cwd,
|
|
8457
8480
|
workspaceTemplate,
|
|
@@ -8463,6 +8486,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
8463
8486
|
}
|
|
8464
8487
|
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
|
|
8465
8488
|
"PROMPT",
|
|
8489
|
+
"PROMPT_FILE",
|
|
8466
8490
|
"GUIDELINES",
|
|
8467
8491
|
"EVAL_ID",
|
|
8468
8492
|
"ATTEMPT",
|
|
@@ -9177,8 +9201,8 @@ var cliErrorMap = (issue, ctx) => {
|
|
|
9177
9201
|
if (issue.code === import_zod3.z.ZodIssueCode.unrecognized_keys) {
|
|
9178
9202
|
return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
|
|
9179
9203
|
}
|
|
9180
|
-
if (issue.code === import_zod3.z.ZodIssueCode.
|
|
9181
|
-
return { message: "healthcheck
|
|
9204
|
+
if (issue.code === import_zod3.z.ZodIssueCode.invalid_union) {
|
|
9205
|
+
return { message: "healthcheck must have either 'url' (HTTP) or 'command' (command)" };
|
|
9182
9206
|
}
|
|
9183
9207
|
if (issue.code === import_zod3.z.ZodIssueCode.invalid_type && issue.expected === "string") {
|
|
9184
9208
|
return { message: `${ctx.defaultError} (expected a string value)` };
|
|
@@ -9194,18 +9218,18 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
9194
9218
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
9195
9219
|
}
|
|
9196
9220
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
9197
|
-
assertSupportedCliPlaceholders(normalized.
|
|
9198
|
-
if (normalized.healthcheck
|
|
9221
|
+
assertSupportedCliPlaceholders(normalized.command, `${target.name} CLI command`);
|
|
9222
|
+
if ("command" in (normalized.healthcheck ?? {}) && normalized.healthcheck.command) {
|
|
9199
9223
|
assertSupportedCliPlaceholders(
|
|
9200
|
-
normalized.healthcheck.
|
|
9201
|
-
`${target.name} healthcheck command
|
|
9224
|
+
normalized.healthcheck.command,
|
|
9225
|
+
`${target.name} healthcheck command`
|
|
9202
9226
|
);
|
|
9203
9227
|
}
|
|
9204
9228
|
return normalized;
|
|
9205
9229
|
}
|
|
9206
9230
|
function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
|
|
9207
|
-
const
|
|
9208
|
-
const
|
|
9231
|
+
const commandSource = target.command ?? target.command_template ?? target.commandTemplate;
|
|
9232
|
+
const command = commandSource ? resolveString(commandSource, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
|
|
9209
9233
|
const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
|
|
9210
9234
|
const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
|
|
9211
9235
|
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
@@ -9219,7 +9243,7 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
|
|
|
9219
9243
|
cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
|
|
9220
9244
|
}
|
|
9221
9245
|
return {
|
|
9222
|
-
|
|
9246
|
+
command,
|
|
9223
9247
|
cwd,
|
|
9224
9248
|
timeoutMs
|
|
9225
9249
|
};
|
|
@@ -10922,7 +10946,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
10922
10946
|
}
|
|
10923
10947
|
registry.register(kindName, (target) => {
|
|
10924
10948
|
return new CliProvider(target.name, {
|
|
10925
|
-
|
|
10949
|
+
command: `bun run ${filePath} {PROMPT}`
|
|
10926
10950
|
});
|
|
10927
10951
|
});
|
|
10928
10952
|
discoveredKinds.push(kindName);
|
|
@@ -11435,13 +11459,13 @@ function toCamelCaseDeep(obj) {
|
|
|
11435
11459
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
11436
11460
|
var CodeEvaluator = class {
|
|
11437
11461
|
kind = "code";
|
|
11438
|
-
|
|
11462
|
+
command;
|
|
11439
11463
|
cwd;
|
|
11440
11464
|
agentTimeoutMs;
|
|
11441
11465
|
config;
|
|
11442
11466
|
target;
|
|
11443
11467
|
constructor(options) {
|
|
11444
|
-
this.
|
|
11468
|
+
this.command = options.command ?? options.script ?? [];
|
|
11445
11469
|
this.cwd = options.cwd;
|
|
11446
11470
|
this.agentTimeoutMs = options.agentTimeoutMs;
|
|
11447
11471
|
this.config = options.config;
|
|
@@ -11500,7 +11524,7 @@ var CodeEvaluator = class {
|
|
|
11500
11524
|
const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
|
|
11501
11525
|
try {
|
|
11502
11526
|
const stdout = await executeScript(
|
|
11503
|
-
this.
|
|
11527
|
+
this.command,
|
|
11504
11528
|
inputPayload,
|
|
11505
11529
|
this.agentTimeoutMs,
|
|
11506
11530
|
this.cwd,
|
|
@@ -11514,7 +11538,7 @@ var CodeEvaluator = class {
|
|
|
11514
11538
|
const details = parsed?.details && typeof parsed.details === "object" && !Array.isArray(parsed.details) ? parsed.details : void 0;
|
|
11515
11539
|
const proxyUsage = getProxyUsage?.();
|
|
11516
11540
|
const evaluatorRawRequest = {
|
|
11517
|
-
|
|
11541
|
+
command: this.command,
|
|
11518
11542
|
...this.cwd ? { cwd: this.cwd } : {},
|
|
11519
11543
|
...proxyUsage ? {
|
|
11520
11544
|
target_proxy: {
|
|
@@ -11544,7 +11568,7 @@ var CodeEvaluator = class {
|
|
|
11544
11568
|
expectedAspectCount: 1,
|
|
11545
11569
|
reasoning: message,
|
|
11546
11570
|
evaluatorRawRequest: {
|
|
11547
|
-
|
|
11571
|
+
command: this.command,
|
|
11548
11572
|
...this.cwd ? { cwd: this.cwd } : {},
|
|
11549
11573
|
...proxyUsage ? {
|
|
11550
11574
|
target_proxy: {
|
|
@@ -14503,7 +14527,7 @@ var llmJudgeFactory = (config, context2) => {
|
|
|
14503
14527
|
var codeFactory = (config, context2) => {
|
|
14504
14528
|
const c = config;
|
|
14505
14529
|
return new CodeEvaluator({
|
|
14506
|
-
|
|
14530
|
+
command: c.command ?? c.script ?? [],
|
|
14507
14531
|
cwd: c.resolvedCwd ?? c.cwd,
|
|
14508
14532
|
agentTimeoutMs: context2.agentTimeoutMs,
|
|
14509
14533
|
config: c.config,
|
|
@@ -14685,7 +14709,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
14685
14709
|
}
|
|
14686
14710
|
const factory = (_config, context2) => {
|
|
14687
14711
|
return new CodeEvaluator({
|
|
14688
|
-
|
|
14712
|
+
command: ["bun", "run", filePath],
|
|
14689
14713
|
agentTimeoutMs: context2.agentTimeoutMs
|
|
14690
14714
|
});
|
|
14691
14715
|
};
|
|
@@ -15039,7 +15063,8 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
|
|
|
15039
15063
|
});
|
|
15040
15064
|
const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
|
|
15041
15065
|
const cwd = config.cwd;
|
|
15042
|
-
const
|
|
15066
|
+
const commandArray = config.command ?? config.script ?? [];
|
|
15067
|
+
const result = await execFileWithStdin(commandArray, stdin, {
|
|
15043
15068
|
timeoutMs,
|
|
15044
15069
|
cwd
|
|
15045
15070
|
});
|