@agentv/core 2.7.1-next.4 → 2.7.1-next.6

This diff shows the differences between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2273,24 +2273,24 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2273
2273
  continue;
2274
2274
  }
2275
2275
  if (typeValue === "code_judge") {
2276
- let script;
2277
- const rawScript = rawEvaluator.script;
2278
- if (typeof rawScript === "string") {
2279
- const trimmed = rawScript.trim();
2276
+ let command;
2277
+ const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
2278
+ if (typeof rawCommand === "string") {
2279
+ const trimmed = rawCommand.trim();
2280
2280
  if (trimmed.length === 0) {
2281
2281
  throw new Error(
2282
- `Invalid code_judge script for evaluator '${name}' in '${evalId}': script cannot be empty`
2282
+ `Invalid code_judge command for evaluator '${name}' in '${evalId}': command cannot be empty`
2283
2283
  );
2284
2284
  }
2285
- script = parseCommandToArgv(trimmed);
2285
+ command = parseCommandToArgv(trimmed);
2286
2286
  } else {
2287
- script = asStringArray(
2288
- rawScript,
2289
- `code_judge script for evaluator '${name}' in '${evalId}'`
2287
+ command = asStringArray(
2288
+ rawCommand,
2289
+ `code_judge command for evaluator '${name}' in '${evalId}'`
2290
2290
  );
2291
2291
  }
2292
- if (!script) {
2293
- logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing script`);
2292
+ if (!command) {
2293
+ logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing command`);
2294
2294
  continue;
2295
2295
  }
2296
2296
  const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
@@ -2335,6 +2335,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2335
2335
  const knownProps2 = /* @__PURE__ */ new Set([
2336
2336
  "name",
2337
2337
  "type",
2338
+ "command",
2338
2339
  "script",
2339
2340
  "cwd",
2340
2341
  "weight",
@@ -2351,7 +2352,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2351
2352
  evaluators.push({
2352
2353
  name,
2353
2354
  type: "code",
2354
- script,
2355
+ command,
2355
2356
  cwd,
2356
2357
  resolvedCwd,
2357
2358
  ...weight2 !== void 0 ? { weight: weight2 } : {},
@@ -2953,20 +2954,20 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2953
2954
  let resolvedPromptScript;
2954
2955
  let promptScriptConfig;
2955
2956
  if (isJsonObject2(rawPrompt)) {
2956
- const scriptArray = asStringArray(
2957
- rawPrompt.script,
2958
- `prompt.script for evaluator '${name}' in '${evalId}'`
2957
+ const commandArray = asStringArray(
2958
+ rawPrompt.command ?? rawPrompt.script,
2959
+ `prompt.command for evaluator '${name}' in '${evalId}'`
2959
2960
  );
2960
- if (!scriptArray) {
2961
- throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires script array`);
2961
+ if (!commandArray) {
2962
+ throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
2962
2963
  }
2963
- const scriptPath = scriptArray[scriptArray.length - 1];
2964
- const resolved = await resolveFileReference2(scriptPath, searchRoots);
2964
+ const commandPath = commandArray[commandArray.length - 1];
2965
+ const resolved = await resolveFileReference2(commandPath, searchRoots);
2965
2966
  if (resolved.resolvedPath) {
2966
- resolvedPromptScript = [...scriptArray.slice(0, -1), import_node_path4.default.resolve(resolved.resolvedPath)];
2967
+ resolvedPromptScript = [...commandArray.slice(0, -1), import_node_path4.default.resolve(resolved.resolvedPath)];
2967
2968
  } else {
2968
2969
  throw new Error(
2969
- `Evaluator '${name}' in '${evalId}': prompt script file not found: ${resolved.displayPath}`
2970
+ `Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
2970
2971
  );
2971
2972
  }
2972
2973
  if (isJsonObject2(rawPrompt.config)) {
@@ -4240,6 +4241,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4240
4241
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
4241
4242
  }
4242
4243
  const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
4244
+ const suiteInputMessages = expandInputShorthand(suite.input);
4243
4245
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
4244
4246
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
4245
4247
  const globalExecution = suite.assert !== void 0 ? { ...rawGlobalExecution ?? {}, assert: suite.assert } : rawGlobalExecution;
@@ -4264,14 +4266,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
4264
4266
  );
4265
4267
  }
4266
4268
  }
4267
- const inputMessages = resolveInputMessages(evalcase);
4269
+ const testInputMessages = resolveInputMessages(evalcase);
4268
4270
  const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
4269
- if (!id || !outcome || !inputMessages || inputMessages.length === 0) {
4271
+ if (!id || !outcome || !testInputMessages || testInputMessages.length === 0) {
4270
4272
  logError2(
4271
4273
  `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
4272
4274
  );
4273
4275
  continue;
4274
4276
  }
4277
+ const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
4278
+ const skipDefaults = caseExecution?.skip_defaults === true;
4279
+ const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
4275
4280
  const hasExpectedMessages = expectedMessages.length > 0;
4276
4281
  const guidelinePaths = [];
4277
4282
  const inputTextParts = [];
@@ -4383,16 +4388,16 @@ var loadEvalCaseById = loadTestById;
4383
4388
  function parseWorkspaceScriptConfig(raw, evalFileDir) {
4384
4389
  if (!isJsonObject(raw)) return void 0;
4385
4390
  const obj = raw;
4386
- const script = obj.script;
4387
- if (!Array.isArray(script) || script.length === 0) return void 0;
4388
- const scriptArr = script.filter((s) => typeof s === "string");
4389
- if (scriptArr.length === 0) return void 0;
4391
+ const commandSource = obj.command ?? obj.script;
4392
+ if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
4393
+ const commandArr = commandSource.filter((s) => typeof s === "string");
4394
+ if (commandArr.length === 0) return void 0;
4390
4395
  const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
4391
4396
  let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
4392
4397
  if (cwd && !import_node_path8.default.isAbsolute(cwd)) {
4393
4398
  cwd = import_node_path8.default.resolve(evalFileDir, cwd);
4394
4399
  }
4395
- const config = { script: scriptArr };
4400
+ const config = { command: commandArr };
4396
4401
  if (timeoutMs !== void 0) {
4397
4402
  return { ...config, timeout_ms: timeoutMs, ...cwd !== void 0 && { cwd } };
4398
4403
  }
@@ -5585,50 +5590,58 @@ var CliProvider = class {
5585
5590
  await this.ensureHealthy(request.signal);
5586
5591
  const effectiveCwd = request.cwd ?? this.config.cwd;
5587
5592
  const outputFilePath = generateOutputFilePath(request.evalCaseId);
5588
- const templateValues = buildTemplateValues(request, this.config, outputFilePath);
5589
- const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
5593
+ const { values: templateValues, promptFilePath } = await buildTemplateValues(
5594
+ request,
5595
+ this.config,
5596
+ outputFilePath
5597
+ );
5598
+ const renderedCommand = renderTemplate(this.config.command, templateValues);
5590
5599
  if (this.verbose) {
5591
5600
  console.log(
5592
5601
  `[cli-provider:${this.targetName}] cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
5593
5602
  );
5594
5603
  }
5595
- const startTime = Date.now();
5596
- const result = await this.runCommand(renderedCommand, {
5597
- cwd: effectiveCwd,
5598
- env: process.env,
5599
- timeoutMs: this.config.timeoutMs,
5600
- signal: request.signal
5601
- });
5602
- const measuredDurationMs = Date.now() - startTime;
5603
- if (result.failed || (result.exitCode ?? 0) !== 0) {
5604
- if (request.signal?.aborted) {
5605
- throw new Error("CLI provider request was aborted");
5606
- }
5607
- if (result.timedOut) {
5608
- throw new Error(
5609
- `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
5610
- );
5611
- }
5612
- const codeText = result.exitCode !== null ? result.exitCode : "unknown";
5613
- const detail = result.stderr.trim() || result.stdout.trim();
5614
- const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
5615
- throw new Error(message);
5616
- }
5617
- const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
5618
- const parsed = this.parseOutputContent(responseContent);
5619
- return {
5620
- output: parsed.output,
5621
- tokenUsage: parsed.tokenUsage,
5622
- costUsd: parsed.costUsd,
5623
- durationMs: parsed.durationMs ?? measuredDurationMs,
5624
- raw: {
5625
- command: renderedCommand,
5626
- stderr: result.stderr,
5627
- exitCode: result.exitCode ?? 0,
5604
+ try {
5605
+ const startTime = Date.now();
5606
+ const result = await this.runCommand(renderedCommand, {
5628
5607
  cwd: effectiveCwd,
5629
- outputFile: outputFilePath
5608
+ env: process.env,
5609
+ timeoutMs: this.config.timeoutMs,
5610
+ signal: request.signal
5611
+ });
5612
+ const measuredDurationMs = Date.now() - startTime;
5613
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
5614
+ if (request.signal?.aborted) {
5615
+ throw new Error("CLI provider request was aborted");
5616
+ }
5617
+ if (result.timedOut) {
5618
+ throw new Error(
5619
+ `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
5620
+ );
5621
+ }
5622
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
5623
+ const detail = result.stderr.trim() || result.stdout.trim();
5624
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
5625
+ throw new Error(message);
5630
5626
  }
5631
- };
5627
+ const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
5628
+ const parsed = this.parseOutputContent(responseContent);
5629
+ return {
5630
+ output: parsed.output,
5631
+ tokenUsage: parsed.tokenUsage,
5632
+ costUsd: parsed.costUsd,
5633
+ durationMs: parsed.durationMs ?? measuredDurationMs,
5634
+ raw: {
5635
+ command: renderedCommand,
5636
+ stderr: result.stderr,
5637
+ exitCode: result.exitCode ?? 0,
5638
+ cwd: effectiveCwd,
5639
+ outputFile: outputFilePath
5640
+ }
5641
+ };
5642
+ } finally {
5643
+ await cleanupTempFile(promptFilePath, this.keepTempFiles);
5644
+ }
5632
5645
  }
5633
5646
  async invokeBatch(requests) {
5634
5647
  if (requests.length === 0) {
@@ -5651,7 +5664,7 @@ var CliProvider = class {
5651
5664
  batchInputFiles.push(...request.inputFiles);
5652
5665
  }
5653
5666
  }
5654
- const templateValues = buildTemplateValues(
5667
+ const { values: templateValues, promptFilePath } = await buildTemplateValues(
5655
5668
  {
5656
5669
  question: "",
5657
5670
  guidelines: "",
@@ -5662,87 +5675,91 @@ var CliProvider = class {
5662
5675
  this.config,
5663
5676
  outputFilePath
5664
5677
  );
5665
- const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
5678
+ const renderedCommand = renderTemplate(this.config.command, templateValues);
5666
5679
  if (this.verbose) {
5667
5680
  console.log(
5668
5681
  `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
5669
5682
  );
5670
5683
  }
5671
- const startTime = Date.now();
5672
- const result = await this.runCommand(renderedCommand, {
5673
- cwd: this.config.cwd,
5674
- env: process.env,
5675
- timeoutMs: this.config.timeoutMs,
5676
- signal: controller.signal
5677
- });
5678
- const measuredDurationMs = Date.now() - startTime;
5679
- if (result.failed || (result.exitCode ?? 0) !== 0) {
5680
- if (controller.signal.aborted) {
5681
- throw new Error("CLI provider request was aborted");
5682
- }
5683
- if (result.timedOut) {
5684
- throw new Error(
5685
- `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
5686
- );
5687
- }
5688
- const codeText = result.exitCode !== null ? result.exitCode : "unknown";
5689
- const detail = result.stderr.trim() || result.stdout.trim();
5690
- const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
5691
- throw new Error(message);
5692
- }
5693
- const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
5694
- const recordsById = this.parseJsonlBatchOutput(responseContent);
5695
- const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
5696
- const responses = requests.map((request) => {
5697
- const evalCaseId = request.evalCaseId;
5698
- if (!evalCaseId) {
5699
- return {
5700
- output: [],
5701
- durationMs: perRequestFallbackMs,
5702
- raw: {
5703
- command: renderedCommand,
5704
- stderr: result.stderr,
5705
- exitCode: result.exitCode ?? 0,
5706
- cwd: this.config.cwd,
5707
- outputFile: outputFilePath
5684
+ try {
5685
+ const startTime = Date.now();
5686
+ const result = await this.runCommand(renderedCommand, {
5687
+ cwd: this.config.cwd,
5688
+ env: process.env,
5689
+ timeoutMs: this.config.timeoutMs,
5690
+ signal: controller.signal
5691
+ });
5692
+ const measuredDurationMs = Date.now() - startTime;
5693
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
5694
+ if (controller.signal.aborted) {
5695
+ throw new Error("CLI provider request was aborted");
5696
+ }
5697
+ if (result.timedOut) {
5698
+ throw new Error(
5699
+ `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
5700
+ );
5701
+ }
5702
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
5703
+ const detail = result.stderr.trim() || result.stdout.trim();
5704
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
5705
+ throw new Error(message);
5706
+ }
5707
+ const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
5708
+ const recordsById = this.parseJsonlBatchOutput(responseContent);
5709
+ const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
5710
+ const responses = requests.map((request) => {
5711
+ const evalCaseId = request.evalCaseId;
5712
+ if (!evalCaseId) {
5713
+ return {
5714
+ output: [],
5715
+ durationMs: perRequestFallbackMs,
5716
+ raw: {
5717
+ command: renderedCommand,
5718
+ stderr: result.stderr,
5719
+ exitCode: result.exitCode ?? 0,
5720
+ cwd: this.config.cwd,
5721
+ outputFile: outputFilePath
5722
+ }
5723
+ };
5724
+ }
5725
+ const parsed = recordsById.get(evalCaseId);
5726
+ if (!parsed) {
5727
+ const errorMessage = `Batch output missing id '${evalCaseId}'`;
5728
+ if (this.verbose) {
5729
+ console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
5708
5730
  }
5709
- };
5710
- }
5711
- const parsed = recordsById.get(evalCaseId);
5712
- if (!parsed) {
5713
- const errorMessage = `Batch output missing id '${evalCaseId}'`;
5714
- if (this.verbose) {
5715
- console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
5731
+ return {
5732
+ output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
5733
+ durationMs: perRequestFallbackMs,
5734
+ raw: {
5735
+ command: renderedCommand,
5736
+ stderr: result.stderr,
5737
+ exitCode: result.exitCode ?? 0,
5738
+ cwd: this.config.cwd,
5739
+ outputFile: outputFilePath,
5740
+ error: errorMessage
5741
+ }
5742
+ };
5716
5743
  }
5717
5744
  return {
5718
- output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
5719
- durationMs: perRequestFallbackMs,
5745
+ output: parsed.output,
5746
+ tokenUsage: parsed.tokenUsage,
5747
+ costUsd: parsed.costUsd,
5748
+ durationMs: parsed.durationMs ?? perRequestFallbackMs,
5720
5749
  raw: {
5721
5750
  command: renderedCommand,
5722
5751
  stderr: result.stderr,
5723
5752
  exitCode: result.exitCode ?? 0,
5724
5753
  cwd: this.config.cwd,
5725
5754
  outputFile: outputFilePath,
5726
- error: errorMessage
5755
+ recordId: evalCaseId
5727
5756
  }
5728
5757
  };
5729
- }
5730
- return {
5731
- output: parsed.output,
5732
- tokenUsage: parsed.tokenUsage,
5733
- costUsd: parsed.costUsd,
5734
- durationMs: parsed.durationMs ?? perRequestFallbackMs,
5735
- raw: {
5736
- command: renderedCommand,
5737
- stderr: result.stderr,
5738
- exitCode: result.exitCode ?? 0,
5739
- cwd: this.config.cwd,
5740
- outputFile: outputFilePath,
5741
- recordId: evalCaseId
5742
- }
5743
- };
5744
- });
5745
- return responses;
5758
+ });
5759
+ return responses;
5760
+ } finally {
5761
+ await cleanupTempFile(promptFilePath, this.keepTempFiles);
5762
+ }
5746
5763
  }
5747
5764
  /**
5748
5765
  * Parse output content from CLI.
@@ -5857,7 +5874,7 @@ var CliProvider = class {
5857
5874
  return;
5858
5875
  }
5859
5876
  const timeoutMs = healthcheck.timeoutMs ?? this.config.timeoutMs;
5860
- if (healthcheck.type === "http") {
5877
+ if ("url" in healthcheck && healthcheck.url) {
5861
5878
  const controller = new AbortController();
5862
5879
  const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
5863
5880
  signal?.addEventListener("abort", () => controller.abort(), { once: true });
@@ -5876,50 +5893,70 @@ var CliProvider = class {
5876
5893
  }
5877
5894
  return;
5878
5895
  }
5879
- const renderedCommand = renderTemplate(
5880
- healthcheck.commandTemplate,
5881
- buildTemplateValues(
5882
- {
5883
- question: "",
5884
- guidelines: "",
5885
- inputFiles: [],
5886
- evalCaseId: "healthcheck",
5887
- attempt: 0
5888
- },
5889
- this.config,
5890
- generateOutputFilePath("healthcheck")
5891
- )
5896
+ const hcCommand = "command" in healthcheck ? healthcheck.command : void 0;
5897
+ if (!hcCommand) {
5898
+ throw new Error(`CLI healthcheck for '${this.targetName}': 'command' or 'url' is required`);
5899
+ }
5900
+ const { values: templateValues, promptFilePath } = await buildTemplateValues(
5901
+ {
5902
+ question: "",
5903
+ guidelines: "",
5904
+ inputFiles: [],
5905
+ evalCaseId: "healthcheck",
5906
+ attempt: 0
5907
+ },
5908
+ this.config,
5909
+ generateOutputFilePath("healthcheck")
5892
5910
  );
5911
+ const renderedCommand = renderTemplate(hcCommand, templateValues);
5912
+ const hcCwd = "cwd" in healthcheck ? healthcheck.cwd : void 0;
5893
5913
  if (this.verbose) {
5894
5914
  console.log(
5895
- `[cli-provider:${this.targetName}] (healthcheck) cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
5915
+ `[cli-provider:${this.targetName}] (healthcheck) cwd=${hcCwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
5896
5916
  );
5897
5917
  }
5898
- const result = await this.runCommand(renderedCommand, {
5899
- cwd: healthcheck.cwd ?? this.config.cwd,
5900
- env: process.env,
5901
- timeoutMs,
5902
- signal
5903
- });
5904
- if (result.failed || (result.exitCode ?? 0) !== 0) {
5905
- const codeText = result.exitCode !== null ? result.exitCode : "unknown";
5906
- const detail = result.stderr.trim() || result.stdout.trim();
5907
- const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
5908
- throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
5918
+ try {
5919
+ const result = await this.runCommand(renderedCommand, {
5920
+ cwd: hcCwd ?? this.config.cwd,
5921
+ env: process.env,
5922
+ timeoutMs,
5923
+ signal
5924
+ });
5925
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
5926
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
5927
+ const detail = result.stderr.trim() || result.stdout.trim();
5928
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
5929
+ throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
5930
+ }
5931
+ } finally {
5932
+ await cleanupTempFile(promptFilePath, this.keepTempFiles);
5909
5933
  }
5910
5934
  }
5911
5935
  };
5912
- function buildTemplateValues(request, config, outputFilePath) {
5936
+ async function buildTemplateValues(request, config, outputFilePath) {
5913
5937
  const inputFiles = normalizeInputFiles2(request.inputFiles);
5938
+ const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
5939
+ await import_promises11.default.writeFile(promptFilePath, request.question ?? "", "utf8");
5914
5940
  return {
5915
- PROMPT: shellEscape(request.question ?? ""),
5916
- GUIDELINES: shellEscape(request.guidelines ?? ""),
5917
- EVAL_ID: shellEscape(request.evalCaseId ?? ""),
5918
- ATTEMPT: shellEscape(String(request.attempt ?? 0)),
5919
- FILES: formatFileList(inputFiles, config.filesFormat),
5920
- OUTPUT_FILE: shellEscape(outputFilePath)
5941
+ values: {
5942
+ PROMPT: shellEscape(request.question ?? ""),
5943
+ PROMPT_FILE: shellEscape(promptFilePath),
5944
+ GUIDELINES: shellEscape(request.guidelines ?? ""),
5945
+ EVAL_ID: shellEscape(request.evalCaseId ?? ""),
5946
+ ATTEMPT: shellEscape(String(request.attempt ?? 0)),
5947
+ FILES: formatFileList(inputFiles, config.filesFormat),
5948
+ OUTPUT_FILE: shellEscape(outputFilePath)
5949
+ },
5950
+ promptFilePath
5921
5951
  };
5922
5952
  }
5953
+ async function cleanupTempFile(filePath, keepTempFiles) {
5954
+ if (!filePath || keepTempFiles) {
5955
+ return;
5956
+ }
5957
+ await import_promises11.default.unlink(filePath).catch(() => {
5958
+ });
5959
+ }
5923
5960
  function normalizeInputFiles2(inputFiles) {
5924
5961
  if (!inputFiles || inputFiles.length === 0) {
5925
5962
  return void 0;
@@ -8281,27 +8318,25 @@ var ProviderRegistry = class {
8281
8318
  var import_node_path18 = __toESM(require("path"), 1);
8282
8319
  var import_zod3 = require("zod");
8283
8320
  var CliHealthcheckHttpInputSchema = import_zod3.z.object({
8284
- type: import_zod3.z.literal("http"),
8285
8321
  url: import_zod3.z.string().min(1, "healthcheck URL is required"),
8286
8322
  timeout_seconds: import_zod3.z.number().positive().optional(),
8287
8323
  timeoutSeconds: import_zod3.z.number().positive().optional()
8288
8324
  });
8289
8325
  var CliHealthcheckCommandInputSchema = import_zod3.z.object({
8290
- type: import_zod3.z.literal("command"),
8291
- command_template: import_zod3.z.string().optional(),
8292
- commandTemplate: import_zod3.z.string().optional(),
8326
+ command: import_zod3.z.string().min(1, "healthcheck command is required"),
8293
8327
  cwd: import_zod3.z.string().optional(),
8294
8328
  timeout_seconds: import_zod3.z.number().positive().optional(),
8295
8329
  timeoutSeconds: import_zod3.z.number().positive().optional()
8296
8330
  });
8297
- var CliHealthcheckInputSchema = import_zod3.z.discriminatedUnion("type", [
8331
+ var CliHealthcheckInputSchema = import_zod3.z.union([
8298
8332
  CliHealthcheckHttpInputSchema,
8299
8333
  CliHealthcheckCommandInputSchema
8300
8334
  ]);
8301
8335
  var CliTargetInputSchema = import_zod3.z.object({
8302
8336
  name: import_zod3.z.string().min(1, "target name is required"),
8303
8337
  provider: import_zod3.z.string().refine((p) => p.toLowerCase() === "cli", { message: "provider must be 'cli'" }),
8304
- // Command template - required (accept both naming conventions)
8338
+ // Command - required (accept both naming conventions)
8339
+ command: import_zod3.z.string().optional(),
8305
8340
  command_template: import_zod3.z.string().optional(),
8306
8341
  commandTemplate: import_zod3.z.string().optional(),
8307
8342
  // Files format - optional
@@ -8333,26 +8368,27 @@ var CliTargetInputSchema = import_zod3.z.object({
8333
8368
  workers: import_zod3.z.number().int().min(1).optional(),
8334
8369
  provider_batching: import_zod3.z.boolean().optional(),
8335
8370
  providerBatching: import_zod3.z.boolean().optional()
8336
- }).refine((data) => data.command_template !== void 0 || data.commandTemplate !== void 0, {
8337
- message: "Either command_template or commandTemplate is required"
8338
- });
8371
+ }).refine(
8372
+ (data) => data.command !== void 0 || data.command_template !== void 0 || data.commandTemplate !== void 0,
8373
+ {
8374
+ message: "'command' is required"
8375
+ }
8376
+ );
8339
8377
  var CliHealthcheckHttpSchema = import_zod3.z.object({
8340
- type: import_zod3.z.literal("http"),
8341
8378
  url: import_zod3.z.string().min(1),
8342
8379
  timeoutMs: import_zod3.z.number().positive().optional()
8343
8380
  }).strict();
8344
8381
  var CliHealthcheckCommandSchema = import_zod3.z.object({
8345
- type: import_zod3.z.literal("command"),
8346
- commandTemplate: import_zod3.z.string().min(1),
8382
+ command: import_zod3.z.string().min(1),
8347
8383
  cwd: import_zod3.z.string().optional(),
8348
8384
  timeoutMs: import_zod3.z.number().positive().optional()
8349
8385
  }).strict();
8350
- var CliHealthcheckSchema = import_zod3.z.discriminatedUnion("type", [
8386
+ var CliHealthcheckSchema = import_zod3.z.union([
8351
8387
  CliHealthcheckHttpSchema,
8352
8388
  CliHealthcheckCommandSchema
8353
8389
  ]);
8354
8390
  var CliTargetConfigSchema = import_zod3.z.object({
8355
- commandTemplate: import_zod3.z.string().min(1),
8391
+ command: import_zod3.z.string().min(1),
8356
8392
  filesFormat: import_zod3.z.string().optional(),
8357
8393
  cwd: import_zod3.z.string().optional(),
8358
8394
  workspaceTemplate: import_zod3.z.string().optional(),
@@ -8364,26 +8400,19 @@ var CliTargetConfigSchema = import_zod3.z.object({
8364
8400
  function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
8365
8401
  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
8366
8402
  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
8367
- if (input.type === "http") {
8403
+ if ("url" in input && input.url) {
8368
8404
  const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
8369
8405
  return {
8370
- type: "http",
8371
8406
  url,
8372
8407
  timeoutMs
8373
8408
  };
8374
8409
  }
8375
- const commandTemplateSource = input.command_template ?? input.commandTemplate;
8376
- if (commandTemplateSource === void 0) {
8410
+ if (!("command" in input) || !input.command) {
8377
8411
  throw new Error(
8378
- `${targetName} healthcheck: Either command_template or commandTemplate is required for command healthcheck`
8412
+ `${targetName} healthcheck: Either 'command' or 'url' is required for healthcheck`
8379
8413
  );
8380
8414
  }
8381
- const commandTemplate = resolveString(
8382
- commandTemplateSource,
8383
- env,
8384
- `${targetName} healthcheck command template`,
8385
- true
8386
- );
8415
+ const command = resolveString(input.command, env, `${targetName} healthcheck command`, true);
8387
8416
  let cwd = resolveOptionalString(input.cwd, env, `${targetName} healthcheck cwd`, {
8388
8417
  allowLiteral: true,
8389
8418
  optionalEnv: true
@@ -8395,24 +8424,18 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
8395
8424
  cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
8396
8425
  }
8397
8426
  return {
8398
- type: "command",
8399
- commandTemplate,
8427
+ command,
8400
8428
  cwd,
8401
8429
  timeoutMs
8402
8430
  };
8403
8431
  }
8404
8432
  function normalizeCliTargetInput(input, env, evalFilePath) {
8405
8433
  const targetName = input.name;
8406
- const commandTemplateSource = input.command_template ?? input.commandTemplate;
8407
- if (commandTemplateSource === void 0) {
8408
- throw new Error(`${targetName}: Either command_template or commandTemplate is required`);
8434
+ const commandSource = input.command ?? input.command_template ?? input.commandTemplate;
8435
+ if (commandSource === void 0) {
8436
+ throw new Error(`${targetName}: 'command' is required`);
8409
8437
  }
8410
- const commandTemplate = resolveString(
8411
- commandTemplateSource,
8412
- env,
8413
- `${targetName} CLI command template`,
8414
- true
8415
- );
8438
+ const command = resolveString(commandSource, env, `${targetName} CLI command`, true);
8416
8439
  const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
8417
8440
  const filesFormat = resolveOptionalLiteralString(filesFormatSource);
8418
8441
  const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
@@ -8451,7 +8474,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
8451
8474
  );
8452
8475
  const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
8453
8476
  return {
8454
- commandTemplate,
8477
+ command,
8455
8478
  filesFormat,
8456
8479
  cwd,
8457
8480
  workspaceTemplate,
@@ -8463,6 +8486,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
8463
8486
  }
8464
8487
  var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
8465
8488
  "PROMPT",
8489
+ "PROMPT_FILE",
8466
8490
  "GUIDELINES",
8467
8491
  "EVAL_ID",
8468
8492
  "ATTEMPT",
@@ -9177,8 +9201,8 @@ var cliErrorMap = (issue, ctx) => {
9177
9201
  if (issue.code === import_zod3.z.ZodIssueCode.unrecognized_keys) {
9178
9202
  return { message: `Unknown CLI provider settings: ${issue.keys.join(", ")}` };
9179
9203
  }
9180
- if (issue.code === import_zod3.z.ZodIssueCode.invalid_union_discriminator) {
9181
- return { message: "healthcheck type must be 'http' or 'command'" };
9204
+ if (issue.code === import_zod3.z.ZodIssueCode.invalid_union) {
9205
+ return { message: "healthcheck must have either 'url' (HTTP) or 'command' (command)" };
9182
9206
  }
9183
9207
  if (issue.code === import_zod3.z.ZodIssueCode.invalid_type && issue.expected === "string") {
9184
9208
  return { message: `${ctx.defaultError} (expected a string value)` };
@@ -9194,18 +9218,18 @@ function resolveCliConfig(target, env, evalFilePath) {
9194
9218
  throw new Error(`${prefix}${firstError?.message}`);
9195
9219
  }
9196
9220
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
9197
- assertSupportedCliPlaceholders(normalized.commandTemplate, `${target.name} CLI command template`);
9198
- if (normalized.healthcheck?.type === "command") {
9221
+ assertSupportedCliPlaceholders(normalized.command, `${target.name} CLI command`);
9222
+ if ("command" in (normalized.healthcheck ?? {}) && normalized.healthcheck.command) {
9199
9223
  assertSupportedCliPlaceholders(
9200
- normalized.healthcheck.commandTemplate,
9201
- `${target.name} healthcheck command template`
9224
+ normalized.healthcheck.command,
9225
+ `${target.name} healthcheck command`
9202
9226
  );
9203
9227
  }
9204
9228
  return normalized;
9205
9229
  }
9206
9230
  function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
9207
- const commandTemplateSource = target.command_template ?? target.commandTemplate;
9208
- const commandTemplate = commandTemplateSource ? resolveString(commandTemplateSource, env, `${target.name} command template`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
9231
+ const commandSource = target.command ?? target.command_template ?? target.commandTemplate;
9232
+ const command = commandSource ? resolveString(commandSource, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
9209
9233
  const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
9210
9234
  const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
9211
9235
  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
@@ -9219,7 +9243,7 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
9219
9243
  cwd = import_node_path18.default.dirname(import_node_path18.default.resolve(evalFilePath));
9220
9244
  }
9221
9245
  return {
9222
- commandTemplate,
9246
+ command,
9223
9247
  cwd,
9224
9248
  timeoutMs
9225
9249
  };
@@ -10922,7 +10946,7 @@ async function discoverProviders(registry, baseDir) {
10922
10946
  }
10923
10947
  registry.register(kindName, (target) => {
10924
10948
  return new CliProvider(target.name, {
10925
- commandTemplate: `bun run ${filePath} {PROMPT}`
10949
+ command: `bun run ${filePath} {PROMPT}`
10926
10950
  });
10927
10951
  });
10928
10952
  discoveredKinds.push(kindName);
@@ -11435,13 +11459,13 @@ function toCamelCaseDeep(obj) {
11435
11459
  var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
11436
11460
  var CodeEvaluator = class {
11437
11461
  kind = "code";
11438
- script;
11462
+ command;
11439
11463
  cwd;
11440
11464
  agentTimeoutMs;
11441
11465
  config;
11442
11466
  target;
11443
11467
  constructor(options) {
11444
- this.script = options.script;
11468
+ this.command = options.command ?? options.script ?? [];
11445
11469
  this.cwd = options.cwd;
11446
11470
  this.agentTimeoutMs = options.agentTimeoutMs;
11447
11471
  this.config = options.config;
@@ -11500,7 +11524,7 @@ var CodeEvaluator = class {
11500
11524
  const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
11501
11525
  try {
11502
11526
  const stdout = await executeScript(
11503
- this.script,
11527
+ this.command,
11504
11528
  inputPayload,
11505
11529
  this.agentTimeoutMs,
11506
11530
  this.cwd,
@@ -11514,7 +11538,7 @@ var CodeEvaluator = class {
11514
11538
  const details = parsed?.details && typeof parsed.details === "object" && !Array.isArray(parsed.details) ? parsed.details : void 0;
11515
11539
  const proxyUsage = getProxyUsage?.();
11516
11540
  const evaluatorRawRequest = {
11517
- script: this.script,
11541
+ command: this.command,
11518
11542
  ...this.cwd ? { cwd: this.cwd } : {},
11519
11543
  ...proxyUsage ? {
11520
11544
  target_proxy: {
@@ -11544,7 +11568,7 @@ var CodeEvaluator = class {
11544
11568
  expectedAspectCount: 1,
11545
11569
  reasoning: message,
11546
11570
  evaluatorRawRequest: {
11547
- script: this.script,
11571
+ command: this.command,
11548
11572
  ...this.cwd ? { cwd: this.cwd } : {},
11549
11573
  ...proxyUsage ? {
11550
11574
  target_proxy: {
@@ -14503,7 +14527,7 @@ var llmJudgeFactory = (config, context2) => {
14503
14527
  var codeFactory = (config, context2) => {
14504
14528
  const c = config;
14505
14529
  return new CodeEvaluator({
14506
- script: c.script,
14530
+ command: c.command ?? c.script ?? [],
14507
14531
  cwd: c.resolvedCwd ?? c.cwd,
14508
14532
  agentTimeoutMs: context2.agentTimeoutMs,
14509
14533
  config: c.config,
@@ -14685,7 +14709,7 @@ async function discoverAssertions(registry, baseDir) {
14685
14709
  }
14686
14710
  const factory = (_config, context2) => {
14687
14711
  return new CodeEvaluator({
14688
- script: ["bun", "run", filePath],
14712
+ command: ["bun", "run", filePath],
14689
14713
  agentTimeoutMs: context2.agentTimeoutMs
14690
14714
  });
14691
14715
  };
@@ -15039,7 +15063,8 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
15039
15063
  });
15040
15064
  const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
15041
15065
  const cwd = config.cwd;
15042
- const result = await execFileWithStdin(config.script, stdin, {
15066
+ const commandArray = config.command ?? config.script ?? [];
15067
+ const result = await execFileWithStdin(commandArray, stdin, {
15043
15068
  timeoutMs,
15044
15069
  cwd
15045
15070
  });