@agentv/core 2.7.1-next.4 → 2.7.1-next.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -17,7 +17,7 @@ import {
17
17
  readTextFile,
18
18
  resolveFileReference,
19
19
  resolveTargetDefinition
20
- } from "./chunk-6W5E3VR6.js";
20
+ } from "./chunk-5SV2QC6V.js";
21
21
  import {
22
22
  OtlpJsonFileExporter
23
23
  } from "./chunk-HFSYZHGF.js";
@@ -670,24 +670,24 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
670
670
  continue;
671
671
  }
672
672
  if (typeValue === "code_judge") {
673
- let script;
674
- const rawScript = rawEvaluator.script;
675
- if (typeof rawScript === "string") {
676
- const trimmed = rawScript.trim();
673
+ let command;
674
+ const rawCommand = rawEvaluator.command ?? rawEvaluator.script;
675
+ if (typeof rawCommand === "string") {
676
+ const trimmed = rawCommand.trim();
677
677
  if (trimmed.length === 0) {
678
678
  throw new Error(
679
- `Invalid code_judge script for evaluator '${name}' in '${evalId}': script cannot be empty`
679
+ `Invalid code_judge command for evaluator '${name}' in '${evalId}': command cannot be empty`
680
680
  );
681
681
  }
682
- script = parseCommandToArgv(trimmed);
682
+ command = parseCommandToArgv(trimmed);
683
683
  } else {
684
- script = asStringArray(
685
- rawScript,
686
- `code_judge script for evaluator '${name}' in '${evalId}'`
684
+ command = asStringArray(
685
+ rawCommand,
686
+ `code_judge command for evaluator '${name}' in '${evalId}'`
687
687
  );
688
688
  }
689
- if (!script) {
690
- logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing script`);
689
+ if (!command) {
690
+ logWarning2(`Skipping code_judge evaluator '${name}' in '${evalId}': missing command`);
691
691
  continue;
692
692
  }
693
693
  const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
@@ -732,6 +732,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
732
732
  const knownProps2 = /* @__PURE__ */ new Set([
733
733
  "name",
734
734
  "type",
735
+ "command",
735
736
  "script",
736
737
  "cwd",
737
738
  "weight",
@@ -748,7 +749,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
748
749
  evaluators.push({
749
750
  name,
750
751
  type: "code",
751
- script,
752
+ command,
752
753
  cwd,
753
754
  resolvedCwd,
754
755
  ...weight2 !== void 0 ? { weight: weight2 } : {},
@@ -1350,20 +1351,20 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1350
1351
  let resolvedPromptScript;
1351
1352
  let promptScriptConfig;
1352
1353
  if (isJsonObject2(rawPrompt)) {
1353
- const scriptArray = asStringArray(
1354
- rawPrompt.script,
1355
- `prompt.script for evaluator '${name}' in '${evalId}'`
1354
+ const commandArray = asStringArray(
1355
+ rawPrompt.command ?? rawPrompt.script,
1356
+ `prompt.command for evaluator '${name}' in '${evalId}'`
1356
1357
  );
1357
- if (!scriptArray) {
1358
- throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires script array`);
1358
+ if (!commandArray) {
1359
+ throw new Error(`Evaluator '${name}' in '${evalId}': prompt object requires command array`);
1359
1360
  }
1360
- const scriptPath = scriptArray[scriptArray.length - 1];
1361
- const resolved = await resolveFileReference3(scriptPath, searchRoots);
1361
+ const commandPath = commandArray[commandArray.length - 1];
1362
+ const resolved = await resolveFileReference3(commandPath, searchRoots);
1362
1363
  if (resolved.resolvedPath) {
1363
- resolvedPromptScript = [...scriptArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
1364
+ resolvedPromptScript = [...commandArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
1364
1365
  } else {
1365
1366
  throw new Error(
1366
- `Evaluator '${name}' in '${evalId}': prompt script file not found: ${resolved.displayPath}`
1367
+ `Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
1367
1368
  );
1368
1369
  }
1369
1370
  if (isJsonObject2(rawPrompt.config)) {
@@ -2637,6 +2638,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2637
2638
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
2638
2639
  }
2639
2640
  const suiteWorkspace = parseWorkspaceConfig(suite.workspace, evalFileDir);
2641
+ const suiteInputMessages = expandInputShorthand(suite.input);
2640
2642
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
2641
2643
  const _globalTarget = asString6(rawGlobalExecution?.target) ?? asString6(suite.target);
2642
2644
  const globalExecution = suite.assert !== void 0 ? { ...rawGlobalExecution ?? {}, assert: suite.assert } : rawGlobalExecution;
@@ -2661,14 +2663,17 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
2661
2663
  );
2662
2664
  }
2663
2665
  }
2664
- const inputMessages = resolveInputMessages(evalcase);
2666
+ const testInputMessages = resolveInputMessages(evalcase);
2665
2667
  const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
2666
- if (!id || !outcome || !inputMessages || inputMessages.length === 0) {
2668
+ if (!id || !outcome || !testInputMessages || testInputMessages.length === 0) {
2667
2669
  logError2(
2668
2670
  `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, criteria, and/or input`
2669
2671
  );
2670
2672
  continue;
2671
2673
  }
2674
+ const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
2675
+ const skipDefaults = caseExecution?.skip_defaults === true;
2676
+ const inputMessages = suiteInputMessages && !skipDefaults ? [...suiteInputMessages, ...testInputMessages] : testInputMessages;
2672
2677
  const hasExpectedMessages = expectedMessages.length > 0;
2673
2678
  const guidelinePaths = [];
2674
2679
  const inputTextParts = [];
@@ -2780,16 +2785,16 @@ var loadEvalCaseById = loadTestById;
2780
2785
  function parseWorkspaceScriptConfig(raw, evalFileDir) {
2781
2786
  if (!isJsonObject(raw)) return void 0;
2782
2787
  const obj = raw;
2783
- const script = obj.script;
2784
- if (!Array.isArray(script) || script.length === 0) return void 0;
2785
- const scriptArr = script.filter((s) => typeof s === "string");
2786
- if (scriptArr.length === 0) return void 0;
2788
+ const commandSource = obj.command ?? obj.script;
2789
+ if (!Array.isArray(commandSource) || commandSource.length === 0) return void 0;
2790
+ const commandArr = commandSource.filter((s) => typeof s === "string");
2791
+ if (commandArr.length === 0) return void 0;
2787
2792
  const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
2788
2793
  let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
2789
2794
  if (cwd && !path8.isAbsolute(cwd)) {
2790
2795
  cwd = path8.resolve(evalFileDir, cwd);
2791
2796
  }
2792
- const config = { script: scriptArr };
2797
+ const config = { command: commandArr };
2793
2798
  if (timeoutMs !== void 0) {
2794
2799
  return { ...config, timeout_ms: timeoutMs, ...cwd !== void 0 && { cwd } };
2795
2800
  }
@@ -3867,50 +3872,58 @@ var CliProvider = class {
3867
3872
  await this.ensureHealthy(request.signal);
3868
3873
  const effectiveCwd = request.cwd ?? this.config.cwd;
3869
3874
  const outputFilePath = generateOutputFilePath(request.evalCaseId);
3870
- const templateValues = buildTemplateValues(request, this.config, outputFilePath);
3871
- const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
3875
+ const { values: templateValues, promptFilePath } = await buildTemplateValues(
3876
+ request,
3877
+ this.config,
3878
+ outputFilePath
3879
+ );
3880
+ const renderedCommand = renderTemplate(this.config.command, templateValues);
3872
3881
  if (this.verbose) {
3873
3882
  console.log(
3874
3883
  `[cli-provider:${this.targetName}] cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
3875
3884
  );
3876
3885
  }
3877
- const startTime = Date.now();
3878
- const result = await this.runCommand(renderedCommand, {
3879
- cwd: effectiveCwd,
3880
- env: process.env,
3881
- timeoutMs: this.config.timeoutMs,
3882
- signal: request.signal
3883
- });
3884
- const measuredDurationMs = Date.now() - startTime;
3885
- if (result.failed || (result.exitCode ?? 0) !== 0) {
3886
- if (request.signal?.aborted) {
3887
- throw new Error("CLI provider request was aborted");
3888
- }
3889
- if (result.timedOut) {
3890
- throw new Error(
3891
- `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
3892
- );
3893
- }
3894
- const codeText = result.exitCode !== null ? result.exitCode : "unknown";
3895
- const detail = result.stderr.trim() || result.stdout.trim();
3896
- const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
3897
- throw new Error(message);
3898
- }
3899
- const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
3900
- const parsed = this.parseOutputContent(responseContent);
3901
- return {
3902
- output: parsed.output,
3903
- tokenUsage: parsed.tokenUsage,
3904
- costUsd: parsed.costUsd,
3905
- durationMs: parsed.durationMs ?? measuredDurationMs,
3906
- raw: {
3907
- command: renderedCommand,
3908
- stderr: result.stderr,
3909
- exitCode: result.exitCode ?? 0,
3886
+ try {
3887
+ const startTime = Date.now();
3888
+ const result = await this.runCommand(renderedCommand, {
3910
3889
  cwd: effectiveCwd,
3911
- outputFile: outputFilePath
3890
+ env: process.env,
3891
+ timeoutMs: this.config.timeoutMs,
3892
+ signal: request.signal
3893
+ });
3894
+ const measuredDurationMs = Date.now() - startTime;
3895
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
3896
+ if (request.signal?.aborted) {
3897
+ throw new Error("CLI provider request was aborted");
3898
+ }
3899
+ if (result.timedOut) {
3900
+ throw new Error(
3901
+ `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
3902
+ );
3903
+ }
3904
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
3905
+ const detail = result.stderr.trim() || result.stdout.trim();
3906
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
3907
+ throw new Error(message);
3912
3908
  }
3913
- };
3909
+ const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
3910
+ const parsed = this.parseOutputContent(responseContent);
3911
+ return {
3912
+ output: parsed.output,
3913
+ tokenUsage: parsed.tokenUsage,
3914
+ costUsd: parsed.costUsd,
3915
+ durationMs: parsed.durationMs ?? measuredDurationMs,
3916
+ raw: {
3917
+ command: renderedCommand,
3918
+ stderr: result.stderr,
3919
+ exitCode: result.exitCode ?? 0,
3920
+ cwd: effectiveCwd,
3921
+ outputFile: outputFilePath
3922
+ }
3923
+ };
3924
+ } finally {
3925
+ await cleanupTempFile(promptFilePath, this.keepTempFiles);
3926
+ }
3914
3927
  }
3915
3928
  async invokeBatch(requests) {
3916
3929
  if (requests.length === 0) {
@@ -3933,7 +3946,7 @@ var CliProvider = class {
3933
3946
  batchInputFiles.push(...request.inputFiles);
3934
3947
  }
3935
3948
  }
3936
- const templateValues = buildTemplateValues(
3949
+ const { values: templateValues, promptFilePath } = await buildTemplateValues(
3937
3950
  {
3938
3951
  question: "",
3939
3952
  guidelines: "",
@@ -3944,87 +3957,91 @@ var CliProvider = class {
3944
3957
  this.config,
3945
3958
  outputFilePath
3946
3959
  );
3947
- const renderedCommand = renderTemplate(this.config.commandTemplate, templateValues);
3960
+ const renderedCommand = renderTemplate(this.config.command, templateValues);
3948
3961
  if (this.verbose) {
3949
3962
  console.log(
3950
3963
  `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
3951
3964
  );
3952
3965
  }
3953
- const startTime = Date.now();
3954
- const result = await this.runCommand(renderedCommand, {
3955
- cwd: this.config.cwd,
3956
- env: process.env,
3957
- timeoutMs: this.config.timeoutMs,
3958
- signal: controller.signal
3959
- });
3960
- const measuredDurationMs = Date.now() - startTime;
3961
- if (result.failed || (result.exitCode ?? 0) !== 0) {
3962
- if (controller.signal.aborted) {
3963
- throw new Error("CLI provider request was aborted");
3964
- }
3965
- if (result.timedOut) {
3966
- throw new Error(
3967
- `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
3968
- );
3969
- }
3970
- const codeText = result.exitCode !== null ? result.exitCode : "unknown";
3971
- const detail = result.stderr.trim() || result.stdout.trim();
3972
- const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
3973
- throw new Error(message);
3974
- }
3975
- const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
3976
- const recordsById = this.parseJsonlBatchOutput(responseContent);
3977
- const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
3978
- const responses = requests.map((request) => {
3979
- const evalCaseId = request.evalCaseId;
3980
- if (!evalCaseId) {
3981
- return {
3982
- output: [],
3983
- durationMs: perRequestFallbackMs,
3984
- raw: {
3985
- command: renderedCommand,
3986
- stderr: result.stderr,
3987
- exitCode: result.exitCode ?? 0,
3988
- cwd: this.config.cwd,
3989
- outputFile: outputFilePath
3966
+ try {
3967
+ const startTime = Date.now();
3968
+ const result = await this.runCommand(renderedCommand, {
3969
+ cwd: this.config.cwd,
3970
+ env: process.env,
3971
+ timeoutMs: this.config.timeoutMs,
3972
+ signal: controller.signal
3973
+ });
3974
+ const measuredDurationMs = Date.now() - startTime;
3975
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
3976
+ if (controller.signal.aborted) {
3977
+ throw new Error("CLI provider request was aborted");
3978
+ }
3979
+ if (result.timedOut) {
3980
+ throw new Error(
3981
+ `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
3982
+ );
3983
+ }
3984
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
3985
+ const detail = result.stderr.trim() || result.stdout.trim();
3986
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
3987
+ throw new Error(message);
3988
+ }
3989
+ const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
3990
+ const recordsById = this.parseJsonlBatchOutput(responseContent);
3991
+ const perRequestFallbackMs = Math.round(measuredDurationMs / requests.length);
3992
+ const responses = requests.map((request) => {
3993
+ const evalCaseId = request.evalCaseId;
3994
+ if (!evalCaseId) {
3995
+ return {
3996
+ output: [],
3997
+ durationMs: perRequestFallbackMs,
3998
+ raw: {
3999
+ command: renderedCommand,
4000
+ stderr: result.stderr,
4001
+ exitCode: result.exitCode ?? 0,
4002
+ cwd: this.config.cwd,
4003
+ outputFile: outputFilePath
4004
+ }
4005
+ };
4006
+ }
4007
+ const parsed = recordsById.get(evalCaseId);
4008
+ if (!parsed) {
4009
+ const errorMessage = `Batch output missing id '${evalCaseId}'`;
4010
+ if (this.verbose) {
4011
+ console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
3990
4012
  }
3991
- };
3992
- }
3993
- const parsed = recordsById.get(evalCaseId);
3994
- if (!parsed) {
3995
- const errorMessage = `Batch output missing id '${evalCaseId}'`;
3996
- if (this.verbose) {
3997
- console.warn(`[cli-provider:${this.targetName}] ${errorMessage}`);
4013
+ return {
4014
+ output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
4015
+ durationMs: perRequestFallbackMs,
4016
+ raw: {
4017
+ command: renderedCommand,
4018
+ stderr: result.stderr,
4019
+ exitCode: result.exitCode ?? 0,
4020
+ cwd: this.config.cwd,
4021
+ outputFile: outputFilePath,
4022
+ error: errorMessage
4023
+ }
4024
+ };
3998
4025
  }
3999
4026
  return {
4000
- output: [{ role: "assistant", content: `Error: ${errorMessage}` }],
4001
- durationMs: perRequestFallbackMs,
4027
+ output: parsed.output,
4028
+ tokenUsage: parsed.tokenUsage,
4029
+ costUsd: parsed.costUsd,
4030
+ durationMs: parsed.durationMs ?? perRequestFallbackMs,
4002
4031
  raw: {
4003
4032
  command: renderedCommand,
4004
4033
  stderr: result.stderr,
4005
4034
  exitCode: result.exitCode ?? 0,
4006
4035
  cwd: this.config.cwd,
4007
4036
  outputFile: outputFilePath,
4008
- error: errorMessage
4037
+ recordId: evalCaseId
4009
4038
  }
4010
4039
  };
4011
- }
4012
- return {
4013
- output: parsed.output,
4014
- tokenUsage: parsed.tokenUsage,
4015
- costUsd: parsed.costUsd,
4016
- durationMs: parsed.durationMs ?? perRequestFallbackMs,
4017
- raw: {
4018
- command: renderedCommand,
4019
- stderr: result.stderr,
4020
- exitCode: result.exitCode ?? 0,
4021
- cwd: this.config.cwd,
4022
- outputFile: outputFilePath,
4023
- recordId: evalCaseId
4024
- }
4025
- };
4026
- });
4027
- return responses;
4040
+ });
4041
+ return responses;
4042
+ } finally {
4043
+ await cleanupTempFile(promptFilePath, this.keepTempFiles);
4044
+ }
4028
4045
  }
4029
4046
  /**
4030
4047
  * Parse output content from CLI.
@@ -4139,7 +4156,7 @@ var CliProvider = class {
4139
4156
  return;
4140
4157
  }
4141
4158
  const timeoutMs = healthcheck.timeoutMs ?? this.config.timeoutMs;
4142
- if (healthcheck.type === "http") {
4159
+ if ("url" in healthcheck && healthcheck.url) {
4143
4160
  const controller = new AbortController();
4144
4161
  const timer = timeoutMs ? setTimeout(() => controller.abort(), timeoutMs) : void 0;
4145
4162
  signal?.addEventListener("abort", () => controller.abort(), { once: true });
@@ -4158,50 +4175,70 @@ var CliProvider = class {
4158
4175
  }
4159
4176
  return;
4160
4177
  }
4161
- const renderedCommand = renderTemplate(
4162
- healthcheck.commandTemplate,
4163
- buildTemplateValues(
4164
- {
4165
- question: "",
4166
- guidelines: "",
4167
- inputFiles: [],
4168
- evalCaseId: "healthcheck",
4169
- attempt: 0
4170
- },
4171
- this.config,
4172
- generateOutputFilePath("healthcheck")
4173
- )
4178
+ const hcCommand = "command" in healthcheck ? healthcheck.command : void 0;
4179
+ if (!hcCommand) {
4180
+ throw new Error(`CLI healthcheck for '${this.targetName}': 'command' or 'url' is required`);
4181
+ }
4182
+ const { values: templateValues, promptFilePath } = await buildTemplateValues(
4183
+ {
4184
+ question: "",
4185
+ guidelines: "",
4186
+ inputFiles: [],
4187
+ evalCaseId: "healthcheck",
4188
+ attempt: 0
4189
+ },
4190
+ this.config,
4191
+ generateOutputFilePath("healthcheck")
4174
4192
  );
4193
+ const renderedCommand = renderTemplate(hcCommand, templateValues);
4194
+ const hcCwd = "cwd" in healthcheck ? healthcheck.cwd : void 0;
4175
4195
  if (this.verbose) {
4176
4196
  console.log(
4177
- `[cli-provider:${this.targetName}] (healthcheck) cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
4197
+ `[cli-provider:${this.targetName}] (healthcheck) cwd=${hcCwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
4178
4198
  );
4179
4199
  }
4180
- const result = await this.runCommand(renderedCommand, {
4181
- cwd: healthcheck.cwd ?? this.config.cwd,
4182
- env: process.env,
4183
- timeoutMs,
4184
- signal
4185
- });
4186
- if (result.failed || (result.exitCode ?? 0) !== 0) {
4187
- const codeText = result.exitCode !== null ? result.exitCode : "unknown";
4188
- const detail = result.stderr.trim() || result.stdout.trim();
4189
- const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
4190
- throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
4200
+ try {
4201
+ const result = await this.runCommand(renderedCommand, {
4202
+ cwd: hcCwd ?? this.config.cwd,
4203
+ env: process.env,
4204
+ timeoutMs,
4205
+ signal
4206
+ });
4207
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
4208
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
4209
+ const detail = result.stderr.trim() || result.stdout.trim();
4210
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI healthcheck command exited with code ${codeText}`;
4211
+ throw new Error(`CLI healthcheck failed for '${this.targetName}': ${message}`);
4212
+ }
4213
+ } finally {
4214
+ await cleanupTempFile(promptFilePath, this.keepTempFiles);
4191
4215
  }
4192
4216
  }
4193
4217
  };
4194
- function buildTemplateValues(request, config, outputFilePath) {
4218
+ async function buildTemplateValues(request, config, outputFilePath) {
4195
4219
  const inputFiles = normalizeInputFiles2(request.inputFiles);
4220
+ const promptFilePath = generateOutputFilePath(request.evalCaseId, ".prompt.txt");
4221
+ await fs.writeFile(promptFilePath, request.question ?? "", "utf8");
4196
4222
  return {
4197
- PROMPT: shellEscape(request.question ?? ""),
4198
- GUIDELINES: shellEscape(request.guidelines ?? ""),
4199
- EVAL_ID: shellEscape(request.evalCaseId ?? ""),
4200
- ATTEMPT: shellEscape(String(request.attempt ?? 0)),
4201
- FILES: formatFileList(inputFiles, config.filesFormat),
4202
- OUTPUT_FILE: shellEscape(outputFilePath)
4223
+ values: {
4224
+ PROMPT: shellEscape(request.question ?? ""),
4225
+ PROMPT_FILE: shellEscape(promptFilePath),
4226
+ GUIDELINES: shellEscape(request.guidelines ?? ""),
4227
+ EVAL_ID: shellEscape(request.evalCaseId ?? ""),
4228
+ ATTEMPT: shellEscape(String(request.attempt ?? 0)),
4229
+ FILES: formatFileList(inputFiles, config.filesFormat),
4230
+ OUTPUT_FILE: shellEscape(outputFilePath)
4231
+ },
4232
+ promptFilePath
4203
4233
  };
4204
4234
  }
4235
+ async function cleanupTempFile(filePath, keepTempFiles) {
4236
+ if (!filePath || keepTempFiles) {
4237
+ return;
4238
+ }
4239
+ await fs.unlink(filePath).catch(() => {
4240
+ });
4241
+ }
4205
4242
  function normalizeInputFiles2(inputFiles) {
4206
4243
  if (!inputFiles || inputFiles.length === 0) {
4207
4244
  return void 0;
@@ -8082,7 +8119,7 @@ async function discoverProviders(registry, baseDir) {
8082
8119
  }
8083
8120
  registry.register(kindName, (target) => {
8084
8121
  return new CliProvider(target.name, {
8085
- commandTemplate: `bun run ${filePath} {PROMPT}`
8122
+ command: `bun run ${filePath} {PROMPT}`
8086
8123
  });
8087
8124
  });
8088
8125
  discoveredKinds.push(kindName);
@@ -8595,13 +8632,13 @@ function toCamelCaseDeep(obj) {
8595
8632
  var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
8596
8633
  var CodeEvaluator = class {
8597
8634
  kind = "code";
8598
- script;
8635
+ command;
8599
8636
  cwd;
8600
8637
  agentTimeoutMs;
8601
8638
  config;
8602
8639
  target;
8603
8640
  constructor(options) {
8604
- this.script = options.script;
8641
+ this.command = options.command ?? options.script ?? [];
8605
8642
  this.cwd = options.cwd;
8606
8643
  this.agentTimeoutMs = options.agentTimeoutMs;
8607
8644
  this.config = options.config;
@@ -8660,7 +8697,7 @@ var CodeEvaluator = class {
8660
8697
  const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
8661
8698
  try {
8662
8699
  const stdout = await executeScript(
8663
- this.script,
8700
+ this.command,
8664
8701
  inputPayload,
8665
8702
  this.agentTimeoutMs,
8666
8703
  this.cwd,
@@ -8674,7 +8711,7 @@ var CodeEvaluator = class {
8674
8711
  const details = parsed?.details && typeof parsed.details === "object" && !Array.isArray(parsed.details) ? parsed.details : void 0;
8675
8712
  const proxyUsage = getProxyUsage?.();
8676
8713
  const evaluatorRawRequest = {
8677
- script: this.script,
8714
+ command: this.command,
8678
8715
  ...this.cwd ? { cwd: this.cwd } : {},
8679
8716
  ...proxyUsage ? {
8680
8717
  target_proxy: {
@@ -8704,7 +8741,7 @@ var CodeEvaluator = class {
8704
8741
  expectedAspectCount: 1,
8705
8742
  reasoning: message,
8706
8743
  evaluatorRawRequest: {
8707
- script: this.script,
8744
+ command: this.command,
8708
8745
  ...this.cwd ? { cwd: this.cwd } : {},
8709
8746
  ...proxyUsage ? {
8710
8747
  target_proxy: {
@@ -11634,7 +11671,7 @@ var llmJudgeFactory = (config, context) => {
11634
11671
  var codeFactory = (config, context) => {
11635
11672
  const c = config;
11636
11673
  return new CodeEvaluator({
11637
- script: c.script,
11674
+ command: c.command ?? c.script ?? [],
11638
11675
  cwd: c.resolvedCwd ?? c.cwd,
11639
11676
  agentTimeoutMs: context.agentTimeoutMs,
11640
11677
  config: c.config,
@@ -11816,7 +11853,7 @@ async function discoverAssertions(registry, baseDir) {
11816
11853
  }
11817
11854
  const factory = (_config, context) => {
11818
11855
  return new CodeEvaluator({
11819
- script: ["bun", "run", filePath],
11856
+ command: ["bun", "run", filePath],
11820
11857
  agentTimeoutMs: context.agentTimeoutMs
11821
11858
  });
11822
11859
  };
@@ -12170,7 +12207,8 @@ async function executeWorkspaceScript(config, context, failureMode = "fatal") {
12170
12207
  });
12171
12208
  const timeoutMs = config.timeout_ms ?? (failureMode === "fatal" ? 6e4 : 3e4);
12172
12209
  const cwd = config.cwd;
12173
- const result = await execFileWithStdin(config.script, stdin, {
12210
+ const commandArray = config.command ?? config.script ?? [];
12211
+ const result = await execFileWithStdin(commandArray, stdin, {
12174
12212
  timeoutMs,
12175
12213
  cwd
12176
12214
  });