@agentv/core 0.5.0 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -953,1323 +953,1581 @@ function formatTimeoutSuffix(timeoutMs) {
953
953
  return ` after ${seconds}s`;
954
954
  }
955
955
 
956
- // src/evaluation/providers/mock.ts
957
- var DEFAULT_MOCK_RESPONSE = '{"answer":"Mock provider response. Configure targets.yaml to supply a custom value."}';
958
- var MockProvider = class {
956
+ // src/evaluation/providers/codex.ts
957
+ import { exec as execCallback, spawn } from "node:child_process";
958
+ import { randomUUID } from "node:crypto";
959
+ import { constants as constants2, createWriteStream } from "node:fs";
960
+ import { access as access2, copyFile, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
961
+ import { tmpdir } from "node:os";
962
+ import path4 from "node:path";
963
+ import { promisify as promisify2 } from "node:util";
964
+
965
+ // src/evaluation/providers/preread.ts
966
+ import path3 from "node:path";
967
/**
 * Assemble the prompt document handed to the agent: an optional mandatory
 * pre-read block (guideline files first, then the remaining input files)
 * followed by the `[[ ## user_query ## ]]` marker and the trimmed prompt.
 *
 * @param {object} request - Provider request; `prompt` and `guideline_patterns` are read.
 * @param {string[]|undefined} inputFiles - Paths of files attached to the request.
 * @param {{guidelinePatterns?: unknown, guidelineOverrides?: Set<string>}} [options]
 *   Optional overrides; `guidelinePatterns` wins over `request.guideline_patterns`.
 * @returns {string} The trimmed prompt document.
 */
function buildPromptDocument(request, inputFiles, options) {
  const patterns = options?.guidelinePatterns ?? request.guideline_patterns;
  const guidelines = collectGuidelineFiles(inputFiles, patterns, options?.guidelineOverrides);
  const guidelineLookup = new Set(guidelines);
  // Files already listed as guidelines are not repeated in the input-file list.
  const attachments = collectInputFiles(inputFiles).filter((file) => !guidelineLookup.has(file));
  const preread = buildMandatoryPrereadBlock(guidelines, attachments);
  const segments = preread.length > 0 ? ["\n", preread] : [];
  segments.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
  return segments.join("\n").trim();
}
985
/**
 * Resolve every input file to an absolute path and drop duplicates,
 * keeping first-seen order.
 *
 * @param {string[]|undefined|null} inputFiles - Raw input file paths.
 * @returns {string[]|undefined} Unique absolute paths, or `undefined` when
 *   no input files were supplied.
 */
function normalizeInputFiles2(inputFiles) {
  if (!inputFiles || inputFiles.length === 0) {
    return undefined;
  }
  // A Set preserves insertion order, so the result keeps first-seen order.
  const unique = new Set();
  for (const inputFile of inputFiles) {
    unique.add(path3.resolve(inputFile));
  }
  return [...unique];
}
998
/**
 * Select the input files that count as guideline files.
 *
 * A file qualifies when its absolute path is present in `overrides`, or when
 * `isGuidelineFile` accepts its forward-slash-normalized absolute path for
 * the given patterns. The result is de-duplicated in first-seen order.
 *
 * @param {string[]|undefined|null} inputFiles - Raw input file paths.
 * @param {unknown} guidelinePatterns - Patterns forwarded to `isGuidelineFile`.
 * @param {{has(path: string): boolean}} [overrides] - Absolute paths that are
 *   always treated as guidelines.
 * @returns {string[]} Unique absolute guideline paths (possibly empty).
 */
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
  if (!inputFiles || inputFiles.length === 0) {
    return [];
  }
  const unique = new Set();
  for (const inputFile of inputFiles) {
    const absolutePath = path3.resolve(inputFile);
    // Overrides short-circuit the pattern check, exactly as before.
    const isGuideline =
      overrides?.has(absolutePath) ||
      isGuidelineFile(absolutePath.split(path3.sep).join("/"), guidelinePatterns);
    if (isGuideline) {
      unique.add(absolutePath);
    }
  }
  return [...unique];
}
1020
/**
 * Resolve input files to absolute paths and remove duplicates, keeping
 * first-seen order.
 *
 * @param {string[]|undefined|null} inputFiles - Raw input file paths.
 * @returns {string[]} Unique absolute paths; an empty array when none were supplied.
 */
function collectInputFiles(inputFiles) {
  if (!inputFiles || inputFiles.length === 0) {
    return [];
  }
  const unique = new Set();
  for (const inputFile of inputFiles) {
    unique.add(path3.resolve(inputFile));
  }
  return [...unique];
}
1033
/**
 * Build the instruction block that tells the agent which files it must read
 * before answering. Each file is rendered as a markdown bullet linking its
 * basename to a file URI.
 *
 * @param {string[]} guidelineFiles - Absolute paths of guideline files.
 * @param {string[]} inputFiles - Absolute paths of the remaining input files.
 * @returns {string} The instruction block, or `""` when both lists are empty.
 */
function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
  const hasGuidelines = guidelineFiles.length > 0;
  const hasInputs = inputFiles.length > 0;
  if (!hasGuidelines && !hasInputs) {
    return "";
  }
  const toBulletList = (files) =>
    files
      .map((absolutePath) => `* [${path3.basename(absolutePath)}](${pathToFileUri(absolutePath)})`)
      .join("\n");
  const sections = [];
  if (hasGuidelines) {
    sections.push(`Read all guideline files:\n${toBulletList(guidelineFiles)}.`);
  }
  if (hasInputs) {
    sections.push(`Read all input files:\n${toBulletList(inputFiles)}.`);
  }
  sections.push(
    "If any file is missing, fail with ERROR: missing-file <filename> and stop.",
    "Then apply system_instructions on the user query below."
  );
  return sections.join("\n");
}
1057
/**
 * Convert a filesystem path into a `file://` URI string.
 *
 * Relative paths are resolved against the current working directory and
 * backslashes are turned into forward slashes. Paths that start with a drive
 * letter (`C:/...`) get the extra leading slash (`file:///C:/...`).
 *
 * NOTE(review): characters such as spaces or `#` are not percent-encoded.
 *
 * @param {string} filePath - Absolute or relative filesystem path.
 * @returns {string} The file URI.
 */
function pathToFileUri(filePath) {
  const absolutePath = path3.isAbsolute(filePath) ? filePath : path3.resolve(filePath);
  const withForwardSlashes = absolutePath.split("\\").join("/");
  const startsWithDriveLetter = /^[a-zA-Z]:\//.test(withForwardSlashes);
  const scheme = startsWithDriveLetter ? "file:///" : "file://";
  return `${scheme}${withForwardSlashes}`;
}
1065
+
1066
+ // src/evaluation/providers/codex.ts
1067
+ var execAsync2 = promisify2(execCallback);
1068
+ var WORKSPACE_PREFIX = "agentv-codex-";
1069
+ var PROMPT_FILENAME = "prompt.md";
1070
+ var FILES_DIR = "files";
1071
+ var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
1072
/**
 * Provider that answers a request by running the Codex CLI in a throw-away
 * workspace.
 *
 * Per invocation it mirrors the request's input files into a fresh temp
 * directory, writes the prompt document there, runs the CLI through the
 * injected runner (prompt passed to the runner, `-` as the final CLI arg),
 * parses the CLI output, and finally removes the workspace again.
 */
var CodexProvider = class {
  id;
  kind = "codex";
  targetName;
  supportsBatch = false;
  config;
  runCodex;
  // Memoized promise of the one-time executable lookup (see ensureEnvironmentReady).
  environmentCheck;
  resolvedExecutable;
  /**
   * @param {string} targetName - Name of the configured target.
   * @param {object} config - Codex settings; this class reads `executable`,
   *   `args`, `cwd`, `timeoutMs`, `logDir` and `logFormat`.
   * @param {Function} [runner] - Function that actually runs the CLI;
   *   injectable so the process launch can be replaced.
   */
  constructor(targetName, config, runner = defaultCodexRunner) {
    this.id = `codex:${targetName}`;
    this.targetName = targetName;
    this.config = config;
    this.runCodex = runner;
  }
  /**
   * Run one request through the Codex CLI.
   *
   * @param {object} request - Provider request (`prompt`, `inputFiles`,
   *   `guideline_patterns`, `signal`, `evalCaseId`, `attempt` are read).
   * @returns {Promise<{text: string, raw: object}>} Assistant text plus raw run details.
   * @throws {Error} When the request is already aborted, the executable
   *   cannot be located, the CLI times out, or it exits with a non-zero code.
   */
  async invoke(request) {
    if (request.signal?.aborted) {
      throw new Error("Codex provider request was aborted before execution");
    }
    await this.ensureEnvironmentReady();
    const inputFiles = normalizeInputFiles2(request.inputFiles);
    const originalGuidelines = new Set(
      collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path4.resolve(file))
    );
    const workspaceRoot = await this.createWorkspace();
    // Stream logging is best-effort: failing to set it up must not fail the run.
    const logger = await this.createStreamLogger(request).catch(() => void 0);
    try {
      const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
        inputFiles,
        workspaceRoot,
        originalGuidelines
      );
      const promptContent = buildPromptDocument(request, mirroredInputFiles, {
        guidelinePatterns: request.guideline_patterns,
        guidelineOverrides: guidelineMirrors
      });
      const promptFile = path4.join(workspaceRoot, PROMPT_FILENAME);
      await writeFile(promptFile, promptContent, "utf8");
      const args = this.buildCodexArgs();
      const cwd = this.resolveCwd(workspaceRoot);
      const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
      if (result.timedOut) {
        throw new Error(
          `Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
        );
      }
      if (result.exitCode !== 0) {
        const detail = pickDetail(result.stderr, result.stdout);
        const prefix = `Codex CLI exited with code ${result.exitCode}`;
        throw new Error(detail ? `${prefix}: ${detail}` : prefix);
      }
      const parsed = parseCodexJson(result.stdout);
      const assistantText = extractAssistantText(parsed);
      return {
        text: assistantText,
        raw: {
          response: parsed,
          stdout: result.stdout,
          stderr: result.stderr,
          exitCode: result.exitCode,
          args,
          executable: this.resolvedExecutable ?? this.config.executable,
          promptFile,
          workspace: workspaceRoot,
          inputFiles: mirroredInputFiles,
          logFile: logger?.filePath
        }
      };
    } finally {
      // A failing log close must neither skip workspace cleanup (which would
      // leak the temp directory) nor replace the run's real result or error.
      try {
        await logger?.close();
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        console.warn(`Failed to close Codex stream log ${logger?.filePath}: ${message}`);
      }
      await this.cleanupWorkspace(workspaceRoot);
    }
  }
  /** Run the executable lookup once and reuse the same promise afterwards. */
  async ensureEnvironmentReady() {
    if (!this.environmentCheck) {
      this.environmentCheck = this.validateEnvironment();
    }
    await this.environmentCheck;
  }
  /** Resolve the configured executable and remember the resolved path. */
  async validateEnvironment() {
    this.resolvedExecutable = await locateExecutable(this.config.executable);
  }
  /**
   * @param {string} workspaceRoot - The per-invocation temp workspace.
   * @returns {string} The configured `cwd` (resolved to absolute) or the workspace.
   */
  resolveCwd(workspaceRoot) {
    if (!this.config.cwd) {
      return workspaceRoot;
    }
    return path4.resolve(this.config.cwd);
  }
  /**
   * @returns {string[]} Fixed CLI flags, then any configured extra args,
   *   then a trailing `-`.
   */
  buildCodexArgs() {
    const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
    if (this.config.args && this.config.args.length > 0) {
      args.push(...this.config.args);
    }
    args.push("-");
    return args;
  }
  /**
   * Invoke the runner and translate a missing-executable failure into an
   * actionable error message; every other error is rethrown untouched.
   */
  async executeCodex(args, cwd, promptContent, signal, logger) {
    try {
      return await this.runCodex({
        executable: this.resolvedExecutable ?? this.config.executable,
        args,
        cwd,
        prompt: promptContent,
        timeoutMs: this.config.timeoutMs,
        env: process.env,
        signal,
        onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
        onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
      });
    } catch (error) {
      // Optional chaining: a runner may throw a nullish value.
      if (error?.code === "ENOENT") {
        throw new Error(
          `Codex executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
        );
      }
      throw error;
    }
  }
  /**
   * Copy the input files into `<workspace>/files`.
   *
   * Repeated basenames get a numeric suffix (`name`, `name.1`, `name.2`, ...).
   *
   * @param {string[]|undefined} inputFiles - Absolute source paths.
   * @param {string} workspaceRoot - The per-invocation temp workspace.
   * @param {Set<string>} guidelineOriginals - Source paths that are guidelines.
   * @returns {Promise<{mirroredInputFiles: string[]|undefined, guidelineMirrors: Set<string>}>}
   *   The copied paths and the subset of copies that mirror guideline files.
   */
  async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
    if (!inputFiles || inputFiles.length === 0) {
      return {
        mirroredInputFiles: void 0,
        guidelineMirrors: /* @__PURE__ */ new Set()
      };
    }
    const filesRoot = path4.join(workspaceRoot, FILES_DIR);
    await mkdir(filesRoot, { recursive: true });
    const mirrored = [];
    const guidelineMirrors = /* @__PURE__ */ new Set();
    const nameCounts = /* @__PURE__ */ new Map();
    for (const inputFile of inputFiles) {
      const absoluteSource = path4.resolve(inputFile);
      const baseName = path4.basename(absoluteSource);
      const count = nameCounts.get(baseName) ?? 0;
      nameCounts.set(baseName, count + 1);
      const finalName = count === 0 ? baseName : `${baseName}.${count}`;
      const destination = path4.join(filesRoot, finalName);
      await copyFile(absoluteSource, destination);
      const resolvedDestination = path4.resolve(destination);
      mirrored.push(resolvedDestination);
      if (guidelineOriginals.has(absoluteSource)) {
        guidelineMirrors.add(resolvedDestination);
      }
    }
    return {
      mirroredInputFiles: mirrored,
      guidelineMirrors
    };
  }
  /** Create a fresh temp directory for one invocation. */
  async createWorkspace() {
    return await mkdtemp(path4.join(tmpdir(), WORKSPACE_PREFIX));
  }
  /** Remove the workspace; failures are deliberately ignored (best-effort cleanup). */
  async cleanupWorkspace(workspaceRoot) {
    try {
      await rm(workspaceRoot, { recursive: true, force: true });
    } catch {
    }
  }
  /**
   * @returns {string|undefined} Directory for stream logs, or `undefined`
   *   when stream logging is disabled.
   */
  resolveLogDirectory() {
    const disabled = isCodexLogStreamingDisabled();
    if (disabled) {
      return void 0;
    }
    if (this.config.logDir) {
      return path4.resolve(this.config.logDir);
    }
    return path4.join(process.cwd(), ".agentv", "logs", "codex");
  }
  /**
   * Create the per-invocation stream logger. Any failure is reported with a
   * warning and results in `undefined` (logging is optional).
   */
  async createStreamLogger(request) {
    const logDir = this.resolveLogDirectory();
    if (!logDir) {
      return void 0;
    }
    try {
      await mkdir(logDir, { recursive: true });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
      return void 0;
    }
    const filePath = path4.join(logDir, buildLogFilename(request, this.targetName));
    try {
      const logger = await CodexStreamLogger.create({
        filePath,
        targetName: this.targetName,
        evalCaseId: request.evalCaseId,
        attempt: request.attempt,
        format: this.config.logFormat ?? "summary"
      });
      console.log(`Streaming Codex CLI output to ${filePath}`);
      return logger;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Skipping Codex stream logging for ${filePath}: ${message}`);
      return void 0;
    }
  }
};
1271
/**
 * Appends Codex CLI stdout/stderr to a log file, one formatted line per
 * output line, each prefixed with the elapsed time and its source stream.
 *
 * Logging is best-effort: a failing write stream is reported once with a
 * warning and further output is dropped, instead of surfacing as an
 * unhandled stream `'error'` event.
 */
var CodexStreamLogger = class _CodexStreamLogger {
  filePath;
  stream;
  startedAt = Date.now();
  // Partial (not yet newline-terminated) output per source stream.
  stdoutBuffer = "";
  stderrBuffer = "";
  format;
  // Set once the underlying stream has reported an error.
  streamFailed = false;
  /**
   * @param {string} filePath - Log file to append to.
   * @param {string} format - `"json"` pretty-prints JSON lines; any other
   *   value uses the summary formatter.
   */
  constructor(filePath, format) {
    this.filePath = filePath;
    this.format = format;
    this.stream = createWriteStream(filePath, { flags: "a" });
    // The file is opened asynchronously; without a listener an open or write
    // failure would be an unhandled 'error' event.
    this.stream.on("error", (error) => {
      if (this.streamFailed) {
        return;
      }
      this.streamFailed = true;
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Codex stream logging to ${filePath} failed: ${message}`);
    });
  }
  /**
   * Create a logger and write the header lines.
   *
   * @param {{filePath: string, targetName: string, evalCaseId?: string,
   *   attempt?: number, format: string}} options
   * @returns {Promise<_CodexStreamLogger>}
   */
  static async create(options) {
    const logger = new _CodexStreamLogger(options.filePath, options.format);
    const header = [
      "# Codex CLI stream log",
      `# target: ${options.targetName}`,
      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
      `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
      ""
      // Only absent optional entries are removed; the trailing "" is kept so
      // the header ends with a blank separator line.
    ].filter((line) => line !== void 0);
    logger.writeLines(header);
    return logger;
  }
  /** Buffer a stdout chunk and write out every completed line. */
  handleStdoutChunk(chunk) {
    this.stdoutBuffer += chunk;
    this.flushBuffer("stdout");
  }
  /** Buffer a stderr chunk and write out every completed line. */
  handleStderrChunk(chunk) {
    this.stderrBuffer += chunk;
    this.flushBuffer("stderr");
  }
  /**
   * Flush any buffered output and end the stream. Resolves once the stream
   * has finished or failed; stream failures are reported by the warning
   * above rather than by rejecting.
   */
  async close() {
    this.flushBuffer("stdout");
    this.flushBuffer("stderr");
    this.flushRemainder();
    if (this.streamFailed) {
      return;
    }
    await new Promise((resolve) => {
      this.stream.once("error", () => resolve());
      this.stream.end(() => resolve());
    });
  }
  /** Write each given line followed by a newline. */
  writeLines(lines) {
    for (const line of lines) {
      this.writeLine(line);
    }
  }
  /** Write one newline-terminated line unless the stream has already failed. */
  writeLine(text) {
    if (this.streamFailed) {
      return;
    }
    this.stream.write(`${text}\n`);
  }
  /** Write all complete lines buffered for `source`, keeping the unterminated tail. */
  flushBuffer(source) {
    const buffer = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
    const lines = buffer.split(/\r?\n/);
    const remainder = lines.pop() ?? "";
    if (source === "stdout") {
      this.stdoutBuffer = remainder;
    } else {
      this.stderrBuffer = remainder;
    }
    for (const line of lines) {
      const formatted = this.formatLine(line, source);
      if (formatted) {
        this.writeLine(formatted);
      }
    }
  }
  /**
   * @returns {string|undefined} The formatted log line, or `undefined` for
   *   blank input.
   */
  formatLine(rawLine, source) {
    const trimmed = rawLine.trim();
    if (trimmed.length === 0) {
      return void 0;
    }
    const message = this.format === "json" ? formatCodexJsonLog(trimmed) : formatCodexLogMessage(trimmed, source);
    return `[+${formatElapsed(this.startedAt)}] [${source}] ${message}`;
  }
  /** Write the unterminated tails (stdout first, then stderr) and clear both buffers. */
  flushRemainder() {
    const pending = [
      ["stdout", this.stdoutBuffer],
      ["stderr", this.stderrBuffer]
    ];
    for (const [source, buffered] of pending) {
      const trimmed = buffered.trim();
      if (trimmed.length === 0) {
        continue;
      }
      const formatted = this.formatLine(trimmed, source);
      if (formatted) {
        this.writeLine(formatted);
      }
    }
    this.stdoutBuffer = "";
    this.stderrBuffer = "";
  }
};
1365
/**
 * Report whether Codex stream logging has been switched off through the
 * `AGENTV_CODEX_STREAM_LOGS` environment variable.
 *
 * @returns {boolean} `true` only when the variable is set to `false`, `0`
 *   or `off` (case-insensitive, surrounding whitespace ignored).
 */
function isCodexLogStreamingDisabled() {
  const raw = process.env.AGENTV_CODEX_STREAM_LOGS;
  if (!raw) {
    return false;
  }
  const disablingValues = ["false", "0", "off"];
  return disablingValues.includes(raw.trim().toLowerCase());
}
1098
- function resolveAzureConfig(target, env) {
1099
- const settings = target.settings ?? {};
1100
- const endpointSource = settings.endpoint ?? settings.resource ?? settings.resourceName;
1101
- const apiKeySource = settings.api_key ?? settings.apiKey;
1102
- const deploymentSource = settings.deployment ?? settings.deploymentName ?? settings.model;
1103
- const versionSource = settings.version ?? settings.api_version;
1104
- const temperatureSource = settings.temperature;
1105
- const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
1106
- const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
1107
- const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
1108
- const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
1109
- const version = normalizeAzureApiVersion(
1110
- resolveOptionalString(versionSource, env, `${target.name} api version`)
1111
- );
1112
- const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
1113
- const maxOutputTokens = resolveOptionalNumber(
1114
- maxTokensSource,
1115
- `${target.name} max output tokens`
1116
- );
1117
- return {
1118
- resourceName,
1119
- deploymentName,
1120
- apiKey,
1121
- version,
1122
- temperature,
1123
- maxOutputTokens
1124
- };
1125
- }
1126
- function resolveAnthropicConfig(target, env) {
1127
- const settings = target.settings ?? {};
1128
- const apiKeySource = settings.api_key ?? settings.apiKey;
1129
- const modelSource = settings.model ?? settings.deployment ?? settings.variant;
1130
- const temperatureSource = settings.temperature;
1131
- const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
1132
- const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
1133
- const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
1134
- const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
1135
- return {
1136
- apiKey,
1137
- model,
1138
- temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
1139
- maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
1140
- thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
1141
- };
1142
- }
1143
- function resolveGeminiConfig(target, env) {
1144
- const settings = target.settings ?? {};
1145
- const apiKeySource = settings.api_key ?? settings.apiKey;
1146
- const modelSource = settings.model ?? settings.deployment ?? settings.variant;
1147
- const temperatureSource = settings.temperature;
1148
- const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
1149
- const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
1150
- const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
1151
- allowLiteral: true,
1152
- optionalEnv: true
1153
- }) ?? "gemini-2.5-flash";
1154
- return {
1155
- apiKey,
1156
- model,
1157
- temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
1158
- maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
1159
- };
1160
- }
1161
- function resolveCodexConfig(target, env) {
1162
- const settings = target.settings ?? {};
1163
- const executableSource = settings.executable ?? settings.command ?? settings.binary;
1164
- const argsSource = settings.args ?? settings.arguments;
1165
- const cwdSource = settings.cwd;
1166
- const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
1167
- const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
1168
- allowLiteral: true,
1169
- optionalEnv: true
1170
- }) ?? "codex";
1171
- const args = resolveOptionalStringArray(argsSource, env, `${target.name} codex args`);
1172
- const cwd = resolveOptionalString(cwdSource, env, `${target.name} codex cwd`, {
1173
- allowLiteral: true,
1174
- optionalEnv: true
1175
- });
1176
- const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
1177
- return {
1178
- executable,
1179
- args,
1180
- cwd,
1181
- timeoutMs
1182
- };
1183
- }
1184
- function resolveMockConfig(target) {
1185
- const settings = target.settings ?? {};
1186
- const response = typeof settings.response === "string" ? settings.response : void 0;
1187
- return { response };
1188
- }
1189
- function resolveVSCodeConfig(target, env, insiders) {
1190
- const settings = target.settings ?? {};
1191
- const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
1192
- const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
1193
- allowLiteral: false,
1194
- optionalEnv: true
1195
- }) : void 0;
1196
- const commandSource = settings.vscode_cmd ?? settings.command;
1197
- const waitSource = settings.wait;
1198
- const dryRunSource = settings.dry_run ?? settings.dryRun;
1199
- const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
1200
- const defaultCommand = insiders ? "code-insiders" : "code";
1201
- const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
1202
- return {
1203
- command,
1204
- waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
1205
- dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
1206
- subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
1207
- allowLiteral: true,
1208
- optionalEnv: true
1209
- }),
1210
- workspaceTemplate
1211
- };
1212
- }
1213
- function resolveCliConfig(target, env) {
1214
- const settings = target.settings ?? {};
1215
- const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
1216
- const filesFormat = resolveOptionalLiteralString(
1217
- settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat
1218
- );
1219
- const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
1220
- allowLiteral: true,
1221
- optionalEnv: true
1222
- });
1223
- const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
1224
- const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
1225
- const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
1226
- const commandTemplate = resolveString(
1227
- commandTemplateSource,
1228
- env,
1229
- `${target.name} CLI command template`,
1230
- true
1231
- );
1232
- assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
1233
- return {
1234
- commandTemplate,
1235
- filesFormat,
1236
- cwd,
1237
- env: envOverrides,
1238
- timeoutMs,
1239
- healthcheck
1240
- };
1373
/**
 * Build a unique log file name of the form
 * `<timestamp>_<target>_<evalId>[_attempt-N]_<8 uuid chars>.log`.
 *
 * The timestamp is the current ISO time with `:` and `.` replaced by `-`;
 * the attempt number is shown 1-based.
 *
 * @param {{evalCaseId?: string, attempt?: number}} request - Provider request.
 * @param {string} targetName - Name of the configured target.
 * @returns {string} The log file name (no directory part).
 */
function buildLogFilename(request, targetName) {
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const evalPart = sanitizeForFilename(request.evalCaseId ?? "codex");
  let attemptPart = "";
  if (request.attempt !== undefined) {
    attemptPart = `_attempt-${request.attempt + 1}`;
  }
  const targetPart = sanitizeForFilename(targetName);
  const uniquePart = randomUUID().slice(0, 8);
  return `${stamp}_${targetPart}_${evalPart}${attemptPart}_${uniquePart}.log`;
}
1242
- function resolveEnvOverrides(source, env, targetName) {
1243
- if (source === void 0 || source === null) {
1380
/**
 * Make a value safe to embed in a file name: every run of characters other
 * than letters, digits, `.`, `_` and `-` becomes a single `_`.
 *
 * @param {string} value - Text to sanitize.
 * @returns {string} The sanitized text, or `"codex"` when the result is empty.
 */
function sanitizeForFilename(value) {
  const UNSAFE_RUN = /[^A-Za-z0-9._-]+/g;
  const cleaned = value.replace(UNSAFE_RUN, "_");
  if (cleaned.length === 0) {
    return "codex";
  }
  return cleaned;
}
1384
/**
 * Format the time elapsed since `startedAt` as `MM:SS`, or `HH:MM:SS` once
 * at least one hour has passed. Fractions of a second are dropped.
 *
 * @param {number} startedAt - Start time in epoch milliseconds.
 * @returns {string} The zero-padded elapsed time.
 */
function formatElapsed(startedAt) {
  const twoDigits = (value) => value.toString().padStart(2, "0");
  const totalSeconds = Math.floor((Date.now() - startedAt) / 1e3);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const seconds = totalSeconds % 60;
  const minutesAndSeconds = `${twoDigits(minutes)}:${twoDigits(seconds)}`;
  return hours > 0 ? `${twoDigits(hours)}:${minutesAndSeconds}` : minutesAndSeconds;
}
1394
/**
 * Turn one raw CLI output line into its summary-format log message.
 *
 * A line that parses as JSON and yields an event summary is replaced by that
 * summary. Otherwise the raw line is kept, prefixed with `stderr: ` when it
 * came from stderr.
 *
 * @param {string} rawLine - One line of CLI output.
 * @param {string} source - `"stdout"` or `"stderr"`.
 * @returns {string} The message to log.
 */
function formatCodexLogMessage(rawLine, source) {
  const event = tryParseJsonValue(rawLine);
  const summary = event ? summarizeCodexEvent(event) : undefined;
  if (summary) {
    return summary;
  }
  return source === "stderr" ? `stderr: ${rawLine}` : rawLine;
}
1407
/**
 * Pretty-print a raw CLI output line for the `json` log format.
 *
 * @param {string} rawLine - One line of CLI output.
 * @returns {string} The line re-serialized as JSON with 2-space indentation,
 *   or the raw line when it is not JSON, parses to a falsy value, or cannot
 *   be re-serialized.
 */
function formatCodexJsonLog(rawLine) {
  const value = tryParseJsonValue(rawLine);
  if (value) {
    try {
      return JSON.stringify(value, null, 2);
    } catch {
      // Fall through to the raw line below.
    }
  }
  return rawLine;
}
1418
/**
 * Produce a one-line summary of a parsed Codex event.
 *
 * The message text is taken from the first source that yields one: the event
 * itself, its `item`, its `output`/`content`, and, for completed items, the
 * item's `text`/`content`/`output`.
 *
 * @param {unknown} event - A parsed event value.
 * @returns {string|undefined} `"<type>: <message>"`, the bare message, or,
 *   when no message is found, `"<type>:<itemType>"` or the bare type;
 *   `undefined` for non-object input or when nothing is available.
 */
function summarizeCodexEvent(event) {
  if (!event || typeof event !== "object") {
    return undefined;
  }
  const record = event;
  const type = typeof record.type === "string" ? record.type : undefined;
  let message =
    extractFromEvent(event) ??
    extractFromItem(record.item) ??
    flattenContent(record.output ?? record.content);
  if (!message && type === JSONL_TYPE_ITEM_COMPLETED) {
    const completedItem = record.item;
    if (completedItem && typeof completedItem === "object") {
      const fallback = flattenContent(
        completedItem.text ?? completedItem.content ?? completedItem.output
      );
      if (fallback) {
        message = fallback;
      }
    }
  }
  if (message) {
    return type ? `${type}: ${message}` : message;
  }
  // No message text: describe the event by its type, and item type when present.
  const itemType = typeof record.item?.type === "string" ? record.item.type : undefined;
  if (type && itemType) {
    return `${type}:${itemType}`;
  }
  return type;
}
1260
- function resolveTimeoutMs(source, description) {
1261
- const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
1262
- if (seconds === void 0) {
1453
/**
 * Parse a line as JSON without throwing.
 *
 * @param {string} rawLine - Text that may or may not be JSON.
 * @returns {unknown} The parsed value, or `undefined` when parsing fails.
 */
function tryParseJsonValue(rawLine) {
  let value;
  try {
    value = JSON.parse(rawLine);
  } catch {
    value = undefined;
  }
  return value;
}
1460
/**
 * Resolve the Codex executable to a concrete file path.
 *
 * A candidate containing a path separator is treated as a path (relative
 * ones are resolved against the current directory). A bare command name is
 * looked up with `where` (Windows) or `which` (elsewhere). In both cases the
 * path is passed through `ensureWindowsExecutableVariant` and must exist.
 *
 * @param {string} candidate - Executable path or bare command name.
 * @returns {Promise<string>} The resolved executable path.
 * @throws {Error} When a bare command name cannot be found on PATH; for an
 *   explicit path the file-access error is propagated as-is.
 */
async function locateExecutable(candidate) {
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
  if (includesPathSeparator) {
    const resolved = path4.isAbsolute(candidate) ? candidate : path4.resolve(candidate);
    const executablePath = await ensureWindowsExecutableVariant(resolved);
    await access2(executablePath, constants2.F_OK);
    return executablePath;
  }
  const locator = process.platform === "win32" ? "where" : "which";
  try {
    const stdout = await runExecutableLocator(locator, candidate);
    const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
    const preferred = selectExecutableCandidate(lines);
    if (preferred) {
      const executablePath = await ensureWindowsExecutableVariant(preferred);
      await access2(executablePath, constants2.F_OK);
      return executablePath;
    }
  } catch {
    // Any lookup failure (locator missing, non-zero exit, inaccessible hit)
    // is reported uniformly by the error below.
  }
  throw new Error(`Codex executable '${candidate}' was not found on PATH`);
}
/**
 * Run the PATH locator without a shell and collect its stdout.
 *
 * The candidate is passed as a single argv entry, so spaces or shell
 * metacharacters in a configured executable name are never interpreted.
 *
 * @param {string} locator - `where` or `which`.
 * @param {string} candidate - Command name to look up.
 * @returns {Promise<string>} The locator's stdout; rejects on spawn failure
 *   or a non-zero exit code.
 */
function runExecutableLocator(locator, candidate) {
  return new Promise((resolve, reject) => {
    const child = spawn(locator, [candidate], {
      stdio: ["ignore", "pipe", "ignore"],
      windowsHide: true
    });
    let stdout = "";
    // stdout can be missing when the process could not be spawned at all.
    child.stdout?.setEncoding("utf8");
    child.stdout?.on("data", (chunk) => {
      stdout += chunk;
    });
    child.once("error", reject);
    child.once("close", (code) => {
      if (code === 0) {
        resolve(stdout);
      } else {
        reject(new Error(`${locator} exited with code ${code}`));
      }
    });
  });
}
1270
- function resolveCliHealthcheck(source, env, targetName) {
1271
- if (source === void 0 || source === null) {
1482
+ function selectExecutableCandidate(candidates) {
1483
+ if (candidates.length === 0) {
1272
1484
  return void 0;
1273
1485
  }
1274
- if (typeof source !== "object" || Array.isArray(source)) {
1275
- throw new Error(`${targetName} healthcheck must be an object`);
1486
+ if (process.platform !== "win32") {
1487
+ return candidates[0];
1276
1488
  }
1277
- const candidate = source;
1278
- const type = candidate.type;
1279
- const timeoutMs = resolveTimeoutMs(
1280
- candidate.timeout_seconds ?? candidate.timeoutSeconds,
1281
- `${targetName} healthcheck timeout`
1282
- );
1283
- if (type === "http") {
1284
- const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
1285
- return {
1286
- type: "http",
1287
- url,
1288
- timeoutMs
1289
- };
1489
+ const extensions = getWindowsExecutableExtensions();
1490
+ for (const ext of extensions) {
1491
+ const match = candidates.find((candidate) => candidate.toLowerCase().endsWith(ext));
1492
+ if (match) {
1493
+ return match;
1494
+ }
1290
1495
  }
1291
- if (type === "command") {
1292
- const commandTemplate = resolveString(
1293
- candidate.command_template ?? candidate.commandTemplate,
1294
- env,
1295
- `${targetName} healthcheck command template`,
1296
- true
1297
- );
1298
- assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
1299
- const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
1300
- allowLiteral: true,
1301
- optionalEnv: true
1302
- });
1303
- return {
1304
- type: "command",
1305
- commandTemplate,
1306
- timeoutMs,
1307
- cwd
1308
- };
1496
+ return candidates[0];
1497
+ }
1498
+ async function ensureWindowsExecutableVariant(candidate) {
1499
+ if (process.platform !== "win32") {
1500
+ return candidate;
1309
1501
  }
1310
- throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
1502
+ if (hasExecutableExtension(candidate)) {
1503
+ return candidate;
1504
+ }
1505
+ const extensions = getWindowsExecutableExtensions();
1506
+ for (const ext of extensions) {
1507
+ const withExtension = `${candidate}${ext}`;
1508
+ try {
1509
+ await access2(withExtension, constants2.F_OK);
1510
+ return withExtension;
1511
+ } catch {
1512
+ }
1513
+ }
1514
+ return candidate;
1311
1515
  }
1312
- function assertSupportedCliPlaceholders(template, description) {
1313
- const placeholders = extractCliPlaceholders(template);
1314
- for (const placeholder of placeholders) {
1315
- if (!CLI_PLACEHOLDERS.has(placeholder)) {
1316
- throw new Error(
1317
- `${description} includes unsupported placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS).join(", ")}`
1318
- );
1516
+ function hasExecutableExtension(candidate) {
1517
+ const lower = candidate.toLowerCase();
1518
+ return getWindowsExecutableExtensions().some((ext) => lower.endsWith(ext));
1519
+ }
1520
+ var DEFAULT_WINDOWS_EXTENSIONS = [".com", ".exe", ".bat", ".cmd", ".ps1"];
1521
+ function getWindowsExecutableExtensions() {
1522
+ if (process.platform !== "win32") {
1523
+ return [];
1524
+ }
1525
+ const fromEnv = process.env.PATHEXT?.split(";").map((ext) => ext.trim().toLowerCase()).filter((ext) => ext.length > 0);
1526
+ return fromEnv && fromEnv.length > 0 ? fromEnv : DEFAULT_WINDOWS_EXTENSIONS;
1527
+ }
1528
+ function parseCodexJson(output) {
1529
+ const trimmed = output.trim();
1530
+ if (trimmed.length === 0) {
1531
+ throw new Error("Codex CLI produced no output in --json mode");
1532
+ }
1533
+ try {
1534
+ return JSON.parse(trimmed);
1535
+ } catch {
1536
+ const lineObjects = parseJsonLines(trimmed);
1537
+ if (lineObjects) {
1538
+ return lineObjects;
1539
+ }
1540
+ const lastBrace = trimmed.lastIndexOf("{");
1541
+ if (lastBrace >= 0) {
1542
+ const candidate = trimmed.slice(lastBrace);
1543
+ try {
1544
+ return JSON.parse(candidate);
1545
+ } catch {
1546
+ }
1319
1547
  }
1548
+ const preview = trimmed.slice(0, 200);
1549
+ throw new Error(`Codex CLI emitted invalid JSON: ${preview}${trimmed.length > 200 ? "\u2026" : ""}`);
1320
1550
  }
1321
1551
  }
1322
- function extractCliPlaceholders(template) {
1323
- const matches = template.matchAll(/\{([A-Z_]+)\}/g);
1324
- const results = [];
1325
- for (const match of matches) {
1326
- if (match[1]) {
1327
- results.push(match[1]);
1552
+ function extractAssistantText(parsed) {
1553
+ if (Array.isArray(parsed)) {
1554
+ const text = extractFromEventStream(parsed);
1555
+ if (text) {
1556
+ return text;
1557
+ }
1558
+ }
1559
+ if (!parsed || typeof parsed !== "object") {
1560
+ throw new Error("Codex CLI JSON response did not include an assistant message");
1561
+ }
1562
+ const record = parsed;
1563
+ const eventText = extractFromEvent(record);
1564
+ if (eventText) {
1565
+ return eventText;
1566
+ }
1567
+ const messages = Array.isArray(record.messages) ? record.messages : void 0;
1568
+ if (messages) {
1569
+ for (let index = messages.length - 1; index >= 0; index -= 1) {
1570
+ const entry = messages[index];
1571
+ if (!entry || typeof entry !== "object") {
1572
+ continue;
1573
+ }
1574
+ const role = entry.role;
1575
+ if (role !== "assistant") {
1576
+ continue;
1577
+ }
1578
+ const content = entry.content;
1579
+ const flattened = flattenContent(content);
1580
+ if (flattened) {
1581
+ return flattened;
1582
+ }
1583
+ }
1584
+ }
1585
+ const response = record.response;
1586
+ if (response && typeof response === "object") {
1587
+ const content = response.content;
1588
+ const flattened = flattenContent(content);
1589
+ if (flattened) {
1590
+ return flattened;
1328
1591
  }
1329
1592
  }
1330
- return results;
1593
+ const output = record.output;
1594
+ const flattenedOutput = flattenContent(output);
1595
+ if (flattenedOutput) {
1596
+ return flattenedOutput;
1597
+ }
1598
+ throw new Error("Codex CLI JSON response did not include an assistant message");
1331
1599
  }
1332
- function resolveString(source, env, description, allowLiteral = false) {
1333
- const value = resolveOptionalString(source, env, description, {
1334
- allowLiteral,
1335
- optionalEnv: false
1336
- });
1337
- if (value === void 0) {
1338
- throw new Error(`${description} is required`);
1600
+ function extractFromEventStream(events) {
1601
+ for (let index = events.length - 1; index >= 0; index -= 1) {
1602
+ const candidate = events[index];
1603
+ const text = extractFromEvent(candidate);
1604
+ if (text) {
1605
+ return text;
1606
+ }
1339
1607
  }
1340
- return value;
1608
+ return void 0;
1341
1609
  }
1342
- function resolveOptionalString(source, env, description, options) {
1343
- if (source === void 0 || source === null) {
1344
- return void 0;
1345
- }
1346
- if (typeof source !== "string") {
1347
- throw new Error(`${description} must be a string`);
1348
- }
1349
- const trimmed = source.trim();
1350
- if (trimmed.length === 0) {
1610
+ function extractFromEvent(event) {
1611
+ if (!event || typeof event !== "object") {
1351
1612
  return void 0;
1352
1613
  }
1353
- const envValue = env[trimmed];
1354
- if (envValue !== void 0) {
1355
- if (envValue.trim().length === 0) {
1356
- throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
1614
+ const record = event;
1615
+ const type = typeof record.type === "string" ? record.type : void 0;
1616
+ if (type === JSONL_TYPE_ITEM_COMPLETED) {
1617
+ const item = record.item;
1618
+ const text = extractFromItem(item);
1619
+ if (text) {
1620
+ return text;
1357
1621
  }
1358
- return envValue;
1359
1622
  }
1360
- const allowLiteral = options?.allowLiteral ?? false;
1361
- const optionalEnv = options?.optionalEnv ?? false;
1362
- const looksLikeEnv = isLikelyEnvReference(trimmed);
1363
- if (looksLikeEnv) {
1364
- if (optionalEnv) {
1365
- return void 0;
1366
- }
1367
- if (!allowLiteral) {
1368
- throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
1369
- }
1623
+ const output = record.output ?? record.content;
1624
+ const flattened = flattenContent(output);
1625
+ if (flattened) {
1626
+ return flattened;
1370
1627
  }
1371
- return trimmed;
1628
+ return void 0;
1372
1629
  }
1373
- function resolveOptionalLiteralString(source) {
1374
- if (source === void 0 || source === null) {
1630
+ function extractFromItem(item) {
1631
+ if (!item || typeof item !== "object") {
1375
1632
  return void 0;
1376
1633
  }
1377
- if (typeof source !== "string") {
1378
- throw new Error("expected string value");
1634
+ const record = item;
1635
+ const itemType = typeof record.type === "string" ? record.type : void 0;
1636
+ if (itemType === "agent_message" || itemType === "response" || itemType === "output") {
1637
+ const text = flattenContent(record.text ?? record.content ?? record.output);
1638
+ if (text) {
1639
+ return text;
1640
+ }
1379
1641
  }
1380
- const trimmed = source.trim();
1381
- return trimmed.length > 0 ? trimmed : void 0;
1642
+ return void 0;
1382
1643
  }
1383
- function resolveOptionalNumber(source, description) {
1384
- if (source === void 0 || source === null || source === "") {
1385
- return void 0;
1644
+ function flattenContent(value) {
1645
+ if (typeof value === "string") {
1646
+ return value;
1386
1647
  }
1387
- if (typeof source === "number") {
1388
- return Number.isFinite(source) ? source : void 0;
1648
+ if (Array.isArray(value)) {
1649
+ const parts = value.map((segment) => {
1650
+ if (typeof segment === "string") {
1651
+ return segment;
1652
+ }
1653
+ if (segment && typeof segment === "object" && "text" in segment) {
1654
+ const text = segment.text;
1655
+ return typeof text === "string" ? text : void 0;
1656
+ }
1657
+ return void 0;
1658
+ }).filter((part) => typeof part === "string" && part.length > 0);
1659
+ return parts.length > 0 ? parts.join(" \n") : void 0;
1389
1660
  }
1390
- if (typeof source === "string") {
1391
- const numeric = Number(source);
1392
- if (Number.isFinite(numeric)) {
1393
- return numeric;
1394
- }
1661
+ if (value && typeof value === "object" && "text" in value) {
1662
+ const text = value.text;
1663
+ return typeof text === "string" ? text : void 0;
1395
1664
  }
1396
- throw new Error(`${description} must be a number`);
1665
+ return void 0;
1397
1666
  }
1398
- function resolveOptionalBoolean(source) {
1399
- if (source === void 0 || source === null || source === "") {
1667
+ function parseJsonLines(output) {
1668
+ const lines = output.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
1669
+ if (lines.length <= 1) {
1400
1670
  return void 0;
1401
1671
  }
1402
- if (typeof source === "boolean") {
1403
- return source;
1404
- }
1405
- if (typeof source === "string") {
1406
- const lowered = source.trim().toLowerCase();
1407
- if (lowered === "true" || lowered === "1") {
1408
- return true;
1409
- }
1410
- if (lowered === "false" || lowered === "0") {
1411
- return false;
1672
+ const parsed = [];
1673
+ for (const line of lines) {
1674
+ try {
1675
+ parsed.push(JSON.parse(line));
1676
+ } catch {
1677
+ return void 0;
1412
1678
  }
1413
1679
  }
1414
- throw new Error("expected boolean value");
1415
- }
1416
- function isLikelyEnvReference(value) {
1417
- return /^[A-Z0-9_]+$/.test(value);
1680
+ return parsed;
1418
1681
  }
1419
- function resolveOptionalStringArray(source, env, description) {
1420
- if (source === void 0 || source === null) {
1421
- return void 0;
1422
- }
1423
- if (!Array.isArray(source)) {
1424
- throw new Error(`${description} must be an array of strings`);
1682
+ function pickDetail(stderr, stdout) {
1683
+ const errorText = stderr.trim();
1684
+ if (errorText.length > 0) {
1685
+ return errorText;
1425
1686
  }
1426
- if (source.length === 0) {
1427
- return void 0;
1687
+ const stdoutText = stdout.trim();
1688
+ return stdoutText.length > 0 ? stdoutText : void 0;
1689
+ }
1690
+ function formatTimeoutSuffix2(timeoutMs) {
1691
+ if (!timeoutMs || timeoutMs <= 0) {
1692
+ return "";
1428
1693
  }
1429
- const resolved = [];
1430
- for (let i = 0; i < source.length; i++) {
1431
- const item = source[i];
1432
- if (typeof item !== "string") {
1433
- throw new Error(`${description}[${i}] must be a string`);
1694
+ const seconds = Math.ceil(timeoutMs / 1e3);
1695
+ return ` after ${seconds}s`;
1696
+ }
1697
+ async function defaultCodexRunner(options) {
1698
+ return await new Promise((resolve, reject) => {
1699
+ const child = spawn(options.executable, options.args, {
1700
+ cwd: options.cwd,
1701
+ env: options.env,
1702
+ stdio: ["pipe", "pipe", "pipe"],
1703
+ shell: shouldShellExecute(options.executable)
1704
+ });
1705
+ let stdout = "";
1706
+ let stderr = "";
1707
+ let timedOut = false;
1708
+ const onAbort = () => {
1709
+ child.kill("SIGTERM");
1710
+ };
1711
+ if (options.signal) {
1712
+ if (options.signal.aborted) {
1713
+ onAbort();
1714
+ } else {
1715
+ options.signal.addEventListener("abort", onAbort, { once: true });
1716
+ }
1434
1717
  }
1435
- const trimmed = item.trim();
1436
- if (trimmed.length === 0) {
1437
- throw new Error(`${description}[${i}] cannot be empty`);
1718
+ let timeoutHandle;
1719
+ if (options.timeoutMs && options.timeoutMs > 0) {
1720
+ timeoutHandle = setTimeout(() => {
1721
+ timedOut = true;
1722
+ child.kill("SIGTERM");
1723
+ }, options.timeoutMs);
1724
+ timeoutHandle.unref?.();
1438
1725
  }
1439
- const envValue = env[trimmed];
1440
- if (envValue !== void 0) {
1441
- if (envValue.trim().length === 0) {
1442
- throw new Error(`Environment variable '${trimmed}' for ${description}[${i}] is empty`);
1726
+ child.stdout.setEncoding("utf8");
1727
+ child.stdout.on("data", (chunk) => {
1728
+ stdout += chunk;
1729
+ options.onStdoutChunk?.(chunk);
1730
+ });
1731
+ child.stderr.setEncoding("utf8");
1732
+ child.stderr.on("data", (chunk) => {
1733
+ stderr += chunk;
1734
+ options.onStderrChunk?.(chunk);
1735
+ });
1736
+ child.stdin.end(options.prompt);
1737
+ const cleanup = () => {
1738
+ if (timeoutHandle) {
1739
+ clearTimeout(timeoutHandle);
1443
1740
  }
1444
- resolved.push(envValue);
1445
- } else {
1446
- resolved.push(trimmed);
1447
- }
1741
+ if (options.signal) {
1742
+ options.signal.removeEventListener("abort", onAbort);
1743
+ }
1744
+ };
1745
+ child.on("error", (error) => {
1746
+ cleanup();
1747
+ reject(error);
1748
+ });
1749
+ child.on("close", (code) => {
1750
+ cleanup();
1751
+ resolve({
1752
+ stdout,
1753
+ stderr,
1754
+ exitCode: typeof code === "number" ? code : -1,
1755
+ timedOut
1756
+ });
1757
+ });
1758
+ });
1759
+ }
1760
+ function shouldShellExecute(executable) {
1761
+ if (process.platform !== "win32") {
1762
+ return false;
1448
1763
  }
1449
- return resolved.length > 0 ? resolved : void 0;
1764
+ const lower = executable.toLowerCase();
1765
+ return lower.endsWith(".cmd") || lower.endsWith(".bat") || lower.endsWith(".ps1");
1450
1766
  }
1451
1767
 
1452
- // src/evaluation/providers/vscode.ts
1453
- import { readFile as readFile2 } from "node:fs/promises";
1454
- import path3 from "node:path";
1455
- import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
1456
- var VSCodeProvider = class {
1768
+ // src/evaluation/providers/mock.ts
1769
+ var DEFAULT_MOCK_RESPONSE = '{"answer":"Mock provider response. Configure targets.yaml to supply a custom value."}';
1770
+ var MockProvider = class {
1457
1771
  id;
1458
- kind;
1772
+ kind = "mock";
1459
1773
  targetName;
1460
- supportsBatch = true;
1461
- config;
1462
- constructor(targetName, config, kind) {
1463
- this.id = `${kind}:${targetName}`;
1464
- this.kind = kind;
1774
+ cannedResponse;
1775
+ delayMs;
1776
+ delayMinMs;
1777
+ delayMaxMs;
1778
+ constructor(targetName, config) {
1779
+ this.id = `mock:${targetName}`;
1465
1780
  this.targetName = targetName;
1466
- this.config = config;
1781
+ this.cannedResponse = config.response ?? DEFAULT_MOCK_RESPONSE;
1782
+ this.delayMs = config.delayMs ?? 0;
1783
+ this.delayMinMs = config.delayMinMs ?? 0;
1784
+ this.delayMaxMs = config.delayMaxMs ?? 0;
1467
1785
  }
1468
1786
  async invoke(request) {
1469
- if (request.signal?.aborted) {
1470
- throw new Error("VS Code provider request was aborted before dispatch");
1471
- }
1472
- const inputFiles = normalizeAttachments(request.inputFiles);
1473
- const promptContent = buildPromptDocument(request, inputFiles, request.guideline_patterns);
1474
- const session = await dispatchAgentSession({
1475
- userQuery: promptContent,
1476
- extraAttachments: inputFiles,
1477
- wait: this.config.waitForResponse,
1478
- dryRun: this.config.dryRun,
1479
- vscodeCmd: this.config.command,
1480
- subagentRoot: this.config.subagentRoot,
1481
- workspaceTemplate: this.config.workspaceTemplate,
1482
- silent: true
1483
- });
1484
- if (session.exitCode !== 0 || !session.responseFile) {
1485
- const failure = session.error ?? "VS Code subagent did not produce a response";
1486
- throw new Error(failure);
1487
- }
1488
- if (this.config.dryRun) {
1489
- return {
1490
- text: "",
1491
- raw: {
1492
- session,
1493
- inputFiles
1494
- }
1495
- };
1787
+ const delay = this.calculateDelay();
1788
+ if (delay > 0) {
1789
+ await new Promise((resolve) => setTimeout(resolve, delay));
1496
1790
  }
1497
- const responseText = await readFile2(session.responseFile, "utf8");
1498
1791
  return {
1499
- text: responseText,
1792
+ text: this.cannedResponse,
1500
1793
  raw: {
1501
- session,
1502
- inputFiles
1794
+ prompt: request.prompt,
1795
+ guidelines: request.guidelines
1503
1796
  }
1504
1797
  };
1505
1798
  }
1506
- async invokeBatch(requests) {
1507
- if (requests.length === 0) {
1508
- return [];
1509
- }
1510
- const normalizedRequests = requests.map((req) => ({
1511
- request: req,
1512
- inputFiles: normalizeAttachments(req.inputFiles)
1513
- }));
1514
- const combinedInputFiles = mergeAttachments(
1515
- normalizedRequests.map(({ inputFiles }) => inputFiles)
1516
- );
1517
- const userQueries = normalizedRequests.map(
1518
- ({ request, inputFiles }) => buildPromptDocument(request, inputFiles, request.guideline_patterns)
1519
- );
1520
- const session = await dispatchBatchAgent({
1521
- userQueries,
1522
- extraAttachments: combinedInputFiles,
1523
- wait: this.config.waitForResponse,
1524
- dryRun: this.config.dryRun,
1525
- vscodeCmd: this.config.command,
1526
- subagentRoot: this.config.subagentRoot,
1527
- workspaceTemplate: this.config.workspaceTemplate,
1528
- silent: true
1529
- });
1530
- if (session.exitCode !== 0 || !session.responseFiles) {
1531
- const failure = session.error ?? "VS Code subagent did not produce batch responses";
1532
- throw new Error(failure);
1533
- }
1534
- if (this.config.dryRun) {
1535
- return normalizedRequests.map(({ inputFiles }) => ({
1536
- text: "",
1537
- raw: {
1538
- session,
1539
- inputFiles,
1540
- allInputFiles: combinedInputFiles
1541
- }
1542
- }));
1543
- }
1544
- if (session.responseFiles.length !== requests.length) {
1545
- throw new Error(
1546
- `VS Code batch returned ${session.responseFiles.length} responses for ${requests.length} requests`
1547
- );
1548
- }
1549
- const responses = [];
1550
- for (const [index, responseFile] of session.responseFiles.entries()) {
1551
- const responseText = await readFile2(responseFile, "utf8");
1552
- responses.push({
1553
- text: responseText,
1554
- raw: {
1555
- session,
1556
- inputFiles: normalizedRequests[index]?.inputFiles,
1557
- allInputFiles: combinedInputFiles,
1558
- responseFile
1559
- }
1560
- });
1799
+ calculateDelay() {
1800
+ if (this.delayMinMs > 0 || this.delayMaxMs > 0) {
1801
+ const min = Math.max(0, this.delayMinMs);
1802
+ const max = Math.max(min, this.delayMaxMs);
1803
+ return Math.floor(Math.random() * (max - min + 1)) + min;
1561
1804
  }
1562
- return responses;
1805
+ return this.delayMs;
1563
1806
  }
1564
1807
  };
1565
- function buildPromptDocument(request, attachments, guidelinePatterns) {
1566
- const parts = [];
1567
- const guidelineFiles = collectGuidelineFiles(attachments, guidelinePatterns);
1568
- const attachmentFiles = collectAttachmentFiles(attachments);
1569
- const nonGuidelineAttachments = attachmentFiles.filter(
1570
- (file) => !guidelineFiles.includes(file)
1571
- );
1572
- const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineAttachments);
1573
- if (prereadBlock.length > 0) {
1574
- parts.push("\n", prereadBlock);
1575
- }
1576
- parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
1577
- return parts.join("\n").trim();
1578
- }
1579
- function buildMandatoryPrereadBlock(guidelineFiles, attachmentFiles) {
1580
- if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
1581
- return "";
1582
- }
1583
- const buildList = (files) => files.map((absolutePath) => {
1584
- const fileName = path3.basename(absolutePath);
1585
- const fileUri = pathToFileUri(absolutePath);
1586
- return `* [${fileName}](${fileUri})`;
1587
- });
1588
- const sections = [];
1589
- if (guidelineFiles.length > 0) {
1590
- sections.push(`Read all guideline files:
1591
- ${buildList(guidelineFiles).join("\n")}.`);
1808
+
1809
+ // src/evaluation/providers/targets.ts
1810
+ import { z } from "zod";
1811
+ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
1812
+ var BASE_TARGET_SCHEMA = z.object({
1813
+ name: z.string().min(1, "target name is required"),
1814
+ provider: z.string().min(1, "provider is required"),
1815
+ settings: z.record(z.unknown()).optional(),
1816
+ judge_target: z.string().optional(),
1817
+ workers: z.number().int().min(1).optional()
1818
+ });
1819
+ var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
1820
+ function normalizeAzureApiVersion(value) {
1821
+ if (!value) {
1822
+ return DEFAULT_AZURE_API_VERSION;
1592
1823
  }
1593
- if (attachmentFiles.length > 0) {
1594
- sections.push(`Read all attachment files:
1595
- ${buildList(attachmentFiles).join("\n")}.`);
1824
+ const trimmed = value.trim();
1825
+ if (trimmed.length === 0) {
1826
+ return DEFAULT_AZURE_API_VERSION;
1596
1827
  }
1597
- sections.push(
1598
- "If any file is missing, fail with ERROR: missing-file <filename> and stop.",
1599
- "Then apply system_instructions on the user query below."
1600
- );
1601
- return sections.join("\n");
1828
+ const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
1829
+ return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
1602
1830
  }
1603
- function collectGuidelineFiles(attachments, guidelinePatterns) {
1604
- if (!attachments || attachments.length === 0) {
1605
- return [];
1606
- }
1607
- const unique = /* @__PURE__ */ new Map();
1608
- for (const attachment of attachments) {
1609
- const absolutePath = path3.resolve(attachment);
1610
- const normalized = absolutePath.split(path3.sep).join("/");
1611
- if (isGuidelineFile(normalized, guidelinePatterns)) {
1612
- if (!unique.has(absolutePath)) {
1613
- unique.set(absolutePath, absolutePath);
1614
- }
1615
- }
1831
+ function resolveTargetDefinition(definition, env = process.env) {
1832
+ const parsed = BASE_TARGET_SCHEMA.parse(definition);
1833
+ const provider = parsed.provider.toLowerCase();
1834
+ const providerBatching = resolveOptionalBoolean(
1835
+ parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
1836
+ );
1837
+ switch (provider) {
1838
+ case "azure":
1839
+ case "azure-openai":
1840
+ return {
1841
+ kind: "azure",
1842
+ name: parsed.name,
1843
+ judgeTarget: parsed.judge_target,
1844
+ workers: parsed.workers,
1845
+ providerBatching,
1846
+ config: resolveAzureConfig(parsed, env)
1847
+ };
1848
+ case "anthropic":
1849
+ return {
1850
+ kind: "anthropic",
1851
+ name: parsed.name,
1852
+ judgeTarget: parsed.judge_target,
1853
+ workers: parsed.workers,
1854
+ providerBatching,
1855
+ config: resolveAnthropicConfig(parsed, env)
1856
+ };
1857
+ case "gemini":
1858
+ case "google":
1859
+ case "google-gemini":
1860
+ return {
1861
+ kind: "gemini",
1862
+ name: parsed.name,
1863
+ judgeTarget: parsed.judge_target,
1864
+ workers: parsed.workers,
1865
+ providerBatching,
1866
+ config: resolveGeminiConfig(parsed, env)
1867
+ };
1868
+ case "codex":
1869
+ case "codex-cli":
1870
+ return {
1871
+ kind: "codex",
1872
+ name: parsed.name,
1873
+ judgeTarget: parsed.judge_target,
1874
+ workers: parsed.workers,
1875
+ providerBatching,
1876
+ config: resolveCodexConfig(parsed, env)
1877
+ };
1878
+ case "mock":
1879
+ return {
1880
+ kind: "mock",
1881
+ name: parsed.name,
1882
+ judgeTarget: parsed.judge_target,
1883
+ workers: parsed.workers,
1884
+ providerBatching,
1885
+ config: resolveMockConfig(parsed)
1886
+ };
1887
+ case "vscode":
1888
+ case "vscode-insiders":
1889
+ return {
1890
+ kind: provider,
1891
+ name: parsed.name,
1892
+ judgeTarget: parsed.judge_target,
1893
+ workers: parsed.workers,
1894
+ providerBatching,
1895
+ config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
1896
+ };
1897
+ case "cli":
1898
+ return {
1899
+ kind: "cli",
1900
+ name: parsed.name,
1901
+ judgeTarget: parsed.judge_target,
1902
+ workers: parsed.workers,
1903
+ providerBatching,
1904
+ config: resolveCliConfig(parsed, env)
1905
+ };
1906
+ default:
1907
+ throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
1616
1908
  }
1617
- return Array.from(unique.values());
1618
1909
  }
1619
- function collectAttachmentFiles(attachments) {
1620
- if (!attachments || attachments.length === 0) {
1621
- return [];
1622
- }
1623
- const unique = /* @__PURE__ */ new Map();
1624
- for (const attachment of attachments) {
1625
- const absolutePath = path3.resolve(attachment);
1626
- if (!unique.has(absolutePath)) {
1627
- unique.set(absolutePath, absolutePath);
1628
- }
1629
- }
1630
- return Array.from(unique.values());
1910
+ function resolveAzureConfig(target, env) {
1911
+ const settings = target.settings ?? {};
1912
+ const endpointSource = settings.endpoint ?? settings.resource ?? settings.resourceName;
1913
+ const apiKeySource = settings.api_key ?? settings.apiKey;
1914
+ const deploymentSource = settings.deployment ?? settings.deploymentName ?? settings.model;
1915
+ const versionSource = settings.version ?? settings.api_version;
1916
+ const temperatureSource = settings.temperature;
1917
+ const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
1918
+ const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
1919
+ const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
1920
+ const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
1921
+ const version = normalizeAzureApiVersion(
1922
+ resolveOptionalString(versionSource, env, `${target.name} api version`)
1923
+ );
1924
+ const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
1925
+ const maxOutputTokens = resolveOptionalNumber(
1926
+ maxTokensSource,
1927
+ `${target.name} max output tokens`
1928
+ );
1929
+ return {
1930
+ resourceName,
1931
+ deploymentName,
1932
+ apiKey,
1933
+ version,
1934
+ temperature,
1935
+ maxOutputTokens
1936
+ };
1937
+ }
1938
+ function resolveAnthropicConfig(target, env) {
1939
+ const settings = target.settings ?? {};
1940
+ const apiKeySource = settings.api_key ?? settings.apiKey;
1941
+ const modelSource = settings.model ?? settings.deployment ?? settings.variant;
1942
+ const temperatureSource = settings.temperature;
1943
+ const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
1944
+ const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
1945
+ const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
1946
+ const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
1947
+ return {
1948
+ apiKey,
1949
+ model,
1950
+ temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
1951
+ maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
1952
+ thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
1953
+ };
1631
1954
  }
1632
- function pathToFileUri(filePath) {
1633
- const absolutePath = path3.isAbsolute(filePath) ? filePath : path3.resolve(filePath);
1634
- const normalizedPath = absolutePath.replace(/\\/g, "/");
1635
- if (/^[a-zA-Z]:\//.test(normalizedPath)) {
1636
- return `file:///${normalizedPath}`;
1637
- }
1638
- return `file://${normalizedPath}`;
1955
+ function resolveGeminiConfig(target, env) {
1956
+ const settings = target.settings ?? {};
1957
+ const apiKeySource = settings.api_key ?? settings.apiKey;
1958
+ const modelSource = settings.model ?? settings.deployment ?? settings.variant;
1959
+ const temperatureSource = settings.temperature;
1960
+ const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
1961
+ const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
1962
+ const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
1963
+ allowLiteral: true,
1964
+ optionalEnv: true
1965
+ }) ?? "gemini-2.5-flash";
1966
+ return {
1967
+ apiKey,
1968
+ model,
1969
+ temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
1970
+ maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
1971
+ };
1639
1972
  }
1640
- function normalizeAttachments(attachments) {
1641
- if (!attachments || attachments.length === 0) {
1973
+ function resolveCodexConfig(target, env) {
1974
+ const settings = target.settings ?? {};
1975
+ const executableSource = settings.executable ?? settings.command ?? settings.binary;
1976
+ const argsSource = settings.args ?? settings.arguments;
1977
+ const cwdSource = settings.cwd;
1978
+ const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
1979
+ const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
1980
+ const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
1981
+ const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
1982
+ allowLiteral: true,
1983
+ optionalEnv: true
1984
+ }) ?? "codex";
1985
+ const args = resolveOptionalStringArray(argsSource, env, `${target.name} codex args`);
1986
+ const cwd = resolveOptionalString(cwdSource, env, `${target.name} codex cwd`, {
1987
+ allowLiteral: true,
1988
+ optionalEnv: true
1989
+ });
1990
+ const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
1991
+ const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
1992
+ allowLiteral: true,
1993
+ optionalEnv: true
1994
+ });
1995
+ const logFormat = normalizeCodexLogFormat(logFormatSource);
1996
+ return {
1997
+ executable,
1998
+ args,
1999
+ cwd,
2000
+ timeoutMs,
2001
+ logDir,
2002
+ logFormat
2003
+ };
2004
+ }
2005
/**
 * Canonicalises a configured Codex log format.
 *
 * @param {unknown} value - Raw setting; `undefined`/`null` mean "not configured".
 * @returns {"json" | "summary" | undefined} The trimmed, lower-cased format,
 *   or `undefined` when nothing was configured.
 * @throws {Error} When the value is not a string or names an unknown format.
 */
function normalizeCodexLogFormat(value) {
  if (value === undefined || value === null) {
    return undefined;
  }
  // Non-strings fall through to the default branch and share the same error.
  const candidate = typeof value === "string" ? value.trim().toLowerCase() : undefined;
  switch (candidate) {
    case "json":
    case "summary":
      return candidate;
    default:
      throw new Error("codex log format must be 'summary' or 'json'");
  }
}
1660
- async function ensureVSCodeSubagents(options) {
1661
- const { kind, count, verbose = false } = options;
1662
- const vscodeCmd = kind === "vscode-insiders" ? "code-insiders" : "code";
1663
- const subagentRoot = getSubagentRoot(vscodeCmd);
1664
- try {
1665
- if (verbose) {
1666
- console.log(`Provisioning ${count} subagent(s) via: subagent ${vscodeCmd} provision`);
1667
- }
1668
- const result = await provisionSubagents({
1669
- targetRoot: subagentRoot,
1670
- subagents: count,
1671
- dryRun: false
1672
- });
1673
- if (verbose) {
1674
- if (result.created.length > 0) {
1675
- console.log(`Created ${result.created.length} new subagent(s)`);
1676
- }
1677
- if (result.skippedExisting.length > 0) {
1678
- console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
1679
- }
1680
- console.log(`
1681
- total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
1682
- }
1683
- return {
1684
- provisioned: true,
1685
- message: `Provisioned ${count} subagent(s): ${result.created.length} created, ${result.skippedExisting.length} reused`
1686
- };
1687
- } catch (error) {
1688
- const errorMessage = error instanceof Error ? error.message : String(error);
1689
- if (verbose) {
1690
- console.warn(`Provisioning failed (continuing anyway): ${errorMessage}`);
1691
- }
1692
- return {
1693
- provisioned: false,
1694
- message: `Provisioning failed: ${errorMessage}`
1695
- };
1696
- }
2018
/**
 * Builds the mock provider configuration from a target definition.
 *
 * @param {{ settings?: object }} target - Target whose optional `settings.response` is read.
 * @returns {{ response: string | undefined }} The configured response when it is a
 *   string, otherwise `undefined`.
 */
function resolveMockConfig(target) {
  const { response } = target.settings ?? {};
  return { response: typeof response === "string" ? response : undefined };
}
1698
-
1699
- // src/evaluation/providers/codex.ts
1700
- import { exec as execCallback, spawn } from "node:child_process";
1701
- import { constants as constants2 } from "node:fs";
1702
- import { access as access2, copyFile, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
1703
- import { tmpdir } from "node:os";
1704
- import path5 from "node:path";
1705
- import { promisify as promisify2 } from "node:util";
1706
-
1707
- // src/evaluation/providers/preread.ts
1708
- import path4 from "node:path";
1709
- function buildPromptDocument2(request, inputFiles, options) {
1710
- const parts = [];
1711
- const guidelineFiles = collectGuidelineFiles2(
1712
- inputFiles,
1713
- options?.guidelinePatterns ?? request.guideline_patterns,
1714
- options?.guidelineOverrides
2023
/**
 * Builds the VS Code provider configuration from a target definition.
 *
 * Accepts both snake_case and camelCase setting names. The workspace template
 * setting is first read as a literal string and then passed through
 * `resolveOptionalString` with `allowLiteral: false` / `optionalEnv: true`.
 *
 * @param {{ name: string, settings?: object }} target - Target definition.
 * @param {object} env - Environment map used for variable lookups.
 * @param {boolean} insiders - Selects "code-insiders" instead of "code" as the default command.
 * @returns {{ command: string, waitForResponse: boolean, dryRun: boolean,
 *   subagentRoot: string | undefined, workspaceTemplate: string | undefined }}
 */
function resolveVSCodeConfig(target, env, insiders) {
  const settings = target.settings ?? {};

  const templateReference = resolveOptionalLiteralString(
    settings.workspace_template ?? settings.workspaceTemplate
  );
  let workspaceTemplate;
  if (templateReference) {
    workspaceTemplate = resolveOptionalString(
      templateReference,
      env,
      `${target.name} workspace template path`,
      { allowLiteral: false, optionalEnv: true }
    );
  }

  const fallbackCommand = insiders ? "code-insiders" : "code";
  const command =
    resolveOptionalLiteralString(settings.vscode_cmd ?? settings.command) ?? fallbackCommand;

  // Resolved in the same order as the returned properties so that the first
  // invalid setting reported stays the same.
  const waitForResponse = resolveOptionalBoolean(settings.wait) ?? true;
  const dryRun = resolveOptionalBoolean(settings.dry_run ?? settings.dryRun) ?? false;
  const subagentRoot = resolveOptionalString(
    settings.subagent_root ?? settings.subagentRoot,
    env,
    `${target.name} subagent root`,
    { allowLiteral: true, optionalEnv: true }
  );

  return { command, waitForResponse, dryRun, subagentRoot, workspaceTemplate };
}
2047
/**
 * Builds the CLI provider configuration from a target definition.
 *
 * Accepts both snake_case and camelCase setting names; `attachments_format` /
 * `attachmentsFormat` are accepted as fallbacks for the files format. The
 * command template is mandatory and is checked for unsupported placeholders.
 *
 * @param {{ name: string, settings?: object }} target - Target definition.
 * @param {object} env - Environment map used for variable lookups.
 * @returns {{ commandTemplate: string, filesFormat: string | undefined,
 *   cwd: string | undefined, env: object | undefined,
 *   timeoutMs: number | undefined, healthcheck: object | undefined }}
 * @throws {Error} Propagated from the individual resolvers when a setting is invalid.
 */
function resolveCliConfig(target, env) {
  const settings = target.settings ?? {};
  const templateLabel = `${target.name} CLI command template`;

  const filesFormat = resolveOptionalLiteralString(
    settings.files_format ??
      settings.filesFormat ??
      settings.attachments_format ??
      settings.attachmentsFormat
  );
  const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
    allowLiteral: true,
    optionalEnv: true
  });
  const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
  const timeoutMs = resolveTimeoutMs(
    settings.timeout_seconds ?? settings.timeoutSeconds,
    `${target.name} timeout`
  );
  const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);

  // The template is resolved last, after all optional settings were validated.
  const commandTemplate = resolveString(
    settings.command_template ?? settings.commandTemplate,
    env,
    templateLabel,
    true
  );
  assertSupportedCliPlaceholders(commandTemplate, templateLabel);

  return {
    commandTemplate,
    filesFormat,
    cwd,
    env: envOverrides,
    timeoutMs,
    healthcheck
  };
}
1727
- function normalizeInputFiles2(inputFiles) {
1728
- if (!inputFiles || inputFiles.length === 0) {
2076
/**
 * Resolves a map of environment-variable overrides for a target.
 *
 * Every value must be a string and is passed through `resolveString`.
 *
 * @param {unknown} source - Raw `env` setting; `undefined`/`null` mean "not configured".
 * @param {object} env - Environment map used for variable lookups.
 * @param {string} targetName - Target name used as an error-message prefix.
 * @returns {Record<string, string> | undefined} Resolved overrides, or `undefined`
 *   when nothing was configured or the map ends up empty.
 * @throws {Error} When `source` is not a plain object map or a value is not a string.
 */
function resolveEnvOverrides(source, env, targetName) {
  if (source === undefined || source === null) {
    return undefined;
  }
  if (Array.isArray(source) || typeof source !== "object") {
    throw new Error(`${targetName} env overrides must be an object map of strings`);
  }

  const overrides = {};
  Object.entries(source).forEach(([name, raw]) => {
    if (typeof raw !== "string") {
      throw new Error(`${targetName} env override '${name}' must be a string`);
    }
    overrides[name] = resolveString(raw, env, `${targetName} env override '${name}'`);
  });

  if (Object.keys(overrides).length === 0) {
    return undefined;
  }
  return overrides;
}
1775
- function buildMandatoryPrereadBlock2(guidelineFiles, inputFiles) {
1776
- if (guidelineFiles.length === 0 && inputFiles.length === 0) {
1777
- return "";
1778
- }
1779
- const buildList = (files) => files.map((absolutePath) => {
1780
- const fileName = path4.basename(absolutePath);
1781
- const fileUri = pathToFileUri2(absolutePath);
1782
- return `* [${fileName}](${fileUri})`;
1783
- });
1784
- const sections = [];
1785
- if (guidelineFiles.length > 0) {
1786
- sections.push(`Read all guideline files:
1787
- ${buildList(guidelineFiles).join("\n")}.`);
2094
/**
 * Converts a timeout expressed in seconds into whole milliseconds.
 *
 * @param {unknown} source - Raw timeout setting (number or numeric string), parsed
 *   by `resolveOptionalNumber`.
 * @param {string} description - Human-readable setting name used in error messages.
 * @returns {number | undefined} Timeout in milliseconds (always >= 1), or `undefined`
 *   when no timeout was configured.
 * @throws {Error} When the value is not numeric or is not greater than zero.
 */
function resolveTimeoutMs(source, description) {
  const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
  if (seconds === undefined) {
    return undefined;
  }
  if (seconds <= 0) {
    throw new Error(`${description} must be greater than zero seconds`);
  }
  // A positive value below 1 ms would floor to 0, which contradicts the
  // "greater than zero" contract enforced above; clamp to the smallest
  // representable positive timeout instead.
  return Math.max(1, Math.floor(seconds * 1e3));
}
1799
- function pathToFileUri2(filePath) {
1800
- const absolutePath = path4.isAbsolute(filePath) ? filePath : path4.resolve(filePath);
1801
- const normalizedPath = absolutePath.replace(/\\/g, "/");
1802
- if (/^[a-zA-Z]:\//.test(normalizedPath)) {
1803
- return `file:///${normalizedPath}`;
2104
/**
 * Resolves the optional healthcheck definition of a CLI target.
 *
 * Two shapes are supported, selected by `type`:
 *   - "http":    requires `url`.
 *   - "command": requires `command_template` / `commandTemplate`, optional `cwd`.
 * Both accept `timeout_seconds` / `timeoutSeconds`, which is validated before
 * the type is checked.
 *
 * @param {unknown} source - Raw healthcheck setting; `undefined`/`null` mean "none".
 * @param {object} env - Environment map used for variable lookups.
 * @param {string} targetName - Target name used as an error-message prefix.
 * @returns {object | undefined} The resolved healthcheck, or `undefined` when absent.
 * @throws {Error} When the setting is not an object, the type is unknown, or a
 *   nested resolver rejects a value.
 */
function resolveCliHealthcheck(source, env, targetName) {
  if (source === undefined || source === null) {
    return undefined;
  }
  if (Array.isArray(source) || typeof source !== "object") {
    throw new Error(`${targetName} healthcheck must be an object`);
  }

  const kind = source.type;
  const timeoutMs = resolveTimeoutMs(
    source.timeout_seconds ?? source.timeoutSeconds,
    `${targetName} healthcheck timeout`
  );

  switch (kind) {
    case "http": {
      const url = resolveString(source.url, env, `${targetName} healthcheck URL`);
      return { type: "http", url, timeoutMs };
    }
    case "command": {
      const templateLabel = `${targetName} healthcheck command template`;
      const commandTemplate = resolveString(
        source.command_template ?? source.commandTemplate,
        env,
        templateLabel,
        true
      );
      assertSupportedCliPlaceholders(commandTemplate, templateLabel);
      const cwd = resolveOptionalString(source.cwd, env, `${targetName} healthcheck cwd`, {
        allowLiteral: true,
        optionalEnv: true
      });
      return { type: "command", commandTemplate, timeoutMs, cwd };
    }
    default:
      throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
  }
}
2146
/**
 * Verifies that a command template only uses placeholders listed in
 * `CLI_PLACEHOLDERS`.
 *
 * @param {string} template - Command template to inspect.
 * @param {string} description - Human-readable setting name used in the error message.
 * @returns {void}
 * @throws {Error} Naming the first unsupported placeholder found, together with
 *   the supported ones.
 */
function assertSupportedCliPlaceholders(template, description) {
  const offender = extractCliPlaceholders(template).find(
    (name) => !CLI_PLACEHOLDERS.has(name)
  );
  if (offender === undefined) {
    return;
  }
  throw new Error(
    `${description} includes unsupported placeholder '{${offender}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS).join(", ")}`
  );
}
2156
/**
 * Lists the placeholder names used in a command template.
 *
 * A placeholder is a run of upper-case letters and underscores wrapped in
 * braces, e.g. `{PROMPT}`. Names are returned in order of appearance and
 * duplicates are kept.
 *
 * @param {string} template - Command template to scan.
 * @returns {string[]} Placeholder names without the surrounding braces.
 */
function extractCliPlaceholders(template) {
  // The capture group uses `+`, so every match carries a non-empty name.
  return Array.from(template.matchAll(/\{([A-Z_]+)\}/g), (hit) => hit[1]);
}
2166
/**
 * Resolves a mandatory string setting.
 *
 * Delegates to `resolveOptionalString` with `optionalEnv: false` and rejects an
 * absent result.
 *
 * @param {unknown} source - Raw setting value.
 * @param {object} env - Environment map used for variable lookups.
 * @param {string} description - Human-readable setting name used in error messages.
 * @param {boolean} [allowLiteral=false] - Forwarded to `resolveOptionalString`.
 * @returns {string} The resolved value.
 * @throws {Error} When the setting resolves to nothing (`"<description> is required"`).
 */
function resolveString(source, env, description, allowLiteral = false) {
  const resolved = resolveOptionalString(source, env, description, {
    allowLiteral,
    optionalEnv: false
  });
  if (resolved !== undefined) {
    return resolved;
  }
  throw new Error(`${description} is required`);
}
2176
/**
 * Resolves an optional string setting that may name an environment variable.
 *
 * Resolution order:
 *   1. `undefined`, `null` and blank strings resolve to `undefined`.
 *   2. If the trimmed value is a key of `env` holding a string, that string is
 *      returned (a blank environment value is an error).
 *   3. Otherwise, if the value looks like an environment variable name
 *      (see `isLikelyEnvReference`): return `undefined` when `optionalEnv` is
 *      set, throw when literals are not allowed, else fall through.
 *   4. The trimmed value itself is returned as a literal.
 *
 * @param {unknown} source - Raw setting value.
 * @param {object} env - Environment map used for variable lookups.
 * @param {string} description - Human-readable setting name used in error messages.
 * @param {{ allowLiteral?: boolean, optionalEnv?: boolean }} [options]
 * @returns {string | undefined} The resolved value, or `undefined` when unset.
 * @throws {Error} When the value is not a string, the referenced variable is
 *   blank, or a required variable is not set.
 */
function resolveOptionalString(source, env, description, options) {
  if (source === undefined || source === null) {
    return undefined;
  }
  if (typeof source !== "string") {
    throw new Error(`${description} must be a string`);
  }
  const trimmed = source.trim();
  if (trimmed.length === 0) {
    return undefined;
  }

  // Only string values count as environment hits. A bare `env[trimmed]` lookup
  // also finds inherited members such as `toString` or `constructor`, which
  // previously crashed on `.trim()` instead of being treated as literals.
  const looked = env[trimmed];
  const envValue = typeof looked === "string" ? looked : undefined;
  if (envValue !== undefined) {
    if (envValue.trim().length === 0) {
      throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
    }
    return envValue;
  }

  const allowLiteral = options?.allowLiteral ?? false;
  const optionalEnv = options?.optionalEnv ?? false;
  if (isLikelyEnvReference(trimmed)) {
    if (optionalEnv) {
      return undefined;
    }
    if (!allowLiteral) {
      throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
    }
  }
  return trimmed;
}
2207
/**
 * Reads an optional setting as a literal string (no environment lookup).
 *
 * @param {unknown} source - Raw setting value.
 * @returns {string | undefined} The trimmed string, or `undefined` when the
 *   setting is absent or blank.
 * @throws {Error} When the value is present but not a string.
 */
function resolveOptionalLiteralString(source) {
  if (source === undefined || source === null) {
    return undefined;
  }
  if (typeof source !== "string") {
    throw new Error("expected string value");
  }
  // An empty string is falsy, so blank input collapses to `undefined`.
  return source.trim() || undefined;
}
1991
- function selectExecutableCandidate(candidates) {
1992
- if (candidates.length === 0) {
2217
/**
 * Reads an optional numeric setting given as a number or a numeric string.
 *
 * @param {unknown} source - Raw setting value.
 * @param {string} description - Human-readable setting name used in error messages.
 * @returns {number | undefined} The finite number, or `undefined` when the
 *   setting is absent, blank, or a non-finite number (NaN / Infinity).
 * @throws {Error} When the value is neither a number nor a numeric string.
 */
function resolveOptionalNumber(source, description) {
  if (source === undefined || source === null) {
    return undefined;
  }
  if (typeof source === "number") {
    return Number.isFinite(source) ? source : undefined;
  }
  if (typeof source === "string") {
    const text = source.trim();
    // `Number("   ")` is 0, so blank input must be treated as "unset" here,
    // exactly like the empty string, rather than being coerced to zero.
    if (text.length === 0) {
      return undefined;
    }
    const numeric = Number(text);
    if (Number.isFinite(numeric)) {
      return numeric;
    }
  }
  throw new Error(`${description} must be a number`);
}
2007
- async function ensureWindowsExecutableVariant(candidate) {
2008
- if (process.platform !== "win32") {
2009
- return candidate;
2232
/**
 * Reads an optional boolean setting.
 *
 * Accepts real booleans and the case-insensitive strings "true"/"1" and
 * "false"/"0" (surrounding whitespace ignored).
 *
 * @param {unknown} source - Raw setting value.
 * @returns {boolean | undefined} The parsed flag, or `undefined` when the
 *   setting is absent or the empty string.
 * @throws {Error} When the value cannot be interpreted as a boolean.
 */
function resolveOptionalBoolean(source) {
  if (source === undefined || source === null || source === "") {
    return undefined;
  }
  switch (typeof source) {
    case "boolean":
      return source;
    case "string": {
      const token = source.trim().toLowerCase();
      if (token === "true" || token === "1") {
        return true;
      }
      if (token === "false" || token === "0") {
        return false;
      }
      break;
    }
    default:
      break;
  }
  throw new Error("expected boolean value");
}
2025
- function hasExecutableExtension(candidate) {
2026
- const lower = candidate.toLowerCase();
2027
- return getWindowsExecutableExtensions().some((ext) => lower.endsWith(ext));
2250
/**
 * Heuristically decides whether a setting value is the name of an environment
 * variable rather than a literal.
 *
 * A value qualifies when it consists solely of upper-case letters, digits and
 * underscores AND contains at least one letter or underscore. The second
 * condition keeps digits-only literals such as "8080" from being mistaken for
 * (unset) variable references.
 *
 * @param {string} value - Trimmed setting value.
 * @returns {boolean} `true` when the value looks like an environment variable name.
 */
function isLikelyEnvReference(value) {
  return /^[A-Z0-9_]+$/.test(value) && /[A-Z_]/.test(value);
}
2029
- var DEFAULT_WINDOWS_EXTENSIONS = [".com", ".exe", ".bat", ".cmd", ".ps1"];
2030
- function getWindowsExecutableExtensions() {
2031
- if (process.platform !== "win32") {
2032
- return [];
2253
/**
 * Resolves an optional array of string settings, substituting any item that
 * names a variable present in `env` with that variable's value.
 *
 * @param {unknown} source - Raw setting; `undefined`/`null` mean "not configured".
 * @param {object} env - Environment map used for variable lookups.
 * @param {string} description - Human-readable setting name used in error messages.
 * @returns {string[] | undefined} Resolved items, or `undefined` when the setting
 *   is absent or an empty array.
 * @throws {Error} When `source` is not an array, an item is not a string or is
 *   blank, or a referenced environment variable is blank.
 */
function resolveOptionalStringArray(source, env, description) {
  if (source === undefined || source === null) {
    return undefined;
  }
  if (!Array.isArray(source)) {
    throw new Error(`${description} must be an array of strings`);
  }
  if (source.length === 0) {
    return undefined;
  }
  return source.map((item, i) => {
    if (typeof item !== "string") {
      throw new Error(`${description}[${i}] must be a string`);
    }
    const trimmed = item.trim();
    if (trimmed.length === 0) {
      throw new Error(`${description}[${i}] cannot be empty`);
    }
    // Only string values count as environment hits; inherited members such as
    // `toString` or `constructor` are not strings and must stay literals
    // instead of crashing on `.trim()`.
    const envValue = env[trimmed];
    if (typeof envValue !== "string") {
      return trimmed;
    }
    if (envValue.trim().length === 0) {
      throw new Error(`Environment variable '${trimmed}' for ${description}[${i}] is empty`);
    }
    return envValue;
  });
}
2061
- function extractAssistantText(parsed) {
2062
- if (Array.isArray(parsed)) {
2063
- const text = extractFromEventStream(parsed);
2064
- if (text) {
2065
- return text;
2066
- }
2067
- }
2068
- if (!parsed || typeof parsed !== "object") {
2069
- throw new Error("Codex CLI JSON response did not include an assistant message");
2070
- }
2071
- const record = parsed;
2072
- const eventText = extractFromEvent(record);
2073
- if (eventText) {
2074
- return eventText;
2285
+
2286
+ // src/evaluation/providers/vscode.ts
2287
+ import { readFile as readFile2 } from "node:fs/promises";
2288
+ import path5 from "node:path";
2289
+ import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
2290
/**
 * Provider that dispatches prompts to a VS Code subagent and reads the
 * response back from the file(s) reported by the dispatched session.
 */
var VSCodeProvider = class {
  id;
  kind;
  targetName;
  supportsBatch = true;
  config;

  /**
   * @param {string} targetName - Name of the configured target.
   * @param {object} config - Resolved VS Code configuration (command, waitForResponse,
   *   dryRun, subagentRoot, workspaceTemplate).
   * @param {string} kind - Provider kind; also used as the prefix of `id`.
   */
  constructor(targetName, config, kind) {
    this.targetName = targetName;
    this.kind = kind;
    this.config = config;
    this.id = `${kind}:${targetName}`;
  }

  /**
   * Dispatches a single request and returns the subagent's response.
   *
   * @param {object} request - Request carrying `prompt`, optional `inputFiles`,
   *   `guideline_patterns` and `signal`.
   * @returns {Promise<{ text: string, raw: object }>} Response text (empty in
   *   dry-run mode) plus the raw session and normalized input files.
   * @throws {Error} When the request is already aborted, or the session exits
   *   non-zero or reports no response file.
   */
  async invoke(request) {
    if (request.signal?.aborted) {
      throw new Error("VS Code provider request was aborted before dispatch");
    }

    const inputFiles = normalizeAttachments(request.inputFiles);
    const userQuery = buildPromptDocument2(request, inputFiles, request.guideline_patterns);
    const session = await dispatchAgentSession({
      userQuery,
      extraAttachments: inputFiles,
      wait: this.config.waitForResponse,
      dryRun: this.config.dryRun,
      vscodeCmd: this.config.command,
      subagentRoot: this.config.subagentRoot,
      workspaceTemplate: this.config.workspaceTemplate,
      silent: true
    });

    const succeeded = session.exitCode === 0 && Boolean(session.responseFile);
    if (!succeeded) {
      throw new Error(session.error ?? "VS Code subagent did not produce a response");
    }

    // Dry runs skip reading the response file and report empty text.
    const text = this.config.dryRun ? "" : await readFile2(session.responseFile, "utf8");
    return {
      text,
      raw: {
        session,
        inputFiles
      }
    };
  }

  /**
   * Dispatches several requests as one batch session.
   *
   * @param {object[]} requests - Requests shaped like the argument of `invoke`.
   * @returns {Promise<Array<{ text: string, raw: object }>>} One response per
   *   request, in request order; an empty array for an empty batch.
   * @throws {Error} When the session exits non-zero, reports no response files,
   *   or (outside dry-run mode) returns a different number of responses than
   *   requests.
   */
  async invokeBatch(requests) {
    if (requests.length === 0) {
      return [];
    }

    const prepared = requests.map((request) => ({
      request,
      inputFiles: normalizeAttachments(request.inputFiles)
    }));
    const combinedInputFiles = mergeAttachments(prepared.map((entry) => entry.inputFiles));
    const userQueries = prepared.map((entry) =>
      buildPromptDocument2(entry.request, entry.inputFiles, entry.request.guideline_patterns)
    );

    const session = await dispatchBatchAgent({
      userQueries,
      extraAttachments: combinedInputFiles,
      wait: this.config.waitForResponse,
      dryRun: this.config.dryRun,
      vscodeCmd: this.config.command,
      subagentRoot: this.config.subagentRoot,
      workspaceTemplate: this.config.workspaceTemplate,
      silent: true
    });

    const succeeded = session.exitCode === 0 && Boolean(session.responseFiles);
    if (!succeeded) {
      throw new Error(session.error ?? "VS Code subagent did not produce batch responses");
    }

    if (this.config.dryRun) {
      return prepared.map((entry) => ({
        text: "",
        raw: {
          session,
          inputFiles: entry.inputFiles,
          allInputFiles: combinedInputFiles
        }
      }));
    }

    if (session.responseFiles.length !== requests.length) {
      throw new Error(
        `VS Code batch returned ${session.responseFiles.length} responses for ${requests.length} requests`
      );
    }

    // Response files are read one after another, in the order reported.
    const responses = [];
    let position = 0;
    for (const responseFile of session.responseFiles) {
      const text = await readFile2(responseFile, "utf8");
      responses.push({
        text,
        raw: {
          session,
          inputFiles: prepared[position]?.inputFiles,
          allInputFiles: combinedInputFiles,
          responseFile
        }
      });
      position += 1;
    }
    return responses;
  }
};
2399
/**
 * Assembles the prompt document sent to the VS Code subagent.
 *
 * The document consists of an optional mandatory pre-read block (guideline
 * files first, then the remaining attachments) followed by the
 * `[[ ## user_query ## ]]` section holding the trimmed prompt.
 *
 * @param {{ prompt: string }} request - Request whose prompt is embedded.
 * @param {string[] | undefined} attachments - Attachment paths.
 * @param {unknown} guidelinePatterns - Patterns forwarded to `collectGuidelineFiles2`.
 * @returns {string} The assembled, trimmed document.
 */
function buildPromptDocument2(request, attachments, guidelinePatterns) {
  const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
  const otherAttachments = collectAttachmentFiles(attachments).filter(
    (file) => !guidelineFiles.includes(file)
  );
  const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, otherAttachments);

  const segments = prereadBlock.length > 0 ? ["\n", prereadBlock] : [];
  segments.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
  return segments.join("\n").trim();
}
2119
- function extractFromEvent(event) {
2120
- if (!event || typeof event !== "object") {
2121
- return void 0;
2413
/**
 * Renders the "mandatory pre-read" instructions listing the files the agent
 * must read before answering.
 *
 * Each file is rendered as a markdown bullet linking its base name to the URI
 * produced by `pathToFileUri2`.
 *
 * @param {string[]} guidelineFiles - Paths of guideline files.
 * @param {string[]} attachmentFiles - Paths of the remaining attachments.
 * @returns {string} The instruction block, or "" when both lists are empty.
 */
function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
  const hasGuidelines = guidelineFiles.length > 0;
  const hasAttachments = attachmentFiles.length > 0;
  if (!hasGuidelines && !hasAttachments) {
    return "";
  }

  const toBullets = (files) =>
    files
      .map((filePath) => `* [${path5.basename(filePath)}](${pathToFileUri2(filePath)})`)
      .join("\n");

  const lines = [];
  if (hasGuidelines) {
    lines.push(`Read all guideline files:\n${toBullets(guidelineFiles)}.`);
  }
  if (hasAttachments) {
    lines.push(`Read all attachment files:\n${toBullets(attachmentFiles)}.`);
  }
  lines.push("If any file is missing, fail with ERROR: missing-file <filename> and stop.");
  lines.push("Then apply system_instructions on the user query below.");
  return lines.join("\n");
}
2139
- function extractFromItem(item) {
2140
- if (!item || typeof item !== "object") {
2141
- return void 0;
2437
+ function collectGuidelineFiles2(attachments, guidelinePatterns) {
2438
+ if (!attachments || attachments.length === 0) {
2439
+ return [];
2142
2440
  }
2143
- const record = item;
2144
- const itemType = typeof record.type === "string" ? record.type : void 0;
2145
- if (itemType === "agent_message" || itemType === "response" || itemType === "output") {
2146
- const text = flattenContent(record.text ?? record.content ?? record.output);
2147
- if (text) {
2148
- return text;
2441
+ const unique = /* @__PURE__ */ new Map();
2442
+ for (const attachment of attachments) {
2443
+ const absolutePath = path5.resolve(attachment);
2444
+ const normalized = absolutePath.split(path5.sep).join("/");
2445
+ if (isGuidelineFile(normalized, guidelinePatterns)) {
2446
+ if (!unique.has(absolutePath)) {
2447
+ unique.set(absolutePath, absolutePath);
2448
+ }
2149
2449
  }
2150
2450
  }
2151
- return void 0;
2451
+ return Array.from(unique.values());
2152
2452
  }
2153
- function flattenContent(value) {
2154
- if (typeof value === "string") {
2155
- return value;
2453
+ function collectAttachmentFiles(attachments) {
2454
+ if (!attachments || attachments.length === 0) {
2455
+ return [];
2156
2456
  }
2157
- if (Array.isArray(value)) {
2158
- const parts = value.map((segment) => {
2159
- if (typeof segment === "string") {
2160
- return segment;
2161
- }
2162
- if (segment && typeof segment === "object" && "text" in segment) {
2163
- const text = segment.text;
2164
- return typeof text === "string" ? text : void 0;
2165
- }
2166
- return void 0;
2167
- }).filter((part) => typeof part === "string" && part.length > 0);
2168
- return parts.length > 0 ? parts.join(" \n") : void 0;
2457
+ const unique = /* @__PURE__ */ new Map();
2458
+ for (const attachment of attachments) {
2459
+ const absolutePath = path5.resolve(attachment);
2460
+ if (!unique.has(absolutePath)) {
2461
+ unique.set(absolutePath, absolutePath);
2462
+ }
2169
2463
  }
2170
- if (value && typeof value === "object" && "text" in value) {
2171
- const text = value.text;
2172
- return typeof text === "string" ? text : void 0;
2464
+ return Array.from(unique.values());
2465
+ }
2466
+ function pathToFileUri2(filePath) {
2467
+ const absolutePath = path5.isAbsolute(filePath) ? filePath : path5.resolve(filePath);
2468
+ const normalizedPath = absolutePath.replace(/\\/g, "/");
2469
+ if (/^[a-zA-Z]:\//.test(normalizedPath)) {
2470
+ return `file:///${normalizedPath}`;
2173
2471
  }
2174
- return void 0;
2472
+ return `file://${normalizedPath}`;
2175
2473
  }
2176
- function parseJsonLines(output) {
2177
- const lines = output.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
2178
- if (lines.length <= 1) {
2474
+ function normalizeAttachments(attachments) {
2475
+ if (!attachments || attachments.length === 0) {
2179
2476
  return void 0;
2180
2477
  }
2181
- const parsed = [];
2182
- for (const line of lines) {
2183
- try {
2184
- parsed.push(JSON.parse(line));
2185
- } catch {
2186
- return void 0;
2187
- }
2188
- }
2189
- return parsed;
2190
- }
2191
- function pickDetail(stderr, stdout) {
2192
- const errorText = stderr.trim();
2193
- if (errorText.length > 0) {
2194
- return errorText;
2478
+ const deduped = /* @__PURE__ */ new Set();
2479
+ for (const attachment of attachments) {
2480
+ deduped.add(path5.resolve(attachment));
2195
2481
  }
2196
- const stdoutText = stdout.trim();
2197
- return stdoutText.length > 0 ? stdoutText : void 0;
2482
+ return Array.from(deduped);
2198
2483
  }
2199
- function formatTimeoutSuffix2(timeoutMs) {
2200
- if (!timeoutMs || timeoutMs <= 0) {
2201
- return "";
2484
+ function mergeAttachments(all) {
2485
+ const deduped = /* @__PURE__ */ new Set();
2486
+ for (const list of all) {
2487
+ if (!list) continue;
2488
+ for (const inputFile of list) {
2489
+ deduped.add(path5.resolve(inputFile));
2490
+ }
2202
2491
  }
2203
- const seconds = Math.ceil(timeoutMs / 1e3);
2204
- return ` after ${seconds}s`;
2492
+ return deduped.size > 0 ? Array.from(deduped) : void 0;
2205
2493
  }
2206
- async function defaultCodexRunner(options) {
2207
- return await new Promise((resolve, reject) => {
2208
- const child = spawn(options.executable, options.args, {
2209
- cwd: options.cwd,
2210
- env: options.env,
2211
- stdio: ["pipe", "pipe", "pipe"],
2212
- shell: shouldShellExecute(options.executable)
2213
- });
2214
- let stdout = "";
2215
- let stderr = "";
2216
- let timedOut = false;
2217
- const onAbort = () => {
2218
- child.kill("SIGTERM");
2219
- };
2220
- if (options.signal) {
2221
- if (options.signal.aborted) {
2222
- onAbort();
2223
- } else {
2224
- options.signal.addEventListener("abort", onAbort, { once: true });
2225
- }
2226
- }
2227
- let timeoutHandle;
2228
- if (options.timeoutMs && options.timeoutMs > 0) {
2229
- timeoutHandle = setTimeout(() => {
2230
- timedOut = true;
2231
- child.kill("SIGTERM");
2232
- }, options.timeoutMs);
2233
- timeoutHandle.unref?.();
2494
+ async function ensureVSCodeSubagents(options) {
2495
+ const { kind, count, verbose = false } = options;
2496
+ const vscodeCmd = kind === "vscode-insiders" ? "code-insiders" : "code";
2497
+ const subagentRoot = getSubagentRoot(vscodeCmd);
2498
+ try {
2499
+ if (verbose) {
2500
+ console.log(`Provisioning ${count} subagent(s) via: subagent ${vscodeCmd} provision`);
2234
2501
  }
2235
- child.stdout.setEncoding("utf8");
2236
- child.stdout.on("data", (chunk) => {
2237
- stdout += chunk;
2238
- });
2239
- child.stderr.setEncoding("utf8");
2240
- child.stderr.on("data", (chunk) => {
2241
- stderr += chunk;
2502
+ const result = await provisionSubagents({
2503
+ targetRoot: subagentRoot,
2504
+ subagents: count,
2505
+ dryRun: false
2242
2506
  });
2243
- child.stdin.end(options.prompt);
2244
- const cleanup = () => {
2245
- if (timeoutHandle) {
2246
- clearTimeout(timeoutHandle);
2507
+ if (verbose) {
2508
+ if (result.created.length > 0) {
2509
+ console.log(`Created ${result.created.length} new subagent(s)`);
2247
2510
  }
2248
- if (options.signal) {
2249
- options.signal.removeEventListener("abort", onAbort);
2511
+ if (result.skippedExisting.length > 0) {
2512
+ console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
2250
2513
  }
2514
+ console.log(`
2515
+ total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
2516
+ }
2517
+ return {
2518
+ provisioned: true,
2519
+ message: `Provisioned ${count} subagent(s): ${result.created.length} created, ${result.skippedExisting.length} reused`
2520
+ };
2521
+ } catch (error) {
2522
+ const errorMessage = error instanceof Error ? error.message : String(error);
2523
+ if (verbose) {
2524
+ console.warn(`Provisioning failed (continuing anyway): ${errorMessage}`);
2525
+ }
2526
+ return {
2527
+ provisioned: false,
2528
+ message: `Provisioning failed: ${errorMessage}`
2251
2529
  };
2252
- child.on("error", (error) => {
2253
- cleanup();
2254
- reject(error);
2255
- });
2256
- child.on("close", (code) => {
2257
- cleanup();
2258
- resolve({
2259
- stdout,
2260
- stderr,
2261
- exitCode: typeof code === "number" ? code : -1,
2262
- timedOut
2263
- });
2264
- });
2265
- });
2266
- }
2267
- function shouldShellExecute(executable) {
2268
- if (process.platform !== "win32") {
2269
- return false;
2270
2530
  }
2271
- const lower = executable.toLowerCase();
2272
- return lower.endsWith(".cmd") || lower.endsWith(".bat") || lower.endsWith(".ps1");
2273
2531
  }
2274
2532
 
2275
2533
  // src/evaluation/providers/targets-file.ts
@@ -2386,7 +2644,7 @@ function resolveAndCreateProvider(definition, env = process.env) {
2386
2644
  }
2387
2645
 
2388
2646
  // src/evaluation/evaluators.ts
2389
- import { randomUUID } from "node:crypto";
2647
+ import { randomUUID as randomUUID2 } from "node:crypto";
2390
2648
  var LlmJudgeEvaluator = class {
2391
2649
  kind = "llm_judge";
2392
2650
  resolveJudgeProvider;
@@ -2424,7 +2682,7 @@ var LlmJudgeEvaluator = class {
2424
2682
  const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
2425
2683
  const reasoning = parsed.reasoning ?? response.reasoning;
2426
2684
  const evaluatorRawRequest = {
2427
- id: randomUUID(),
2685
+ id: randomUUID2(),
2428
2686
  provider: judgeProvider.id,
2429
2687
  prompt,
2430
2688
  target: context.target.name,
@@ -2663,7 +2921,7 @@ function parseJsonSafe(payload) {
2663
2921
  }
2664
2922
 
2665
2923
  // src/evaluation/orchestrator.ts
2666
- import { createHash, randomUUID as randomUUID2 } from "node:crypto";
2924
+ import { createHash, randomUUID as randomUUID3 } from "node:crypto";
2667
2925
  import { mkdir as mkdir2, readFile as readFile4, writeFile as writeFile2 } from "node:fs/promises";
2668
2926
  import path7 from "node:path";
2669
2927
 
@@ -3436,7 +3694,7 @@ function sanitizeFilename(value) {
3436
3694
  return "prompt";
3437
3695
  }
3438
3696
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
3439
- return sanitized.length > 0 ? sanitized : randomUUID2();
3697
+ return sanitized.length > 0 ? sanitized : randomUUID3();
3440
3698
  }
3441
3699
  async function invokeProvider(provider, options) {
3442
3700
  const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;