@agentv/core 0.5.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1401 -1143
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.js +1403 -1145
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -953,1323 +953,1581 @@ function formatTimeoutSuffix(timeoutMs) {
|
|
|
953
953
|
return ` after ${seconds}s`;
|
|
954
954
|
}
|
|
955
955
|
|
|
956
|
-
// src/evaluation/providers/
|
|
957
|
-
|
|
958
|
-
|
|
956
|
+
// src/evaluation/providers/codex.ts
|
|
957
|
+
import { exec as execCallback, spawn } from "node:child_process";
|
|
958
|
+
import { randomUUID } from "node:crypto";
|
|
959
|
+
import { constants as constants2, createWriteStream } from "node:fs";
|
|
960
|
+
import { access as access2, copyFile, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
|
|
961
|
+
import { tmpdir } from "node:os";
|
|
962
|
+
import path4 from "node:path";
|
|
963
|
+
import { promisify as promisify2 } from "node:util";
|
|
964
|
+
|
|
965
|
+
// src/evaluation/providers/preread.ts
|
|
966
|
+
import path3 from "node:path";
|
|
967
|
+
/**
 * Assemble the prompt text sent to the agent: an optional mandatory
 * pre-read block listing guideline and input files, followed by the
 * user query section.
 *
 * @param {object} request - Must carry `prompt`; `guideline_patterns` is used as a fallback.
 * @param {string[]|undefined} inputFiles - Files to list in the pre-read block.
 * @param {object} [options] - Optional `guidelinePatterns` and `guidelineOverrides`.
 * @returns {string} The trimmed prompt document.
 */
function buildPromptDocument(request, inputFiles, options) {
  const patterns = options?.guidelinePatterns ?? request.guideline_patterns;
  const guidelines = collectGuidelineFiles(inputFiles, patterns, options?.guidelineOverrides);
  // Input files that are already listed as guidelines are not listed twice.
  const remaining = collectInputFiles(inputFiles).filter(
    (candidate) => !guidelines.includes(candidate)
  );
  const preread = buildMandatoryPrereadBlock(guidelines, remaining);
  const query = request.prompt.trim();
  const segments = preread.length > 0
    ? ["\n", preread, "\n[[ ## user_query ## ]]\n", query]
    : ["\n[[ ## user_query ## ]]\n", query];
  return segments.join("\n").trim();
}
|
|
985
|
+
/**
 * Resolve every input file to an absolute path and drop duplicates,
 * keeping first-seen order.
 *
 * @param {string[]|undefined} inputFiles
 * @returns {string[]|undefined} Absolute paths, or `undefined` when there is no input.
 */
function normalizeInputFiles2(inputFiles) {
  const isEmpty = !inputFiles || inputFiles.length === 0;
  if (isEmpty) {
    return void 0;
  }
  // A Set preserves insertion order, so the first occurrence of a path wins.
  const seen = new Set();
  for (const entry of inputFiles) {
    seen.add(path3.resolve(entry));
  }
  return [...seen];
}
|
|
998
|
+
/**
 * Pick the input files that count as guideline files.
 *
 * A file qualifies when its absolute path is in `overrides`, or when
 * `isGuidelineFile` accepts its forward-slash form for the given patterns.
 *
 * @param {string[]|undefined} inputFiles
 * @param {unknown} guidelinePatterns - Passed through to `isGuidelineFile`.
 * @param {{has(path: string): boolean}|undefined} overrides - Paths forced to be guidelines.
 * @returns {string[]} De-duplicated absolute paths in first-seen order.
 */
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
  const isEmpty = !inputFiles || inputFiles.length === 0;
  if (isEmpty) {
    return [];
  }
  const selected = new Set();
  for (const entry of inputFiles) {
    const resolved = path3.resolve(entry);
    // The override check short-circuits, so pattern matching is skipped for overridden files.
    const qualifies = overrides?.has(resolved)
      || isGuidelineFile(resolved.split(path3.sep).join("/"), guidelinePatterns);
    if (qualifies) {
      selected.add(resolved);
    }
  }
  return [...selected];
}
|
|
1020
|
+
/**
 * Resolve input files to absolute paths without duplicates.
 *
 * @param {string[]|undefined} inputFiles
 * @returns {string[]} Absolute paths in first-seen order; empty when there is no input.
 */
function collectInputFiles(inputFiles) {
  const isEmpty = !inputFiles || inputFiles.length === 0;
  if (isEmpty) {
    return [];
  }
  const seen = new Set();
  for (const entry of inputFiles) {
    seen.add(path3.resolve(entry));
  }
  return [...seen];
}
|
|
1033
|
+
/**
 * Build the instruction block that tells the agent which files to read
 * before answering.
 *
 * @param {string[]} guidelineFiles - Absolute paths of guideline files.
 * @param {string[]} inputFiles - Absolute paths of the remaining input files.
 * @returns {string} The block text, or "" when both lists are empty.
 */
function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
  const hasGuidelines = guidelineFiles.length > 0;
  const hasInputs = inputFiles.length > 0;
  if (!hasGuidelines && !hasInputs) {
    return "";
  }
  // One markdown bullet per file: the base name linked to its file URI.
  const toBulletList = (files) => files
    .map((absolutePath) => `* [${path3.basename(absolutePath)}](${pathToFileUri(absolutePath)})`)
    .join("\n");
  const lines = [];
  if (hasGuidelines) {
    lines.push(`Read all guideline files:\n${toBulletList(guidelineFiles)}.`);
  }
  if (hasInputs) {
    lines.push(`Read all input files:\n${toBulletList(inputFiles)}.`);
  }
  lines.push("If any file is missing, fail with ERROR: missing-file <filename> and stop.");
  lines.push("Then apply system_instructions on the user query below.");
  return lines.join("\n");
}
|
|
1057
|
+
/**
 * Turn a filesystem path into a `file://` URI string.
 *
 * Relative paths are resolved first and backslashes become forward
 * slashes. Paths that start with a drive letter get an extra slash
 * (`file:///C:/...`). No percent-encoding is applied.
 *
 * @param {string} filePath
 * @returns {string}
 */
function pathToFileUri(filePath) {
  const absolute = path3.isAbsolute(filePath) ? filePath : path3.resolve(filePath);
  const forwardSlashed = absolute.replace(/\\/g, "/");
  const startsWithDrive = /^[a-zA-Z]:\//.test(forwardSlashed);
  const scheme = startsWithDrive ? "file:///" : "file://";
  return `${scheme}${forwardSlashed}`;
}
|
|
1065
|
+
|
|
1066
|
+
// src/evaluation/providers/codex.ts
|
|
1067
|
+
// Promisified form of child_process.exec.
var execAsync2 = promisify2(execCallback);
// Prefix handed to mkdtemp when a temporary workspace is created.
var WORKSPACE_PREFIX = "agentv-codex-";
// File name of the prompt document written into the workspace root.
var PROMPT_FILENAME = "prompt.md";
// Workspace subdirectory that receives copies of the input files.
var FILES_DIR = "files";
// Event `type` value that summarizeCodexEvent compares against.
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
1072
|
+
var CodexProvider = class {
|
|
959
1073
|
id;
|
|
960
|
-
kind = "
|
|
1074
|
+
kind = "codex";
|
|
961
1075
|
targetName;
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
1076
|
+
supportsBatch = false;
|
|
1077
|
+
config;
|
|
1078
|
+
runCodex;
|
|
1079
|
+
environmentCheck;
|
|
1080
|
+
resolvedExecutable;
|
|
1081
|
+
constructor(targetName, config, runner = defaultCodexRunner) {
|
|
1082
|
+
this.id = `codex:${targetName}`;
|
|
968
1083
|
this.targetName = targetName;
|
|
969
|
-
this.
|
|
970
|
-
this.
|
|
971
|
-
this.delayMinMs = config.delayMinMs ?? 0;
|
|
972
|
-
this.delayMaxMs = config.delayMaxMs ?? 0;
|
|
1084
|
+
this.config = config;
|
|
1085
|
+
this.runCodex = runner;
|
|
973
1086
|
}
|
|
974
1087
|
async invoke(request) {
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1088
|
+
if (request.signal?.aborted) {
|
|
1089
|
+
throw new Error("Codex provider request was aborted before execution");
|
|
978
1090
|
}
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
1091
|
+
await this.ensureEnvironmentReady();
|
|
1092
|
+
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
1093
|
+
const originalGuidelines = new Set(
|
|
1094
|
+
collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path4.resolve(file))
|
|
1095
|
+
);
|
|
1096
|
+
const workspaceRoot = await this.createWorkspace();
|
|
1097
|
+
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
1098
|
+
try {
|
|
1099
|
+
const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
|
|
1100
|
+
inputFiles,
|
|
1101
|
+
workspaceRoot,
|
|
1102
|
+
originalGuidelines
|
|
1103
|
+
);
|
|
1104
|
+
const promptContent = buildPromptDocument(request, mirroredInputFiles, {
|
|
1105
|
+
guidelinePatterns: request.guideline_patterns,
|
|
1106
|
+
guidelineOverrides: guidelineMirrors
|
|
1107
|
+
});
|
|
1108
|
+
const promptFile = path4.join(workspaceRoot, PROMPT_FILENAME);
|
|
1109
|
+
await writeFile(promptFile, promptContent, "utf8");
|
|
1110
|
+
const args = this.buildCodexArgs();
|
|
1111
|
+
const cwd = this.resolveCwd(workspaceRoot);
|
|
1112
|
+
const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
|
|
1113
|
+
if (result.timedOut) {
|
|
1114
|
+
throw new Error(
|
|
1115
|
+
`Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
|
|
1116
|
+
);
|
|
984
1117
|
}
|
|
985
|
-
|
|
1118
|
+
if (result.exitCode !== 0) {
|
|
1119
|
+
const detail = pickDetail(result.stderr, result.stdout);
|
|
1120
|
+
const prefix = `Codex CLI exited with code ${result.exitCode}`;
|
|
1121
|
+
throw new Error(detail ? `${prefix}: ${detail}` : prefix);
|
|
1122
|
+
}
|
|
1123
|
+
const parsed = parseCodexJson(result.stdout);
|
|
1124
|
+
const assistantText = extractAssistantText(parsed);
|
|
1125
|
+
return {
|
|
1126
|
+
text: assistantText,
|
|
1127
|
+
raw: {
|
|
1128
|
+
response: parsed,
|
|
1129
|
+
stdout: result.stdout,
|
|
1130
|
+
stderr: result.stderr,
|
|
1131
|
+
exitCode: result.exitCode,
|
|
1132
|
+
args,
|
|
1133
|
+
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1134
|
+
promptFile,
|
|
1135
|
+
workspace: workspaceRoot,
|
|
1136
|
+
inputFiles: mirroredInputFiles,
|
|
1137
|
+
logFile: logger?.filePath
|
|
1138
|
+
}
|
|
1139
|
+
};
|
|
1140
|
+
} finally {
|
|
1141
|
+
await logger?.close();
|
|
1142
|
+
await this.cleanupWorkspace(workspaceRoot);
|
|
1143
|
+
}
|
|
986
1144
|
}
|
|
987
|
-
|
|
988
|
-
if (this.
|
|
989
|
-
|
|
990
|
-
const max = Math.max(min, this.delayMaxMs);
|
|
991
|
-
return Math.floor(Math.random() * (max - min + 1)) + min;
|
|
1145
|
+
async ensureEnvironmentReady() {
|
|
1146
|
+
if (!this.environmentCheck) {
|
|
1147
|
+
this.environmentCheck = this.validateEnvironment();
|
|
992
1148
|
}
|
|
993
|
-
|
|
1149
|
+
await this.environmentCheck;
|
|
994
1150
|
}
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
// src/evaluation/providers/targets.ts
|
|
998
|
-
import { z } from "zod";
|
|
999
|
-
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
|
|
1000
|
-
var BASE_TARGET_SCHEMA = z.object({
|
|
1001
|
-
name: z.string().min(1, "target name is required"),
|
|
1002
|
-
provider: z.string().min(1, "provider is required"),
|
|
1003
|
-
settings: z.record(z.unknown()).optional(),
|
|
1004
|
-
judge_target: z.string().optional(),
|
|
1005
|
-
workers: z.number().int().min(1).optional()
|
|
1006
|
-
});
|
|
1007
|
-
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
|
|
1008
|
-
function normalizeAzureApiVersion(value) {
|
|
1009
|
-
if (!value) {
|
|
1010
|
-
return DEFAULT_AZURE_API_VERSION;
|
|
1151
|
+
async validateEnvironment() {
|
|
1152
|
+
this.resolvedExecutable = await locateExecutable(this.config.executable);
|
|
1011
1153
|
}
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1154
|
+
resolveCwd(workspaceRoot) {
|
|
1155
|
+
if (!this.config.cwd) {
|
|
1156
|
+
return workspaceRoot;
|
|
1157
|
+
}
|
|
1158
|
+
return path4.resolve(this.config.cwd);
|
|
1015
1159
|
}
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
)
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
kind: "gemini",
|
|
1050
|
-
name: parsed.name,
|
|
1051
|
-
judgeTarget: parsed.judge_target,
|
|
1052
|
-
workers: parsed.workers,
|
|
1053
|
-
providerBatching,
|
|
1054
|
-
config: resolveGeminiConfig(parsed, env)
|
|
1055
|
-
};
|
|
1056
|
-
case "codex":
|
|
1057
|
-
case "codex-cli":
|
|
1058
|
-
return {
|
|
1059
|
-
kind: "codex",
|
|
1060
|
-
name: parsed.name,
|
|
1061
|
-
judgeTarget: parsed.judge_target,
|
|
1062
|
-
workers: parsed.workers,
|
|
1063
|
-
providerBatching,
|
|
1064
|
-
config: resolveCodexConfig(parsed, env)
|
|
1065
|
-
};
|
|
1066
|
-
case "mock":
|
|
1067
|
-
return {
|
|
1068
|
-
kind: "mock",
|
|
1069
|
-
name: parsed.name,
|
|
1070
|
-
judgeTarget: parsed.judge_target,
|
|
1071
|
-
workers: parsed.workers,
|
|
1072
|
-
providerBatching,
|
|
1073
|
-
config: resolveMockConfig(parsed)
|
|
1074
|
-
};
|
|
1075
|
-
case "vscode":
|
|
1076
|
-
case "vscode-insiders":
|
|
1077
|
-
return {
|
|
1078
|
-
kind: provider,
|
|
1079
|
-
name: parsed.name,
|
|
1080
|
-
judgeTarget: parsed.judge_target,
|
|
1081
|
-
workers: parsed.workers,
|
|
1082
|
-
providerBatching,
|
|
1083
|
-
config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
|
|
1084
|
-
};
|
|
1085
|
-
case "cli":
|
|
1160
|
+
buildCodexArgs() {
|
|
1161
|
+
const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
|
|
1162
|
+
if (this.config.args && this.config.args.length > 0) {
|
|
1163
|
+
args.push(...this.config.args);
|
|
1164
|
+
}
|
|
1165
|
+
args.push("-");
|
|
1166
|
+
return args;
|
|
1167
|
+
}
|
|
1168
|
+
async executeCodex(args, cwd, promptContent, signal, logger) {
|
|
1169
|
+
try {
|
|
1170
|
+
return await this.runCodex({
|
|
1171
|
+
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1172
|
+
args,
|
|
1173
|
+
cwd,
|
|
1174
|
+
prompt: promptContent,
|
|
1175
|
+
timeoutMs: this.config.timeoutMs,
|
|
1176
|
+
env: process.env,
|
|
1177
|
+
signal,
|
|
1178
|
+
onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
|
|
1179
|
+
onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
|
|
1180
|
+
});
|
|
1181
|
+
} catch (error) {
|
|
1182
|
+
const err = error;
|
|
1183
|
+
if (err.code === "ENOENT") {
|
|
1184
|
+
throw new Error(
|
|
1185
|
+
`Codex executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
|
|
1186
|
+
);
|
|
1187
|
+
}
|
|
1188
|
+
throw error;
|
|
1189
|
+
}
|
|
1190
|
+
}
|
|
1191
|
+
async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
|
|
1192
|
+
if (!inputFiles || inputFiles.length === 0) {
|
|
1086
1193
|
return {
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
judgeTarget: parsed.judge_target,
|
|
1090
|
-
workers: parsed.workers,
|
|
1091
|
-
providerBatching,
|
|
1092
|
-
config: resolveCliConfig(parsed, env)
|
|
1194
|
+
mirroredInputFiles: void 0,
|
|
1195
|
+
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
1093
1196
|
};
|
|
1094
|
-
|
|
1095
|
-
|
|
1197
|
+
}
|
|
1198
|
+
const filesRoot = path4.join(workspaceRoot, FILES_DIR);
|
|
1199
|
+
await mkdir(filesRoot, { recursive: true });
|
|
1200
|
+
const mirrored = [];
|
|
1201
|
+
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
1202
|
+
const nameCounts = /* @__PURE__ */ new Map();
|
|
1203
|
+
for (const inputFile of inputFiles) {
|
|
1204
|
+
const absoluteSource = path4.resolve(inputFile);
|
|
1205
|
+
const baseName = path4.basename(absoluteSource);
|
|
1206
|
+
const count = nameCounts.get(baseName) ?? 0;
|
|
1207
|
+
nameCounts.set(baseName, count + 1);
|
|
1208
|
+
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
1209
|
+
const destination = path4.join(filesRoot, finalName);
|
|
1210
|
+
await copyFile(absoluteSource, destination);
|
|
1211
|
+
const resolvedDestination = path4.resolve(destination);
|
|
1212
|
+
mirrored.push(resolvedDestination);
|
|
1213
|
+
if (guidelineOriginals.has(absoluteSource)) {
|
|
1214
|
+
guidelineMirrors.add(resolvedDestination);
|
|
1215
|
+
}
|
|
1216
|
+
}
|
|
1217
|
+
return {
|
|
1218
|
+
mirroredInputFiles: mirrored,
|
|
1219
|
+
guidelineMirrors
|
|
1220
|
+
};
|
|
1096
1221
|
}
|
|
1222
|
+
async createWorkspace() {
|
|
1223
|
+
return await mkdtemp(path4.join(tmpdir(), WORKSPACE_PREFIX));
|
|
1224
|
+
}
|
|
1225
|
+
async cleanupWorkspace(workspaceRoot) {
|
|
1226
|
+
try {
|
|
1227
|
+
await rm(workspaceRoot, { recursive: true, force: true });
|
|
1228
|
+
} catch {
|
|
1229
|
+
}
|
|
1230
|
+
}
|
|
1231
|
+
resolveLogDirectory() {
|
|
1232
|
+
const disabled = isCodexLogStreamingDisabled();
|
|
1233
|
+
if (disabled) {
|
|
1234
|
+
return void 0;
|
|
1235
|
+
}
|
|
1236
|
+
if (this.config.logDir) {
|
|
1237
|
+
return path4.resolve(this.config.logDir);
|
|
1238
|
+
}
|
|
1239
|
+
return path4.join(process.cwd(), ".agentv", "logs", "codex");
|
|
1240
|
+
}
|
|
1241
|
+
async createStreamLogger(request) {
|
|
1242
|
+
const logDir = this.resolveLogDirectory();
|
|
1243
|
+
if (!logDir) {
|
|
1244
|
+
return void 0;
|
|
1245
|
+
}
|
|
1246
|
+
try {
|
|
1247
|
+
await mkdir(logDir, { recursive: true });
|
|
1248
|
+
} catch (error) {
|
|
1249
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1250
|
+
console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
|
|
1251
|
+
return void 0;
|
|
1252
|
+
}
|
|
1253
|
+
const filePath = path4.join(logDir, buildLogFilename(request, this.targetName));
|
|
1254
|
+
try {
|
|
1255
|
+
const logger = await CodexStreamLogger.create({
|
|
1256
|
+
filePath,
|
|
1257
|
+
targetName: this.targetName,
|
|
1258
|
+
evalCaseId: request.evalCaseId,
|
|
1259
|
+
attempt: request.attempt,
|
|
1260
|
+
format: this.config.logFormat ?? "summary"
|
|
1261
|
+
});
|
|
1262
|
+
console.log(`Streaming Codex CLI output to ${filePath}`);
|
|
1263
|
+
return logger;
|
|
1264
|
+
} catch (error) {
|
|
1265
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1266
|
+
console.warn(`Skipping Codex stream logging for ${filePath}: ${message}`);
|
|
1267
|
+
return void 0;
|
|
1268
|
+
}
|
|
1269
|
+
}
|
|
1270
|
+
};
|
|
1271
|
+
/**
 * Appends timestamped, line-oriented stdout/stderr output to a log file.
 *
 * Chunks are buffered per source until a newline arrives; each complete
 * line is formatted and written. Use the static `create()` factory, which
 * waits for the file to open before returning.
 */
var CodexStreamLogger = class _CodexStreamLogger {
  filePath;
  stream;
  startedAt = Date.now();
  stdoutBuffer = "";
  stderrBuffer = "";
  format;
  /**
   * @param {string} filePath - Log file path, opened in append mode.
   * @param {string} format - "json" selects JSON formatting; anything else the summary form.
   */
  constructor(filePath, format) {
    this.filePath = filePath;
    this.format = format;
    this.stream = createWriteStream(filePath, { flags: "a" });
  }
  /**
   * Open the log file and write the header lines.
   *
   * @param {{filePath: string, format: string, targetName: string, evalCaseId?: string, attempt?: number}} options
   * @returns {Promise<_CodexStreamLogger>}
   * @throws Rejects with the stream error when the file cannot be opened.
   */
  static async create(options) {
    const logger = new _CodexStreamLogger(options.filePath, options.format);
    // Wait for the file to open so an open failure becomes a rejection the
    // caller can handle, instead of an unhandled "error" event on the stream.
    await new Promise((resolve, reject) => {
      const onOpen = () => {
        logger.stream.off("error", onError);
        resolve();
      };
      const onError = (error) => {
        logger.stream.off("open", onOpen);
        reject(error);
      };
      logger.stream.once("open", onOpen);
      logger.stream.once("error", onError);
    });
    // Note: the filter drops every falsy entry, including the trailing "".
    const header = [
      "# Codex CLI stream log",
      `# target: ${options.targetName}`,
      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
      `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
      ""
    ].filter((line) => Boolean(line));
    logger.writeLines(header);
    return logger;
  }
  /** Buffer a stdout chunk and write any lines it completes. */
  handleStdoutChunk(chunk) {
    this.stdoutBuffer += chunk;
    this.flushBuffer("stdout");
  }
  /** Buffer a stderr chunk and write any lines it completes. */
  handleStderrChunk(chunk) {
    this.stderrBuffer += chunk;
    this.flushBuffer("stderr");
  }
  /**
   * Write all buffered output, including unterminated trailing text,
   * then end the stream. Rejects if the stream emits an error first.
   */
  async close() {
    this.flushBuffer("stdout");
    this.flushBuffer("stderr");
    this.flushRemainder();
    await new Promise((resolve, reject) => {
      this.stream.once("error", reject);
      this.stream.end(() => resolve());
    });
  }
  /** Write each entry followed by a newline. */
  writeLines(lines) {
    for (const line of lines) {
      this.stream.write(`${line}\n`);
    }
  }
  /**
   * Write every complete line held in the given source's buffer and keep
   * the unterminated tail for the next chunk.
   *
   * @param {"stdout"|"stderr"} source
   */
  flushBuffer(source) {
    const buffer = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
    const lines = buffer.split(/\r?\n/);
    // The last element is whatever follows the final newline (possibly "").
    const remainder = lines.pop() ?? "";
    if (source === "stdout") {
      this.stdoutBuffer = remainder;
    } else {
      this.stderrBuffer = remainder;
    }
    for (const line of lines) {
      const formatted = this.formatLine(line, source);
      if (formatted) {
        this.stream.write(formatted);
        this.stream.write("\n");
      }
    }
  }
  /**
   * Format one raw line with an elapsed-time and source prefix.
   *
   * @returns {string|undefined} `undefined` for blank lines.
   */
  formatLine(rawLine, source) {
    const trimmed = rawLine.trim();
    if (trimmed.length === 0) {
      return void 0;
    }
    const message = this.format === "json" ? formatCodexJsonLog(trimmed) : formatCodexLogMessage(trimmed, source);
    return `[+${formatElapsed(this.startedAt)}] [${source}] ${message}`;
  }
  /** Write any unterminated trailing text from both buffers, then clear them. */
  flushRemainder() {
    const stdoutRemainder = this.stdoutBuffer.trim();
    if (stdoutRemainder.length > 0) {
      const formatted = this.formatLine(stdoutRemainder, "stdout");
      if (formatted) {
        this.stream.write(formatted);
        this.stream.write("\n");
      }
    }
    const stderrRemainder = this.stderrBuffer.trim();
    if (stderrRemainder.length > 0) {
      const formatted = this.formatLine(stderrRemainder, "stderr");
      if (formatted) {
        this.stream.write(formatted);
        this.stream.write("\n");
      }
    }
    this.stdoutBuffer = "";
    this.stderrBuffer = "";
  }
};
|
|
1365
|
+
/**
 * Report whether the AGENTV_CODEX_STREAM_LOGS environment variable turns
 * stream logging off. Only "false", "0" and "off" (case-insensitive,
 * surrounding whitespace ignored) disable it.
 *
 * @returns {boolean}
 */
function isCodexLogStreamingDisabled() {
  const raw = process.env.AGENTV_CODEX_STREAM_LOGS;
  if (!raw) {
    return false;
  }
  return ["false", "0", "off"].includes(raw.trim().toLowerCase());
}
|
|
1098
|
-
function
|
|
1099
|
-
const
|
|
1100
|
-
const
|
|
1101
|
-
const
|
|
1102
|
-
const
|
|
1103
|
-
|
|
1104
|
-
const temperatureSource = settings.temperature;
|
|
1105
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1106
|
-
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
1107
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
1108
|
-
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
1109
|
-
const version = normalizeAzureApiVersion(
|
|
1110
|
-
resolveOptionalString(versionSource, env, `${target.name} api version`)
|
|
1111
|
-
);
|
|
1112
|
-
const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
|
|
1113
|
-
const maxOutputTokens = resolveOptionalNumber(
|
|
1114
|
-
maxTokensSource,
|
|
1115
|
-
`${target.name} max output tokens`
|
|
1116
|
-
);
|
|
1117
|
-
return {
|
|
1118
|
-
resourceName,
|
|
1119
|
-
deploymentName,
|
|
1120
|
-
apiKey,
|
|
1121
|
-
version,
|
|
1122
|
-
temperature,
|
|
1123
|
-
maxOutputTokens
|
|
1124
|
-
};
|
|
1125
|
-
}
|
|
1126
|
-
function resolveAnthropicConfig(target, env) {
|
|
1127
|
-
const settings = target.settings ?? {};
|
|
1128
|
-
const apiKeySource = settings.api_key ?? settings.apiKey;
|
|
1129
|
-
const modelSource = settings.model ?? settings.deployment ?? settings.variant;
|
|
1130
|
-
const temperatureSource = settings.temperature;
|
|
1131
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1132
|
-
const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
|
|
1133
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
1134
|
-
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
1135
|
-
return {
|
|
1136
|
-
apiKey,
|
|
1137
|
-
model,
|
|
1138
|
-
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1139
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
1140
|
-
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
|
|
1141
|
-
};
|
|
1142
|
-
}
|
|
1143
|
-
function resolveGeminiConfig(target, env) {
|
|
1144
|
-
const settings = target.settings ?? {};
|
|
1145
|
-
const apiKeySource = settings.api_key ?? settings.apiKey;
|
|
1146
|
-
const modelSource = settings.model ?? settings.deployment ?? settings.variant;
|
|
1147
|
-
const temperatureSource = settings.temperature;
|
|
1148
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1149
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
1150
|
-
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
1151
|
-
allowLiteral: true,
|
|
1152
|
-
optionalEnv: true
|
|
1153
|
-
}) ?? "gemini-2.5-flash";
|
|
1154
|
-
return {
|
|
1155
|
-
apiKey,
|
|
1156
|
-
model,
|
|
1157
|
-
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1158
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
|
|
1159
|
-
};
|
|
1160
|
-
}
|
|
1161
|
-
function resolveCodexConfig(target, env) {
|
|
1162
|
-
const settings = target.settings ?? {};
|
|
1163
|
-
const executableSource = settings.executable ?? settings.command ?? settings.binary;
|
|
1164
|
-
const argsSource = settings.args ?? settings.arguments;
|
|
1165
|
-
const cwdSource = settings.cwd;
|
|
1166
|
-
const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
|
|
1167
|
-
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
1168
|
-
allowLiteral: true,
|
|
1169
|
-
optionalEnv: true
|
|
1170
|
-
}) ?? "codex";
|
|
1171
|
-
const args = resolveOptionalStringArray(argsSource, env, `${target.name} codex args`);
|
|
1172
|
-
const cwd = resolveOptionalString(cwdSource, env, `${target.name} codex cwd`, {
|
|
1173
|
-
allowLiteral: true,
|
|
1174
|
-
optionalEnv: true
|
|
1175
|
-
});
|
|
1176
|
-
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
|
|
1177
|
-
return {
|
|
1178
|
-
executable,
|
|
1179
|
-
args,
|
|
1180
|
-
cwd,
|
|
1181
|
-
timeoutMs
|
|
1182
|
-
};
|
|
1183
|
-
}
|
|
1184
|
-
function resolveMockConfig(target) {
|
|
1185
|
-
const settings = target.settings ?? {};
|
|
1186
|
-
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
1187
|
-
return { response };
|
|
1188
|
-
}
|
|
1189
|
-
function resolveVSCodeConfig(target, env, insiders) {
|
|
1190
|
-
const settings = target.settings ?? {};
|
|
1191
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
1192
|
-
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
1193
|
-
allowLiteral: false,
|
|
1194
|
-
optionalEnv: true
|
|
1195
|
-
}) : void 0;
|
|
1196
|
-
const commandSource = settings.vscode_cmd ?? settings.command;
|
|
1197
|
-
const waitSource = settings.wait;
|
|
1198
|
-
const dryRunSource = settings.dry_run ?? settings.dryRun;
|
|
1199
|
-
const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
|
|
1200
|
-
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
1201
|
-
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
1202
|
-
return {
|
|
1203
|
-
command,
|
|
1204
|
-
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
1205
|
-
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
1206
|
-
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
1207
|
-
allowLiteral: true,
|
|
1208
|
-
optionalEnv: true
|
|
1209
|
-
}),
|
|
1210
|
-
workspaceTemplate
|
|
1211
|
-
};
|
|
1212
|
-
}
|
|
1213
|
-
function resolveCliConfig(target, env) {
|
|
1214
|
-
const settings = target.settings ?? {};
|
|
1215
|
-
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
1216
|
-
const filesFormat = resolveOptionalLiteralString(
|
|
1217
|
-
settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat
|
|
1218
|
-
);
|
|
1219
|
-
const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
|
|
1220
|
-
allowLiteral: true,
|
|
1221
|
-
optionalEnv: true
|
|
1222
|
-
});
|
|
1223
|
-
const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
|
|
1224
|
-
const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
|
|
1225
|
-
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
1226
|
-
const commandTemplate = resolveString(
|
|
1227
|
-
commandTemplateSource,
|
|
1228
|
-
env,
|
|
1229
|
-
`${target.name} CLI command template`,
|
|
1230
|
-
true
|
|
1231
|
-
);
|
|
1232
|
-
assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
|
|
1233
|
-
return {
|
|
1234
|
-
commandTemplate,
|
|
1235
|
-
filesFormat,
|
|
1236
|
-
cwd,
|
|
1237
|
-
env: envOverrides,
|
|
1238
|
-
timeoutMs,
|
|
1239
|
-
healthcheck
|
|
1240
|
-
};
|
|
1373
|
+
/**
 * Compose a log file name from the current timestamp, the target name,
 * the eval case id (or "codex"), an optional 1-based attempt suffix and
 * eight characters of a random UUID.
 *
 * @param {object} request - `evalCaseId` and `attempt` are read.
 * @param {string} targetName
 * @returns {string} A name ending in ".log".
 */
function buildLogFilename(request, targetName) {
  // ":" and "." in the ISO timestamp are replaced with "-".
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  const evalPart = sanitizeForFilename(request.evalCaseId ?? "codex");
  const attemptPart = request.attempt === void 0 ? "" : `_attempt-${request.attempt + 1}`;
  const targetPart = sanitizeForFilename(targetName);
  const uniquePart = randomUUID().slice(0, 8);
  return `${stamp}_${targetPart}_${evalPart}${attemptPart}_${uniquePart}.log`;
}
|
|
1242
|
-
function
|
|
1243
|
-
|
|
1380
|
+
/**
 * Replace each run of characters outside `A-Za-z0-9._-` with a single "_".
 *
 * @param {string} value
 * @returns {string} The cleaned value, or "codex" when the result is empty.
 */
function sanitizeForFilename(value) {
  const cleaned = value.replace(/[^A-Za-z0-9._-]+/g, "_");
  return cleaned || "codex";
}
|
|
1384
|
+
/**
 * Format the whole seconds elapsed since `startedAt` as `MM:SS`, or
 * `HH:MM:SS` once at least one hour has passed.
 *
 * @param {number} startedAt - Start time as a `Date.now()` value.
 * @returns {string}
 */
function formatElapsed(startedAt) {
  const pad = (part) => part.toString().padStart(2, "0");
  const totalSeconds = Math.floor((Date.now() - startedAt) / 1e3);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const seconds = totalSeconds % 60;
  const fields = hours > 0 ? [hours, minutes, seconds] : [minutes, seconds];
  return fields.map((part) => pad(part)).join(":");
}
|
|
1394
|
+
/**
 * Produce the summary-format text for one log line.
 *
 * If the line parses as JSON and yields a summary, that summary is
 * returned. Otherwise the raw line is returned, prefixed with "stderr: "
 * for stderr output.
 *
 * @param {string} rawLine
 * @param {"stdout"|"stderr"} source
 * @returns {string}
 */
function formatCodexLogMessage(rawLine, source) {
  const event = tryParseJsonValue(rawLine);
  const summary = event ? summarizeCodexEvent(event) : void 0;
  if (summary) {
    return summary;
  }
  return source === "stderr" ? `stderr: ${rawLine}` : rawLine;
}
|
|
1407
|
+
/**
 * Pretty-print a line as 2-space-indented JSON when it parses to a
 * truthy value; otherwise return the line unchanged.
 *
 * @param {string} rawLine
 * @returns {string}
 */
function formatCodexJsonLog(rawLine) {
  const value = tryParseJsonValue(rawLine);
  if (value) {
    try {
      return JSON.stringify(value, null, 2);
    } catch {
      // Serialization failed; fall back to the raw line below.
    }
  }
  return rawLine;
}
|
|
1418
|
+
/**
 * Builds a one-line summary of a parsed Codex event.
 *
 * The message text is looked up, in order, through `extractFromEvent`,
 * `extractFromItem(event.item)` and the event's own `output`/`content`. For
 * item-completed events without a message, the item's `text`/`content`/`output`
 * is tried as well.
 *
 * @param {unknown} event - A parsed JSON value.
 * @returns {string | undefined} `type: message`, `message`, `type:itemType`,
 *   or `type`, depending on what is available; `undefined` for non-objects.
 */
function summarizeCodexEvent(event) {
  if (typeof event !== "object" || event === null) {
    return void 0;
  }
  const eventType = typeof event.type === "string" ? event.type : void 0;
  let text = extractFromEvent(event) ?? extractFromItem(event.item) ?? flattenContent(event.output ?? event.content);
  const nestedItem = event.item;
  if (!text && eventType === JSONL_TYPE_ITEM_COMPLETED && nestedItem && typeof nestedItem === "object") {
    // Last resort for completed items: accept text from any item type.
    text = flattenContent(nestedItem.text ?? nestedItem.content ?? nestedItem.output) || text;
  }
  if (text) {
    return eventType ? `${eventType}: ${text}` : text;
  }
  const itemType = typeof event.item?.type === "string" ? event.item.type : void 0;
  if (eventType && itemType) {
    return `${eventType}:${itemType}`;
  }
  return eventType;
}
|
|
1260
|
-
function
|
|
1261
|
-
|
|
1262
|
-
|
|
1453
|
+
/**
 * Parses a string as JSON without throwing.
 *
 * @param {string} rawLine - Candidate JSON text.
 * @returns {unknown} The parsed value, or `undefined` when parsing fails.
 */
function tryParseJsonValue(rawLine) {
  let value;
  try {
    value = JSON.parse(rawLine);
  } catch {
    value = void 0;
  }
  return value;
}
|
|
1460
|
+
/**
 * Resolves the Codex executable to a path that exists on disk.
 *
 * A candidate containing a path separator is treated as a path: it is made
 * absolute, adjusted by `ensureWindowsExecutableVariant`, and checked for
 * existence. A bare name is looked up with `where` (win32) or `which`.
 *
 * The name is quoted before being placed in the shell command so that spaces
 * or shell metacharacters in a configured executable name cannot split the
 * command or run additional commands.
 *
 * @param {string} candidate - Executable name or path.
 * @returns {Promise<string>} The path of the executable.
 * @throws {Error} When a bare name cannot be found on PATH (the lookup failure,
 *   if any, is attached as `cause`), or when a path candidate does not exist
 *   (the rejection from the existence check propagates).
 */
async function locateExecutable(candidate) {
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
  if (includesPathSeparator) {
    const resolved = path4.isAbsolute(candidate) ? candidate : path4.resolve(candidate);
    const executablePath = await ensureWindowsExecutableVariant(resolved);
    await access2(executablePath, constants2.F_OK);
    return executablePath;
  }
  const isWindows = process.platform === "win32";
  const locator = isWindows ? "where" : "which";
  // Double quotes cannot appear in Windows file names, so they are dropped;
  // on POSIX shells the name is single-quoted with embedded quotes escaped.
  const quoted = isWindows ? `"${candidate.replace(/"/g, "")}"` : `'${candidate.replace(/'/g, "'\\''")}'`;
  let lookupError;
  try {
    const { stdout } = await execAsync2(`${locator} ${quoted}`);
    const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
    const preferred = selectExecutableCandidate(lines);
    if (preferred) {
      const executablePath = await ensureWindowsExecutableVariant(preferred);
      await access2(executablePath, constants2.F_OK);
      return executablePath;
    }
  } catch (error) {
    // Best effort: any lookup failure is reported as "not found" below.
    lookupError = error;
  }
  throw new Error(
    `Codex executable '${candidate}' was not found on PATH`,
    lookupError ? { cause: lookupError } : void 0
  );
}
|
|
1270
|
-
function
|
|
1271
|
-
if (
|
|
1482
|
+
/**
 * Picks which of the located paths to use.
 *
 * Outside win32 the first entry wins. On win32 the extensions returned by
 * `getWindowsExecutableExtensions` are tried in order and the first entry
 * ending with one of them (case-insensitively) is chosen, falling back to the
 * first entry.
 *
 * @param {string[]} candidates - Paths reported by the lookup command.
 * @returns {string | undefined} The chosen path, or `undefined` for an empty list.
 */
function selectExecutableCandidate(candidates) {
  if (candidates.length === 0) {
    return void 0;
  }
  const fallback = candidates[0];
  if (process.platform !== "win32") {
    return fallback;
  }
  for (const extension of getWindowsExecutableExtensions()) {
    for (const entry of candidates) {
      if (entry.toLowerCase().endsWith(extension)) {
        return entry;
      }
    }
  }
  return fallback;
}
|
|
1498
|
+
/**
 * On win32, appends an executable extension to a path that lacks one.
 *
 * Each extension from `getWindowsExecutableExtensions` is tried in order and
 * the first variant that exists on disk is returned. On other platforms, or
 * when the path already has an executable extension or no variant exists,
 * the input is returned unchanged.
 *
 * @param {string} candidate - Path to an executable.
 * @returns {Promise<string>} The path to use.
 */
async function ensureWindowsExecutableVariant(candidate) {
  const needsProbe = process.platform === "win32" && !hasExecutableExtension(candidate);
  if (!needsProbe) {
    return candidate;
  }
  for (const suffix of getWindowsExecutableExtensions()) {
    const probe = `${candidate}${suffix}`;
    let exists = true;
    try {
      await access2(probe, constants2.F_OK);
    } catch {
      // This variant is missing; try the next extension.
      exists = false;
    }
    if (exists) {
      return probe;
    }
  }
  return candidate;
}
|
|
1312
|
-
function
|
|
1313
|
-
const
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1516
|
+
/**
 * Tells whether a path ends with one of the extensions returned by
 * `getWindowsExecutableExtensions` (compared case-insensitively).
 *
 * @param {string} candidate - Path or file name.
 * @returns {boolean} `true` when a known executable extension is present.
 */
function hasExecutableExtension(candidate) {
  const normalized = candidate.toLowerCase();
  for (const extension of getWindowsExecutableExtensions()) {
    if (normalized.endsWith(extension)) {
      return true;
    }
  }
  return false;
}
|
|
1520
|
+
// Extensions used on win32 when PATHEXT is unset or contains no entries.
var DEFAULT_WINDOWS_EXTENSIONS = [".com", ".exe", ".bat", ".cmd", ".ps1"];
/**
 * Lists the file extensions treated as executable.
 *
 * @returns {string[]} An empty list outside win32. On win32, the lower-cased,
 *   trimmed, non-empty entries of the `PATHEXT` environment variable, or
 *   `DEFAULT_WINDOWS_EXTENSIONS` when that yields nothing.
 */
function getWindowsExecutableExtensions() {
  if (process.platform !== "win32") {
    return [];
  }
  const configured = [];
  for (const piece of (process.env.PATHEXT ?? "").split(";")) {
    const extension = piece.trim().toLowerCase();
    if (extension.length > 0) {
      configured.push(extension);
    }
  }
  return configured.length > 0 ? configured : DEFAULT_WINDOWS_EXTENSIONS;
}
|
|
1528
|
+
/**
 * Parses the output of the Codex CLI run in `--json` mode.
 *
 * Attempts, in order: the whole trimmed output as one JSON document; one JSON
 * value per line (via `parseJsonLines`); the text starting at the last `{`.
 *
 * @param {string} output - Raw CLI output.
 * @returns {unknown} The parsed value (an array when parsed line by line).
 * @throws {Error} When the output is empty or none of the attempts succeed.
 */
function parseCodexJson(output) {
  const text = output.trim();
  if (text.length === 0) {
    throw new Error("Codex CLI produced no output in --json mode");
  }
  // Wraps JSON.parse so a failure is a value rather than an exception.
  const attempt = (source) => {
    try {
      return { ok: true, value: JSON.parse(source) };
    } catch {
      return { ok: false };
    }
  };
  const whole = attempt(text);
  if (whole.ok) {
    return whole.value;
  }
  const events = parseJsonLines(text);
  if (events) {
    return events;
  }
  const braceAt = text.lastIndexOf("{");
  if (braceAt >= 0) {
    const tail = attempt(text.slice(braceAt));
    if (tail.ok) {
      return tail.value;
    }
  }
  const preview = text.slice(0, 200);
  throw new Error(`Codex CLI emitted invalid JSON: ${preview}${text.length > 200 ? "\u2026" : ""}`);
}
|
|
1322
|
-
function
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1552
|
+
/**
 * Extracts the assistant's reply from a parsed Codex response.
 *
 * Sources are tried in this order: the event stream (when `parsed` is an
 * array), the value treated as a single event, the last `messages` entry with
 * role `"assistant"` that has text, `response.content`, and finally `output`.
 *
 * @param {unknown} parsed - Value returned by the JSON parsing step.
 * @returns {string} The assistant text.
 * @throws {Error} When no source yields text.
 */
function extractAssistantText(parsed) {
  const missingMessage = "Codex CLI JSON response did not include an assistant message";
  if (Array.isArray(parsed)) {
    const streamed = extractFromEventStream(parsed);
    if (streamed) {
      return streamed;
    }
  }
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error(missingMessage);
  }
  const direct = extractFromEvent(parsed);
  if (direct) {
    return direct;
  }
  if (Array.isArray(parsed.messages)) {
    // Newest message first, so the latest assistant reply wins.
    for (const entry of [...parsed.messages].reverse()) {
      if (!entry || typeof entry !== "object" || entry.role !== "assistant") {
        continue;
      }
      const reply = flattenContent(entry.content);
      if (reply) {
        return reply;
      }
    }
  }
  const { response } = parsed;
  if (response && typeof response === "object") {
    const reply = flattenContent(response.content);
    if (reply) {
      return reply;
    }
  }
  const fallback = flattenContent(parsed.output);
  if (fallback) {
    return fallback;
  }
  throw new Error(missingMessage);
}
|
|
1332
|
-
function
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1600
|
+
/**
 * Scans a list of events from last to first and returns the first text that
 * `extractFromEvent` produces.
 *
 * @param {unknown[]} events - Parsed events in emission order.
 * @returns {string | undefined} The text found, or `undefined` when no event yields any.
 */
function extractFromEventStream(events) {
  let cursor = events.length;
  while (cursor-- > 0) {
    const text = extractFromEvent(events[cursor]);
    if (text) {
      return text;
    }
  }
  return void 0;
}
|
|
1342
|
-
function
|
|
1343
|
-
if (
|
|
1344
|
-
return void 0;
|
|
1345
|
-
}
|
|
1346
|
-
if (typeof source !== "string") {
|
|
1347
|
-
throw new Error(`${description} must be a string`);
|
|
1348
|
-
}
|
|
1349
|
-
const trimmed = source.trim();
|
|
1350
|
-
if (trimmed.length === 0) {
|
|
1610
|
+
/**
 * Extracts text from a single event.
 *
 * For events whose `type` equals `JSONL_TYPE_ITEM_COMPLETED`, the nested
 * `item` is consulted first; otherwise (or when that yields nothing) the
 * event's `output`, then `content`, is flattened.
 *
 * @param {unknown} event - A parsed event.
 * @returns {string | undefined} Non-empty text, or `undefined`.
 */
function extractFromEvent(event) {
  if (typeof event !== "object" || event === null) {
    return void 0;
  }
  const kind = typeof event.type === "string" ? event.type : void 0;
  if (kind === JSONL_TYPE_ITEM_COMPLETED) {
    const fromItem = extractFromItem(event.item);
    if (fromItem) {
      return fromItem;
    }
  }
  const flattened = flattenContent(event.output ?? event.content);
  return flattened || void 0;
}
|
|
1373
|
-
function
|
|
1374
|
-
if (
|
|
1630
|
+
/**
 * Extracts text from an event item, but only for items of type
 * `agent_message`, `response`, or `output`.
 *
 * @param {unknown} item - The `item` field of an event.
 * @returns {string | undefined} Text flattened from the item's `text`,
 *   `content`, or `output` (first one that is not nullish), or `undefined`.
 */
function extractFromItem(item) {
  if (typeof item !== "object" || item === null) {
    return void 0;
  }
  const kind = typeof item.type === "string" ? item.type : void 0;
  switch (kind) {
    case "agent_message":
    case "response":
    case "output": {
      const text = flattenContent(item.text ?? item.content ?? item.output);
      return text || void 0;
    }
    default:
      return void 0;
  }
}
|
|
1383
|
-
function
|
|
1384
|
-
if (
|
|
1385
|
-
return
|
|
1644
|
+
/**
 * Reduces a content value to plain text.
 *
 * - A string is returned as is.
 * - An array contributes its string entries and the string `text` of its
 *   object entries; non-empty pieces are joined with `" \n"`.
 * - Any other object contributes its string `text` property.
 *
 * @param {unknown} value - String, array of segments, or object with `text`.
 * @returns {string | undefined} The text, or `undefined` when none is found.
 */
function flattenContent(value) {
  if (typeof value === "string") {
    return value;
  }
  // Reads a string `text` property off an object, if there is one.
  const textOf = (node) => {
    if (node && typeof node === "object" && "text" in node) {
      const text = node.text;
      return typeof text === "string" ? text : void 0;
    }
    return void 0;
  };
  if (Array.isArray(value)) {
    const pieces = [];
    for (const segment of value) {
      const piece = typeof segment === "string" ? segment : textOf(segment);
      if (typeof piece === "string" && piece.length > 0) {
        pieces.push(piece);
      }
    }
    return pieces.length > 0 ? pieces.join(" \n") : void 0;
  }
  return textOf(value);
}
|
|
1398
|
-
function
|
|
1399
|
-
|
|
1667
|
+
/**
 * Parses output made of one JSON value per line.
 *
 * @param {string} output - Text to split on line breaks.
 * @returns {unknown[] | undefined} The parsed values in order, or `undefined`
 *   when there are fewer than two non-blank lines or any line is not valid JSON.
 */
function parseJsonLines(output) {
  const rows = [];
  for (const raw of output.split(/\r?\n/)) {
    const row = raw.trim();
    if (row.length > 0) {
      rows.push(row);
    }
  }
  if (rows.length <= 1) {
    return void 0;
  }
  try {
    return rows.map((row) => JSON.parse(row));
  } catch {
    // One bad line invalidates the whole result.
    return void 0;
  }
}
|
|
1419
|
-
function
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
if (!Array.isArray(source)) {
|
|
1424
|
-
throw new Error(`${description} must be an array of strings`);
|
|
1682
|
+
/**
 * Chooses the text to use as failure detail, preferring stderr over stdout.
 *
 * @param {string} stderr - Captured standard error.
 * @param {string} stdout - Captured standard output.
 * @returns {string | undefined} The first of the two that is non-empty after
 *   trimming, or `undefined` when both are blank.
 */
function pickDetail(stderr, stdout) {
  for (const stream of [stderr, stdout]) {
    const text = stream.trim();
    if (text.length > 0) {
      return text;
    }
  }
  return void 0;
}
|
|
1690
|
+
/**
 * Formats a timeout as a message suffix.
 *
 * @param {number | undefined} timeoutMs - Timeout in milliseconds.
 * @returns {string} `" after Ns"` with the timeout rounded up to whole
 *   seconds, or an empty string when the timeout is missing, zero, or negative.
 */
function formatTimeoutSuffix2(timeoutMs) {
  const disabled = !timeoutMs || timeoutMs <= 0;
  return disabled ? "" : ` after ${Math.ceil(timeoutMs / 1e3)}s`;
}
|
|
1697
|
+
/**
 * Runs the Codex executable as a child process and collects its output.
 *
 * The prompt is written to the child's stdin. stdout and stderr are decoded
 * as UTF-8, accumulated, and also forwarded chunk by chunk to the optional
 * `onStdoutChunk` / `onStderrChunk` callbacks. The child is sent SIGTERM when
 * `options.signal` aborts or when `options.timeoutMs` elapses.
 *
 * @param {object} options
 * @param {string} options.executable - Program to spawn.
 * @param {string[]} options.args - Arguments passed to the program.
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment for the child.
 * @param {string} options.prompt - Text written to the child's stdin.
 * @param {AbortSignal} [options.signal] - Aborting it terminates the child.
 * @param {number} [options.timeoutMs] - Positive value enables the timeout.
 * @param {(chunk: string) => void} [options.onStdoutChunk]
 * @param {(chunk: string) => void} [options.onStderrChunk]
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, timedOut: boolean}>}
 *   Resolves when the child closes; `exitCode` is -1 when no numeric code is
 *   reported. Rejects when the child emits `error`.
 */
async function defaultCodexRunner(options) {
  return await new Promise((resolve, reject) => {
    const child = spawn(options.executable, options.args, {
      cwd: options.cwd,
      env: options.env,
      stdio: ["pipe", "pipe", "pipe"],
      shell: shouldShellExecute(options.executable)
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const onAbort = () => {
      child.kill("SIGTERM");
    };
    if (options.signal) {
      if (options.signal.aborted) {
        onAbort();
      } else {
        options.signal.addEventListener("abort", onAbort, { once: true });
      }
    }
    let timeoutHandle;
    if (options.timeoutMs && options.timeoutMs > 0) {
      timeoutHandle = setTimeout(() => {
        timedOut = true;
        child.kill("SIGTERM");
      }, options.timeoutMs);
      // The timer alone must not keep the Node process alive.
      timeoutHandle.unref?.();
    }
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
      options.onStdoutChunk?.(chunk);
    });
    child.stderr.setEncoding("utf8");
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
      options.onStderrChunk?.(chunk);
    });
    // If the child exits (or never starts) before reading the prompt, the
    // stdin pipe emits "error" (typically EPIPE). Without a listener that is
    // an unhandled stream error and crashes the process. The outcome is still
    // reported through the child's "error"/"close" events below.
    child.stdin.on("error", () => {
    });
    child.stdin.end(options.prompt);
    const cleanup = () => {
      if (timeoutHandle) {
        clearTimeout(timeoutHandle);
      }
      if (options.signal) {
        options.signal.removeEventListener("abort", onAbort);
      }
    };
    child.on("error", (error) => {
      cleanup();
      reject(error);
    });
    child.on("close", (code) => {
      cleanup();
      resolve({
        stdout,
        stderr,
        exitCode: typeof code === "number" ? code : -1,
        timedOut
      });
    });
  });
}
|
|
1760
|
+
/**
 * Decides whether the executable must be launched through a shell.
 *
 * @param {string} executable - Path or name of the program.
 * @returns {boolean} `true` only on win32 for `.cmd`, `.bat`, or `.ps1`
 *   files (case-insensitive); `false` otherwise.
 */
function shouldShellExecute(executable) {
  if (process.platform !== "win32") {
    return false;
  }
  const name = executable.toLowerCase();
  return [".cmd", ".bat", ".ps1"].some((suffix) => name.endsWith(suffix));
}
|
|
1451
1767
|
|
|
1452
|
-
// src/evaluation/providers/
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
|
|
1456
|
-
var VSCodeProvider = class {
|
|
1768
|
+
// src/evaluation/providers/mock.ts
|
|
1769
|
+
// Response returned when the target configuration supplies none.
var DEFAULT_MOCK_RESPONSE = '{"answer":"Mock provider response. Configure targets.yaml to supply a custom value."}';
/**
 * Provider that returns a canned response, optionally after a delay.
 *
 * The delay is either fixed (`delayMs`) or, when `delayMinMs` or `delayMaxMs`
 * is positive, a random whole number of milliseconds in that inclusive range.
 */
var MockProvider = class {
  id;
  kind = "mock";
  targetName;
  cannedResponse;
  delayMs;
  delayMinMs;
  delayMaxMs;
  /**
   * @param {string} targetName - Name of the target this provider serves.
   * @param {object} config - Optional `response`, `delayMs`, `delayMinMs`, `delayMaxMs`.
   */
  constructor(targetName, config) {
    const { response, delayMs, delayMinMs, delayMaxMs } = config;
    this.id = `mock:${targetName}`;
    this.targetName = targetName;
    this.cannedResponse = response ?? DEFAULT_MOCK_RESPONSE;
    this.delayMs = delayMs ?? 0;
    this.delayMinMs = delayMinMs ?? 0;
    this.delayMaxMs = delayMaxMs ?? 0;
  }
  /**
   * Waits for the configured delay, then returns the canned response along
   * with the request's `prompt` and `guidelines` under `raw`.
   */
  async invoke(request) {
    const waitMs = this.calculateDelay();
    if (waitMs > 0) {
      await new Promise((done) => setTimeout(done, waitMs));
    }
    const { prompt, guidelines } = request;
    return {
      text: this.cannedResponse,
      raw: { prompt, guidelines }
    };
  }
  /**
   * @returns {number} The delay in milliseconds for the next invocation.
   */
  calculateDelay() {
    const ranged = this.delayMinMs > 0 || this.delayMaxMs > 0;
    if (!ranged) {
      return this.delayMs;
    }
    const lower = Math.max(0, this.delayMinMs);
    // An upper bound below the lower bound collapses the range to `lower`.
    const upper = Math.max(lower, this.delayMaxMs);
    return lower + Math.floor(Math.random() * (upper - lower + 1));
  }
};
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
)
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
|
|
1581
|
-
return "";
|
|
1582
|
-
}
|
|
1583
|
-
const buildList = (files) => files.map((absolutePath) => {
|
|
1584
|
-
const fileName = path3.basename(absolutePath);
|
|
1585
|
-
const fileUri = pathToFileUri(absolutePath);
|
|
1586
|
-
return `* [${fileName}](${fileUri})`;
|
|
1587
|
-
});
|
|
1588
|
-
const sections = [];
|
|
1589
|
-
if (guidelineFiles.length > 0) {
|
|
1590
|
-
sections.push(`Read all guideline files:
|
|
1591
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
1808
|
+
|
|
1809
|
+
// src/evaluation/providers/targets.ts
|
|
1810
|
+
import { z } from "zod";
|
|
1811
|
+
// Set of placeholder names: PROMPT, GUIDELINES, EVAL_ID, ATTEMPT, FILES.
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
  "PROMPT",
  "GUIDELINES",
  "EVAL_ID",
  "ATTEMPT",
  "FILES"
]);
// Fields shared by every target definition. `name` and `provider` must be
// non-empty strings; `settings` is a free-form record; `workers`, when given,
// must be an integer of at least 1.
var BASE_TARGET_SCHEMA = z.object({
  name: z.string().min(1, "target name is required"),
  provider: z.string().min(1, "provider is required"),
  settings: z.record(z.unknown()).optional(),
  judge_target: z.string().optional(),
  workers: z.number().int().min(1).optional()
});
|
|
1819
|
+
// API version used when the target does not specify one.
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
/**
 * Normalizes a configured Azure API version.
 *
 * Surrounding whitespace and a leading `api-version=` / `api_version=` /
 * `apiversion=` prefix (case-insensitive) are removed.
 *
 * @param {string | undefined} value - Configured version, if any.
 * @returns {string} The cleaned version, or `DEFAULT_AZURE_API_VERSION` when
 *   nothing remains.
 */
function normalizeAzureApiVersion(value) {
  const candidate = value ? value.trim().replace(/^api[-_]?version\s*=\s*/i, "").trim() : "";
  return candidate.length > 0 ? candidate : DEFAULT_AZURE_API_VERSION;
}
|
|
1603
|
-
function
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1831
|
+
/**
 * Validates a target definition and resolves it into a provider-specific
 * target description.
 *
 * The provider name is matched case-insensitively. Every result has the
 * fields `kind`, `name`, `judgeTarget`, `workers`, `providerBatching`, and
 * `config`; only `kind` and `config` depend on the provider.
 *
 * @param {unknown} definition - Raw target definition.
 * @param {object} [env=process.env] - Environment used to resolve settings.
 * @returns {object} The resolved target.
 * @throws {Error} When validation fails, a setting cannot be resolved, or the
 *   provider is not supported.
 */
function resolveTargetDefinition(definition, env = process.env) {
  const parsed = BASE_TARGET_SCHEMA.parse(definition);
  const provider = parsed.provider.toLowerCase();
  const providerBatching = resolveOptionalBoolean(
    parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
  );
  // Assembles the common envelope around a provider-specific config.
  const describe = (kind, config) => ({
    kind,
    name: parsed.name,
    judgeTarget: parsed.judge_target,
    workers: parsed.workers,
    providerBatching,
    config
  });
  switch (provider) {
    case "azure":
    case "azure-openai":
      return describe("azure", resolveAzureConfig(parsed, env));
    case "anthropic":
      return describe("anthropic", resolveAnthropicConfig(parsed, env));
    case "gemini":
    case "google":
    case "google-gemini":
      return describe("gemini", resolveGeminiConfig(parsed, env));
    case "codex":
    case "codex-cli":
      return describe("codex", resolveCodexConfig(parsed, env));
    case "mock":
      return describe("mock", resolveMockConfig(parsed));
    case "vscode":
    case "vscode-insiders":
      // The kind keeps the exact provider name so the two variants stay distinguishable.
      return describe(provider, resolveVSCodeConfig(parsed, env, provider === "vscode-insiders"));
    case "cli":
      return describe("cli", resolveCliConfig(parsed, env));
    default:
      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
  }
}
|
|
1619
|
-
function
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
const
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
}
|
|
1630
|
-
|
|
1910
|
+
/**
 * Resolves the settings of an Azure target.
 *
 * Accepted setting keys: `endpoint`/`resource`/`resourceName`,
 * `api_key`/`apiKey`, `deployment`/`deploymentName`/`model`,
 * `version`/`api_version`, `temperature`, `max_output_tokens`/`maxTokens`.
 *
 * @param {object} target - Validated target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{resourceName: string, deploymentName: string, apiKey: string,
 *   version: string, temperature: (number|undefined), maxOutputTokens: (number|undefined)}}
 */
function resolveAzureConfig(target, env) {
  const settings = target.settings ?? {};
  const resourceName = resolveString(
    settings.endpoint ?? settings.resource ?? settings.resourceName,
    env,
    `${target.name} endpoint`
  );
  const apiKey = resolveString(settings.api_key ?? settings.apiKey, env, `${target.name} api key`);
  const deploymentName = resolveString(
    settings.deployment ?? settings.deploymentName ?? settings.model,
    env,
    `${target.name} deployment`
  );
  const rawVersion = resolveOptionalString(
    settings.version ?? settings.api_version,
    env,
    `${target.name} api version`
  );
  const version = normalizeAzureApiVersion(rawVersion);
  const temperature = resolveOptionalNumber(settings.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    settings.max_output_tokens ?? settings.maxTokens,
    `${target.name} max output tokens`
  );
  return { resourceName, deploymentName, apiKey, version, temperature, maxOutputTokens };
}
|
|
1938
|
+
/**
 * Resolves the settings of an Anthropic target.
 *
 * Accepted setting keys: `api_key`/`apiKey`, `model`/`deployment`/`variant`,
 * `temperature`, `max_output_tokens`/`maxTokens`,
 * `thinking_budget`/`thinkingBudget`.
 *
 * @param {object} target - Validated target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{apiKey: string, model: string, temperature: (number|undefined),
 *   maxOutputTokens: (number|undefined), thinkingBudget: (number|undefined)}}
 */
function resolveAnthropicConfig(target, env) {
  const settings = target.settings ?? {};
  const apiKey = resolveString(settings.api_key ?? settings.apiKey, env, `${target.name} Anthropic api key`);
  const model = resolveString(
    settings.model ?? settings.deployment ?? settings.variant,
    env,
    `${target.name} Anthropic model`
  );
  const temperature = resolveOptionalNumber(settings.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    settings.max_output_tokens ?? settings.maxTokens,
    `${target.name} max output tokens`
  );
  const thinkingBudget = resolveOptionalNumber(
    settings.thinking_budget ?? settings.thinkingBudget,
    `${target.name} thinking budget`
  );
  return { apiKey, model, temperature, maxOutputTokens, thinkingBudget };
}
|
|
1632
|
-
function
|
|
1633
|
-
const
|
|
1634
|
-
const
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1955
|
+
/**
 * Resolves the settings of a Gemini target.
 *
 * Accepted setting keys: `api_key`/`apiKey`, `model`/`deployment`/`variant`,
 * `temperature`, `max_output_tokens`/`maxTokens`. The model falls back to
 * `gemini-2.5-flash` when it resolves to nothing.
 *
 * @param {object} target - Validated target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{apiKey: string, model: string, temperature: (number|undefined),
 *   maxOutputTokens: (number|undefined)}}
 */
function resolveGeminiConfig(target, env) {
  const settings = target.settings ?? {};
  const apiKey = resolveString(settings.api_key ?? settings.apiKey, env, `${target.name} Google API key`);
  const configuredModel = resolveOptionalString(
    settings.model ?? settings.deployment ?? settings.variant,
    env,
    `${target.name} Gemini model`,
    { allowLiteral: true, optionalEnv: true }
  );
  const model = configuredModel ?? "gemini-2.5-flash";
  const temperature = resolveOptionalNumber(settings.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    settings.max_output_tokens ?? settings.maxTokens,
    `${target.name} max output tokens`
  );
  return { apiKey, model, temperature, maxOutputTokens };
}
|
|
1640
|
-
function
|
|
1641
|
-
|
|
1973
|
+
/**
 * Resolves the settings of a Codex CLI target.
 *
 * Accepted setting keys: `executable`/`command`/`binary` (falls back to
 * `codex`), `args`/`arguments`, `cwd`, `timeout_seconds`/`timeoutSeconds`,
 * `log_dir`/`logDir`/`log_directory`/`logDirectory`, and
 * `log_format`/`logFormat`/`log_output_format`/`logOutputFormat`. When no log
 * format setting is present, `env.AGENTV_CODEX_LOG_FORMAT` is used.
 *
 * @param {object} target - Validated target definition.
 * @param {object} env - Environment used to resolve string settings.
 * @returns {{executable: string, args: *, cwd: (string|undefined), timeoutMs: *,
 *   logDir: (string|undefined), logFormat: (string|undefined)}}
 */
function resolveCodexConfig(target, env) {
  const settings = target.settings ?? {};
  // A fresh options object per call, matching the separate literals used for each lookup.
  const lenient = () => ({ allowLiteral: true, optionalEnv: true });
  const configuredExecutable = resolveOptionalString(
    settings.executable ?? settings.command ?? settings.binary,
    env,
    `${target.name} codex executable`,
    lenient()
  );
  const executable = configuredExecutable ?? "codex";
  const args = resolveOptionalStringArray(settings.args ?? settings.arguments, env, `${target.name} codex args`);
  const cwd = resolveOptionalString(settings.cwd, env, `${target.name} codex cwd`, lenient());
  const timeoutMs = resolveTimeoutMs(
    settings.timeout_seconds ?? settings.timeoutSeconds,
    `${target.name} codex timeout`
  );
  const logDir = resolveOptionalString(
    settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory,
    env,
    `${target.name} codex log directory`,
    lenient()
  );
  const logFormat = normalizeCodexLogFormat(
    settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT
  );
  return { executable, args, cwd, timeoutMs, logDir, logFormat };
}
|
|
2005
|
+
function normalizeCodexLogFormat(value) {
|
|
2006
|
+
if (value === void 0 || value === null) {
|
|
1642
2007
|
return void 0;
|
|
1643
2008
|
}
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
deduped.add(path3.resolve(attachment));
|
|
2009
|
+
if (typeof value !== "string") {
|
|
2010
|
+
throw new Error("codex log format must be 'summary' or 'json'");
|
|
1647
2011
|
}
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
const deduped = /* @__PURE__ */ new Set();
|
|
1652
|
-
for (const list of all) {
|
|
1653
|
-
if (!list) continue;
|
|
1654
|
-
for (const inputFile of list) {
|
|
1655
|
-
deduped.add(path3.resolve(inputFile));
|
|
1656
|
-
}
|
|
2012
|
+
const normalized = value.trim().toLowerCase();
|
|
2013
|
+
if (normalized === "json" || normalized === "summary") {
|
|
2014
|
+
return normalized;
|
|
1657
2015
|
}
|
|
1658
|
-
|
|
2016
|
+
throw new Error("codex log format must be 'summary' or 'json'");
|
|
1659
2017
|
}
|
|
1660
|
-
|
|
1661
|
-
const
|
|
1662
|
-
const
|
|
1663
|
-
|
|
1664
|
-
try {
|
|
1665
|
-
if (verbose) {
|
|
1666
|
-
console.log(`Provisioning ${count} subagent(s) via: subagent ${vscodeCmd} provision`);
|
|
1667
|
-
}
|
|
1668
|
-
const result = await provisionSubagents({
|
|
1669
|
-
targetRoot: subagentRoot,
|
|
1670
|
-
subagents: count,
|
|
1671
|
-
dryRun: false
|
|
1672
|
-
});
|
|
1673
|
-
if (verbose) {
|
|
1674
|
-
if (result.created.length > 0) {
|
|
1675
|
-
console.log(`Created ${result.created.length} new subagent(s)`);
|
|
1676
|
-
}
|
|
1677
|
-
if (result.skippedExisting.length > 0) {
|
|
1678
|
-
console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
|
|
1679
|
-
}
|
|
1680
|
-
console.log(`
|
|
1681
|
-
total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
|
|
1682
|
-
}
|
|
1683
|
-
return {
|
|
1684
|
-
provisioned: true,
|
|
1685
|
-
message: `Provisioned ${count} subagent(s): ${result.created.length} created, ${result.skippedExisting.length} reused`
|
|
1686
|
-
};
|
|
1687
|
-
} catch (error) {
|
|
1688
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1689
|
-
if (verbose) {
|
|
1690
|
-
console.warn(`Provisioning failed (continuing anyway): ${errorMessage}`);
|
|
1691
|
-
}
|
|
1692
|
-
return {
|
|
1693
|
-
provisioned: false,
|
|
1694
|
-
message: `Provisioning failed: ${errorMessage}`
|
|
1695
|
-
};
|
|
1696
|
-
}
|
|
2018
|
+
function resolveMockConfig(target) {
|
|
2019
|
+
const settings = target.settings ?? {};
|
|
2020
|
+
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
2021
|
+
return { response };
|
|
1697
2022
|
}
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
const
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
2023
|
+
function resolveVSCodeConfig(target, env, insiders) {
|
|
2024
|
+
const settings = target.settings ?? {};
|
|
2025
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
2026
|
+
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
2027
|
+
allowLiteral: false,
|
|
2028
|
+
optionalEnv: true
|
|
2029
|
+
}) : void 0;
|
|
2030
|
+
const commandSource = settings.vscode_cmd ?? settings.command;
|
|
2031
|
+
const waitSource = settings.wait;
|
|
2032
|
+
const dryRunSource = settings.dry_run ?? settings.dryRun;
|
|
2033
|
+
const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
|
|
2034
|
+
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
2035
|
+
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
2036
|
+
return {
|
|
2037
|
+
command,
|
|
2038
|
+
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
2039
|
+
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
2040
|
+
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
2041
|
+
allowLiteral: true,
|
|
2042
|
+
optionalEnv: true
|
|
2043
|
+
}),
|
|
2044
|
+
workspaceTemplate
|
|
2045
|
+
};
|
|
2046
|
+
}
|
|
2047
|
+
function resolveCliConfig(target, env) {
|
|
2048
|
+
const settings = target.settings ?? {};
|
|
2049
|
+
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
2050
|
+
const filesFormat = resolveOptionalLiteralString(
|
|
2051
|
+
settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat
|
|
1715
2052
|
);
|
|
1716
|
-
const
|
|
1717
|
-
|
|
1718
|
-
|
|
2053
|
+
const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
|
|
2054
|
+
allowLiteral: true,
|
|
2055
|
+
optionalEnv: true
|
|
2056
|
+
});
|
|
2057
|
+
const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
|
|
2058
|
+
const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
|
|
2059
|
+
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
2060
|
+
const commandTemplate = resolveString(
|
|
2061
|
+
commandTemplateSource,
|
|
2062
|
+
env,
|
|
2063
|
+
`${target.name} CLI command template`,
|
|
2064
|
+
true
|
|
1719
2065
|
);
|
|
1720
|
-
|
|
1721
|
-
|
|
1722
|
-
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
2066
|
+
assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
|
|
2067
|
+
return {
|
|
2068
|
+
commandTemplate,
|
|
2069
|
+
filesFormat,
|
|
2070
|
+
cwd,
|
|
2071
|
+
env: envOverrides,
|
|
2072
|
+
timeoutMs,
|
|
2073
|
+
healthcheck
|
|
2074
|
+
};
|
|
1726
2075
|
}
|
|
1727
|
-
function
|
|
1728
|
-
if (
|
|
2076
|
+
function resolveEnvOverrides(source, env, targetName) {
|
|
2077
|
+
if (source === void 0 || source === null) {
|
|
1729
2078
|
return void 0;
|
|
1730
2079
|
}
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
const absolutePath = path4.resolve(inputFile);
|
|
1734
|
-
if (!deduped.has(absolutePath)) {
|
|
1735
|
-
deduped.set(absolutePath, absolutePath);
|
|
1736
|
-
}
|
|
1737
|
-
}
|
|
1738
|
-
return Array.from(deduped.values());
|
|
1739
|
-
}
|
|
1740
|
-
function collectGuidelineFiles2(inputFiles, guidelinePatterns, overrides) {
|
|
1741
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
1742
|
-
return [];
|
|
1743
|
-
}
|
|
1744
|
-
const unique = /* @__PURE__ */ new Map();
|
|
1745
|
-
for (const inputFile of inputFiles) {
|
|
1746
|
-
const absolutePath = path4.resolve(inputFile);
|
|
1747
|
-
if (overrides?.has(absolutePath)) {
|
|
1748
|
-
if (!unique.has(absolutePath)) {
|
|
1749
|
-
unique.set(absolutePath, absolutePath);
|
|
1750
|
-
}
|
|
1751
|
-
continue;
|
|
1752
|
-
}
|
|
1753
|
-
const normalized = absolutePath.split(path4.sep).join("/");
|
|
1754
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
1755
|
-
if (!unique.has(absolutePath)) {
|
|
1756
|
-
unique.set(absolutePath, absolutePath);
|
|
1757
|
-
}
|
|
1758
|
-
}
|
|
1759
|
-
}
|
|
1760
|
-
return Array.from(unique.values());
|
|
1761
|
-
}
|
|
1762
|
-
function collectInputFiles(inputFiles) {
|
|
1763
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
1764
|
-
return [];
|
|
2080
|
+
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2081
|
+
throw new Error(`${targetName} env overrides must be an object map of strings`);
|
|
1765
2082
|
}
|
|
1766
|
-
const
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
if (
|
|
1770
|
-
|
|
2083
|
+
const entries = Object.entries(source);
|
|
2084
|
+
const resolved = {};
|
|
2085
|
+
for (const [key, value] of entries) {
|
|
2086
|
+
if (typeof value !== "string") {
|
|
2087
|
+
throw new Error(`${targetName} env override '${key}' must be a string`);
|
|
1771
2088
|
}
|
|
2089
|
+
const resolvedValue = resolveString(value, env, `${targetName} env override '${key}'`);
|
|
2090
|
+
resolved[key] = resolvedValue;
|
|
1772
2091
|
}
|
|
1773
|
-
return
|
|
2092
|
+
return Object.keys(resolved).length > 0 ? resolved : void 0;
|
|
1774
2093
|
}
|
|
1775
|
-
function
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
const buildList = (files) => files.map((absolutePath) => {
|
|
1780
|
-
const fileName = path4.basename(absolutePath);
|
|
1781
|
-
const fileUri = pathToFileUri2(absolutePath);
|
|
1782
|
-
return `* [${fileName}](${fileUri})`;
|
|
1783
|
-
});
|
|
1784
|
-
const sections = [];
|
|
1785
|
-
if (guidelineFiles.length > 0) {
|
|
1786
|
-
sections.push(`Read all guideline files:
|
|
1787
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
2094
|
+
function resolveTimeoutMs(source, description) {
|
|
2095
|
+
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
2096
|
+
if (seconds === void 0) {
|
|
2097
|
+
return void 0;
|
|
1788
2098
|
}
|
|
1789
|
-
if (
|
|
1790
|
-
|
|
1791
|
-
${buildList(inputFiles).join("\n")}.`);
|
|
2099
|
+
if (seconds <= 0) {
|
|
2100
|
+
throw new Error(`${description} must be greater than zero seconds`);
|
|
1792
2101
|
}
|
|
1793
|
-
|
|
1794
|
-
"If any file is missing, fail with ERROR: missing-file <filename> and stop.",
|
|
1795
|
-
"Then apply system_instructions on the user query below."
|
|
1796
|
-
);
|
|
1797
|
-
return sections.join("\n");
|
|
2102
|
+
return Math.floor(seconds * 1e3);
|
|
1798
2103
|
}
|
|
1799
|
-
function
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
1803
|
-
return `file:///${normalizedPath}`;
|
|
2104
|
+
function resolveCliHealthcheck(source, env, targetName) {
|
|
2105
|
+
if (source === void 0 || source === null) {
|
|
2106
|
+
return void 0;
|
|
1804
2107
|
}
|
|
1805
|
-
|
|
1806
|
-
}
|
|
1807
|
-
|
|
1808
|
-
// src/evaluation/providers/codex.ts
|
|
1809
|
-
var execAsync2 = promisify2(execCallback);
|
|
1810
|
-
var WORKSPACE_PREFIX = "agentv-codex-";
|
|
1811
|
-
var PROMPT_FILENAME = "prompt.md";
|
|
1812
|
-
var FILES_DIR = "files";
|
|
1813
|
-
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
1814
|
-
var CodexProvider = class {
|
|
1815
|
-
id;
|
|
1816
|
-
kind = "codex";
|
|
1817
|
-
targetName;
|
|
1818
|
-
supportsBatch = false;
|
|
1819
|
-
config;
|
|
1820
|
-
runCodex;
|
|
1821
|
-
environmentCheck;
|
|
1822
|
-
resolvedExecutable;
|
|
1823
|
-
constructor(targetName, config, runner = defaultCodexRunner) {
|
|
1824
|
-
this.id = `codex:${targetName}`;
|
|
1825
|
-
this.targetName = targetName;
|
|
1826
|
-
this.config = config;
|
|
1827
|
-
this.runCodex = runner;
|
|
2108
|
+
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2109
|
+
throw new Error(`${targetName} healthcheck must be an object`);
|
|
1828
2110
|
}
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1835
|
-
|
|
1836
|
-
|
|
2111
|
+
const candidate = source;
|
|
2112
|
+
const type = candidate.type;
|
|
2113
|
+
const timeoutMs = resolveTimeoutMs(
|
|
2114
|
+
candidate.timeout_seconds ?? candidate.timeoutSeconds,
|
|
2115
|
+
`${targetName} healthcheck timeout`
|
|
2116
|
+
);
|
|
2117
|
+
if (type === "http") {
|
|
2118
|
+
const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
|
|
2119
|
+
return {
|
|
2120
|
+
type: "http",
|
|
2121
|
+
url,
|
|
2122
|
+
timeoutMs
|
|
2123
|
+
};
|
|
2124
|
+
}
|
|
2125
|
+
if (type === "command") {
|
|
2126
|
+
const commandTemplate = resolveString(
|
|
2127
|
+
candidate.command_template ?? candidate.commandTemplate,
|
|
2128
|
+
env,
|
|
2129
|
+
`${targetName} healthcheck command template`,
|
|
2130
|
+
true
|
|
1837
2131
|
);
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
1841
|
-
|
|
1842
|
-
|
|
1843
|
-
|
|
2132
|
+
assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
|
|
2133
|
+
const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
|
|
2134
|
+
allowLiteral: true,
|
|
2135
|
+
optionalEnv: true
|
|
2136
|
+
});
|
|
2137
|
+
return {
|
|
2138
|
+
type: "command",
|
|
2139
|
+
commandTemplate,
|
|
2140
|
+
timeoutMs,
|
|
2141
|
+
cwd
|
|
2142
|
+
};
|
|
2143
|
+
}
|
|
2144
|
+
throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
|
|
2145
|
+
}
|
|
2146
|
+
function assertSupportedCliPlaceholders(template, description) {
|
|
2147
|
+
const placeholders = extractCliPlaceholders(template);
|
|
2148
|
+
for (const placeholder of placeholders) {
|
|
2149
|
+
if (!CLI_PLACEHOLDERS.has(placeholder)) {
|
|
2150
|
+
throw new Error(
|
|
2151
|
+
`${description} includes unsupported placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS).join(", ")}`
|
|
1844
2152
|
);
|
|
1845
|
-
const promptContent = buildPromptDocument2(request, mirroredInputFiles, {
|
|
1846
|
-
guidelinePatterns: request.guideline_patterns,
|
|
1847
|
-
guidelineOverrides: guidelineMirrors
|
|
1848
|
-
});
|
|
1849
|
-
const promptFile = path5.join(workspaceRoot, PROMPT_FILENAME);
|
|
1850
|
-
await writeFile(promptFile, promptContent, "utf8");
|
|
1851
|
-
const args = this.buildCodexArgs();
|
|
1852
|
-
const cwd = this.resolveCwd(workspaceRoot);
|
|
1853
|
-
const result = await this.executeCodex(args, cwd, promptContent, request.signal);
|
|
1854
|
-
if (result.timedOut) {
|
|
1855
|
-
throw new Error(
|
|
1856
|
-
`Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
|
|
1857
|
-
);
|
|
1858
|
-
}
|
|
1859
|
-
if (result.exitCode !== 0) {
|
|
1860
|
-
const detail = pickDetail(result.stderr, result.stdout);
|
|
1861
|
-
const prefix = `Codex CLI exited with code ${result.exitCode}`;
|
|
1862
|
-
throw new Error(detail ? `${prefix}: ${detail}` : prefix);
|
|
1863
|
-
}
|
|
1864
|
-
const parsed = parseCodexJson(result.stdout);
|
|
1865
|
-
const assistantText = extractAssistantText(parsed);
|
|
1866
|
-
return {
|
|
1867
|
-
text: assistantText,
|
|
1868
|
-
raw: {
|
|
1869
|
-
response: parsed,
|
|
1870
|
-
stdout: result.stdout,
|
|
1871
|
-
stderr: result.stderr,
|
|
1872
|
-
exitCode: result.exitCode,
|
|
1873
|
-
args,
|
|
1874
|
-
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1875
|
-
promptFile,
|
|
1876
|
-
workspace: workspaceRoot,
|
|
1877
|
-
inputFiles: mirroredInputFiles
|
|
1878
|
-
}
|
|
1879
|
-
};
|
|
1880
|
-
} finally {
|
|
1881
|
-
await this.cleanupWorkspace(workspaceRoot);
|
|
1882
2153
|
}
|
|
1883
2154
|
}
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
2155
|
+
}
|
|
2156
|
+
function extractCliPlaceholders(template) {
|
|
2157
|
+
const matches = template.matchAll(/\{([A-Z_]+)\}/g);
|
|
2158
|
+
const results = [];
|
|
2159
|
+
for (const match of matches) {
|
|
2160
|
+
if (match[1]) {
|
|
2161
|
+
results.push(match[1]);
|
|
1887
2162
|
}
|
|
1888
|
-
await this.environmentCheck;
|
|
1889
2163
|
}
|
|
1890
|
-
|
|
1891
|
-
|
|
2164
|
+
return results;
|
|
2165
|
+
}
|
|
2166
|
+
function resolveString(source, env, description, allowLiteral = false) {
|
|
2167
|
+
const value = resolveOptionalString(source, env, description, {
|
|
2168
|
+
allowLiteral,
|
|
2169
|
+
optionalEnv: false
|
|
2170
|
+
});
|
|
2171
|
+
if (value === void 0) {
|
|
2172
|
+
throw new Error(`${description} is required`);
|
|
1892
2173
|
}
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
return
|
|
2174
|
+
return value;
|
|
2175
|
+
}
|
|
2176
|
+
function resolveOptionalString(source, env, description, options) {
|
|
2177
|
+
if (source === void 0 || source === null) {
|
|
2178
|
+
return void 0;
|
|
1898
2179
|
}
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
if (this.config.args && this.config.args.length > 0) {
|
|
1902
|
-
args.push(...this.config.args);
|
|
1903
|
-
}
|
|
1904
|
-
args.push("-");
|
|
1905
|
-
return args;
|
|
2180
|
+
if (typeof source !== "string") {
|
|
2181
|
+
throw new Error(`${description} must be a string`);
|
|
1906
2182
|
}
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
1910
|
-
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1911
|
-
args,
|
|
1912
|
-
cwd,
|
|
1913
|
-
prompt: promptContent,
|
|
1914
|
-
timeoutMs: this.config.timeoutMs,
|
|
1915
|
-
env: process.env,
|
|
1916
|
-
signal
|
|
1917
|
-
});
|
|
1918
|
-
} catch (error) {
|
|
1919
|
-
const err = error;
|
|
1920
|
-
if (err.code === "ENOENT") {
|
|
1921
|
-
throw new Error(
|
|
1922
|
-
`Codex executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
|
|
1923
|
-
);
|
|
1924
|
-
}
|
|
1925
|
-
throw error;
|
|
1926
|
-
}
|
|
2183
|
+
const trimmed = source.trim();
|
|
2184
|
+
if (trimmed.length === 0) {
|
|
2185
|
+
return void 0;
|
|
1927
2186
|
}
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
1933
|
-
};
|
|
1934
|
-
}
|
|
1935
|
-
const filesRoot = path5.join(workspaceRoot, FILES_DIR);
|
|
1936
|
-
await mkdir(filesRoot, { recursive: true });
|
|
1937
|
-
const mirrored = [];
|
|
1938
|
-
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
1939
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
1940
|
-
for (const inputFile of inputFiles) {
|
|
1941
|
-
const absoluteSource = path5.resolve(inputFile);
|
|
1942
|
-
const baseName = path5.basename(absoluteSource);
|
|
1943
|
-
const count = nameCounts.get(baseName) ?? 0;
|
|
1944
|
-
nameCounts.set(baseName, count + 1);
|
|
1945
|
-
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
1946
|
-
const destination = path5.join(filesRoot, finalName);
|
|
1947
|
-
await copyFile(absoluteSource, destination);
|
|
1948
|
-
const resolvedDestination = path5.resolve(destination);
|
|
1949
|
-
mirrored.push(resolvedDestination);
|
|
1950
|
-
if (guidelineOriginals.has(absoluteSource)) {
|
|
1951
|
-
guidelineMirrors.add(resolvedDestination);
|
|
1952
|
-
}
|
|
2187
|
+
const envValue = env[trimmed];
|
|
2188
|
+
if (envValue !== void 0) {
|
|
2189
|
+
if (envValue.trim().length === 0) {
|
|
2190
|
+
throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
|
|
1953
2191
|
}
|
|
1954
|
-
return
|
|
1955
|
-
mirroredInputFiles: mirrored,
|
|
1956
|
-
guidelineMirrors
|
|
1957
|
-
};
|
|
1958
|
-
}
|
|
1959
|
-
async createWorkspace() {
|
|
1960
|
-
return await mkdtemp(path5.join(tmpdir(), WORKSPACE_PREFIX));
|
|
2192
|
+
return envValue;
|
|
1961
2193
|
}
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
2194
|
+
const allowLiteral = options?.allowLiteral ?? false;
|
|
2195
|
+
const optionalEnv = options?.optionalEnv ?? false;
|
|
2196
|
+
const looksLikeEnv = isLikelyEnvReference(trimmed);
|
|
2197
|
+
if (looksLikeEnv) {
|
|
2198
|
+
if (optionalEnv) {
|
|
2199
|
+
return void 0;
|
|
2200
|
+
}
|
|
2201
|
+
if (!allowLiteral) {
|
|
2202
|
+
throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
|
|
1966
2203
|
}
|
|
1967
2204
|
}
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
if (
|
|
1972
|
-
|
|
1973
|
-
const executablePath = await ensureWindowsExecutableVariant(resolved);
|
|
1974
|
-
await access2(executablePath, constants2.F_OK);
|
|
1975
|
-
return executablePath;
|
|
2205
|
+
return trimmed;
|
|
2206
|
+
}
|
|
2207
|
+
function resolveOptionalLiteralString(source) {
|
|
2208
|
+
if (source === void 0 || source === null) {
|
|
2209
|
+
return void 0;
|
|
1976
2210
|
}
|
|
1977
|
-
|
|
1978
|
-
|
|
1979
|
-
const { stdout } = await execAsync2(`${locator} ${candidate}`);
|
|
1980
|
-
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
1981
|
-
const preferred = selectExecutableCandidate(lines);
|
|
1982
|
-
if (preferred) {
|
|
1983
|
-
const executablePath = await ensureWindowsExecutableVariant(preferred);
|
|
1984
|
-
await access2(executablePath, constants2.F_OK);
|
|
1985
|
-
return executablePath;
|
|
1986
|
-
}
|
|
1987
|
-
} catch {
|
|
2211
|
+
if (typeof source !== "string") {
|
|
2212
|
+
throw new Error("expected string value");
|
|
1988
2213
|
}
|
|
1989
|
-
|
|
2214
|
+
const trimmed = source.trim();
|
|
2215
|
+
return trimmed.length > 0 ? trimmed : void 0;
|
|
1990
2216
|
}
|
|
1991
|
-
function
|
|
1992
|
-
if (
|
|
2217
|
+
function resolveOptionalNumber(source, description) {
|
|
2218
|
+
if (source === void 0 || source === null || source === "") {
|
|
1993
2219
|
return void 0;
|
|
1994
2220
|
}
|
|
1995
|
-
if (
|
|
1996
|
-
return
|
|
2221
|
+
if (typeof source === "number") {
|
|
2222
|
+
return Number.isFinite(source) ? source : void 0;
|
|
1997
2223
|
}
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
return match;
|
|
2224
|
+
if (typeof source === "string") {
|
|
2225
|
+
const numeric = Number(source);
|
|
2226
|
+
if (Number.isFinite(numeric)) {
|
|
2227
|
+
return numeric;
|
|
2003
2228
|
}
|
|
2004
2229
|
}
|
|
2005
|
-
|
|
2230
|
+
throw new Error(`${description} must be a number`);
|
|
2006
2231
|
}
|
|
2007
|
-
|
|
2008
|
-
if (
|
|
2009
|
-
return
|
|
2232
|
+
function resolveOptionalBoolean(source) {
|
|
2233
|
+
if (source === void 0 || source === null || source === "") {
|
|
2234
|
+
return void 0;
|
|
2010
2235
|
}
|
|
2011
|
-
if (
|
|
2012
|
-
return
|
|
2236
|
+
if (typeof source === "boolean") {
|
|
2237
|
+
return source;
|
|
2013
2238
|
}
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2239
|
+
if (typeof source === "string") {
|
|
2240
|
+
const lowered = source.trim().toLowerCase();
|
|
2241
|
+
if (lowered === "true" || lowered === "1") {
|
|
2242
|
+
return true;
|
|
2243
|
+
}
|
|
2244
|
+
if (lowered === "false" || lowered === "0") {
|
|
2245
|
+
return false;
|
|
2021
2246
|
}
|
|
2022
2247
|
}
|
|
2023
|
-
|
|
2248
|
+
throw new Error("expected boolean value");
|
|
2024
2249
|
}
|
|
2025
|
-
function
|
|
2026
|
-
|
|
2027
|
-
return getWindowsExecutableExtensions().some((ext) => lower.endsWith(ext));
|
|
2250
|
+
function isLikelyEnvReference(value) {
|
|
2251
|
+
return /^[A-Z0-9_]+$/.test(value);
|
|
2028
2252
|
}
|
|
2029
|
-
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
return [];
|
|
2253
|
+
function resolveOptionalStringArray(source, env, description) {
|
|
2254
|
+
if (source === void 0 || source === null) {
|
|
2255
|
+
return void 0;
|
|
2033
2256
|
}
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
}
|
|
2037
|
-
function parseCodexJson(output) {
|
|
2038
|
-
const trimmed = output.trim();
|
|
2039
|
-
if (trimmed.length === 0) {
|
|
2040
|
-
throw new Error("Codex CLI produced no output in --json mode");
|
|
2257
|
+
if (!Array.isArray(source)) {
|
|
2258
|
+
throw new Error(`${description} must be an array of strings`);
|
|
2041
2259
|
}
|
|
2042
|
-
|
|
2043
|
-
return
|
|
2044
|
-
}
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
|
|
2260
|
+
if (source.length === 0) {
|
|
2261
|
+
return void 0;
|
|
2262
|
+
}
|
|
2263
|
+
const resolved = [];
|
|
2264
|
+
for (let i = 0; i < source.length; i++) {
|
|
2265
|
+
const item = source[i];
|
|
2266
|
+
if (typeof item !== "string") {
|
|
2267
|
+
throw new Error(`${description}[${i}] must be a string`);
|
|
2048
2268
|
}
|
|
2049
|
-
const
|
|
2050
|
-
if (
|
|
2051
|
-
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2269
|
+
const trimmed = item.trim();
|
|
2270
|
+
if (trimmed.length === 0) {
|
|
2271
|
+
throw new Error(`${description}[${i}] cannot be empty`);
|
|
2272
|
+
}
|
|
2273
|
+
const envValue = env[trimmed];
|
|
2274
|
+
if (envValue !== void 0) {
|
|
2275
|
+
if (envValue.trim().length === 0) {
|
|
2276
|
+
throw new Error(`Environment variable '${trimmed}' for ${description}[${i}] is empty`);
|
|
2055
2277
|
}
|
|
2278
|
+
resolved.push(envValue);
|
|
2279
|
+
} else {
|
|
2280
|
+
resolved.push(trimmed);
|
|
2056
2281
|
}
|
|
2057
|
-
const preview = trimmed.slice(0, 200);
|
|
2058
|
-
throw new Error(`Codex CLI emitted invalid JSON: ${preview}${trimmed.length > 200 ? "\u2026" : ""}`);
|
|
2059
2282
|
}
|
|
2283
|
+
return resolved.length > 0 ? resolved : void 0;
|
|
2060
2284
|
}
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
|
|
2070
|
-
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
2285
|
+
|
|
2286
|
+
// src/evaluation/providers/vscode.ts
|
|
2287
|
+
import { readFile as readFile2 } from "node:fs/promises";
|
|
2288
|
+
import path5 from "node:path";
|
|
2289
|
+
import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
|
|
2290
|
+
var VSCodeProvider = class {
|
|
2291
|
+
id;
|
|
2292
|
+
kind;
|
|
2293
|
+
targetName;
|
|
2294
|
+
supportsBatch = true;
|
|
2295
|
+
config;
|
|
2296
|
+
constructor(targetName, config, kind) {
|
|
2297
|
+
this.id = `${kind}:${targetName}`;
|
|
2298
|
+
this.kind = kind;
|
|
2299
|
+
this.targetName = targetName;
|
|
2300
|
+
this.config = config;
|
|
2075
2301
|
}
|
|
2076
|
-
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
|
|
2302
|
+
async invoke(request) {
|
|
2303
|
+
if (request.signal?.aborted) {
|
|
2304
|
+
throw new Error("VS Code provider request was aborted before dispatch");
|
|
2305
|
+
}
|
|
2306
|
+
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
2307
|
+
const promptContent = buildPromptDocument2(request, inputFiles, request.guideline_patterns);
|
|
2308
|
+
const session = await dispatchAgentSession({
|
|
2309
|
+
userQuery: promptContent,
|
|
2310
|
+
extraAttachments: inputFiles,
|
|
2311
|
+
wait: this.config.waitForResponse,
|
|
2312
|
+
dryRun: this.config.dryRun,
|
|
2313
|
+
vscodeCmd: this.config.command,
|
|
2314
|
+
subagentRoot: this.config.subagentRoot,
|
|
2315
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
2316
|
+
silent: true
|
|
2317
|
+
});
|
|
2318
|
+
if (session.exitCode !== 0 || !session.responseFile) {
|
|
2319
|
+
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
2320
|
+
throw new Error(failure);
|
|
2321
|
+
}
|
|
2322
|
+
if (this.config.dryRun) {
|
|
2323
|
+
return {
|
|
2324
|
+
text: "",
|
|
2325
|
+
raw: {
|
|
2326
|
+
session,
|
|
2327
|
+
inputFiles
|
|
2328
|
+
}
|
|
2329
|
+
};
|
|
2092
2330
|
}
|
|
2331
|
+
const responseText = await readFile2(session.responseFile, "utf8");
|
|
2332
|
+
return {
|
|
2333
|
+
text: responseText,
|
|
2334
|
+
raw: {
|
|
2335
|
+
session,
|
|
2336
|
+
inputFiles
|
|
2337
|
+
}
|
|
2338
|
+
};
|
|
2093
2339
|
}
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
2098
|
-
|
|
2099
|
-
|
|
2340
|
+
async invokeBatch(requests) {
|
|
2341
|
+
if (requests.length === 0) {
|
|
2342
|
+
return [];
|
|
2343
|
+
}
|
|
2344
|
+
const normalizedRequests = requests.map((req) => ({
|
|
2345
|
+
request: req,
|
|
2346
|
+
inputFiles: normalizeAttachments(req.inputFiles)
|
|
2347
|
+
}));
|
|
2348
|
+
const combinedInputFiles = mergeAttachments(
|
|
2349
|
+
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
2350
|
+
);
|
|
2351
|
+
const userQueries = normalizedRequests.map(
|
|
2352
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles, request.guideline_patterns)
|
|
2353
|
+
);
|
|
2354
|
+
const session = await dispatchBatchAgent({
|
|
2355
|
+
userQueries,
|
|
2356
|
+
extraAttachments: combinedInputFiles,
|
|
2357
|
+
wait: this.config.waitForResponse,
|
|
2358
|
+
dryRun: this.config.dryRun,
|
|
2359
|
+
vscodeCmd: this.config.command,
|
|
2360
|
+
subagentRoot: this.config.subagentRoot,
|
|
2361
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
2362
|
+
silent: true
|
|
2363
|
+
});
|
|
2364
|
+
if (session.exitCode !== 0 || !session.responseFiles) {
|
|
2365
|
+
const failure = session.error ?? "VS Code subagent did not produce batch responses";
|
|
2366
|
+
throw new Error(failure);
|
|
2367
|
+
}
|
|
2368
|
+
if (this.config.dryRun) {
|
|
2369
|
+
return normalizedRequests.map(({ inputFiles }) => ({
|
|
2370
|
+
text: "",
|
|
2371
|
+
raw: {
|
|
2372
|
+
session,
|
|
2373
|
+
inputFiles,
|
|
2374
|
+
allInputFiles: combinedInputFiles
|
|
2375
|
+
}
|
|
2376
|
+
}));
|
|
2377
|
+
}
|
|
2378
|
+
if (session.responseFiles.length !== requests.length) {
|
|
2379
|
+
throw new Error(
|
|
2380
|
+
`VS Code batch returned ${session.responseFiles.length} responses for ${requests.length} requests`
|
|
2381
|
+
);
|
|
2100
2382
|
}
|
|
2101
|
-
|
|
2102
|
-
|
|
2103
|
-
|
|
2104
|
-
|
|
2105
|
-
|
|
2106
|
-
|
|
2107
|
-
|
|
2108
|
-
|
|
2109
|
-
|
|
2110
|
-
|
|
2111
|
-
|
|
2112
|
-
|
|
2113
|
-
if (text) {
|
|
2114
|
-
return text;
|
|
2383
|
+
const responses = [];
|
|
2384
|
+
for (const [index, responseFile] of session.responseFiles.entries()) {
|
|
2385
|
+
const responseText = await readFile2(responseFile, "utf8");
|
|
2386
|
+
responses.push({
|
|
2387
|
+
text: responseText,
|
|
2388
|
+
raw: {
|
|
2389
|
+
session,
|
|
2390
|
+
inputFiles: normalizedRequests[index]?.inputFiles,
|
|
2391
|
+
allInputFiles: combinedInputFiles,
|
|
2392
|
+
responseFile
|
|
2393
|
+
}
|
|
2394
|
+
});
|
|
2115
2395
|
}
|
|
2396
|
+
return responses;
|
|
2116
2397
|
}
|
|
2117
|
-
|
|
2398
|
+
};
|
|
2399
|
+
function buildPromptDocument2(request, attachments, guidelinePatterns) {
|
|
2400
|
+
const parts = [];
|
|
2401
|
+
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
2402
|
+
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
2403
|
+
const nonGuidelineAttachments = attachmentFiles.filter(
|
|
2404
|
+
(file) => !guidelineFiles.includes(file)
|
|
2405
|
+
);
|
|
2406
|
+
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
2407
|
+
if (prereadBlock.length > 0) {
|
|
2408
|
+
parts.push("\n", prereadBlock);
|
|
2409
|
+
}
|
|
2410
|
+
parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
|
|
2411
|
+
return parts.join("\n").trim();
|
|
2118
2412
|
}
|
|
2119
|
-
function
|
|
2120
|
-
if (
|
|
2121
|
-
return
|
|
2413
|
+
function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
2414
|
+
if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
|
|
2415
|
+
return "";
|
|
2122
2416
|
}
|
|
2123
|
-
const
|
|
2124
|
-
|
|
2125
|
-
|
|
2126
|
-
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2417
|
+
const buildList = (files) => files.map((absolutePath) => {
|
|
2418
|
+
const fileName = path5.basename(absolutePath);
|
|
2419
|
+
const fileUri = pathToFileUri2(absolutePath);
|
|
2420
|
+
return `* [${fileName}](${fileUri})`;
|
|
2421
|
+
});
|
|
2422
|
+
const sections = [];
|
|
2423
|
+
if (guidelineFiles.length > 0) {
|
|
2424
|
+
sections.push(`Read all guideline files:
|
|
2425
|
+
${buildList(guidelineFiles).join("\n")}.`);
|
|
2131
2426
|
}
|
|
2132
|
-
|
|
2133
|
-
|
|
2134
|
-
|
|
2135
|
-
return flattened;
|
|
2427
|
+
if (attachmentFiles.length > 0) {
|
|
2428
|
+
sections.push(`Read all attachment files:
|
|
2429
|
+
${buildList(attachmentFiles).join("\n")}.`);
|
|
2136
2430
|
}
|
|
2137
|
-
|
|
2431
|
+
sections.push(
|
|
2432
|
+
"If any file is missing, fail with ERROR: missing-file <filename> and stop.",
|
|
2433
|
+
"Then apply system_instructions on the user query below."
|
|
2434
|
+
);
|
|
2435
|
+
return sections.join("\n");
|
|
2138
2436
|
}
|
|
2139
|
-
function
|
|
2140
|
-
if (!
|
|
2141
|
-
return
|
|
2437
|
+
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
2438
|
+
if (!attachments || attachments.length === 0) {
|
|
2439
|
+
return [];
|
|
2142
2440
|
}
|
|
2143
|
-
const
|
|
2144
|
-
const
|
|
2145
|
-
|
|
2146
|
-
const
|
|
2147
|
-
if (
|
|
2148
|
-
|
|
2441
|
+
const unique = /* @__PURE__ */ new Map();
|
|
2442
|
+
for (const attachment of attachments) {
|
|
2443
|
+
const absolutePath = path5.resolve(attachment);
|
|
2444
|
+
const normalized = absolutePath.split(path5.sep).join("/");
|
|
2445
|
+
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
2446
|
+
if (!unique.has(absolutePath)) {
|
|
2447
|
+
unique.set(absolutePath, absolutePath);
|
|
2448
|
+
}
|
|
2149
2449
|
}
|
|
2150
2450
|
}
|
|
2151
|
-
return
|
|
2451
|
+
return Array.from(unique.values());
|
|
2152
2452
|
}
|
|
2153
|
-
function
|
|
2154
|
-
if (
|
|
2155
|
-
return
|
|
2453
|
+
function collectAttachmentFiles(attachments) {
|
|
2454
|
+
if (!attachments || attachments.length === 0) {
|
|
2455
|
+
return [];
|
|
2156
2456
|
}
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
|
|
2162
|
-
|
|
2163
|
-
const text = segment.text;
|
|
2164
|
-
return typeof text === "string" ? text : void 0;
|
|
2165
|
-
}
|
|
2166
|
-
return void 0;
|
|
2167
|
-
}).filter((part) => typeof part === "string" && part.length > 0);
|
|
2168
|
-
return parts.length > 0 ? parts.join(" \n") : void 0;
|
|
2457
|
+
const unique = /* @__PURE__ */ new Map();
|
|
2458
|
+
for (const attachment of attachments) {
|
|
2459
|
+
const absolutePath = path5.resolve(attachment);
|
|
2460
|
+
if (!unique.has(absolutePath)) {
|
|
2461
|
+
unique.set(absolutePath, absolutePath);
|
|
2462
|
+
}
|
|
2169
2463
|
}
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
|
|
2464
|
+
return Array.from(unique.values());
|
|
2465
|
+
}
|
|
2466
|
+
function pathToFileUri2(filePath) {
|
|
2467
|
+
const absolutePath = path5.isAbsolute(filePath) ? filePath : path5.resolve(filePath);
|
|
2468
|
+
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
2469
|
+
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
2470
|
+
return `file:///${normalizedPath}`;
|
|
2173
2471
|
}
|
|
2174
|
-
return
|
|
2472
|
+
return `file://${normalizedPath}`;
|
|
2175
2473
|
}
|
|
2176
|
-
function
|
|
2177
|
-
|
|
2178
|
-
if (lines.length <= 1) {
|
|
2474
|
+
function normalizeAttachments(attachments) {
|
|
2475
|
+
if (!attachments || attachments.length === 0) {
|
|
2179
2476
|
return void 0;
|
|
2180
2477
|
}
|
|
2181
|
-
const
|
|
2182
|
-
for (const
|
|
2183
|
-
|
|
2184
|
-
parsed.push(JSON.parse(line));
|
|
2185
|
-
} catch {
|
|
2186
|
-
return void 0;
|
|
2187
|
-
}
|
|
2188
|
-
}
|
|
2189
|
-
return parsed;
|
|
2190
|
-
}
|
|
2191
|
-
function pickDetail(stderr, stdout) {
|
|
2192
|
-
const errorText = stderr.trim();
|
|
2193
|
-
if (errorText.length > 0) {
|
|
2194
|
-
return errorText;
|
|
2478
|
+
const deduped = /* @__PURE__ */ new Set();
|
|
2479
|
+
for (const attachment of attachments) {
|
|
2480
|
+
deduped.add(path5.resolve(attachment));
|
|
2195
2481
|
}
|
|
2196
|
-
|
|
2197
|
-
return stdoutText.length > 0 ? stdoutText : void 0;
|
|
2482
|
+
return Array.from(deduped);
|
|
2198
2483
|
}
|
|
2199
|
-
function
|
|
2200
|
-
|
|
2201
|
-
|
|
2484
|
+
function mergeAttachments(all) {
|
|
2485
|
+
const deduped = /* @__PURE__ */ new Set();
|
|
2486
|
+
for (const list of all) {
|
|
2487
|
+
if (!list) continue;
|
|
2488
|
+
for (const inputFile of list) {
|
|
2489
|
+
deduped.add(path5.resolve(inputFile));
|
|
2490
|
+
}
|
|
2202
2491
|
}
|
|
2203
|
-
|
|
2204
|
-
return ` after ${seconds}s`;
|
|
2492
|
+
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
2205
2493
|
}
|
|
2206
|
-
async function
|
|
2207
|
-
|
|
2208
|
-
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
});
|
|
2214
|
-
let stdout = "";
|
|
2215
|
-
let stderr = "";
|
|
2216
|
-
let timedOut = false;
|
|
2217
|
-
const onAbort = () => {
|
|
2218
|
-
child.kill("SIGTERM");
|
|
2219
|
-
};
|
|
2220
|
-
if (options.signal) {
|
|
2221
|
-
if (options.signal.aborted) {
|
|
2222
|
-
onAbort();
|
|
2223
|
-
} else {
|
|
2224
|
-
options.signal.addEventListener("abort", onAbort, { once: true });
|
|
2225
|
-
}
|
|
2226
|
-
}
|
|
2227
|
-
let timeoutHandle;
|
|
2228
|
-
if (options.timeoutMs && options.timeoutMs > 0) {
|
|
2229
|
-
timeoutHandle = setTimeout(() => {
|
|
2230
|
-
timedOut = true;
|
|
2231
|
-
child.kill("SIGTERM");
|
|
2232
|
-
}, options.timeoutMs);
|
|
2233
|
-
timeoutHandle.unref?.();
|
|
2494
|
+
async function ensureVSCodeSubagents(options) {
|
|
2495
|
+
const { kind, count, verbose = false } = options;
|
|
2496
|
+
const vscodeCmd = kind === "vscode-insiders" ? "code-insiders" : "code";
|
|
2497
|
+
const subagentRoot = getSubagentRoot(vscodeCmd);
|
|
2498
|
+
try {
|
|
2499
|
+
if (verbose) {
|
|
2500
|
+
console.log(`Provisioning ${count} subagent(s) via: subagent ${vscodeCmd} provision`);
|
|
2234
2501
|
}
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
child.stderr.setEncoding("utf8");
|
|
2240
|
-
child.stderr.on("data", (chunk) => {
|
|
2241
|
-
stderr += chunk;
|
|
2502
|
+
const result = await provisionSubagents({
|
|
2503
|
+
targetRoot: subagentRoot,
|
|
2504
|
+
subagents: count,
|
|
2505
|
+
dryRun: false
|
|
2242
2506
|
});
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
clearTimeout(timeoutHandle);
|
|
2507
|
+
if (verbose) {
|
|
2508
|
+
if (result.created.length > 0) {
|
|
2509
|
+
console.log(`Created ${result.created.length} new subagent(s)`);
|
|
2247
2510
|
}
|
|
2248
|
-
if (
|
|
2249
|
-
|
|
2511
|
+
if (result.skippedExisting.length > 0) {
|
|
2512
|
+
console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
|
|
2250
2513
|
}
|
|
2514
|
+
console.log(`
|
|
2515
|
+
total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
|
|
2516
|
+
}
|
|
2517
|
+
return {
|
|
2518
|
+
provisioned: true,
|
|
2519
|
+
message: `Provisioned ${count} subagent(s): ${result.created.length} created, ${result.skippedExisting.length} reused`
|
|
2520
|
+
};
|
|
2521
|
+
} catch (error) {
|
|
2522
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
2523
|
+
if (verbose) {
|
|
2524
|
+
console.warn(`Provisioning failed (continuing anyway): ${errorMessage}`);
|
|
2525
|
+
}
|
|
2526
|
+
return {
|
|
2527
|
+
provisioned: false,
|
|
2528
|
+
message: `Provisioning failed: ${errorMessage}`
|
|
2251
2529
|
};
|
|
2252
|
-
child.on("error", (error) => {
|
|
2253
|
-
cleanup();
|
|
2254
|
-
reject(error);
|
|
2255
|
-
});
|
|
2256
|
-
child.on("close", (code) => {
|
|
2257
|
-
cleanup();
|
|
2258
|
-
resolve({
|
|
2259
|
-
stdout,
|
|
2260
|
-
stderr,
|
|
2261
|
-
exitCode: typeof code === "number" ? code : -1,
|
|
2262
|
-
timedOut
|
|
2263
|
-
});
|
|
2264
|
-
});
|
|
2265
|
-
});
|
|
2266
|
-
}
|
|
2267
|
-
function shouldShellExecute(executable) {
|
|
2268
|
-
if (process.platform !== "win32") {
|
|
2269
|
-
return false;
|
|
2270
2530
|
}
|
|
2271
|
-
const lower = executable.toLowerCase();
|
|
2272
|
-
return lower.endsWith(".cmd") || lower.endsWith(".bat") || lower.endsWith(".ps1");
|
|
2273
2531
|
}
|
|
2274
2532
|
|
|
2275
2533
|
// src/evaluation/providers/targets-file.ts
|
|
@@ -2386,7 +2644,7 @@ function resolveAndCreateProvider(definition, env = process.env) {
|
|
|
2386
2644
|
}
|
|
2387
2645
|
|
|
2388
2646
|
// src/evaluation/evaluators.ts
|
|
2389
|
-
import { randomUUID } from "node:crypto";
|
|
2647
|
+
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
2390
2648
|
var LlmJudgeEvaluator = class {
|
|
2391
2649
|
kind = "llm_judge";
|
|
2392
2650
|
resolveJudgeProvider;
|
|
@@ -2424,7 +2682,7 @@ var LlmJudgeEvaluator = class {
|
|
|
2424
2682
|
const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
|
|
2425
2683
|
const reasoning = parsed.reasoning ?? response.reasoning;
|
|
2426
2684
|
const evaluatorRawRequest = {
|
|
2427
|
-
id:
|
|
2685
|
+
id: randomUUID2(),
|
|
2428
2686
|
provider: judgeProvider.id,
|
|
2429
2687
|
prompt,
|
|
2430
2688
|
target: context.target.name,
|
|
@@ -2663,7 +2921,7 @@ function parseJsonSafe(payload) {
|
|
|
2663
2921
|
}
|
|
2664
2922
|
|
|
2665
2923
|
// src/evaluation/orchestrator.ts
|
|
2666
|
-
import { createHash, randomUUID as
|
|
2924
|
+
import { createHash, randomUUID as randomUUID3 } from "node:crypto";
|
|
2667
2925
|
import { mkdir as mkdir2, readFile as readFile4, writeFile as writeFile2 } from "node:fs/promises";
|
|
2668
2926
|
import path7 from "node:path";
|
|
2669
2927
|
|
|
@@ -3436,7 +3694,7 @@ function sanitizeFilename(value) {
|
|
|
3436
3694
|
return "prompt";
|
|
3437
3695
|
}
|
|
3438
3696
|
const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
3439
|
-
return sanitized.length > 0 ? sanitized :
|
|
3697
|
+
return sanitized.length > 0 ? sanitized : randomUUID3();
|
|
3440
3698
|
}
|
|
3441
3699
|
async function invokeProvider(provider, options) {
|
|
3442
3700
|
const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
|