@agentv/core 0.5.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1401 -1143
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.js +1403 -1145
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1112,1323 +1112,1581 @@ function formatTimeoutSuffix(timeoutMs) {
|
|
|
1112
1112
|
return ` after ${seconds}s`;
|
|
1113
1113
|
}
|
|
1114
1114
|
|
|
1115
|
-
// src/evaluation/providers/
|
|
1116
|
-
var
|
|
1117
|
-
var
|
|
1115
|
+
// src/evaluation/providers/codex.ts
|
|
1116
|
+
var import_node_child_process2 = require("child_process");
|
|
1117
|
+
var import_node_crypto = require("crypto");
|
|
1118
|
+
var import_node_fs3 = require("fs");
|
|
1119
|
+
var import_promises3 = require("fs/promises");
|
|
1120
|
+
var import_node_os = require("os");
|
|
1121
|
+
var import_node_path5 = __toESM(require("path"), 1);
|
|
1122
|
+
var import_node_util2 = require("util");
|
|
1123
|
+
|
|
1124
|
+
// src/evaluation/providers/preread.ts
|
|
1125
|
+
var import_node_path4 = __toESM(require("path"), 1);
|
|
1126
|
+
function buildPromptDocument(request, inputFiles, options) {
|
|
1127
|
+
const parts = [];
|
|
1128
|
+
const guidelineFiles = collectGuidelineFiles(
|
|
1129
|
+
inputFiles,
|
|
1130
|
+
options?.guidelinePatterns ?? request.guideline_patterns,
|
|
1131
|
+
options?.guidelineOverrides
|
|
1132
|
+
);
|
|
1133
|
+
const inputFilesList = collectInputFiles(inputFiles);
|
|
1134
|
+
const nonGuidelineInputFiles = inputFilesList.filter(
|
|
1135
|
+
(file) => !guidelineFiles.includes(file)
|
|
1136
|
+
);
|
|
1137
|
+
const prereadBlock = buildMandatoryPrereadBlock(guidelineFiles, nonGuidelineInputFiles);
|
|
1138
|
+
if (prereadBlock.length > 0) {
|
|
1139
|
+
parts.push("\n", prereadBlock);
|
|
1140
|
+
}
|
|
1141
|
+
parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
|
|
1142
|
+
return parts.join("\n").trim();
|
|
1143
|
+
}
|
|
1144
|
+
function normalizeInputFiles2(inputFiles) {
|
|
1145
|
+
if (!inputFiles || inputFiles.length === 0) {
|
|
1146
|
+
return void 0;
|
|
1147
|
+
}
|
|
1148
|
+
const deduped = /* @__PURE__ */ new Map();
|
|
1149
|
+
for (const inputFile of inputFiles) {
|
|
1150
|
+
const absolutePath = import_node_path4.default.resolve(inputFile);
|
|
1151
|
+
if (!deduped.has(absolutePath)) {
|
|
1152
|
+
deduped.set(absolutePath, absolutePath);
|
|
1153
|
+
}
|
|
1154
|
+
}
|
|
1155
|
+
return Array.from(deduped.values());
|
|
1156
|
+
}
|
|
1157
|
+
function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
|
|
1158
|
+
if (!inputFiles || inputFiles.length === 0) {
|
|
1159
|
+
return [];
|
|
1160
|
+
}
|
|
1161
|
+
const unique = /* @__PURE__ */ new Map();
|
|
1162
|
+
for (const inputFile of inputFiles) {
|
|
1163
|
+
const absolutePath = import_node_path4.default.resolve(inputFile);
|
|
1164
|
+
if (overrides?.has(absolutePath)) {
|
|
1165
|
+
if (!unique.has(absolutePath)) {
|
|
1166
|
+
unique.set(absolutePath, absolutePath);
|
|
1167
|
+
}
|
|
1168
|
+
continue;
|
|
1169
|
+
}
|
|
1170
|
+
const normalized = absolutePath.split(import_node_path4.default.sep).join("/");
|
|
1171
|
+
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
1172
|
+
if (!unique.has(absolutePath)) {
|
|
1173
|
+
unique.set(absolutePath, absolutePath);
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
}
|
|
1177
|
+
return Array.from(unique.values());
|
|
1178
|
+
}
|
|
1179
|
+
function collectInputFiles(inputFiles) {
|
|
1180
|
+
if (!inputFiles || inputFiles.length === 0) {
|
|
1181
|
+
return [];
|
|
1182
|
+
}
|
|
1183
|
+
const unique = /* @__PURE__ */ new Map();
|
|
1184
|
+
for (const inputFile of inputFiles) {
|
|
1185
|
+
const absolutePath = import_node_path4.default.resolve(inputFile);
|
|
1186
|
+
if (!unique.has(absolutePath)) {
|
|
1187
|
+
unique.set(absolutePath, absolutePath);
|
|
1188
|
+
}
|
|
1189
|
+
}
|
|
1190
|
+
return Array.from(unique.values());
|
|
1191
|
+
}
|
|
1192
|
+
function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
|
|
1193
|
+
if (guidelineFiles.length === 0 && inputFiles.length === 0) {
|
|
1194
|
+
return "";
|
|
1195
|
+
}
|
|
1196
|
+
const buildList = (files) => files.map((absolutePath) => {
|
|
1197
|
+
const fileName = import_node_path4.default.basename(absolutePath);
|
|
1198
|
+
const fileUri = pathToFileUri(absolutePath);
|
|
1199
|
+
return `* [${fileName}](${fileUri})`;
|
|
1200
|
+
});
|
|
1201
|
+
const sections = [];
|
|
1202
|
+
if (guidelineFiles.length > 0) {
|
|
1203
|
+
sections.push(`Read all guideline files:
|
|
1204
|
+
${buildList(guidelineFiles).join("\n")}.`);
|
|
1205
|
+
}
|
|
1206
|
+
if (inputFiles.length > 0) {
|
|
1207
|
+
sections.push(`Read all input files:
|
|
1208
|
+
${buildList(inputFiles).join("\n")}.`);
|
|
1209
|
+
}
|
|
1210
|
+
sections.push(
|
|
1211
|
+
"If any file is missing, fail with ERROR: missing-file <filename> and stop.",
|
|
1212
|
+
"Then apply system_instructions on the user query below."
|
|
1213
|
+
);
|
|
1214
|
+
return sections.join("\n");
|
|
1215
|
+
}
|
|
1216
|
+
function pathToFileUri(filePath) {
|
|
1217
|
+
const absolutePath = import_node_path4.default.isAbsolute(filePath) ? filePath : import_node_path4.default.resolve(filePath);
|
|
1218
|
+
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
1219
|
+
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
1220
|
+
return `file:///${normalizedPath}`;
|
|
1221
|
+
}
|
|
1222
|
+
return `file://${normalizedPath}`;
|
|
1223
|
+
}
|
|
1224
|
+
|
|
1225
|
+
// src/evaluation/providers/codex.ts
|
|
1226
|
+
var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exec);
|
|
1227
|
+
var WORKSPACE_PREFIX = "agentv-codex-";
|
|
1228
|
+
var PROMPT_FILENAME = "prompt.md";
|
|
1229
|
+
var FILES_DIR = "files";
|
|
1230
|
+
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
1231
|
+
var CodexProvider = class {
|
|
1118
1232
|
id;
|
|
1119
|
-
kind = "
|
|
1233
|
+
kind = "codex";
|
|
1120
1234
|
targetName;
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1235
|
+
supportsBatch = false;
|
|
1236
|
+
config;
|
|
1237
|
+
runCodex;
|
|
1238
|
+
environmentCheck;
|
|
1239
|
+
resolvedExecutable;
|
|
1240
|
+
constructor(targetName, config, runner = defaultCodexRunner) {
|
|
1241
|
+
this.id = `codex:${targetName}`;
|
|
1127
1242
|
this.targetName = targetName;
|
|
1128
|
-
this.
|
|
1129
|
-
this.
|
|
1130
|
-
this.delayMinMs = config.delayMinMs ?? 0;
|
|
1131
|
-
this.delayMaxMs = config.delayMaxMs ?? 0;
|
|
1243
|
+
this.config = config;
|
|
1244
|
+
this.runCodex = runner;
|
|
1132
1245
|
}
|
|
1133
1246
|
async invoke(request) {
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
1247
|
+
if (request.signal?.aborted) {
|
|
1248
|
+
throw new Error("Codex provider request was aborted before execution");
|
|
1137
1249
|
}
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1250
|
+
await this.ensureEnvironmentReady();
|
|
1251
|
+
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
1252
|
+
const originalGuidelines = new Set(
|
|
1253
|
+
collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => import_node_path5.default.resolve(file))
|
|
1254
|
+
);
|
|
1255
|
+
const workspaceRoot = await this.createWorkspace();
|
|
1256
|
+
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
1257
|
+
try {
|
|
1258
|
+
const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
|
|
1259
|
+
inputFiles,
|
|
1260
|
+
workspaceRoot,
|
|
1261
|
+
originalGuidelines
|
|
1262
|
+
);
|
|
1263
|
+
const promptContent = buildPromptDocument(request, mirroredInputFiles, {
|
|
1264
|
+
guidelinePatterns: request.guideline_patterns,
|
|
1265
|
+
guidelineOverrides: guidelineMirrors
|
|
1266
|
+
});
|
|
1267
|
+
const promptFile = import_node_path5.default.join(workspaceRoot, PROMPT_FILENAME);
|
|
1268
|
+
await (0, import_promises3.writeFile)(promptFile, promptContent, "utf8");
|
|
1269
|
+
const args = this.buildCodexArgs();
|
|
1270
|
+
const cwd = this.resolveCwd(workspaceRoot);
|
|
1271
|
+
const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
|
|
1272
|
+
if (result.timedOut) {
|
|
1273
|
+
throw new Error(
|
|
1274
|
+
`Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
|
|
1275
|
+
);
|
|
1143
1276
|
}
|
|
1144
|
-
|
|
1277
|
+
if (result.exitCode !== 0) {
|
|
1278
|
+
const detail = pickDetail(result.stderr, result.stdout);
|
|
1279
|
+
const prefix = `Codex CLI exited with code ${result.exitCode}`;
|
|
1280
|
+
throw new Error(detail ? `${prefix}: ${detail}` : prefix);
|
|
1281
|
+
}
|
|
1282
|
+
const parsed = parseCodexJson(result.stdout);
|
|
1283
|
+
const assistantText = extractAssistantText(parsed);
|
|
1284
|
+
return {
|
|
1285
|
+
text: assistantText,
|
|
1286
|
+
raw: {
|
|
1287
|
+
response: parsed,
|
|
1288
|
+
stdout: result.stdout,
|
|
1289
|
+
stderr: result.stderr,
|
|
1290
|
+
exitCode: result.exitCode,
|
|
1291
|
+
args,
|
|
1292
|
+
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1293
|
+
promptFile,
|
|
1294
|
+
workspace: workspaceRoot,
|
|
1295
|
+
inputFiles: mirroredInputFiles,
|
|
1296
|
+
logFile: logger?.filePath
|
|
1297
|
+
}
|
|
1298
|
+
};
|
|
1299
|
+
} finally {
|
|
1300
|
+
await logger?.close();
|
|
1301
|
+
await this.cleanupWorkspace(workspaceRoot);
|
|
1302
|
+
}
|
|
1145
1303
|
}
|
|
1146
|
-
|
|
1147
|
-
if (this.
|
|
1148
|
-
|
|
1149
|
-
const max = Math.max(min, this.delayMaxMs);
|
|
1150
|
-
return Math.floor(Math.random() * (max - min + 1)) + min;
|
|
1304
|
+
async ensureEnvironmentReady() {
|
|
1305
|
+
if (!this.environmentCheck) {
|
|
1306
|
+
this.environmentCheck = this.validateEnvironment();
|
|
1151
1307
|
}
|
|
1152
|
-
|
|
1308
|
+
await this.environmentCheck;
|
|
1153
1309
|
}
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
// src/evaluation/providers/targets.ts
|
|
1157
|
-
var import_zod = require("zod");
|
|
1158
|
-
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
|
|
1159
|
-
var BASE_TARGET_SCHEMA = import_zod.z.object({
|
|
1160
|
-
name: import_zod.z.string().min(1, "target name is required"),
|
|
1161
|
-
provider: import_zod.z.string().min(1, "provider is required"),
|
|
1162
|
-
settings: import_zod.z.record(import_zod.z.unknown()).optional(),
|
|
1163
|
-
judge_target: import_zod.z.string().optional(),
|
|
1164
|
-
workers: import_zod.z.number().int().min(1).optional()
|
|
1165
|
-
});
|
|
1166
|
-
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
|
|
1167
|
-
function normalizeAzureApiVersion(value) {
|
|
1168
|
-
if (!value) {
|
|
1169
|
-
return DEFAULT_AZURE_API_VERSION;
|
|
1310
|
+
async validateEnvironment() {
|
|
1311
|
+
this.resolvedExecutable = await locateExecutable(this.config.executable);
|
|
1170
1312
|
}
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1313
|
+
resolveCwd(workspaceRoot) {
|
|
1314
|
+
if (!this.config.cwd) {
|
|
1315
|
+
return workspaceRoot;
|
|
1316
|
+
}
|
|
1317
|
+
return import_node_path5.default.resolve(this.config.cwd);
|
|
1174
1318
|
}
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
)
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
kind: "gemini",
|
|
1209
|
-
name: parsed.name,
|
|
1210
|
-
judgeTarget: parsed.judge_target,
|
|
1211
|
-
workers: parsed.workers,
|
|
1212
|
-
providerBatching,
|
|
1213
|
-
config: resolveGeminiConfig(parsed, env)
|
|
1214
|
-
};
|
|
1215
|
-
case "codex":
|
|
1216
|
-
case "codex-cli":
|
|
1217
|
-
return {
|
|
1218
|
-
kind: "codex",
|
|
1219
|
-
name: parsed.name,
|
|
1220
|
-
judgeTarget: parsed.judge_target,
|
|
1221
|
-
workers: parsed.workers,
|
|
1222
|
-
providerBatching,
|
|
1223
|
-
config: resolveCodexConfig(parsed, env)
|
|
1224
|
-
};
|
|
1225
|
-
case "mock":
|
|
1226
|
-
return {
|
|
1227
|
-
kind: "mock",
|
|
1228
|
-
name: parsed.name,
|
|
1229
|
-
judgeTarget: parsed.judge_target,
|
|
1230
|
-
workers: parsed.workers,
|
|
1231
|
-
providerBatching,
|
|
1232
|
-
config: resolveMockConfig(parsed)
|
|
1233
|
-
};
|
|
1234
|
-
case "vscode":
|
|
1235
|
-
case "vscode-insiders":
|
|
1236
|
-
return {
|
|
1237
|
-
kind: provider,
|
|
1238
|
-
name: parsed.name,
|
|
1239
|
-
judgeTarget: parsed.judge_target,
|
|
1240
|
-
workers: parsed.workers,
|
|
1241
|
-
providerBatching,
|
|
1242
|
-
config: resolveVSCodeConfig(parsed, env, provider === "vscode-insiders")
|
|
1243
|
-
};
|
|
1244
|
-
case "cli":
|
|
1319
|
+
buildCodexArgs() {
|
|
1320
|
+
const args = ["--ask-for-approval", "never", "exec", "--json", "--color", "never", "--skip-git-repo-check"];
|
|
1321
|
+
if (this.config.args && this.config.args.length > 0) {
|
|
1322
|
+
args.push(...this.config.args);
|
|
1323
|
+
}
|
|
1324
|
+
args.push("-");
|
|
1325
|
+
return args;
|
|
1326
|
+
}
|
|
1327
|
+
async executeCodex(args, cwd, promptContent, signal, logger) {
|
|
1328
|
+
try {
|
|
1329
|
+
return await this.runCodex({
|
|
1330
|
+
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1331
|
+
args,
|
|
1332
|
+
cwd,
|
|
1333
|
+
prompt: promptContent,
|
|
1334
|
+
timeoutMs: this.config.timeoutMs,
|
|
1335
|
+
env: process.env,
|
|
1336
|
+
signal,
|
|
1337
|
+
onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
|
|
1338
|
+
onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
|
|
1339
|
+
});
|
|
1340
|
+
} catch (error) {
|
|
1341
|
+
const err = error;
|
|
1342
|
+
if (err.code === "ENOENT") {
|
|
1343
|
+
throw new Error(
|
|
1344
|
+
`Codex executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
|
|
1345
|
+
);
|
|
1346
|
+
}
|
|
1347
|
+
throw error;
|
|
1348
|
+
}
|
|
1349
|
+
}
|
|
1350
|
+
async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
|
|
1351
|
+
if (!inputFiles || inputFiles.length === 0) {
|
|
1245
1352
|
return {
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
judgeTarget: parsed.judge_target,
|
|
1249
|
-
workers: parsed.workers,
|
|
1250
|
-
providerBatching,
|
|
1251
|
-
config: resolveCliConfig(parsed, env)
|
|
1353
|
+
mirroredInputFiles: void 0,
|
|
1354
|
+
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
1252
1355
|
};
|
|
1253
|
-
|
|
1254
|
-
|
|
1356
|
+
}
|
|
1357
|
+
const filesRoot = import_node_path5.default.join(workspaceRoot, FILES_DIR);
|
|
1358
|
+
await (0, import_promises3.mkdir)(filesRoot, { recursive: true });
|
|
1359
|
+
const mirrored = [];
|
|
1360
|
+
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
1361
|
+
const nameCounts = /* @__PURE__ */ new Map();
|
|
1362
|
+
for (const inputFile of inputFiles) {
|
|
1363
|
+
const absoluteSource = import_node_path5.default.resolve(inputFile);
|
|
1364
|
+
const baseName = import_node_path5.default.basename(absoluteSource);
|
|
1365
|
+
const count = nameCounts.get(baseName) ?? 0;
|
|
1366
|
+
nameCounts.set(baseName, count + 1);
|
|
1367
|
+
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
1368
|
+
const destination = import_node_path5.default.join(filesRoot, finalName);
|
|
1369
|
+
await (0, import_promises3.copyFile)(absoluteSource, destination);
|
|
1370
|
+
const resolvedDestination = import_node_path5.default.resolve(destination);
|
|
1371
|
+
mirrored.push(resolvedDestination);
|
|
1372
|
+
if (guidelineOriginals.has(absoluteSource)) {
|
|
1373
|
+
guidelineMirrors.add(resolvedDestination);
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1376
|
+
return {
|
|
1377
|
+
mirroredInputFiles: mirrored,
|
|
1378
|
+
guidelineMirrors
|
|
1379
|
+
};
|
|
1255
1380
|
}
|
|
1381
|
+
async createWorkspace() {
|
|
1382
|
+
return await (0, import_promises3.mkdtemp)(import_node_path5.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
|
|
1383
|
+
}
|
|
1384
|
+
async cleanupWorkspace(workspaceRoot) {
|
|
1385
|
+
try {
|
|
1386
|
+
await (0, import_promises3.rm)(workspaceRoot, { recursive: true, force: true });
|
|
1387
|
+
} catch {
|
|
1388
|
+
}
|
|
1389
|
+
}
|
|
1390
|
+
resolveLogDirectory() {
|
|
1391
|
+
const disabled = isCodexLogStreamingDisabled();
|
|
1392
|
+
if (disabled) {
|
|
1393
|
+
return void 0;
|
|
1394
|
+
}
|
|
1395
|
+
if (this.config.logDir) {
|
|
1396
|
+
return import_node_path5.default.resolve(this.config.logDir);
|
|
1397
|
+
}
|
|
1398
|
+
return import_node_path5.default.join(process.cwd(), ".agentv", "logs", "codex");
|
|
1399
|
+
}
|
|
1400
|
+
async createStreamLogger(request) {
|
|
1401
|
+
const logDir = this.resolveLogDirectory();
|
|
1402
|
+
if (!logDir) {
|
|
1403
|
+
return void 0;
|
|
1404
|
+
}
|
|
1405
|
+
try {
|
|
1406
|
+
await (0, import_promises3.mkdir)(logDir, { recursive: true });
|
|
1407
|
+
} catch (error) {
|
|
1408
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1409
|
+
console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
|
|
1410
|
+
return void 0;
|
|
1411
|
+
}
|
|
1412
|
+
const filePath = import_node_path5.default.join(logDir, buildLogFilename(request, this.targetName));
|
|
1413
|
+
try {
|
|
1414
|
+
const logger = await CodexStreamLogger.create({
|
|
1415
|
+
filePath,
|
|
1416
|
+
targetName: this.targetName,
|
|
1417
|
+
evalCaseId: request.evalCaseId,
|
|
1418
|
+
attempt: request.attempt,
|
|
1419
|
+
format: this.config.logFormat ?? "summary"
|
|
1420
|
+
});
|
|
1421
|
+
console.log(`Streaming Codex CLI output to ${filePath}`);
|
|
1422
|
+
return logger;
|
|
1423
|
+
} catch (error) {
|
|
1424
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1425
|
+
console.warn(`Skipping Codex stream logging for ${filePath}: ${message}`);
|
|
1426
|
+
return void 0;
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1429
|
+
};
|
|
1430
|
+
var CodexStreamLogger = class _CodexStreamLogger {
|
|
1431
|
+
filePath;
|
|
1432
|
+
stream;
|
|
1433
|
+
startedAt = Date.now();
|
|
1434
|
+
stdoutBuffer = "";
|
|
1435
|
+
stderrBuffer = "";
|
|
1436
|
+
format;
|
|
1437
|
+
constructor(filePath, format) {
|
|
1438
|
+
this.filePath = filePath;
|
|
1439
|
+
this.format = format;
|
|
1440
|
+
this.stream = (0, import_node_fs3.createWriteStream)(filePath, { flags: "a" });
|
|
1441
|
+
}
|
|
1442
|
+
static async create(options) {
|
|
1443
|
+
const logger = new _CodexStreamLogger(options.filePath, options.format);
|
|
1444
|
+
const header = [
|
|
1445
|
+
"# Codex CLI stream log",
|
|
1446
|
+
`# target: ${options.targetName}`,
|
|
1447
|
+
options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
|
|
1448
|
+
options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
|
|
1449
|
+
`# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
1450
|
+
""
|
|
1451
|
+
].filter((line) => Boolean(line));
|
|
1452
|
+
logger.writeLines(header);
|
|
1453
|
+
return logger;
|
|
1454
|
+
}
|
|
1455
|
+
handleStdoutChunk(chunk) {
|
|
1456
|
+
this.stdoutBuffer += chunk;
|
|
1457
|
+
this.flushBuffer("stdout");
|
|
1458
|
+
}
|
|
1459
|
+
handleStderrChunk(chunk) {
|
|
1460
|
+
this.stderrBuffer += chunk;
|
|
1461
|
+
this.flushBuffer("stderr");
|
|
1462
|
+
}
|
|
1463
|
+
async close() {
|
|
1464
|
+
this.flushBuffer("stdout");
|
|
1465
|
+
this.flushBuffer("stderr");
|
|
1466
|
+
this.flushRemainder();
|
|
1467
|
+
await new Promise((resolve, reject) => {
|
|
1468
|
+
this.stream.once("error", reject);
|
|
1469
|
+
this.stream.end(() => resolve());
|
|
1470
|
+
});
|
|
1471
|
+
}
|
|
1472
|
+
writeLines(lines) {
|
|
1473
|
+
for (const line of lines) {
|
|
1474
|
+
this.stream.write(`${line}
|
|
1475
|
+
`);
|
|
1476
|
+
}
|
|
1477
|
+
}
|
|
1478
|
+
flushBuffer(source) {
|
|
1479
|
+
const buffer = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
|
|
1480
|
+
const lines = buffer.split(/\r?\n/);
|
|
1481
|
+
const remainder = lines.pop() ?? "";
|
|
1482
|
+
if (source === "stdout") {
|
|
1483
|
+
this.stdoutBuffer = remainder;
|
|
1484
|
+
} else {
|
|
1485
|
+
this.stderrBuffer = remainder;
|
|
1486
|
+
}
|
|
1487
|
+
for (const line of lines) {
|
|
1488
|
+
const formatted = this.formatLine(line, source);
|
|
1489
|
+
if (formatted) {
|
|
1490
|
+
this.stream.write(formatted);
|
|
1491
|
+
this.stream.write("\n");
|
|
1492
|
+
}
|
|
1493
|
+
}
|
|
1494
|
+
}
|
|
1495
|
+
formatLine(rawLine, source) {
|
|
1496
|
+
const trimmed = rawLine.trim();
|
|
1497
|
+
if (trimmed.length === 0) {
|
|
1498
|
+
return void 0;
|
|
1499
|
+
}
|
|
1500
|
+
const message = this.format === "json" ? formatCodexJsonLog(trimmed) : formatCodexLogMessage(trimmed, source);
|
|
1501
|
+
return `[+${formatElapsed(this.startedAt)}] [${source}] ${message}`;
|
|
1502
|
+
}
|
|
1503
|
+
flushRemainder() {
|
|
1504
|
+
const stdoutRemainder = this.stdoutBuffer.trim();
|
|
1505
|
+
if (stdoutRemainder.length > 0) {
|
|
1506
|
+
const formatted = this.formatLine(stdoutRemainder, "stdout");
|
|
1507
|
+
if (formatted) {
|
|
1508
|
+
this.stream.write(formatted);
|
|
1509
|
+
this.stream.write("\n");
|
|
1510
|
+
}
|
|
1511
|
+
}
|
|
1512
|
+
const stderrRemainder = this.stderrBuffer.trim();
|
|
1513
|
+
if (stderrRemainder.length > 0) {
|
|
1514
|
+
const formatted = this.formatLine(stderrRemainder, "stderr");
|
|
1515
|
+
if (formatted) {
|
|
1516
|
+
this.stream.write(formatted);
|
|
1517
|
+
this.stream.write("\n");
|
|
1518
|
+
}
|
|
1519
|
+
}
|
|
1520
|
+
this.stdoutBuffer = "";
|
|
1521
|
+
this.stderrBuffer = "";
|
|
1522
|
+
}
|
|
1523
|
+
};
|
|
1524
|
+
function isCodexLogStreamingDisabled() {
|
|
1525
|
+
const envValue = process.env.AGENTV_CODEX_STREAM_LOGS;
|
|
1526
|
+
if (!envValue) {
|
|
1527
|
+
return false;
|
|
1528
|
+
}
|
|
1529
|
+
const normalized = envValue.trim().toLowerCase();
|
|
1530
|
+
return normalized === "false" || normalized === "0" || normalized === "off";
|
|
1256
1531
|
}
|
|
1257
|
-
function
|
|
1258
|
-
const
|
|
1259
|
-
const
|
|
1260
|
-
const
|
|
1261
|
-
const
|
|
1262
|
-
|
|
1263
|
-
const temperatureSource = settings.temperature;
|
|
1264
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1265
|
-
const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
|
|
1266
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
|
|
1267
|
-
const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
|
|
1268
|
-
const version = normalizeAzureApiVersion(
|
|
1269
|
-
resolveOptionalString(versionSource, env, `${target.name} api version`)
|
|
1270
|
-
);
|
|
1271
|
-
const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
|
|
1272
|
-
const maxOutputTokens = resolveOptionalNumber(
|
|
1273
|
-
maxTokensSource,
|
|
1274
|
-
`${target.name} max output tokens`
|
|
1275
|
-
);
|
|
1276
|
-
return {
|
|
1277
|
-
resourceName,
|
|
1278
|
-
deploymentName,
|
|
1279
|
-
apiKey,
|
|
1280
|
-
version,
|
|
1281
|
-
temperature,
|
|
1282
|
-
maxOutputTokens
|
|
1283
|
-
};
|
|
1284
|
-
}
|
|
1285
|
-
function resolveAnthropicConfig(target, env) {
|
|
1286
|
-
const settings = target.settings ?? {};
|
|
1287
|
-
const apiKeySource = settings.api_key ?? settings.apiKey;
|
|
1288
|
-
const modelSource = settings.model ?? settings.deployment ?? settings.variant;
|
|
1289
|
-
const temperatureSource = settings.temperature;
|
|
1290
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1291
|
-
const thinkingBudgetSource = settings.thinking_budget ?? settings.thinkingBudget;
|
|
1292
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
|
|
1293
|
-
const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
|
|
1294
|
-
return {
|
|
1295
|
-
apiKey,
|
|
1296
|
-
model,
|
|
1297
|
-
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1298
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
|
|
1299
|
-
thinkingBudget: resolveOptionalNumber(thinkingBudgetSource, `${target.name} thinking budget`)
|
|
1300
|
-
};
|
|
1301
|
-
}
|
|
1302
|
-
function resolveGeminiConfig(target, env) {
|
|
1303
|
-
const settings = target.settings ?? {};
|
|
1304
|
-
const apiKeySource = settings.api_key ?? settings.apiKey;
|
|
1305
|
-
const modelSource = settings.model ?? settings.deployment ?? settings.variant;
|
|
1306
|
-
const temperatureSource = settings.temperature;
|
|
1307
|
-
const maxTokensSource = settings.max_output_tokens ?? settings.maxTokens;
|
|
1308
|
-
const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
|
|
1309
|
-
const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
|
|
1310
|
-
allowLiteral: true,
|
|
1311
|
-
optionalEnv: true
|
|
1312
|
-
}) ?? "gemini-2.5-flash";
|
|
1313
|
-
return {
|
|
1314
|
-
apiKey,
|
|
1315
|
-
model,
|
|
1316
|
-
temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
|
|
1317
|
-
maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`)
|
|
1318
|
-
};
|
|
1319
|
-
}
|
|
1320
|
-
function resolveCodexConfig(target, env) {
|
|
1321
|
-
const settings = target.settings ?? {};
|
|
1322
|
-
const executableSource = settings.executable ?? settings.command ?? settings.binary;
|
|
1323
|
-
const argsSource = settings.args ?? settings.arguments;
|
|
1324
|
-
const cwdSource = settings.cwd;
|
|
1325
|
-
const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
|
|
1326
|
-
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
1327
|
-
allowLiteral: true,
|
|
1328
|
-
optionalEnv: true
|
|
1329
|
-
}) ?? "codex";
|
|
1330
|
-
const args = resolveOptionalStringArray(argsSource, env, `${target.name} codex args`);
|
|
1331
|
-
const cwd = resolveOptionalString(cwdSource, env, `${target.name} codex cwd`, {
|
|
1332
|
-
allowLiteral: true,
|
|
1333
|
-
optionalEnv: true
|
|
1334
|
-
});
|
|
1335
|
-
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
|
|
1336
|
-
return {
|
|
1337
|
-
executable,
|
|
1338
|
-
args,
|
|
1339
|
-
cwd,
|
|
1340
|
-
timeoutMs
|
|
1341
|
-
};
|
|
1342
|
-
}
|
|
1343
|
-
function resolveMockConfig(target) {
|
|
1344
|
-
const settings = target.settings ?? {};
|
|
1345
|
-
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
1346
|
-
return { response };
|
|
1532
|
+
function buildLogFilename(request, targetName) {
|
|
1533
|
+
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
1534
|
+
const evalId = sanitizeForFilename(request.evalCaseId ?? "codex");
|
|
1535
|
+
const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
|
|
1536
|
+
const target = sanitizeForFilename(targetName);
|
|
1537
|
+
return `${timestamp}_${target}_${evalId}${attemptSuffix}_${(0, import_node_crypto.randomUUID)().slice(0, 8)}.log`;
|
|
1347
1538
|
}
|
|
1348
|
-
function
|
|
1349
|
-
const
|
|
1350
|
-
|
|
1351
|
-
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
1352
|
-
allowLiteral: false,
|
|
1353
|
-
optionalEnv: true
|
|
1354
|
-
}) : void 0;
|
|
1355
|
-
const commandSource = settings.vscode_cmd ?? settings.command;
|
|
1356
|
-
const waitSource = settings.wait;
|
|
1357
|
-
const dryRunSource = settings.dry_run ?? settings.dryRun;
|
|
1358
|
-
const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
|
|
1359
|
-
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
1360
|
-
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
1361
|
-
return {
|
|
1362
|
-
command,
|
|
1363
|
-
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
1364
|
-
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
1365
|
-
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
1366
|
-
allowLiteral: true,
|
|
1367
|
-
optionalEnv: true
|
|
1368
|
-
}),
|
|
1369
|
-
workspaceTemplate
|
|
1370
|
-
};
|
|
1539
|
+
function sanitizeForFilename(value) {
|
|
1540
|
+
const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
1541
|
+
return sanitized.length > 0 ? sanitized : "codex";
|
|
1371
1542
|
}
|
|
1372
|
-
function
|
|
1373
|
-
const
|
|
1374
|
-
const
|
|
1375
|
-
const
|
|
1376
|
-
|
|
1377
|
-
)
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
});
|
|
1382
|
-
const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
|
|
1383
|
-
const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
|
|
1384
|
-
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
1385
|
-
const commandTemplate = resolveString(
|
|
1386
|
-
commandTemplateSource,
|
|
1387
|
-
env,
|
|
1388
|
-
`${target.name} CLI command template`,
|
|
1389
|
-
true
|
|
1390
|
-
);
|
|
1391
|
-
assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
|
|
1392
|
-
return {
|
|
1393
|
-
commandTemplate,
|
|
1394
|
-
filesFormat,
|
|
1395
|
-
cwd,
|
|
1396
|
-
env: envOverrides,
|
|
1397
|
-
timeoutMs,
|
|
1398
|
-
healthcheck
|
|
1399
|
-
};
|
|
1543
|
+
function formatElapsed(startedAt) {
|
|
1544
|
+
const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
|
|
1545
|
+
const hours = Math.floor(elapsedSeconds / 3600);
|
|
1546
|
+
const minutes = Math.floor(elapsedSeconds % 3600 / 60);
|
|
1547
|
+
const seconds = elapsedSeconds % 60;
|
|
1548
|
+
if (hours > 0) {
|
|
1549
|
+
return `${hours.toString().padStart(2, "0")}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
1550
|
+
}
|
|
1551
|
+
return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
1400
1552
|
}
|
|
1401
|
-
function
|
|
1402
|
-
|
|
1553
|
+
function formatCodexLogMessage(rawLine, source) {
|
|
1554
|
+
const parsed = tryParseJsonValue(rawLine);
|
|
1555
|
+
if (parsed) {
|
|
1556
|
+
const summary = summarizeCodexEvent(parsed);
|
|
1557
|
+
if (summary) {
|
|
1558
|
+
return summary;
|
|
1559
|
+
}
|
|
1560
|
+
}
|
|
1561
|
+
if (source === "stderr") {
|
|
1562
|
+
return `stderr: ${rawLine}`;
|
|
1563
|
+
}
|
|
1564
|
+
return rawLine;
|
|
1565
|
+
}
|
|
1566
|
+
function formatCodexJsonLog(rawLine) {
|
|
1567
|
+
const parsed = tryParseJsonValue(rawLine);
|
|
1568
|
+
if (!parsed) {
|
|
1569
|
+
return rawLine;
|
|
1570
|
+
}
|
|
1571
|
+
try {
|
|
1572
|
+
return JSON.stringify(parsed, null, 2);
|
|
1573
|
+
} catch {
|
|
1574
|
+
return rawLine;
|
|
1575
|
+
}
|
|
1576
|
+
}
|
|
1577
|
+
/**
 * Condenses a parsed Codex JSONL event into a short log line.
 *
 * Message text is looked up, in order, via `extractFromEvent`, `extractFromItem`
 * on `event.item`, and `flattenContent` on `event.output ?? event.content`. For
 * completed-item events without text, the item's `text`/`content`/`output` is
 * used regardless of the item type.
 *
 * @param {unknown} event - A value parsed from one JSON line.
 * @returns {string | undefined} `"<type>: <message>"`, the bare message,
 *   `"<type>:<itemType>"`, the bare type, or `undefined` for non-objects.
 */
function summarizeCodexEvent(event) {
  if (typeof event !== "object" || !event) {
    return undefined;
  }
  const eventType = typeof event.type === "string" ? event.type : undefined;

  let text = extractFromEvent(event);
  if (text == null) {
    text = extractFromItem(event.item);
  }
  if (text == null) {
    text = flattenContent(event.output ?? event.content);
  }

  if (!text && eventType === JSONL_TYPE_ITEM_COMPLETED) {
    const nested = event.item;
    if (nested && typeof nested === "object") {
      const fallback = flattenContent(nested.text ?? nested.content ?? nested.output);
      if (fallback) {
        text = fallback;
      }
    }
  }

  if (text) {
    return eventType ? `${eventType}: ${text}` : text;
  }

  // No message available: describe the event by its type (and item type when present).
  const nestedType = typeof event.item?.type === "string" ? event.item.type : undefined;
  return eventType && nestedType ? `${eventType}:${nestedType}` : eventType;
}
|
|
1419
|
-
function
|
|
1420
|
-
|
|
1421
|
-
|
|
1612
|
+
/**
 * Parses a string as JSON without throwing.
 *
 * @param {string} rawLine - Candidate JSON text.
 * @returns {unknown} The parsed value, or `undefined` when parsing fails.
 */
function tryParseJsonValue(rawLine) {
  let value;
  try {
    value = JSON.parse(rawLine);
  } catch {
    value = undefined;
  }
  return value;
}
|
|
1619
|
+
/**
 * Resolves the configured Codex executable to a concrete, existing file path.
 *
 * - A candidate containing a path separator is resolved against the current
 *   working directory (unless already absolute) and checked for existence.
 * - A bare command name is looked up with `where` (Windows) or `which`
 *   (elsewhere); the preferred match is then checked for existence.
 *
 * The lookup runs through `execFile` with an argument vector, not a shell
 * command string, so whitespace or shell metacharacters in `candidate` are
 * passed verbatim to the locator instead of being interpreted by a shell.
 *
 * @param {string} candidate - Executable name or path from the target settings.
 * @returns {Promise<string>} Path of the executable to spawn.
 * @throws {Error} When a bare command name cannot be found on PATH. For path
 *   candidates the underlying `fs.access` rejection propagates unchanged.
 */
async function locateExecutable(candidate) {
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
  if (includesPathSeparator) {
    const resolved = import_node_path5.default.isAbsolute(candidate) ? candidate : import_node_path5.default.resolve(candidate);
    const executablePath = await ensureWindowsExecutableVariant(resolved);
    await (0, import_promises3.access)(executablePath, import_node_fs3.constants.F_OK);
    return executablePath;
  }
  const locator = process.platform === "win32" ? "where" : "which";
  const execFileAsync = (0, import_node_util2.promisify)(import_node_child_process2.execFile);
  let lookupError;
  try {
    const { stdout } = await execFileAsync(locator, [candidate]);
    const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
    const preferred = selectExecutableCandidate(lines);
    if (preferred) {
      const executablePath = await ensureWindowsExecutableVariant(preferred);
      await (0, import_promises3.access)(executablePath, import_node_fs3.constants.F_OK);
      return executablePath;
    }
  } catch (error) {
    // Best effort: any lookup failure is reported as "not found" below, with
    // the underlying error kept as the cause for diagnostics.
    lookupError = error;
  }
  throw new Error(
    `Codex executable '${candidate}' was not found on PATH`,
    lookupError ? { cause: lookupError } : undefined
  );
}
|
|
1429
|
-
function
|
|
1430
|
-
if (
|
|
1641
|
+
/**
 * Picks which locator hit to use as the executable.
 *
 * On Windows the executable extensions are tried in order and the first
 * candidate ending in one of them wins; otherwise (and on other platforms)
 * the first candidate is used.
 *
 * @param {string[]} candidates - Non-empty, trimmed lines printed by `where`/`which`.
 * @returns {string | undefined} The chosen path, or `undefined` for an empty list.
 */
function selectExecutableCandidate(candidates) {
  if (candidates.length === 0) {
    return undefined;
  }
  const suffixes = process.platform === "win32" ? getWindowsExecutableExtensions() : [];
  for (const suffix of suffixes) {
    const hit = candidates.find((entry) => entry.toLowerCase().endsWith(suffix));
    if (hit) {
      return hit;
    }
  }
  return candidates[0];
}
|
|
1657
|
+
/**
 * On Windows, upgrades an extension-less path to the first existing
 * `<path><ext>` variant, trying the executable extensions in order.
 *
 * The candidate is returned unchanged on other platforms, when it already
 * ends in an executable extension, or when no variant exists on disk.
 *
 * @param {string} candidate - Path to inspect.
 * @returns {Promise<string>} The path to use.
 */
async function ensureWindowsExecutableVariant(candidate) {
  if (process.platform !== "win32" || hasExecutableExtension(candidate)) {
    return candidate;
  }
  const exists = async (file) => {
    try {
      await (0, import_promises3.access)(file, import_node_fs3.constants.F_OK);
      return true;
    } catch {
      return false;
    }
  };
  for (const suffix of getWindowsExecutableExtensions()) {
    const probe = `${candidate}${suffix}`;
    if (await exists(probe)) {
      return probe;
    }
  }
  return candidate;
}
|
|
1471
|
-
function
|
|
1472
|
-
const
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1675
|
+
/**
 * Reports whether a path already ends (case-insensitively) in one of the
 * extensions returned by `getWindowsExecutableExtensions`.
 *
 * @param {string} candidate - Path or file name.
 * @returns {boolean} `true` when a known executable extension is present.
 */
function hasExecutableExtension(candidate) {
  const normalized = candidate.toLowerCase();
  for (const suffix of getWindowsExecutableExtensions()) {
    if (normalized.endsWith(suffix)) {
      return true;
    }
  }
  return false;
}
|
|
1679
|
+
// Fallback extension list used when PATHEXT is unset or yields no entries.
var DEFAULT_WINDOWS_EXTENSIONS = [".com", ".exe", ".bat", ".cmd", ".ps1"];
/**
 * Lists the file extensions treated as executable on Windows.
 *
 * Entries come from the `PATHEXT` environment variable (split on `;`, trimmed,
 * lower-cased, empties dropped) with `DEFAULT_WINDOWS_EXTENSIONS` as fallback.
 *
 * @returns {string[]} Lower-case extensions including the leading dot; an
 *   empty array on non-Windows platforms.
 */
function getWindowsExecutableExtensions() {
  if (process.platform !== "win32") {
    return [];
  }
  const raw = process.env.PATHEXT;
  if (raw === undefined || raw === null) {
    return DEFAULT_WINDOWS_EXTENSIONS;
  }
  const configured = [];
  for (const piece of raw.split(";")) {
    const ext = piece.trim().toLowerCase();
    if (ext.length > 0) {
      configured.push(ext);
    }
  }
  return configured.length > 0 ? configured : DEFAULT_WINDOWS_EXTENSIONS;
}
|
|
1687
|
+
/**
 * Parses the stdout of `codex --json`.
 *
 * Strategies are tried in order: the whole output as one JSON document, the
 * output as JSON Lines (via `parseJsonLines`), and finally the text from the
 * last `{` to the end of the output.
 *
 * @param {string} output - Raw stdout captured from the Codex CLI.
 * @returns {unknown} The parsed document, or an array of parsed lines.
 * @throws {Error} When the output is blank or none of the strategies succeed;
 *   the message includes at most the first 200 characters of the output.
 */
function parseCodexJson(output) {
  const text = output.trim();
  if (text.length === 0) {
    throw new Error("Codex CLI produced no output in --json mode");
  }
  const attempt = (source) => {
    try {
      return { ok: true, value: JSON.parse(source) };
    } catch {
      return { ok: false, value: undefined };
    }
  };

  const whole = attempt(text);
  if (whole.ok) {
    return whole.value;
  }

  const events = parseJsonLines(text);
  if (events) {
    return events;
  }

  const braceAt = text.lastIndexOf("{");
  if (braceAt >= 0) {
    const tail = attempt(text.slice(braceAt));
    if (tail.ok) {
      return tail.value;
    }
  }

  const preview = text.slice(0, 200);
  throw new Error(`Codex CLI emitted invalid JSON: ${preview}${text.length > 200 ? "\u2026" : ""}`);
}
|
|
1711
|
+
/**
 * Pulls the assistant's reply text out of parsed Codex JSON output.
 *
 * Sources are consulted in order: the event stream (when `parsed` is an
 * array), the value treated as a single event, the last assistant entry in
 * `messages` that has text, `response.content`, and finally `output`.
 *
 * @param {unknown} parsed - Value returned by the JSON parsing step.
 * @returns {string} The assistant message text.
 * @throws {Error} When no source yields text.
 */
function extractAssistantText(parsed) {
  const missingMessage = () => new Error("Codex CLI JSON response did not include an assistant message");

  if (Array.isArray(parsed)) {
    const streamed = extractFromEventStream(parsed);
    if (streamed) {
      return streamed;
    }
  }
  if (!parsed || typeof parsed !== "object") {
    throw missingMessage();
  }

  const direct = extractFromEvent(parsed);
  if (direct) {
    return direct;
  }

  const history = Array.isArray(parsed.messages) ? parsed.messages : undefined;
  if (history) {
    // Newest entries are checked first so the last assistant turn wins.
    for (const entry of [...history].reverse()) {
      const isAssistant = entry && typeof entry === "object" && entry.role === "assistant";
      if (!isAssistant) {
        continue;
      }
      const turnText = flattenContent(entry.content);
      if (turnText) {
        return turnText;
      }
    }
  }

  const reply = parsed.response;
  if (reply && typeof reply === "object") {
    const replyText = flattenContent(reply.content);
    if (replyText) {
      return replyText;
    }
  }

  const outputText = flattenContent(parsed.output);
  if (outputText) {
    return outputText;
  }
  throw missingMessage();
}
|
|
1491
|
-
function
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1759
|
+
/**
 * Scans an event list from newest to oldest and returns the first text that
 * `extractFromEvent` yields.
 *
 * @param {unknown[]} events - Parsed JSONL events in emission order.
 * @returns {string | undefined} Text of the latest event that has any.
 */
function extractFromEventStream(events) {
  let cursor = events.length;
  while (cursor-- > 0) {
    const found = extractFromEvent(events[cursor]);
    if (found) {
      return found;
    }
  }
  return undefined;
}
|
|
1501
|
-
function
|
|
1502
|
-
if (
|
|
1503
|
-
return void 0;
|
|
1504
|
-
}
|
|
1505
|
-
if (typeof source !== "string") {
|
|
1506
|
-
throw new Error(`${description} must be a string`);
|
|
1507
|
-
}
|
|
1508
|
-
const trimmed = source.trim();
|
|
1509
|
-
if (trimmed.length === 0) {
|
|
1769
|
+
/**
 * Extracts message text from a single Codex event.
 *
 * Completed-item events are first resolved through `extractFromItem`; any
 * event then falls back to its flattened `output ?? content`.
 *
 * @param {unknown} event - One parsed JSONL event.
 * @returns {string | undefined} Non-empty text, or `undefined` when none is found.
 */
function extractFromEvent(event) {
  if (typeof event !== "object" || !event) {
    return undefined;
  }
  const kind = typeof event.type === "string" ? event.type : undefined;
  const fromItem = kind === JSONL_TYPE_ITEM_COMPLETED ? extractFromItem(event.item) : undefined;
  if (fromItem) {
    return fromItem;
  }
  // Empty strings count as "no text".
  return flattenContent(event.output ?? event.content) || undefined;
}
|
|
1532
|
-
function
|
|
1533
|
-
if (
|
|
1789
|
+
/**
 * Extracts text from an event item, but only for item types that carry an
 * answer: "agent_message", "response" and "output".
 *
 * @param {unknown} item - The `item` payload of an event.
 * @returns {string | undefined} Non-empty text from `text ?? content ?? output`,
 *   or `undefined`.
 */
function extractFromItem(item) {
  if (typeof item !== "object" || !item) {
    return undefined;
  }
  const textBearingTypes = ["agent_message", "response", "output"];
  if (typeof item.type !== "string" || !textBearingTypes.includes(item.type)) {
    return undefined;
  }
  return flattenContent(item.text ?? item.content ?? item.output) || undefined;
}
|
|
1542
|
-
function
|
|
1543
|
-
if (
|
|
1544
|
-
return
|
|
1803
|
+
/**
 * Reduces a content payload to plain text.
 *
 * - A string is returned as-is (including the empty string).
 * - An object with a string `text` property yields that text.
 * - An array is flattened element-wise using the two rules above; empty and
 *   non-text elements are dropped and the rest are joined with " \n".
 *
 * @param {unknown} value - String, `{ text }` object, or array of either.
 * @returns {string | undefined} The flattened text, or `undefined` when there is none.
 */
function flattenContent(value) {
  const textOf = (node) => {
    if (typeof node === "string") {
      return node;
    }
    if (node && typeof node === "object" && "text" in node) {
      const inner = node.text;
      return typeof inner === "string" ? inner : undefined;
    }
    return undefined;
  };

  if (!Array.isArray(value)) {
    return textOf(value);
  }

  const pieces = [];
  for (const segment of value) {
    const piece = textOf(segment);
    if (typeof piece === "string" && piece.length > 0) {
      pieces.push(piece);
    }
  }
  return pieces.length > 0 ? pieces.join(" \n") : undefined;
}
|
|
1557
|
-
function
|
|
1558
|
-
|
|
1826
|
+
/**
 * Parses JSON Lines output: one JSON document per non-blank line.
 *
 * @param {string} output - Text to split on LF or CRLF.
 * @returns {unknown[] | undefined} Parsed values in line order, or `undefined`
 *   when there are fewer than two non-blank lines or any line is not valid JSON.
 */
function parseJsonLines(output) {
  const rows = [];
  for (const raw of output.split(/\r?\n/)) {
    const row = raw.trim();
    if (row.length > 0) {
      rows.push(row);
    }
  }
  if (rows.length <= 1) {
    return undefined;
  }
  try {
    return rows.map((row) => JSON.parse(row));
  } catch {
    return undefined;
  }
}
|
|
1578
|
-
function
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
if (!Array.isArray(source)) {
|
|
1583
|
-
throw new Error(`${description} must be an array of strings`);
|
|
1841
|
+
/**
 * Chooses the text used to describe a failed run: trimmed stderr when it has
 * content, otherwise trimmed stdout.
 *
 * @param {string} stderr - Captured standard error.
 * @param {string} stdout - Captured standard output.
 * @returns {string | undefined} The first non-blank stream, trimmed, or `undefined`.
 */
function pickDetail(stderr, stdout) {
  for (const stream of [stderr, stdout]) {
    const text = stream.trim();
    if (text.length > 0) {
      return text;
    }
  }
  return undefined;
}
|
|
1849
|
+
/**
 * Builds the " after Ns" suffix appended to timeout error messages.
 *
 * @param {number | undefined} timeoutMs - Configured timeout in milliseconds.
 * @returns {string} ` after <seconds>s` with seconds rounded up, or an empty
 *   string when no positive timeout is configured.
 */
function formatTimeoutSuffix2(timeoutMs) {
  const disabled = !timeoutMs || timeoutMs <= 0;
  return disabled ? "" : ` after ${Math.ceil(timeoutMs / 1e3)}s`;
}
|
|
1856
|
+
/**
 * Spawns the Codex executable, feeds it the prompt on stdin and collects its output.
 *
 * @param {object} options
 * @param {string} options.executable - Program to spawn.
 * @param {string[]} options.args - Command-line arguments.
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment for the child.
 * @param {string} options.prompt - Text written to the child's stdin, which is then closed.
 * @param {number} [options.timeoutMs] - When positive, the child receives SIGTERM after this delay.
 * @param {AbortSignal} [options.signal] - Aborting sends SIGTERM to the child.
 * @param {(chunk: string) => void} [options.onStdoutChunk] - Called for each stdout chunk.
 * @param {(chunk: string) => void} [options.onStderrChunk] - Called for each stderr chunk.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, timedOut: boolean}>}
 *   Resolves when the child closes; `exitCode` is -1 when the child ended without a numeric code.
 *   Rejects with the child's 'error' event when it fires first (e.g. the process could not start).
 */
async function defaultCodexRunner(options) {
  return await new Promise((resolve, reject) => {
    const child = (0, import_node_child_process2.spawn)(options.executable, options.args, {
      cwd: options.cwd,
      env: options.env,
      stdio: ["pipe", "pipe", "pipe"],
      shell: shouldShellExecute(options.executable)
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const onAbort = () => {
      child.kill("SIGTERM");
    };
    if (options.signal) {
      if (options.signal.aborted) {
        onAbort();
      } else {
        options.signal.addEventListener("abort", onAbort, { once: true });
      }
    }
    let timeoutHandle;
    if (options.timeoutMs && options.timeoutMs > 0) {
      timeoutHandle = setTimeout(() => {
        timedOut = true;
        child.kill("SIGTERM");
      }, options.timeoutMs);
      // Do not keep the event loop alive just for the timeout timer.
      timeoutHandle.unref?.();
    }
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
      options.onStdoutChunk?.(chunk);
    });
    child.stderr.setEncoding("utf8");
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
      options.onStderrChunk?.(chunk);
    });
    // If the child exits (or fails to start) before consuming the prompt, the
    // stdin pipe emits an 'error' (typically EPIPE). Without a listener that
    // would surface as an uncaught exception; the run's outcome is still
    // reported through the child's 'error'/'close' events below.
    child.stdin.on("error", () => {
    });
    child.stdin.end(options.prompt);
    const cleanup = () => {
      if (timeoutHandle) {
        clearTimeout(timeoutHandle);
      }
      if (options.signal) {
        options.signal.removeEventListener("abort", onAbort);
      }
    };
    child.on("error", (error) => {
      cleanup();
      reject(error);
    });
    child.on("close", (code) => {
      cleanup();
      resolve({
        stdout,
        stderr,
        exitCode: typeof code === "number" ? code : -1,
        timedOut
      });
    });
  });
}
|
|
1919
|
+
/**
 * Decides whether the executable must be launched through a shell.
 *
 * @param {string} executable - Path of the program about to be spawned.
 * @returns {boolean} `true` only on Windows for `.cmd`, `.bat` and `.ps1`
 *   files (compared case-insensitively).
 */
function shouldShellExecute(executable) {
  if (process.platform !== "win32") {
    return false;
  }
  const name = executable.toLowerCase();
  return [".cmd", ".bat", ".ps1"].some((suffix) => name.endsWith(suffix));
}
|
|
1610
1926
|
|
|
1611
|
-
// src/evaluation/providers/
|
|
1612
|
-
var
|
|
1613
|
-
var
|
|
1614
|
-
var import_subagent = require("subagent");
|
|
1615
|
-
var VSCodeProvider = class {
|
|
1927
|
+
// src/evaluation/providers/mock.ts
|
|
1928
|
+
// Reply returned when a mock target does not configure its own `response`.
var DEFAULT_MOCK_RESPONSE = '{"answer":"Mock provider response. Configure targets.yaml to supply a custom value."}';
/**
 * Provider that returns a canned response, optionally after an artificial delay.
 *
 * Delay selection: when `delayMinMs` or `delayMaxMs` is positive, a random
 * integer delay in the inclusive range [min, max] is used (max is raised to
 * min if smaller); otherwise the fixed `delayMs` applies.
 */
var MockProvider = class {
  id;
  kind = "mock";
  targetName;
  cannedResponse;
  delayMs;
  delayMinMs;
  delayMaxMs;
  /**
   * @param {string} targetName - Name of the target this provider serves.
   * @param {object} config - Optional `response`, `delayMs`, `delayMinMs`, `delayMaxMs`.
   */
  constructor(targetName, config) {
    const { response, delayMs, delayMinMs, delayMaxMs } = config;
    this.id = `mock:${targetName}`;
    this.targetName = targetName;
    this.cannedResponse = response ?? DEFAULT_MOCK_RESPONSE;
    this.delayMs = delayMs ?? 0;
    this.delayMinMs = delayMinMs ?? 0;
    this.delayMaxMs = delayMaxMs ?? 0;
  }
  /**
   * Returns the canned response, echoing the request's prompt and guidelines in `raw`.
   *
   * @param {object} request - Provider request; `prompt` and `guidelines` are echoed back.
   * @returns {Promise<{text: string, raw: {prompt: unknown, guidelines: unknown}}>}
   */
  async invoke(request) {
    const waitMs = this.calculateDelay();
    if (waitMs > 0) {
      await new Promise((wake) => setTimeout(wake, waitMs));
    }
    const { prompt, guidelines } = request;
    return {
      text: this.cannedResponse,
      raw: { prompt, guidelines }
    };
  }
  /**
   * Computes the delay, in milliseconds, to apply before responding.
   *
   * @returns {number} Random value in [min, max] when a range is configured, else `delayMs`.
   */
  calculateDelay() {
    const rangeConfigured = this.delayMinMs > 0 || this.delayMaxMs > 0;
    if (!rangeConfigured) {
      return this.delayMs;
    }
    const lower = Math.max(0, this.delayMinMs);
    const upper = Math.max(lower, this.delayMaxMs);
    const span = upper - lower + 1;
    return Math.floor(Math.random() * span) + lower;
  }
};
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
)
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
|
|
1740
|
-
return "";
|
|
1967
|
+
|
|
1968
|
+
// src/evaluation/providers/targets.ts
|
|
1969
|
+
var import_zod = require("zod");
|
|
1970
|
+
// Set of upper-case placeholder names: PROMPT, GUIDELINES, EVAL_ID, ATTEMPT, FILES.
// NOTE(review): presumably the tokens allowed in CLI command templates — the
// consumer is not visible in this part of the file; confirm before relying on it.
var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set(["PROMPT", "GUIDELINES", "EVAL_ID", "ATTEMPT", "FILES"]);
// Validation schema for the provider-independent part of a target definition:
//   name         - required, non-empty string
//   provider     - required, non-empty string
//   settings     - optional record of arbitrary values
//   judge_target - optional string
//   workers      - optional integer >= 1
var BASE_TARGET_SCHEMA = import_zod.z.object({
  name: import_zod.z.string().min(1, "target name is required"),
  provider: import_zod.z.string().min(1, "provider is required"),
  settings: import_zod.z.record(import_zod.z.unknown()).optional(),
  judge_target: import_zod.z.string().optional(),
  workers: import_zod.z.number().int().min(1).optional()
});
|
|
1978
|
+
// API version used when a target does not specify one.
var DEFAULT_AZURE_API_VERSION = "2024-10-01-preview";
/**
 * Normalises a configured Azure API version.
 *
 * Surrounding whitespace and a leading `api-version=` / `api_version=` /
 * `apiversion=` prefix (case-insensitive) are stripped.
 *
 * @param {string | undefined} value - Raw configured value.
 * @returns {string} The cleaned version, or `DEFAULT_AZURE_API_VERSION` when nothing remains.
 */
function normalizeAzureApiVersion(value) {
  const cleaned = value ? value.trim().replace(/^api[-_]?version\s*=\s*/i, "").trim() : "";
  return cleaned.length > 0 ? cleaned : DEFAULT_AZURE_API_VERSION;
}
|
|
1990
|
+
/**
 * Validates a raw target definition and resolves it into a provider-specific target.
 *
 * The provider name is matched case-insensitively; aliases map onto one kind
 * (e.g. "azure-openai" -> "azure", "google" -> "gemini", "codex-cli" -> "codex").
 * "vscode" and "vscode-insiders" keep their own name as the kind.
 *
 * @param {unknown} definition - Raw target entry; validated with `BASE_TARGET_SCHEMA`.
 * @param {object} [env=process.env] - Environment passed to the provider config resolvers.
 * @returns {{kind: string, name: string, judgeTarget: (string|undefined), workers: (number|undefined), providerBatching: unknown, config: object}}
 * @throws {Error} When the provider is not supported (validation and resolver
 *   errors propagate as thrown).
 */
function resolveTargetDefinition(definition, env = process.env) {
  const parsed = BASE_TARGET_SCHEMA.parse(definition);
  const provider = parsed.provider.toLowerCase();
  const providerBatching = resolveOptionalBoolean(
    parsed.settings?.provider_batching ?? parsed.settings?.providerBatching
  );
  // Every provider shares the same envelope; only `kind` and `config` differ.
  const describe = (kind, config) => ({
    kind,
    name: parsed.name,
    judgeTarget: parsed.judge_target,
    workers: parsed.workers,
    providerBatching,
    config
  });
  switch (provider) {
    case "azure":
    case "azure-openai":
      return describe("azure", resolveAzureConfig(parsed, env));
    case "anthropic":
      return describe("anthropic", resolveAnthropicConfig(parsed, env));
    case "gemini":
    case "google":
    case "google-gemini":
      return describe("gemini", resolveGeminiConfig(parsed, env));
    case "codex":
    case "codex-cli":
      return describe("codex", resolveCodexConfig(parsed, env));
    case "mock":
      return describe("mock", resolveMockConfig(parsed));
    case "vscode":
    case "vscode-insiders":
      return describe(provider, resolveVSCodeConfig(parsed, env, provider === "vscode-insiders"));
    case "cli":
      return describe("cli", resolveCliConfig(parsed, env));
    default:
      throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
  }
}
|
|
2069
|
+
/**
 * Resolves the Azure provider configuration from a target's settings.
 *
 * Accepted setting aliases:
 *   endpoint | resource | resourceName, api_key | apiKey,
 *   deployment | deploymentName | model, version | api_version,
 *   temperature, max_output_tokens | maxTokens.
 *
 * @param {{name: string, settings?: object}} target - Validated target definition.
 * @param {object} env - Environment used to resolve string values.
 * @returns {{resourceName: string, deploymentName: string, apiKey: string, version: string, temperature: (number|undefined), maxOutputTokens: (number|undefined)}}
 */
function resolveAzureConfig(target, env) {
  const s = target.settings ?? {};
  const resourceName = resolveString(
    s.endpoint ?? s.resource ?? s.resourceName,
    env,
    `${target.name} endpoint`
  );
  const apiKey = resolveString(s.api_key ?? s.apiKey, env, `${target.name} api key`);
  const deploymentName = resolveString(
    s.deployment ?? s.deploymentName ?? s.model,
    env,
    `${target.name} deployment`
  );
  const rawVersion = resolveOptionalString(s.version ?? s.api_version, env, `${target.name} api version`);
  const version = normalizeAzureApiVersion(rawVersion);
  const temperature = resolveOptionalNumber(s.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    s.max_output_tokens ?? s.maxTokens,
    `${target.name} max output tokens`
  );
  return { resourceName, deploymentName, apiKey, version, temperature, maxOutputTokens };
}
|
|
1762
|
-
function
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
const
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
}
|
|
1775
|
-
|
|
1776
|
-
|
|
2097
|
+
/**
 * Resolves the Anthropic provider configuration from a target's settings.
 *
 * Accepted setting aliases:
 *   api_key | apiKey, model | deployment | variant, temperature,
 *   max_output_tokens | maxTokens, thinking_budget | thinkingBudget.
 *
 * @param {{name: string, settings?: object}} target - Validated target definition.
 * @param {object} env - Environment used to resolve string values.
 * @returns {{apiKey: string, model: string, temperature: (number|undefined), maxOutputTokens: (number|undefined), thinkingBudget: (number|undefined)}}
 */
function resolveAnthropicConfig(target, env) {
  const s = target.settings ?? {};
  const apiKey = resolveString(s.api_key ?? s.apiKey, env, `${target.name} Anthropic api key`);
  const model = resolveString(s.model ?? s.deployment ?? s.variant, env, `${target.name} Anthropic model`);
  const temperature = resolveOptionalNumber(s.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    s.max_output_tokens ?? s.maxTokens,
    `${target.name} max output tokens`
  );
  const thinkingBudget = resolveOptionalNumber(
    s.thinking_budget ?? s.thinkingBudget,
    `${target.name} thinking budget`
  );
  return { apiKey, model, temperature, maxOutputTokens, thinkingBudget };
}
|
|
1778
|
-
function
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
const
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
}
|
|
1789
|
-
return
|
|
2114
|
+
/**
 * Resolves the Gemini provider configuration from a target's settings.
 *
 * Accepted setting aliases:
 *   api_key | apiKey, model | deployment | variant, temperature,
 *   max_output_tokens | maxTokens.
 * The model falls back to "gemini-2.5-flash" when it resolves to nothing.
 *
 * @param {{name: string, settings?: object}} target - Validated target definition.
 * @param {object} env - Environment used to resolve string values.
 * @returns {{apiKey: string, model: string, temperature: (number|undefined), maxOutputTokens: (number|undefined)}}
 */
function resolveGeminiConfig(target, env) {
  const s = target.settings ?? {};
  const apiKey = resolveString(s.api_key ?? s.apiKey, env, `${target.name} Google API key`);
  const configuredModel = resolveOptionalString(
    s.model ?? s.deployment ?? s.variant,
    env,
    `${target.name} Gemini model`,
    { allowLiteral: true, optionalEnv: true }
  );
  const model = configuredModel ?? "gemini-2.5-flash";
  const temperature = resolveOptionalNumber(s.temperature, `${target.name} temperature`);
  const maxOutputTokens = resolveOptionalNumber(
    s.max_output_tokens ?? s.maxTokens,
    `${target.name} max output tokens`
  );
  return { apiKey, model, temperature, maxOutputTokens };
}
|
|
1791
|
-
function
|
|
1792
|
-
const
|
|
1793
|
-
const
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
2132
|
+
/**
 * Resolves the Codex CLI provider configuration from a target's settings.
 *
 * Accepted setting aliases:
 *   executable | command | binary, args | arguments, cwd,
 *   timeout_seconds | timeoutSeconds,
 *   log_dir | logDir | log_directory | logDirectory,
 *   log_format | logFormat | log_output_format | logOutputFormat
 *   (falling back to the AGENTV_CODEX_LOG_FORMAT entry of `env`).
 * The executable defaults to "codex".
 *
 * @param {{name: string, settings?: object}} target - Validated target definition.
 * @param {object} env - Environment used to resolve values and the log-format fallback.
 * @returns {{executable: string, args: unknown, cwd: (string|undefined), timeoutMs: unknown, logDir: (string|undefined), logFormat: (string|undefined)}}
 */
function resolveCodexConfig(target, env) {
  const s = target.settings ?? {};
  const rawLogFormat = s.log_format ?? s.logFormat ?? s.log_output_format ?? s.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
  // Path-like settings may be literals or environment variable references.
  const literalOrEnv = () => ({ allowLiteral: true, optionalEnv: true });

  const configuredExecutable = resolveOptionalString(
    s.executable ?? s.command ?? s.binary,
    env,
    `${target.name} codex executable`,
    literalOrEnv()
  );
  const executable = configuredExecutable ?? "codex";
  const args = resolveOptionalStringArray(s.args ?? s.arguments, env, `${target.name} codex args`);
  const cwd = resolveOptionalString(s.cwd, env, `${target.name} codex cwd`, literalOrEnv());
  const timeoutMs = resolveTimeoutMs(s.timeout_seconds ?? s.timeoutSeconds, `${target.name} codex timeout`);
  const logDir = resolveOptionalString(
    s.log_dir ?? s.logDir ?? s.log_directory ?? s.logDirectory,
    env,
    `${target.name} codex log directory`,
    literalOrEnv()
  );
  const logFormat = normalizeCodexLogFormat(rawLogFormat);
  return { executable, args, cwd, timeoutMs, logDir, logFormat };
}
|
|
1799
|
-
function
|
|
1800
|
-
if (
|
|
2164
|
+
function normalizeCodexLogFormat(value) {
|
|
2165
|
+
if (value === void 0 || value === null) {
|
|
1801
2166
|
return void 0;
|
|
1802
2167
|
}
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
deduped.add(import_node_path4.default.resolve(attachment));
|
|
2168
|
+
if (typeof value !== "string") {
|
|
2169
|
+
throw new Error("codex log format must be 'summary' or 'json'");
|
|
1806
2170
|
}
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
const deduped = /* @__PURE__ */ new Set();
|
|
1811
|
-
for (const list of all) {
|
|
1812
|
-
if (!list) continue;
|
|
1813
|
-
for (const inputFile of list) {
|
|
1814
|
-
deduped.add(import_node_path4.default.resolve(inputFile));
|
|
1815
|
-
}
|
|
2171
|
+
const normalized = value.trim().toLowerCase();
|
|
2172
|
+
if (normalized === "json" || normalized === "summary") {
|
|
2173
|
+
return normalized;
|
|
1816
2174
|
}
|
|
1817
|
-
|
|
2175
|
+
throw new Error("codex log format must be 'summary' or 'json'");
|
|
1818
2176
|
}
|
|
1819
|
-
|
|
1820
|
-
const
|
|
1821
|
-
const
|
|
1822
|
-
|
|
1823
|
-
try {
|
|
1824
|
-
if (verbose) {
|
|
1825
|
-
console.log(`Provisioning ${count} subagent(s) via: subagent ${vscodeCmd} provision`);
|
|
1826
|
-
}
|
|
1827
|
-
const result = await (0, import_subagent.provisionSubagents)({
|
|
1828
|
-
targetRoot: subagentRoot,
|
|
1829
|
-
subagents: count,
|
|
1830
|
-
dryRun: false
|
|
1831
|
-
});
|
|
1832
|
-
if (verbose) {
|
|
1833
|
-
if (result.created.length > 0) {
|
|
1834
|
-
console.log(`Created ${result.created.length} new subagent(s)`);
|
|
1835
|
-
}
|
|
1836
|
-
if (result.skippedExisting.length > 0) {
|
|
1837
|
-
console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
|
|
1838
|
-
}
|
|
1839
|
-
console.log(`
|
|
1840
|
-
total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
|
|
1841
|
-
}
|
|
1842
|
-
return {
|
|
1843
|
-
provisioned: true,
|
|
1844
|
-
message: `Provisioned ${count} subagent(s): ${result.created.length} created, ${result.skippedExisting.length} reused`
|
|
1845
|
-
};
|
|
1846
|
-
} catch (error) {
|
|
1847
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1848
|
-
if (verbose) {
|
|
1849
|
-
console.warn(`Provisioning failed (continuing anyway): ${errorMessage}`);
|
|
1850
|
-
}
|
|
1851
|
-
return {
|
|
1852
|
-
provisioned: false,
|
|
1853
|
-
message: `Provisioning failed: ${errorMessage}`
|
|
1854
|
-
};
|
|
1855
|
-
}
|
|
2177
|
+
function resolveMockConfig(target) {
|
|
2178
|
+
const settings = target.settings ?? {};
|
|
2179
|
+
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
2180
|
+
return { response };
|
|
1856
2181
|
}
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
const
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
2182
|
+
function resolveVSCodeConfig(target, env, insiders) {
|
|
2183
|
+
const settings = target.settings ?? {};
|
|
2184
|
+
const workspaceTemplateEnvVar = resolveOptionalLiteralString(settings.workspace_template ?? settings.workspaceTemplate);
|
|
2185
|
+
const workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(workspaceTemplateEnvVar, env, `${target.name} workspace template path`, {
|
|
2186
|
+
allowLiteral: false,
|
|
2187
|
+
optionalEnv: true
|
|
2188
|
+
}) : void 0;
|
|
2189
|
+
const commandSource = settings.vscode_cmd ?? settings.command;
|
|
2190
|
+
const waitSource = settings.wait;
|
|
2191
|
+
const dryRunSource = settings.dry_run ?? settings.dryRun;
|
|
2192
|
+
const subagentRootSource = settings.subagent_root ?? settings.subagentRoot;
|
|
2193
|
+
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
2194
|
+
const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
2195
|
+
return {
|
|
2196
|
+
command,
|
|
2197
|
+
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
2198
|
+
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
2199
|
+
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
2200
|
+
allowLiteral: true,
|
|
2201
|
+
optionalEnv: true
|
|
2202
|
+
}),
|
|
2203
|
+
workspaceTemplate
|
|
2204
|
+
};
|
|
2205
|
+
}
|
|
2206
|
+
function resolveCliConfig(target, env) {
|
|
2207
|
+
const settings = target.settings ?? {};
|
|
2208
|
+
const commandTemplateSource = settings.command_template ?? settings.commandTemplate;
|
|
2209
|
+
const filesFormat = resolveOptionalLiteralString(
|
|
2210
|
+
settings.files_format ?? settings.filesFormat ?? settings.attachments_format ?? settings.attachmentsFormat
|
|
1874
2211
|
);
|
|
1875
|
-
const
|
|
1876
|
-
|
|
1877
|
-
|
|
2212
|
+
const cwd = resolveOptionalString(settings.cwd, env, `${target.name} working directory`, {
|
|
2213
|
+
allowLiteral: true,
|
|
2214
|
+
optionalEnv: true
|
|
2215
|
+
});
|
|
2216
|
+
const envOverrides = resolveEnvOverrides(settings.env, env, target.name);
|
|
2217
|
+
const timeoutMs = resolveTimeoutMs(settings.timeout_seconds ?? settings.timeoutSeconds, `${target.name} timeout`);
|
|
2218
|
+
const healthcheck = resolveCliHealthcheck(settings.healthcheck, env, target.name);
|
|
2219
|
+
const commandTemplate = resolveString(
|
|
2220
|
+
commandTemplateSource,
|
|
2221
|
+
env,
|
|
2222
|
+
`${target.name} CLI command template`,
|
|
2223
|
+
true
|
|
1878
2224
|
);
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
2225
|
+
assertSupportedCliPlaceholders(commandTemplate, `${target.name} CLI command template`);
|
|
2226
|
+
return {
|
|
2227
|
+
commandTemplate,
|
|
2228
|
+
filesFormat,
|
|
2229
|
+
cwd,
|
|
2230
|
+
env: envOverrides,
|
|
2231
|
+
timeoutMs,
|
|
2232
|
+
healthcheck
|
|
2233
|
+
};
|
|
1885
2234
|
}
|
|
1886
|
-
function
|
|
1887
|
-
if (
|
|
2235
|
+
function resolveEnvOverrides(source, env, targetName) {
|
|
2236
|
+
if (source === void 0 || source === null) {
|
|
1888
2237
|
return void 0;
|
|
1889
2238
|
}
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
const absolutePath = import_node_path5.default.resolve(inputFile);
|
|
1893
|
-
if (!deduped.has(absolutePath)) {
|
|
1894
|
-
deduped.set(absolutePath, absolutePath);
|
|
1895
|
-
}
|
|
1896
|
-
}
|
|
1897
|
-
return Array.from(deduped.values());
|
|
1898
|
-
}
|
|
1899
|
-
function collectGuidelineFiles2(inputFiles, guidelinePatterns, overrides) {
|
|
1900
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
1901
|
-
return [];
|
|
2239
|
+
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2240
|
+
throw new Error(`${targetName} env overrides must be an object map of strings`);
|
|
1902
2241
|
}
|
|
1903
|
-
const
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
if (
|
|
1907
|
-
|
|
1908
|
-
unique.set(absolutePath, absolutePath);
|
|
1909
|
-
}
|
|
1910
|
-
continue;
|
|
1911
|
-
}
|
|
1912
|
-
const normalized = absolutePath.split(import_node_path5.default.sep).join("/");
|
|
1913
|
-
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
1914
|
-
if (!unique.has(absolutePath)) {
|
|
1915
|
-
unique.set(absolutePath, absolutePath);
|
|
1916
|
-
}
|
|
2242
|
+
const entries = Object.entries(source);
|
|
2243
|
+
const resolved = {};
|
|
2244
|
+
for (const [key, value] of entries) {
|
|
2245
|
+
if (typeof value !== "string") {
|
|
2246
|
+
throw new Error(`${targetName} env override '${key}' must be a string`);
|
|
1917
2247
|
}
|
|
2248
|
+
const resolvedValue = resolveString(value, env, `${targetName} env override '${key}'`);
|
|
2249
|
+
resolved[key] = resolvedValue;
|
|
1918
2250
|
}
|
|
1919
|
-
return
|
|
2251
|
+
return Object.keys(resolved).length > 0 ? resolved : void 0;
|
|
1920
2252
|
}
|
|
1921
|
-
function
|
|
1922
|
-
|
|
1923
|
-
|
|
2253
|
+
function resolveTimeoutMs(source, description) {
|
|
2254
|
+
const seconds = resolveOptionalNumber(source, `${description} (seconds)`);
|
|
2255
|
+
if (seconds === void 0) {
|
|
2256
|
+
return void 0;
|
|
1924
2257
|
}
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
const absolutePath = import_node_path5.default.resolve(inputFile);
|
|
1928
|
-
if (!unique.has(absolutePath)) {
|
|
1929
|
-
unique.set(absolutePath, absolutePath);
|
|
1930
|
-
}
|
|
2258
|
+
if (seconds <= 0) {
|
|
2259
|
+
throw new Error(`${description} must be greater than zero seconds`);
|
|
1931
2260
|
}
|
|
1932
|
-
return
|
|
2261
|
+
return Math.floor(seconds * 1e3);
|
|
1933
2262
|
}
|
|
1934
|
-
function
|
|
1935
|
-
if (
|
|
1936
|
-
return
|
|
1937
|
-
}
|
|
1938
|
-
const buildList = (files) => files.map((absolutePath) => {
|
|
1939
|
-
const fileName = import_node_path5.default.basename(absolutePath);
|
|
1940
|
-
const fileUri = pathToFileUri2(absolutePath);
|
|
1941
|
-
return `* [${fileName}](${fileUri})`;
|
|
1942
|
-
});
|
|
1943
|
-
const sections = [];
|
|
1944
|
-
if (guidelineFiles.length > 0) {
|
|
1945
|
-
sections.push(`Read all guideline files:
|
|
1946
|
-
${buildList(guidelineFiles).join("\n")}.`);
|
|
2263
|
+
function resolveCliHealthcheck(source, env, targetName) {
|
|
2264
|
+
if (source === void 0 || source === null) {
|
|
2265
|
+
return void 0;
|
|
1947
2266
|
}
|
|
1948
|
-
if (
|
|
1949
|
-
|
|
1950
|
-
${buildList(inputFiles).join("\n")}.`);
|
|
2267
|
+
if (typeof source !== "object" || Array.isArray(source)) {
|
|
2268
|
+
throw new Error(`${targetName} healthcheck must be an object`);
|
|
1951
2269
|
}
|
|
1952
|
-
|
|
1953
|
-
|
|
1954
|
-
|
|
2270
|
+
const candidate = source;
|
|
2271
|
+
const type = candidate.type;
|
|
2272
|
+
const timeoutMs = resolveTimeoutMs(
|
|
2273
|
+
candidate.timeout_seconds ?? candidate.timeoutSeconds,
|
|
2274
|
+
`${targetName} healthcheck timeout`
|
|
1955
2275
|
);
|
|
1956
|
-
|
|
1957
|
-
}
|
|
1958
|
-
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
}
|
|
1964
|
-
return `file://${normalizedPath}`;
|
|
1965
|
-
}
|
|
1966
|
-
|
|
1967
|
-
// src/evaluation/providers/codex.ts
|
|
1968
|
-
var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exec);
|
|
1969
|
-
var WORKSPACE_PREFIX = "agentv-codex-";
|
|
1970
|
-
var PROMPT_FILENAME = "prompt.md";
|
|
1971
|
-
var FILES_DIR = "files";
|
|
1972
|
-
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
1973
|
-
var CodexProvider = class {
|
|
1974
|
-
id;
|
|
1975
|
-
kind = "codex";
|
|
1976
|
-
targetName;
|
|
1977
|
-
supportsBatch = false;
|
|
1978
|
-
config;
|
|
1979
|
-
runCodex;
|
|
1980
|
-
environmentCheck;
|
|
1981
|
-
resolvedExecutable;
|
|
1982
|
-
constructor(targetName, config, runner = defaultCodexRunner) {
|
|
1983
|
-
this.id = `codex:${targetName}`;
|
|
1984
|
-
this.targetName = targetName;
|
|
1985
|
-
this.config = config;
|
|
1986
|
-
this.runCodex = runner;
|
|
2276
|
+
if (type === "http") {
|
|
2277
|
+
const url = resolveString(candidate.url, env, `${targetName} healthcheck URL`);
|
|
2278
|
+
return {
|
|
2279
|
+
type: "http",
|
|
2280
|
+
url,
|
|
2281
|
+
timeoutMs
|
|
2282
|
+
};
|
|
1987
2283
|
}
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
const originalGuidelines = new Set(
|
|
1995
|
-
collectGuidelineFiles2(inputFiles, request.guideline_patterns).map((file) => import_node_path6.default.resolve(file))
|
|
2284
|
+
if (type === "command") {
|
|
2285
|
+
const commandTemplate = resolveString(
|
|
2286
|
+
candidate.command_template ?? candidate.commandTemplate,
|
|
2287
|
+
env,
|
|
2288
|
+
`${targetName} healthcheck command template`,
|
|
2289
|
+
true
|
|
1996
2290
|
);
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2291
|
+
assertSupportedCliPlaceholders(commandTemplate, `${targetName} healthcheck command template`);
|
|
2292
|
+
const cwd = resolveOptionalString(candidate.cwd, env, `${targetName} healthcheck cwd`, {
|
|
2293
|
+
allowLiteral: true,
|
|
2294
|
+
optionalEnv: true
|
|
2295
|
+
});
|
|
2296
|
+
return {
|
|
2297
|
+
type: "command",
|
|
2298
|
+
commandTemplate,
|
|
2299
|
+
timeoutMs,
|
|
2300
|
+
cwd
|
|
2301
|
+
};
|
|
2302
|
+
}
|
|
2303
|
+
throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
|
|
2304
|
+
}
|
|
2305
|
+
function assertSupportedCliPlaceholders(template, description) {
|
|
2306
|
+
const placeholders = extractCliPlaceholders(template);
|
|
2307
|
+
for (const placeholder of placeholders) {
|
|
2308
|
+
if (!CLI_PLACEHOLDERS.has(placeholder)) {
|
|
2309
|
+
throw new Error(
|
|
2310
|
+
`${description} includes unsupported placeholder '{${placeholder}}'. Supported placeholders: ${Array.from(CLI_PLACEHOLDERS).join(", ")}`
|
|
2003
2311
|
);
|
|
2004
|
-
const promptContent = buildPromptDocument2(request, mirroredInputFiles, {
|
|
2005
|
-
guidelinePatterns: request.guideline_patterns,
|
|
2006
|
-
guidelineOverrides: guidelineMirrors
|
|
2007
|
-
});
|
|
2008
|
-
const promptFile = import_node_path6.default.join(workspaceRoot, PROMPT_FILENAME);
|
|
2009
|
-
await (0, import_promises4.writeFile)(promptFile, promptContent, "utf8");
|
|
2010
|
-
const args = this.buildCodexArgs();
|
|
2011
|
-
const cwd = this.resolveCwd(workspaceRoot);
|
|
2012
|
-
const result = await this.executeCodex(args, cwd, promptContent, request.signal);
|
|
2013
|
-
if (result.timedOut) {
|
|
2014
|
-
throw new Error(
|
|
2015
|
-
`Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
|
|
2016
|
-
);
|
|
2017
|
-
}
|
|
2018
|
-
if (result.exitCode !== 0) {
|
|
2019
|
-
const detail = pickDetail(result.stderr, result.stdout);
|
|
2020
|
-
const prefix = `Codex CLI exited with code ${result.exitCode}`;
|
|
2021
|
-
throw new Error(detail ? `${prefix}: ${detail}` : prefix);
|
|
2022
|
-
}
|
|
2023
|
-
const parsed = parseCodexJson(result.stdout);
|
|
2024
|
-
const assistantText = extractAssistantText(parsed);
|
|
2025
|
-
return {
|
|
2026
|
-
text: assistantText,
|
|
2027
|
-
raw: {
|
|
2028
|
-
response: parsed,
|
|
2029
|
-
stdout: result.stdout,
|
|
2030
|
-
stderr: result.stderr,
|
|
2031
|
-
exitCode: result.exitCode,
|
|
2032
|
-
args,
|
|
2033
|
-
executable: this.resolvedExecutable ?? this.config.executable,
|
|
2034
|
-
promptFile,
|
|
2035
|
-
workspace: workspaceRoot,
|
|
2036
|
-
inputFiles: mirroredInputFiles
|
|
2037
|
-
}
|
|
2038
|
-
};
|
|
2039
|
-
} finally {
|
|
2040
|
-
await this.cleanupWorkspace(workspaceRoot);
|
|
2041
2312
|
}
|
|
2042
2313
|
}
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2314
|
+
}
|
|
2315
|
+
function extractCliPlaceholders(template) {
|
|
2316
|
+
const matches = template.matchAll(/\{([A-Z_]+)\}/g);
|
|
2317
|
+
const results = [];
|
|
2318
|
+
for (const match of matches) {
|
|
2319
|
+
if (match[1]) {
|
|
2320
|
+
results.push(match[1]);
|
|
2046
2321
|
}
|
|
2047
|
-
await this.environmentCheck;
|
|
2048
2322
|
}
|
|
2049
|
-
|
|
2050
|
-
|
|
2323
|
+
return results;
|
|
2324
|
+
}
|
|
2325
|
+
function resolveString(source, env, description, allowLiteral = false) {
|
|
2326
|
+
const value = resolveOptionalString(source, env, description, {
|
|
2327
|
+
allowLiteral,
|
|
2328
|
+
optionalEnv: false
|
|
2329
|
+
});
|
|
2330
|
+
if (value === void 0) {
|
|
2331
|
+
throw new Error(`${description} is required`);
|
|
2051
2332
|
}
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
return
|
|
2333
|
+
return value;
|
|
2334
|
+
}
|
|
2335
|
+
function resolveOptionalString(source, env, description, options) {
|
|
2336
|
+
if (source === void 0 || source === null) {
|
|
2337
|
+
return void 0;
|
|
2057
2338
|
}
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
if (this.config.args && this.config.args.length > 0) {
|
|
2061
|
-
args.push(...this.config.args);
|
|
2062
|
-
}
|
|
2063
|
-
args.push("-");
|
|
2064
|
-
return args;
|
|
2339
|
+
if (typeof source !== "string") {
|
|
2340
|
+
throw new Error(`${description} must be a string`);
|
|
2065
2341
|
}
|
|
2066
|
-
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
executable: this.resolvedExecutable ?? this.config.executable,
|
|
2070
|
-
args,
|
|
2071
|
-
cwd,
|
|
2072
|
-
prompt: promptContent,
|
|
2073
|
-
timeoutMs: this.config.timeoutMs,
|
|
2074
|
-
env: process.env,
|
|
2075
|
-
signal
|
|
2076
|
-
});
|
|
2077
|
-
} catch (error) {
|
|
2078
|
-
const err = error;
|
|
2079
|
-
if (err.code === "ENOENT") {
|
|
2080
|
-
throw new Error(
|
|
2081
|
-
`Codex executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
|
|
2082
|
-
);
|
|
2083
|
-
}
|
|
2084
|
-
throw error;
|
|
2085
|
-
}
|
|
2342
|
+
const trimmed = source.trim();
|
|
2343
|
+
if (trimmed.length === 0) {
|
|
2344
|
+
return void 0;
|
|
2086
2345
|
}
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
2090
|
-
|
|
2091
|
-
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
2092
|
-
};
|
|
2093
|
-
}
|
|
2094
|
-
const filesRoot = import_node_path6.default.join(workspaceRoot, FILES_DIR);
|
|
2095
|
-
await (0, import_promises4.mkdir)(filesRoot, { recursive: true });
|
|
2096
|
-
const mirrored = [];
|
|
2097
|
-
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
2098
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
2099
|
-
for (const inputFile of inputFiles) {
|
|
2100
|
-
const absoluteSource = import_node_path6.default.resolve(inputFile);
|
|
2101
|
-
const baseName = import_node_path6.default.basename(absoluteSource);
|
|
2102
|
-
const count = nameCounts.get(baseName) ?? 0;
|
|
2103
|
-
nameCounts.set(baseName, count + 1);
|
|
2104
|
-
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
2105
|
-
const destination = import_node_path6.default.join(filesRoot, finalName);
|
|
2106
|
-
await (0, import_promises4.copyFile)(absoluteSource, destination);
|
|
2107
|
-
const resolvedDestination = import_node_path6.default.resolve(destination);
|
|
2108
|
-
mirrored.push(resolvedDestination);
|
|
2109
|
-
if (guidelineOriginals.has(absoluteSource)) {
|
|
2110
|
-
guidelineMirrors.add(resolvedDestination);
|
|
2111
|
-
}
|
|
2346
|
+
const envValue = env[trimmed];
|
|
2347
|
+
if (envValue !== void 0) {
|
|
2348
|
+
if (envValue.trim().length === 0) {
|
|
2349
|
+
throw new Error(`Environment variable '${trimmed}' for ${description} is empty`);
|
|
2112
2350
|
}
|
|
2113
|
-
return
|
|
2114
|
-
mirroredInputFiles: mirrored,
|
|
2115
|
-
guidelineMirrors
|
|
2116
|
-
};
|
|
2117
|
-
}
|
|
2118
|
-
async createWorkspace() {
|
|
2119
|
-
return await (0, import_promises4.mkdtemp)(import_node_path6.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
|
|
2351
|
+
return envValue;
|
|
2120
2352
|
}
|
|
2121
|
-
|
|
2122
|
-
|
|
2123
|
-
|
|
2124
|
-
|
|
2353
|
+
const allowLiteral = options?.allowLiteral ?? false;
|
|
2354
|
+
const optionalEnv = options?.optionalEnv ?? false;
|
|
2355
|
+
const looksLikeEnv = isLikelyEnvReference(trimmed);
|
|
2356
|
+
if (looksLikeEnv) {
|
|
2357
|
+
if (optionalEnv) {
|
|
2358
|
+
return void 0;
|
|
2359
|
+
}
|
|
2360
|
+
if (!allowLiteral) {
|
|
2361
|
+
throw new Error(`Environment variable '${trimmed}' required for ${description} is not set`);
|
|
2125
2362
|
}
|
|
2126
2363
|
}
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
if (
|
|
2131
|
-
|
|
2132
|
-
const executablePath = await ensureWindowsExecutableVariant(resolved);
|
|
2133
|
-
await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
|
|
2134
|
-
return executablePath;
|
|
2364
|
+
return trimmed;
|
|
2365
|
+
}
|
|
2366
|
+
function resolveOptionalLiteralString(source) {
|
|
2367
|
+
if (source === void 0 || source === null) {
|
|
2368
|
+
return void 0;
|
|
2135
2369
|
}
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
const { stdout } = await execAsync2(`${locator} ${candidate}`);
|
|
2139
|
-
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
2140
|
-
const preferred = selectExecutableCandidate(lines);
|
|
2141
|
-
if (preferred) {
|
|
2142
|
-
const executablePath = await ensureWindowsExecutableVariant(preferred);
|
|
2143
|
-
await (0, import_promises4.access)(executablePath, import_node_fs3.constants.F_OK);
|
|
2144
|
-
return executablePath;
|
|
2145
|
-
}
|
|
2146
|
-
} catch {
|
|
2370
|
+
if (typeof source !== "string") {
|
|
2371
|
+
throw new Error("expected string value");
|
|
2147
2372
|
}
|
|
2148
|
-
|
|
2373
|
+
const trimmed = source.trim();
|
|
2374
|
+
return trimmed.length > 0 ? trimmed : void 0;
|
|
2149
2375
|
}
|
|
2150
|
-
function
|
|
2151
|
-
if (
|
|
2376
|
+
function resolveOptionalNumber(source, description) {
|
|
2377
|
+
if (source === void 0 || source === null || source === "") {
|
|
2152
2378
|
return void 0;
|
|
2153
2379
|
}
|
|
2154
|
-
if (
|
|
2155
|
-
return
|
|
2380
|
+
if (typeof source === "number") {
|
|
2381
|
+
return Number.isFinite(source) ? source : void 0;
|
|
2156
2382
|
}
|
|
2157
|
-
|
|
2158
|
-
|
|
2159
|
-
|
|
2160
|
-
|
|
2161
|
-
return match;
|
|
2383
|
+
if (typeof source === "string") {
|
|
2384
|
+
const numeric = Number(source);
|
|
2385
|
+
if (Number.isFinite(numeric)) {
|
|
2386
|
+
return numeric;
|
|
2162
2387
|
}
|
|
2163
2388
|
}
|
|
2164
|
-
|
|
2389
|
+
throw new Error(`${description} must be a number`);
|
|
2165
2390
|
}
|
|
2166
|
-
|
|
2167
|
-
if (
|
|
2168
|
-
return
|
|
2391
|
+
function resolveOptionalBoolean(source) {
|
|
2392
|
+
if (source === void 0 || source === null || source === "") {
|
|
2393
|
+
return void 0;
|
|
2169
2394
|
}
|
|
2170
|
-
if (
|
|
2171
|
-
return
|
|
2395
|
+
if (typeof source === "boolean") {
|
|
2396
|
+
return source;
|
|
2172
2397
|
}
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
2177
|
-
|
|
2178
|
-
|
|
2179
|
-
|
|
2398
|
+
if (typeof source === "string") {
|
|
2399
|
+
const lowered = source.trim().toLowerCase();
|
|
2400
|
+
if (lowered === "true" || lowered === "1") {
|
|
2401
|
+
return true;
|
|
2402
|
+
}
|
|
2403
|
+
if (lowered === "false" || lowered === "0") {
|
|
2404
|
+
return false;
|
|
2180
2405
|
}
|
|
2181
2406
|
}
|
|
2182
|
-
|
|
2407
|
+
throw new Error("expected boolean value");
|
|
2183
2408
|
}
|
|
2184
|
-
function
|
|
2185
|
-
|
|
2186
|
-
return getWindowsExecutableExtensions().some((ext) => lower.endsWith(ext));
|
|
2409
|
+
function isLikelyEnvReference(value) {
|
|
2410
|
+
return /^[A-Z0-9_]+$/.test(value);
|
|
2187
2411
|
}
|
|
2188
|
-
|
|
2189
|
-
|
|
2190
|
-
|
|
2191
|
-
return [];
|
|
2412
|
+
function resolveOptionalStringArray(source, env, description) {
|
|
2413
|
+
if (source === void 0 || source === null) {
|
|
2414
|
+
return void 0;
|
|
2192
2415
|
}
|
|
2193
|
-
|
|
2194
|
-
|
|
2195
|
-
}
|
|
2196
|
-
function parseCodexJson(output) {
|
|
2197
|
-
const trimmed = output.trim();
|
|
2198
|
-
if (trimmed.length === 0) {
|
|
2199
|
-
throw new Error("Codex CLI produced no output in --json mode");
|
|
2416
|
+
if (!Array.isArray(source)) {
|
|
2417
|
+
throw new Error(`${description} must be an array of strings`);
|
|
2200
2418
|
}
|
|
2201
|
-
|
|
2202
|
-
return
|
|
2203
|
-
}
|
|
2204
|
-
|
|
2205
|
-
|
|
2206
|
-
|
|
2419
|
+
if (source.length === 0) {
|
|
2420
|
+
return void 0;
|
|
2421
|
+
}
|
|
2422
|
+
const resolved = [];
|
|
2423
|
+
for (let i = 0; i < source.length; i++) {
|
|
2424
|
+
const item = source[i];
|
|
2425
|
+
if (typeof item !== "string") {
|
|
2426
|
+
throw new Error(`${description}[${i}] must be a string`);
|
|
2207
2427
|
}
|
|
2208
|
-
const
|
|
2209
|
-
if (
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2428
|
+
const trimmed = item.trim();
|
|
2429
|
+
if (trimmed.length === 0) {
|
|
2430
|
+
throw new Error(`${description}[${i}] cannot be empty`);
|
|
2431
|
+
}
|
|
2432
|
+
const envValue = env[trimmed];
|
|
2433
|
+
if (envValue !== void 0) {
|
|
2434
|
+
if (envValue.trim().length === 0) {
|
|
2435
|
+
throw new Error(`Environment variable '${trimmed}' for ${description}[${i}] is empty`);
|
|
2214
2436
|
}
|
|
2437
|
+
resolved.push(envValue);
|
|
2438
|
+
} else {
|
|
2439
|
+
resolved.push(trimmed);
|
|
2215
2440
|
}
|
|
2216
|
-
const preview = trimmed.slice(0, 200);
|
|
2217
|
-
throw new Error(`Codex CLI emitted invalid JSON: ${preview}${trimmed.length > 200 ? "\u2026" : ""}`);
|
|
2218
2441
|
}
|
|
2442
|
+
return resolved.length > 0 ? resolved : void 0;
|
|
2219
2443
|
}
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2223
|
-
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2232
|
-
|
|
2233
|
-
|
|
2444
|
+
|
|
2445
|
+
// src/evaluation/providers/vscode.ts
|
|
2446
|
+
var import_promises4 = require("fs/promises");
|
|
2447
|
+
var import_node_path6 = __toESM(require("path"), 1);
|
|
2448
|
+
var import_subagent = require("subagent");
|
|
2449
|
+
var VSCodeProvider = class {
|
|
2450
|
+
id;
|
|
2451
|
+
kind;
|
|
2452
|
+
targetName;
|
|
2453
|
+
supportsBatch = true;
|
|
2454
|
+
config;
|
|
2455
|
+
constructor(targetName, config, kind) {
|
|
2456
|
+
this.id = `${kind}:${targetName}`;
|
|
2457
|
+
this.kind = kind;
|
|
2458
|
+
this.targetName = targetName;
|
|
2459
|
+
this.config = config;
|
|
2234
2460
|
}
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2249
|
-
|
|
2250
|
-
|
|
2461
|
+
async invoke(request) {
|
|
2462
|
+
if (request.signal?.aborted) {
|
|
2463
|
+
throw new Error("VS Code provider request was aborted before dispatch");
|
|
2464
|
+
}
|
|
2465
|
+
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
2466
|
+
const promptContent = buildPromptDocument2(request, inputFiles, request.guideline_patterns);
|
|
2467
|
+
const session = await (0, import_subagent.dispatchAgentSession)({
|
|
2468
|
+
userQuery: promptContent,
|
|
2469
|
+
extraAttachments: inputFiles,
|
|
2470
|
+
wait: this.config.waitForResponse,
|
|
2471
|
+
dryRun: this.config.dryRun,
|
|
2472
|
+
vscodeCmd: this.config.command,
|
|
2473
|
+
subagentRoot: this.config.subagentRoot,
|
|
2474
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
2475
|
+
silent: true
|
|
2476
|
+
});
|
|
2477
|
+
if (session.exitCode !== 0 || !session.responseFile) {
|
|
2478
|
+
const failure = session.error ?? "VS Code subagent did not produce a response";
|
|
2479
|
+
throw new Error(failure);
|
|
2480
|
+
}
|
|
2481
|
+
if (this.config.dryRun) {
|
|
2482
|
+
return {
|
|
2483
|
+
text: "",
|
|
2484
|
+
raw: {
|
|
2485
|
+
session,
|
|
2486
|
+
inputFiles
|
|
2487
|
+
}
|
|
2488
|
+
};
|
|
2251
2489
|
}
|
|
2490
|
+
const responseText = await (0, import_promises4.readFile)(session.responseFile, "utf8");
|
|
2491
|
+
return {
|
|
2492
|
+
text: responseText,
|
|
2493
|
+
raw: {
|
|
2494
|
+
session,
|
|
2495
|
+
inputFiles
|
|
2496
|
+
}
|
|
2497
|
+
};
|
|
2252
2498
|
}
|
|
2253
|
-
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
|
|
2258
|
-
|
|
2499
|
+
async invokeBatch(requests) {
|
|
2500
|
+
if (requests.length === 0) {
|
|
2501
|
+
return [];
|
|
2502
|
+
}
|
|
2503
|
+
const normalizedRequests = requests.map((req) => ({
|
|
2504
|
+
request: req,
|
|
2505
|
+
inputFiles: normalizeAttachments(req.inputFiles)
|
|
2506
|
+
}));
|
|
2507
|
+
const combinedInputFiles = mergeAttachments(
|
|
2508
|
+
normalizedRequests.map(({ inputFiles }) => inputFiles)
|
|
2509
|
+
);
|
|
2510
|
+
const userQueries = normalizedRequests.map(
|
|
2511
|
+
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles, request.guideline_patterns)
|
|
2512
|
+
);
|
|
2513
|
+
const session = await (0, import_subagent.dispatchBatchAgent)({
|
|
2514
|
+
userQueries,
|
|
2515
|
+
extraAttachments: combinedInputFiles,
|
|
2516
|
+
wait: this.config.waitForResponse,
|
|
2517
|
+
dryRun: this.config.dryRun,
|
|
2518
|
+
vscodeCmd: this.config.command,
|
|
2519
|
+
subagentRoot: this.config.subagentRoot,
|
|
2520
|
+
workspaceTemplate: this.config.workspaceTemplate,
|
|
2521
|
+
silent: true
|
|
2522
|
+
});
|
|
2523
|
+
if (session.exitCode !== 0 || !session.responseFiles) {
|
|
2524
|
+
const failure = session.error ?? "VS Code subagent did not produce batch responses";
|
|
2525
|
+
throw new Error(failure);
|
|
2526
|
+
}
|
|
2527
|
+
if (this.config.dryRun) {
|
|
2528
|
+
return normalizedRequests.map(({ inputFiles }) => ({
|
|
2529
|
+
text: "",
|
|
2530
|
+
raw: {
|
|
2531
|
+
session,
|
|
2532
|
+
inputFiles,
|
|
2533
|
+
allInputFiles: combinedInputFiles
|
|
2534
|
+
}
|
|
2535
|
+
}));
|
|
2536
|
+
}
|
|
2537
|
+
if (session.responseFiles.length !== requests.length) {
|
|
2538
|
+
throw new Error(
|
|
2539
|
+
`VS Code batch returned ${session.responseFiles.length} responses for ${requests.length} requests`
|
|
2540
|
+
);
|
|
2259
2541
|
}
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
|
|
2263
|
-
|
|
2264
|
-
|
|
2265
|
-
|
|
2266
|
-
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
2270
|
-
|
|
2271
|
-
|
|
2272
|
-
if (text) {
|
|
2273
|
-
return text;
|
|
2542
|
+
const responses = [];
|
|
2543
|
+
for (const [index, responseFile] of session.responseFiles.entries()) {
|
|
2544
|
+
const responseText = await (0, import_promises4.readFile)(responseFile, "utf8");
|
|
2545
|
+
responses.push({
|
|
2546
|
+
text: responseText,
|
|
2547
|
+
raw: {
|
|
2548
|
+
session,
|
|
2549
|
+
inputFiles: normalizedRequests[index]?.inputFiles,
|
|
2550
|
+
allInputFiles: combinedInputFiles,
|
|
2551
|
+
responseFile
|
|
2552
|
+
}
|
|
2553
|
+
});
|
|
2274
2554
|
}
|
|
2555
|
+
return responses;
|
|
2275
2556
|
}
|
|
2276
|
-
|
|
2557
|
+
};
|
|
2558
|
+
function buildPromptDocument2(request, attachments, guidelinePatterns) {
|
|
2559
|
+
const parts = [];
|
|
2560
|
+
const guidelineFiles = collectGuidelineFiles2(attachments, guidelinePatterns);
|
|
2561
|
+
const attachmentFiles = collectAttachmentFiles(attachments);
|
|
2562
|
+
const nonGuidelineAttachments = attachmentFiles.filter(
|
|
2563
|
+
(file) => !guidelineFiles.includes(file)
|
|
2564
|
+
);
|
|
2565
|
+
const prereadBlock = buildMandatoryPrereadBlock2(guidelineFiles, nonGuidelineAttachments);
|
|
2566
|
+
if (prereadBlock.length > 0) {
|
|
2567
|
+
parts.push("\n", prereadBlock);
|
|
2568
|
+
}
|
|
2569
|
+
parts.push("\n[[ ## user_query ## ]]\n", request.prompt.trim());
|
|
2570
|
+
return parts.join("\n").trim();
|
|
2277
2571
|
}
|
|
2278
|
-
function
|
|
2279
|
-
if (
|
|
2280
|
-
return
|
|
2572
|
+
function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
2573
|
+
if (guidelineFiles.length === 0 && attachmentFiles.length === 0) {
|
|
2574
|
+
return "";
|
|
2281
2575
|
}
|
|
2282
|
-
const
|
|
2283
|
-
|
|
2284
|
-
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
|
|
2576
|
+
const buildList = (files) => files.map((absolutePath) => {
|
|
2577
|
+
const fileName = import_node_path6.default.basename(absolutePath);
|
|
2578
|
+
const fileUri = pathToFileUri2(absolutePath);
|
|
2579
|
+
return `* [${fileName}](${fileUri})`;
|
|
2580
|
+
});
|
|
2581
|
+
const sections = [];
|
|
2582
|
+
if (guidelineFiles.length > 0) {
|
|
2583
|
+
sections.push(`Read all guideline files:
|
|
2584
|
+
${buildList(guidelineFiles).join("\n")}.`);
|
|
2290
2585
|
}
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
return flattened;
|
|
2586
|
+
if (attachmentFiles.length > 0) {
|
|
2587
|
+
sections.push(`Read all attachment files:
|
|
2588
|
+
${buildList(attachmentFiles).join("\n")}.`);
|
|
2295
2589
|
}
|
|
2296
|
-
|
|
2590
|
+
sections.push(
|
|
2591
|
+
"If any file is missing, fail with ERROR: missing-file <filename> and stop.",
|
|
2592
|
+
"Then apply system_instructions on the user query below."
|
|
2593
|
+
);
|
|
2594
|
+
return sections.join("\n");
|
|
2297
2595
|
}
|
|
2298
|
-
function
|
|
2299
|
-
if (!
|
|
2300
|
-
return
|
|
2596
|
+
function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
2597
|
+
if (!attachments || attachments.length === 0) {
|
|
2598
|
+
return [];
|
|
2301
2599
|
}
|
|
2302
|
-
const
|
|
2303
|
-
const
|
|
2304
|
-
|
|
2305
|
-
const
|
|
2306
|
-
if (
|
|
2307
|
-
|
|
2600
|
+
const unique = /* @__PURE__ */ new Map();
|
|
2601
|
+
for (const attachment of attachments) {
|
|
2602
|
+
const absolutePath = import_node_path6.default.resolve(attachment);
|
|
2603
|
+
const normalized = absolutePath.split(import_node_path6.default.sep).join("/");
|
|
2604
|
+
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
2605
|
+
if (!unique.has(absolutePath)) {
|
|
2606
|
+
unique.set(absolutePath, absolutePath);
|
|
2607
|
+
}
|
|
2308
2608
|
}
|
|
2309
2609
|
}
|
|
2310
|
-
return
|
|
2610
|
+
return Array.from(unique.values());
|
|
2311
2611
|
}
|
|
2312
|
-
function
|
|
2313
|
-
if (
|
|
2314
|
-
return
|
|
2612
|
+
function collectAttachmentFiles(attachments) {
|
|
2613
|
+
if (!attachments || attachments.length === 0) {
|
|
2614
|
+
return [];
|
|
2315
2615
|
}
|
|
2316
|
-
|
|
2317
|
-
|
|
2318
|
-
|
|
2319
|
-
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
const text = segment.text;
|
|
2323
|
-
return typeof text === "string" ? text : void 0;
|
|
2324
|
-
}
|
|
2325
|
-
return void 0;
|
|
2326
|
-
}).filter((part) => typeof part === "string" && part.length > 0);
|
|
2327
|
-
return parts.length > 0 ? parts.join(" \n") : void 0;
|
|
2616
|
+
const unique = /* @__PURE__ */ new Map();
|
|
2617
|
+
for (const attachment of attachments) {
|
|
2618
|
+
const absolutePath = import_node_path6.default.resolve(attachment);
|
|
2619
|
+
if (!unique.has(absolutePath)) {
|
|
2620
|
+
unique.set(absolutePath, absolutePath);
|
|
2621
|
+
}
|
|
2328
2622
|
}
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2623
|
+
return Array.from(unique.values());
|
|
2624
|
+
}
|
|
2625
|
+
function pathToFileUri2(filePath) {
|
|
2626
|
+
const absolutePath = import_node_path6.default.isAbsolute(filePath) ? filePath : import_node_path6.default.resolve(filePath);
|
|
2627
|
+
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
2628
|
+
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
2629
|
+
return `file:///${normalizedPath}`;
|
|
2332
2630
|
}
|
|
2333
|
-
return
|
|
2631
|
+
return `file://${normalizedPath}`;
|
|
2334
2632
|
}
|
|
2335
|
-
function
|
|
2336
|
-
|
|
2337
|
-
if (lines.length <= 1) {
|
|
2633
|
+
function normalizeAttachments(attachments) {
|
|
2634
|
+
if (!attachments || attachments.length === 0) {
|
|
2338
2635
|
return void 0;
|
|
2339
2636
|
}
|
|
2340
|
-
const
|
|
2341
|
-
for (const
|
|
2342
|
-
|
|
2343
|
-
parsed.push(JSON.parse(line));
|
|
2344
|
-
} catch {
|
|
2345
|
-
return void 0;
|
|
2346
|
-
}
|
|
2347
|
-
}
|
|
2348
|
-
return parsed;
|
|
2349
|
-
}
|
|
2350
|
-
function pickDetail(stderr, stdout) {
|
|
2351
|
-
const errorText = stderr.trim();
|
|
2352
|
-
if (errorText.length > 0) {
|
|
2353
|
-
return errorText;
|
|
2637
|
+
const deduped = /* @__PURE__ */ new Set();
|
|
2638
|
+
for (const attachment of attachments) {
|
|
2639
|
+
deduped.add(import_node_path6.default.resolve(attachment));
|
|
2354
2640
|
}
|
|
2355
|
-
|
|
2356
|
-
return stdoutText.length > 0 ? stdoutText : void 0;
|
|
2641
|
+
return Array.from(deduped);
|
|
2357
2642
|
}
|
|
2358
|
-
function
|
|
2359
|
-
|
|
2360
|
-
|
|
2643
|
+
function mergeAttachments(all) {
|
|
2644
|
+
const deduped = /* @__PURE__ */ new Set();
|
|
2645
|
+
for (const list of all) {
|
|
2646
|
+
if (!list) continue;
|
|
2647
|
+
for (const inputFile of list) {
|
|
2648
|
+
deduped.add(import_node_path6.default.resolve(inputFile));
|
|
2649
|
+
}
|
|
2361
2650
|
}
|
|
2362
|
-
|
|
2363
|
-
return ` after ${seconds}s`;
|
|
2651
|
+
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
2364
2652
|
}
|
|
2365
|
-
async function
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
});
|
|
2373
|
-
let stdout = "";
|
|
2374
|
-
let stderr = "";
|
|
2375
|
-
let timedOut = false;
|
|
2376
|
-
const onAbort = () => {
|
|
2377
|
-
child.kill("SIGTERM");
|
|
2378
|
-
};
|
|
2379
|
-
if (options.signal) {
|
|
2380
|
-
if (options.signal.aborted) {
|
|
2381
|
-
onAbort();
|
|
2382
|
-
} else {
|
|
2383
|
-
options.signal.addEventListener("abort", onAbort, { once: true });
|
|
2384
|
-
}
|
|
2385
|
-
}
|
|
2386
|
-
let timeoutHandle;
|
|
2387
|
-
if (options.timeoutMs && options.timeoutMs > 0) {
|
|
2388
|
-
timeoutHandle = setTimeout(() => {
|
|
2389
|
-
timedOut = true;
|
|
2390
|
-
child.kill("SIGTERM");
|
|
2391
|
-
}, options.timeoutMs);
|
|
2392
|
-
timeoutHandle.unref?.();
|
|
2653
|
+
async function ensureVSCodeSubagents(options) {
|
|
2654
|
+
const { kind, count, verbose = false } = options;
|
|
2655
|
+
const vscodeCmd = kind === "vscode-insiders" ? "code-insiders" : "code";
|
|
2656
|
+
const subagentRoot = (0, import_subagent.getSubagentRoot)(vscodeCmd);
|
|
2657
|
+
try {
|
|
2658
|
+
if (verbose) {
|
|
2659
|
+
console.log(`Provisioning ${count} subagent(s) via: subagent ${vscodeCmd} provision`);
|
|
2393
2660
|
}
|
|
2394
|
-
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
child.stderr.setEncoding("utf8");
|
|
2399
|
-
child.stderr.on("data", (chunk) => {
|
|
2400
|
-
stderr += chunk;
|
|
2661
|
+
const result = await (0, import_subagent.provisionSubagents)({
|
|
2662
|
+
targetRoot: subagentRoot,
|
|
2663
|
+
subagents: count,
|
|
2664
|
+
dryRun: false
|
|
2401
2665
|
});
|
|
2402
|
-
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
clearTimeout(timeoutHandle);
|
|
2666
|
+
if (verbose) {
|
|
2667
|
+
if (result.created.length > 0) {
|
|
2668
|
+
console.log(`Created ${result.created.length} new subagent(s)`);
|
|
2406
2669
|
}
|
|
2407
|
-
if (
|
|
2408
|
-
|
|
2670
|
+
if (result.skippedExisting.length > 0) {
|
|
2671
|
+
console.log(`Reusing ${result.skippedExisting.length} existing unlocked subagent(s)`);
|
|
2409
2672
|
}
|
|
2673
|
+
console.log(`
|
|
2674
|
+
total unlocked subagents available: ${result.created.length + result.skippedExisting.length}`);
|
|
2675
|
+
}
|
|
2676
|
+
return {
|
|
2677
|
+
provisioned: true,
|
|
2678
|
+
message: `Provisioned ${count} subagent(s): ${result.created.length} created, ${result.skippedExisting.length} reused`
|
|
2679
|
+
};
|
|
2680
|
+
} catch (error) {
|
|
2681
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
2682
|
+
if (verbose) {
|
|
2683
|
+
console.warn(`Provisioning failed (continuing anyway): ${errorMessage}`);
|
|
2684
|
+
}
|
|
2685
|
+
return {
|
|
2686
|
+
provisioned: false,
|
|
2687
|
+
message: `Provisioning failed: ${errorMessage}`
|
|
2410
2688
|
};
|
|
2411
|
-
child.on("error", (error) => {
|
|
2412
|
-
cleanup();
|
|
2413
|
-
reject(error);
|
|
2414
|
-
});
|
|
2415
|
-
child.on("close", (code) => {
|
|
2416
|
-
cleanup();
|
|
2417
|
-
resolve({
|
|
2418
|
-
stdout,
|
|
2419
|
-
stderr,
|
|
2420
|
-
exitCode: typeof code === "number" ? code : -1,
|
|
2421
|
-
timedOut
|
|
2422
|
-
});
|
|
2423
|
-
});
|
|
2424
|
-
});
|
|
2425
|
-
}
|
|
2426
|
-
function shouldShellExecute(executable) {
|
|
2427
|
-
if (process.platform !== "win32") {
|
|
2428
|
-
return false;
|
|
2429
2689
|
}
|
|
2430
|
-
const lower = executable.toLowerCase();
|
|
2431
|
-
return lower.endsWith(".cmd") || lower.endsWith(".bat") || lower.endsWith(".ps1");
|
|
2432
2690
|
}
|
|
2433
2691
|
|
|
2434
2692
|
// src/evaluation/providers/targets-file.ts
|
|
@@ -2550,7 +2808,7 @@ function resolveAndCreateProvider(definition, env = process.env) {
|
|
|
2550
2808
|
}
|
|
2551
2809
|
|
|
2552
2810
|
// src/evaluation/evaluators.ts
|
|
2553
|
-
var
|
|
2811
|
+
var import_node_crypto2 = require("crypto");
|
|
2554
2812
|
var LlmJudgeEvaluator = class {
|
|
2555
2813
|
kind = "llm_judge";
|
|
2556
2814
|
resolveJudgeProvider;
|
|
@@ -2588,7 +2846,7 @@ var LlmJudgeEvaluator = class {
|
|
|
2588
2846
|
const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
|
|
2589
2847
|
const reasoning = parsed.reasoning ?? response.reasoning;
|
|
2590
2848
|
const evaluatorRawRequest = {
|
|
2591
|
-
id: (0,
|
|
2849
|
+
id: (0, import_node_crypto2.randomUUID)(),
|
|
2592
2850
|
provider: judgeProvider.id,
|
|
2593
2851
|
prompt,
|
|
2594
2852
|
target: context.target.name,
|
|
@@ -2827,7 +3085,7 @@ function parseJsonSafe(payload) {
|
|
|
2827
3085
|
}
|
|
2828
3086
|
|
|
2829
3087
|
// src/evaluation/orchestrator.ts
|
|
2830
|
-
var
|
|
3088
|
+
var import_node_crypto3 = require("crypto");
|
|
2831
3089
|
var import_promises6 = require("fs/promises");
|
|
2832
3090
|
var import_node_path8 = __toESM(require("path"), 1);
|
|
2833
3091
|
|
|
@@ -3600,7 +3858,7 @@ function sanitizeFilename(value) {
|
|
|
3600
3858
|
return "prompt";
|
|
3601
3859
|
}
|
|
3602
3860
|
const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
3603
|
-
return sanitized.length > 0 ? sanitized : (0,
|
|
3861
|
+
return sanitized.length > 0 ? sanitized : (0, import_node_crypto3.randomUUID)();
|
|
3604
3862
|
}
|
|
3605
3863
|
async function invokeProvider(provider, options) {
|
|
3606
3864
|
const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
|
|
@@ -3652,7 +3910,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
|
|
|
3652
3910
|
};
|
|
3653
3911
|
}
|
|
3654
3912
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
3655
|
-
const hash = (0,
|
|
3913
|
+
const hash = (0, import_node_crypto3.createHash)("sha256");
|
|
3656
3914
|
hash.update(provider.id);
|
|
3657
3915
|
hash.update(target.name);
|
|
3658
3916
|
hash.update(evalCase.id);
|