@agentv/core 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +268 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +268 -10
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1114,6 +1114,7 @@ function formatTimeoutSuffix(timeoutMs) {
|
|
|
1114
1114
|
|
|
1115
1115
|
// src/evaluation/providers/codex.ts
|
|
1116
1116
|
var import_node_child_process2 = require("child_process");
|
|
1117
|
+
var import_node_crypto = require("crypto");
|
|
1117
1118
|
var import_node_fs3 = require("fs");
|
|
1118
1119
|
var import_promises3 = require("fs/promises");
|
|
1119
1120
|
var import_node_os = require("os");
|
|
@@ -1252,6 +1253,7 @@ var CodexProvider = class {
|
|
|
1252
1253
|
collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => import_node_path5.default.resolve(file))
|
|
1253
1254
|
);
|
|
1254
1255
|
const workspaceRoot = await this.createWorkspace();
|
|
1256
|
+
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
1255
1257
|
try {
|
|
1256
1258
|
const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
|
|
1257
1259
|
inputFiles,
|
|
@@ -1266,7 +1268,7 @@ var CodexProvider = class {
|
|
|
1266
1268
|
await (0, import_promises3.writeFile)(promptFile, promptContent, "utf8");
|
|
1267
1269
|
const args = this.buildCodexArgs();
|
|
1268
1270
|
const cwd = this.resolveCwd(workspaceRoot);
|
|
1269
|
-
const result = await this.executeCodex(args, cwd, promptContent, request.signal);
|
|
1271
|
+
const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
|
|
1270
1272
|
if (result.timedOut) {
|
|
1271
1273
|
throw new Error(
|
|
1272
1274
|
`Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
|
|
@@ -1290,10 +1292,12 @@ var CodexProvider = class {
|
|
|
1290
1292
|
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1291
1293
|
promptFile,
|
|
1292
1294
|
workspace: workspaceRoot,
|
|
1293
|
-
inputFiles: mirroredInputFiles
|
|
1295
|
+
inputFiles: mirroredInputFiles,
|
|
1296
|
+
logFile: logger?.filePath
|
|
1294
1297
|
}
|
|
1295
1298
|
};
|
|
1296
1299
|
} finally {
|
|
1300
|
+
await logger?.close();
|
|
1297
1301
|
await this.cleanupWorkspace(workspaceRoot);
|
|
1298
1302
|
}
|
|
1299
1303
|
}
|
|
@@ -1320,7 +1324,7 @@ var CodexProvider = class {
|
|
|
1320
1324
|
args.push("-");
|
|
1321
1325
|
return args;
|
|
1322
1326
|
}
|
|
1323
|
-
async executeCodex(args, cwd, promptContent, signal) {
|
|
1327
|
+
async executeCodex(args, cwd, promptContent, signal, logger) {
|
|
1324
1328
|
try {
|
|
1325
1329
|
return await this.runCodex({
|
|
1326
1330
|
executable: this.resolvedExecutable ?? this.config.executable,
|
|
@@ -1329,7 +1333,9 @@ var CodexProvider = class {
|
|
|
1329
1333
|
prompt: promptContent,
|
|
1330
1334
|
timeoutMs: this.config.timeoutMs,
|
|
1331
1335
|
env: process.env,
|
|
1332
|
-
signal
|
|
1336
|
+
signal,
|
|
1337
|
+
onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
|
|
1338
|
+
onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
|
|
1333
1339
|
});
|
|
1334
1340
|
} catch (error) {
|
|
1335
1341
|
const err = error;
|
|
@@ -1381,7 +1387,235 @@ var CodexProvider = class {
|
|
|
1381
1387
|
} catch {
|
|
1382
1388
|
}
|
|
1383
1389
|
}
|
|
1390
|
+
resolveLogDirectory() {
|
|
1391
|
+
const disabled = isCodexLogStreamingDisabled();
|
|
1392
|
+
if (disabled) {
|
|
1393
|
+
return void 0;
|
|
1394
|
+
}
|
|
1395
|
+
if (this.config.logDir) {
|
|
1396
|
+
return import_node_path5.default.resolve(this.config.logDir);
|
|
1397
|
+
}
|
|
1398
|
+
return import_node_path5.default.join(process.cwd(), ".agentv", "logs", "codex");
|
|
1399
|
+
}
|
|
1400
|
+
async createStreamLogger(request) {
|
|
1401
|
+
const logDir = this.resolveLogDirectory();
|
|
1402
|
+
if (!logDir) {
|
|
1403
|
+
return void 0;
|
|
1404
|
+
}
|
|
1405
|
+
try {
|
|
1406
|
+
await (0, import_promises3.mkdir)(logDir, { recursive: true });
|
|
1407
|
+
} catch (error) {
|
|
1408
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1409
|
+
console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
|
|
1410
|
+
return void 0;
|
|
1411
|
+
}
|
|
1412
|
+
const filePath = import_node_path5.default.join(logDir, buildLogFilename(request, this.targetName));
|
|
1413
|
+
try {
|
|
1414
|
+
const logger = await CodexStreamLogger.create({
|
|
1415
|
+
filePath,
|
|
1416
|
+
targetName: this.targetName,
|
|
1417
|
+
evalCaseId: request.evalCaseId,
|
|
1418
|
+
attempt: request.attempt,
|
|
1419
|
+
format: this.config.logFormat ?? "summary"
|
|
1420
|
+
});
|
|
1421
|
+
console.log(`Streaming Codex CLI output to ${filePath}`);
|
|
1422
|
+
return logger;
|
|
1423
|
+
} catch (error) {
|
|
1424
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
1425
|
+
console.warn(`Skipping Codex stream logging for ${filePath}: ${message}`);
|
|
1426
|
+
return void 0;
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1384
1429
|
};
|
|
1430
|
+
/**
 * Best-effort, line-oriented file logger for Codex CLI output.
 *
 * Chunks from stdout/stderr are buffered per source, split on newlines and
 * appended to `filePath`, each line prefixed with the elapsed time and the
 * source name. Logging must never take the evaluation down, so stream errors
 * are recorded instead of being left to surface as uncaught `error` events,
 * and `close()` never rejects.
 */
var CodexStreamLogger = class _CodexStreamLogger {
  filePath;
  stream;
  startedAt = Date.now();
  stdoutBuffer = "";
  stderrBuffer = "";
  format;
  // First error reported by the write stream; once set, all writes are skipped.
  streamError = void 0;
  /**
   * @param {string} filePath Log file to append to.
   * @param {string} format "json" pretty-prints parsed events; any other value
   *   uses the summary formatter.
   */
  constructor(filePath, format) {
    this.filePath = filePath;
    this.format = format;
    this.stream = (0, import_node_fs3.createWriteStream)(filePath, { flags: "a" });
    // An "error" event with no listener is rethrown as an uncaught exception,
    // so keep a permanent listener that just remembers the first failure.
    this.stream.on("error", (error) => {
      if (this.streamError === void 0) {
        this.streamError = error;
      }
    });
  }
  /**
   * Open the log file and write the header.
   *
   * @param {{filePath: string, format: string, targetName: string, evalCaseId?: string, attempt?: number}} options
   * @returns {Promise<_CodexStreamLogger>} Logger whose file is already open.
   * @throws Rejects with the stream error when the file cannot be opened, so
   *   the caller can skip logging instead of failing later.
   */
  static async create(options) {
    const logger = new _CodexStreamLogger(options.filePath, options.format);
    await new Promise((resolve, reject) => {
      const onOpen = () => {
        logger.stream.off("error", onError);
        resolve();
      };
      const onError = (error) => {
        logger.stream.off("open", onOpen);
        reject(error);
      };
      logger.stream.once("open", onOpen);
      logger.stream.once("error", onError);
    });
    const header = [
      "# Codex CLI stream log",
      `# target: ${options.targetName}`,
      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
      `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
      ""
      // Drop only the omitted optional entries; the trailing "" is the blank
      // separator line between the header and the log body.
    ].filter((line) => line !== void 0);
    logger.writeLines(header);
    return logger;
  }
  /** Buffer a stdout chunk and write out every complete line it finishes. */
  handleStdoutChunk(chunk) {
    this.stdoutBuffer += chunk;
    this.flushBuffer("stdout");
  }
  /** Buffer a stderr chunk and write out every complete line it finishes. */
  handleStderrChunk(chunk) {
    this.stderrBuffer += chunk;
    this.flushBuffer("stderr");
  }
  /**
   * Flush buffered output and end the stream.
   *
   * Never rejects: the caller awaits this during cleanup, where a rejection
   * would mask the real result. A stream failure is reported with a warning.
   */
  async close() {
    this.flushBuffer("stdout");
    this.flushBuffer("stderr");
    this.flushRemainder();
    if (this.streamError === void 0 && !this.stream.destroyed) {
      await new Promise((resolve) => {
        this.stream.once("error", () => resolve());
        this.stream.end((error) => {
          if (error && this.streamError === void 0) {
            this.streamError = error;
          }
          resolve();
        });
      });
    }
    if (this.streamError !== void 0) {
      const reason = this.streamError instanceof Error ? this.streamError.message : String(this.streamError);
      console.warn(`Codex stream log ${this.filePath} may be incomplete: ${reason}`);
    }
  }
  /** Write each entry of `lines` followed by a newline. */
  writeLines(lines) {
    for (const line of lines) {
      this.writeText(`${line}\n`);
    }
  }
  /** Write raw text unless the stream has already failed or been destroyed. */
  writeText(text) {
    if (this.streamError !== void 0 || this.stream.destroyed) {
      return;
    }
    this.stream.write(text);
  }
  /** Format one raw line and append it; blank lines produce no output. */
  writeFormatted(rawLine, source) {
    const formatted = this.formatLine(rawLine, source);
    if (formatted) {
      this.writeText(`${formatted}\n`);
    }
  }
  /**
   * Write every complete line buffered for `source` and keep the trailing
   * partial line (text after the last newline) for the next chunk.
   */
  flushBuffer(source) {
    const isStdout = source === "stdout";
    const lines = (isStdout ? this.stdoutBuffer : this.stderrBuffer).split(/\r?\n/);
    const remainder = lines.pop() ?? "";
    if (isStdout) {
      this.stdoutBuffer = remainder;
    } else {
      this.stderrBuffer = remainder;
    }
    for (const line of lines) {
      this.writeFormatted(line, source);
    }
  }
  /**
   * Turn a raw line into a log entry `[+elapsed] [source] message`.
   *
   * @returns {string | undefined} `undefined` for whitespace-only lines.
   */
  formatLine(rawLine, source) {
    const trimmed = rawLine.trim();
    if (trimmed.length === 0) {
      return void 0;
    }
    const message = this.format === "json" ? formatCodexJsonLog(trimmed) : formatCodexLogMessage(trimmed, source);
    return `[+${formatElapsed(this.startedAt)}] [${source}] ${message}`;
  }
  /** Write whatever unterminated text is left in both buffers, then clear them. */
  flushRemainder() {
    this.writeFormatted(this.stdoutBuffer, "stdout");
    this.writeFormatted(this.stderrBuffer, "stderr");
    this.stdoutBuffer = "";
    this.stderrBuffer = "";
  }
};
|
|
1524
|
+
/**
 * Report whether Codex stream logging is turned off via the
 * `AGENTV_CODEX_STREAM_LOGS` environment variable.
 *
 * @returns {boolean} `true` only when the variable, trimmed and lower-cased,
 *   is "false", "0" or "off"; unset or empty means logging stays enabled.
 */
function isCodexLogStreamingDisabled() {
  const raw = process.env.AGENTV_CODEX_STREAM_LOGS;
  if (!raw) {
    return false;
  }
  return ["false", "0", "off"].includes(raw.trim().toLowerCase());
}
|
|
1532
|
+
/**
 * Compose a unique log file name of the form
 * `<timestamp>_<target>_<evalId>[_attempt-N]_<8 hex chars>.log`.
 *
 * @param {object} request Provider request; `evalCaseId` falls back to
 *   "codex" when nullish, and `attempt` (zero-based) is shown one-based.
 * @param {string} targetName Target name, sanitised for use in a file name.
 * @returns {string} File name without any directory component.
 */
function buildLogFilename(request, targetName) {
  // ":" and "." in the ISO timestamp are replaced so the name is filesystem-safe.
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const evalPart = sanitizeForFilename(request.evalCaseId ?? "codex");
  let attemptPart = "";
  if (request.attempt !== void 0) {
    attemptPart = `_attempt-${request.attempt + 1}`;
  }
  const targetPart = sanitizeForFilename(targetName);
  const uniquePart = (0, import_node_crypto.randomUUID)().slice(0, 8);
  return [stamp, targetPart, `${evalPart}${attemptPart}`, `${uniquePart}.log`].join("_");
}
|
|
1539
|
+
/**
 * Make a string safe to embed in a file name.
 *
 * @param {string} value Text to sanitise.
 * @returns {string} `value` with every run of characters outside
 *   `A-Z a-z 0-9 . _ -` collapsed to a single "_"; "codex" when the result
 *   would be empty.
 */
function sanitizeForFilename(value) {
  // An empty string is the only falsy result replace() can produce here.
  return value.replace(/[^A-Za-z0-9._-]+/g, "_") || "codex";
}
|
|
1543
|
+
/**
 * Format the time elapsed since `startedAt` as `MM:SS`, or `HH:MM:SS` once an
 * hour or more has passed.
 *
 * @param {number} startedAt Start time in epoch milliseconds (as from `Date.now()`).
 * @returns {string} Zero-padded elapsed time.
 */
function formatElapsed(startedAt) {
  // Date.now() is wall-clock time and can step backwards (NTP/manual changes);
  // clamp so a negative delta renders as "00:00" rather than e.g. "-1:-5".
  const elapsedSeconds = Math.max(0, Math.floor((Date.now() - startedAt) / 1e3));
  const pad = (part) => part.toString().padStart(2, "0");
  const hours = Math.floor(elapsedSeconds / 3600);
  const minutes = Math.floor(elapsedSeconds % 3600 / 60);
  const seconds = elapsedSeconds % 60;
  if (hours > 0) {
    return `${pad(hours)}:${pad(minutes)}:${pad(seconds)}`;
  }
  return `${pad(minutes)}:${pad(seconds)}`;
}
|
|
1553
|
+
/**
 * Produce the summary-format message for one line of Codex output.
 *
 * @param {string} rawLine Trimmed output line.
 * @param {string} source "stdout" or "stderr".
 * @returns {string} The event summary when the line parses as JSON and a
 *   summary is available; otherwise the raw line, prefixed with "stderr: "
 *   for stderr output.
 */
function formatCodexLogMessage(rawLine, source) {
  const event = tryParseJsonValue(rawLine);
  const summary = event ? summarizeCodexEvent(event) : void 0;
  if (summary) {
    return summary;
  }
  return source === "stderr" ? `stderr: ${rawLine}` : rawLine;
}
|
|
1566
|
+
/**
 * Produce the json-format message for one line of Codex output.
 *
 * @param {string} rawLine Trimmed output line.
 * @returns {string} The parsed value re-serialised with two-space
 *   indentation, or `rawLine` unchanged when it does not parse to a truthy
 *   value or cannot be serialised.
 */
function formatCodexJsonLog(rawLine) {
  const value = tryParseJsonValue(rawLine);
  if (value) {
    try {
      return JSON.stringify(value, null, 2);
    } catch {
      // Serialisation failed; fall through and log the line as received.
    }
  }
  return rawLine;
}
|
|
1577
|
+
/**
 * Reduce a parsed Codex event to a one-line summary.
 *
 * Message text is taken from the first helper that yields a value:
 * `extractFromEvent(event)`, `extractFromItem(event.item)`, then
 * `flattenContent(event.output ?? event.content)`. For item-completed events
 * with no message yet, the item's `text`/`content`/`output` is tried as well.
 *
 * @param {unknown} event Parsed JSON value.
 * @returns {string | undefined} "type: message", the message alone,
 *   "type:itemType", or just the type, depending on what is available;
 *   `undefined` for non-objects and for events with neither type nor message.
 */
function summarizeCodexEvent(event) {
  if (typeof event !== "object" || !event) {
    return void 0;
  }
  const type = typeof event.type === "string" ? event.type : void 0;

  let message = extractFromEvent(event) ?? extractFromItem(event.item) ?? flattenContent(event.output ?? event.content);
  if (!message && type === JSONL_TYPE_ITEM_COMPLETED) {
    const item = event.item;
    const fallback = item && typeof item === "object" ? flattenContent(item.text ?? item.content ?? item.output) : void 0;
    if (fallback) {
      message = fallback;
    }
  }

  if (message) {
    return type ? `${type}: ${message}` : message;
  }
  // No message text: describe the event by its type, refined by the item type when present.
  const itemType = typeof event.item?.type === "string" ? event.item.type : void 0;
  return type && itemType ? `${type}:${itemType}` : type;
}
|
|
1612
|
+
/**
 * Parse a line as JSON without throwing.
 *
 * @param {string} rawLine Candidate JSON text.
 * @returns {unknown} The parsed value, or `undefined` when `rawLine` is not
 *   valid JSON.
 */
function tryParseJsonValue(rawLine) {
  let parsed;
  try {
    parsed = JSON.parse(rawLine);
  } catch {
    // Not JSON: signal that with undefined rather than an exception.
    parsed = void 0;
  }
  return parsed;
}
|
|
1385
1619
|
async function locateExecutable(candidate) {
|
|
1386
1620
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
1387
1621
|
if (includesPathSeparator) {
|
|
@@ -1651,10 +1885,12 @@ async function defaultCodexRunner(options) {
|
|
|
1651
1885
|
child.stdout.setEncoding("utf8");
|
|
1652
1886
|
child.stdout.on("data", (chunk) => {
|
|
1653
1887
|
stdout += chunk;
|
|
1888
|
+
options.onStdoutChunk?.(chunk);
|
|
1654
1889
|
});
|
|
1655
1890
|
child.stderr.setEncoding("utf8");
|
|
1656
1891
|
child.stderr.on("data", (chunk) => {
|
|
1657
1892
|
stderr += chunk;
|
|
1893
|
+
options.onStderrChunk?.(chunk);
|
|
1658
1894
|
});
|
|
1659
1895
|
child.stdin.end(options.prompt);
|
|
1660
1896
|
const cleanup = () => {
|
|
@@ -1899,6 +2135,8 @@ function resolveCodexConfig(target, env) {
|
|
|
1899
2135
|
const argsSource = settings.args ?? settings.arguments;
|
|
1900
2136
|
const cwdSource = settings.cwd;
|
|
1901
2137
|
const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
|
|
2138
|
+
const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
|
|
2139
|
+
const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
1902
2140
|
const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
|
|
1903
2141
|
allowLiteral: true,
|
|
1904
2142
|
optionalEnv: true
|
|
@@ -1909,13 +2147,33 @@ function resolveCodexConfig(target, env) {
|
|
|
1909
2147
|
optionalEnv: true
|
|
1910
2148
|
});
|
|
1911
2149
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
|
|
2150
|
+
const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
|
|
2151
|
+
allowLiteral: true,
|
|
2152
|
+
optionalEnv: true
|
|
2153
|
+
});
|
|
2154
|
+
const logFormat = normalizeCodexLogFormat(logFormatSource);
|
|
1912
2155
|
return {
|
|
1913
2156
|
executable,
|
|
1914
2157
|
args,
|
|
1915
2158
|
cwd,
|
|
1916
|
-
timeoutMs
|
|
2159
|
+
timeoutMs,
|
|
2160
|
+
logDir,
|
|
2161
|
+
logFormat
|
|
1917
2162
|
};
|
|
1918
2163
|
}
|
|
2164
|
+
/**
 * Validate and normalise the configured Codex log format.
 *
 * @param {unknown} value Raw setting or environment value.
 * @returns {"json" | "summary" | undefined} The trimmed, lower-cased format,
 *   or `undefined` when no value was supplied (`undefined` or `null`).
 * @throws {Error} When the value is not a string, or is a string other than
 *   "json" / "summary" (case-insensitive, surrounding whitespace ignored).
 */
function normalizeCodexLogFormat(value) {
  if (value === void 0 || value === null) {
    return void 0;
  }
  const candidate = typeof value === "string" ? value.trim().toLowerCase() : void 0;
  switch (candidate) {
    case "json":
    case "summary":
      return candidate;
    default:
      throw new Error("codex log format must be 'summary' or 'json'");
  }
}
|
|
1919
2177
|
function resolveMockConfig(target) {
|
|
1920
2178
|
const settings = target.settings ?? {};
|
|
1921
2179
|
const response = typeof settings.response === "string" ? settings.response : void 0;
|
|
@@ -2550,7 +2808,7 @@ function resolveAndCreateProvider(definition, env = process.env) {
|
|
|
2550
2808
|
}
|
|
2551
2809
|
|
|
2552
2810
|
// src/evaluation/evaluators.ts
|
|
2553
|
-
var
|
|
2811
|
+
var import_node_crypto2 = require("crypto");
|
|
2554
2812
|
var LlmJudgeEvaluator = class {
|
|
2555
2813
|
kind = "llm_judge";
|
|
2556
2814
|
resolveJudgeProvider;
|
|
@@ -2588,7 +2846,7 @@ var LlmJudgeEvaluator = class {
|
|
|
2588
2846
|
const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
|
|
2589
2847
|
const reasoning = parsed.reasoning ?? response.reasoning;
|
|
2590
2848
|
const evaluatorRawRequest = {
|
|
2591
|
-
id: (0,
|
|
2849
|
+
id: (0, import_node_crypto2.randomUUID)(),
|
|
2592
2850
|
provider: judgeProvider.id,
|
|
2593
2851
|
prompt,
|
|
2594
2852
|
target: context.target.name,
|
|
@@ -2827,7 +3085,7 @@ function parseJsonSafe(payload) {
|
|
|
2827
3085
|
}
|
|
2828
3086
|
|
|
2829
3087
|
// src/evaluation/orchestrator.ts
|
|
2830
|
-
var
|
|
3088
|
+
var import_node_crypto3 = require("crypto");
|
|
2831
3089
|
var import_promises6 = require("fs/promises");
|
|
2832
3090
|
var import_node_path8 = __toESM(require("path"), 1);
|
|
2833
3091
|
|
|
@@ -3600,7 +3858,7 @@ function sanitizeFilename(value) {
|
|
|
3600
3858
|
return "prompt";
|
|
3601
3859
|
}
|
|
3602
3860
|
const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
3603
|
-
return sanitized.length > 0 ? sanitized : (0,
|
|
3861
|
+
return sanitized.length > 0 ? sanitized : (0, import_node_crypto3.randomUUID)();
|
|
3604
3862
|
}
|
|
3605
3863
|
async function invokeProvider(provider, options) {
|
|
3606
3864
|
const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
|
|
@@ -3652,7 +3910,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
|
|
|
3652
3910
|
};
|
|
3653
3911
|
}
|
|
3654
3912
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
3655
|
-
const hash = (0,
|
|
3913
|
+
const hash = (0, import_node_crypto3.createHash)("sha256");
|
|
3656
3914
|
hash.update(provider.id);
|
|
3657
3915
|
hash.update(target.name);
|
|
3658
3916
|
hash.update(evalCase.id);
|