@agentv/core 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1114,6 +1114,7 @@ function formatTimeoutSuffix(timeoutMs) {
1114
1114
 
1115
1115
  // src/evaluation/providers/codex.ts
1116
1116
  var import_node_child_process2 = require("child_process");
1117
+ var import_node_crypto = require("crypto");
1117
1118
  var import_node_fs3 = require("fs");
1118
1119
  var import_promises3 = require("fs/promises");
1119
1120
  var import_node_os = require("os");
@@ -1252,6 +1253,7 @@ var CodexProvider = class {
1252
1253
  collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => import_node_path5.default.resolve(file))
1253
1254
  );
1254
1255
  const workspaceRoot = await this.createWorkspace();
1256
+ const logger = await this.createStreamLogger(request).catch(() => void 0);
1255
1257
  try {
1256
1258
  const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
1257
1259
  inputFiles,
@@ -1266,7 +1268,7 @@ var CodexProvider = class {
1266
1268
  await (0, import_promises3.writeFile)(promptFile, promptContent, "utf8");
1267
1269
  const args = this.buildCodexArgs();
1268
1270
  const cwd = this.resolveCwd(workspaceRoot);
1269
- const result = await this.executeCodex(args, cwd, promptContent, request.signal);
1271
+ const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
1270
1272
  if (result.timedOut) {
1271
1273
  throw new Error(
1272
1274
  `Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
@@ -1290,10 +1292,12 @@ var CodexProvider = class {
1290
1292
  executable: this.resolvedExecutable ?? this.config.executable,
1291
1293
  promptFile,
1292
1294
  workspace: workspaceRoot,
1293
- inputFiles: mirroredInputFiles
1295
+ inputFiles: mirroredInputFiles,
1296
+ logFile: logger?.filePath
1294
1297
  }
1295
1298
  };
1296
1299
  } finally {
1300
+ await logger?.close();
1297
1301
  await this.cleanupWorkspace(workspaceRoot);
1298
1302
  }
1299
1303
  }
@@ -1320,7 +1324,7 @@ var CodexProvider = class {
1320
1324
  args.push("-");
1321
1325
  return args;
1322
1326
  }
1323
- async executeCodex(args, cwd, promptContent, signal) {
1327
+ async executeCodex(args, cwd, promptContent, signal, logger) {
1324
1328
  try {
1325
1329
  return await this.runCodex({
1326
1330
  executable: this.resolvedExecutable ?? this.config.executable,
@@ -1329,7 +1333,9 @@ var CodexProvider = class {
1329
1333
  prompt: promptContent,
1330
1334
  timeoutMs: this.config.timeoutMs,
1331
1335
  env: process.env,
1332
- signal
1336
+ signal,
1337
+ onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
1338
+ onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
1333
1339
  });
1334
1340
  } catch (error) {
1335
1341
  const err = error;
@@ -1381,7 +1387,235 @@ var CodexProvider = class {
1381
1387
  } catch {
1382
1388
  }
1383
1389
  }
1390
+ resolveLogDirectory() {
1391
+ const disabled = isCodexLogStreamingDisabled();
1392
+ if (disabled) {
1393
+ return void 0;
1394
+ }
1395
+ if (this.config.logDir) {
1396
+ return import_node_path5.default.resolve(this.config.logDir);
1397
+ }
1398
+ return import_node_path5.default.join(process.cwd(), ".agentv", "logs", "codex");
1399
+ }
1400
+ async createStreamLogger(request) {
1401
+ const logDir = this.resolveLogDirectory();
1402
+ if (!logDir) {
1403
+ return void 0;
1404
+ }
1405
+ try {
1406
+ await (0, import_promises3.mkdir)(logDir, { recursive: true });
1407
+ } catch (error) {
1408
+ const message = error instanceof Error ? error.message : String(error);
1409
+ console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
1410
+ return void 0;
1411
+ }
1412
+ const filePath = import_node_path5.default.join(logDir, buildLogFilename(request, this.targetName));
1413
+ try {
1414
+ const logger = await CodexStreamLogger.create({
1415
+ filePath,
1416
+ targetName: this.targetName,
1417
+ evalCaseId: request.evalCaseId,
1418
+ attempt: request.attempt,
1419
+ format: this.config.logFormat ?? "summary"
1420
+ });
1421
+ console.log(`Streaming Codex CLI output to ${filePath}`);
1422
+ return logger;
1423
+ } catch (error) {
1424
+ const message = error instanceof Error ? error.message : String(error);
1425
+ console.warn(`Skipping Codex stream logging for ${filePath}: ${message}`);
1426
+ return void 0;
1427
+ }
1428
+ }
1384
1429
  };
1430
/**
 * Appends a line-oriented, timestamped copy of Codex CLI stdout/stderr to a
 * log file while the process is still running.
 *
 * Logging is best-effort: an I/O error on the underlying stream is recorded,
 * further writes are skipped, and `close()` never rejects, so a logging
 * problem cannot crash the process or mask the result of the run itself.
 * Only `create()` rejects, and only when the file cannot be opened.
 */
var CodexStreamLogger = class _CodexStreamLogger {
  filePath;
  stream;
  startedAt = Date.now();
  stdoutBuffer = "";
  stderrBuffer = "";
  format;
  // First error emitted by the stream; once set, every later write is skipped.
  failure = void 0;
  // Set by create() after the file opened, so open failures are reported by
  // the rejected create() promise rather than by a second warning here.
  opened = false;
  /**
   * @param {string} filePath Log file to append to.
   * @param {string} format "json" pretty-prints parsed lines; anything else
   *   goes through the summary formatter.
   */
  constructor(filePath, format) {
    this.filePath = filePath;
    this.format = format;
    this.stream = (0, import_node_fs3.createWriteStream)(filePath, { flags: "a" });
    // A stream without an 'error' listener turns any I/O failure into an
    // uncaught exception, so keep a listener attached for its whole lifetime.
    this.stream.on("error", (error) => {
      if (this.failure !== void 0) {
        return;
      }
      this.failure = error;
      if (this.opened) {
        const message = error instanceof Error ? error.message : String(error);
        console.warn(`Codex stream logging to ${this.filePath} stopped: ${message}`);
      }
    });
  }
  /**
   * Open the log file and write the header block.
   *
   * @param options `filePath`, `format`, `targetName`, plus optional
   *   `evalCaseId` and zero-based `attempt`.
   * @returns {Promise<_CodexStreamLogger>} Resolves once the file is open.
   * @throws Rejects with the stream error when the file cannot be opened.
   */
  static async create(options) {
    const logger = new _CodexStreamLogger(options.filePath, options.format);
    await new Promise((resolve, reject) => {
      const onOpen = () => {
        logger.stream.off("error", onError);
        logger.opened = true;
        resolve();
      };
      const onError = (error) => {
        logger.stream.off("open", onOpen);
        reject(error);
      };
      logger.stream.once("open", onOpen);
      logger.stream.once("error", onError);
    });
    const header = [
      "# Codex CLI stream log",
      `# target: ${options.targetName}`,
      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
      `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
      ""
      // Drop only the omitted optional entries; the trailing "" is the blank
      // line separating the header from the streamed output.
    ].filter((line) => line !== void 0);
    logger.writeLines(header);
    return logger;
  }
  /** Buffer a stdout chunk and log every complete line it finishes. */
  handleStdoutChunk(chunk) {
    this.stdoutBuffer += chunk;
    this.flushBuffer("stdout");
  }
  /** Buffer a stderr chunk and log every complete line it finishes. */
  handleStderrChunk(chunk) {
    this.stderrBuffer += chunk;
    this.flushBuffer("stderr");
  }
  /**
   * Write whatever is still buffered and end the stream.
   *
   * Never rejects: callers await this in a `finally` block ahead of other
   * cleanup, so a logging failure must not replace the real outcome.
   */
  async close() {
    this.flushBuffer("stdout");
    this.flushBuffer("stderr");
    this.flushRemainder();
    if (this.failure !== void 0) {
      return;
    }
    await new Promise((resolve) => {
      this.stream.once("error", () => resolve());
      this.stream.end(() => resolve());
    });
  }
  /** Write each entry followed by a newline. */
  writeLines(lines) {
    for (const line of lines) {
      this.writeText(`${line}\n`);
    }
  }
  /** Single write path; becomes a no-op after the stream has failed. */
  writeText(text) {
    if (this.failure === void 0) {
      this.stream.write(text);
    }
  }
  /**
   * Log all complete lines buffered for `source` ("stdout" or "stderr") and
   * keep the trailing partial line for the next chunk.
   */
  flushBuffer(source) {
    const buffer = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
    const lines = buffer.split(/\r?\n/);
    const remainder = lines.pop() ?? "";
    if (source === "stdout") {
      this.stdoutBuffer = remainder;
    } else {
      this.stderrBuffer = remainder;
    }
    for (const line of lines) {
      const formatted = this.formatLine(line, source);
      if (formatted) {
        this.writeText(`${formatted}\n`);
      }
    }
  }
  /**
   * Turn one raw line into a log entry prefixed with elapsed time and source.
   *
   * @returns {string | undefined} `undefined` for blank lines.
   */
  formatLine(rawLine, source) {
    const trimmed = rawLine.trim();
    if (trimmed.length === 0) {
      return void 0;
    }
    const message = this.format === "json" ? formatCodexJsonLog(trimmed) : formatCodexLogMessage(trimmed, source);
    return `[+${formatElapsed(this.startedAt)}] [${source}] ${message}`;
  }
  /** Log the unterminated tail of both buffers (stdout first) and clear them. */
  flushRemainder() {
    const pending = [
      ["stdout", this.stdoutBuffer],
      ["stderr", this.stderrBuffer]
    ];
    for (const [source, tail] of pending) {
      // formatLine trims and returns undefined for blank input.
      const formatted = this.formatLine(tail, source);
      if (formatted) {
        this.writeText(`${formatted}\n`);
      }
    }
    this.stdoutBuffer = "";
    this.stderrBuffer = "";
  }
};
1524
/**
 * Report whether the AGENTV_CODEX_STREAM_LOGS environment variable turns
 * stream logging off.
 *
 * @returns {boolean} `true` only when the variable, trimmed and lower-cased,
 *   is "false", "0" or "off"; `false` when it is unset, empty, or anything
 *   else.
 */
function isCodexLogStreamingDisabled() {
  const flag = process.env.AGENTV_CODEX_STREAM_LOGS?.trim().toLowerCase();
  return ["false", "0", "off"].includes(flag);
}
1532
/**
 * Compose a log file name of the form
 * `<timestamp>_<target>_<eval>[_attempt-N]_<8 hex chars>.log`.
 *
 * @param request Supplies `evalCaseId` (falls back to "codex") and the
 *   zero-based `attempt`, which is written one-based.
 * @param targetName Target name; sanitized before use.
 * @returns {string} File name only, without a directory.
 */
function buildLogFilename(request, targetName) {
  // ':' and '.' from the ISO timestamp are replaced with '-'.
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  const evalPart = sanitizeForFilename(request.evalCaseId ?? "codex");
  const attemptPart = request.attempt === void 0 ? "" : `_attempt-${request.attempt + 1}`;
  const targetPart = sanitizeForFilename(targetName);
  const uniquePart = (0, import_node_crypto.randomUUID)().slice(0, 8);
  return `${[stamp, targetPart, `${evalPart}${attemptPart}`, uniquePart].join("_")}.log`;
}
1539
/**
 * Reduce a value to characters that are safe inside a file name.
 *
 * Every run of characters outside `A-Z a-z 0-9 . _ -` becomes a single "_".
 *
 * @param value Text to sanitize. `null`/`undefined` are treated as empty and
 *   other non-string values are stringified, so a missing name yields the
 *   fallback instead of a TypeError.
 * @returns {string} The sanitized text, or "codex" when nothing is left.
 */
function sanitizeForFilename(value) {
  const text = typeof value === "string" ? value : String(value ?? "");
  const sanitized = text.replace(/[^A-Za-z0-9._-]+/g, "_");
  return sanitized.length > 0 ? sanitized : "codex";
}
1543
/**
 * Format the time elapsed since `startedAt` as `MM:SS`, or `HH:MM:SS` once
 * at least one hour has passed.
 *
 * @param {number} startedAt Start time in milliseconds, as from `Date.now()`.
 * @returns {string} Zero-padded elapsed time.
 */
function formatElapsed(startedAt) {
  // Date.now() is wall-clock time and can step backwards; clamp so a clock
  // adjustment never produces negative fields such as "-1:-10".
  const elapsedSeconds = Math.max(0, Math.floor((Date.now() - startedAt) / 1e3));
  const twoDigits = (value) => value.toString().padStart(2, "0");
  const hours = Math.floor(elapsedSeconds / 3600);
  const minutes = Math.floor(elapsedSeconds % 3600 / 60);
  const seconds = elapsedSeconds % 60;
  const fields = hours > 0 ? [hours, minutes, seconds] : [minutes, seconds];
  return fields.map(twoDigits).join(":");
}
1553
/**
 * Build the summary-format text for one raw output line.
 *
 * @param {string} rawLine Line as received from the CLI.
 * @param {string} source "stdout" or "stderr".
 * @returns {string} The event summary when the line parses as JSON and
 *   `summarizeCodexEvent` yields one; otherwise the raw line, prefixed with
 *   "stderr: " for stderr output.
 */
function formatCodexLogMessage(rawLine, source) {
  const event = tryParseJsonValue(rawLine);
  const summary = event ? summarizeCodexEvent(event) : void 0;
  if (summary) {
    return summary;
  }
  return source === "stderr" ? `stderr: ${rawLine}` : rawLine;
}
1566
/**
 * Build the json-format text for one raw output line.
 *
 * @param {string} rawLine Line as received from the CLI.
 * @returns {string} The parsed value re-serialized with two-space
 *   indentation, or the raw line when it does not parse to a truthy value or
 *   cannot be serialized.
 */
function formatCodexJsonLog(rawLine) {
  const value = tryParseJsonValue(rawLine);
  if (value) {
    try {
      return JSON.stringify(value, null, 2);
    } catch {
      // Serialization failed; fall through to the raw line below.
    }
  }
  return rawLine;
}
1577
/**
 * Produce a one-line description of a parsed Codex JSONL event.
 *
 * Text is taken from the first helper that yields a value:
 * `extractFromEvent(event)`, `extractFromItem(event.item)`, then
 * `flattenContent(event.output ?? event.content)`. For events whose type is
 * `JSONL_TYPE_ITEM_COMPLETED`, the item's `text`/`content`/`output` is tried
 * as a last resort.
 *
 * @param event Parsed JSON value.
 * @returns {string | undefined} "<type>: <text>" or just the text when text
 *   was found; otherwise "<type>:<item type>" or the bare type; `undefined`
 *   for non-object input.
 */
function summarizeCodexEvent(event) {
  if (typeof event !== "object" || !event) {
    return void 0;
  }
  const eventType = typeof event.type === "string" ? event.type : void 0;
  let text = extractFromEvent(event) ?? extractFromItem(event.item) ?? flattenContent(event.output ?? event.content);
  const item = event.item;
  if (!text && eventType === JSONL_TYPE_ITEM_COMPLETED && item && typeof item === "object") {
    const fallback = flattenContent(
      item.text ?? item.content ?? item.output
    );
    if (fallback) {
      text = fallback;
    }
  }
  if (text) {
    return eventType ? `${eventType}: ${text}` : text;
  }
  // No text at all: describe the event by its type and, if known, item type.
  const itemType = typeof item?.type === "string" ? item.type : void 0;
  return eventType && itemType ? `${eventType}:${itemType}` : eventType;
}
1612
/**
 * Parse a line as JSON without throwing.
 *
 * @param {string} rawLine Candidate JSON text.
 * @returns The parsed value, or `undefined` when `JSON.parse` rejects the
 *   input.
 */
function tryParseJsonValue(rawLine) {
  let value;
  try {
    value = JSON.parse(rawLine);
  } catch {
    value = void 0;
  }
  return value;
}
1385
1619
  async function locateExecutable(candidate) {
1386
1620
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
1387
1621
  if (includesPathSeparator) {
@@ -1651,10 +1885,12 @@ async function defaultCodexRunner(options) {
1651
1885
  child.stdout.setEncoding("utf8");
1652
1886
  child.stdout.on("data", (chunk) => {
1653
1887
  stdout += chunk;
1888
+ options.onStdoutChunk?.(chunk);
1654
1889
  });
1655
1890
  child.stderr.setEncoding("utf8");
1656
1891
  child.stderr.on("data", (chunk) => {
1657
1892
  stderr += chunk;
1893
+ options.onStderrChunk?.(chunk);
1658
1894
  });
1659
1895
  child.stdin.end(options.prompt);
1660
1896
  const cleanup = () => {
@@ -1899,6 +2135,8 @@ function resolveCodexConfig(target, env) {
1899
2135
  const argsSource = settings.args ?? settings.arguments;
1900
2136
  const cwdSource = settings.cwd;
1901
2137
  const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
2138
+ const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
2139
+ const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
1902
2140
  const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
1903
2141
  allowLiteral: true,
1904
2142
  optionalEnv: true
@@ -1909,13 +2147,33 @@ function resolveCodexConfig(target, env) {
1909
2147
  optionalEnv: true
1910
2148
  });
1911
2149
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
2150
+ const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
2151
+ allowLiteral: true,
2152
+ optionalEnv: true
2153
+ });
2154
+ const logFormat = normalizeCodexLogFormat(logFormatSource);
1912
2155
  return {
1913
2156
  executable,
1914
2157
  args,
1915
2158
  cwd,
1916
- timeoutMs
2159
+ timeoutMs,
2160
+ logDir,
2161
+ logFormat
1917
2162
  };
1918
2163
  }
2164
/**
 * Validate and normalize the configured Codex log format.
 *
 * @param value Raw setting or environment value.
 * @returns {"summary" | "json" | undefined} The lower-cased format, or
 *   `undefined` when the value is missing or blank so the caller's default
 *   applies. A blank string counts as unset because an environment variable
 *   exported with no value arrives as "".
 * @throws {Error} When the value is not a string or names an unknown format.
 */
function normalizeCodexLogFormat(value) {
  const invalidMessage = "codex log format must be 'summary' or 'json'";
  if (value === void 0 || value === null) {
    return void 0;
  }
  if (typeof value !== "string") {
    throw new Error(invalidMessage);
  }
  const normalized = value.trim().toLowerCase();
  if (normalized.length === 0) {
    return void 0;
  }
  if (normalized === "json" || normalized === "summary") {
    return normalized;
  }
  throw new Error(invalidMessage);
}
1919
2177
  function resolveMockConfig(target) {
1920
2178
  const settings = target.settings ?? {};
1921
2179
  const response = typeof settings.response === "string" ? settings.response : void 0;
@@ -2550,7 +2808,7 @@ function resolveAndCreateProvider(definition, env = process.env) {
2550
2808
  }
2551
2809
 
2552
2810
  // src/evaluation/evaluators.ts
2553
- var import_node_crypto = require("crypto");
2811
+ var import_node_crypto2 = require("crypto");
2554
2812
  var LlmJudgeEvaluator = class {
2555
2813
  kind = "llm_judge";
2556
2814
  resolveJudgeProvider;
@@ -2588,7 +2846,7 @@ var LlmJudgeEvaluator = class {
2588
2846
  const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
2589
2847
  const reasoning = parsed.reasoning ?? response.reasoning;
2590
2848
  const evaluatorRawRequest = {
2591
- id: (0, import_node_crypto.randomUUID)(),
2849
+ id: (0, import_node_crypto2.randomUUID)(),
2592
2850
  provider: judgeProvider.id,
2593
2851
  prompt,
2594
2852
  target: context.target.name,
@@ -2827,7 +3085,7 @@ function parseJsonSafe(payload) {
2827
3085
  }
2828
3086
 
2829
3087
  // src/evaluation/orchestrator.ts
2830
- var import_node_crypto2 = require("crypto");
3088
+ var import_node_crypto3 = require("crypto");
2831
3089
  var import_promises6 = require("fs/promises");
2832
3090
  var import_node_path8 = __toESM(require("path"), 1);
2833
3091
 
@@ -3600,7 +3858,7 @@ function sanitizeFilename(value) {
3600
3858
  return "prompt";
3601
3859
  }
3602
3860
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
3603
- return sanitized.length > 0 ? sanitized : (0, import_node_crypto2.randomUUID)();
3861
+ return sanitized.length > 0 ? sanitized : (0, import_node_crypto3.randomUUID)();
3604
3862
  }
3605
3863
  async function invokeProvider(provider, options) {
3606
3864
  const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -3652,7 +3910,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
3652
3910
  };
3653
3911
  }
3654
3912
  function createCacheKey(provider, target, evalCase, promptInputs) {
3655
- const hash = (0, import_node_crypto2.createHash)("sha256");
3913
+ const hash = (0, import_node_crypto3.createHash)("sha256");
3656
3914
  hash.update(provider.id);
3657
3915
  hash.update(target.name);
3658
3916
  hash.update(evalCase.id);