@agentv/core 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -265,6 +265,8 @@ interface CodexResolvedConfig {
265
265
  readonly args?: readonly string[];
266
266
  readonly cwd?: string;
267
267
  readonly timeoutMs?: number;
268
+ readonly logDir?: string;
269
+ readonly logFormat?: "summary" | "json";
268
270
  }
269
271
  interface MockResolvedConfig {
270
272
  readonly response?: string;
package/dist/index.d.ts CHANGED
@@ -265,6 +265,8 @@ interface CodexResolvedConfig {
265
265
  readonly args?: readonly string[];
266
266
  readonly cwd?: string;
267
267
  readonly timeoutMs?: number;
268
+ readonly logDir?: string;
269
+ readonly logFormat?: "summary" | "json";
268
270
  }
269
271
  interface MockResolvedConfig {
270
272
  readonly response?: string;
package/dist/index.js CHANGED
@@ -955,7 +955,8 @@ function formatTimeoutSuffix(timeoutMs) {
955
955
 
956
956
  // src/evaluation/providers/codex.ts
957
957
  import { exec as execCallback, spawn } from "node:child_process";
958
- import { constants as constants2 } from "node:fs";
958
+ import { randomUUID } from "node:crypto";
959
+ import { constants as constants2, createWriteStream } from "node:fs";
959
960
  import { access as access2, copyFile, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
960
961
  import { tmpdir } from "node:os";
961
962
  import path4 from "node:path";
@@ -1093,6 +1094,7 @@ var CodexProvider = class {
1093
1094
  collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path4.resolve(file))
1094
1095
  );
1095
1096
  const workspaceRoot = await this.createWorkspace();
1097
+ const logger = await this.createStreamLogger(request).catch(() => void 0);
1096
1098
  try {
1097
1099
  const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
1098
1100
  inputFiles,
@@ -1107,7 +1109,7 @@ var CodexProvider = class {
1107
1109
  await writeFile(promptFile, promptContent, "utf8");
1108
1110
  const args = this.buildCodexArgs();
1109
1111
  const cwd = this.resolveCwd(workspaceRoot);
1110
- const result = await this.executeCodex(args, cwd, promptContent, request.signal);
1112
+ const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
1111
1113
  if (result.timedOut) {
1112
1114
  throw new Error(
1113
1115
  `Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
@@ -1131,10 +1133,12 @@ var CodexProvider = class {
1131
1133
  executable: this.resolvedExecutable ?? this.config.executable,
1132
1134
  promptFile,
1133
1135
  workspace: workspaceRoot,
1134
- inputFiles: mirroredInputFiles
1136
+ inputFiles: mirroredInputFiles,
1137
+ logFile: logger?.filePath
1135
1138
  }
1136
1139
  };
1137
1140
  } finally {
1141
+ await logger?.close();
1138
1142
  await this.cleanupWorkspace(workspaceRoot);
1139
1143
  }
1140
1144
  }
@@ -1161,7 +1165,7 @@ var CodexProvider = class {
1161
1165
  args.push("-");
1162
1166
  return args;
1163
1167
  }
1164
- async executeCodex(args, cwd, promptContent, signal) {
1168
+ async executeCodex(args, cwd, promptContent, signal, logger) {
1165
1169
  try {
1166
1170
  return await this.runCodex({
1167
1171
  executable: this.resolvedExecutable ?? this.config.executable,
@@ -1170,7 +1174,9 @@ var CodexProvider = class {
1170
1174
  prompt: promptContent,
1171
1175
  timeoutMs: this.config.timeoutMs,
1172
1176
  env: process.env,
1173
- signal
1177
+ signal,
1178
+ onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
1179
+ onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
1174
1180
  });
1175
1181
  } catch (error) {
1176
1182
  const err = error;
@@ -1222,7 +1228,235 @@ var CodexProvider = class {
1222
1228
  } catch {
1223
1229
  }
1224
1230
  }
1231
+ resolveLogDirectory() {
1232
+ const disabled = isCodexLogStreamingDisabled();
1233
+ if (disabled) {
1234
+ return void 0;
1235
+ }
1236
+ if (this.config.logDir) {
1237
+ return path4.resolve(this.config.logDir);
1238
+ }
1239
+ return path4.join(process.cwd(), ".agentv", "logs", "codex");
1240
+ }
1241
+ async createStreamLogger(request) {
1242
+ const logDir = this.resolveLogDirectory();
1243
+ if (!logDir) {
1244
+ return void 0;
1245
+ }
1246
+ try {
1247
+ await mkdir(logDir, { recursive: true });
1248
+ } catch (error) {
1249
+ const message = error instanceof Error ? error.message : String(error);
1250
+ console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
1251
+ return void 0;
1252
+ }
1253
+ const filePath = path4.join(logDir, buildLogFilename(request, this.targetName));
1254
+ try {
1255
+ const logger = await CodexStreamLogger.create({
1256
+ filePath,
1257
+ targetName: this.targetName,
1258
+ evalCaseId: request.evalCaseId,
1259
+ attempt: request.attempt,
1260
+ format: this.config.logFormat ?? "summary"
1261
+ });
1262
+ console.log(`Streaming Codex CLI output to ${filePath}`);
1263
+ return logger;
1264
+ } catch (error) {
1265
+ const message = error instanceof Error ? error.message : String(error);
1266
+ console.warn(`Skipping Codex stream logging for ${filePath}: ${message}`);
1267
+ return void 0;
1268
+ }
1269
+ }
1270
+ };
1271
+ var CodexStreamLogger = class _CodexStreamLogger {
1272
+ filePath;
1273
+ stream;
1274
+ startedAt = Date.now();
1275
+ stdoutBuffer = "";
1276
+ stderrBuffer = "";
1277
+ format;
1278
+ constructor(filePath, format) {
1279
+ this.filePath = filePath;
1280
+ this.format = format;
1281
+ this.stream = createWriteStream(filePath, { flags: "a" });
1282
+ }
1283
+ static async create(options) {
1284
+ const logger = new _CodexStreamLogger(options.filePath, options.format);
1285
+ const header = [
1286
+ "# Codex CLI stream log",
1287
+ `# target: ${options.targetName}`,
1288
+ options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
1289
+ options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
1290
+ `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
1291
+ ""
1292
+ ].filter((line) => Boolean(line));
1293
+ logger.writeLines(header);
1294
+ return logger;
1295
+ }
1296
+ handleStdoutChunk(chunk) {
1297
+ this.stdoutBuffer += chunk;
1298
+ this.flushBuffer("stdout");
1299
+ }
1300
+ handleStderrChunk(chunk) {
1301
+ this.stderrBuffer += chunk;
1302
+ this.flushBuffer("stderr");
1303
+ }
1304
+ async close() {
1305
+ this.flushBuffer("stdout");
1306
+ this.flushBuffer("stderr");
1307
+ this.flushRemainder();
1308
+ await new Promise((resolve, reject) => {
1309
+ this.stream.once("error", reject);
1310
+ this.stream.end(() => resolve());
1311
+ });
1312
+ }
1313
+ writeLines(lines) {
1314
+ for (const line of lines) {
1315
+ this.stream.write(`${line}
1316
+ `);
1317
+ }
1318
+ }
1319
+ flushBuffer(source) {
1320
+ const buffer = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
1321
+ const lines = buffer.split(/\r?\n/);
1322
+ const remainder = lines.pop() ?? "";
1323
+ if (source === "stdout") {
1324
+ this.stdoutBuffer = remainder;
1325
+ } else {
1326
+ this.stderrBuffer = remainder;
1327
+ }
1328
+ for (const line of lines) {
1329
+ const formatted = this.formatLine(line, source);
1330
+ if (formatted) {
1331
+ this.stream.write(formatted);
1332
+ this.stream.write("\n");
1333
+ }
1334
+ }
1335
+ }
1336
+ formatLine(rawLine, source) {
1337
+ const trimmed = rawLine.trim();
1338
+ if (trimmed.length === 0) {
1339
+ return void 0;
1340
+ }
1341
+ const message = this.format === "json" ? formatCodexJsonLog(trimmed) : formatCodexLogMessage(trimmed, source);
1342
+ return `[+${formatElapsed(this.startedAt)}] [${source}] ${message}`;
1343
+ }
1344
+ flushRemainder() {
1345
+ const stdoutRemainder = this.stdoutBuffer.trim();
1346
+ if (stdoutRemainder.length > 0) {
1347
+ const formatted = this.formatLine(stdoutRemainder, "stdout");
1348
+ if (formatted) {
1349
+ this.stream.write(formatted);
1350
+ this.stream.write("\n");
1351
+ }
1352
+ }
1353
+ const stderrRemainder = this.stderrBuffer.trim();
1354
+ if (stderrRemainder.length > 0) {
1355
+ const formatted = this.formatLine(stderrRemainder, "stderr");
1356
+ if (formatted) {
1357
+ this.stream.write(formatted);
1358
+ this.stream.write("\n");
1359
+ }
1360
+ }
1361
+ this.stdoutBuffer = "";
1362
+ this.stderrBuffer = "";
1363
+ }
1225
1364
  };
1365
+ function isCodexLogStreamingDisabled() {
1366
+ const envValue = process.env.AGENTV_CODEX_STREAM_LOGS;
1367
+ if (!envValue) {
1368
+ return false;
1369
+ }
1370
+ const normalized = envValue.trim().toLowerCase();
1371
+ return normalized === "false" || normalized === "0" || normalized === "off";
1372
+ }
1373
+ function buildLogFilename(request, targetName) {
1374
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
1375
+ const evalId = sanitizeForFilename(request.evalCaseId ?? "codex");
1376
+ const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
1377
+ const target = sanitizeForFilename(targetName);
1378
+ return `${timestamp}_${target}_${evalId}${attemptSuffix}_${randomUUID().slice(0, 8)}.log`;
1379
+ }
1380
+ function sanitizeForFilename(value) {
1381
+ const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
1382
+ return sanitized.length > 0 ? sanitized : "codex";
1383
+ }
1384
+ function formatElapsed(startedAt) {
1385
+ const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
1386
+ const hours = Math.floor(elapsedSeconds / 3600);
1387
+ const minutes = Math.floor(elapsedSeconds % 3600 / 60);
1388
+ const seconds = elapsedSeconds % 60;
1389
+ if (hours > 0) {
1390
+ return `${hours.toString().padStart(2, "0")}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
1391
+ }
1392
+ return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
1393
+ }
1394
+ function formatCodexLogMessage(rawLine, source) {
1395
+ const parsed = tryParseJsonValue(rawLine);
1396
+ if (parsed) {
1397
+ const summary = summarizeCodexEvent(parsed);
1398
+ if (summary) {
1399
+ return summary;
1400
+ }
1401
+ }
1402
+ if (source === "stderr") {
1403
+ return `stderr: ${rawLine}`;
1404
+ }
1405
+ return rawLine;
1406
+ }
1407
+ function formatCodexJsonLog(rawLine) {
1408
+ const parsed = tryParseJsonValue(rawLine);
1409
+ if (!parsed) {
1410
+ return rawLine;
1411
+ }
1412
+ try {
1413
+ return JSON.stringify(parsed, null, 2);
1414
+ } catch {
1415
+ return rawLine;
1416
+ }
1417
+ }
1418
+ function summarizeCodexEvent(event) {
1419
+ if (!event || typeof event !== "object") {
1420
+ return void 0;
1421
+ }
1422
+ const record = event;
1423
+ const type = typeof record.type === "string" ? record.type : void 0;
1424
+ let message = extractFromEvent(event) ?? extractFromItem(record.item) ?? flattenContent(record.output ?? record.content);
1425
+ if (!message && type === JSONL_TYPE_ITEM_COMPLETED) {
1426
+ const item = record.item;
1427
+ if (item && typeof item === "object") {
1428
+ const candidate = flattenContent(
1429
+ item.text ?? item.content ?? item.output
1430
+ );
1431
+ if (candidate) {
1432
+ message = candidate;
1433
+ }
1434
+ }
1435
+ }
1436
+ if (!message) {
1437
+ const itemType = typeof record.item?.type === "string" ? record.item.type : void 0;
1438
+ if (type && itemType) {
1439
+ return `${type}:${itemType}`;
1440
+ }
1441
+ if (type) {
1442
+ return type;
1443
+ }
1444
+ }
1445
+ if (type && message) {
1446
+ return `${type}: ${message}`;
1447
+ }
1448
+ if (message) {
1449
+ return message;
1450
+ }
1451
+ return type;
1452
+ }
1453
+ function tryParseJsonValue(rawLine) {
1454
+ try {
1455
+ return JSON.parse(rawLine);
1456
+ } catch {
1457
+ return void 0;
1458
+ }
1459
+ }
1226
1460
  async function locateExecutable(candidate) {
1227
1461
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
1228
1462
  if (includesPathSeparator) {
@@ -1492,10 +1726,12 @@ async function defaultCodexRunner(options) {
1492
1726
  child.stdout.setEncoding("utf8");
1493
1727
  child.stdout.on("data", (chunk) => {
1494
1728
  stdout += chunk;
1729
+ options.onStdoutChunk?.(chunk);
1495
1730
  });
1496
1731
  child.stderr.setEncoding("utf8");
1497
1732
  child.stderr.on("data", (chunk) => {
1498
1733
  stderr += chunk;
1734
+ options.onStderrChunk?.(chunk);
1499
1735
  });
1500
1736
  child.stdin.end(options.prompt);
1501
1737
  const cleanup = () => {
@@ -1740,6 +1976,8 @@ function resolveCodexConfig(target, env) {
1740
1976
  const argsSource = settings.args ?? settings.arguments;
1741
1977
  const cwdSource = settings.cwd;
1742
1978
  const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
1979
+ const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
1980
+ const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
1743
1981
  const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
1744
1982
  allowLiteral: true,
1745
1983
  optionalEnv: true
@@ -1750,13 +1988,33 @@ function resolveCodexConfig(target, env) {
1750
1988
  optionalEnv: true
1751
1989
  });
1752
1990
  const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
1991
+ const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
1992
+ allowLiteral: true,
1993
+ optionalEnv: true
1994
+ });
1995
+ const logFormat = normalizeCodexLogFormat(logFormatSource);
1753
1996
  return {
1754
1997
  executable,
1755
1998
  args,
1756
1999
  cwd,
1757
- timeoutMs
2000
+ timeoutMs,
2001
+ logDir,
2002
+ logFormat
1758
2003
  };
1759
2004
  }
2005
+ function normalizeCodexLogFormat(value) {
2006
+ if (value === void 0 || value === null) {
2007
+ return void 0;
2008
+ }
2009
+ if (typeof value !== "string") {
2010
+ throw new Error("codex log format must be 'summary' or 'json'");
2011
+ }
2012
+ const normalized = value.trim().toLowerCase();
2013
+ if (normalized === "json" || normalized === "summary") {
2014
+ return normalized;
2015
+ }
2016
+ throw new Error("codex log format must be 'summary' or 'json'");
2017
+ }
1760
2018
  function resolveMockConfig(target) {
1761
2019
  const settings = target.settings ?? {};
1762
2020
  const response = typeof settings.response === "string" ? settings.response : void 0;
@@ -2386,7 +2644,7 @@ function resolveAndCreateProvider(definition, env = process.env) {
2386
2644
  }
2387
2645
 
2388
2646
  // src/evaluation/evaluators.ts
2389
- import { randomUUID } from "node:crypto";
2647
+ import { randomUUID as randomUUID2 } from "node:crypto";
2390
2648
  var LlmJudgeEvaluator = class {
2391
2649
  kind = "llm_judge";
2392
2650
  resolveJudgeProvider;
@@ -2424,7 +2682,7 @@ var LlmJudgeEvaluator = class {
2424
2682
  const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
2425
2683
  const reasoning = parsed.reasoning ?? response.reasoning;
2426
2684
  const evaluatorRawRequest = {
2427
- id: randomUUID(),
2685
+ id: randomUUID2(),
2428
2686
  provider: judgeProvider.id,
2429
2687
  prompt,
2430
2688
  target: context.target.name,
@@ -2663,7 +2921,7 @@ function parseJsonSafe(payload) {
2663
2921
  }
2664
2922
 
2665
2923
  // src/evaluation/orchestrator.ts
2666
- import { createHash, randomUUID as randomUUID2 } from "node:crypto";
2924
+ import { createHash, randomUUID as randomUUID3 } from "node:crypto";
2667
2925
  import { mkdir as mkdir2, readFile as readFile4, writeFile as writeFile2 } from "node:fs/promises";
2668
2926
  import path7 from "node:path";
2669
2927
 
@@ -3436,7 +3694,7 @@ function sanitizeFilename(value) {
3436
3694
  return "prompt";
3437
3695
  }
3438
3696
  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
3439
- return sanitized.length > 0 ? sanitized : randomUUID2();
3697
+ return sanitized.length > 0 ? sanitized : randomUUID3();
3440
3698
  }
3441
3699
  async function invokeProvider(provider, options) {
3442
3700
  const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;