npm - @agentv/core - Versions diffs - 0.5.1 → 0.6.1 - Mend

@agentv/core 0.5.1 → 0.6.1

This diff shows the differences between the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (12)

package/dist/{chunk-NL7K4CAK.js → chunk-OW3SHBIJ.js} +7 -2
package/dist/chunk-OW3SHBIJ.js.map +1 -0
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +1 -1
package/dist/index.cjs +439 -14
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +30 -2
package/dist/index.d.ts +30 -2
package/dist/index.js +434 -15
package/dist/index.js.map +1 -1
package/package.json +2 -2
package/dist/chunk-NL7K4CAK.js.map +0 -1

package/dist/index.js CHANGED

@@ -4,8 +4,9 @@ import {
   buildSearchRoots,
   fileExists,
   findGitRoot,
+  readTextFile,
   resolveFileReference
-} from "./chunk-NL7K4CAK.js";
+} from "./chunk-OW3SHBIJ.js";
 // src/evaluation/types.ts
 var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
@@ -149,6 +150,9 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = parsed;
+  const datasetNameFromSuite = asString(suite.dataset)?.trim();
+  const fallbackDataset = path.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
+  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
   const schema = suite.$schema;
   if (schema !== SCHEMA_EVAL_V2) {
     const message = typeof schema === "string" ? `Invalid $schema value '${schema}' in ${evalFilePath}. Expected '${SCHEMA_EVAL_V2}'` : `Missing required field '$schema' in ${evalFilePath}.
@@ -296,6 +300,7 @@ Please add '$schema: ${SCHEMA_EVAL_V2}' at the top of the file.`;
     ];
     const testCase = {
       id,
+      dataset: datasetName,
       conversation_id: conversationId,
       task: userTextPrompt,
       user_segments: userSegments,
@@ -676,6 +681,9 @@ var AzureProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() {
+    return this.ai;
+  }
 };
 var AnthropicProvider = class {
   constructor(targetName, config) {
@@ -710,6 +718,9 @@ var AnthropicProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() {
+    return this.ai;
+  }
 };
 var GeminiProvider = class {
   constructor(targetName, config) {
@@ -743,6 +754,9 @@ var GeminiProvider = class {
     );
     return mapResponse(ensureChatResponse(response));
   }
+  getAxAI() {
+    return this.ai;
+  }
 };
 // src/evaluation/providers/cli.ts
@@ -955,7 +969,8 @@ function formatTimeoutSuffix(timeoutMs) {
 // src/evaluation/providers/codex.ts
 import { exec as execCallback, spawn } from "node:child_process";
-import { constants as constants2 } from "node:fs";
+import { randomUUID } from "node:crypto";
+import { constants as constants2, createWriteStream } from "node:fs";
 import { access as access2, copyFile, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import path4 from "node:path";
@@ -1062,6 +1077,59 @@ function pathToFileUri(filePath) {
   return `file://${normalizedPath}`;
 }
+// src/evaluation/providers/codex-log-tracker.ts
+var GLOBAL_LOGS_KEY = Symbol.for("agentv.codexLogs");
+var GLOBAL_SUBSCRIBERS_KEY = Symbol.for("agentv.codexLogSubscribers");
+function getCodexLogStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_LOGS_KEY];
+  if (existing) {
+    return existing;
+  }
+  const created = [];
+  globalObject[GLOBAL_LOGS_KEY] = created;
+  return created;
+}
+function getSubscriberStore() {
+  const globalObject = globalThis;
+  const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY];
+  if (existing) {
+    return existing;
+  }
+  const created = /* @__PURE__ */ new Set();
+  globalObject[GLOBAL_SUBSCRIBERS_KEY] = created;
+  return created;
+}
+function notifySubscribers(entry) {
+  const subscribers = Array.from(getSubscriberStore());
+  for (const listener of subscribers) {
+    try {
+      listener(entry);
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Codex log subscriber failed: ${message}`);
+    }
+  }
+}
+function recordCodexLogEntry(entry) {
+  getCodexLogStore().push(entry);
+  notifySubscribers(entry);
+}
+function consumeCodexLogEntries() {
+  const store = getCodexLogStore();
+  if (store.length === 0) {
+    return [];
+  }
+  return store.splice(0, store.length);
+}
+function subscribeToCodexLogEntries(listener) {
+  const store = getSubscriberStore();
+  store.add(listener);
+  return () => {
+    store.delete(listener);
+  };
+}
 // src/evaluation/providers/codex.ts
 var execAsync2 = promisify2(execCallback);
 var WORKSPACE_PREFIX = "agentv-codex-";
@@ -1093,6 +1161,7 @@ var CodexProvider = class {
       collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path4.resolve(file))
     );
     const workspaceRoot = await this.createWorkspace();
+    const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
       const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
         inputFiles,
@@ -1107,7 +1176,7 @@ var CodexProvider = class {
       await writeFile(promptFile, promptContent, "utf8");
       const args = this.buildCodexArgs();
       const cwd = this.resolveCwd(workspaceRoot);
-      const result = await this.executeCodex(args, cwd, promptContent, request.signal);
+      const result = await this.executeCodex(args, cwd, promptContent, request.signal, logger);
       if (result.timedOut) {
         throw new Error(
           `Codex CLI timed out${formatTimeoutSuffix2(this.config.timeoutMs ?? void 0)}`
@@ -1131,10 +1200,12 @@ var CodexProvider = class {
           executable: this.resolvedExecutable ?? this.config.executable,
           promptFile,
           workspace: workspaceRoot,
-          inputFiles: mirroredInputFiles
+          inputFiles: mirroredInputFiles,
+          logFile: logger?.filePath
         }
       };
     } finally {
+      await logger?.close();
       await this.cleanupWorkspace(workspaceRoot);
     }
   }
@@ -1161,7 +1232,7 @@ var CodexProvider = class {
     args.push("-");
     return args;
   }
-  async executeCodex(args, cwd, promptContent, signal) {
+  async executeCodex(args, cwd, promptContent, signal, logger) {
     try {
       return await this.runCodex({
         executable: this.resolvedExecutable ?? this.config.executable,
@@ -1170,7 +1241,9 @@ var CodexProvider = class {
         prompt: promptContent,
         timeoutMs: this.config.timeoutMs,
         env: process.env,
-        signal
+        signal,
+        onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
+        onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
       });
     } catch (error) {
       const err = error;
@@ -1222,7 +1295,240 @@ var CodexProvider = class {
     } catch {
     }
   }
+  resolveLogDirectory() {
+    const disabled = isCodexLogStreamingDisabled();
+    if (disabled) {
+      return void 0;
+    }
+    if (this.config.logDir) {
+      return path4.resolve(this.config.logDir);
+    }
+    return path4.join(process.cwd(), ".agentv", "logs", "codex");
+  }
+  async createStreamLogger(request) {
+    const logDir = this.resolveLogDirectory();
+    if (!logDir) {
+      return void 0;
+    }
+    try {
+      await mkdir(logDir, { recursive: true });
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
+      return void 0;
+    }
+    const filePath = path4.join(logDir, buildLogFilename(request, this.targetName));
+    try {
+      const logger = await CodexStreamLogger.create({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt,
+        format: this.config.logFormat ?? "summary"
+      });
+      recordCodexLogEntry({
+        filePath,
+        targetName: this.targetName,
+        evalCaseId: request.evalCaseId,
+        attempt: request.attempt
+      });
+      return logger;
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      console.warn(`Skipping Codex stream logging for ${filePath}: ${message}`);
+      return void 0;
+    }
+  }
 };
+var CodexStreamLogger = class _CodexStreamLogger {
+  filePath;
+  stream;
+  startedAt = Date.now();
+  stdoutBuffer = "";
+  stderrBuffer = "";
+  format;
+  constructor(filePath, format) {
+    this.filePath = filePath;
+    this.format = format;
+    this.stream = createWriteStream(filePath, { flags: "a" });
+  }
+  static async create(options) {
+    const logger = new _CodexStreamLogger(options.filePath, options.format);
+    const header = [
+      "# Codex CLI stream log",
+      `# target: ${options.targetName}`,
+      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
+      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
+      `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
+      ""
+    ].filter((line) => Boolean(line));
+    logger.writeLines(header);
+    return logger;
+  }
+  handleStdoutChunk(chunk) {
+    this.stdoutBuffer += chunk;
+    this.flushBuffer("stdout");
+  }
+  handleStderrChunk(chunk) {
+    this.stderrBuffer += chunk;
+    this.flushBuffer("stderr");
+  }
+  async close() {
+    this.flushBuffer("stdout");
+    this.flushBuffer("stderr");
+    this.flushRemainder();
+    await new Promise((resolve, reject) => {
+      this.stream.once("error", reject);
+      this.stream.end(() => resolve());
+    });
+  }
+  writeLines(lines) {
+    for (const line of lines) {
+      this.stream.write(`${line}
+`);
+    }
+  }
+  flushBuffer(source) {
+    const buffer = source === "stdout" ? this.stdoutBuffer : this.stderrBuffer;
+    const lines = buffer.split(/\r?\n/);
+    const remainder = lines.pop() ?? "";
+    if (source === "stdout") {
+      this.stdoutBuffer = remainder;
+    } else {
+      this.stderrBuffer = remainder;
+    }
+    for (const line of lines) {
+      const formatted = this.formatLine(line, source);
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+  }
+  formatLine(rawLine, source) {
+    const trimmed = rawLine.trim();
+    if (trimmed.length === 0) {
+      return void 0;
+    }
+    const message = this.format === "json" ? formatCodexJsonLog(trimmed) : formatCodexLogMessage(trimmed, source);
+    return `[+${formatElapsed(this.startedAt)}] [${source}] ${message}`;
+  }
+  flushRemainder() {
+    const stdoutRemainder = this.stdoutBuffer.trim();
+    if (stdoutRemainder.length > 0) {
+      const formatted = this.formatLine(stdoutRemainder, "stdout");
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+    const stderrRemainder = this.stderrBuffer.trim();
+    if (stderrRemainder.length > 0) {
+      const formatted = this.formatLine(stderrRemainder, "stderr");
+      if (formatted) {
+        this.stream.write(formatted);
+        this.stream.write("\n");
+      }
+    }
+    this.stdoutBuffer = "";
+    this.stderrBuffer = "";
+  }
+};
+function isCodexLogStreamingDisabled() {
+  const envValue = process.env.AGENTV_CODEX_STREAM_LOGS;
+  if (!envValue) {
+    return false;
+  }
+  const normalized = envValue.trim().toLowerCase();
+  return normalized === "false" || normalized === "0" || normalized === "off";
+}
+function buildLogFilename(request, targetName) {
+  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
+  const evalId = sanitizeForFilename(request.evalCaseId ?? "codex");
+  const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
+  const target = sanitizeForFilename(targetName);
+  return `${timestamp}_${target}_${evalId}${attemptSuffix}_${randomUUID().slice(0, 8)}.log`;
+}
+function sanitizeForFilename(value) {
+  const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
+  return sanitized.length > 0 ? sanitized : "codex";
+}
+function formatElapsed(startedAt) {
+  const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
+  const hours = Math.floor(elapsedSeconds / 3600);
+  const minutes = Math.floor(elapsedSeconds % 3600 / 60);
+  const seconds = elapsedSeconds % 60;
+  if (hours > 0) {
+    return `${hours.toString().padStart(2, "0")}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+  }
+  return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
+}
+function formatCodexLogMessage(rawLine, source) {
+  const parsed = tryParseJsonValue(rawLine);
+  if (parsed) {
+    const summary = summarizeCodexEvent(parsed);
+    if (summary) {
+      return summary;
+    }
+  }
+  if (source === "stderr") {
+    return `stderr: ${rawLine}`;
+  }
+  return rawLine;
+}
+function formatCodexJsonLog(rawLine) {
+  const parsed = tryParseJsonValue(rawLine);
+  if (!parsed) {
+    return rawLine;
+  }
+  try {
+    return JSON.stringify(parsed, null, 2);
+  } catch {
+    return rawLine;
+  }
+}
+function summarizeCodexEvent(event) {
+  if (!event || typeof event !== "object") {
+    return void 0;
+  }
+  const record = event;
+  const type = typeof record.type === "string" ? record.type : void 0;
+  let message = extractFromEvent(event) ?? extractFromItem(record.item) ?? flattenContent(record.output ?? record.content);
+  if (!message && type === JSONL_TYPE_ITEM_COMPLETED) {
+    const item = record.item;
+    if (item && typeof item === "object") {
+      const candidate = flattenContent(
+        item.text ?? item.content ?? item.output
+      );
+      if (candidate) {
+        message = candidate;
+      }
+    }
+  }
+  if (!message) {
+    const itemType = typeof record.item?.type === "string" ? record.item.type : void 0;
+    if (type && itemType) {
+      return `${type}:${itemType}`;
+    }
+    if (type) {
+      return type;
+    }
+  }
+  if (type && message) {
+    return `${type}: ${message}`;
+  }
+  if (message) {
+    return message;
+  }
+  return type;
+}
+function tryParseJsonValue(rawLine) {
+  try {
+    return JSON.parse(rawLine);
+  } catch {
+    return void 0;
+  }
+}
 async function locateExecutable(candidate) {
   const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
   if (includesPathSeparator) {
@@ -1492,10 +1798,12 @@ async function defaultCodexRunner(options) {
     child.stdout.setEncoding("utf8");
     child.stdout.on("data", (chunk) => {
       stdout += chunk;
+      options.onStdoutChunk?.(chunk);
     });
     child.stderr.setEncoding("utf8");
     child.stderr.on("data", (chunk) => {
       stderr += chunk;
+      options.onStderrChunk?.(chunk);
     });
     child.stdin.end(options.prompt);
     const cleanup = () => {
@@ -1740,6 +2048,8 @@ function resolveCodexConfig(target, env) {
   const argsSource = settings.args ?? settings.arguments;
   const cwdSource = settings.cwd;
   const timeoutSource = settings.timeout_seconds ?? settings.timeoutSeconds;
+  const logDirSource = settings.log_dir ?? settings.logDir ?? settings.log_directory ?? settings.logDirectory;
+  const logFormatSource = settings.log_format ?? settings.logFormat ?? settings.log_output_format ?? settings.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
   const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1750,13 +2060,33 @@ function resolveCodexConfig(target, env) {
     optionalEnv: true
   });
   const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
+  const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
+  const logFormat = normalizeCodexLogFormat(logFormatSource);
   return {
     executable,
     args,
     cwd,
-    timeoutMs
+    timeoutMs,
+    logDir,
+    logFormat
   };
 }
+function normalizeCodexLogFormat(value) {
+  if (value === void 0 || value === null) {
+    return void 0;
+  }
+  if (typeof value !== "string") {
+    throw new Error("codex log format must be 'summary' or 'json'");
+  }
+  const normalized = value.trim().toLowerCase();
+  if (normalized === "json" || normalized === "summary") {
+    return normalized;
+  }
+  throw new Error("codex log format must be 'summary' or 'json'");
+}
 function resolveMockConfig(target) {
   const settings = target.settings ?? {};
   const response = typeof settings.response === "string" ? settings.response : void 0;
@@ -2386,7 +2716,30 @@ function resolveAndCreateProvider(definition, env = process.env) {
 }
 // src/evaluation/evaluators.ts
-import { randomUUID } from "node:crypto";
+import { ax, f } from "@ax-llm/ax";
+import { randomUUID as randomUUID2 } from "node:crypto";
+var LLM_JUDGE_SIGNATURE = f().input(
+  "evaluationContext",
+  f.object(
+    {
+      expectedOutcome: f.string("The expected outcome for the original task"),
+      request: f.string("The original task request"),
+      referenceAnswer: f.string("The gold standard reference answer"),
+      generatedAnswer: f.string("The answer to evaluate"),
+      guidelines: f.string("Additional evaluation guidelines or instructions").optional()
+    },
+    "Complete evaluation context for the judge"
+  )
+).output(
+  "evaluation",
+  f.object({
+    score: f.number("Score between 0.0 and 1.0").min(0).max(1),
+    hits: f.string("Brief specific achievement").array(),
+    misses: f.string("Brief specific failure or omission").array(),
+    reasoning: f.string("Concise explanation for the score").max(500)
+  })
+).build();
+var LLM_JUDGE = ax(LLM_JUDGE_SIGNATURE);
 var LlmJudgeEvaluator = class {
   kind = "llm_judge";
   resolveJudgeProvider;
@@ -2404,6 +2757,44 @@ var LlmJudgeEvaluator = class {
     if (!judgeProvider) {
       throw new Error("No judge provider available for LLM grading");
     }
+    if (providerSupportsAx(judgeProvider)) {
+      return this.evaluateWithAx(context, judgeProvider);
+    }
+    return this.evaluateWithPrompt(context, judgeProvider);
+  }
+  async evaluateWithAx(context, judgeProvider) {
+    const ai = judgeProvider.getAxAI();
+    const guidelines = context.promptInputs.guidelines?.trim();
+    const evaluationContext = {
+      expectedOutcome: context.evalCase.outcome.trim(),
+      request: context.evalCase.task.trim(),
+      referenceAnswer: context.evalCase.expected_assistant_raw.trim(),
+      generatedAnswer: context.candidate.trim(),
+      ...guidelines ? { guidelines } : {}
+    };
+    const options = this.buildJudgeForwardOptions(context);
+    const result = await LLM_JUDGE.forward(ai, { evaluationContext }, options);
+    const evaluation = result.evaluation;
+    const expectedAspectCount = Math.max(
+      evaluation.hits.length + evaluation.misses.length,
+      1
+    );
+    return {
+      score: evaluation.score,
+      hits: evaluation.hits,
+      misses: evaluation.misses,
+      expectedAspectCount,
+      reasoning: evaluation.reasoning,
+      evaluatorRawRequest: {
+        id: randomUUID2(),
+        provider: judgeProvider.id,
+        target: context.target.name,
+        method: "ax-structured-output",
+        signature: LLM_JUDGE_SIGNATURE.toString()
+      }
+    };
+  }
+  async evaluateWithPrompt(context, judgeProvider) {
     const prompt = buildQualityPrompt(context.evalCase, context.candidate);
     const systemPrompt = context.systemPrompt ?? this.customPrompt ?? QUALITY_SYSTEM_PROMPT;
     const metadata = {
@@ -2423,8 +2814,9 @@ var LlmJudgeEvaluator = class {
     const hits = Array.isArray(parsed.hits) ? parsed.hits.filter(isNonEmptyString).slice(0, 4) : [];
     const misses = Array.isArray(parsed.misses) ? parsed.misses.filter(isNonEmptyString).slice(0, 4) : [];
     const reasoning = parsed.reasoning ?? response.reasoning;
+    const expectedAspectCount = Math.max(hits.length + misses.length, 1);
     const evaluatorRawRequest = {
-      id: randomUUID(),
+      id: randomUUID2(),
       provider: judgeProvider.id,
       prompt,
       target: context.target.name,
@@ -2435,12 +2827,34 @@ var LlmJudgeEvaluator = class {
       score,
       hits,
       misses,
-      expectedAspectCount: hits.length + misses.length || 1,
+      expectedAspectCount,
       reasoning,
       evaluatorRawRequest
     };
   }
+  buildJudgeForwardOptions(context) {
+    const modelConfig = this.buildJudgeModelConfig();
+    if (modelConfig === void 0 && context.judgeModel === void 0) {
+      return void 0;
+    }
+    return {
+      ...context.judgeModel ? { model: context.judgeModel } : {},
+      ...modelConfig ? { modelConfig } : {}
+    };
+  }
+  buildJudgeModelConfig() {
+    if (this.maxOutputTokens === void 0 && this.temperature === void 0) {
+      return void 0;
+    }
+    return {
+      ...this.maxOutputTokens !== void 0 ? { maxTokens: this.maxOutputTokens } : {},
+      ...this.temperature !== void 0 ? { temperature: this.temperature } : {}
+    };
+  }
 };
+function providerSupportsAx(provider) {
+  return typeof provider.getAxAI === "function";
+}
 var QUALITY_SYSTEM_PROMPT = [
   "You are an expert evaluator. Your goal is to grade the generated_answer based on how well it achieves the expected_outcome for the original task.",
   "",
@@ -2663,8 +3077,8 @@ function parseJsonSafe(payload) {
 }
 // src/evaluation/orchestrator.ts
-import { createHash, randomUUID as randomUUID2 } from "node:crypto";
-import { mkdir as mkdir2, readFile as readFile4, writeFile as writeFile2 } from "node:fs/promises";
+import { createHash, randomUUID as randomUUID3 } from "node:crypto";
+import { mkdir as mkdir2, writeFile as writeFile2 } from "node:fs/promises";
 import path7 from "node:path";
 // ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
@@ -3211,6 +3625,7 @@ async function evaluateCandidate(options) {
   };
   return {
     eval_id: evalCase.id,
+    dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
     score: score.score,
     hits: score.hits,
@@ -3387,7 +3802,7 @@ async function runLlmJudgeEvaluator(options) {
 async function resolveCustomPrompt(config) {
   if (config.promptPath) {
     try {
-      return await readFile4(config.promptPath, "utf8");
+      return await readTextFile(config.promptPath);
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       console.warn(`Could not read custom prompt at ${config.promptPath}: ${message}`);
@@ -3436,7 +3851,7 @@ function sanitizeFilename(value) {
     return "prompt";
   }
   const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
-  return sanitized.length > 0 ? sanitized : randomUUID2();
+  return sanitized.length > 0 ? sanitized : randomUUID3();
 }
 async function invokeProvider(provider, options) {
   const { evalCase, promptInputs, attempt, agentTimeoutMs, signal } = options;
@@ -3475,6 +3890,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs)
   };
   return {
     eval_id: evalCase.id,
+    dataset: evalCase.dataset,
     conversation_id: evalCase.conversation_id,
     score: 0,
     hits: [],
@@ -3524,6 +3940,7 @@ export {
   buildDirectoryChain,
   buildPromptInputs,
   buildSearchRoots,
+  consumeCodexLogEntries,
   createAgentKernel,
   createProvider,
   ensureVSCodeSubagents,
@@ -3540,10 +3957,12 @@ export {
   listTargetNames,
   loadEvalCases,
   readTargetDefinitions,
+  readTextFile,
   resolveAndCreateProvider,
   resolveFileReference,
   resolveTargetDefinition,
   runEvalCase,
-  runEvaluation
+  runEvaluation,
+  subscribeToCodexLogEntries
 };
 //# sourceMappingURL=index.js.map