npm - agentv - Versions diffs - 4.6.1 → 4.8.0 - Mend

agentv 4.6.1 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +1 -1
package/dist/{chunk-NSVFUL27.js → chunk-A6W3KOCS.js} +4428 -3605
package/dist/chunk-A6W3KOCS.js.map +1 -0
package/dist/{chunk-YXXD27OK.js → chunk-H4GQXK5M.js} +1314 -440
package/dist/chunk-H4GQXK5M.js.map +1 -0
package/dist/{chunk-MHWYA4CS.js → chunk-QBZJSQXV.js} +365 -349
package/dist/chunk-QBZJSQXV.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-BN5NUVAB.js → dist-QXVR2ZRH.js} +16 -2
package/dist/index.js +3 -3
package/dist/{interactive-DMSVE6CS.js → interactive-IRYNIFCY.js} +10 -47
package/dist/interactive-IRYNIFCY.js.map +1 -0
package/dist/studio/assets/index-DHxVz6M9.css +1 -0
package/dist/studio/assets/{index-C7TnyYee.js → index-DcwjOyrk.js} +1 -1
package/dist/studio/assets/index-Y5InSvcS.js +65 -0
package/dist/studio/index.html +2 -2
package/package.json +1 -1
package/dist/chunk-MHWYA4CS.js.map +0 -1
package/dist/chunk-NSVFUL27.js.map +0 -1
package/dist/chunk-YXXD27OK.js.map +0 -1
package/dist/interactive-DMSVE6CS.js.map +0 -1
package/dist/studio/assets/index-jJVIJh8b.css +0 -1
package/dist/studio/assets/index-vn54AYtS.js +0 -65
package/dist/{dist-BN5NUVAB.js.map → dist-QXVR2ZRH.js.map} +0 -0

package/dist/{chunk-MHWYA4CS.js → chunk-QBZJSQXV.js} RENAMED Viewed

@@ -2,6 +2,8 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
 import {
   CLI_PLACEHOLDERS,
   COMMON_TARGET_SETTINGS,
+  DEFAULT_EVAL_PATTERNS,
+  DEFAULT_THRESHOLD,
   KNOWN_PROVIDERS,
   PROVIDER_ALIASES,
   ResponseCache,
@@ -9,6 +11,7 @@ import {
   buildSearchRoots,
   deriveCategory,
   ensureVSCodeSubagents,
+  findDeprecatedCamelCaseTargetWarnings,
   findGitRoot,
   interpolateEnv,
   isEvaluatorKind,
@@ -29,12 +32,12 @@ import {
   subscribeToCopilotCliLogEntries,
   subscribeToCopilotSdkLogEntries,
   subscribeToPiLogEntries
-} from "./chunk-YXXD27OK.js";
+} from "./chunk-H4GQXK5M.js";
 // package.json
 var package_default = {
   name: "agentv",
-  version: "4.6.1",
+  version: "4.8.0",
   description: "CLI entry point for AgentV",
   type: "module",
   repository: {
@@ -346,6 +349,9 @@ function buildDefaultRunDir(cwd) {
 function resolveRunIndexPath(runDir) {
   return path3.join(runDir, RESULT_INDEX_FILENAME);
 }
+function isRunManifestPath(filePath) {
+  return path3.basename(filePath) === RESULT_INDEX_FILENAME;
+}
 function resolveExistingRunPrimaryPath(runDir) {
   const indexPath = resolveRunIndexPath(runDir);
   if (existsSync(indexPath)) {
@@ -370,9 +376,19 @@ function resolveWorkspaceOrFilePath(filePath) {
   }
   return existing;
 }
+function resolveRunManifestPath(filePath) {
+  if (isDirectoryPath(filePath)) {
+    return resolveWorkspaceOrFilePath(filePath);
+  }
+  if (!isRunManifestPath(filePath)) {
+    throw new Error(
+      `Expected a run workspace directory or ${RESULT_INDEX_FILENAME} manifest: ${filePath}`
+    );
+  }
+  return filePath;
+}
 // src/commands/eval/artifact-writer.ts
-var PASS_THRESHOLD = 0.8;
 function computeStats(values) {
   if (values.length === 0) {
     return { mean: 0, stddev: 0 };
@@ -387,10 +403,10 @@ function computeStats(values) {
 function computePassRate(result) {
   const scores = result.scores;
   if (scores && scores.length > 0) {
-    const passed = scores.filter((s) => s.score >= PASS_THRESHOLD).length;
+    const passed = scores.filter((s) => s.score >= DEFAULT_THRESHOLD).length;
     return passed / scores.length;
   }
-  return (result.score ?? 0) >= PASS_THRESHOLD ? 1 : 0;
+  return (result.score ?? 0) >= DEFAULT_THRESHOLD ? 1 : 0;
 }
 function countToolCalls(result) {
   const toolCalls = {};
@@ -596,12 +612,12 @@ function safeArtifactPathSegment(value, fallback) {
 function safeTestId(testId) {
   return safeArtifactPathSegment(testId, "unknown");
 }
-function getDataset(result) {
-  return result.dataset;
+function getSuite(result) {
+  return result.suite;
 }
 function buildArtifactSubdir(result) {
   const segments = [];
-  const evalSet = getDataset(result);
+  const evalSet = getSuite(result);
   if (evalSet) {
     segments.push(safeArtifactPathSegment(evalSet, "default"));
   }
@@ -628,7 +644,7 @@ function buildResultIndexArtifact(result) {
   return {
     timestamp: result.timestamp,
     test_id: result.testId ?? "unknown",
-    dataset: getDataset(result),
+    suite: getSuite(result),
     category: result.category,
     conversation_id: result.conversationId,
     score: result.score,
@@ -651,42 +667,6 @@ async function writeJsonlFile(filePath, records) {
 `;
   await writeFile(filePath, content, "utf8");
 }
-function toCamelCase(str) {
-  return str.replace(/_([a-z])/g, (_, letter) => letter.toUpperCase());
-}
-function toCamelCaseDeep(obj) {
-  if (obj === null || obj === void 0) {
-    return obj;
-  }
-  if (Array.isArray(obj)) {
-    return obj.map((item) => toCamelCaseDeep(item));
-  }
-  if (typeof obj === "object") {
-    const result = {};
-    for (const [key, value] of Object.entries(obj)) {
-      result[toCamelCase(key)] = toCamelCaseDeep(value);
-    }
-    return result;
-  }
-  return obj;
-}
-function parseJsonlResults(content) {
-  const results = [];
-  const lines = content.split("\n");
-  for (const line of lines) {
-    const trimmed = line.trim();
-    if (trimmed.length === 0) {
-      continue;
-    }
-    try {
-      const parsed = JSON.parse(trimmed);
-      const camelCased = toCamelCaseDeep(parsed);
-      results.push(camelCased);
-    } catch {
-    }
-  }
-  return results;
-}
 async function writeArtifactsFromResults(results, outputDir, options) {
   const testArtifactDir = outputDir;
   const timingPath = path4.join(outputDir, "timing.json");
@@ -733,7 +713,6 @@ async function writeArtifactsFromResults(results, outputDir, options) {
 // src/commands/eval/benchmark-writer.ts
 import { writeFile as writeFile2 } from "node:fs/promises";
-var PASS_THRESHOLD2 = 0.8;
 function computeStats2(values) {
   if (values.length === 0) {
     return { mean: 0, stddev: 0 };
@@ -748,10 +727,10 @@ function computeStats2(values) {
 function computePassRate2(result) {
   const scores = result.scores;
   if (scores && scores.length > 0) {
-    const passed = scores.filter((s) => s.score >= PASS_THRESHOLD2).length;
+    const passed = scores.filter((s) => s.score >= DEFAULT_THRESHOLD).length;
     return passed / scores.length;
   }
-  return result.score >= PASS_THRESHOLD2 ? 1 : 0;
+  return result.score >= DEFAULT_THRESHOLD ? 1 : 0;
 }
 function buildBenchmarkJson(results) {
   const passRates = results.map(computePassRate2);
@@ -1698,7 +1677,7 @@ var JunitWriter = class _JunitWriter {
     this.closed = true;
     const grouped = /* @__PURE__ */ new Map();
     for (const result of this.results) {
-      const suite = result.dataset ?? "default";
+      const suite = result.suite ?? "default";
       const existing = grouped.get(suite);
       if (existing) {
         existing.push(result);
@@ -1708,14 +1687,17 @@ var JunitWriter = class _JunitWriter {
     }
     const suiteXmls = [];
     for (const [suiteName, results] of grouped) {
-      const failures = results.filter((r) => r.score < this.threshold).length;
-      const errors = results.filter((r) => r.error !== void 0).length;
+      const errors = results.filter((r) => r.executionStatus === "execution_error").length;
+      const failures = results.filter(
+        (r) => r.executionStatus !== "execution_error" && r.score < this.threshold
+      ).length;
       const testCases = results.map((r) => {
         const time = r.durationMs ? (r.durationMs / 1e3).toFixed(3) : "0.000";
         let inner = "";
-        if (r.error) {
+        if (r.executionStatus === "execution_error") {
+          const errorMsg = r.error ?? "Execution error";
           inner = `
-      <error message="${escapeXml(r.error)}">${escapeXml(r.error)}</error>
+      <error message="${escapeXml(errorMsg)}">${escapeXml(errorMsg)}</error>
     `;
         } else if (r.score < this.threshold) {
           const message = `score=${r.score.toFixed(3)}`;
@@ -1730,17 +1712,21 @@ var JunitWriter = class _JunitWriter {
         }
         return `    <testcase name="${escapeXml(r.testId)}" classname="${escapeXml(suiteName)}" time="${time}">${inner}</testcase>`;
       });
+      const suiteTime = results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1e3;
       suiteXmls.push(
-        `  <testsuite name="${escapeXml(suiteName)}" tests="${results.length}" failures="${failures}" errors="${errors}">
+        `  <testsuite name="${escapeXml(suiteName)}" tests="${results.length}" failures="${failures}" errors="${errors}" time="${suiteTime.toFixed(3)}">
 ${testCases.join("\n")}
   </testsuite>`
       );
     }
     const totalTests = this.results.length;
-    const totalFailures = this.results.filter((r) => r.score < this.threshold).length;
-    const totalErrors = this.results.filter((r) => r.error !== void 0).length;
+    const totalErrors = this.results.filter((r) => r.executionStatus === "execution_error").length;
+    const totalFailures = this.results.filter(
+      (r) => r.executionStatus !== "execution_error" && r.score < this.threshold
+    ).length;
+    const totalTime = this.results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1e3;
     const xml = `<?xml version="1.0" encoding="UTF-8"?>
-<testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}">
+<testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}" time="${totalTime.toFixed(3)}">
 ${suiteXmls.join("\n")}
 </testsuites>
 `;
@@ -1839,17 +1825,6 @@ function createWriterFromPath(filePath, options) {
       );
   }
 }
-async function createMultiWriter(filePaths, options) {
-  const writers = await Promise.all(filePaths.map((fp) => createWriterFromPath(fp, options)));
-  return {
-    async append(result) {
-      await Promise.all(writers.map((w) => w.append(result)));
-    },
-    async close() {
-      await Promise.all(writers.map((w) => w.close()));
-    }
-  };
-}
 // src/commands/eval/progress-display.ts
 var ANSI_BOLD = "\x1B[1m";
@@ -1926,12 +1901,12 @@ var ProgressDisplay = class {
   }
   addLogPaths(paths, provider) {
     const newPaths = [];
-    for (const path16 of paths) {
-      if (this.logPathSet.has(path16)) {
+    for (const path17 of paths) {
+      if (this.logPathSet.has(path17)) {
         continue;
       }
-      this.logPathSet.add(path16);
-      newPaths.push(path16);
+      this.logPathSet.add(path17);
+      newPaths.push(path17);
     }
     if (newPaths.length === 0) {
       return;
@@ -1944,8 +1919,8 @@ var ProgressDisplay = class {
       this.hasPrintedLogHeader = true;
     }
     const startIndex = this.logPaths.length - newPaths.length;
-    newPaths.forEach((path16, offset) => {
-      console.log(`${startIndex + offset + 1}. ${path16}`);
+    newPaths.forEach((path17, offset) => {
+      console.log(`${startIndex + offset + 1}. ${path17}`);
     });
   }
   finish() {
@@ -1962,9 +1937,6 @@ import path12 from "node:path";
 function parseJsonlLines(content) {
   return content.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0).map((line) => JSON.parse(line));
 }
-function isIndexManifestPath(sourceFile) {
-  return path12.basename(sourceFile) === RESULT_INDEX_FILENAME;
-}
 function parseMarkdownMessages(content) {
   const trimmed = content.trim();
   if (!trimmed.startsWith("@[")) {
@@ -2022,11 +1994,11 @@ function hydrateOutput(baseDir, record) {
 function hydrateManifestRecord(baseDir, record) {
   const grading = readOptionalJson(baseDir, record.grading_path);
   const timing = readOptionalJson(baseDir, record.timing_path);
-  const testId = record.test_id ?? record.eval_id ?? "unknown";
+  const testId = record.test_id ?? "unknown";
   return {
     timestamp: record.timestamp,
     testId,
-    dataset: record.dataset,
+    suite: record.suite,
     category: record.category,
     target: record.target,
     score: record.score,
@@ -2066,74 +2038,44 @@ function parseResultManifest(content) {
 }
 function resolveResultSourcePath(source, cwd) {
   const resolved = path12.isAbsolute(source) ? source : path12.resolve(cwd ?? process.cwd(), source);
-  return resolveWorkspaceOrFilePath(resolved);
+  if (isDirectoryPath(resolved) || path12.basename(resolved) === RESULT_INDEX_FILENAME) {
+    return resolveRunManifestPath(resolved);
+  }
+  return resolved;
 }
 function loadManifestResults(sourceFile) {
-  const resolvedSourceFile = resolveWorkspaceOrFilePath(sourceFile);
-  if (!isIndexManifestPath(resolvedSourceFile)) {
-    return parseJsonlResults(readFileSync(resolvedSourceFile, "utf8"));
-  }
+  const resolvedSourceFile = resolveRunManifestPath(sourceFile);
   const content = readFileSync(resolvedSourceFile, "utf8");
   const records = parseResultManifest(content);
   const baseDir = path12.dirname(resolvedSourceFile);
   return records.map((record) => hydrateManifestRecord(baseDir, record));
 }
 function loadLightweightResults(sourceFile) {
-  const resolvedSourceFile = resolveWorkspaceOrFilePath(sourceFile);
+  const resolvedSourceFile = resolveRunManifestPath(sourceFile);
   const content = readFileSync(resolvedSourceFile, "utf8");
-  if (isIndexManifestPath(resolvedSourceFile)) {
-    return parseResultManifest(content).map((record) => ({
-      testId: record.test_id ?? record.eval_id ?? "unknown",
-      target: record.target,
-      experiment: record.experiment,
-      score: record.score,
-      scores: record.scores,
-      executionStatus: record.execution_status,
-      error: record.error,
-      timestamp: record.timestamp
-    }));
-  }
-  const records = [];
-  for (const line of content.split(/\r?\n/)) {
-    const trimmed = line.trim();
-    if (!trimmed) {
-      continue;
-    }
-    let record;
-    try {
-      record = JSON.parse(trimmed);
-    } catch {
-      continue;
-    }
-    const rawTestId = record.test_id ?? record.eval_id ?? record.testId ?? record.evalId;
-    if (typeof rawTestId !== "string") {
-      throw new Error(`Missing test_id in result: ${trimmed}`);
-    }
-    if (typeof record.score !== "number") {
-      throw new Error(`Missing or invalid score in result: ${trimmed}`);
-    }
-    records.push({
-      testId: rawTestId,
-      target: typeof record.target === "string" ? record.target : void 0,
-      score: record.score,
-      scores: Array.isArray(record.scores) ? record.scores : void 0,
-      executionStatus: typeof record.execution_status === "string" ? record.execution_status : typeof record.executionStatus === "string" ? record.executionStatus : void 0,
-      error: typeof record.error === "string" ? record.error : void 0,
-      timestamp: typeof record.timestamp === "string" ? record.timestamp : void 0
-    });
-  }
-  return records;
+  return parseResultManifest(content).map((record) => ({
+    testId: record.test_id ?? "unknown",
+    suite: record.suite,
+    target: record.target,
+    experiment: record.experiment,
+    score: record.score,
+    scores: record.scores,
+    executionStatus: record.execution_status,
+    error: record.error,
+    timestamp: record.timestamp
+  }));
 }
 // src/commands/eval/retry-errors.ts
+async function loadRetrySourceResults(jsonlPath) {
+  return loadManifestResults(resolveResultSourcePath(jsonlPath));
+}
 async function loadErrorTestIds(jsonlPath) {
-  const resolvedPath = resolveResultSourcePath(jsonlPath);
-  const ids = loadLightweightResults(resolvedPath).filter((result) => result.executionStatus === "execution_error").map((result) => result.testId);
+  const ids = (await loadRetrySourceResults(jsonlPath)).filter((result) => result.executionStatus === "execution_error").map((result) => result.testId);
   return [...new Set(ids)];
 }
 async function loadNonErrorResults(jsonlPath) {
-  const resolvedPath = resolveResultSourcePath(jsonlPath);
-  return loadManifestResults(resolvedPath).filter(
+  return (await loadRetrySourceResults(jsonlPath)).filter(
     (result) => result.testId && result.executionStatus !== "execution_error"
   );
 }
@@ -2146,7 +2088,7 @@ function resolveRunCacheFile(cache) {
   if (cache.lastRunDir) {
     return resolveExistingRunPrimaryPath(cache.lastRunDir) ?? resolveRunIndexPath(cache.lastRunDir);
   }
-  return cache.lastResultFile ?? "";
+  return "";
 }
 function cachePath(cwd) {
   return path13.join(cwd, ".agentv", CACHE_FILENAME);
@@ -2160,15 +2102,14 @@ async function loadRunCache(cwd) {
   }
 }
 async function saveRunCache(cwd, resultPath) {
+  if (path13.basename(resultPath) !== RESULT_INDEX_FILENAME) {
+    return;
+  }
   const dir = path13.join(cwd, ".agentv");
   await mkdir7(dir, { recursive: true });
-  const basename = path13.basename(resultPath);
-  const cache = basename === RESULT_INDEX_FILENAME ? {
+  const cache = {
     lastRunDir: path13.dirname(resultPath),
     timestamp: (/* @__PURE__ */ new Date()).toISOString()
-  } : {
-    lastResultFile: resultPath,
-    timestamp: (/* @__PURE__ */ new Date()).toISOString()
   };
   await writeFile6(cachePath(cwd), `${JSON.stringify(cache, null, 2)}
 `, "utf-8");
@@ -2313,11 +2254,21 @@ function formatEvaluationSummary(summary, options) {
   }
   const gradedCount = summary.total - summary.executionErrorCount;
   const threshold = options?.threshold ?? 0.8;
-  const overallPassed = summary.passedCount === gradedCount || summary.qualityFailureCount === 0 && summary.executionErrorCount === 0;
-  const overallVerdict = overallPassed ? "PASS" : "FAIL";
+  const allExecutionErrors = summary.total > 0 && summary.executionErrorCount === summary.total;
+  const overallPassed = !allExecutionErrors && (summary.passedCount === gradedCount || summary.qualityFailureCount === 0 && summary.executionErrorCount === 0);
   const useColor = !(process.env.NO_COLOR !== void 0) && (process.stdout.isTTY ?? false);
-  const verdictColor = overallPassed ? "\x1B[32m" : "\x1B[31m";
-  const verdictText = `RESULT: ${overallVerdict}  (${summary.passedCount}/${gradedCount} scored >= ${threshold}, mean: ${formatScore(summary.mean)})`;
+  let overallVerdict;
+  let verdictColor;
+  let verdictText;
+  if (allExecutionErrors) {
+    overallVerdict = "INCONCLUSIVE";
+    verdictColor = "\x1B[33m";
+    verdictText = `RESULT: INCONCLUSIVE  (all ${summary.total} test(s) had execution errors \u2014 no evaluation was performed)`;
+  } else {
+    overallVerdict = overallPassed ? "PASS" : "FAIL";
+    verdictColor = overallPassed ? "\x1B[32m" : "\x1B[31m";
+    verdictText = `RESULT: ${overallVerdict}  (${summary.passedCount}/${gradedCount} scored >= ${threshold}, mean: ${formatScore(summary.mean)})`;
+  }
   lines.push("\n==================================================");
   if (useColor) {
     lines.push(`\x1B[1m${verdictColor}${verdictText}\x1B[0m`);
@@ -2527,7 +2478,7 @@ var KNOWN_TEST_FIELDS = /* @__PURE__ */ new Set([
   "workspace",
   "metadata",
   "conversation_id",
-  "dataset",
+  "suite",
   "note"
 ]);
 var NAME_PATTERN = /^[a-z0-9-]+$/;
@@ -3090,87 +3041,68 @@ function isObject2(value) {
 var COMMON_SETTINGS = new Set(COMMON_TARGET_SETTINGS);
 var RETRY_SETTINGS = /* @__PURE__ */ new Set([
   "max_retries",
-  "maxRetries",
   "retry_initial_delay_ms",
-  "retryInitialDelayMs",
   "retry_max_delay_ms",
-  "retryMaxDelayMs",
   "retry_backoff_factor",
-  "retryBackoffFactor",
-  "retry_status_codes",
-  "retryStatusCodes"
+  "retry_status_codes"
 ]);
 var AZURE_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   ...RETRY_SETTINGS,
   "endpoint",
   "resource",
-  "resourceName",
   "api_key",
-  "apiKey",
   "deployment",
-  "deploymentName",
   "model",
   "version",
   "api_version",
+  "api_format",
   "temperature",
-  "max_output_tokens",
-  "maxTokens"
+  "max_output_tokens"
 ]);
 var OPENAI_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   ...RETRY_SETTINGS,
   "endpoint",
   "base_url",
-  "baseUrl",
   "api_key",
-  "apiKey",
   "model",
   "deployment",
   "variant",
   "api_format",
-  "apiFormat",
   "temperature",
-  "max_output_tokens",
-  "maxTokens"
+  "max_output_tokens"
 ]);
 var OPENROUTER_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   ...RETRY_SETTINGS,
   "api_key",
-  "apiKey",
   "model",
   "deployment",
   "variant",
   "temperature",
-  "max_output_tokens",
-  "maxTokens"
+  "max_output_tokens"
 ]);
 var ANTHROPIC_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   ...RETRY_SETTINGS,
   "api_key",
-  "apiKey",
   "model",
   "deployment",
   "variant",
   "temperature",
   "max_output_tokens",
-  "maxTokens",
-  "thinking_budget",
-  "thinkingBudget"
+  "thinking_budget"
 ]);
 var GEMINI_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   ...RETRY_SETTINGS,
   "api_key",
-  "apiKey",
   "model",
   "deployment",
   "variant",
   "temperature",
-  "max_output_tokens",
-  "maxTokens"
+  "max_output_tokens"
 ]);
 var CODEX_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
@@ -3182,40 +3114,26 @@ var CODEX_SETTINGS = /* @__PURE__ */ new Set([
   "arguments",
   "cwd",
   "timeout_seconds",
-  "timeoutSeconds",
   "log_dir",
-  "logDir",
   "log_directory",
-  "logDirectory",
   "log_format",
-  "logFormat",
   "log_output_format",
-  "logOutputFormat",
   "system_prompt",
-  "systemPrompt",
-  "workspace_template",
-  "workspaceTemplate"
+  "workspace_template"
 ]);
 var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   "cli_url",
-  "cliUrl",
   "cli_path",
-  "cliPath",
   "github_token",
-  "githubToken",
   "model",
   "cwd",
   "timeout_seconds",
-  "timeoutSeconds",
   "log_dir",
-  "logDir",
   "log_format",
-  "logFormat",
   "system_prompt",
-  "systemPrompt",
   "workspace_template",
-  "workspaceTemplate"
+  "byok"
 ]);
 var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
@@ -3227,35 +3145,23 @@ var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
   "model",
   "cwd",
   "timeout_seconds",
-  "timeoutSeconds",
   "log_dir",
-  "logDir",
   "log_format",
-  "logFormat",
   "system_prompt",
-  "systemPrompt",
-  "workspace_template",
-  "workspaceTemplate"
+  "workspace_template"
 ]);
 var VSCODE_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   "executable",
   "workspace_template",
-  "workspaceTemplate",
   "wait",
   "dry_run",
-  "dryRun",
   "subagent_root",
-  "subagentRoot",
-  "timeout_seconds",
-  "timeoutSeconds"
+  "timeout_seconds"
 ]);
 var MOCK_SETTINGS = /* @__PURE__ */ new Set([
   ...COMMON_SETTINGS,
   "response",
-  "delayMs",
-  "delayMinMs",
-  "delayMaxMs",
   "trace"
   // For testing tool-trajectory evaluator
 ]);
@@ -3264,23 +3170,14 @@ var CLAUDE_SETTINGS = /* @__PURE__ */ new Set([
   "model",
   "cwd",
   "timeout_seconds",
-  "timeoutSeconds",
   "log_dir",
-  "logDir",
   "log_directory",
-  "logDirectory",
   "log_format",
-  "logFormat",
   "log_output_format",
-  "logOutputFormat",
   "system_prompt",
-  "systemPrompt",
   "workspace_template",
-  "workspaceTemplate",
   "max_turns",
-  "maxTurns",
-  "max_budget_usd",
-  "maxBudgetUsd"
+  "max_budget_usd"
 ]);
 function getKnownSettings(provider) {
   const normalizedProvider = provider.toLowerCase();
@@ -3405,15 +3302,15 @@ async function validateTargetsFile(filePath) {
       });
       return;
     }
-    const timeoutSeconds = healthcheck.timeout_seconds ?? healthcheck.timeoutSeconds;
+    const timeoutSeconds = healthcheck.timeout_seconds;
     if (timeoutSeconds !== void 0) {
       const numericTimeout = Number(timeoutSeconds);
       if (!Number.isFinite(numericTimeout) || numericTimeout <= 0) {
         errors2.push({
           severity: "error",
           filePath: absolutePath2,
-          location: `${location}.timeoutSeconds`,
-          message: "healthcheck.timeoutSeconds must be a positive number when provided"
+          location: `${location}.timeout_seconds`,
+          message: "healthcheck.timeout_seconds must be a positive number when provided"
         });
       }
     }
@@ -3512,6 +3409,18 @@ async function validateTargetsFile(filePath) {
       });
       continue;
     }
+    for (const warning of findDeprecatedCamelCaseTargetWarnings(target, location)) {
+      const fieldMatch = warning.message.match(/field '([^']+)'/);
+      const replacementMatch = warning.message.match(/Use '([^']+)' instead/);
+      const field = fieldMatch?.[1] ?? "unknown";
+      const replacement = replacementMatch?.[1] ?? "snake_case";
+      errors.push({
+        severity: "error",
+        filePath: absolutePath,
+        location: warning.location,
+        message: `camelCase field '${field}' is no longer supported in targets.yaml. Use '${replacement}' instead.`
+      });
+    }
     const name = target.name;
     if (typeof name !== "string" || name.trim().length === 0) {
       errors.push({
@@ -3891,7 +3800,9 @@ Errors in ${targetsFilePath}:`);
     };
   }
   try {
-    const resolvedTarget = resolveTargetDefinition(targetDefinition, env, testFilePath);
+    const resolvedTarget = resolveTargetDefinition(targetDefinition, env, testFilePath, {
+      emitDeprecationWarnings: false
+    });
     return {
       definitions,
       resolvedTarget,
@@ -3974,7 +3885,9 @@ Errors in ${targetsFilePath}:`);
       });
     } else {
       try {
-        const resolvedTarget = resolveTargetDefinition(targetDefinition, env, testFilePath);
+        const resolvedTarget = resolveTargetDefinition(targetDefinition, env, testFilePath, {
+          emitDeprecationWarnings: false
+        });
         results.push({
           definitions,
           resolvedTarget,
@@ -4043,6 +3956,16 @@ function normalizeStringArray(value) {
   }
   return [];
 }
+function normalizeFilter(value) {
+  if (Array.isArray(value)) {
+    const filters = normalizeStringArray(value);
+    if (filters.length === 0) {
+      return void 0;
+    }
+    return filters.length === 1 ? filters[0] : filters;
+  }
+  return normalizeString(value);
+}
 function matchesTagFilters(fileTags, includeTags, excludeTags) {
   const tags = new Set(fileTags ?? []);
   if (includeTags.length > 0) {
@@ -4084,15 +4007,12 @@ function trimOutputMessages(output, outputMessages) {
   return sliced.map((m) => ({ role: m.role, content: m.content }));
 }
 function normalizeOptions(rawOptions, config, yamlExecution) {
-  const cliFormat = normalizeString(rawOptions.outputFormat);
-  const configFormat = config?.output?.format;
-  const formatStr = cliFormat ?? configFormat ?? "jsonl";
-  const format = formatStr === "yaml" ? "yaml" : "jsonl";
   const cliWorkers = normalizeOptionalNumber(rawOptions.workers);
   const configWorkers = config?.execution?.workers;
   const workers = cliWorkers ?? configWorkers ?? 0;
-  const rawOutputPaths = rawOptions.output;
-  const outputPaths = Array.isArray(rawOutputPaths) ? rawOutputPaths.filter((v) => typeof v === "string" && v.trim().length > 0) : [];
+  const cliOutputDir = normalizeString(rawOptions.output);
+  const rawExportPaths = rawOptions.export;
+  const exportPaths = Array.isArray(rawExportPaths) ? rawExportPaths.filter((v) => typeof v === "string" && v.trim().length > 0) : [];
   const rawTarget = rawOptions.target;
   let cliTargets = [];
   let singleTarget;
@@ -4132,11 +4052,11 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
     target: singleTarget,
     cliTargets,
     targetsPath: normalizeString(rawOptions.targets),
-    filter: normalizeString(rawOptions.filter),
+    filter: normalizeFilter(rawOptions.filter),
     workers: workers > 0 ? workers : void 0,
+    outputDir: cliOutputDir,
     outPath: cliOut ?? configOut,
-    outputPaths,
-    format,
+    exportPaths,
     dryRun: normalizeBoolean(rawOptions.dryRun),
     dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
     dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
@@ -4165,7 +4085,8 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
     outputMessages: normalizeOutputMessages(normalizeString(rawOptions.outputMessages)),
     threshold: normalizeOptionalNumber(rawOptions.threshold),
     tags: normalizeStringArray(rawOptions.tag),
-    excludeTags: normalizeStringArray(rawOptions.excludeTag)
+    excludeTags: normalizeStringArray(rawOptions.excludeTag),
+    transcript: normalizeString(rawOptions.transcript)
   };
 }
 async function ensureFileExists(filePath, description) {
@@ -4191,20 +4112,20 @@ function createProgressReporter(maxWorkers, options) {
     addLogPaths: (paths, provider) => display.addLogPaths(paths, provider)
   };
 }
-function makeEvalKey(testFilePath, evalId) {
-  return `${path15.resolve(testFilePath)}::${evalId}`;
+function makeTestCaseKey(testFilePath, testId) {
+  return `${path15.resolve(testFilePath)}::${testId}`;
 }
 function createDisplayIdTracker() {
   const map = /* @__PURE__ */ new Map();
   let nextId = 1;
   return {
-    getOrAssign(evalKey) {
-      const existing = map.get(evalKey);
+    getOrAssign(testCaseKey) {
+      const existing = map.get(testCaseKey);
       if (existing !== void 0) {
         return existing;
       }
       const assigned = nextId++;
-      map.set(evalKey, assigned);
+      map.set(testCaseKey, assigned);
       return assigned;
     }
   };
@@ -4255,58 +4176,79 @@ async function prepareFileMetadata(params) {
     filter: options.filter,
     category
   });
-  const filteredIds = suite.tests.map((value) => value.id);
-  const cliTargets = options.cliTargets;
+  const testIds = suite.tests.map((value) => value.id);
   const suiteTargets = suite.targets;
-  let targetNames;
-  if (cliTargets.length > 0) {
-    targetNames = cliTargets;
-  } else if (suiteTargets && suiteTargets.length > 0) {
-    targetNames = suiteTargets;
-  } else {
-    targetNames = [];
-  }
   let selections;
-  if (targetNames.length > 1) {
-    const multiSelections = await selectMultipleTargets({
-      testFilePath,
-      repoRoot,
-      cwd,
-      explicitTargetsPath: options.targetsPath,
-      dryRun: options.dryRun,
-      dryRunDelay: options.dryRunDelay,
-      dryRunDelayMin: options.dryRunDelayMin,
-      dryRunDelayMax: options.dryRunDelayMax,
-      env: process.env,
-      targetNames
-    });
-    selections = multiSelections.map((sel) => ({
-      selection: sel,
-      inlineTargetLabel: sel.targetName
-    }));
-  } else {
-    const selection = await selectTarget({
-      testFilePath,
-      repoRoot,
-      cwd,
-      explicitTargetsPath: options.targetsPath,
-      cliTargetName: targetNames.length === 1 ? targetNames[0] : options.target,
-      dryRun: options.dryRun,
-      dryRunDelay: options.dryRunDelay,
-      dryRunDelayMin: options.dryRunDelayMin,
-      dryRunDelayMax: options.dryRunDelayMax,
-      env: process.env
-    });
+  if (options.transcript) {
+    const transcriptSelection = {
+      definitions: [],
+      resolvedTarget: {
+        kind: "transcript",
+        name: "transcript",
+        config: {}
+      },
+      targetName: "transcript",
+      targetSource: "cli",
+      targetsFilePath: options.transcript
+    };
     selections = [
       {
-        selection,
-        inlineTargetLabel: selection.targetName
+        selection: transcriptSelection,
+        inlineTargetLabel: `transcript (${path15.basename(options.transcript)})`
       }
     ];
+  } else {
+    const cliTargets = options.cliTargets;
+    const suiteTargets2 = suite.targets;
+    let targetNames;
+    if (cliTargets.length > 0) {
+      targetNames = cliTargets;
+    } else if (suiteTargets2 && suiteTargets2.length > 0) {
+      targetNames = suiteTargets2;
+    } else {
+      targetNames = [];
+    }
+    if (targetNames.length > 1) {
+      const multiSelections = await selectMultipleTargets({
+        testFilePath,
+        repoRoot,
+        cwd,
+        explicitTargetsPath: options.targetsPath,
+        dryRun: options.dryRun,
+        dryRunDelay: options.dryRunDelay,
+        dryRunDelayMin: options.dryRunDelayMin,
+        dryRunDelayMax: options.dryRunDelayMax,
+        env: process.env,
+        targetNames
+      });
+      selections = multiSelections.map((sel) => ({
+        selection: sel,
+        inlineTargetLabel: sel.targetName
+      }));
+    } else {
+      const selection = await selectTarget({
+        testFilePath,
+        repoRoot,
+        cwd,
+        explicitTargetsPath: options.targetsPath,
+        cliTargetName: targetNames.length === 1 ? targetNames[0] : options.target,
+        dryRun: options.dryRun,
+        dryRunDelay: options.dryRunDelay,
+        dryRunDelayMin: options.dryRunDelayMin,
+        dryRunDelayMax: options.dryRunDelayMax,
+        env: process.env
+      });
+      selections = [
+        {
+          selection,
+          inlineTargetLabel: selection.targetName
+        }
+      ];
+    }
   }
   return {
-    evalIds: filteredIds,
-    evalCases: suite.tests,
+    testIds,
+    testCases: suite.tests,
     selections,
     trialsConfig: suite.trials,
     suiteTargets,
@@ -4344,15 +4286,16 @@ async function runSingleEvalFile(params) {
     workersOverride,
     yamlWorkers,
     progressReporter,
-    seenEvalCases,
+    seenTestCases,
     displayIdTracker,
     selection,
     inlineTargetLabel,
-    evalCases,
+    testCases,
     trialsConfig,
     matrixMode,
     totalBudgetUsd,
-    failOnError
+    failOnError,
+    providerFactory
   } = params;
   const targetName = selection.targetName;
   await ensureFileExists(testFilePath, "Test file");
@@ -4408,7 +4351,8 @@ async function runSingleEvalFile(params) {
       }
       return true;
     })(),
-    evalCases,
+    filter: options.filter,
+    evalCases: testCases,
     verbose: options.verbose,
     maxConcurrency: resolvedWorkers,
     workspaceMode: options.workspaceMode,
@@ -4419,6 +4363,7 @@ async function runSingleEvalFile(params) {
     graderTarget: options.graderTarget,
     model: options.model,
     threshold: options.threshold,
+    providerFactory,
     streamCallbacks: streamingObserver?.getStreamCallbacks(),
     onResult: async (result) => {
       streamingObserver?.completeFromResult?.(result);
@@ -4442,13 +4387,13 @@ async function runSingleEvalFile(params) {
       }
     },
     onProgress: async (event) => {
-      const evalKeyId = matrixMode ? `${event.testId}@${targetName}` : event.testId;
-      const evalKey = makeEvalKey(testFilePath, evalKeyId);
-      if (event.status === "pending" && !seenEvalCases.has(evalKey)) {
-        seenEvalCases.add(evalKey);
-        progressReporter.setTotal(seenEvalCases.size);
+      const testCaseKeyId = matrixMode ? `${event.testId}@${targetName}` : event.testId;
+      const testCaseKey = makeTestCaseKey(testFilePath, testCaseKeyId);
+      if (event.status === "pending" && !seenTestCases.has(testCaseKey)) {
+        seenTestCases.add(testCaseKey);
+        progressReporter.setTotal(seenTestCases.size);
       }
-      const displayId = displayIdTracker.getOrAssign(evalKey);
+      const displayId = displayIdTracker.getOrAssign(testCaseKey);
       if (event.status === "running" && streamingObserver) {
         streamingObserver.startEvalCase(event.testId, targetName, testFilePath);
       }
@@ -4528,13 +4473,48 @@ async function runEvalCommand(input) {
   if (options.verbose) {
     console.log(`Repository root: ${repoRoot}`);
   }
-  const usesDefaultArtifactWorkspace = !options.outPath;
-  const outputPath = options.outPath ? path15.resolve(options.outPath) : buildDefaultOutputPath(cwd);
+  if (options.outPath) {
+    console.warn("Warning: --out is deprecated. Use --output <dir> to set the artifact directory.");
+  }
+  if (options.artifacts) {
+    console.warn(
+      "Warning: --artifacts is deprecated. Use --output <dir> to set the artifact directory."
+    );
+  }
+  if (options.benchmarkJson) {
+    console.warn(
+      "Warning: --benchmark-json is deprecated. benchmark.json is always written to the artifact directory."
+    );
+  }
+  if (normalizeString(input.rawOptions.outputFormat)) {
+    console.warn(
+      "Warning: --output-format is deprecated. The artifact directory always uses JSONL."
+    );
+  }
+  const explicitDir = options.outputDir ?? options.artifacts;
+  let runDir;
+  let outputPath;
+  let usesDefaultArtifactWorkspace;
+  if (explicitDir) {
+    runDir = path15.resolve(explicitDir);
+    mkdirSync(runDir, { recursive: true });
+    outputPath = path15.join(runDir, "index.jsonl");
+    usesDefaultArtifactWorkspace = true;
+  } else if (options.outPath) {
+    outputPath = path15.resolve(options.outPath);
+    runDir = path15.dirname(outputPath);
+    mkdirSync(runDir, { recursive: true });
+    usesDefaultArtifactWorkspace = false;
+  } else {
+    outputPath = buildDefaultOutputPath(cwd);
+    runDir = path15.dirname(outputPath);
+    usesDefaultArtifactWorkspace = true;
+  }
   let otelExporter = null;
   const useFileExport = !!options.otelFile;
   if (options.exportOtel || useFileExport) {
     try {
-      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-BN5NUVAB.js");
+      const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-QXVR2ZRH.js");
       let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
       let headers = {};
       if (options.otelBackend) {
@@ -4575,16 +4555,11 @@ async function runEvalCommand(input) {
     }
   }
   const primaryWritePath = outputPath;
-  const extraOutputPaths = options.outputPaths.map((p) => path15.resolve(p));
-  const allOutputPaths = extraOutputPaths.length > 0 ? [primaryWritePath, ...extraOutputPaths] : [primaryWritePath];
-  const uniqueOutputPaths = [...new Set(allOutputPaths)];
-  const reportedOutputPaths = extraOutputPaths.length > 0 ? [outputPath, ...extraOutputPaths] : [outputPath];
-  const uniqueReportedOutputPaths = [...new Set(reportedOutputPaths)];
-  if (uniqueOutputPaths.length === 1) {
-    console.log(`Output path: ${outputPath}`);
-  } else {
-    console.log("Output paths:");
-    for (const p of uniqueReportedOutputPaths) {
+  const resolvedExportPaths = options.exportPaths.map((p) => path15.resolve(p));
+  console.log(`Artifact directory: ${runDir}`);
+  if (resolvedExportPaths.length > 0) {
+    console.log("Export files:");
+    for (const p of resolvedExportPaths) {
       console.log(`  ${p}`);
     }
   }
@@ -4594,7 +4569,7 @@ async function runEvalCommand(input) {
   }
   const evaluationRunner = await resolveEvaluationRunner();
   const allResults = [];
-  const seenEvalCases = /* @__PURE__ */ new Set();
+  const seenTestCases = /* @__PURE__ */ new Set();
   const displayIdTracker = createDisplayIdTracker();
   const totalWorkers = options.workers ?? DEFAULT_WORKERS;
   const fileConcurrency = Math.min(
@@ -4656,7 +4631,6 @@ async function runEvalCommand(input) {
     yamlCache: yamlCacheEnabled
   });
   const cache = cacheEnabled ? new ResponseCache(yamlCachePath ? path15.resolve(yamlCachePath) : void 0) : void 0;
-  const useCache = cacheEnabled;
   if (cacheEnabled) {
     console.log(`Response cache: enabled${yamlCachePath ? ` (${yamlCachePath})` : ""}`);
   }
@@ -4666,17 +4640,12 @@ async function runEvalCommand(input) {
     throw new Error("--threshold must be between 0 and 1");
   }
   const writerOptions = resolvedThreshold !== void 0 ? { threshold: resolvedThreshold } : void 0;
-  let outputWriter;
-  if (uniqueOutputPaths.length === 1) {
-    outputWriter = await createOutputWriter(primaryWritePath, options.format);
-  } else {
-    outputWriter = await createMultiWriter(uniqueOutputPaths, writerOptions);
-  }
+  const outputWriter = await createOutputWriter(primaryWritePath, "jsonl");
   const isMatrixMode = Array.from(fileMetadata.values()).some((meta) => meta.selections.length > 1);
   let totalEvalCount = 0;
   for (const meta of fileMetadata.values()) {
     const suiteTargetNames = meta.selections.map((s) => s.selection.targetName);
-    for (const test of meta.evalCases) {
+    for (const test of meta.testCases) {
       const testTargetNames = test.targets && test.targets.length > 0 ? test.targets.filter((t) => suiteTargetNames.includes(t)) : suiteTargetNames;
       totalEvalCount += testTargetNames.length > 0 ? testTargetNames.length : 1;
     }
@@ -4720,13 +4689,13 @@ async function runEvalCommand(input) {
   });
   for (const [testFilePath, meta] of fileMetadata.entries()) {
     for (const { selection, inlineTargetLabel } of meta.selections) {
-      for (const testId of meta.evalIds) {
-        const evalKey = makeEvalKey(
+      for (const testId of meta.testIds) {
+        const testCaseKey = makeTestCaseKey(
           testFilePath,
           meta.selections.length > 1 ? `${testId}@${selection.targetName}` : testId
         );
-        seenEvalCases.add(evalKey);
-        const displayId = displayIdTracker.getOrAssign(evalKey);
+        seenTestCases.add(testCaseKey);
+        const displayId = displayIdTracker.getOrAssign(testCaseKey);
         progressReporter.update(displayId, {
           workerId: displayId,
           testId: meta.selections.length > 1 ? `${testId}@${selection.targetName}` : testId,
@@ -4737,6 +4706,24 @@ async function runEvalCommand(input) {
     }
   }
   const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
+  let transcriptProviderFactory;
+  if (options.transcript) {
+    const { TranscriptProvider } = await import("./dist-QXVR2ZRH.js");
+    const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
+    const totalTests = [...fileMetadata.values()].reduce(
+      (sum, meta) => sum + meta.testCases.length,
+      0
+    );
+    if (transcriptProvider.lineCount !== totalTests) {
+      throw new Error(
+        `Transcript has ${transcriptProvider.lineCount} entry(s) but eval defines ${totalTests} test(s). Each transcript line maps positionally to one test case.`
+      );
+    }
+    transcriptProviderFactory = () => transcriptProvider;
+    console.log(
+      `Using transcript: ${options.transcript} (${transcriptProvider.lineCount} entry(s))`
+    );
+  }
   try {
     await runWithLimit(activeTestFiles, fileConcurrency, async (testFilePath) => {
       const targetPrep = fileMetadata.get(testFilePath);
@@ -4746,13 +4733,13 @@ async function runEvalCommand(input) {
       const targetResults = await Promise.all(
         targetPrep.selections.map(async ({ selection, inlineTargetLabel }) => {
           const targetName = selection.targetName;
-          const applicableEvalCases = targetPrep.selections.length > 1 ? targetPrep.evalCases.filter((test) => {
+          const applicableTestCases = targetPrep.selections.length > 1 ? targetPrep.testCases.filter((test) => {
             if (test.targets && test.targets.length > 0) {
               return test.targets.includes(targetName);
             }
             return true;
-          }) : targetPrep.evalCases;
-          if (applicableEvalCases.length === 0) {
+          }) : targetPrep.testCases;
+          if (applicableTestCases.length === 0) {
             return [];
           }
           try {
@@ -4768,16 +4755,17 @@ async function runEvalCommand(input) {
               workersOverride: perFileWorkers,
               yamlWorkers: targetPrep.yamlWorkers,
               progressReporter,
-              seenEvalCases,
+              seenTestCases,
               displayIdTracker,
               selection,
               inlineTargetLabel,
-              evalCases: applicableEvalCases,
-              trialsConfig: targetPrep.trialsConfig,
+              testCases: applicableTestCases,
+              trialsConfig: options.transcript ? void 0 : targetPrep.trialsConfig,
               matrixMode: targetPrep.selections.length > 1,
               totalBudgetUsd: targetPrep.totalBudgetUsd,
               failOnError: targetPrep.failOnError,
-              threshold: resolvedThreshold
+              threshold: resolvedThreshold,
+              providerFactory: transcriptProviderFactory
             });
             return result.results;
           } catch (fileError) {
@@ -4785,9 +4773,9 @@ async function runEvalCommand(input) {
             console.error(`
 \u26A0 Eval file failed: ${path15.basename(testFilePath)} \u2014 ${message}
 `);
-            const errorResults = applicableEvalCases.map((evalCase) => ({
+            const errorResults = applicableTestCases.map((testCase) => ({
               timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-              testId: evalCase.id,
+              testId: testCase.id,
               score: 0,
               assertions: [],
               output: [],
@@ -4824,6 +4812,7 @@ async function runEvalCommand(input) {
     const thresholdOpts = resolvedThreshold !== void 0 ? { threshold: resolvedThreshold } : void 0;
     const summary = calculateEvaluationSummary(allResults, thresholdOpts);
     console.log(formatEvaluationSummary(summary, thresholdOpts));
+    const allExecutionErrors = summary.total > 0 && summary.executionErrorCount === summary.total;
     const thresholdFailed = resolvedThreshold !== void 0 && summary.qualityFailureCount > 0;
     if (isMatrixMode && allResults.length > 0) {
       console.log(formatMatrixSummary(allResults));
@@ -4833,18 +4822,17 @@ async function runEvalCommand(input) {
       await writeBenchmarkJson(benchmarkPath, allResults);
       console.log(`Benchmark written to: ${benchmarkPath}`);
     }
-    if (usesDefaultArtifactWorkspace) {
+    if (usesDefaultArtifactWorkspace && allResults.length > 0) {
       const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
-      const workspaceDir = path15.dirname(outputPath);
       const {
         testArtifactDir,
         timingPath,
         benchmarkPath: workspaceBenchmarkPath,
         indexPath
-      } = await writeArtifactsFromResults(allResults, workspaceDir, {
+      } = await writeArtifactsFromResults(allResults, runDir, {
         evalFile
       });
-      console.log(`Artifact workspace written to: ${workspaceDir}`);
+      console.log(`Artifact workspace written to: ${runDir}`);
       console.log(`  Index: ${indexPath}`);
       console.log(
         `  Per-test artifacts: ${testArtifactDir} (${allResults.length} test directories)`
@@ -4852,24 +4840,17 @@ async function runEvalCommand(input) {
       console.log(`  Timing: ${timingPath}`);
       console.log(`  Benchmark: ${workspaceBenchmarkPath}`);
     }
-    if (options.artifacts) {
-      const artifactsDir = path15.resolve(options.artifacts);
-      const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
-      const {
-        testArtifactDir,
-        indexPath,
-        timingPath,
-        benchmarkPath: abp
-      } = await writeArtifactsFromResults(allResults, artifactsDir, {
-        evalFile
-      });
-      console.log(`Artifacts written to: ${artifactsDir}`);
-      console.log(`  Index: ${indexPath}`);
+    if (resolvedExportPaths.length > 0 && allResults.length > 0) {
+      for (const exportPath of resolvedExportPaths) {
+        const writer = await createWriterFromPath(exportPath, writerOptions);
+        for (const result of allResults) {
+          await writer.append(result);
+        }
+        await writer.close();
+      }
       console.log(
-        `  Per-test artifacts: ${testArtifactDir} (${allResults.length} test directories)`
+        `Export file(s) written: ${resolvedExportPaths.map((p) => path15.relative(cwd, p)).join(", ")}`
       );
-      console.log(`  Timing:  ${timingPath}`);
-      console.log(`  Benchmark: ${abp}`);
     }
     const failedWithWorkspaces = allResults.filter(
       (r) => r.workspacePath && (r.error || r.score < 0.5)
@@ -4881,15 +4862,8 @@ async function runEvalCommand(input) {
       }
     }
     if (allResults.length > 0) {
-      if (uniqueReportedOutputPaths.length === 1) {
-        console.log(`
+      console.log(`
 Results written to: ${outputPath}`);
-      } else {
-        console.log("\nResults written to:");
-        for (const p of uniqueReportedOutputPaths) {
-          console.log(`  ${p}`);
-        }
-      }
       await saveRunCache(cwd, outputPath).catch(() => void 0);
     }
     if (summary.executionErrorCount > 0 && !options.retryErrors) {
@@ -4907,7 +4881,8 @@ Tip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed t
       outputPath,
       testFiles: activeTestFiles,
       target: options.target,
-      thresholdFailed
+      thresholdFailed,
+      allExecutionErrors
     };
   } finally {
     unsubscribeCodexLogs();
@@ -4940,6 +4915,43 @@ async function resolveEvaluationRunner() {
   return candidate;
 }
+// src/commands/eval/discover.ts
+import path16 from "node:path";
+import fg2 from "fast-glob";
+async function discoverEvalFiles(cwd) { // Globs for eval files under cwd; resolves to an array of { path, relativePath, category } sorted by relativePath.
+  const repoRoot = await findRepoRoot(cwd);
+  const config = await loadConfig(path16.join(cwd, "_"), repoRoot); // NOTE(review): the underscore looks like a placeholder file name so the config lookup is anchored at cwd - confirm against loadConfig
+  const patterns = config?.eval_patterns && config.eval_patterns.length > 0 ? config.eval_patterns : DEFAULT_EVAL_PATTERNS; // a non-empty config.eval_patterns replaces the built-in defaults
+  const ignore = ["**/node_modules/**", "**/dist/**"];
+  const matches = await fg2(patterns, {
+    cwd,
+    absolute: true, // request absolute paths; they are made cwd-relative again below
+    onlyFiles: true,
+    ignore,
+    followSymbolicLinks: true,
+    caseSensitiveMatch: false
+  });
+  const evalFiles = matches.map((absPath) => {
+    const relativePath = path16.relative(cwd, absPath); // cwd-relative form feeds both category derivation and the sort key
+    const category = deriveCategory(relativePath);
+    return { path: absPath, relativePath, category };
+  });
+  evalFiles.sort((a, b) => a.relativePath.localeCompare(b.relativePath)); // in-place sort by relative path using localeCompare
+  return evalFiles;
+}
+function getCategories(files) { // Returns the distinct category values found on files, as a sorted array.
+  const categories = /* @__PURE__ */ new Set(); // a Set collapses duplicate categories
+  for (const file of files) {
+    categories.add(file.category);
+  }
+  const sorted = Array.from(categories);
+  sorted.sort(); // no comparator: default Array.prototype.sort ordering
+  return sorted;
+}
+function filterByCategory(files, category) { // Returns a new array holding only the files whose category strictly equals the given category.
+  return files.filter((f) => f.category === category);
+}
 export {
   package_default,
   toSnakeCaseDeep,
@@ -4948,12 +4960,13 @@ export {
   buildDefaultRunDir,
   resolveExistingRunPrimaryPath,
   resolveWorkspaceOrFilePath,
-  writeArtifactsFromResults,
+  resolveRunManifestPath,
   parseResultManifest,
   resolveResultSourcePath,
   loadManifestResults,
   loadLightweightResults,
   HtmlWriter,
+  writeArtifactsFromResults,
   resolveRunCacheFile,
   loadRunCache,
   resolveEvalPaths,
@@ -4966,6 +4979,9 @@ export {
   TARGET_FILE_CANDIDATES,
   fileExists,
   selectTarget,
-  runEvalCommand
+  runEvalCommand,
+  discoverEvalFiles,
+  getCategories,
+  filterByCategory
 };
-//# sourceMappingURL=chunk-MHWYA4CS.js.map
+//# sourceMappingURL=chunk-QBZJSQXV.js.map