npm - agentv - Versions diffs - 4.32.0-next.1 → 4.34.0-next.1 - Mend

agentv 4.32.0-next.1 → 4.34.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md +4 -5
package/dist/{artifact-writer-VDF7KRWL.js → artifact-writer-UWZX5JKX.js} +4 -4
package/dist/{chunk-TAZBCVEZ.js → chunk-6QEIZ33V.js} +1340 -279
package/dist/chunk-6QEIZ33V.js.map +1 -0
package/dist/{chunk-IGTRNQAM.js → chunk-FK5FLLME.js} +2383 -674
package/dist/chunk-FK5FLLME.js.map +1 -0
package/dist/chunk-GPRZ7XSC.js +1234 -0
package/dist/chunk-GPRZ7XSC.js.map +1 -0
package/dist/{chunk-5JMFFG36.js → chunk-KMO527KH.js} +784 -1081
package/dist/chunk-KMO527KH.js.map +1 -0
package/dist/{chunk-LX5AK3P7.js → chunk-KP4SPQ2M.js} +585 -191
package/dist/chunk-KP4SPQ2M.js.map +1 -0
package/dist/cli.js +5 -5
package/dist/dashboard/assets/{index-BdoQWnyM.js → index-79OddHgT.js} +1 -1
package/dist/dashboard/assets/index-BPMAZqjE.css +1 -0
package/dist/dashboard/assets/index-BycNIWwy.js +118 -0
package/dist/dashboard/index.html +3 -3
package/dist/{dist-GICSKMNP.js → dist-Z5VWSDOO.js} +58 -6
package/dist/index.js +5 -5
package/dist/{interactive-GIDBBDYZ.js → interactive-NTT2QLPR.js} +5 -5
package/dist/skills/agentv-eval-writer/SKILL.md +2 -1
package/dist/skills/agentv-eval-writer/references/eval-schema.json +104 -0
package/dist/skills/agentv-eval-writer/references/rubric-evaluator.md +20 -0
package/dist/{ts-eval-loader-Z6IUSDNA-YBOE4JIQ.js → ts-eval-loader-EQJX3OLT-THE7D3GR.js} +2 -2
package/package.json +2 -2
package/dist/chunk-2ZEY3WBH.js +0 -729
package/dist/chunk-2ZEY3WBH.js.map +0 -1
package/dist/chunk-5JMFFG36.js.map +0 -1
package/dist/chunk-IGTRNQAM.js.map +0 -1
package/dist/chunk-LX5AK3P7.js.map +0 -1
package/dist/chunk-TAZBCVEZ.js.map +0 -1
package/dist/dashboard/assets/index-DcPH8PyS.css +0 -1
package/dist/dashboard/assets/index-EXkiwqam.js +0 -116
/package/dist/{artifact-writer-VDF7KRWL.js.map → artifact-writer-UWZX5JKX.js.map} +0 -0
/package/dist/{dist-GICSKMNP.js.map → dist-Z5VWSDOO.js.map} +0 -0
/package/dist/{interactive-GIDBBDYZ.js.map → interactive-NTT2QLPR.js.map} +0 -0
/package/dist/{ts-eval-loader-Z6IUSDNA-YBOE4JIQ.js.map → ts-eval-loader-EQJX3OLT-THE7D3GR.js.map} +0 -0

package/dist/{chunk-TAZBCVEZ.js → chunk-6QEIZ33V.js} RENAMED Viewed

@@ -4056,16 +4056,18 @@ var coerce = {
 };
 var NEVER = INVALID;
-// ../../packages/core/dist/chunk-5RQMJZDJ.js
+// ../../packages/core/dist/chunk-EW5X2RGJ.js
 import { parse } from "yaml";
+import os from "node:os";
+import path from "node:path";
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
-import path from "node:path";
+import path2 from "node:path";
 import { existsSync, readFileSync } from "node:fs";
 import { homedir } from "node:os";
-import path2 from "node:path";
-import { readFile as readFile2, readdir, stat } from "node:fs/promises";
 import path3 from "node:path";
+import { readFile as readFile2, readdir, stat } from "node:fs/promises";
+import path4 from "node:path";
 import fg from "fast-glob";
 var CONTENT_TYPES = /* @__PURE__ */ new Set(["text", "image", "file"]);
 function isContent(value) {
@@ -4164,10 +4166,37 @@ var GRADER_KIND_SET = new Set(GRADER_KIND_VALUES);
 function isGraderKind(value) {
   return typeof value === "string" && GRADER_KIND_SET.has(value);
 }
+var RUBRIC_OPERATOR_VALUES = ["correctness", "contradiction"];
 var PARSE_OPTIONS = { merge: true };
 function parseYamlValue(content) {
   return parse(content, PARSE_OPTIONS);
 }
+function readEnvPath(name) {
+  const value = process.env[name];
+  if (!value || value === "undefined") return void 0;
+  return value;
+}
+function getAgentvConfigDir() {
+  return readEnvPath("AGENTV_HOME") ?? path.join(os.homedir(), ".agentv");
+}
+function getAgentvHome() {
+  return getAgentvConfigDir();
+}
+function getAgentvDataDir() {
+  return readEnvPath("AGENTV_DATA_DIR") ?? getAgentvConfigDir();
+}
+function getWorkspacesRoot() {
+  return path.join(getAgentvDataDir(), "workspaces");
+}
+function getSubagentsRoot() {
+  return path.join(getAgentvDataDir(), "subagents");
+}
+function getTraceStateRoot() {
+  return path.join(getAgentvDataDir(), "trace-state");
+}
+function getWorkspacePoolRoot() {
+  return path.join(getAgentvDataDir(), "workspace-pool");
+}
 async function fileExists(filePath) {
   try {
     await access(filePath, constants.F_OK);
@@ -4188,14 +4217,14 @@ async function readJsonFile(filePath) {
   return JSON.parse(content);
 }
 async function findGitRoot(startPath) {
-  let currentDir = path.dirname(path.resolve(startPath));
-  const root = path.parse(currentDir).root;
+  let currentDir = path2.dirname(path2.resolve(startPath));
+  const root = path2.parse(currentDir).root;
   while (currentDir !== root) {
-    const gitPath = path.join(currentDir, ".git");
+    const gitPath = path2.join(currentDir, ".git");
     if (await fileExists(gitPath)) {
       return currentDir;
     }
-    const parentDir = path.dirname(currentDir);
+    const parentDir = path2.dirname(currentDir);
     if (parentDir === currentDir) {
       break;
     }
@@ -4206,8 +4235,8 @@ async function findGitRoot(startPath) {
 function buildDirectoryChain(filePath, repoRoot) {
   const directories = [];
   const seen = /* @__PURE__ */ new Set();
-  const boundary = path.resolve(repoRoot);
-  let current = path.resolve(path.dirname(filePath));
+  const boundary = path2.resolve(repoRoot);
+  let current = path2.resolve(path2.dirname(filePath));
   while (current !== void 0) {
     if (!seen.has(current)) {
       directories.push(current);
@@ -4216,7 +4245,7 @@ function buildDirectoryChain(filePath, repoRoot) {
     if (current === boundary) {
       break;
     }
-    const parent = path.dirname(current);
+    const parent = path2.dirname(current);
     if (parent === current) {
       break;
     }
@@ -4230,16 +4259,16 @@ function buildDirectoryChain(filePath, repoRoot) {
 function buildSearchRoots(evalPath, repoRoot) {
   const uniqueRoots = [];
   const addRoot = (root) => {
-    const normalized = path.resolve(root);
+    const normalized = path2.resolve(root);
     if (!uniqueRoots.includes(normalized)) {
       uniqueRoots.push(normalized);
     }
   };
-  let currentDir = path.dirname(evalPath);
+  let currentDir = path2.dirname(evalPath);
   let reachedBoundary = false;
   while (!reachedBoundary) {
     addRoot(currentDir);
-    const parentDir = path.dirname(currentDir);
+    const parentDir = path2.dirname(currentDir);
     if (currentDir === repoRoot || parentDir === currentDir) {
       reachedBoundary = true;
     } else {
@@ -4257,16 +4286,16 @@ function trimLeadingSeparators(value) {
 async function resolveFileReference(rawValue, searchRoots) {
   const displayPath = trimLeadingSeparators(rawValue);
   const potentialPaths = [];
-  if (path.isAbsolute(rawValue)) {
-    potentialPaths.push(path.normalize(rawValue));
+  if (path2.isAbsolute(rawValue)) {
+    potentialPaths.push(path2.normalize(rawValue));
   }
   for (const base of searchRoots) {
-    potentialPaths.push(path.resolve(base, displayPath));
+    potentialPaths.push(path2.resolve(base, displayPath));
   }
   const attempted = [];
   const seen = /* @__PURE__ */ new Set();
   for (const candidate of potentialPaths) {
-    const absoluteCandidate = path.resolve(candidate);
+    const absoluteCandidate = path2.resolve(candidate);
     if (seen.has(absoluteCandidate)) {
       continue;
     }
@@ -4448,11 +4477,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
-    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !path3.isAbsolute(cwd)) {
+    cwd = path3.resolve(path3.dirname(path3.resolve(evalFilePath)), cwd);
   }
   if (!cwd && evalFilePath) {
-    cwd = path2.dirname(path2.resolve(evalFilePath));
+    cwd = path3.dirname(path3.resolve(evalFilePath));
   }
   return {
     command,
@@ -4469,11 +4498,11 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
-    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !path3.isAbsolute(cwd)) {
+    cwd = path3.resolve(path3.dirname(path3.resolve(evalFilePath)), cwd);
   }
   if (!cwd && evalFilePath) {
-    cwd = path2.dirname(path2.resolve(evalFilePath));
+    cwd = path3.dirname(path3.resolve(evalFilePath));
   }
   const timeoutSeconds = input.timeout_seconds;
   const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
@@ -4531,7 +4560,15 @@ var DEPRECATED_TARGET_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
   ["retryInitialDelayMs", "retry_initial_delay_ms"],
   ["retryMaxDelayMs", "retry_max_delay_ms"],
   ["retryBackoffFactor", "retry_backoff_factor"],
-  ["retryStatusCodes", "retry_status_codes"]
+  ["retryStatusCodes", "retry_status_codes"],
+  ["modelReasoningEffort", "model_reasoning_effort"]
+]);
+var CODEX_MODEL_REASONING_EFFORT_VALUES = /* @__PURE__ */ new Set([
+  "minimal",
+  "low",
+  "medium",
+  "high",
+  "xhigh"
 ]);
 var DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
   ["timeoutSeconds", "timeout_seconds"]
@@ -4869,6 +4906,9 @@ function normalizeOpenAIBaseUrl(value) {
   if (trimmed.length === 0) {
     return DEFAULT_OPENAI_BASE_URL;
   }
+  if (/\.openai\.azure\.com\/openai\/deployments\/[^/]+$/i.test(trimmed)) {
+    return trimmed;
+  }
   return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
 }
 function resolveAzureConfig(target, env) {
@@ -4997,22 +5037,34 @@ function resolveGeminiConfig(target, env) {
 }
 function resolveCodexConfig(target, env, _evalFilePath) {
   const modelSource = target.model;
+  const modelReasoningEffortSource = target.model_reasoning_effort;
   const executableSource = target.executable ?? target.command ?? target.binary;
   const argsSource = target.args ?? target.arguments;
   const cwdSource = target.cwd;
   const timeoutSource = target.timeout_seconds;
   const logDirSource = target.log_dir ?? target.log_directory;
-  const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
   const systemPromptSource = target.system_prompt;
-  const streamLogResult = resolveStreamLog(target, env.AGENTV_CODEX_LOG_FORMAT);
-  if (streamLogResult.deprecationWarning) {
-    process.stderr.write(`[agentv] \u26A0 ${streamLogResult.deprecationWarning}
-`);
+  if (target.log_format !== void 0 || target.log_output_format !== void 0) {
+    throw new Error(
+      `${target.name}: log_format is no longer supported for codex targets. Use stream_log instead.`
+    );
   }
+  const streamLogResult = resolveStreamLog({ name: target.name, stream_log: target.stream_log });
   const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
     allowLiteral: true,
     optionalEnv: true
   });
+  const modelReasoningEffort = normalizeCodexModelReasoningEffort(
+    resolveOptionalString(
+      modelReasoningEffortSource,
+      env,
+      `${target.name} codex model reasoning effort`,
+      {
+        allowLiteral: true,
+        optionalEnv: true
+      }
+    )
+  );
   const executable = resolveOptionalString(executableSource, env, `${target.name} codex executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -5027,32 +5079,30 @@ function resolveCodexConfig(target, env, _evalFilePath) {
     allowLiteral: true,
     optionalEnv: true
   });
-  const logFormat = normalizeCodexLogFormat(logFormatSource);
   const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
   return {
     model,
+    modelReasoningEffort,
     executable,
     args,
     cwd,
     timeoutMs,
     logDir,
-    logFormat,
     streamLog: streamLogResult.streamLog,
     systemPrompt
   };
 }
-function normalizeCodexLogFormat(value) {
-  if (value === void 0 || value === null) {
+function normalizeCodexModelReasoningEffort(value) {
+  if (value === void 0) {
     return void 0;
   }
-  if (typeof value !== "string") {
-    throw new Error("codex log format must be 'summary' or 'json'");
-  }
   const normalized = value.trim().toLowerCase();
-  if (normalized === "json" || normalized === "summary") {
+  if (CODEX_MODEL_REASONING_EFFORT_VALUES.has(normalized)) {
     return normalized;
   }
-  throw new Error("codex log format must be 'summary' or 'json'");
+  throw new Error(
+    `codex model_reasoning_effort must be one of: ${[...CODEX_MODEL_REASONING_EFFORT_VALUES].join(", ")}`
+  );
 }
 function resolveStreamLog(target, envFallback) {
   if (target.stream_log !== void 0 && target.stream_log !== null) {
@@ -5461,7 +5511,7 @@ function resolveClaudeConfig(target, env, _evalFilePath) {
   };
 }
 function resolveCcMirrorBinaryPath(variant) {
-  const variantJsonPath = path2.join(homedir(), ".cc-mirror", variant, "variant.json");
+  const variantJsonPath = path3.join(homedir(), ".cc-mirror", variant, "variant.json");
   if (!existsSync(variantJsonPath)) {
     throw new Error(
       `cc-mirror variant "${variant}": ${variantJsonPath} not found. Install the variant or set "executable" explicitly.`
@@ -5538,8 +5588,8 @@ function resolveCliConfig(target, env, evalFilePath) {
   const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
   if (!parseResult.success) {
     const firstError = parseResult.error.errors[0];
-    const path47 = firstError?.path.join(".") || "";
-    const prefix = path47 ? `${target.name} ${path47}: ` : `${target.name}: `;
+    const path53 = firstError?.path.join(".") || "";
+    const prefix = path53 ? `${target.name} ${path53}: ` : `${target.name}: `;
     throw new Error(`${prefix}${firstError?.message}`);
   }
   const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -5560,11 +5610,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
-    cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !path3.isAbsolute(cwd)) {
+    cwd = path3.resolve(path3.dirname(path3.resolve(evalFilePath)), cwd);
   }
   if (!cwd && evalFilePath) {
-    cwd = path2.dirname(path2.resolve(evalFilePath));
+    cwd = path3.dirname(path3.resolve(evalFilePath));
   }
   return {
     command,
@@ -5918,7 +5968,7 @@ function parseJsonlCases(content, filePath) {
   return results;
 }
 async function loadCasesFromFile(filePath) {
-  const ext = path3.extname(filePath).toLowerCase();
+  const ext = path4.extname(filePath).toLowerCase();
   let content;
   try {
     content = await readFile2(filePath, "utf8");
@@ -5945,7 +5995,7 @@ async function loadCasesFromFile(filePath) {
 }
 async function resolveFileReference2(ref, evalFileDir) {
   const rawPath = extractFilePath(ref);
-  const absolutePattern = path3.resolve(evalFileDir, rawPath);
+  const absolutePattern = path4.resolve(evalFileDir, rawPath);
   if (isGlobPattern(rawPath)) {
     const matches = await fg(absolutePattern.replaceAll("\\", "/"), {
       onlyFiles: true,
@@ -5972,10 +6022,10 @@ async function loadCasesFromDirectory(dirPath) {
   const subdirs = entries.filter((e) => e.isDirectory()).sort((a, b) => a.name < b.name ? -1 : a.name > b.name ? 1 : 0);
   const results = [];
   for (const subdir of subdirs) {
-    const subdirPath = path3.join(dirPath, subdir.name);
+    const subdirPath = path4.join(dirPath, subdir.name);
     let caseFilePath;
     for (const filename of ["case.yaml", "case.yml"]) {
-      const candidate = path3.join(subdirPath, filename);
+      const candidate = path4.join(subdirPath, filename);
       try {
         const s = await stat(candidate);
         if (s.isFile()) {
@@ -6011,7 +6061,7 @@ async function loadCasesFromDirectory(dirPath) {
       caseObj.id = subdir.name;
     }
     if (!caseObj.workspace) {
-      const workspaceDirPath = path3.join(subdirPath, "workspace");
+      const workspaceDirPath = path4.join(subdirPath, "workspace");
       try {
         const s = await stat(workspaceDirPath);
         if (s.isDirectory()) {
@@ -6037,40 +6087,40 @@ async function expandFileReferences(tests, evalFileDir) {
   return expanded;
 }
-// ../../packages/core/dist/chunk-N5EU446L.js
+// ../../packages/core/dist/chunk-7QB53OPK.js
 import path46 from "node:path";
 import { pathToFileURL as pathToFileURL2 } from "node:url";
 import { existsSync as existsSync6 } from "node:fs";
 import path45 from "node:path";
 import micromatch4 from "micromatch";
+import { mkdir, readFile as readFile3, writeFile } from "node:fs/promises";
+import path5 from "node:path";
 import { execFile as execFile3 } from "node:child_process";
 import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
 import { existsSync as existsSync5 } from "node:fs";
-import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir8, stat as stat9 } from "node:fs/promises";
+import { copyFile as copyFile2, mkdir as mkdir15, readdir as readdir8, stat as stat9 } from "node:fs/promises";
 import path44 from "node:path";
 import { promisify as promisify7 } from "node:util";
 import micromatch3 from "micromatch";
-import os from "node:os";
-import path4 from "node:path";
-import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { mkdtemp, rm, writeFile as writeFile2 } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { dirname, join } from "node:path";
 import { randomBytes } from "node:crypto";
 import { createServer } from "node:http";
 import fs from "node:fs/promises";
 import path32 from "node:path";
-import { readFile as readFile3 } from "node:fs/promises";
+import { readFile as readFile22 } from "node:fs/promises";
 import path22 from "node:path";
 import { fileURLToPath } from "node:url";
 import { spawn } from "node:child_process";
 import { randomUUID } from "node:crypto";
 import { createWriteStream } from "node:fs";
-import { mkdir } from "node:fs/promises";
-import path5 from "node:path";
+import { mkdir as mkdir2 } from "node:fs/promises";
+import path52 from "node:path";
 import path42 from "node:path";
 import { randomUUID as randomUUID2 } from "node:crypto";
 import { createWriteStream as createWriteStream2 } from "node:fs";
-import { mkdir as mkdir2 } from "node:fs/promises";
+import { mkdir as mkdir3 } from "node:fs/promises";
 import path6 from "node:path";
 import { exec as execWithCallback } from "node:child_process";
 import fs2 from "node:fs/promises";
@@ -6079,10 +6129,10 @@ import path7 from "node:path";
 import { promisify } from "node:util";
 import { randomUUID as randomUUID3 } from "node:crypto";
 import { createWriteStream as createWriteStream3 } from "node:fs";
-import { mkdir as mkdir3 } from "node:fs/promises";
+import { mkdir as mkdir4 } from "node:fs/promises";
 import path8 from "node:path";
 import { randomUUID as randomUUID5 } from "node:crypto";
-import { mkdir as mkdir4 } from "node:fs/promises";
+import { mkdir as mkdir5 } from "node:fs/promises";
 import { homedir as homedir2 } from "node:os";
 import path11 from "node:path";
 import { Readable, Writable } from "node:stream";
@@ -18704,10 +18754,10 @@ var RequestError = class _RequestError extends Error {
   }
 };
-// ../../packages/core/dist/chunk-N5EU446L.js
+// ../../packages/core/dist/chunk-7QB53OPK.js
 import { exec as execCallback } from "node:child_process";
 import { readdirSync, statSync } from "node:fs";
-import { readFile as readFile22, readdir as readdir2, stat as stat2 } from "node:fs/promises";
+import { readFile as readFile32, readdir as readdir2, stat as stat2 } from "node:fs/promises";
 import path9 from "node:path";
 import { promisify as promisify2 } from "node:util";
 import { randomUUID as randomUUID4 } from "node:crypto";
@@ -18715,26 +18765,26 @@ import { createWriteStream as createWriteStream4, existsSync as existsSync2, rea
 import { arch, homedir as homedir3, platform } from "node:os";
 import path10 from "node:path";
 import { fileURLToPath as fileURLToPath2 } from "node:url";
-import { readFile as readFile4 } from "node:fs/promises";
+import { readFile as readFile5 } from "node:fs/promises";
 import { homedir as homedir4 } from "node:os";
 import path13 from "node:path";
-import { readFile as readFile32, readdir as readdir22, stat as stat22 } from "node:fs/promises";
+import { readFile as readFile4, readdir as readdir22, stat as stat22 } from "node:fs/promises";
 import { homedir as homedir32 } from "node:os";
 import path12 from "node:path";
 import { randomUUID as randomUUID6 } from "node:crypto";
 import { existsSync as existsSync22 } from "node:fs";
-import { mkdir as mkdir5 } from "node:fs/promises";
+import { mkdir as mkdir6 } from "node:fs/promises";
 import path14 from "node:path";
 import { execSync, spawn as spawn3 } from "node:child_process";
 import { randomUUID as randomUUID7 } from "node:crypto";
 import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
-import { mkdir as mkdir6, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile2 } from "node:fs/promises";
+import { mkdir as mkdir7, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
 import { tmpdir as tmpdir2 } from "node:os";
 import path15 from "node:path";
 import { execSync as execSync2 } from "node:child_process";
 import { randomUUID as randomUUID8 } from "node:crypto";
 import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
-import { mkdir as mkdir7 } from "node:fs/promises";
+import { mkdir as mkdir8 } from "node:fs/promises";
 import path16 from "node:path";
 import { createInterface } from "node:readline";
 import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
@@ -18742,28 +18792,28 @@ import { exec as exec2 } from "node:child_process";
 import { constants as constants2, access as access2 } from "node:fs/promises";
 import path27 from "node:path";
 import { promisify as promisify4 } from "node:util";
-import { stat as stat5, writeFile as writeFile5 } from "node:fs/promises";
+import { stat as stat5, writeFile as writeFile6 } from "node:fs/promises";
 import path25 from "node:path";
 import { constants as constants3 } from "node:fs";
-import { access as access3, mkdir as mkdir8, readdir as readdir3, rm as rm3, stat as stat3 } from "node:fs/promises";
+import { access as access3, mkdir as mkdir9, readdir as readdir3, rm as rm3, stat as stat3 } from "node:fs/promises";
 import path17 from "node:path";
 import path18 from "node:path";
 import path19 from "node:path";
-import { readFile as readFile5 } from "node:fs/promises";
+import { readFile as readFile6 } from "node:fs/promises";
 import path20 from "node:path";
 import { exec, spawn as spawn4 } from "node:child_process";
-import { mkdir as mkdir9, writeFile as writeFile3 } from "node:fs/promises";
+import { mkdir as mkdir10, writeFile as writeFile4 } from "node:fs/promises";
 import path222 from "node:path";
 import { promisify as promisify3 } from "node:util";
 import path21 from "node:path";
-import { copyFile, mkdir as mkdir10, readFile as readFile6, readdir as readdir4, stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
+import { copyFile, mkdir as mkdir11, readFile as readFile7, readdir as readdir4, stat as stat4, writeFile as writeFile5 } from "node:fs/promises";
 import path24 from "node:path";
 import path23 from "node:path";
 import JSON5 from "json5";
-import { writeFile as writeFile6 } from "node:fs/promises";
+import { writeFile as writeFile7 } from "node:fs/promises";
 import path26 from "node:path";
 import { constants as constants32 } from "node:fs";
-import { access as access32, readFile as readFile7 } from "node:fs/promises";
+import { access as access32, readFile as readFile8 } from "node:fs/promises";
 import path28 from "node:path";
 import path29 from "node:path";
 import fg2 from "fast-glob";
@@ -18772,12 +18822,12 @@ import path31 from "node:path";
 import fg22 from "fast-glob";
 import path322 from "node:path";
 import fg3 from "fast-glob";
-import { cp, mkdir as mkdir12, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
+import { cp, mkdir as mkdir13, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
 import path33 from "node:path";
 import { execFile } from "node:child_process";
 import { createHash } from "node:crypto";
 import { existsSync as existsSync3 } from "node:fs";
-import { cp as cp2, mkdir as mkdir13, readFile as readFile8, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
+import { cp as cp2, mkdir as mkdir14, readFile as readFile9, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile8 } from "node:fs/promises";
 import path34 from "node:path";
 import { promisify as promisify5 } from "node:util";
 import { execFile as execFile2 } from "node:child_process";
@@ -18786,25 +18836,65 @@ import path35 from "node:path";
 import { promisify as promisify6 } from "node:util";
 import { readdir as readdir7, stat as stat7 } from "node:fs/promises";
 import path36 from "node:path";
-import { readFile as readFile15, stat as stat8 } from "node:fs/promises";
+import { readFile as readFile16, stat as stat8 } from "node:fs/promises";
 import path43 from "node:path";
 import micromatch2 from "micromatch";
-import { readFile as readFile9 } from "node:fs/promises";
-import path37 from "node:path";
+import { stringify as stringifyYaml } from "yaml";
 import { readFile as readFile10 } from "node:fs/promises";
+import path37 from "node:path";
+import { readFile as readFile11 } from "node:fs/promises";
 import path39 from "node:path";
 import { constants as constants4 } from "node:fs";
 import { access as access4 } from "node:fs/promises";
 import path38 from "node:path";
 import { fileURLToPath as fileURLToPath4 } from "node:url";
-import { readFile as readFile12 } from "node:fs/promises";
+import { readFile as readFile13 } from "node:fs/promises";
 import path40 from "node:path";
-import { readFile as readFile11 } from "node:fs/promises";
-import { readFile as readFile14 } from "node:fs/promises";
+import { readFile as readFile12 } from "node:fs/promises";
+import { readFile as readFile15 } from "node:fs/promises";
 import path422 from "node:path";
 import micromatch from "micromatch";
-import { readFile as readFile13 } from "node:fs/promises";
+import { readFile as readFile14 } from "node:fs/promises";
 import path41 from "node:path";
+var DEFAULT_CACHE_PATH = ".agentv/cache";
+var ResponseCache = class {
+  cachePath;
+  constructor(cachePath) {
+    this.cachePath = cachePath ?? DEFAULT_CACHE_PATH;
+  }
+  async get(key) {
+    const filePath = this.keyToPath(key);
+    try {
+      const data = await readFile3(filePath, "utf8");
+      return JSON.parse(data);
+    } catch {
+      return void 0;
+    }
+  }
+  async set(key, value) {
+    const filePath = this.keyToPath(key);
+    const dir = path5.dirname(filePath);
+    await mkdir(dir, { recursive: true });
+    await writeFile(filePath, JSON.stringify(value, null, 2), "utf8");
+  }
+  keyToPath(key) {
+    const prefix = key.slice(0, 2);
+    return path5.join(this.cachePath, prefix, `${key}.json`);
+  }
+};
+function shouldEnableCache(params) {
+  if (params.cliNoCache) return false;
+  if (params.cliCache) return true;
+  if (params.yamlCache !== void 0) return params.yamlCache;
+  return params.tsConfigCache === true;
+}
+function shouldSkipCacheForTemperature(targetConfig) {
+  const temp = targetConfig.temperature;
+  if (typeof temp === "number" && temp > 0) {
+    return true;
+  }
+  return false;
+}
 var DEFAULT_THRESHOLD = 0.8;
 var PASS_THRESHOLD = DEFAULT_THRESHOLD;
 function scoreToVerdict(score, threshold = DEFAULT_THRESHOLD) {
@@ -19026,32 +19116,6 @@ function validateConcurrency(concurrency) {
     throw new TypeError("Expected `concurrency` to be a number from 1 and up");
   }
 }
-function readEnvPath(name) {
-  const value = process.env[name];
-  if (!value || value === "undefined") return void 0;
-  return value;
-}
-function getAgentvConfigDir() {
-  return readEnvPath("AGENTV_HOME") ?? path4.join(os.homedir(), ".agentv");
-}
-function getAgentvHome() {
-  return getAgentvConfigDir();
-}
-function getAgentvDataDir() {
-  return readEnvPath("AGENTV_DATA_DIR") ?? getAgentvConfigDir();
-}
-function getWorkspacesRoot() {
-  return path4.join(getAgentvDataDir(), "workspaces");
-}
-function getSubagentsRoot() {
-  return path4.join(getAgentvDataDir(), "subagents");
-}
-function getTraceStateRoot() {
-  return path4.join(getAgentvDataDir(), "trace-state");
-}
-function getWorkspacePoolRoot() {
-  return path4.join(getAgentvDataDir(), "workspace-pool");
-}
 var DEFAULT_MAX_CALLS = 50;
 async function createTargetProxy(options) {
   const { defaultProvider, targetResolver, availableTargets, maxCalls } = options;
@@ -19373,7 +19437,7 @@ async function materializeContentForGrader(messages, getWorkDir) {
         const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
         const dir = await getWorkDir();
         const filePath = join(dir, `img-${counter++}.${ext}`);
-        await writeFile(filePath, Buffer.from(base64Data, "base64"));
+        await writeFile2(filePath, Buffer.from(base64Data, "base64"));
         blocks.push({ type: "image", media_type: img.media_type, path: filePath });
       } else {
         blocks.push({ type: "image", media_type: img.media_type, path: img.source });
@@ -19416,7 +19480,7 @@ var CodeGrader = class {
       if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
         const tmpDir = await mkdtemp(join(tmpdir(), "agentv-grader-"));
         outputPath = join(tmpDir, "output.json");
-        await writeFile(outputPath, serialized);
+        await writeFile2(outputPath, serialized);
         outputForPayload = null;
       }
     }
@@ -19433,6 +19497,7 @@ var CodeGrader = class {
         context.evalCase.input,
         getImageDir
       ),
+      metadata: context.evalCase.metadata ?? null,
       trace: context.trace ?? null,
       tokenUsage: context.tokenUsage ?? null,
       costUsd: context.costUsd ?? null,
@@ -19664,7 +19729,7 @@ async function preprocessContentFile(block, preprocessors, basePath) {
     return runContentPreprocessor(block, resolvedPath, preprocessor);
   }
   try {
-    const buffer = await readFile3(resolvedPath);
+    const buffer = await readFile22(resolvedPath);
     const text = buffer.toString("utf8").replace(/\r\n/g, "\n");
     if (buffer.includes(0) || text.includes(REPLACEMENT_CHAR)) {
       return {
@@ -19758,6 +19823,10 @@ ${text}`;
 var TEMPLATE_VARIABLES = {
   EXPECTED_OUTPUT: "expected_output",
   CRITERIA: "criteria",
+  METADATA: "metadata",
+  METADATA_JSON: "metadata_json",
+  RUBRICS: "rubrics",
+  RUBRICS_JSON: "rubrics_json",
   INPUT: "input",
   OUTPUT: "output",
   FILE_CHANGES: "file_changes",
@@ -19779,6 +19848,25 @@ var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
   [TEMPLATE_VARIABLES.OUTPUT_TEXT, TEMPLATE_VARIABLES.OUTPUT],
   [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT, TEMPLATE_VARIABLES.EXPECTED_OUTPUT]
 ]);
+/** Grader-facing instructions for each supported rubric operator, keyed by operator name. */
+var OPERATOR_GUIDANCE = {
+  correctness: "Correctness: mark satisfied only when the answer positively supports or fulfills the outcome. Omission or contradiction should not satisfy it.",
+  contradiction: "Contradiction guard: mark satisfied when the answer does not make a claim that contradicts the outcome. Do not require the answer to mention the outcome; mark unsatisfied only for incompatible claims."
+};
+/**
+ * Formats the inline operator suffix appended to a rubric line.
+ * @param operator Rubric operator name, if any.
+ * @returns ` (operator: <name>)`, or an empty string when no operator is set.
+ */
+function formatRubricOperatorLabel(operator) {
+  return operator ? ` (operator: ${operator})` : "";
+}
+/**
+ * Collects the guidance text for every distinct operator used by the rubrics,
+ * in first-seen order.
+ *
+ * Operators without an entry in OPERATOR_GUIDANCE are skipped, so the result
+ * only ever contains strings. Without that filter an unrecognized operator
+ * would surface as `undefined` and be rendered as "- undefined" in a prompt.
+ * @param rubrics Iterable of rubric objects; only `operator` is read.
+ * @returns Array of guidance strings (empty when no known operator is used).
+ */
+function formatRubricOperatorGuidance(rubrics) {
+  const operators = /* @__PURE__ */ new Set();
+  for (const rubric of rubrics) {
+    if (rubric.operator) {
+      operators.add(rubric.operator);
+    }
+  }
+  const guidance = [];
+  for (const operator of operators) {
+    // Own-property check keeps inherited keys such as "constructor" from matching.
+    if (Object.prototype.hasOwnProperty.call(OPERATOR_GUIDANCE, operator)) {
+      guidance.push(OPERATOR_GUIDANCE[operator]);
+    }
+  }
+  return guidance;
+}
 var DEFAULT_MAX_STEPS = 10;
 var MAX_STEPS_LIMIT = 50;
 var MAX_FILE_SIZE = 50 * 1024;
@@ -19860,6 +19948,32 @@ var scoreRangeEvaluationSchema = external_exports.object({
   checks: external_exports.array(scoreRangeCheckResultSchema).describe("Scores for each rubric criterion"),
   overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)").optional()
 });
+/** Serializes a value as 2-space indented JSON; `undefined` becomes an empty string. */
+function stringifyPretty(value) {
+  if (value === void 0) {
+    return "";
+  }
+  return JSON.stringify(value, null, 2);
+}
+/** Serializes a value as single-line JSON; `undefined` becomes an empty string. */
+function stringifyCompact(value) {
+  if (value === void 0) {
+    return "";
+  }
+  return JSON.stringify(value);
+}
+/**
+ * Builds the substitution map used to fill a grader prompt template.
+ *
+ * The question comes from `promptInputs.question` when it has non-whitespace
+ * content, otherwise from `evalCase.question`. Rubrics are only exposed when
+ * the evaluator is of type "llm-grader". Metadata and rubrics are offered in
+ * both pretty-printed and compact JSON forms.
+ * @param context Evaluation context (reads promptInputs, evalCase, candidate,
+ *   evaluator, fileChanges and toolCalls).
+ * @returns Object keyed by TEMPLATE_VARIABLES names, including deprecated aliases.
+ */
+function buildTemplateVariables(context) {
+  const { evalCase, promptInputs, evaluator } = context;
+  const promptQuestion = promptInputs.question;
+  const usePromptQuestion = promptQuestion && promptQuestion.trim().length > 0;
+  const questionText = (usePromptQuestion ? promptQuestion : evalCase.question).trim();
+  const candidateText = context.candidate.trim();
+  const expectedText = (evalCase.reference_answer ?? "").trim();
+  const rubrics = evaluator?.type === "llm-grader" ? evaluator.rubrics : void 0;
+  const { metadata } = evalCase;
+  return {
+    [TEMPLATE_VARIABLES.INPUT]: questionText,
+    [TEMPLATE_VARIABLES.OUTPUT]: candidateText,
+    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: expectedText,
+    [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
+    [TEMPLATE_VARIABLES.METADATA]: stringifyPretty(metadata),
+    [TEMPLATE_VARIABLES.METADATA_JSON]: stringifyCompact(metadata),
+    [TEMPLATE_VARIABLES.RUBRICS]: stringifyPretty(rubrics),
+    [TEMPLATE_VARIABLES.RUBRICS_JSON]: stringifyCompact(rubrics),
+    [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
+    [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
+    // Deprecated aliases: identical values to the primary variables above
+    [TEMPLATE_VARIABLES.INPUT_TEXT]: questionText,
+    [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidateText,
+    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: expectedText
+  };
+}
 function resolveContentBasePath(context) {
   if (context.workspacePath) {
     return context.workspacePath;
@@ -19931,19 +20045,7 @@ var LlmGrader = class {
   // LLM mode (existing)
   // ---------------------------------------------------------------------------
   async evaluateFreeform(context, graderProvider) {
-    const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
-    const variables = {
-      [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
-      [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
-      [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
-      [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
-      [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
-      [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
-      // Deprecated aliases — same values as the primary variables above
-      [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
-      [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
-      [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
-    };
+    const variables = buildTemplateVariables(context);
     const systemPrompt = buildOutputSchema();
     const graderTemplate = context.graderTemplateOverride ?? this.graderTemplate ?? DEFAULT_GRADER_TEMPLATE;
     warnDeprecatedTemplateVars(graderTemplate);
@@ -20010,7 +20112,7 @@ ${context.toolCalls}`;
     if (hasScoreRanges) {
       return this.evaluateWithScoreRanges(context, graderProvider, rubrics);
     }
-    const prompt = this.buildRubricPrompt(context, rubrics);
+    const prompt = context.graderTemplateOverride || this.graderTemplate ? this.buildCustomPrompt(context) : this.buildRubricPrompt(context, rubrics);
     const systemPrompt = buildRubricOutputSchema();
     const graderRawRequest = {
       userPrompt: prompt,
@@ -20055,7 +20157,7 @@ ${context.toolCalls}`;
    * Each criterion is scored 0-10 and normalized to 0-1.
    */
   async evaluateWithScoreRanges(context, graderProvider, rubrics) {
-    const prompt = this.buildScoreRangePrompt(context, rubrics);
+    const prompt = context.graderTemplateOverride || this.graderTemplate ? this.buildCustomPrompt(context) : this.buildScoreRangePrompt(context, rubrics);
     const systemPrompt = buildScoreRangeOutputSchema();
     const graderRawRequest = {
       userPrompt: prompt,
@@ -20274,21 +20376,11 @@ ${context.toolCalls}`;
    */
   buildAgentUserPrompt(context) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
-    const variables = {
-      [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
-      [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
-      [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
-      [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
-      [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
-      [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
-      // Deprecated aliases
-      [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
-      [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
-      [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
-    };
-    if (this.graderTemplate) {
-      warnDeprecatedTemplateVars(this.graderTemplate);
-      return substituteVariables(this.graderTemplate, variables);
+    const variables = buildTemplateVariables(context);
+    const template = context.graderTemplateOverride ?? this.graderTemplate;
+    if (template) {
+      warnDeprecatedTemplateVars(template);
+      return substituteVariables(template, variables);
     }
     const config2 = context.evaluator;
     const rubrics = config2?.type === "llm-grader" ? config2.rubrics : void 0;
@@ -20338,21 +20430,11 @@ ${context.toolCalls}`;
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const config2 = context.evaluator;
     const rubrics = config2?.type === "llm-grader" ? config2.rubrics : void 0;
-    if (this.graderTemplate) {
-      const variables = {
-        [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
-        [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
-        [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
-        [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
-        [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
-        [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
-        // Deprecated aliases
-        [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
-        [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
-        [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
-      };
-      warnDeprecatedTemplateVars(this.graderTemplate);
-      const customPrompt = substituteVariables(this.graderTemplate, variables);
+    const template = context.graderTemplateOverride ?? this.graderTemplate;
+    if (template) {
+      const variables = buildTemplateVariables(context);
+      warnDeprecatedTemplateVars(template);
+      const customPrompt = substituteVariables(template, variables);
       const outputSchema = rubrics && rubrics.length > 0 ? buildRubricOutputSchema() : buildOutputSchema();
       return `${customPrompt}
@@ -20478,6 +20560,9 @@ ${outputSchema}`;
       const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
       const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
       parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
+      if (rubric.operator) {
+        parts.push(`Operator: ${rubric.operator}`);
+      }
       if (rubric.outcome) {
         parts.push(`Description: ${rubric.outcome}`);
       }
@@ -20490,12 +20575,21 @@ ${outputSchema}`;
         }
       }
     }
+    const operatorGuidance = formatRubricOperatorGuidance(rubrics);
+    if (operatorGuidance.length > 0) {
+      parts.push("", ...operatorGuidance);
+    }
     parts.push(
       "",
       "For each criterion, provide an integer score 0-10 that matches one of its defined score ranges."
     );
     return parts.join("\n");
   }
+  /**
+   * Renders the user prompt from a custom grader template.
+   *
+   * The template is resolved with `||` so that it matches the truthiness
+   * test the rubric and score-range callers use to choose this path: an
+   * empty-string override falls through to `this.graderTemplate` instead of
+   * producing an empty prompt.
+   * @param context Evaluation context; `graderTemplateOverride` takes
+   *   precedence over the grader's own template when it is non-empty.
+   * @returns The template with all template variables substituted.
+   */
+  buildCustomPrompt(context) {
+    const template = context.graderTemplateOverride || this.graderTemplate || "";
+    warnDeprecatedTemplateVars(template);
+    return substituteVariables(template, buildTemplateVariables(context));
+  }
   buildRubricPrompt(context, rubrics) {
     const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
     const parts = [
@@ -20519,10 +20613,21 @@ ${outputSchema}`;
       parts.push("[[ ## tool_calls ## ]]", context.toolCalls, "");
     }
     parts.push("[[ ## rubrics ## ]]");
+    const operatorGuidance = formatRubricOperatorGuidance(rubrics);
+    if (operatorGuidance.length > 0) {
+      parts.push("", "Operator guidance:");
+      for (const guidance of operatorGuidance) {
+        parts.push(`- ${guidance}`);
+      }
+      parts.push("");
+    }
     for (const rubric of rubrics) {
       const requiredLabel = rubric.required ? " (REQUIRED)" : "";
       const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
-      parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.outcome}`);
+      const operatorLabel = formatRubricOperatorLabel(rubric.operator);
+      parts.push(
+        `- [${rubric.id}]${requiredLabel}${weightLabel}${operatorLabel}: ${rubric.outcome}`
+      );
     }
     parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
     return parts.join("\n");
@@ -21248,6 +21353,384 @@ var CostGrader = class {
     };
   }
 };
+// Version tag that NormalizedTrajectoryWireSchema requires as a literal `schema_version`.
+var NORMALIZED_TRAJECTORY_SCHEMA_VERSION = "agentv.trace.v1";
+// Allowed values for a trace source's `kind` field.
+var NORMALIZED_TRACE_SOURCE_KINDS = [
+  "agentv_run",
+  "otlp",
+  "phoenix",
+  "langfuse",
+  "pi_session",
+  "imported_transcript",
+  "compact_transcript"
+];
+// Allowed values for a trace event's `type` field.
+var NORMALIZED_TRACE_EVENT_TYPES = [
+  "message",
+  "model_turn",
+  "tool_call",
+  "tool_result"
+];
+// Allowed values for a tool's `status` field.
+var NORMALIZED_TOOL_STATUSES = ["ok", "error", "timeout", "cancelled", "unknown"];
+// Allowed values for a redaction state's `level` field.
+var NORMALIZED_REDACTION_LEVELS = ["none", "partial", "full"];
+/**
+ * Returns a shallow copy of `value` without the own enumerable properties
+ * whose value is `undefined`. Other falsy values (null, 0, "") are kept.
+ */
+function omitUndefinedProperties(value) {
+  const definedEntries = Object.entries(value).filter((entry) => entry[1] !== void 0);
+  return Object.fromEntries(definedEntries);
+}
+// Wire-format (snake_case) validation schemas for normalized trajectories.
+// NOTE(review): `external_exports` looks like a bundled zod namespace — confirm.
+
+// Free-form metadata: string keys mapped to values of unknown type.
+var MetadataWireSchema = external_exports.record(external_exports.string(), external_exports.unknown());
+// Token counts; `input` and `output` are required, `cached` and `reasoning` optional.
+var TokenUsageWireSchema = external_exports.object({
+  input: external_exports.number(),
+  output: external_exports.number(),
+  cached: external_exports.number().optional(),
+  reasoning: external_exports.number().optional()
+});
+// Redaction state; `level` must be one of NORMALIZED_REDACTION_LEVELS.
+var NormalizedRedactionStateWireSchema = external_exports.object({
+  level: external_exports.enum(NORMALIZED_REDACTION_LEVELS),
+  fields: external_exports.array(external_exports.string()).optional(),
+  reason: external_exports.string().optional()
+});
+// Error details attached to a tool; only `message` is required.
+var NormalizedTraceErrorWireSchema = external_exports.object({
+  message: external_exports.string(),
+  name: external_exports.string().optional(),
+  code: external_exports.string().optional(),
+  stack: external_exports.string().optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Trace source descriptor; `kind` must be one of NORMALIZED_TRACE_SOURCE_KINDS.
+var NormalizedTraceSourceWireSchema = external_exports.object({
+  kind: external_exports.enum(NORMALIZED_TRACE_SOURCE_KINDS),
+  path: external_exports.string().optional(),
+  url: external_exports.string().optional(),
+  provider: external_exports.string().optional(),
+  format: external_exports.string().optional(),
+  version: external_exports.string().optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Session descriptor; every field is optional.
+var NormalizedTraceSessionWireSchema = external_exports.object({
+  session_id: external_exports.string().optional(),
+  conversation_id: external_exports.string().optional(),
+  cwd: external_exports.string().optional(),
+  started_at: external_exports.string().optional(),
+  ended_at: external_exports.string().optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Branch selection info; `included_event_ids` corresponds to the
+// `includedEventIds` list that getSelectedTrajectoryEvents filters on.
+var NormalizedTraceBranchWireSchema = external_exports.object({
+  selected_leaf_id: external_exports.string().optional(),
+  selected_path_ids: external_exports.array(external_exports.string()).optional(),
+  included_event_ids: external_exports.array(external_exports.string()).optional(),
+  omitted_event_ids: external_exports.array(external_exports.string()).optional(),
+  selection_reason: external_exports.string().optional()
+});
+// Reference back to a record in the original source; `line` is a non-negative integer.
+var NormalizedTraceSourceRefWireSchema = external_exports.object({
+  event_id: external_exports.string().optional(),
+  message_id: external_exports.string().optional(),
+  span_id: external_exports.string().optional(),
+  trace_id: external_exports.string().optional(),
+  raw_kind: external_exports.string().optional(),
+  path: external_exports.string().optional(),
+  line: external_exports.number().int().nonnegative().optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Raw evidence attached to an event; only `kind` is required.
+var NormalizedRawEvidenceWireSchema = external_exports.object({
+  kind: external_exports.string(),
+  ref: external_exports.string().optional(),
+  media_type: external_exports.string().optional(),
+  content: external_exports.unknown().optional(),
+  redacted: external_exports.boolean().optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Message payload of an event; only `role` is required.
+var NormalizedTraceMessageWireSchema = external_exports.object({
+  role: external_exports.string(),
+  name: external_exports.string().optional(),
+  content: external_exports.unknown().optional(),
+  redaction: NormalizedRedactionStateWireSchema.optional(),
+  token_usage: TokenUsageWireSchema.optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Model payload of an event; every field is optional.
+var NormalizedTraceModelWireSchema = external_exports.object({
+  provider: external_exports.string().optional(),
+  name: external_exports.string().optional(),
+  invocation_id: external_exports.string().optional(),
+  token_usage: TokenUsageWireSchema.optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Tool payload of an event; `name` is required, `status` must be one of
+// NORMALIZED_TOOL_STATUSES when present.
+var NormalizedTraceToolWireSchema = external_exports.object({
+  name: external_exports.string(),
+  call_id: external_exports.string().optional(),
+  input: external_exports.unknown().optional(),
+  output: external_exports.unknown().optional(),
+  status: external_exports.enum(NORMALIZED_TOOL_STATUSES).optional(),
+  error: NormalizedTraceErrorWireSchema.optional(),
+  redaction: NormalizedRedactionStateWireSchema.optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// A single trajectory event; `event_id`, `ordinal` and `type` are required.
+var NormalizedTraceEventWireSchema = external_exports.object({
+  event_id: external_exports.string(),
+  parent_event_id: external_exports.string().optional(),
+  ordinal: external_exports.number().int().nonnegative(),
+  type: external_exports.enum(NORMALIZED_TRACE_EVENT_TYPES),
+  timestamp: external_exports.string().optional(),
+  duration_ms: external_exports.number().nonnegative().optional(),
+  duration_inferred: external_exports.boolean().optional(),
+  turn_index: external_exports.number().int().nonnegative().optional(),
+  message: NormalizedTraceMessageWireSchema.optional(),
+  model: NormalizedTraceModelWireSchema.optional(),
+  tool: NormalizedTraceToolWireSchema.optional(),
+  source_ref: NormalizedTraceSourceRefWireSchema.optional(),
+  raw_evidence: external_exports.array(NormalizedRawEvidenceWireSchema).optional(),
+  redaction: NormalizedRedactionStateWireSchema.optional(),
+  metadata: MetadataWireSchema.optional()
+});
+// Top-level trajectory document; `schema_version`, `source`, `session` and
+// `events` are required.
+var NormalizedTrajectoryWireSchema = external_exports.object({
+  schema_version: external_exports.literal(NORMALIZED_TRAJECTORY_SCHEMA_VERSION),
+  source: NormalizedTraceSourceWireSchema,
+  session: NormalizedTraceSessionWireSchema,
+  branch: NormalizedTraceBranchWireSchema.optional(),
+  events: external_exports.array(NormalizedTraceEventWireSchema),
+  token_usage: TokenUsageWireSchema.optional(),
+  cost_usd: external_exports.number().optional(),
+  duration_ms: external_exports.number().optional(),
+  started_at: external_exports.string().optional(),
+  ended_at: external_exports.string().optional(),
+  metadata: MetadataWireSchema.optional()
+});
+/**
+ * Converts an in-memory (camelCase) trajectory into its snake_case wire form.
+ * Undefined top-level fields are dropped, and the result is passed through
+ * NormalizedTrajectoryWireSchema.parse before being returned.
+ */
+function toNormalizedTrajectoryWire(trajectory) {
+  const { branch } = trajectory;
+  const wireCandidate = omitUndefinedProperties({
+    schema_version: trajectory.schemaVersion,
+    source: toNormalizedTraceSourceWire(trajectory.source),
+    session: toNormalizedTraceSessionWire(trajectory.session),
+    branch: branch ? toNormalizedTraceBranchWire(branch) : void 0,
+    events: trajectory.events.map(toNormalizedTraceEventWire),
+    token_usage: trajectory.tokenUsage,
+    cost_usd: trajectory.costUsd,
+    duration_ms: trajectory.durationMs,
+    started_at: trajectory.startedAt,
+    ended_at: trajectory.endedAt,
+    metadata: trajectory.metadata
+  });
+  return NormalizedTrajectoryWireSchema.parse(wireCandidate);
+}
+/**
+ * Parses `input` with NormalizedTrajectoryWireSchema and converts the
+ * snake_case wire document into the in-memory camelCase trajectory shape.
+ */
+function fromNormalizedTrajectoryWire(input) {
+  const wire = NormalizedTrajectoryWireSchema.parse(input);
+  const source = fromNormalizedTraceSourceWire(wire.source);
+  const session = fromNormalizedTraceSessionWire(wire.session);
+  const branch = wire.branch ? fromNormalizedTraceBranchWire(wire.branch) : void 0;
+  const events = wire.events.map(fromNormalizedTraceEventWire);
+  return {
+    schemaVersion: wire.schema_version,
+    source,
+    session,
+    branch,
+    events,
+    tokenUsage: wire.token_usage,
+    costUsd: wire.cost_usd,
+    durationMs: wire.duration_ms,
+    startedAt: wire.started_at,
+    endedAt: wire.ended_at,
+    metadata: wire.metadata
+  };
+}
+/** Builds the wire form of a trace source, leaving out any undefined field. */
+function toNormalizedTraceSourceWire(source) {
+  const { kind, path, url, provider, format, version, metadata } = source;
+  return omitUndefinedProperties({ kind, path, url, provider, format, version, metadata });
+}
+/** Copies the known trace-source fields from the wire object into a new object. */
+function fromNormalizedTraceSourceWire(source) {
+  const { kind, path, url, provider, format, version, metadata } = source;
+  return { kind, path, url, provider, format, version, metadata };
+}
+/** Maps a camelCase session to snake_case wire keys, leaving out undefined fields. */
+function toNormalizedTraceSessionWire(session) {
+  const { sessionId, conversationId, cwd, startedAt, endedAt, metadata } = session;
+  return omitUndefinedProperties({
+    session_id: sessionId,
+    conversation_id: conversationId,
+    cwd,
+    started_at: startedAt,
+    ended_at: endedAt,
+    metadata
+  });
+}
+/** Maps a snake_case wire session to the camelCase in-memory shape. */
+function fromNormalizedTraceSessionWire(session) {
+  const { session_id, conversation_id, cwd, started_at, ended_at, metadata } = session;
+  return {
+    sessionId: session_id,
+    conversationId: conversation_id,
+    cwd,
+    startedAt: started_at,
+    endedAt: ended_at,
+    metadata
+  };
+}
+/** Maps camelCase branch selection info to snake_case wire keys, leaving out undefined fields. */
+function toNormalizedTraceBranchWire(branch) {
+  const { selectedLeafId, selectedPathIds, includedEventIds, omittedEventIds, selectionReason } = branch;
+  return omitUndefinedProperties({
+    selected_leaf_id: selectedLeafId,
+    selected_path_ids: selectedPathIds,
+    included_event_ids: includedEventIds,
+    omitted_event_ids: omittedEventIds,
+    selection_reason: selectionReason
+  });
+}
+/** Maps snake_case wire branch info to the camelCase in-memory shape. */
+function fromNormalizedTraceBranchWire(branch) {
+  const {
+    selected_leaf_id,
+    selected_path_ids,
+    included_event_ids,
+    omitted_event_ids,
+    selection_reason
+  } = branch;
+  return {
+    selectedLeafId: selected_leaf_id,
+    selectedPathIds: selected_path_ids,
+    includedEventIds: included_event_ids,
+    omittedEventIds: omitted_event_ids,
+    selectionReason: selection_reason
+  };
+}
+/**
+ * Converts one in-memory trajectory event to its snake_case wire form.
+ * Nested message/model/tool/sourceRef/rawEvidence values are converted with
+ * their own helpers when present; undefined fields are dropped and the result
+ * is passed through NormalizedTraceEventWireSchema.parse.
+ */
+function toNormalizedTraceEventWire(event) {
+  const { message, model, tool, sourceRef, rawEvidence } = event;
+  const wireEvent = omitUndefinedProperties({
+    event_id: event.eventId,
+    parent_event_id: event.parentEventId,
+    ordinal: event.ordinal,
+    type: event.type,
+    timestamp: event.timestamp,
+    duration_ms: event.durationMs,
+    duration_inferred: event.durationInferred,
+    turn_index: event.turnIndex,
+    message: message ? toNormalizedTraceMessageWire(message) : void 0,
+    model: model ? toNormalizedTraceModelWire(model) : void 0,
+    tool: tool ? toNormalizedTraceToolWire(tool) : void 0,
+    source_ref: sourceRef ? toNormalizedTraceSourceRefWire(sourceRef) : void 0,
+    raw_evidence: rawEvidence?.map(toNormalizedRawEvidenceWire),
+    redaction: event.redaction,
+    metadata: event.metadata
+  });
+  return NormalizedTraceEventWireSchema.parse(wireEvent);
+}
+/**
+ * Converts one snake_case wire event to the camelCase in-memory shape,
+ * converting nested message/model/tool/source_ref/raw_evidence values with
+ * their own helpers when present.
+ */
+function fromNormalizedTraceEventWire(event) {
+  const { message, model, tool, source_ref, raw_evidence } = event;
+  return {
+    eventId: event.event_id,
+    parentEventId: event.parent_event_id,
+    ordinal: event.ordinal,
+    type: event.type,
+    timestamp: event.timestamp,
+    durationMs: event.duration_ms,
+    durationInferred: event.duration_inferred,
+    turnIndex: event.turn_index,
+    message: message ? fromNormalizedTraceMessageWire(message) : void 0,
+    model: model ? fromNormalizedTraceModelWire(model) : void 0,
+    tool: tool ? fromNormalizedTraceToolWire(tool) : void 0,
+    sourceRef: source_ref ? fromNormalizedTraceSourceRefWire(source_ref) : void 0,
+    rawEvidence: raw_evidence?.map(fromNormalizedRawEvidenceWire),
+    redaction: event.redaction,
+    metadata: event.metadata
+  };
+}
+/** Maps a camelCase message to snake_case wire keys, leaving out undefined fields. */
+function toNormalizedTraceMessageWire(message) {
+  const { role, name, content, redaction, tokenUsage, metadata } = message;
+  return omitUndefinedProperties({
+    role,
+    name,
+    content,
+    redaction,
+    token_usage: tokenUsage,
+    metadata
+  });
+}
+/** Maps a snake_case wire message to the camelCase in-memory shape. */
+function fromNormalizedTraceMessageWire(message) {
+  const { role, name, content, redaction, token_usage, metadata } = message;
+  return {
+    role,
+    name,
+    content,
+    redaction,
+    tokenUsage: token_usage,
+    metadata
+  };
+}
+/** Maps camelCase model info to snake_case wire keys, leaving out undefined fields. */
+function toNormalizedTraceModelWire(model) {
+  const { provider, name, invocationId, tokenUsage, metadata } = model;
+  return omitUndefinedProperties({
+    provider,
+    name,
+    invocation_id: invocationId,
+    token_usage: tokenUsage,
+    metadata
+  });
+}
+/** Maps snake_case wire model info to the camelCase in-memory shape. */
+function fromNormalizedTraceModelWire(model) {
+  const { provider, name, invocation_id, token_usage, metadata } = model;
+  return {
+    provider,
+    name,
+    invocationId: invocation_id,
+    tokenUsage: token_usage,
+    metadata
+  };
+}
+/** Maps camelCase tool info to snake_case wire keys, leaving out undefined fields. */
+function toNormalizedTraceToolWire(tool) {
+  const { name, callId, input, output, status, error, redaction, metadata } = tool;
+  return omitUndefinedProperties({
+    name,
+    call_id: callId,
+    input,
+    output,
+    status,
+    error,
+    redaction,
+    metadata
+  });
+}
+/** Maps snake_case wire tool info to the camelCase in-memory shape. */
+function fromNormalizedTraceToolWire(tool) {
+  const { name, call_id, input, output, status, error, redaction, metadata } = tool;
+  return {
+    name,
+    callId: call_id,
+    input,
+    output,
+    status,
+    error,
+    redaction,
+    metadata
+  };
+}
+/** Maps a camelCase source reference to snake_case wire keys, leaving out undefined fields. */
+function toNormalizedTraceSourceRefWire(sourceRef) {
+  const { eventId, messageId, spanId, traceId, rawKind, path, line, metadata } = sourceRef;
+  return omitUndefinedProperties({
+    event_id: eventId,
+    message_id: messageId,
+    span_id: spanId,
+    trace_id: traceId,
+    raw_kind: rawKind,
+    path,
+    line,
+    metadata
+  });
+}
+/** Maps a snake_case wire source reference to the camelCase in-memory shape. */
+function fromNormalizedTraceSourceRefWire(sourceRef) {
+  const { event_id, message_id, span_id, trace_id, raw_kind, path, line, metadata } = sourceRef;
+  return {
+    eventId: event_id,
+    messageId: message_id,
+    spanId: span_id,
+    traceId: trace_id,
+    rawKind: raw_kind,
+    path,
+    line,
+    metadata
+  };
+}
+/** Maps camelCase raw evidence to snake_case wire keys, leaving out undefined fields. */
+function toNormalizedRawEvidenceWire(evidence) {
+  const { kind, ref, mediaType, content, redacted, metadata } = evidence;
+  return omitUndefinedProperties({
+    kind,
+    ref,
+    media_type: mediaType,
+    content,
+    redacted,
+    metadata
+  });
+}
+/** Maps snake_case wire raw evidence to the camelCase in-memory shape. */
+function fromNormalizedRawEvidenceWire(evidence) {
+  const { kind, ref, media_type, content, redacted, metadata } = evidence;
+  return {
+    kind,
+    ref,
+    mediaType: media_type,
+    content,
+    redacted,
+    metadata
+  };
+}
 function computeTraceSummary(messages) {
   const toolCallCounts = {};
   const toolDurations = {};
@@ -21315,6 +21798,82 @@ function computeTraceSummary(messages) {
     endTime: latestEnd?.toISOString()
   };
 }
+/**
+ * Returns the events that belong to the selected branch.
+ * When the trajectory has no branch, or the branch lists no included event
+ * ids, the original `events` array is returned unchanged. Otherwise a new
+ * array holding only the events whose `eventId` is listed is returned, in
+ * the order they appear in `events`.
+ */
+function getSelectedTrajectoryEvents(trajectory) {
+  const includedEventIds = trajectory.branch?.includedEventIds;
+  if (includedEventIds && includedEventIds.length !== 0) {
+    const wantedIds = new Set(includedEventIds);
+    return trajectory.events.filter(({ eventId }) => wantedIds.has(eventId));
+  }
+  return trajectory.events;
+}
+/**
+ * Derives summary statistics from the selected events of a trajectory.
+ *
+ * LLM calls are counted from "model_turn" events; only when the selection has
+ * no "model_turn" event at all are assistant "message" events counted instead.
+ * Tool statistics come from "tool_call" events that carry a `tool` payload.
+ * Start and end times prefer the trajectory's own `startedAt`/`endedAt` and
+ * fall back to the earliest event start and latest event end.
+ * @returns `{ trace, tokenUsage, costUsd, durationMs, startTime, endTime }`;
+ *   `trace.toolDurations` is present only when at least one tool call had a
+ *   duration.
+ */
+function computeTraceSummaryFromTrajectory(trajectory) {
+  const events = getSelectedTrajectoryEvents(trajectory);
+  const sawModelTurn = events.some(({ type }) => type === "model_turn");
+  const countsAsLlmCall = (event) => event.type === "model_turn" || !sawModelTurn && event.type === "message" && event.message?.role === "assistant";
+  const toolCalls = {};
+  const toolDurations = {};
+  let toolCallTotal = 0;
+  let errorCount = 0;
+  let llmCallCount = 0;
+  let firstStart;
+  let lastEnd;
+  let sawDuration = false;
+  for (const event of events) {
+    if (countsAsLlmCall(event)) {
+      llmCallCount += 1;
+    }
+    // Every event contributes to the time window, not just tool calls.
+    const startedAt = parseTimestamp(event.timestamp);
+    if (startedAt && (!firstStart || startedAt < firstStart)) {
+      firstStart = startedAt;
+    }
+    const endedAt = deriveEventEnd(startedAt, event.durationMs);
+    if (endedAt && (!lastEnd || endedAt > lastEnd)) {
+      lastEnd = endedAt;
+    }
+    if (event.type === "tool_call" && event.tool) {
+      const toolName = event.tool.name;
+      toolCalls[toolName] = (toolCalls[toolName] ?? 0) + 1;
+      toolCallTotal += 1;
+      if (isErrorToolEvent(event)) {
+        errorCount += 1;
+      }
+      if (event.durationMs !== void 0) {
+        sawDuration = true;
+        if (!toolDurations[toolName]) {
+          toolDurations[toolName] = [];
+        }
+        toolDurations[toolName].push(event.durationMs);
+      }
+    }
+  }
+  const trace = {
+    eventCount: toolCallTotal,
+    toolCalls,
+    errorCount,
+    llmCallCount,
+    ...sawDuration ? { toolDurations } : {}
+  };
+  return {
+    trace,
+    tokenUsage: trajectory.tokenUsage,
+    costUsd: trajectory.costUsd,
+    durationMs: trajectory.durationMs,
+    startTime: trajectory.startedAt ?? firstStart?.toISOString(),
+    endTime: trajectory.endedAt ?? lastEnd?.toISOString()
+  };
+}
+/**
+ * Parses a timestamp into a Date.
+ * @returns The Date, or `undefined` when the input is falsy or not parseable.
+ */
+function parseTimestamp(timestamp) {
+  if (timestamp) {
+    const parsed = new Date(timestamp);
+    if (!Number.isNaN(parsed.getTime())) {
+      return parsed;
+    }
+  }
+  return void 0;
+}
+/**
+ * Computes when an event ended from its start time and duration.
+ *
+ * Falls back to `start` when the duration is missing, is not a finite number,
+ * or would push the result outside the representable Date range. Returning
+ * an Invalid Date here would make the caller's `toISOString()` throw a
+ * RangeError, even though the start timestamp itself was validated.
+ * @param start Event start as a Date, or undefined when unknown.
+ * @param durationMs Event duration in milliseconds, if known.
+ * @returns The end Date, `start` as a fallback, or undefined when `start` is missing.
+ */
+function deriveEventEnd(start, durationMs) {
+  if (!start) return void 0;
+  if (typeof durationMs !== "number" || !Number.isFinite(durationMs)) return start;
+  const end = new Date(start.getTime() + durationMs);
+  return Number.isNaN(end.getTime()) ? start : end;
+}
+/**
+ * Tells whether a tool event failed: its tool carries a truthy `error`, or its
+ * status is "error", "timeout" or "cancelled".
+ */
+function isErrorToolEvent(event) {
+  const tool = event.tool;
+  if (!tool) {
+    return false;
+  }
+  if (tool.error) {
+    return true;
+  }
+  const { status } = tool;
+  return status === "error" || status === "timeout" || status === "cancelled";
+}
 var DEFAULT_EXPLORATION_TOOLS = [
   "read",
   "grep",
@@ -22099,6 +22658,30 @@ var SkillTriggerGrader = class {
     };
   }
 };
+/** Serializes a value as 2-space indented JSON; `undefined` becomes an empty string. */
+function stringifyPretty2(value) {
+  if (value !== void 0) {
+    return JSON.stringify(value, null, 2);
+  }
+  return "";
+}
+/** Serializes a value as single-line JSON; `undefined` becomes an empty string. */
+function stringifyCompact2(value) {
+  if (value !== void 0) {
+    return JSON.stringify(value);
+  }
+  return "";
+}
+/**
+ * Builds the substitution map used to fill a grader prompt template from
+ * explicit inputs.
+ *
+ * The question comes from `promptInputs.question` when it has non-whitespace
+ * content, otherwise from `evalCase.question`. Metadata and rubrics are
+ * offered in both pretty-printed and compact JSON forms.
+ * @param input Object with evalCase, candidate, promptInputs and optional
+ *   rubrics, fileChanges and toolCalls.
+ * @returns Object keyed by TEMPLATE_VARIABLES names, including deprecated aliases.
+ */
+function buildTemplateVariables2(input) {
+  const { evalCase, promptInputs, rubrics } = input;
+  const promptQuestion = promptInputs.question;
+  const usePromptQuestion = promptQuestion && promptQuestion.trim().length > 0;
+  const questionText = (usePromptQuestion ? promptQuestion : evalCase.question).trim();
+  const candidateText = input.candidate.trim();
+  const expectedText = (evalCase.reference_answer ?? "").trim();
+  const { metadata } = evalCase;
+  return {
+    [TEMPLATE_VARIABLES.INPUT]: questionText,
+    [TEMPLATE_VARIABLES.OUTPUT]: candidateText,
+    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: expectedText,
+    [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
+    [TEMPLATE_VARIABLES.METADATA]: stringifyPretty2(metadata),
+    [TEMPLATE_VARIABLES.METADATA_JSON]: stringifyCompact2(metadata),
+    [TEMPLATE_VARIABLES.RUBRICS]: stringifyPretty2(rubrics),
+    [TEMPLATE_VARIABLES.RUBRICS_JSON]: stringifyCompact2(rubrics),
+    [TEMPLATE_VARIABLES.FILE_CHANGES]: input.fileChanges ?? "",
+    [TEMPLATE_VARIABLES.TOOL_CALLS]: input.toolCalls ?? "",
+    // Deprecated aliases: identical values to the primary variables above
+    [TEMPLATE_VARIABLES.INPUT_TEXT]: questionText,
+    [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidateText,
+    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: expectedText
+  };
+}
 function assembleLlmGraderPrompt(input) {
   const {
     evalCase,
@@ -22111,6 +22694,17 @@ function assembleLlmGraderPrompt(input) {
   } = input;
   const rubrics = evaluatorConfig?.rubrics;
   if (rubrics && rubrics.length > 0) {
+    if (graderTemplateOverride) {
+      return assembleCustom(
+        evalCase,
+        candidate,
+        promptInputs,
+        rubrics,
+        fileChanges,
+        toolCalls,
+        graderTemplateOverride
+      );
+    }
     const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
     if (hasScoreRanges) {
       return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls);
@@ -22127,19 +22721,13 @@ function assembleLlmGraderPrompt(input) {
   );
 }
 function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, toolCalls, graderTemplateOverride) {
-  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
-  const variables = {
-    [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
-    [TEMPLATE_VARIABLES.OUTPUT]: candidate.trim(),
-    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? "").trim(),
-    [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
-    [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
-    [TEMPLATE_VARIABLES.TOOL_CALLS]: toolCalls ?? "",
-    // Deprecated aliases
-    [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
-    [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
-    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
-  };
+  const variables = buildTemplateVariables2({
+    evalCase,
+    candidate,
+    promptInputs,
+    fileChanges,
+    toolCalls
+  });
   const systemPrompt = buildOutputSchema();
   const template = graderTemplateOverride ?? DEFAULT_GRADER_TEMPLATE;
   let userPrompt = substituteVariables(template, variables);
@@ -22162,6 +22750,27 @@ ${toolCalls}`;
     mode: "freeform"
   };
 }
+function assembleCustom(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls, graderTemplateOverride) {
+  const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
+  const systemPrompt = hasScoreRanges ? buildScoreRangeOutputSchema() : buildRubricOutputSchema();
+  const userPrompt = substituteVariables(
+    graderTemplateOverride,
+    buildTemplateVariables2({
+      evalCase,
+      candidate,
+      promptInputs,
+      rubrics,
+      fileChanges,
+      toolCalls
+    })
+  );
+  return {
+    systemPrompt,
+    userPrompt,
+    responseSchema: systemPrompt,
+    mode: hasScoreRanges ? "score_range" : "checklist"
+  };
+}
 function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls) {
   const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
   const parts = [
@@ -22185,10 +22794,19 @@ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChang
     parts.push("[[ ## tool_calls ## ]]", toolCalls, "");
   }
   parts.push("[[ ## rubrics ## ]]");
+  const operatorGuidance = formatRubricOperatorGuidance(rubrics);
+  if (operatorGuidance.length > 0) {
+    parts.push("", "Operator guidance:");
+    for (const guidance of operatorGuidance) {
+      parts.push(`- ${guidance}`);
+    }
+    parts.push("");
+  }
   for (const rubric of rubrics) {
     const requiredLabel = rubric.required ? " (REQUIRED)" : "";
     const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
-    parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.outcome}`);
+    const operatorLabel = formatRubricOperatorLabel(rubric.operator);
+    parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}${operatorLabel}: ${rubric.outcome}`);
   }
   parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
   const systemPrompt = buildRubricOutputSchema();
@@ -22228,6 +22846,9 @@ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChan
     const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
     const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
     parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
+    if (rubric.operator) {
+      parts.push(`Operator: ${rubric.operator}`);
+    }
     if (rubric.outcome) {
       parts.push(`Description: ${rubric.outcome}`);
     }
@@ -22240,6 +22861,10 @@ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChan
       }
     }
   }
+  const operatorGuidance = formatRubricOperatorGuidance(rubrics);
+  if (operatorGuidance.length > 0) {
+    parts.push("", ...operatorGuidance);
+  }
   parts.push(
     "",
     "For each criterion, provide an integer score 0-10 that matches one of its defined score ranges."
@@ -23409,10 +24034,10 @@ var ClaudeCliProvider = class {
   }
   resolveCwd(cwdOverride) {
     if (cwdOverride) {
-      return path5.resolve(cwdOverride);
+      return path52.resolve(cwdOverride);
     }
     if (this.config.cwd) {
-      return path5.resolve(this.config.cwd);
+      return path52.resolve(this.config.cwd);
     }
     return void 0;
   }
@@ -23422,9 +24047,9 @@ var ClaudeCliProvider = class {
       return void 0;
     }
     if (this.config.logDir) {
-      return path5.resolve(this.config.logDir);
+      return path52.resolve(this.config.logDir);
     }
-    return path5.join(process.cwd(), ".agentv", "logs", "claude-cli");
+    return path52.join(process.cwd(), ".agentv", "logs", "claude-cli");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -23432,13 +24057,13 @@ var ClaudeCliProvider = class {
       return void 0;
     }
     try {
-      await mkdir(logDir, { recursive: true });
+      await mkdir2(logDir, { recursive: true });
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = path5.join(logDir, buildLogFilename(request, this.targetName));
+    const filePath = path52.join(logDir, buildLogFilename(request, this.targetName));
     try {
       const logger = await ClaudeCliStreamLogger.create({
         filePath,
@@ -23921,7 +24546,7 @@ var ClaudeSdkProvider = class {
       return void 0;
     }
     try {
-      await mkdir2(logDir, { recursive: true });
+      await mkdir3(logDir, { recursive: true });
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
@@ -24746,6 +25371,9 @@ var CodexProvider = class {
     const startMs = Date.now();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     const codexOptions = {};
+    if (this.config.executable) {
+      codexOptions.codexPathOverride = this.config.executable;
+    }
     if (this.config.model) {
       codexOptions.config = { model: this.config.model };
     }
@@ -24757,6 +25385,9 @@ var CodexProvider = class {
     if (cwd) {
       threadOptions.workingDirectory = cwd;
     }
+    if (this.config.modelReasoningEffort) {
+      threadOptions.modelReasoningEffort = this.config.modelReasoningEffort;
+    }
     const thread = codex.startThread(threadOptions);
     const inputFiles = normalizeInputFiles(request.inputFiles);
     const basePrompt = buildPromptDocument(request, inputFiles);
@@ -24904,7 +25535,7 @@ ${basePrompt}` : basePrompt;
   }
   resolveLogDirectory() {
     const disabled = isCodexLogStreamingDisabled();
-    if (disabled) {
+    if (disabled || this.config.streamLog === false) {
       return void 0;
     }
     if (this.config.logDir) {
@@ -24918,7 +25549,7 @@ ${basePrompt}` : basePrompt;
       return void 0;
     }
     try {
-      await mkdir3(logDir, { recursive: true });
+      await mkdir4(logDir, { recursive: true });
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
@@ -24931,7 +25562,7 @@ ${basePrompt}` : basePrompt;
         targetName: this.targetName,
         evalCaseId: request.evalCaseId,
         attempt: request.attempt,
-        format: this.config.logFormat ?? "summary"
+        format: this.config.streamLog === "raw" ? "json" : "summary"
       });
       recordCodexLogEntry({
         filePath,
@@ -25136,7 +25767,7 @@ async function walkDir(rootDir, currentDir, snapshot) {
       if (fileStat.size > SNAPSHOT_MAX_FILE_BYTES) continue;
       let content;
       try {
-        content = await readFile22(fullPath, "utf8");
+        content = await readFile32(fullPath, "utf8");
         if (content.includes("\0")) continue;
       } catch {
         continue;
@@ -25220,7 +25851,7 @@ function subscribeToCopilotCliLogEntries(listener) {
   };
 }
 function resolvePlatformCliPath() {
-  const os3 = platform();
+  const os22 = platform();
   const cpu = arch();
   const platformMap = {
     linux: "linux",
@@ -25231,13 +25862,13 @@ function resolvePlatformCliPath() {
     x64: "x64",
     arm64: "arm64"
   };
-  const osPart = platformMap[os3];
+  const osPart = platformMap[os22];
   const archPart = archMap[cpu];
   if (!osPart || !archPart) {
     return void 0;
   }
   const packageName = `@github/copilot-${osPart}-${archPart}`;
-  const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
+  const binaryName = os22 === "win32" ? "copilot.exe" : "copilot";
   try {
     const resolved = import.meta.resolve(`${packageName}/package.json`);
     const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
@@ -25305,9 +25936,9 @@ function resolvePlatformCliPath() {
 }
 function globalNpmRoots() {
   const roots = [];
-  const os3 = platform();
+  const os22 = platform();
   const home = homedir3();
-  if (os3 === "win32") {
+  if (os22 === "win32") {
     if (process.env.APPDATA) {
       roots.push(path10.join(process.env.APPDATA, "npm", "node_modules"));
     }
@@ -25322,7 +25953,7 @@ function globalNpmRoots() {
   if (process.env.npm_config_prefix) {
     const prefix = process.env.npm_config_prefix;
     roots.push(
-      os3 === "win32" ? path10.join(prefix, "node_modules") : path10.join(prefix, "lib", "node_modules")
+      os22 === "win32" ? path10.join(prefix, "node_modules") : path10.join(prefix, "lib", "node_modules")
     );
   }
   return Array.from(new Set(roots));
@@ -25741,7 +26372,7 @@ var CopilotCliProvider = class {
       return void 0;
     }
     try {
-      await mkdir4(logDir, { recursive: true });
+      await mkdir5(logDir, { recursive: true });
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
@@ -25992,7 +26623,7 @@ async function discoverCopilotSessions(opts) {
     const workspacePath = path12.join(sessionDir, "workspace.yaml");
     const eventsPath = path12.join(sessionDir, "events.jsonl");
     try {
-      const workspaceContent = await readFile32(workspacePath, "utf8");
+      const workspaceContent = await readFile4(workspacePath, "utf8");
       const workspace = parseYamlValue(workspaceContent) ?? {};
       const cwd = String(workspace.cwd ?? "");
       let updatedAt;
@@ -26052,7 +26683,7 @@ var CopilotLogProvider = class {
     const eventsPath = path13.join(sessionDir, "events.jsonl");
     let eventsContent;
     try {
-      eventsContent = await readFile4(eventsPath, "utf8");
+      eventsContent = await readFile5(eventsPath, "utf8");
     } catch (err) {
       throw new Error(
         `Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
@@ -26429,7 +27060,7 @@ var CopilotSdkProvider = class {
       return void 0;
     }
     try {
-      await mkdir5(logDir, { recursive: true });
+      await mkdir6(logDir, { recursive: true });
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
@@ -26746,7 +27377,7 @@ var PiCliProvider = class {
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
       const promptFile = path15.join(cwd, PROMPT_FILENAME);
-      await writeFile2(promptFile, request.question, "utf8");
+      await writeFile3(promptFile, request.question, "utf8");
       const args = this.buildPiArgs(request.question, inputFiles);
       const result = await this.executePi(args, cwd, request.signal, logger);
       if (result.timedOut) {
@@ -26937,7 +27568,7 @@ ${prompt}` : prompt;
       return void 0;
     }
     try {
-      await mkdir6(logDir, { recursive: true });
+      await mkdir7(logDir, { recursive: true });
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
@@ -27928,7 +28559,7 @@ ${fileList}`;
       return void 0;
     }
     try {
-      await mkdir7(logDir, { recursive: true });
+      await mkdir8(logDir, { recursive: true });
     } catch (error40) {
       const message = error40 instanceof Error ? error40.message : String(error40);
       console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
@@ -28152,7 +28783,7 @@ async function pathExists(target) {
   }
 }
 async function ensureDir(target) {
-  await mkdir8(target, { recursive: true });
+  await mkdir9(target, { recursive: true });
 }
 async function readDirEntries(target) {
   const entries = await readdir3(target, { withFileTypes: true });
@@ -28304,7 +28935,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
   const maxAttempts = 10;
   while (attempts < maxAttempts) {
     try {
-      const content = await readFile5(responseFileFinal, { encoding: "utf8" });
+      const content = await readFile6(responseFileFinal, { encoding: "utf8" });
       if (!silent) {
         process.stdout.write(`${content}
 `);
@@ -28361,7 +28992,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
     const maxAttempts = 10;
     while (attempts < maxAttempts) {
       try {
-        const content = await readFile5(file2, { encoding: "utf8" });
+        const content = await readFile6(file2, { encoding: "utf8" });
         if (!silent) {
           process.stdout.write(`${content}
 `);
@@ -28454,9 +29085,9 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
   const aliveFile = path222.join(subagentDir, DEFAULT_ALIVE_FILENAME);
   await removeIfExists(aliveFile);
   const githubAgentsDir = path222.join(subagentDir, ".github", "agents");
-  await mkdir9(githubAgentsDir, { recursive: true });
+  await mkdir10(githubAgentsDir, { recursive: true });
   const wakeupDst = path222.join(githubAgentsDir, "wakeup.md");
-  await writeFile3(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
+  await writeFile4(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
   const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
     label: "open-workspace"
   });
@@ -28485,9 +29116,9 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
 async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
   const workspacePath = path222.join(subagentDir, `${path222.basename(subagentDir)}.code-workspace`);
   const messagesDir = path222.join(subagentDir, "messages");
-  await mkdir9(messagesDir, { recursive: true });
+  await mkdir10(messagesDir, { recursive: true });
   const reqFile = path222.join(messagesDir, `${timestamp}_req.md`);
-  await writeFile3(reqFile, requestInstructions, { encoding: "utf8" });
+  await writeFile4(reqFile, requestInstructions, { encoding: "utf8" });
   const reqUri = pathToFileUri2(reqFile);
   const chatArgs = ["-r", "chat", "-m", chatId];
   for (const attachment of attachmentPaths) {
@@ -28513,7 +29144,7 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
 async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
   const workspacePath = path222.join(subagentDir, `${path222.basename(subagentDir)}.code-workspace`);
   const messagesDir = path222.join(subagentDir, "messages");
-  await mkdir9(messagesDir, { recursive: true });
+  await mkdir10(messagesDir, { recursive: true });
   const chatArgs = ["-r", "chat", "-m", chatId];
   for (const attachment of attachmentPaths) {
     chatArgs.push("-a", attachment);
@@ -28643,7 +29274,7 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
     if (!stats.isFile()) {
       throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
     }
-    const templateText = await readFile6(workspaceSrc, "utf8");
+    const templateText = await readFile7(workspaceSrc, "utf8");
     workspaceContent = JSON.parse(templateText);
   } else {
     workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
@@ -28662,9 +29293,9 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
       transformedContent = JSON.stringify(parsed, null, 2);
     }
   }
-  await writeFile4(workspaceDst, transformedContent, "utf8");
+  await writeFile5(workspaceDst, transformedContent, "utf8");
   const messagesDir = path24.join(subagentDir, "messages");
-  await mkdir10(messagesDir, { recursive: true });
+  await mkdir11(messagesDir, { recursive: true });
   return { workspace: workspaceDst, messagesDir };
 }
 async function createSubagentLock(subagentDir) {
@@ -28687,7 +29318,7 @@ async function createSubagentLock(subagentDir) {
     );
   }
   const lockFile = path24.join(subagentDir, DEFAULT_LOCK_NAME);
-  await writeFile4(lockFile, "", { encoding: "utf8" });
+  await writeFile5(lockFile, "", { encoding: "utf8" });
   return lockFile;
 }
 async function removeSubagentLock(subagentDir) {
@@ -28712,7 +29343,7 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
   }
   if (promptFile) {
     const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
-    await mkdir10(githubAgentsDir, { recursive: true });
+    await mkdir11(githubAgentsDir, { recursive: true });
     const agentFile = path24.join(githubAgentsDir, `${chatId}.md`);
     try {
       await copyFile(promptFile, agentFile);
@@ -28971,7 +29602,7 @@ async function dispatchBatchAgent(options) {
           const reqFile = requestFiles[index];
           const tmpFile = responseTmpFiles[index];
           const finalFile = responseFilesFinal[index];
-          return writeFile5(
+          return writeFile6(
             reqFile,
             createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
             { encoding: "utf8" }
@@ -28983,7 +29614,7 @@ async function dispatchBatchAgent(options) {
         responseFilesFinal,
         orchestratorTemplateContent
       );
-      await writeFile5(orchestratorFile, orchestratorContent, { encoding: "utf8" });
+      await writeFile6(orchestratorFile, orchestratorContent, { encoding: "utf8" });
     }
     const chatAttachments = [orchestratorFile, ...attachments];
     const orchestratorUri = pathToFileUri2(orchestratorFile);
@@ -29126,8 +29757,8 @@ async function provisionSubagents(options) {
       if (!dryRun) {
         await removeIfExists(lockFile);
         await ensureDir(githubAgentsDir);
-        await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
-        await writeFile6(wakeupDst, wakeupContent, "utf8");
+        await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
+        await writeFile7(wakeupDst, wakeupContent, "utf8");
       }
       created.push(subagentDir);
       lockedSubagents.delete(subagentDir);
@@ -29137,8 +29768,8 @@ async function provisionSubagents(options) {
     if (!isLocked && force) {
       if (!dryRun) {
         await ensureDir(githubAgentsDir);
-        await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
-        await writeFile6(wakeupDst, wakeupContent, "utf8");
+        await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
+        await writeFile7(wakeupDst, wakeupContent, "utf8");
       }
       created.push(subagentDir);
       subagentsProvisioned += 1;
@@ -29146,8 +29777,8 @@ async function provisionSubagents(options) {
     }
     if (!dryRun && !await pathExists(workspaceDst)) {
       await ensureDir(githubAgentsDir);
-      await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
-      await writeFile6(wakeupDst, wakeupContent, "utf8");
+      await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
+      await writeFile7(wakeupDst, wakeupContent, "utf8");
     }
     skippedExisting.push(subagentDir);
     subagentsProvisioned += 1;
@@ -29162,8 +29793,8 @@ async function provisionSubagents(options) {
     if (!dryRun) {
       await ensureDir(subagentDir);
       await ensureDir(githubAgentsDir);
-      await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
-      await writeFile6(wakeupDst, wakeupContent, "utf8");
+      await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
+      await writeFile7(wakeupDst, wakeupContent, "utf8");
     }
     created.push(subagentDir);
     subagentsProvisioned += 1;
@@ -29523,7 +30154,7 @@ async function readTargetDefinitions(filePath) {
   if (!await fileExists2(absolutePath)) {
     throw new Error(`targets.yaml not found at ${absolutePath}`);
   }
-  const raw = await readFile7(absolutePath, "utf8");
+  const raw = await readFile8(absolutePath, "utf8");
   const parsed = parseYamlValue(raw);
   if (!isRecord(parsed)) {
     throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -29701,6 +30332,7 @@ async function executePromptTemplate(script, context, config2, timeoutMs) {
     output: context.output ?? null,
     inputFiles: context.evalCase.file_paths,
     input: context.evalCase.input,
+    metadata: context.evalCase.metadata ?? null,
     trace: context.trace ?? null,
     fileChanges: context.fileChanges ?? null,
     workspacePath: context.workspacePath ?? null,
@@ -30236,7 +30868,7 @@ function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
   return path33.join(root, evalRunId, caseId);
 }
 async function copyDirectoryRecursive(src, dest) {
-  await mkdir12(dest, { recursive: true });
+  await mkdir13(dest, { recursive: true });
   const entries = await readdir5(src, { withFileTypes: true });
   for (const entry of entries) {
     const srcPath = path33.join(src, entry.name);
@@ -30357,7 +30989,7 @@ function computeWorkspaceFingerprint(repos) {
   return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
 }
 async function copyDirectoryRecursive2(src, dest, skipDirs) {
-  await mkdir13(dest, { recursive: true });
+  await mkdir14(dest, { recursive: true });
   const entries = await readdir6(src, { withFileTypes: true });
   for (const entry of entries) {
     const srcPath = path34.join(src, entry.name);
@@ -30395,7 +31027,7 @@ var WorkspacePoolManager = class {
     const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
     const fingerprint = computeWorkspaceFingerprint(repos);
     const poolDir = path34.join(this.poolRoot, fingerprint);
-    await mkdir13(poolDir, { recursive: true });
+    await mkdir14(poolDir, { recursive: true });
     const drifted = await this.checkDrift(poolDir, fingerprint);
     if (drifted) {
       console.warn(
@@ -30422,7 +31054,7 @@ var WorkspacePoolManager = class {
           poolDir
         };
       }
-      await mkdir13(slotPath, { recursive: true });
+      await mkdir14(slotPath, { recursive: true });
       if (templatePath) {
         await copyDirectoryRecursive2(templatePath, slotPath);
       }
@@ -30459,14 +31091,14 @@ var WorkspacePoolManager = class {
   async tryLock(lockPath) {
     for (let attempt = 0; attempt < 3; attempt++) {
       try {
-        await writeFile7(lockPath, String(process.pid), { flag: "wx" });
+        await writeFile8(lockPath, String(process.pid), { flag: "wx" });
         return true;
       } catch (err) {
         if (err.code !== "EEXIST") {
           throw err;
         }
         try {
-          const pidStr = await readFile8(lockPath, "utf-8");
+          const pidStr = await readFile9(lockPath, "utf-8");
           const pid = Number.parseInt(pidStr.trim(), 10);
           if (!Number.isNaN(pid)) {
             try {
@@ -30493,7 +31125,7 @@ var WorkspacePoolManager = class {
   async checkDrift(poolDir, fingerprint) {
     const metadataPath = path34.join(poolDir, "metadata.json");
     try {
-      const raw = await readFile8(metadataPath, "utf-8");
+      const raw = await readFile9(metadataPath, "utf-8");
       const metadata = JSON.parse(raw);
       return metadata.fingerprint !== fingerprint;
     } catch {
@@ -30508,7 +31140,7 @@ var WorkspacePoolManager = class {
       repos,
       createdAt: (/* @__PURE__ */ new Date()).toISOString()
     };
-    await writeFile7(path34.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
+    await writeFile8(path34.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
   }
   /** Remove all slot directories and their lock files from a pool directory. */
   async removeAllSlots(poolDir) {
@@ -30518,7 +31150,7 @@ var WorkspacePoolManager = class {
         const lockPath = path34.join(poolDir, `${entry}.lock`);
         if (existsSync3(lockPath)) {
           try {
-            const pidStr = await readFile8(lockPath, "utf-8");
+            const pidStr = await readFile9(lockPath, "utf-8");
             const pid = Number.parseInt(pidStr.trim(), 10);
             if (!Number.isNaN(pid)) {
               try {
@@ -30936,7 +31568,7 @@ function isAgentSkillsFormat(parsed) {
   return Array.isArray(obj.evals);
 }
 async function loadTestsFromAgentSkills(filePath) {
-  const raw = await readFile9(filePath, "utf8");
+  const raw = await readFile10(filePath, "utf8");
   let parsed;
   try {
     parsed = JSON.parse(raw);
@@ -31105,20 +31737,22 @@ var DEFAULT_EVAL_PATTERNS = [
 ];
 async function loadConfig(evalFilePath, repoRoot) {
   const directories = buildDirectoryChain2(evalFilePath, repoRoot);
+  const globalConfigPath = path39.join(getAgentvConfigDir(), "config.yaml");
   for (const directory of directories) {
     const configPath = path39.join(directory, ".agentv", "config.yaml");
     if (!await fileExists3(configPath)) {
       continue;
     }
     const config2 = await readConfigFile(configPath);
-    if (config2) return config2;
+    if (config2) {
+      return config2;
+    }
   }
-  const globalConfigPath = path39.join(getAgentvConfigDir(), "config.yaml");
   return await fileExists3(globalConfigPath) ? readConfigFile(globalConfigPath) : null;
 }
 async function readConfigFile(configPath) {
   try {
-    const rawConfig = await readFile10(configPath, "utf8");
+    const rawConfig = await readFile11(configPath, "utf8");
     const parsed = interpolateEnv(parseYamlValue(rawConfig), process.env);
     if (!isJsonObject(parsed)) {
       logWarning(`Invalid config.yaml format at ${configPath}`);
@@ -31331,7 +31965,10 @@ function extractCacheConfig(suite) {
     logWarning(`Invalid execution.cache: ${cache}. Must be a boolean. Ignoring.`);
     return void 0;
   }
-  const cachePath = executionObj.cache_path ?? executionObj.cachePath;
+  if (executionObj.cachePath !== void 0) {
+    logWarning("Invalid execution.cachePath: use snake_case execution.cache_path in YAML.");
+  }
+  const cachePath = executionObj.cache_path;
   const resolvedCachePath = typeof cachePath === "string" && cachePath.trim().length > 0 ? cachePath.trim() : void 0;
   return { enabled: cache, cachePath: resolvedCachePath };
 }
@@ -31500,6 +32137,12 @@ function parseResultsConfig(raw, configPath) {
     ...branchPrefix && { branch_prefix: branchPrefix }
   };
 }
+function resolveResultsConfigForProject(config2, _projectId) {
+  if (!config2) {
+    return void 0;
+  }
+  return config2.results;
+}
 function parseHooksConfig(raw, configPath) {
   if (raw === void 0 || raw === null) {
     return void 0;
@@ -31525,7 +32168,7 @@ function logWarning(message) {
 var ANSI_YELLOW3 = "\x1B[33m";
 var ANSI_RESET4 = "\x1B[0m";
 async function validateCustomPromptContent(promptPath) {
-  const content = await readFile11(promptPath, "utf8");
+  const content = await readFile12(promptPath, "utf8");
   validateTemplateVariables(content, promptPath);
 }
 function validateTemplateVariables(content, source) {
@@ -31655,7 +32298,7 @@ ${resolved.attempted.map((attempt) => `  Tried: ${attempt}`).join("\n")}` : "";
     const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
     throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
   }
-  const content = await readFile12(resolved.resolvedPath, "utf8");
+  const content = await readFile13(resolved.resolvedPath, "utf8");
   const parsed = interpolateEnv(parseYamlValue(content), process.env);
   if (!isJsonObject2(parsed)) {
     throw new Error(
@@ -31702,6 +32345,103 @@ async function expandGraderEntries(candidateEvaluators, searchRoots, evalId, inc
   }
   return expanded;
 }
+async function collectAssertionTemplateSourceReferences(rawEvalCase, globalExecution, searchRoots, evalId) {
+  const execution = rawEvalCase.execution;
+  const executionObject = isJsonObject2(execution) ? execution : void 0;
+  const caseEvaluators = rawEvalCase.assertions ?? rawEvalCase.assert ?? (executionObject ? executionObject.evaluators : void 0) ?? rawEvalCase.evaluators;
+  const skipDefaults = executionObject?.skip_defaults === true;
+  const rootEvaluators = skipDefaults ? void 0 : globalExecution?.assertions ?? globalExecution?.assert ?? globalExecution?.evaluators;
+  return [
+    ...await collectAssertionTemplateReferencesFromValue(caseEvaluators, searchRoots, evalId),
+    ...await collectAssertionTemplateReferencesFromValue(rootEvaluators, searchRoots, evalId)
+  ];
+}
+async function collectAssertionTemplateReferencesFromValue(value, searchRoots, evalId, includeContext = { depth: 0, chain: [] }) {
+  if (value === void 0) {
+    return [];
+  }
+  const references = [];
+  if (Array.isArray(value)) {
+    for (const item of value) {
+      if (isIncludeEntry(item)) {
+        const nextDepth = includeContext.depth + 1;
+        if (nextDepth > MAX_ASSERTION_INCLUDE_DEPTH) {
+          const chain = [...includeContext.chain, item.include].join(" -> ");
+          throw new Error(
+            `Assertion template include depth exceeded ${MAX_ASSERTION_INCLUDE_DEPTH} in '${evalId}'. Include chain: ${chain}`
+          );
+        }
+        const resolved = await resolveAssertionTemplateReference(item.include, searchRoots);
+        references.push({
+          kind: "assertion_template",
+          displayPath: resolved.displayPath,
+          ...resolved.resolvedPath ? { resolvedPath: path40.resolve(resolved.resolvedPath) } : {}
+        });
+        if (resolved.resolvedPath) {
+          if (includeContext.chain.includes(resolved.resolvedPath)) {
+            const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
+            throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
+          }
+          const content = await readFile13(resolved.resolvedPath, "utf8");
+          const parsed = interpolateEnv(parseYamlValue(content), process.env);
+          if (isJsonObject2(parsed) && Array.isArray(parsed.assertions)) {
+            const templateDir = path40.dirname(resolved.resolvedPath);
+            const nestedSearchRoots = [
+              templateDir,
+              ...searchRoots.filter((root) => path40.resolve(root) !== templateDir)
+            ];
+            references.push(
+              ...await collectAssertionTemplateReferencesFromValue(
+                parsed.assertions,
+                nestedSearchRoots,
+                evalId,
+                {
+                  depth: nextDepth,
+                  chain: [...includeContext.chain, resolved.resolvedPath]
+                }
+              )
+            );
+          }
+        }
+        continue;
+      }
+      if (isJsonObject2(item)) {
+        references.push(
+          ...await collectAssertionTemplateReferencesFromObject(
+            item,
+            searchRoots,
+            evalId,
+            includeContext
+          )
+        );
+      }
+    }
+  } else if (isJsonObject2(value)) {
+    references.push(
+      ...await collectAssertionTemplateReferencesFromObject(
+        value,
+        searchRoots,
+        evalId,
+        includeContext
+      )
+    );
+  }
+  return references;
+}
+async function collectAssertionTemplateReferencesFromObject(value, searchRoots, evalId, includeContext) {
+  const references = [];
+  for (const key of ["assertions", "assert", "evaluators"]) {
+    references.push(
+      ...await collectAssertionTemplateReferencesFromValue(
+        value[key],
+        searchRoots,
+        evalId,
+        includeContext
+      )
+    );
+  }
+  return references;
+}
 async function parseGraderList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
   const expandedEvaluators = await expandGraderEntries(candidateEvaluators, searchRoots, evalId);
   if (!expandedEvaluators) {
@@ -31828,6 +32568,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
+      const resolvedScriptPath = await resolveOptionalCommandSource(command, searchRoots);
       const cwd = asString(rawEvaluator.cwd);
       let resolvedCwd;
       if (cwd) {
@@ -31893,6 +32634,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
         name,
         type: "code-grader",
         command,
+        ...resolvedScriptPath ? { resolvedScriptPath } : {},
         cwd,
         resolvedCwd,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
@@ -32960,6 +33702,17 @@ function asStringArray(value, description) {
   }
   return result;
 }
+async function resolveOptionalCommandSource(command, searchRoots) {
+  const candidate = command.at(-1);
+  if (!candidate || !looksLikeFilePath(candidate)) {
+    return void 0;
+  }
+  const resolved = await resolveFileReference3(candidate, searchRoots);
+  return resolved.resolvedPath ? path40.resolve(resolved.resolvedPath) : void 0;
+}
+function looksLikeFilePath(value) {
+  return path40.isAbsolute(value) || value.startsWith(".") || value.includes("/") || value.includes("\\") || /\.[cm]?[jt]sx?$|\.py$|\.sh$|\.bash$|\.rb$|\.go$|\.rs$/i.test(value);
+}
 function parseCommandToArgv(command) {
   if (process.platform === "win32") {
     return ["cmd.exe", "/c", command];
@@ -33028,6 +33781,19 @@ var VALID_FIELD_AGGREGATION_TYPES = /* @__PURE__ */ new Set(["weighted_average",
 function isValidFieldAggregationType(value) {
   return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
 }
+var VALID_RUBRIC_OPERATORS = new Set(RUBRIC_OPERATOR_VALUES);
+function parseRubricOperator(value, rubricId, evaluatorName, evalId) {
+  if (value === void 0) {
+    return void 0;
+  }
+  if (typeof value === "string" && VALID_RUBRIC_OPERATORS.has(value)) {
+    return value;
+  }
+  logWarning2(
+    `Ignoring invalid operator for rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': must be one of ${RUBRIC_OPERATOR_VALUES.join(", ")}`
+  );
+  return void 0;
+}
 function parseRubricItems(rawRubrics, evaluatorName, evalId) {
   const items = [];
   for (const [index, rawRubric] of rawRubrics.entries()) {
@@ -33038,7 +33804,8 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
       continue;
     }
     const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
-    const expectedOutcome = asString(rawRubric.outcome) ?? "";
+    const expectedOutcome = asString(rawRubric.outcome) ?? asString(rawRubric.criteria) ?? "";
+    const operator = parseRubricOperator(rawRubric.operator, id, evaluatorName, evalId);
     const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
     let minScore;
     let requiredMinScore;
@@ -33082,6 +33849,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
         id,
         weight,
         ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
+        ...operator !== void 0 ? { operator } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
         ...minScore !== void 0 ? { min_score: minScore } : {},
         ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
@@ -33097,6 +33865,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
       items.push({
         id,
         outcome: expectedOutcome,
+        ...operator !== void 0 ? { operator } : {},
         weight,
         // Default to required: true if not specified (backward compatibility)
         required: required2 ?? true,
@@ -33219,6 +33988,8 @@ function parseInlineRubrics(rawRubrics) {
       };
     }
     const expectedOutcome = asString(rubric.outcome) ?? "";
+    const id = asString(rubric.id) ?? `rubric-${index + 1}`;
+    const operator = parseRubricOperator(rubric.operator, id, "rubrics", "<inline>");
     const rawScoreRanges = rubric.score_ranges;
     const normalizedScoreRanges = rawScoreRanges !== void 0 ? normalizeScoreRangesShorthand(rawScoreRanges) : void 0;
     const scoreRanges = Array.isArray(normalizedScoreRanges) && normalizedScoreRanges.length > 0 ? normalizedScoreRanges.filter((r) => isJsonObject2(r)).map((range) => ({
@@ -33226,7 +33997,8 @@ function parseInlineRubrics(rawRubrics) {
       outcome: asString(range.outcome) ?? ""
     })).filter((r) => r.outcome.length > 0) : void 0;
     const baseRubric = {
-      id: asString(rubric.id) ?? `rubric-${index + 1}`,
+      id,
+      ...operator !== void 0 ? { operator } : {},
       weight: typeof rubric.weight === "number" ? rubric.weight : 1
     };
     let inlineMinScore;
@@ -33386,7 +34158,7 @@ async function processMessages(options) {
           continue;
         }
         try {
-          const fileContent = (await readFile13(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+          const fileContent = (await readFile14(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
           processedContent.push({
             ...cloneJsonObject(rawSegment),
             path: displayPath,
@@ -33427,7 +34199,7 @@ async function processMessages(options) {
           continue;
         }
         try {
-          const imageBuffer = await readFile13(resolvedPath);
+          const imageBuffer = await readFile14(resolvedPath);
           const base643 = imageBuffer.toString("base64");
           processedContent.push({
             type: "image",
@@ -33510,7 +34282,7 @@ async function processExpectedMessages(options) {
             continue;
           }
           try {
-            const fileContent = (await readFile13(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
+            const fileContent = (await readFile14(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
             processedContent.push({
               type: "file",
               path: displayPath,
@@ -33550,7 +34322,7 @@ async function processExpectedMessages(options) {
             continue;
           }
           try {
-            const imageBuffer = await readFile13(resolvedPath);
+            const imageBuffer = await readFile14(resolvedPath);
             const base643 = imageBuffer.toString("base64");
             processedContent.push({
               type: "image",
@@ -33590,6 +34362,12 @@ function expandInputShorthand(value) {
   if (typeof value === "string") {
     return [{ role: "user", content: value }];
   }
+  if (isJsonObject(value)) {
+    if ("role" in value) {
+      return isTestMessage(value) ? [value] : void 0;
+    }
+    return [{ role: "user", content: value }];
+  }
   if (Array.isArray(value)) {
     const messages = value.filter((msg) => isTestMessage(msg));
     return messages.length > 0 ? messages : void 0;
@@ -33675,7 +34453,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
     return {};
   }
   try {
-    const content = await readFile14(sidecarPath, "utf8");
+    const content = await readFile15(sidecarPath, "utf8");
     const parsed = interpolateEnv(parseYamlValue(content), process.env);
     if (!isJsonObject(parsed)) {
       logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
@@ -33720,7 +34498,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
   const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
-  const rawFile = await readFile14(absoluteTestPath, "utf8");
+  const rawFile = await readFile15(absoluteTestPath, "utf8");
   const rawCases = parseJsonlContent(rawFile, evalFilePath);
   const fallbackSuiteName = path422.basename(absoluteTestPath, ".jsonl") || "eval";
   const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
@@ -34129,7 +34907,7 @@ function interpolateRawEvalCase(raw, vars) {
 async function readTestSuiteMetadata(testFilePath) {
   try {
     const absolutePath = path43.resolve(testFilePath);
-    const content = await readFile15(absolutePath, "utf8");
+    const content = await readFile16(absolutePath, "utf8");
     const parsed = interpolateEnv(parseYamlValue(content), process.env);
     if (!isJsonObject(parsed)) {
       return {};
@@ -34153,7 +34931,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
     return { tests: await loadTestsFromAgentSkills(evalFilePath) };
   }
   if (format === "typescript") {
-    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-Z6IUSDNA-YBOE4JIQ.js");
+    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EQJX3OLT-THE7D3GR.js");
     return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
   }
   const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
@@ -34188,7 +34966,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
     return loadTestsFromAgentSkills(evalFilePath);
   }
   if (format === "typescript") {
-    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-Z6IUSDNA-YBOE4JIQ.js");
+    const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EQJX3OLT-THE7D3GR.js");
     const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
     return suite.tests;
   }
@@ -34203,8 +34981,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   const repoRootPath = resolveToAbsolutePath(repoRoot);
   const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
   const config2 = await loadConfig(absoluteTestPath, repoRootPath);
-  const rawFile = await readFile15(absoluteTestPath, "utf8");
-  const interpolated = interpolateEnv(parseYamlValue(rawFile), process.env);
+  const rawFile = await readFile16(absoluteTestPath, "utf8");
+  const rawParsed = parseYamlValue(rawFile);
+  const rawCaseSnapshots = buildRawInlineTestSnapshots(rawParsed);
+  const interpolated = interpolateEnv(rawParsed, process.env);
   if (!isJsonObject(interpolated)) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
@@ -34241,7 +35021,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
   const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
-  const suiteGovernance = extractSuiteGovernance(suite);
+  const suiteMetadataPayload = extractSuiteMetadataPayload(suite);
   const rawSuiteInput = suite.input;
   const rawSuiteInputFiles = suite.input_files;
   const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
@@ -34343,6 +35123,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       logError3(`Skipping test '${id}': ${message}`);
       continue;
     }
+    const assertionTemplateReferences = await collectAssertionTemplateSourceReferences(
+      renderedCase,
+      globalExecution,
+      searchRoots,
+      id ?? "unknown"
+    );
     const inlineRubrics = renderedCase.rubrics;
     if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
       const rubricEvaluator = parseInlineRubrics(inlineRubrics);
@@ -34355,8 +35141,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
     const rawCaseMetadata = isJsonObject(renderedCase.metadata) ? renderedCase.metadata : void 0;
-    const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
-    const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
+    const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suiteMetadataPayload);
     const caseTargets = extractTargetsFromTestCase(renderedCase);
     const dependsOn = Array.isArray(renderedCase.depends_on) ? renderedCase.depends_on.filter(
       (v) => typeof v === "string"
@@ -34395,12 +35180,245 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       ...onTurnFailure ? { on_turn_failure: onTurnFailure } : {},
       ...windowSize !== void 0 ? { window_size: windowSize } : {},
       ...dependsOn && dependsOn.length > 0 ? { depends_on: dependsOn } : {},
-      ...onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {}
+      ...onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {},
+      source: buildEvalTestSource({
+        evalFilePath,
+        absoluteTestPath,
+        repoRootPath,
+        id,
+        renderedCase,
+        rawCaseSnapshots,
+        inputMessages,
+        evaluators,
+        assertionTemplateReferences
+      })
     };
     results.push(testCase);
   }
   return { tests: results, parsed: suite, suiteWorkspacePath: suiteWorkspace?.path };
 }
+var SOURCE_SECRET_KEY_PATTERN = /(api[_-]?key|authorization|bearer|credential|password|private[_-]?key|secret|token)/i;
+var REDACTED_SOURCE_VALUE = "[redacted]";
+function buildRawInlineTestSnapshots(rawParsed) {
+  const snapshots = /* @__PURE__ */ new Map();
+  if (!isJsonObject(rawParsed)) {
+    return snapshots;
+  }
+  const rawTests = rawParsed.tests ?? rawParsed.eval_cases ?? rawParsed.evalcases;
+  if (!Array.isArray(rawTests)) {
+    return snapshots;
+  }
+  for (const rawTest of rawTests) {
+    if (!isJsonObject(rawTest) || typeof rawTest.id !== "string") {
+      continue;
+    }
+    snapshots.set(rawTest.id, stringifySourceYaml(rawTest));
+  }
+  return snapshots;
+}
+function buildEvalTestSource(params) {
+  const evalFileRepoPath = toPortableRelativePath(params.repoRootPath, params.absoluteTestPath);
+  const testSnapshotYaml = params.rawCaseSnapshots.get(params.id) ?? stringifySourceYaml(params.renderedCase);
+  const evaluatorReferences = collectGraderSourceReferences(params.evaluators);
+  const inputReferences = collectInputSourceReferences(params.inputMessages);
+  const references = dedupeSourceReferences([
+    ...inputReferences,
+    ...evaluatorReferences,
+    ...params.assertionTemplateReferences
+  ]);
+  return {
+    evalFilePath: params.evalFilePath,
+    evalFileAbsolutePath: params.absoluteTestPath,
+    ...evalFileRepoPath ? { evalFileRepoPath } : {},
+    testId: params.id,
+    testSnapshotYaml,
+    graderDefinitions: buildGraderSourceDefinitions(params.evaluators),
+    references
+  };
+}
+function stringifySourceYaml(value) {
+  return stringifyYaml(sanitizeSourceValue(value), { lineWidth: 0 }).trimEnd();
+}
+function sanitizeSourceValue(value, keyHint) {
+  if (keyHint && SOURCE_SECRET_KEY_PATTERN.test(keyHint)) {
+    return REDACTED_SOURCE_VALUE;
+  }
+  if (value === null || typeof value === "string" || typeof value === "number") {
+    return value;
+  }
+  if (typeof value === "boolean") {
+    return value;
+  }
+  if (Array.isArray(value)) {
+    return value.map((item) => sanitizeSourceValue(item));
+  }
+  if (typeof value === "object" && value !== null) {
+    const entries = Object.entries(value).map(([key, entryValue]) => [
+      key,
+      sanitizeSourceValue(entryValue, key)
+    ]);
+    return Object.fromEntries(entries);
+  }
+  return String(value);
+}
+function buildGraderSourceDefinitions(evaluators) {
+  return (evaluators ?? []).map((evaluator) => ({
+    name: evaluator.name,
+    type: evaluator.type,
+    ...evaluator.weight !== void 0 ? { weight: evaluator.weight } : {},
+    ...evaluator.required !== void 0 ? { required: evaluator.required } : {},
+    ..."min_score" in evaluator && evaluator.min_score !== void 0 ? { minScore: evaluator.min_score } : {},
+    definition: sanitizeGraderDefinition(evaluator)
+  }));
+}
+function sanitizeGraderDefinition(evaluator) {
+  const copy = sanitizeSourceValue(evaluator);
+  return stripRuntimeResolutionFields(copy);
+}
+function stripRuntimeResolutionFields(value) {
+  const stripped = {};
+  for (const [key, entryValue] of Object.entries(value)) {
+    if (key === "resolvedPromptPath" || key === "promptPath" || key === "resolvedPromptScript" || key === "resolvedScriptPath" || key === "resolvedCwd" || key === "resolvedCommand") {
+      continue;
+    }
+    if (Array.isArray(entryValue)) {
+      stripped[key] = entryValue.map(
+        (item) => isJsonObject(item) ? stripRuntimeResolutionFields(item) : item
+      );
+    } else if (isJsonObject(entryValue)) {
+      stripped[key] = stripRuntimeResolutionFields(entryValue);
+    } else {
+      stripped[key] = entryValue;
+    }
+  }
+  return stripped;
+}
+function collectInputSourceReferences(inputMessages) {
+  const references = [];
+  for (const message of inputMessages) {
+    if (!Array.isArray(message.content)) {
+      continue;
+    }
+    for (const segment of message.content) {
+      if (!isJsonObject(segment) || segment.type !== "file") {
+        continue;
+      }
+      const displayPath = typeof segment.path === "string" ? segment.path : typeof segment.value === "string" ? segment.value : "input file";
+      references.push({
+        kind: "input_file",
+        displayPath,
+        ...typeof segment.resolvedPath === "string" ? { resolvedPath: path43.resolve(segment.resolvedPath) } : {}
+      });
+    }
+  }
+  return references;
+}
+function collectGraderSourceReferences(evaluators) {
+  const references = [];
+  for (const evaluator of evaluators ?? []) {
+    references.push(...collectSingleGraderSourceReferences(evaluator));
+  }
+  return references;
+}
+function collectSingleGraderSourceReferences(evaluator) {
+  const references = [];
+  if (evaluator.type === "code-grader") {
+    const command = evaluator.command ?? evaluator.script ?? [];
+    references.push({
+      kind: "code_grader_command",
+      displayPath: evaluator.resolvedScriptPath ?? command.join(" "),
+      ...evaluator.resolvedScriptPath ? { resolvedPath: evaluator.resolvedScriptPath } : {},
+      graderName: evaluator.name,
+      command
+    });
+    if (evaluator.resolvedCwd) {
+      references.push({
+        kind: "code_grader_cwd",
+        displayPath: evaluator.cwd ?? evaluator.resolvedCwd,
+        resolvedPath: evaluator.resolvedCwd,
+        graderName: evaluator.name
+      });
+    }
+  }
+  if (evaluator.type === "llm-grader") {
+    const promptPath = evaluator.resolvedPromptPath ?? evaluator.promptPath;
+    if (promptPath) {
+      references.push({
+        kind: "llm_grader_prompt",
+        displayPath: typeof evaluator.prompt === "string" ? evaluator.prompt : promptPath,
+        resolvedPath: promptPath,
+        graderName: evaluator.name
+      });
+    }
+    if (evaluator.resolvedPromptScript && evaluator.resolvedPromptScript.length > 0) {
+      references.push({
+        kind: "prompt_script",
+        displayPath: evaluator.resolvedPromptScript.at(-1) ?? evaluator.name,
+        resolvedPath: evaluator.resolvedPromptScript.at(-1),
+        graderName: evaluator.name,
+        command: evaluator.resolvedPromptScript
+      });
+    }
+  }
+  const preprocessors = "preprocessors" in evaluator ? evaluator.preprocessors : void 0;
+  for (const preprocessor of preprocessors ?? []) {
+    if (preprocessor.resolvedCommand && preprocessor.resolvedCommand.length > 0) {
+      references.push({
+        kind: "preprocessor_command",
+        displayPath: preprocessor.resolvedCommand.at(-1) ?? preprocessor.type,
+        resolvedPath: preprocessor.resolvedCommand.at(-1),
+        graderName: evaluator.name,
+        command: preprocessor.resolvedCommand
+      });
+    }
+  }
+  if (evaluator.type === "composite") {
+    for (const member of evaluator.assertions) {
+      references.push(...collectSingleGraderSourceReferences(member));
+    }
+    if (evaluator.aggregator.type === "code-grader") {
+      references.push({
+        kind: "code_grader_command",
+        displayPath: evaluator.aggregator.path,
+        resolvedPath: path43.resolve(evaluator.aggregator.cwd ?? "", evaluator.aggregator.path),
+        graderName: evaluator.name
+      });
+    } else if (evaluator.aggregator.type === "llm-grader" && evaluator.aggregator.promptPath) {
+      references.push({
+        kind: "llm_grader_prompt",
+        displayPath: evaluator.aggregator.prompt ?? evaluator.aggregator.promptPath,
+        resolvedPath: evaluator.aggregator.promptPath,
+        graderName: evaluator.name
+      });
+    }
+  }
+  return references;
+}
+function dedupeSourceReferences(references) {
+  const seen = /* @__PURE__ */ new Set();
+  const deduped = [];
+  for (const reference of references) {
+    const key = JSON.stringify([
+      reference.kind,
+      reference.resolvedPath ?? reference.displayPath,
+      reference.graderName ?? "",
+      reference.command?.join("\0") ?? ""
+    ]);
+    if (seen.has(key)) {
+      continue;
+    }
+    seen.add(key);
+    deduped.push(reference);
+  }
+  return deduped;
+}
+function toPortableRelativePath(root, candidate) {
+  const relative = path43.relative(root, candidate);
+  if (relative && !relative.startsWith("..") && !path43.isAbsolute(relative)) {
+    return relative.split(path43.sep).join("/");
+  }
+  return void 0;
+}
 async function loadTestById(evalFilePath, repoRoot, evalId) {
   const tests = await loadTests(evalFilePath, repoRoot);
   const match = tests.find((c) => c.id === evalId);
@@ -34493,7 +35511,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
     const workspaceFilePath = path43.resolve(evalFileDir, raw);
     let content;
     try {
-      content = await readFile15(workspaceFilePath, "utf8");
+      content = await readFile16(workspaceFilePath, "utf8");
     } catch {
       throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
     }
@@ -34617,19 +35635,18 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
 function asString5(value) {
   return typeof value === "string" ? value : void 0;
 }
-function extractSuiteGovernance(suite) {
+function extractSuiteMetadataPayload(suite) {
+  const payload = isJsonObject(suite.metadata) ? { ...suite.metadata } : {};
   const top = suite.governance;
   if (isJsonObject(top)) {
-    return top;
-  }
-  const wrapper = suite.metadata;
-  if (isJsonObject(wrapper)) {
-    const nested = wrapper.governance;
+    payload.governance = top;
+  } else {
+    const nested = payload.governance;
     if (isJsonObject(nested)) {
-      return nested;
+      payload.governance = nested;
     }
   }
-  return void 0;
+  return Object.keys(payload).length > 0 ? payload : void 0;
 }
 function mergeSuiteMetadataPayload(caseMetadata, suitePayload) {
   if (!suitePayload) return caseMetadata;
@@ -35118,7 +36135,7 @@ async function runEvaluation(options) {
     const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
     if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
       if (!dirExists) {
-        await mkdir14(configuredStaticPath, { recursive: true });
+        await mkdir15(configuredStaticPath, { recursive: true });
       }
       if (workspaceTemplate) {
         await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
@@ -35163,7 +36180,7 @@ async function runEvaluation(options) {
     }
   } else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
     sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
-    await mkdir14(sharedWorkspacePath, { recursive: true });
+    await mkdir15(sharedWorkspacePath, { recursive: true });
     setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
   }
   try {
@@ -36013,7 +37030,7 @@ async function runEvalCase(options) {
     }
     if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
       workspacePath = getWorkspacePath(evalRunId, evalCase.id);
-      await mkdir14(workspacePath, { recursive: true });
+      await mkdir15(workspacePath, { recursive: true });
     }
     if (evalCase.workspace?.repos?.length && workspacePath) {
       const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
@@ -36068,7 +37085,7 @@ async function runEvalCase(options) {
           const srcPath = path44.resolve(baseDir, relPath);
           const destPath = path44.resolve(workspacePath, relPath);
           try {
-            await mkdir14(path44.dirname(destPath), { recursive: true });
+            await mkdir15(path44.dirname(destPath), { recursive: true });
             await copyFile2(srcPath, destPath);
           } catch (error40) {
             const message = error40 instanceof Error ? error40.message : String(error40);
@@ -37632,6 +38649,12 @@ async function evaluate(config2) {
     resolvedTarget = resolveTargetDefinition(targetDef);
   }
   const collectedResults = [];
+  const cacheEnabled = shouldEnableCache({
+    cliCache: config2.cache === true,
+    cliNoCache: false,
+    yamlCache: config2.cache === void 0 ? materialized.cache : void 0
+  });
+  const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ? path45.resolve(materialized.cachePath) : void 0) : void 0;
   const results = await runEvaluation({
     testFilePath,
     repoRoot,
@@ -37644,6 +38667,8 @@ async function evaluate(config2) {
     filter: config2.filter,
     threshold: config2.threshold,
     evalCases: materialized.tests,
+    cache,
+    useCache: !!cache && !shouldSkipCacheForTemperature(resolvedTarget.config),
     ...materialized.budgetUsd !== void 0 && { budgetUsd: materialized.budgetUsd },
     onResult: async (result) => {
       collectedResults.push(result);
@@ -37674,6 +38699,7 @@ async function materializeEvalConfig(config2, options) {
       tests: tests2,
       workers: config2.workers ?? suite.workers,
       cache: config2.cache ?? suite.cacheConfig?.enabled,
+      cachePath: config2.cachePath ?? suite.cacheConfig?.cachePath,
       budgetUsd: config2.budgetUsd ?? suite.budgetUsd,
       threshold: config2.threshold ?? suite.threshold,
       metadata: config2.metadata ?? suite.metadata,
@@ -37692,6 +38718,7 @@ async function materializeEvalConfig(config2, options) {
     tests,
     workers: config2.workers,
     cache: config2.cache,
+    cachePath: config2.cachePath,
     budgetUsd: config2.budgetUsd,
     threshold: config2.threshold,
     metadata: config2.metadata,
@@ -37809,9 +38836,11 @@ function mapAssertionType(type) {
 }
 function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
   const total = results.length;
+  const qualityResults = results.filter((r) => r.executionStatus !== "execution_error");
+  const executionErrors = total - qualityResults.length;
   let passed = 0;
   let scoreSum = 0;
-  for (const r of results) {
+  for (const r of qualityResults) {
     scoreSum += r.score;
     if (r.score >= threshold) {
       passed++;
@@ -37820,9 +38849,10 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
   return {
     total,
     passed,
-    failed: total - passed,
+    failed: qualityResults.length - passed,
+    executionErrors,
     durationMs,
-    meanScore: total > 0 ? scoreSum / total : 0
+    meanScore: qualityResults.length > 0 ? scoreSum / qualityResults.length : 0
   };
 }
 var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
@@ -37903,7 +38933,12 @@ async function loadTsEvalSuite(filePath, repoRoot, options) {
   return {
     tests: materialized.tests,
     ...materialized.workers !== void 0 && { workers: materialized.workers },
-    ...materialized.cache !== void 0 && { cacheConfig: { enabled: materialized.cache } },
+    ...materialized.cache !== void 0 && {
+      cacheConfig: {
+        enabled: materialized.cache,
+        ...materialized.cachePath !== void 0 && { cachePath: materialized.cachePath }
+      }
+    },
     ...materialized.budgetUsd !== void 0 && { budgetUsd: materialized.budgetUsd },
     ...materialized.threshold !== void 0 && { threshold: materialized.threshold },
     ...materialized.metadata !== void 0 && { metadata: materialized.metadata },
@@ -37936,7 +38971,15 @@ export {
   isJsonValue,
   isTestMessage,
   isGraderKind,
+  RUBRIC_OPERATOR_VALUES,
   parseYamlValue,
+  getAgentvConfigDir,
+  getAgentvHome,
+  getAgentvDataDir,
+  getWorkspacesRoot,
+  getSubagentsRoot,
+  getTraceStateRoot,
+  getWorkspacePoolRoot,
   fileExists,
   normalizeLineEndings,
   readTextFile,
@@ -37956,6 +38999,9 @@ export {
   interpolateEnv,
   loadCasesFromFile,
   loadCasesFromDirectory,
+  ResponseCache,
+  shouldEnableCache,
+  shouldSkipCacheForTemperature,
   DEFAULT_THRESHOLD,
   PASS_THRESHOLD,
   scoreToVerdict,
@@ -37966,13 +39012,6 @@ export {
   parseJsonSafe,
   deepEqual,
   negateScore,
-  getAgentvConfigDir,
-  getAgentvHome,
-  getAgentvDataDir,
-  getWorkspacesRoot,
-  getSubagentsRoot,
-  getTraceStateRoot,
-  getWorkspacePoolRoot,
   toSnakeCaseDeep,
   toCamelCaseDeep,
   CodeGrader,
@@ -37990,7 +39029,28 @@ export {
   extractImageBlocks,
   CompositeGrader,
   CostGrader,
+  NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
+  NORMALIZED_TRACE_SOURCE_KINDS,
+  NORMALIZED_TRACE_EVENT_TYPES,
+  NORMALIZED_TOOL_STATUSES,
+  NORMALIZED_REDACTION_LEVELS,
+  NormalizedRedactionStateWireSchema,
+  NormalizedTraceErrorWireSchema,
+  NormalizedTraceSourceWireSchema,
+  NormalizedTraceSessionWireSchema,
+  NormalizedTraceBranchWireSchema,
+  NormalizedTraceSourceRefWireSchema,
+  NormalizedRawEvidenceWireSchema,
+  NormalizedTraceMessageWireSchema,
+  NormalizedTraceModelWireSchema,
+  NormalizedTraceToolWireSchema,
+  NormalizedTraceEventWireSchema,
+  NormalizedTrajectoryWireSchema,
+  toNormalizedTrajectoryWire,
+  fromNormalizedTrajectoryWire,
   computeTraceSummary,
+  getSelectedTrajectoryEvents,
+  computeTraceSummaryFromTrajectory,
   DEFAULT_EXPLORATION_TOOLS,
   explorationRatio,
   tokensPerTool,
@@ -38071,6 +39131,7 @@ export {
   extractCacheConfig,
   extractFailOnError,
   extractThreshold,
+  resolveResultsConfigForProject,
   detectFormat,
   parseRepoSource,
   parseRepoCheckout,
@@ -38089,4 +39150,4 @@ export {
   loadTsEvalFile,
   loadTsEvalSuite
 };
-//# sourceMappingURL=chunk-TAZBCVEZ.js.map
+//# sourceMappingURL=chunk-6QEIZ33V.js.map