npm - @agentv/core - Versions diffs - 4.6.0 → 4.7.0 - Mend

@agentv/core 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/{chunk-AIQ5FO4G.js → chunk-75RFVESM.js} +273 -125
package/dist/chunk-75RFVESM.js.map +1 -0
package/dist/evaluation/validation/index.cjs +110 -95
package/dist/evaluation/validation/index.cjs.map +1 -1
package/dist/evaluation/validation/index.js +30 -72
package/dist/evaluation/validation/index.js.map +1 -1
package/dist/index.cjs +1488 -517
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +291 -74
package/dist/index.d.ts +291 -74
package/dist/index.js +1187 -369
package/dist/index.js.map +1 -1
package/package.json +1 -1
package/dist/chunk-AIQ5FO4G.js.map +0 -1

package/dist/index.cjs CHANGED Viewed

@@ -31,12 +31,9 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 // ../../node_modules/.bun/tsup@8.3.5+19811ebab77a7b1c/node_modules/tsup/assets/cjs_shims.js
-var getImportMetaUrl, importMetaUrl;
 var init_cjs_shims = __esm({
   "../../node_modules/.bun/tsup@8.3.5+19811ebab77a7b1c/node_modules/tsup/assets/cjs_shims.js"() {
     "use strict";
-    getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.src || new URL("main.js", document.baseURI).href;
-    importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
   }
 });
@@ -1435,6 +1432,7 @@ __export(index_exports, {
   DEFAULT_EVALUATOR_TEMPLATE: () => DEFAULT_EVALUATOR_TEMPLATE,
   DEFAULT_EVAL_PATTERNS: () => DEFAULT_EVAL_PATTERNS,
   DEFAULT_EXPLORATION_TOOLS: () => DEFAULT_EXPLORATION_TOOLS,
+  DEFAULT_THRESHOLD: () => DEFAULT_THRESHOLD,
   DeterministicAssertionEvaluator: () => DeterministicAssertionEvaluator,
   EvaluatorRegistry: () => EvaluatorRegistry,
   ExecutionMetricsEvaluator: () => ExecutionMetricsEvaluator,
@@ -1456,6 +1454,7 @@ __export(index_exports, {
   TemplateNotFoundError: () => TemplateNotFoundError,
   TokenUsageEvaluator: () => TokenUsageEvaluator,
   ToolTrajectoryEvaluator: () => ToolTrajectoryEvaluator,
+  TranscriptProvider: () => TranscriptProvider,
   WorkspaceCreationError: () => WorkspaceCreationError,
   WorkspacePoolManager: () => WorkspacePoolManager,
   addProject: () => addProject,
@@ -1492,6 +1491,7 @@ __export(index_exports, {
   detectFormat: () => detectFormat,
   discoverAssertions: () => discoverAssertions,
   discoverClaudeSessions: () => discoverClaudeSessions,
+  discoverCodexSessions: () => discoverCodexSessions,
   discoverCopilotSessions: () => discoverCopilotSessions,
   discoverGraders: () => discoverGraders,
   discoverJudges: () => discoverGraders,
@@ -1552,6 +1552,8 @@ __export(index_exports, {
   normalizeLineEndings: () => normalizeLineEndings,
   parseAgentSkillsEvals: () => parseAgentSkillsEvals,
   parseClaudeSession: () => parseClaudeSession,
+  parseCodexSession: () => parseCodexSession,
+  parseCopilotEvents: () => parseCopilotEvents,
   parseJsonFromText: () => parseJsonFromText,
   parseJsonSafe: () => parseJsonSafe,
   readJsonFile: () => readJsonFile,
@@ -1559,8 +1561,10 @@ __export(index_exports, {
   readTestSuiteMetadata: () => readTestSuiteMetadata,
   readTextFile: () => readTextFile,
   readTranscriptFile: () => readTranscriptFile,
+  readTranscriptJsonl: () => readTranscriptJsonl,
   removeProject: () => removeProject,
   resolveAndCreateProvider: () => resolveAndCreateProvider,
+  resolveDelegatedTargetDefinition: () => resolveDelegatedTargetDefinition,
   resolveFileReference: () => resolveFileReference3,
   resolveTargetDefinition: () => resolveTargetDefinition,
   resolveWorkspaceTemplate: () => resolveWorkspaceTemplate,
@@ -1590,6 +1594,7 @@ __export(index_exports, {
   substituteVariables: () => substituteVariables,
   toCamelCaseDeep: () => toCamelCaseDeep,
   toSnakeCaseDeep: () => toSnakeCaseDeep,
+  toTranscriptJsonLine: () => toTranscriptJsonLine,
   tokensPerTool: () => tokensPerTool,
   touchProject: () => touchProject,
   transpileEvalYaml: () => transpileEvalYaml,
@@ -2674,8 +2679,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
     const negate = rawEvaluator.negate === true ? true : void 0;
     if (isCustomType) {
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
-      const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "negate"]);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
+      const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "min_score", "negate"]);
       const config2 = {};
       for (const [key, value] of Object.entries(rawEvaluator)) {
         if (!knownProps2.has(key) && value !== void 0) {
@@ -2687,6 +2697,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         type: customTypeName,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {},
         ...Object.keys(config2).length > 0 ? { config: config2 } : {}
       });
@@ -2756,7 +2767,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
           );
         }
       }
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       const knownProps2 = /* @__PURE__ */ new Set([
         "name",
         "type",
@@ -2782,6 +2798,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         resolvedCwd,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {},
         ...Object.keys(config2).length > 0 ? { config: config2 } : {},
         ...targetConfig !== void 0 ? { target: targetConfig } : {}
@@ -2910,7 +2927,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         };
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "composite",
@@ -2918,6 +2940,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         aggregator,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3028,7 +3051,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       const config2 = {
         name,
         type: "tool-trajectory",
@@ -3037,6 +3065,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...expected ? { expected } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {},
         ...argsMatch2 !== void 0 ? { argsMatch: argsMatch2 } : {}
       };
@@ -3099,7 +3128,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       const aggregation = asString(rawEvaluator.aggregation);
       const validAggregation = isValidFieldAggregationType(aggregation) ? aggregation : void 0;
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "field-accuracy",
@@ -3107,6 +3141,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...validAggregation ? { aggregation: validAggregation } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3120,13 +3155,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "latency",
         threshold,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3140,13 +3181,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "cost",
         budget,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3178,13 +3225,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "token-usage",
         ...validLimits,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3230,13 +3283,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "execution-metrics",
         ...validThresholds,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3250,7 +3309,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       const rawShouldTrigger = rawEvaluator.should_trigger;
       const shouldTrigger = typeof rawShouldTrigger === "boolean" ? rawShouldTrigger : void 0;
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "skill-trigger",
@@ -3258,6 +3322,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...shouldTrigger !== void 0 ? { should_trigger: shouldTrigger } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3269,13 +3334,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "contains",
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3289,13 +3360,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: typeValue,
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3307,13 +3384,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "icontains",
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3327,13 +3410,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: typeValue,
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3345,13 +3434,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: typeValue,
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3364,7 +3459,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       }
       const flags = asString(rawEvaluator.flags);
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "regex",
@@ -3372,18 +3472,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...flags !== void 0 ? { flags } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
     }
     if (typeValue === "is-json") {
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "is-json",
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3395,13 +3502,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "equals",
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3437,7 +3550,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "llm-grader",
@@ -3445,6 +3563,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...graderTargetName ? { target: graderTargetName } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -3514,7 +3633,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name,
+        evalId
+      );
       evaluators.push({
         name,
         type: "llm-grader",
@@ -3522,12 +3646,18 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...graderTargetName ? { target: graderTargetName } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
     }
     const weight = validateWeight(rawEvaluator.weight, name, evalId);
-    const required = parseRequired(rawEvaluator.required);
+    const { required, min_score } = parseRequiredAndMinScore(
+      rawEvaluator.required,
+      rawEvaluator.min_score,
+      name,
+      evalId
+    );
     const knownProps = /* @__PURE__ */ new Set([
       "name",
       "type",
@@ -3538,6 +3668,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       "weight",
       "config",
       "required",
+      "min_score",
       "negate",
       "max_steps",
       "maxSteps",
@@ -3567,6 +3698,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       ...graderTargetName ? { target: graderTargetName } : {},
       ...weight !== void 0 ? { weight } : {},
       ...required !== void 0 ? { required } : {},
+      ...min_score !== void 0 ? { min_score } : {},
       ...negate !== void 0 ? { negate } : {},
       ...finalConfig ? { config: finalConfig } : {},
       ...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
@@ -3698,10 +3830,23 @@ ${detailBlock}${ANSI_RESET5}`);
     console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET5}`);
   }
 }
-function parseRequired(value) {
-  if (value === true) return true;
-  if (typeof value === "number" && value > 0 && value <= 1) return value;
-  return void 0;
+function parseRequiredAndMinScore(rawRequired, rawMinScore, evaluatorName, evalId) {
+  const result = {};
+  if (typeof rawMinScore === "number" && rawMinScore > 0 && rawMinScore <= 1) {
+    result.min_score = rawMinScore;
+  }
+  if (rawRequired === true) {
+    result.required = true;
+  } else if (typeof rawRequired === "number" && rawRequired > 0 && rawRequired <= 1) {
+    if (result.min_score === void 0) {
+      result.min_score = rawRequired;
+    }
+    result.required = rawRequired;
+    logWarning2(
+      `Evaluator '${evaluatorName}' in '${evalId}': 'required: ${rawRequired}' is deprecated. Use 'required: true' + 'min_score: ${rawRequired}' instead.`
+    );
+  }
+  return result;
 }
 function validateWeight(rawWeight, evaluatorName, evalId) {
   if (rawWeight === void 0) {
@@ -3744,16 +3889,30 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
     const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
     const expectedOutcome = asString(rawRubric.outcome) ?? "";
     const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
+    let minScore;
     let requiredMinScore;
     let required;
-    if (typeof rawRubric.required_min_score === "number") {
-      const minScore = rawRubric.required_min_score;
-      if (!Number.isInteger(minScore) || minScore < 0 || minScore > 10) {
+    if (typeof rawRubric.min_score === "number") {
+      const ms = rawRubric.min_score;
+      if (ms <= 0 || ms > 1) {
         throw new Error(
-          `Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${minScore})`
+          `Invalid min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be in (0, 1] (got ${ms})`
         );
       }
-      requiredMinScore = minScore;
+      minScore = ms;
+      requiredMinScore = Math.round(ms * 10);
+    } else if (typeof rawRubric.required_min_score === "number") {
+      const rms = rawRubric.required_min_score;
+      if (!Number.isInteger(rms) || rms < 0 || rms > 10) {
+        throw new Error(
+          `Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${rms})`
+        );
+      }
+      requiredMinScore = rms;
+      minScore = rms / 10;
+      logWarning2(
+        `Rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': 'required_min_score: ${rms}' is deprecated. Use 'min_score: ${rms / 10}' (0-1 scale) instead.`
+      );
     }
     if (typeof rawRubric.required === "boolean") {
       required = rawRubric.required;
@@ -3773,6 +3932,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
         weight,
         ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
         ...required !== void 0 ? { required } : {},
+        ...minScore !== void 0 ? { min_score: minScore } : {},
         ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
         score_ranges: scoreRanges
       });
@@ -3789,6 +3949,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
         weight,
         // Default to required: true if not specified (backward compatibility)
         required: required ?? true,
+        ...minScore !== void 0 ? { min_score: minScore } : {},
         ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {}
       });
     }
@@ -3917,12 +4078,22 @@ function parseInlineRubrics(rawRubrics) {
       id: asString(rubric.id) ?? `rubric-${index + 1}`,
       weight: typeof rubric.weight === "number" ? rubric.weight : 1
     };
+    let inlineMinScore;
+    let inlineRequiredMinScore;
+    if (typeof rubric.min_score === "number") {
+      inlineMinScore = rubric.min_score;
+      inlineRequiredMinScore = Math.round(inlineMinScore * 10);
+    } else if (typeof rubric.required_min_score === "number") {
+      inlineRequiredMinScore = rubric.required_min_score;
+      inlineMinScore = inlineRequiredMinScore / 10;
+    }
     if (scoreRanges && scoreRanges.length > 0) {
       return {
         ...baseRubric,
         ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
         ...typeof rubric.required === "boolean" ? { required: rubric.required } : {},
-        ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {},
+        ...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
+        ...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {},
         score_ranges: scoreRanges
       };
     }
@@ -3930,7 +4101,8 @@ function parseInlineRubrics(rawRubrics) {
       ...baseRubric,
       outcome: expectedOutcome,
       required: typeof rubric.required === "boolean" ? rubric.required : true,
-      ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {}
+      ...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
+      ...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {}
     };
   }).filter((r) => r.outcome && r.outcome.length > 0 || "score_ranges" in r && r.score_ranges);
   if (rubricItems.length === 0) {
@@ -4334,6 +4506,9 @@ function resolveExpectedMessages(raw) {
 var ANSI_YELLOW6 = "\x1B[33m";
 var ANSI_RED2 = "\x1B[31m";
 var ANSI_RESET7 = "\x1B[0m";
+function matchesFilter(id, filter) {
+  return typeof filter === "string" ? import_micromatch.default.isMatch(id, filter) : filter.some((pattern) => import_micromatch.default.isMatch(id, pattern));
+}
 function detectFormat(filePath) {
   const ext = import_node_path7.default.extname(filePath).toLowerCase();
   if (ext === ".jsonl") return "jsonl";
@@ -4401,40 +4576,40 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
   const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
   const rawFile = await (0, import_promises7.readFile)(absoluteTestPath, "utf8");
   const rawCases = parseJsonlContent(rawFile, evalFilePath);
-  const fallbackEvalSet = import_node_path7.default.basename(absoluteTestPath, ".jsonl") || "eval";
-  const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
+  const fallbackSuiteName = import_node_path7.default.basename(absoluteTestPath, ".jsonl") || "eval";
+  const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
   const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
   const globalExecution = sidecar.execution;
   if (verbose) {
     console.log(`
-[JSONL Dataset: ${evalFilePath}]`);
+[JSONL Suite: ${evalFilePath}]`);
     console.log(`  Cases: ${rawCases.length}`);
-    console.log(`  Eval set: ${evalSetName}`);
+    console.log(`  Suite: ${suiteName}`);
     if (sidecar.description) {
       console.log(`  Description: ${sidecar.description}`);
     }
   }
   const results = [];
   for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
-    const evalcase = rawCases[lineIndex];
+    const testCaseConfig = rawCases[lineIndex];
     const lineNumber = lineIndex + 1;
-    const id = asString4(evalcase.id);
-    if (filterPattern && (!id || !import_micromatch.default.isMatch(id, filterPattern))) {
+    const id = asString4(testCaseConfig.id);
+    if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
       continue;
     }
-    const conversationId = asString4(evalcase.conversation_id);
-    let outcome = asString4(evalcase.criteria);
-    if (!outcome && evalcase.expected_outcome !== void 0) {
-      outcome = asString4(evalcase.expected_outcome);
+    const conversationId = asString4(testCaseConfig.conversation_id);
+    let outcome = asString4(testCaseConfig.criteria);
+    if (!outcome && testCaseConfig.expected_outcome !== void 0) {
+      outcome = asString4(testCaseConfig.expected_outcome);
       if (outcome) {
         logWarning4(
-          `Test '${asString4(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
+          `Test '${asString4(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
         );
       }
     }
-    const rawInputMessages = resolveInputMessages(evalcase);
-    const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
-    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
+    const rawInputMessages = resolveInputMessages(testCaseConfig);
+    const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
+    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assert !== void 0;
     if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
       logError2(
         `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
@@ -4471,18 +4646,23 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
       }
     }
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
+    const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
     const mergedExecution = caseExecution ?? globalExecution;
-    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
     let evaluators;
     try {
-      evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
+      evaluators = await parseEvaluators(
+        testCaseConfig,
+        mergedExecution,
+        searchRoots,
+        id ?? "unknown"
+      );
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       logError2(`Skipping test '${id}' at line ${lineNumber}: ${message}`);
       continue;
     }
-    const inlineRubrics = evalcase.rubrics;
+    const inlineRubrics = testCaseConfig.rubrics;
     if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
       const rubricEvaluator = parseInlineRubrics(inlineRubrics);
       if (rubricEvaluator) {
@@ -4493,7 +4673,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
     const userFilePaths = collectResolvedInputFilePaths(inputMessages);
     const testCase = {
       id,
-      dataset: evalSetName,
+      suite: suiteName,
       conversation_id: conversationId,
       question,
       input: inputMessages,
@@ -4501,7 +4681,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
       criteria: outcome ?? "",
-      evaluator: evalCaseEvaluatorKind,
+      evaluator: testCaseEvaluatorKind,
       assertions: evaluators
     };
     results.push(testCase);
@@ -4686,6 +4866,9 @@ function buildChatPromptFromSegments(options) {
 var ANSI_YELLOW7 = "\x1B[33m";
 var ANSI_RED3 = "\x1B[31m";
 var ANSI_RESET8 = "\x1B[0m";
+/**
+ * Reports whether a test id matches the requested filter.
+ * A string filter is used as a single micromatch pattern; any other filter
+ * is iterated with `.some`, matching when at least one pattern matches the id.
+ */
+function matchesFilter2(id, filter) {
+  if (typeof filter === "string") {
+    return import_micromatch2.default.isMatch(id, filter);
+  }
+  return filter.some((glob) => import_micromatch2.default.isMatch(id, glob));
+}
 function resolveTests(suite) {
   if (suite.tests !== void 0) return suite.tests;
   if (suite.eval_cases !== void 0) {
@@ -4765,18 +4948,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = interpolated;
-  const evalSetNameFromSuite = asString5(suite.name)?.trim();
-  const fallbackEvalSet = import_node_path8.default.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
-  const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
-  const rawTestcases = resolveTests(suite);
+  const suiteNameFromFile = asString5(suite.name)?.trim();
+  const fallbackSuiteName = import_node_path8.default.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
+  const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
+  const rawTestCases = resolveTests(suite);
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
   const evalFileDir = import_node_path8.default.dirname(absoluteTestPath);
-  let expandedTestcases;
-  if (typeof rawTestcases === "string") {
-    const externalPath = import_node_path8.default.resolve(evalFileDir, rawTestcases);
-    expandedTestcases = await loadCasesFromFile(externalPath);
-  } else if (Array.isArray(rawTestcases)) {
-    expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
+  let expandedTestCases;
+  if (typeof rawTestCases === "string") {
+    const externalPath = import_node_path8.default.resolve(evalFileDir, rawTestCases);
+    expandedTestCases = await loadCasesFromFile(externalPath);
+  } else if (Array.isArray(rawTestCases)) {
+    expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
   } else {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
@@ -4791,32 +4974,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   }
   const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
   const results = [];
-  for (const rawEvalcase of expandedTestcases) {
-    if (!isJsonObject(rawEvalcase)) {
+  for (const rawTestCase of expandedTestCases) {
+    if (!isJsonObject(rawTestCase)) {
       logWarning5("Skipping invalid test entry (expected object)");
       continue;
     }
-    const evalcase = rawEvalcase;
-    const id = asString5(evalcase.id);
-    if (filterPattern && (!id || !import_micromatch2.default.isMatch(id, filterPattern))) {
+    const testCaseConfig = rawTestCase;
+    const id = asString5(testCaseConfig.id);
+    if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
       continue;
     }
-    const conversationId = asString5(evalcase.conversation_id);
-    let outcome = asString5(evalcase.criteria);
-    if (!outcome && evalcase.expected_outcome !== void 0) {
-      outcome = asString5(evalcase.expected_outcome);
+    const conversationId = asString5(testCaseConfig.conversation_id);
+    let outcome = asString5(testCaseConfig.criteria);
+    if (!outcome && testCaseConfig.expected_outcome !== void 0) {
+      outcome = asString5(testCaseConfig.expected_outcome);
       if (outcome) {
         logWarning5(
-          `Test '${asString5(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
+          `Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
         );
       }
     }
-    const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
+    const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
     const skipDefaults = caseExecution?.skip_defaults === true;
+    const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
     const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
-    const testInputMessages = resolveInputMessages(evalcase, effectiveSuiteInputFiles);
-    const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
-    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assertions !== void 0 || evalcase.assert !== void 0;
+    const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
+    const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
+    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0;
     if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
       logError3(
         `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions`
@@ -4863,16 +5047,21 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       }
     }
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
     let evaluators;
     try {
-      evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+      evaluators = await parseEvaluators(
+        testCaseConfig,
+        globalExecution,
+        searchRoots,
+        id ?? "unknown"
+      );
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       logError3(`Skipping test '${id}': ${message}`);
       continue;
     }
-    const inlineRubrics = evalcase.rubrics;
+    const inlineRubrics = testCaseConfig.rubrics;
     if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
       const rubricEvaluator = parseInlineRubrics(inlineRubrics);
       if (rubricEvaluator) {
@@ -4881,13 +5070,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     }
     warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
     const userFilePaths = collectResolvedInputFilePaths(inputMessages);
-    const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
+    const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
-    const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
-    const caseTargets = extractTargetsFromTestCase(evalcase);
+    const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
+    const caseTargets = extractTargetsFromTestCase(testCaseConfig);
     const testCase = {
       id,
-      dataset: evalSetName,
+      suite: suiteName,
       category: options?.category,
       conversation_id: conversationId,
       question,
@@ -4896,11 +5085,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
       criteria: outcome ?? "",
-      evaluator: evalCaseEvaluatorKind,
+      evaluator: testCaseEvaluatorKind,
       assertions: evaluators,
       workspace: mergedWorkspace,
       metadata,
-      targets: caseTargets
+      targets: caseTargets,
+      ...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
     };
     results.push(testCase);
   }
@@ -5566,7 +5756,7 @@ var AzureProvider = class {
     };
     this.retryConfig = config.retry;
     const azure = (0, import_azure2.createAzure)(buildAzureOptions(config));
-    this.model = azure.chat(config.deploymentName);
+    this.model = config.apiFormat === "responses" ? azure(config.deploymentName) : azure.chat(config.deploymentName);
   }
   id;
   kind = "azure";
@@ -5692,7 +5882,9 @@ function buildAzureOptions(config) {
   const options = {
     apiKey: config.apiKey,
     apiVersion: config.version,
-    useDeploymentBasedUrls: true
+    // Chat completions still use deployment-scoped Azure URLs for compatibility
+    // with existing deployments. Responses API should use the SDK's v1 path.
+    useDeploymentBasedUrls: config.apiFormat !== "responses"
   };
   const baseURL = normalizeAzureBaseUrl(config.resourceName);
   if (baseURL) {
@@ -7169,15 +7361,16 @@ var CliProvider = class {
       outputFilePath
     );
     const renderedCommand = renderTemplate(this.config.command, templateValues);
+    const effectiveCwd = requests[0]?.cwd ?? this.config.cwd;
     if (this.verbose) {
       console.log(
-        `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
+        `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${effectiveCwd ?? ""} command=${renderedCommand}`
       );
     }
     try {
       const startTime = Date.now();
       const result = await this.runCommand(renderedCommand, {
-        cwd: this.config.cwd,
+        cwd: effectiveCwd,
         env: process.env,
         timeoutMs: this.config.timeoutMs,
         signal: controller.signal
@@ -7210,7 +7403,7 @@ var CliProvider = class {
               command: renderedCommand,
               stderr: result.stderr,
               exitCode: result.exitCode ?? 0,
-              cwd: this.config.cwd,
+              cwd: effectiveCwd,
               outputFile: outputFilePath
             }
           };
@@ -7228,7 +7421,7 @@ var CliProvider = class {
               command: renderedCommand,
               stderr: result.stderr,
               exitCode: result.exitCode ?? 0,
-              cwd: this.config.cwd,
+              cwd: effectiveCwd,
               outputFile: outputFilePath,
               error: errorMessage
             }
@@ -7243,7 +7436,7 @@ var CliProvider = class {
             command: renderedCommand,
             stderr: result.stderr,
             exitCode: result.exitCode ?? 0,
-            cwd: this.config.cwd,
+            cwd: effectiveCwd,
             outputFile: outputFilePath,
             recordId: evalCaseId
           }
@@ -9267,6 +9460,76 @@ function subscribeToPiLogEntries(listener) {
   };
 }
+// src/evaluation/providers/pi-provider-aliases.ts
+init_cjs_shims();
+// Lower-cased subprovider name -> provider id used when no base-URL alias
+// applies (read by resolveSubprovider).
+var SUBPROVIDER_ALIASES = {
+  azure: "azure-openai-responses"
+};
+// Lower-cased subprovider name -> provider id used when a base URL is
+// configured (read by resolveSubprovider before SUBPROVIDER_ALIASES).
+var SUBPROVIDER_ALIASES_WITH_BASE_URL = {
+  // Azure v1 endpoints are OpenAI-compatible; use the standard client
+  // to avoid AzureOpenAI adding api-version query params.
+  azure: "openai-responses"
+};
+// Lower-cased provider name -> environment variable that carries its API key
+// (read by resolveEnvKeyName).
+var ENV_KEY_MAP = {
+  google: "GEMINI_API_KEY",
+  gemini: "GEMINI_API_KEY",
+  anthropic: "ANTHROPIC_API_KEY",
+  openai: "OPENAI_API_KEY",
+  groq: "GROQ_API_KEY",
+  xai: "XAI_API_KEY",
+  openrouter: "OPENROUTER_API_KEY",
+  azure: "AZURE_OPENAI_API_KEY"
+};
+// Lower-cased provider name -> environment variable that carries its base URL
+// (read by resolveEnvBaseUrlName). Providers without an entry resolve to undefined.
+var ENV_BASE_URL_MAP = {
+  openai: "OPENAI_BASE_URL",
+  azure: "AZURE_OPENAI_BASE_URL",
+  openrouter: "OPENROUTER_BASE_URL"
+};
+/**
+ * Maps a configured subprovider name to the provider id to use.
+ * The lookup is case-insensitive. When `hasBaseUrl` is true the base-URL
+ * alias table is consulted first; if it has no entry (or `hasBaseUrl` is
+ * false) the default alias table is used, falling back to `name` as given.
+ */
+function resolveSubprovider(name, hasBaseUrl = false) {
+  const key = name.toLowerCase();
+  const baseUrlAlias = hasBaseUrl ? SUBPROVIDER_ALIASES_WITH_BASE_URL[key] : void 0;
+  if (baseUrlAlias) {
+    return baseUrlAlias;
+  }
+  return SUBPROVIDER_ALIASES[key] ?? name;
+}
+/**
+ * Returns the provider id to pass on the CLI: "azure" (any letter case)
+ * becomes "azure-openai-responses"; every other name is returned unchanged.
+ */
+function resolveCliProvider(name) {
+  const isAzure = name.toLowerCase() === "azure";
+  return isAzure ? "azure-openai-responses" : name;
+}
+/**
+ * Returns the name of the environment variable that holds the API key for
+ * `provider` (case-insensitive), or undefined when the provider is unknown.
+ * Azure combined with a base URL resolves to "OPENAI_API_KEY" instead of the
+ * Azure-specific entry in ENV_KEY_MAP.
+ */
+function resolveEnvKeyName(provider, hasBaseUrl = false) {
+  const key = provider.toLowerCase();
+  const azureWithBaseUrl = hasBaseUrl && key === "azure";
+  return azureWithBaseUrl ? "OPENAI_API_KEY" : ENV_KEY_MAP[key];
+}
+/**
+ * Returns the name of the environment variable that holds the base URL for
+ * `provider` (case-insensitive), or undefined when none is mapped.
+ * Azure combined with a base URL resolves to "OPENAI_BASE_URL" instead of the
+ * Azure-specific entry in ENV_BASE_URL_MAP.
+ */
+function resolveEnvBaseUrlName(provider, hasBaseUrl = false) {
+  const key = provider.toLowerCase();
+  const azureWithBaseUrl = hasBaseUrl && key === "azure";
+  return azureWithBaseUrl ? "OPENAI_BASE_URL" : ENV_BASE_URL_MAP[key];
+}
+/**
+ * Extracts the Azure resource name (the first host label) from a base URL.
+ *
+ * Accepts a full URL ("https://myres.openai.azure.com/..."), a schemeless
+ * host ("myres.openai.azure.com") or a bare resource name ("myres").
+ * The scheme is matched case-insensitively and surrounding whitespace is
+ * ignored. When no host label can be found the trimmed input is returned.
+ *
+ * @param {string} baseUrl - configured base URL or resource name
+ * @returns {string} the resource name, or the trimmed input as a fallback
+ */
+function extractAzureResourceName(baseUrl) {
+  const trimmed = baseUrl.trim();
+  // The scheme is optional so that "myres.openai.azure.com" yields "myres"
+  // rather than being passed through as the whole hostname.
+  const hostLabel = trimmed.match(/^(?:https?:\/\/)?([^./]+)/i);
+  return hostLabel ? hostLabel[1] : trimmed;
+}
+/**
+ * Normalizes an Azure base URL or resource name to the ".../openai/v1" form.
+ *
+ * - Blank input is returned as an empty string.
+ * - A bare resource name ("myres") expands to
+ *   "https://myres.openai.azure.com/openai/v1".
+ * - A schemeless host ("myres.openai.azure.com[/path]") gets "https://"
+ *   prepended instead of being mistaken for a resource name.
+ * - Trailing slashes are dropped, and "/openai" or "/openai/v1" is appended
+ *   only when it is not already the final path segment(s).
+ *
+ * @param {string} baseUrl - configured base URL, host, or resource name
+ * @returns {string} normalized base URL ("" for blank input)
+ */
+function normalizeAzureSdkBaseUrl(baseUrl) {
+  const trimmed = baseUrl.trim().replace(/\/+$/, "");
+  if (!trimmed) {
+    return trimmed;
+  }
+  let url = trimmed;
+  if (!/^https?:\/\//i.test(url)) {
+    // Resource names cannot contain ".", "/" or ":", so any of those means
+    // the caller supplied a host (possibly with a path) without a scheme.
+    url = /[./:]/.test(url) ? `https://${url}` : `https://${url}.openai.azure.com`;
+  }
+  if (/\/openai\/v1$/i.test(url)) {
+    return url;
+  }
+  if (/\/openai$/i.test(url)) {
+    return `${url}/v1`;
+  }
+  return `${url}/openai/v1`;
+}
 // src/evaluation/providers/pi-utils.ts
 init_cjs_shims();
 function extractPiTextContent(content) {
@@ -9426,12 +9689,12 @@ var PiCliProvider = class {
   buildPiArgs(prompt, inputFiles) {
     const args = [];
     if (this.config.subprovider) {
-      args.push("--provider", this.config.subprovider);
+      args.push("--provider", resolveCliProvider(this.config.subprovider));
     }
     if (this.config.model) {
       args.push("--model", this.config.model);
     }
-    if (this.config.apiKey) {
+    if (this.config.apiKey && this.config.subprovider?.toLowerCase() !== "azure") {
       args.push("--api-key", this.config.apiKey);
     }
     args.push("--mode", "json");
@@ -9483,35 +9746,35 @@ ${prompt}` : prompt;
   }
   buildEnv() {
     const env = { ...process.env };
-    if (this.config.apiKey) {
-      const provider = this.config.subprovider?.toLowerCase() ?? "google";
-      const ENV_KEY_MAP = {
-        google: "GEMINI_API_KEY",
-        gemini: "GEMINI_API_KEY",
-        anthropic: "ANTHROPIC_API_KEY",
-        openai: "OPENAI_API_KEY",
-        groq: "GROQ_API_KEY",
-        xai: "XAI_API_KEY",
-        openrouter: "OPENROUTER_API_KEY"
-      };
-      const envKey = ENV_KEY_MAP[provider];
-      if (envKey) {
-        env[envKey] = this.config.apiKey;
+    const provider = this.config.subprovider?.toLowerCase() ?? "google";
+    if (provider === "azure") {
+      if (this.config.apiKey) {
+        env.AZURE_OPENAI_API_KEY = this.config.apiKey;
+      }
+      if (this.config.baseUrl) {
+        env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
+      }
+    } else {
+      if (this.config.apiKey) {
+        const envKey = resolveEnvKeyName(provider);
+        if (envKey) {
+          env[envKey] = this.config.apiKey;
+        }
       }
     }
     if (this.config.subprovider) {
-      const provider = this.config.subprovider.toLowerCase();
+      const resolvedProvider = resolveCliProvider(this.config.subprovider);
       const PROVIDER_OWN_PREFIXES = {
         openrouter: ["OPENROUTER_"],
         anthropic: ["ANTHROPIC_"],
         openai: ["OPENAI_"],
-        azure: ["AZURE_OPENAI_"],
+        "azure-openai-responses": ["AZURE_OPENAI_"],
         google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
         gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
         groq: ["GROQ_"],
         xai: ["XAI_"]
       };
-      const ownPrefixes = PROVIDER_OWN_PREFIXES[provider] ?? [];
+      const ownPrefixes = PROVIDER_OWN_PREFIXES[resolvedProvider] ?? [];
       const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
       for (const key of Object.keys(env)) {
         if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
@@ -9802,6 +10065,24 @@ function extractMessages(events) {
       }
     }
   }
+  if (messages) {
+    for (let i = messages.length - 1; i >= 0; i--) {
+      if (messages[i].role === "assistant" && !messages[i].content) {
+        for (let j = events.length - 1; j >= 0; j--) {
+          const evt = events[j];
+          if (!evt || evt.type !== "message_end") continue;
+          const msg = evt.message;
+          if (msg?.role !== "assistant") continue;
+          const text = extractPiTextContent(msg.content);
+          if (text) {
+            messages[i] = { ...messages[i], content: text };
+            break;
+          }
+        }
+        break;
+      }
+    }
+  }
   const eventToolCalls = extractToolCallsFromEvents(events);
   if (eventToolCalls.length > 0) {
     injectEventToolCalls(messages, eventToolCalls);
@@ -9986,17 +10267,43 @@ function formatTimeoutSuffix3(timeoutMs) {
   if (!timeoutMs || timeoutMs <= 0) return "";
   return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
 }
+/**
+ * Resolves how to launch `executable` on Windows without a shell.
+ *
+ * On non-Windows platforms, and for names ending in ".js" or ".exe", the
+ * executable is returned unchanged. Otherwise the name is located with
+ * `where`; if the matching ".cmd" shim references a "%dp0%"-relative ".js"
+ * script that exists on disk, the result is `["node", [scriptPath]]` so the
+ * script can be spawned directly. Every failure falls back to the original
+ * executable.
+ *
+ * @param {string} executable - command name or path to resolve
+ * @returns {[string, string[]]} the program to spawn and arguments to prepend
+ */
+function resolveWindowsCmd(executable) {
+  if (process.platform !== "win32") return [executable, []];
+  const lower = executable.toLowerCase();
+  if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
+  let fullPath;
+  try {
+    // execFileSync passes the name as a single argv entry, so a configured
+    // executable containing shell metacharacters is never interpreted by
+    // cmd.exe. stderr is ignored to keep `where`'s "not found" message off
+    // the parent's stderr.
+    fullPath = (0, import_node_child_process4.execFileSync)("where", [executable], {
+      encoding: "utf-8",
+      stdio: ["ignore", "pipe", "ignore"]
+    }).trim().split(/\r?\n/)[0].trim();
+  } catch {
+    // Not on PATH (or `where` unavailable): let spawn report the error.
+    return [executable, []];
+  }
+  // Windows paths are case-insensitive, so "PI.CMD" is already a shim path.
+  const cmdPath = fullPath.toLowerCase().endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
+  try {
+    const content = (0, import_node_fs9.readFileSync)(cmdPath, "utf-8");
+    const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
+    if (match) {
+      const dp0 = import_node_path21.default.dirname(import_node_path21.default.resolve(cmdPath));
+      const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${import_node_path21.default.sep}`);
+      try {
+        (0, import_node_fs9.accessSync)(scriptPath);
+        return ["node", [scriptPath]];
+      } catch {
+        // Script referenced by the shim is not accessible; use the fallback.
+      }
+    }
+  } catch {
+    // No readable .cmd shim next to the resolved path; use the fallback.
+  }
+  return [executable, []];
+}
 async function defaultPiRunner(options) {
   return await new Promise((resolve, reject) => {
     const parts = options.executable.split(/\s+/);
-    const executable = parts[0];
-    const executableArgs = parts.slice(1);
+    const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
+    const executableArgs = [...prefixArgs, ...parts.slice(1)];
     const allArgs = [...executableArgs, ...options.args];
-    const child = (0, import_node_child_process4.spawn)(executable, allArgs, {
+    const child = (0, import_node_child_process4.spawn)(resolvedExe, allArgs, {
       cwd: options.cwd,
       env: options.env,
-      stdio: ["pipe", "pipe", "pipe"],
-      shell: false
+      stdio: ["pipe", "pipe", "pipe"]
     });
     let stdout = "";
     let stderr = "";
@@ -10056,9 +10363,40 @@ var import_node_child_process5 = require("child_process");
 var import_node_crypto8 = require("crypto");
 var import_node_fs10 = require("fs");
 var import_promises19 = require("fs/promises");
-var import_node_path22 = __toESM(require("path"), 1);
+var import_node_path23 = __toESM(require("path"), 1);
 var import_node_readline = require("readline");
 var import_node_url3 = require("url");
+// src/paths.ts
+init_cjs_shims();
+var import_node_os6 = __toESM(require("os"), 1);
+var import_node_path22 = __toESM(require("path"), 1);
+// Set once the AGENTV_HOME notice has been printed, so it appears only once.
+var logged = false;
+/**
+ * Returns the AgentV home directory.
+ * A non-empty AGENTV_HOME environment variable (other than the literal
+ * string "undefined") is returned as-is, with a one-time warning on first
+ * use; otherwise the result is ".agentv" inside the user's home directory.
+ */
+function getAgentvHome() {
+  const override = process.env.AGENTV_HOME;
+  const hasOverride = Boolean(override) && override !== "undefined";
+  if (!hasOverride) {
+    return import_node_path22.default.join(import_node_os6.default.homedir(), ".agentv");
+  }
+  if (!logged) {
+    logged = true;
+    console.warn(`Using AGENTV_HOME: ${override}`);
+  }
+  return override;
+}
+/** Returns the "workspaces" directory path under the AgentV home. */
+function getWorkspacesRoot() {
+  const home = getAgentvHome();
+  return import_node_path22.default.join(home, "workspaces");
+}
+/** Returns the "subagents" directory path under the AgentV home. */
+function getSubagentsRoot() {
+  const home = getAgentvHome();
+  return import_node_path22.default.join(home, "subagents");
+}
+/** Returns the "trace-state" directory path under the AgentV home. */
+function getTraceStateRoot() {
+  const home = getAgentvHome();
+  return import_node_path22.default.join(home, "trace-state");
+}
+/** Returns the "workspace-pool" directory path under the AgentV home. */
+function getWorkspacePoolRoot() {
+  const home = getAgentvHome();
+  return import_node_path22.default.join(home, "workspace-pool");
+}
+// src/evaluation/providers/pi-coding-agent.ts
 var piCodingAgentModule = null;
 var piAiModule = null;
 var loadingPromise = null;
@@ -10076,46 +10414,126 @@ async function promptInstall() {
     rl.close();
   }
 }
-function findAgentvRoot() {
-  const thisFile = (0, import_node_url3.fileURLToPath)(importMetaUrl);
-  let dir = import_node_path22.default.dirname(thisFile);
-  for (let i = 0; i < 10; i++) {
+/** Returns the "deps/pi-sdk" directory path under the AgentV home. */
+function findManagedSdkInstallRoot() {
+  const home = getAgentvHome();
+  return import_node_path23.default.join(home, "deps", "pi-sdk");
+}
+/**
+ * Runs `npm root -g` and returns its trimmed output.
+ * Returns undefined when the command fails or prints nothing; the child's
+ * stdin and stderr are ignored.
+ */
+function resolveGlobalNpmRoot() {
+  let output;
+  try {
+    output = (0, import_node_child_process5.execSync)("npm root -g", {
+      encoding: "utf-8",
+      stdio: ["ignore", "pipe", "ignore"]
+    });
+  } catch {
+    return void 0;
+  }
+  const root = output.trim();
+  if (root.length === 0) {
+    return void 0;
+  }
+  return root;
+}
+/**
+ * Builds the path to a package's "dist/index.js" under `globalNpmRoot`.
+ * `moduleName` is split on "/" so a scoped name becomes nested directories.
+ */
+function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
+  const packageSegments = moduleName.split("/");
+  return import_node_path23.default.join(globalNpmRoot, ...packageSegments, "dist", "index.js");
+}
+function findAccessiblePath(paths) {
+  for (const candidate of paths) {
     try {
-      const pkg = import_node_path22.default.join(dir, "package.json");
-      (0, import_node_fs10.accessSync)(pkg);
-      return dir;
+      (0, import_node_fs10.accessSync)(candidate);
+      return candidate;
     } catch {
-      const parent = import_node_path22.default.dirname(dir);
-      if (parent === dir) break;
-      dir = parent;
     }
   }
-  return import_node_path22.default.dirname(thisFile);
+  return void 0;
 }
-async function doLoadSdkModules() {
+async function tryImportLocalSdkModules() {
   try {
     [piCodingAgentModule, piAiModule] = await Promise.all([
       import("@mariozechner/pi-coding-agent"),
       import("@mariozechner/pi-ai")
     ]);
+    return true;
   } catch {
-    if (await promptInstall()) {
-      const installDir = findAgentvRoot();
-      console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
-      (0, import_node_child_process5.execSync)("bun add @mariozechner/pi-coding-agent", {
-        cwd: installDir,
-        stdio: "inherit"
-      });
-      [piCodingAgentModule, piAiModule] = await Promise.all([
-        import("@mariozechner/pi-coding-agent"),
-        import("@mariozechner/pi-ai")
-      ]);
-    } else {
-      throw new Error(
-        "pi-coding-agent SDK is not installed. Install it with:\n  bun add @mariozechner/pi-coding-agent"
-      );
+    return false;
+  }
+}
+/**
+ * Tries to load the pi SDK modules from the managed install root.
+ * pi-ai is looked up first as a sibling of pi-coding-agent and then nested
+ * inside pi-coding-agent's own node_modules. Returns true only when both
+ * entry files are accessible and both dynamic imports succeed; in that case
+ * the module-level piCodingAgentModule / piAiModule bindings are assigned.
+ */
+async function tryImportManagedSdkModules() {
+  const managedRoot = findManagedSdkInstallRoot();
+  const entryFor = (...packagePath) => import_node_path23.default.join(managedRoot, "node_modules", ...packagePath, "dist", "index.js");
+  const codingAgentEntry = findAccessiblePath([
+    entryFor("@mariozechner", "pi-coding-agent")
+  ]);
+  const aiEntry = findAccessiblePath([
+    entryFor("@mariozechner", "pi-ai"),
+    entryFor("@mariozechner", "pi-coding-agent", "node_modules", "@mariozechner", "pi-ai")
+  ]);
+  if (!codingAgentEntry || !aiEntry) {
+    return false;
+  }
+  try {
+    const codingAgentHref = (0, import_node_url3.pathToFileURL)(codingAgentEntry).href;
+    const aiHref = (0, import_node_url3.pathToFileURL)(aiEntry).href;
+    [piCodingAgentModule, piAiModule] = await Promise.all([
+      import(codingAgentHref),
+      import(aiHref)
+    ]);
+  } catch {
+    return false;
+  }
+  return true;
+}
+/**
+ * Tries to load the pi SDK modules from the global npm root.
+ * Returns false when the global root cannot be resolved, when either entry
+ * file is missing, or when a dynamic import fails. pi-ai is looked up first
+ * as a top-level global package and then nested inside pi-coding-agent's own
+ * node_modules. On success the module-level piCodingAgentModule / piAiModule
+ * bindings are assigned and true is returned.
+ */
+async function tryImportGlobalSdkModules() {
+  const npmRoot = resolveGlobalNpmRoot();
+  if (!npmRoot) {
+    return false;
+  }
+  const codingAgentEntry = findAccessiblePath([
+    buildGlobalModuleEntry("@mariozechner/pi-coding-agent", npmRoot)
+  ]);
+  const nestedAiEntry = import_node_path23.default.join(
+    npmRoot,
+    "@mariozechner",
+    "pi-coding-agent",
+    "node_modules",
+    "@mariozechner",
+    "pi-ai",
+    "dist",
+    "index.js"
+  );
+  const aiEntry = findAccessiblePath([
+    buildGlobalModuleEntry("@mariozechner/pi-ai", npmRoot),
+    nestedAiEntry
+  ]);
+  if (!codingAgentEntry || !aiEntry) {
+    return false;
+  }
+  try {
+    const codingAgentHref = (0, import_node_url3.pathToFileURL)(codingAgentEntry).href;
+    const aiHref = (0, import_node_url3.pathToFileURL)(aiEntry).href;
+    [piCodingAgentModule, piAiModule] = await Promise.all([
+      import(codingAgentHref),
+      import(aiHref)
+    ]);
+  } catch {
+    return false;
+  }
+  return true;
+}
+/**
+ * Installs @mariozechner/pi-coding-agent into `installDir` with npm.
+ * The directory is created (recursively) first; npm runs with inherited
+ * stdio, and an execSync failure propagates to the caller.
+ */
+function installSdkModules(installDir) {
+  console.error(`Installing @mariozechner/pi-coding-agent into ${installDir} via npm...`);
+  (0, import_node_fs10.mkdirSync)(installDir, { recursive: true });
+  const npmOptions = {
+    cwd: installDir,
+    stdio: "inherit"
+  };
+  (0, import_node_child_process5.execSync)("npm install --no-save --no-package-lock @mariozechner/pi-coding-agent", npmOptions);
+}
+async function doLoadSdkModules() {
+  if (await tryImportLocalSdkModules() || await tryImportManagedSdkModules() || await tryImportGlobalSdkModules()) {
+    return;
+  }
+  if (await promptInstall()) {
+    const installDir = findManagedSdkInstallRoot();
+    installSdkModules(installDir);
+    if (await tryImportManagedSdkModules()) {
+      return;
     }
   }
+  throw new Error(
+    "pi-coding-agent SDK is not installed. Install it with:\n  npm install @mariozechner/pi-coding-agent"
+  );
 }
 async function loadSdkModules() {
   if (!piCodingAgentModule || !piAiModule) {
@@ -10143,7 +10561,9 @@ async function loadSdkModules() {
     codingTools: piSdk.codingTools,
     toolMap,
     SessionManager: piSdk.SessionManager,
-    getModel: piAi.getModel
+    getModel: piAi.getModel,
+    // biome-ignore lint/suspicious/noExplicitAny: registerBuiltInApiProviders exists at runtime but not in type defs
+    registerBuiltInApiProviders: piAi.registerBuiltInApiProviders
   };
 }
 var PiCodingAgentProvider = class {
@@ -10165,17 +10585,35 @@ var PiCodingAgentProvider = class {
     const startTime = (/* @__PURE__ */ new Date()).toISOString();
     const startMs = Date.now();
     const sdk = await loadSdkModules();
+    sdk.registerBuiltInApiProviders();
     const logger = await this.createStreamLogger(request).catch(() => void 0);
     try {
       const cwd = this.resolveCwd(request.cwd);
-      const providerName = this.config.subprovider ?? "google";
+      const rawProvider = this.config.subprovider ?? "google";
+      const normalizedBaseUrl = this.normalizeSdkBaseUrl(rawProvider, this.config.baseUrl);
+      const hasBaseUrl = !!normalizedBaseUrl;
+      const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
       const modelId = this.config.model ?? "gemini-2.5-flash";
-      this.setApiKeyEnv(providerName);
-      const model = sdk.getModel(providerName, modelId);
+      this.setApiKeyEnv(rawProvider, hasBaseUrl);
+      this.setBaseUrlEnv(rawProvider, normalizedBaseUrl, hasBaseUrl);
+      let model = sdk.getModel(providerName, modelId);
+      if (model && normalizedBaseUrl) {
+        model = { ...model, baseUrl: normalizedBaseUrl };
+      }
       if (!model) {
-        throw new Error(
-          `pi-coding-agent: getModel('${providerName}', '${modelId}') returned undefined. The model '${modelId}' is not registered for provider '${providerName}' in pi-ai. Check that subprovider and model are correct in your target config.`
-        );
+        const envProvider = providerName.replace(/-responses$/, "");
+        model = {
+          id: modelId,
+          name: modelId,
+          api: providerName,
+          provider: envProvider,
+          baseUrl: normalizedBaseUrl ?? "",
+          reasoning: false,
+          input: ["text"],
+          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+          contextWindow: 128e3,
+          maxTokens: 16384
+        };
       }
       const tools = this.resolveTools(sdk);
       const { session } = await sdk.createAgentSession({
@@ -10328,28 +10766,35 @@ ${fileList}`;
     }
   }
   /** Maps config apiKey to the provider-specific env var the SDK reads. */
-  setApiKeyEnv(providerName) {
+  setApiKeyEnv(providerName, hasBaseUrl = false) {
     if (!this.config.apiKey) return;
-    const ENV_KEY_MAP = {
-      google: "GEMINI_API_KEY",
-      gemini: "GEMINI_API_KEY",
-      anthropic: "ANTHROPIC_API_KEY",
-      openai: "OPENAI_API_KEY",
-      groq: "GROQ_API_KEY",
-      xai: "XAI_API_KEY",
-      openrouter: "OPENROUTER_API_KEY"
-    };
-    const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
+    const envKey = resolveEnvKeyName(providerName, hasBaseUrl);
     if (envKey) {
       process.env[envKey] = this.config.apiKey;
     }
   }
+  /**
+   * Exports the configured base URL through the provider-specific env var.
+   * The URL is normalized for the provider first; nothing is written when
+   * there is no URL or no env var is mapped for the provider.
+   */
+  setBaseUrlEnv(providerName, baseUrl = this.config.baseUrl, hasBaseUrl = false) {
+    const resolvedUrl = this.normalizeSdkBaseUrl(providerName, baseUrl);
+    const envName = resolvedUrl ? resolveEnvBaseUrlName(providerName, hasBaseUrl) : void 0;
+    if (envName) {
+      process.env[envName] = resolvedUrl;
+    }
+  }
+  /**
+   * Returns the base URL to hand to the SDK: undefined when none is given,
+   * the Azure-normalized form when the provider is "azure" (any letter
+   * case), and the URL unchanged for every other provider.
+   */
+  normalizeSdkBaseUrl(providerName, baseUrl) {
+    if (!baseUrl) return void 0;
+    const isAzure = providerName.toLowerCase() === "azure";
+    return isAzure ? normalizeAzureSdkBaseUrl(baseUrl) : baseUrl;
+  }
   resolveCwd(cwdOverride) {
     if (cwdOverride) {
-      return import_node_path22.default.resolve(cwdOverride);
+      return import_node_path23.default.resolve(cwdOverride);
     }
     if (this.config.cwd) {
-      return import_node_path22.default.resolve(this.config.cwd);
+      return import_node_path23.default.resolve(this.config.cwd);
     }
     return process.cwd();
   }
@@ -10368,9 +10813,9 @@ ${fileList}`;
   }
   resolveLogDirectory() {
     if (this.config.logDir) {
-      return import_node_path22.default.resolve(this.config.logDir);
+      return import_node_path23.default.resolve(this.config.logDir);
     }
-    return import_node_path22.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
+    return import_node_path23.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -10384,7 +10829,7 @@ ${fileList}`;
       console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = import_node_path22.default.join(logDir, buildLogFilename6(request, this.targetName));
+    const filePath = import_node_path23.default.join(logDir, buildLogFilename6(request, this.targetName));
     try {
       const logger = await PiStreamLogger2.create({
         filePath,
@@ -10599,19 +11044,17 @@ var ProviderRegistry = class {
 // src/evaluation/providers/targets.ts
 init_cjs_shims();
-var import_node_path23 = __toESM(require("path"), 1);
+var import_node_path24 = __toESM(require("path"), 1);
 var import_zod3 = require("zod");
 var CliHealthcheckHttpInputSchema = import_zod3.z.object({
   url: import_zod3.z.string().min(1, "healthcheck URL is required"),
-  timeout_seconds: import_zod3.z.number().positive().optional(),
-  timeoutSeconds: import_zod3.z.number().positive().optional()
-});
+  timeout_seconds: import_zod3.z.number().positive().optional()
+}).passthrough();
 var CliHealthcheckCommandInputSchema = import_zod3.z.object({
   command: import_zod3.z.string().min(1, "healthcheck command is required"),
   cwd: import_zod3.z.string().optional(),
-  timeout_seconds: import_zod3.z.number().positive().optional(),
-  timeoutSeconds: import_zod3.z.number().positive().optional()
-});
+  timeout_seconds: import_zod3.z.number().positive().optional()
+}).passthrough();
 var CliHealthcheckInputSchema = import_zod3.z.union([
   CliHealthcheckHttpInputSchema,
   CliHealthcheckCommandInputSchema
@@ -10623,36 +11066,28 @@ var CliTargetInputSchema = import_zod3.z.object({
   command: import_zod3.z.string(),
   // Files format - optional
   files_format: import_zod3.z.string().optional(),
-  filesFormat: import_zod3.z.string().optional(),
   attachments_format: import_zod3.z.string().optional(),
-  attachmentsFormat: import_zod3.z.string().optional(),
   // Working directory - optional
   cwd: import_zod3.z.string().optional(),
   // Workspace template directory - optional (mutually exclusive with cwd)
   workspace_template: import_zod3.z.string().optional(),
-  workspaceTemplate: import_zod3.z.string().optional(),
   // Timeout in seconds - optional
   timeout_seconds: import_zod3.z.number().positive().optional(),
-  timeoutSeconds: import_zod3.z.number().positive().optional(),
   // Healthcheck configuration - optional
   healthcheck: CliHealthcheckInputSchema.optional(),
   // Verbose mode - optional
   verbose: import_zod3.z.boolean().optional(),
   cli_verbose: import_zod3.z.boolean().optional(),
-  cliVerbose: import_zod3.z.boolean().optional(),
   // Keep temp files - optional
   keep_temp_files: import_zod3.z.boolean().optional(),
-  keepTempFiles: import_zod3.z.boolean().optional(),
   keep_output_files: import_zod3.z.boolean().optional(),
-  keepOutputFiles: import_zod3.z.boolean().optional(),
   // Common target fields
   grader_target: import_zod3.z.string().optional(),
   judge_target: import_zod3.z.string().optional(),
   // backward compat
   workers: import_zod3.z.number().int().min(1).optional(),
-  provider_batching: import_zod3.z.boolean().optional(),
-  providerBatching: import_zod3.z.boolean().optional()
-});
+  provider_batching: import_zod3.z.boolean().optional()
+}).passthrough();
 var CliHealthcheckHttpSchema = import_zod3.z.object({
   url: import_zod3.z.string().min(1),
   timeoutMs: import_zod3.z.number().positive().optional()
@@ -10677,7 +11112,7 @@ var CliTargetConfigSchema = import_zod3.z.object({
   keepTempFiles: import_zod3.z.boolean().optional()
 }).strict();
 function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
-  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutSeconds = input.timeout_seconds;
   const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
   if ("url" in input && input.url) {
     const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
@@ -10696,11 +11131,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !import_node_path23.default.isAbsolute(cwd)) {
-    cwd = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !import_node_path24.default.isAbsolute(cwd)) {
+    cwd = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), cwd);
   }
   if (!cwd && evalFilePath) {
-    cwd = import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath));
+    cwd = import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath));
   }
   return {
     command,
@@ -10711,9 +11146,9 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
 function normalizeCliTargetInput(input, env, evalFilePath) {
   const targetName = input.name;
   const command = resolveString(input.command, env, `${targetName} CLI command`, true);
-  const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
+  const filesFormatSource = input.files_format ?? input.attachments_format;
   const filesFormat = resolveOptionalLiteralString(filesFormatSource);
-  const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
+  const workspaceTemplateSource = input.workspace_template;
   let workspaceTemplate = resolveOptionalString(
     workspaceTemplateSource,
     env,
@@ -10723,15 +11158,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
       optionalEnv: true
     }
   );
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !import_node_path23.default.isAbsolute(cwd)) {
-    cwd = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !import_node_path24.default.isAbsolute(cwd)) {
+    cwd = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), cwd);
   }
   if (cwd && workspaceTemplate) {
     throw new Error(
@@ -10739,14 +11174,12 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
     );
   }
   if (!cwd && !workspaceTemplate && evalFilePath) {
-    cwd = import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath));
+    cwd = import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath));
   }
-  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutSeconds = input.timeout_seconds;
   const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
-  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
-  const keepTempFiles = resolveOptionalBoolean(
-    input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
-  );
+  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose);
+  const keepTempFiles = resolveOptionalBoolean(input.keep_temp_files ?? input.keep_output_files);
   const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
   return {
     command,
@@ -10767,15 +11200,106 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
   "FILES",
   "OUTPUT_FILE"
 ]);
+var DEPRECATED_TARGET_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
+  ["providerBatching", "provider_batching"],
+  ["subagentModeAllowed", "subagent_mode_allowed"],
+  ["fallbackTargets", "fallback_targets"],
+  ["resourceName", "endpoint"],
+  ["baseUrl", "base_url"],
+  ["apiKey", "api_key"],
+  ["deploymentName", "model"],
+  ["thinkingBudget", "thinking_budget"],
+  ["maxTokens", "max_output_tokens"],
+  ["apiFormat", "api_format"],
+  ["timeoutSeconds", "timeout_seconds"],
+  ["logDir", "log_dir"],
+  ["logDirectory", "log_directory"],
+  ["logFormat", "log_format"],
+  ["logOutputFormat", "log_output_format"],
+  ["systemPrompt", "system_prompt"],
+  ["maxTurns", "max_turns"],
+  ["maxBudgetUsd", "max_budget_usd"],
+  ["dryRun", "dry_run"],
+  ["subagentRoot", "subagent_root"],
+  ["filesFormat", "files_format"],
+  ["attachmentsFormat", "attachments_format"],
+  ["cliUrl", "cli_url"],
+  ["cliPath", "cli_path"],
+  ["githubToken", "github_token"],
+  ["sessionDir", "session_dir"],
+  ["sessionId", "session_id"],
+  ["sessionStateDir", "session_state_dir"],
+  ["maxRetries", "max_retries"],
+  ["retryInitialDelayMs", "retry_initial_delay_ms"],
+  ["retryMaxDelayMs", "retry_max_delay_ms"],
+  ["retryBackoffFactor", "retry_backoff_factor"],
+  ["retryStatusCodes", "retry_status_codes"]
+]);
+var DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
+  ["timeoutSeconds", "timeout_seconds"]
+]);
+function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
+  if (typeof value !== "object" || value === null || Array.isArray(value)) {
+    return [];
+  }
+  const warnings = [];
+  for (const [camelCaseField, snakeCaseField] of aliases) {
+    if (Object.prototype.hasOwnProperty.call(value, camelCaseField)) {
+      warnings.push({
+        location: `${location}.${camelCaseField}`,
+        message: `camelCase field '${camelCaseField}' is no longer supported in targets.yaml. Use '${snakeCaseField}' instead.`
+      });
+    }
+  }
+  return warnings;
+}
+function assertNoDeprecatedCamelCaseTargetFields(definition) {
+  if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
+    throw new Error(
+      `${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
+    );
+  }
+  const warning = findDeprecatedCamelCaseTargetWarnings(
+    definition,
+    `target "${definition.name}"`
+  )[0];
+  if (!warning) {
+    return;
+  }
+  const fieldMatch = warning.message.match(/field '([^']+)'/);
+  const replacementMatch = warning.message.match(/Use '([^']+)' instead/);
+  const field = fieldMatch?.[1] ?? "unknown";
+  const replacement = replacementMatch?.[1] ?? "snake_case";
+  throw new Error(
+    `${warning.location}: camelCase field '${field}' is no longer supported in targets.yaml. Use '${replacement}' instead.`
+  );
+}
+function findDeprecatedCamelCaseTargetWarnings(target, location) {
+  const warnings = collectDeprecatedCamelCaseWarnings(
+    target,
+    location,
+    DEPRECATED_TARGET_CAMEL_CASE_FIELDS
+  );
+  if (typeof target !== "object" || target === null || Array.isArray(target)) {
+    return warnings;
+  }
+  const healthcheck = target.healthcheck;
+  warnings.push(
+    ...collectDeprecatedCamelCaseWarnings(
+      healthcheck,
+      `${location}.healthcheck`,
+      DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS
+    )
+  );
+  return warnings;
+}
 var COMMON_TARGET_SETTINGS = [
   "use_target",
   "provider_batching",
-  "providerBatching",
   "subagent_mode_allowed",
-  "subagentModeAllowed",
-  "fallback_targets",
-  "fallbackTargets"
+  "fallback_targets"
 ];
+var USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
 var BASE_TARGET_SCHEMA = import_zod3.z.object({
   name: import_zod3.z.string().min(1, "target name is required"),
   provider: import_zod3.z.string().optional(),
@@ -10785,43 +11309,40 @@ var BASE_TARGET_SCHEMA = import_zod3.z.object({
   // backward compat
   workers: import_zod3.z.number().int().min(1).optional(),
   workspace_template: import_zod3.z.string().optional(),
-  workspaceTemplate: import_zod3.z.string().optional(),
   subagent_mode_allowed: import_zod3.z.boolean().optional(),
-  fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional(),
-  fallbackTargets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
+  fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
 }).passthrough();
 var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
+var DEFAULT_AZURE_RESPONSES_API_VERSION = "v1";
 var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
-function normalizeAzureApiVersion(value) {
+function normalizeAzureApiVersion(value, apiFormat) {
+  const defaultVersion = apiFormat === "responses" ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION;
   if (!value) {
-    return DEFAULT_AZURE_API_VERSION;
+    return defaultVersion;
   }
   const trimmed = value.trim();
   if (trimmed.length === 0) {
-    return DEFAULT_AZURE_API_VERSION;
+    return defaultVersion;
   }
   const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
-  return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
+  return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion;
 }
 function resolveRetryConfig(target) {
-  const maxRetries = resolveOptionalNumber(
-    target.max_retries ?? target.maxRetries,
-    `${target.name} max retries`
-  );
+  const maxRetries = resolveOptionalNumber(target.max_retries, `${target.name} max retries`);
   const initialDelayMs = resolveOptionalNumber(
-    target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
+    target.retry_initial_delay_ms,
     `${target.name} retry initial delay`
   );
   const maxDelayMs = resolveOptionalNumber(
-    target.retry_max_delay_ms ?? target.retryMaxDelayMs,
+    target.retry_max_delay_ms,
     `${target.name} retry max delay`
   );
   const backoffFactor = resolveOptionalNumber(
-    target.retry_backoff_factor ?? target.retryBackoffFactor,
+    target.retry_backoff_factor,
     `${target.name} retry backoff factor`
   );
   const retryableStatusCodes = resolveOptionalNumberArray(
-    target.retry_status_codes ?? target.retryStatusCodes,
+    target.retry_status_codes,
     `${target.name} retry status codes`
   );
   if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
@@ -10835,9 +11356,56 @@ function resolveRetryConfig(target) {
     retryableStatusCodes
   };
 }
-function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
+function resolveDelegatedTargetDefinition(name, definitions, env = process.env) {
+  let definition = definitions.get(name);
+  if (!definition) {
+    return void 0;
+  }
+  const visited = [definition.name];
+  for (let depth = 0; depth < 10; depth++) {
+    const rawUseTarget = typeof definition.use_target === "string" ? definition.use_target.trim() : void 0;
+    if (!rawUseTarget) {
+      return definition;
+    }
+    const envMatch = rawUseTarget.match(USE_TARGET_ENV_PATTERN);
+    const envVarName = envMatch?.[1];
+    const resolvedName = envVarName ? env[envVarName]?.trim() ?? "" : rawUseTarget;
+    if (resolvedName.length === 0) {
+      if (envVarName) {
+        throw new Error(
+          `Target "${definition.name}" uses use_target: \${{ ${envVarName} }}, but ${envVarName} is not set. Set ${envVarName} to the name of a concrete target (for example, "azure") before running the eval.`
+        );
+      }
+      throw new Error(
+        `Target "${definition.name}" has an empty use_target value. Point it at a concrete target name before running the eval.`
+      );
+    }
+    const next = definitions.get(resolvedName);
+    if (!next) {
+      if (envVarName) {
+        throw new Error(
+          `Target "${definition.name}" uses use_target: \${{ ${envVarName} }}, which resolved to "${resolvedName}", but no target named "${resolvedName}" exists.`
+        );
+      }
+      throw new Error(
+        `Target "${definition.name}" uses use_target: "${resolvedName}", but no target named "${resolvedName}" exists.`
+      );
+    }
+    if (visited.includes(next.name)) {
+      const chain = [...visited, next.name].join(" -> ");
+      throw new Error(`Circular use_target reference detected: ${chain}`);
+    }
+    definition = next;
+    visited.push(definition.name);
+  }
+  throw new Error(
+    `Target "${name}" exceeded the maximum use_target resolution depth (10). Check for a delegation loop or overly deep alias chain.`
+  );
+}
+function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
+  assertNoDeprecatedCamelCaseTargetFields(definition);
   const parsed = BASE_TARGET_SCHEMA.parse(definition);
-  if (parsed.workspace_template !== void 0 || parsed.workspaceTemplate !== void 0) {
+  if (parsed.workspace_template !== void 0) {
     throw new Error(
       `${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
     );
@@ -10853,13 +11421,9 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
     `${parsed.name} provider`,
     true
   ).toLowerCase();
-  const providerBatching = resolveOptionalBoolean(
-    parsed.provider_batching ?? parsed.providerBatching
-  );
-  const subagentModeAllowed = resolveOptionalBoolean(
-    parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
-  );
-  const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
+  const providerBatching = resolveOptionalBoolean(parsed.provider_batching);
+  const subagentModeAllowed = resolveOptionalBoolean(parsed.subagent_mode_allowed);
+  const fallbackTargets = parsed.fallback_targets;
   const base = {
     name: parsed.name,
     graderTarget: parsed.grader_target ?? parsed.judge_target,
@@ -11009,20 +11573,22 @@ function normalizeOpenAIBaseUrl(value) {
   return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
 }
 function resolveAzureConfig(target, env) {
-  const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
+  const endpointSource = target.endpoint ?? target.resource;
+  const apiKeySource = target.api_key;
+  const deploymentSource = target.deployment ?? target.model;
   const versionSource = target.version ?? target.api_version;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
   const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
   const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
+  const apiFormat = resolveApiFormat(target, env, target.name);
   const version = normalizeAzureApiVersion(
     resolveOptionalString(versionSource, env, `${target.name} api version`, {
       allowLiteral: true,
       optionalEnv: true
-    })
+    }),
+    apiFormat
   );
   const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
   const maxOutputTokens = resolveOptionalNumber(
@@ -11035,13 +11601,17 @@ function resolveAzureConfig(target, env) {
     deploymentName,
     apiKey,
     version,
+    apiFormat,
     temperature,
     maxOutputTokens,
     retry
   };
 }
-function resolveApiFormat(target, targetName) {
-  const raw = target.api_format ?? target.apiFormat;
+function resolveApiFormat(target, env, targetName) {
+  const raw = resolveOptionalString(target.api_format, env, `${targetName} api format`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
   if (raw === void 0) return void 0;
   if (raw === "chat" || raw === "responses") return raw;
   throw new Error(
@@ -11049,11 +11619,11 @@ function resolveApiFormat(target, targetName) {
   );
 }
 function resolveOpenAIConfig(target, env) {
-  const endpointSource = target.endpoint ?? target.base_url ?? target.baseUrl;
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const endpointSource = target.endpoint ?? target.base_url;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const baseURL = normalizeOpenAIBaseUrl(
     resolveOptionalString(endpointSource, env, `${target.name} endpoint`, {
       allowLiteral: true,
@@ -11067,17 +11637,17 @@ function resolveOpenAIConfig(target, env) {
     baseURL,
     apiKey,
     model,
-    apiFormat: resolveApiFormat(target, target.name),
+    apiFormat: resolveApiFormat(target, env, target.name),
     temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
     maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
     retry
   };
 }
 function resolveOpenRouterConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const retry = resolveRetryConfig(target);
   return {
     apiKey: resolveString(apiKeySource, env, `${target.name} OpenRouter api key`),
@@ -11088,11 +11658,11 @@ function resolveOpenRouterConfig(target, env) {
   };
 }
 function resolveAnthropicConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
-  const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
+  const maxTokensSource = target.max_output_tokens;
+  const thinkingBudgetSource = target.thinking_budget;
   const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
   const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
   const retry = resolveRetryConfig(target);
@@ -11106,10 +11676,10 @@ function resolveAnthropicConfig(target, env) {
   };
 }
 function resolveGeminiConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
   const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
     allowLiteral: true,
@@ -11129,11 +11699,11 @@ function resolveCodexConfig(target, env, evalFilePath) {
   const executableSource = target.executable ?? target.command ?? target.binary;
   const argsSource = target.args ?? target.arguments;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt;
   const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
     allowLiteral: true,
     optionalEnv: true
@@ -11156,8 +11726,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
       optionalEnv: true
     }
   );
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   if (cwd && workspaceTemplate) {
     throw new Error(
@@ -11197,16 +11767,16 @@ function normalizeCodexLogFormat(value) {
   throw new Error("codex log format must be 'summary' or 'json'");
 }
 function resolveCopilotSdkConfig(target, env, evalFilePath) {
-  const cliUrlSource = target.cli_url ?? target.cliUrl;
-  const cliPathSource = target.cli_path ?? target.cliPath;
-  const githubTokenSource = target.github_token ?? target.githubToken;
+  const cliUrlSource = target.cli_url;
+  const cliPathSource = target.cli_path;
+  const githubTokenSource = target.github_token;
   const modelSource = target.model;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
     allowLiteral: true,
     optionalEnv: true
@@ -11241,8 +11811,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
       optionalEnv: true
     }
   );
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   if (cwd && workspaceTemplate) {
     throw new Error(
@@ -11279,11 +11849,11 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
   const modelSource = target.model;
   const argsSource = target.args ?? target.arguments;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -11306,8 +11876,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
       optionalEnv: true
     }
   );
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   if (cwd && workspaceTemplate) {
     throw new Error(
@@ -11347,16 +11917,16 @@ function normalizeCopilotLogFormat(value) {
 }
 function resolvePiCodingAgentConfig(target, env, evalFilePath) {
   const subproviderSource = target.subprovider;
-  const modelSource = target.model ?? target.pi_model ?? target.piModel;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
-  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const modelSource = target.model ?? target.pi_model;
+  const apiKeySource = target.api_key;
+  const toolsSource = target.tools ?? target.pi_tools;
+  const thinkingSource = target.thinking ?? target.pi_thinking;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const subprovider = resolveOptionalString(
     subproviderSource,
     env,
@@ -11374,6 +11944,11 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
     allowLiteral: false,
     optionalEnv: true
   });
+  const baseUrlSource = target.base_url ?? target.endpoint;
+  const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi base url`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
   const tools = resolveOptionalString(toolsSource, env, `${target.name} pi tools`, {
     allowLiteral: true,
     optionalEnv: true
@@ -11395,8 +11970,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
       optionalEnv: true
     }
   );
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   if (cwd && workspaceTemplate) {
     throw new Error(
@@ -11414,6 +11989,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
     subprovider,
     model,
     apiKey,
+    baseUrl,
     tools,
     thinking,
     cwd,
@@ -11427,16 +12003,16 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
 function resolvePiCliConfig(target, env, evalFilePath) {
   const executableSource = target.executable ?? target.command ?? target.binary;
   const subproviderSource = target.subprovider;
-  const modelSource = target.model ?? target.pi_model ?? target.piModel;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
-  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const modelSource = target.model ?? target.pi_model;
+  const apiKeySource = target.api_key;
+  const toolsSource = target.tools ?? target.pi_tools;
+  const thinkingSource = target.thinking ?? target.pi_thinking;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -11455,6 +12031,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
     allowLiteral: false,
     optionalEnv: true
   });
+  const baseUrlSource = target.base_url ?? target.endpoint;
+  const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi-cli base url`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
   const tools = resolveOptionalString(toolsSource, env, `${target.name} pi-cli tools`, {
     allowLiteral: true,
     optionalEnv: true
@@ -11475,8 +12056,8 @@ function resolvePiCliConfig(target, env, evalFilePath) {
     `${target.name} pi-cli workspace template`,
     { allowLiteral: true, optionalEnv: true }
   );
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   if (cwd && workspaceTemplate) {
     throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
@@ -11493,6 +12074,7 @@ function resolvePiCliConfig(target, env, evalFilePath) {
     subprovider,
     model,
     apiKey,
+    baseUrl,
     tools,
     thinking,
     args,
@@ -11507,11 +12089,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
 function resolveClaudeConfig(target, env, evalFilePath) {
   const modelSource = target.model;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CLAUDE_LOG_FORMAT;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt;
   const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
     allowLiteral: true,
     optionalEnv: true
@@ -11529,8 +12111,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
       optionalEnv: true
     }
   );
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   if (cwd && workspaceTemplate) {
     throw new Error(
@@ -11544,8 +12126,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
   });
   const logFormat = normalizeClaudeLogFormat(logFormatSource);
   const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
-  const maxTurns = typeof target.max_turns === "number" ? target.max_turns : typeof target.maxTurns === "number" ? target.maxTurns : void 0;
-  const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : typeof target.maxBudgetUsd === "number" ? target.maxBudgetUsd : void 0;
+  const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
+  const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
   return {
     model,
     systemPrompt,
@@ -11576,9 +12158,7 @@ function resolveMockConfig(target) {
   return { response };
 }
 function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
-  const workspaceTemplateEnvVar = resolveOptionalLiteralString(
-    target.workspace_template ?? target.workspaceTemplate
-  );
+  const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
   let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
     workspaceTemplateEnvVar,
     env,
@@ -11588,14 +12168,14 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
       optionalEnv: true
     }
   ) : void 0;
-  if (workspaceTemplate && evalFilePath && !import_node_path23.default.isAbsolute(workspaceTemplate)) {
-    workspaceTemplate = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), workspaceTemplate);
+  if (workspaceTemplate && evalFilePath && !import_node_path24.default.isAbsolute(workspaceTemplate)) {
+    workspaceTemplate = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), workspaceTemplate);
   }
   const executableSource = target.executable;
   const waitSource = target.wait;
-  const dryRunSource = target.dry_run ?? target.dryRun;
-  const subagentRootSource = target.subagent_root ?? target.subagentRoot;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+  const dryRunSource = target.dry_run;
+  const subagentRootSource = target.subagent_root;
+  const timeoutSource = target.timeout_seconds;
   const defaultCommand = insiders ? "code-insiders" : "code";
   const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
     allowLiteral: true,
@@ -11630,8 +12210,8 @@ function resolveCliConfig(target, env, evalFilePath) {
   const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
   if (!parseResult.success) {
     const firstError = parseResult.error.errors[0];
-    const path52 = firstError?.path.join(".") || "";
-    const prefix = path52 ? `${target.name} ${path52}: ` : `${target.name}: `;
+    const path53 = firstError?.path.join(".") || "";
+    const prefix = path53 ? `${target.name} ${path53}: ` : `${target.name}: `;
     throw new Error(`${prefix}${firstError?.message}`);
   }
   const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -11646,17 +12226,17 @@ function resolveCliConfig(target, env, evalFilePath) {
 }
 function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
   const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
-  const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
+  const timeoutSeconds = target.timeout_seconds;
   const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
   let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
     allowLiteral: true,
     optionalEnv: true
   });
-  if (cwd && evalFilePath && !import_node_path23.default.isAbsolute(cwd)) {
-    cwd = import_node_path23.default.resolve(import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath)), cwd);
+  if (cwd && evalFilePath && !import_node_path24.default.isAbsolute(cwd)) {
+    cwd = import_node_path24.default.resolve(import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath)), cwd);
   }
   if (!cwd && evalFilePath) {
-    cwd = import_node_path23.default.dirname(import_node_path23.default.resolve(evalFilePath));
+    cwd = import_node_path24.default.dirname(import_node_path24.default.resolve(evalFilePath));
   }
   return {
     command,
@@ -11710,10 +12290,10 @@ function resolveDiscover(value, targetName) {
   throw new Error(`Target "${targetName}": discover must be "latest" (got "${String(value)}")`);
 }
 function resolveCopilotLogConfig(target, env) {
-  const sessionDirSource = target.session_dir ?? target.sessionDir;
-  const sessionIdSource = target.session_id ?? target.sessionId;
+  const sessionDirSource = target.session_dir;
+  const sessionIdSource = target.session_id;
   const discoverSource = target.discover;
-  const sessionStateDirSource = target.session_state_dir ?? target.sessionStateDir;
+  const sessionStateDirSource = target.session_state_dir;
   const cwdSource = target.cwd;
   return {
     sessionDir: resolveOptionalString(
@@ -11894,7 +12474,7 @@ var import_node_path33 = __toESM(require("path"), 1);
 init_cjs_shims();
 var import_node_fs11 = require("fs");
 var import_promises20 = require("fs/promises");
-var import_node_path24 = __toESM(require("path"), 1);
+var import_node_path25 = __toESM(require("path"), 1);
 async function pathExists(target) {
   try {
     await (0, import_promises20.access)(target, import_node_fs11.constants.F_OK);
@@ -11910,7 +12490,7 @@ async function readDirEntries(target) {
   const entries = await (0, import_promises20.readdir)(target, { withFileTypes: true });
   return entries.map((entry) => ({
     name: entry.name,
-    absolutePath: import_node_path24.default.join(target, entry.name),
+    absolutePath: import_node_path25.default.join(target, entry.name),
     isDirectory: entry.isDirectory()
   }));
 }
@@ -11926,9 +12506,9 @@ async function removeIfExists(target) {
 // src/evaluation/providers/vscode/utils/path.ts
 init_cjs_shims();
-var import_node_path25 = __toESM(require("path"), 1);
+var import_node_path26 = __toESM(require("path"), 1);
 function pathToFileUri2(filePath) {
-  const absolutePath = import_node_path25.default.isAbsolute(filePath) ? filePath : import_node_path25.default.resolve(filePath);
+  const absolutePath = import_node_path26.default.isAbsolute(filePath) ? filePath : import_node_path26.default.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -11938,7 +12518,7 @@ function pathToFileUri2(filePath) {
 // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
 init_cjs_shims();
-var import_node_path26 = __toESM(require("path"), 1);
+var import_node_path27 = __toESM(require("path"), 1);
 // src/evaluation/providers/vscode/utils/template.ts
 init_cjs_shims();
@@ -12032,8 +12612,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
   });
 }
 function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
-  const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path26.default.basename(file)}`).join("\n");
-  const responseList = responseFiles.map((file) => `"${import_node_path26.default.basename(file)}"`).join(", ");
+  const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path27.default.basename(file)}`).join("\n");
+  const responseList = responseFiles.map((file) => `"${import_node_path27.default.basename(file)}"`).join(", ");
   return renderTemplate2(templateContent, {
     requestFiles: requestLines,
     responseList
@@ -12043,7 +12623,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
 // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
 init_cjs_shims();
 var import_promises21 = require("fs/promises");
-var import_node_path27 = __toESM(require("path"), 1);
+var import_node_path28 = __toESM(require("path"), 1);
 // src/evaluation/providers/vscode/utils/time.ts
 init_cjs_shims();
@@ -12103,7 +12683,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
 }
 async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
   if (!silent) {
-    const fileList = responseFilesFinal.map((file) => import_node_path27.default.basename(file)).join(", ");
+    const fileList = responseFilesFinal.map((file) => import_node_path28.default.basename(file)).join(", ");
     console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
   }
   const deadline = Date.now() + timeoutMs;
@@ -12112,7 +12692,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
     while (pending.size > 0) {
       if (Date.now() >= deadline) {
         if (!silent) {
-          const remaining = [...pending].map((f) => import_node_path27.default.basename(f)).join(", ");
+          const remaining = [...pending].map((f) => import_node_path28.default.basename(f)).join(", ");
           console.error(
             `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
           );
@@ -12170,37 +12750,6 @@ var import_node_util2 = require("util");
 // src/evaluation/providers/vscode/dispatch/constants.ts
 init_cjs_shims();
 var import_node_path29 = __toESM(require("path"), 1);
-// src/paths.ts
-init_cjs_shims();
-var import_node_os6 = __toESM(require("os"), 1);
-var import_node_path28 = __toESM(require("path"), 1);
-var logged = false;
-function getAgentvHome() {
-  const envHome = process.env.AGENTV_HOME;
-  if (envHome && envHome !== "undefined") {
-    if (!logged) {
-      logged = true;
-      console.warn(`Using AGENTV_HOME: ${envHome}`);
-    }
-    return envHome;
-  }
-  return import_node_path28.default.join(import_node_os6.default.homedir(), ".agentv");
-}
-function getWorkspacesRoot() {
-  return import_node_path28.default.join(getAgentvHome(), "workspaces");
-}
-function getSubagentsRoot() {
-  return import_node_path28.default.join(getAgentvHome(), "subagents");
-}
-function getTraceStateRoot() {
-  return import_node_path28.default.join(getAgentvHome(), "trace-state");
-}
-function getWorkspacePoolRoot() {
-  return import_node_path28.default.join(getAgentvHome(), "workspace-pool");
-}
-// src/evaluation/providers/vscode/dispatch/constants.ts
 var DEFAULT_LOCK_NAME = "subagent.lock";
 var DEFAULT_ALIVE_FILENAME = ".alive";
 function getDefaultSubagentRoot(vscodeCmd = "code") {
@@ -13353,6 +13902,15 @@ var AGENT_PROVIDER_KINDS = [
   "vscode",
   "vscode-insiders"
 ];
+var LLM_GRADER_CAPABLE_KINDS = [
+  "openai",
+  "openrouter",
+  "azure",
+  "anthropic",
+  "gemini",
+  "agentv",
+  "mock"
+];
 function extractLastAssistantContent(messages) {
   if (!messages || messages.length === 0) {
     return "";
@@ -13506,9 +14064,10 @@ init_cjs_shims();
 // src/evaluation/evaluators/scoring.ts
 init_cjs_shims();
-var PASS_THRESHOLD = 0.8;
-function scoreToVerdict(score) {
-  return score >= PASS_THRESHOLD ? "pass" : "fail";
+var DEFAULT_THRESHOLD = 0.8;
+var PASS_THRESHOLD = DEFAULT_THRESHOLD;
+function scoreToVerdict(score, threshold = DEFAULT_THRESHOLD) {
+  return score >= threshold ? "pass" : "fail";
 }
 function clampScore(value) {
   if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -13699,13 +14258,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
 async function execShellWithStdin(command, stdinPayload, options = {}) {
   const { mkdir: mkdir17, readFile: readFile17, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
   const { tmpdir: tmpdir3 } = await import("os");
-  const path52 = await import("path");
+  const path53 = await import("path");
   const { randomUUID: randomUUID10 } = await import("crypto");
-  const dir = path52.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
+  const dir = path53.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
   await mkdir17(dir, { recursive: true });
-  const stdinPath = path52.join(dir, "stdin.txt");
-  const stdoutPath = path52.join(dir, "stdout.txt");
-  const stderrPath = path52.join(dir, "stderr.txt");
+  const stdinPath = path53.join(dir, "stdin.txt");
+  const stdoutPath = path53.join(dir, "stdout.txt");
+  const stderrPath = path53.join(dir, "stderr.txt");
   await writeFile9(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn5 } = await import("child_process");
@@ -14907,7 +15466,7 @@ ${outputSchema}`;
     parts.push("[[ ## scoring_criteria ## ]]");
     for (const rubric of rubrics) {
       const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
-      const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
+      const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
       parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
       if (rubric.outcome) {
         parts.push(`Description: ${rubric.outcome}`);
@@ -14961,54 +15520,106 @@ ${outputSchema}`;
   async runWithRetry(options) {
     const { context: context2, graderProvider, systemPrompt, userPrompt, schema, images } = options;
     let lastError;
+    let lastInvalidResponse;
+    let shouldAttemptStructureFix = false;
     for (let attempt = 1; attempt <= 3; attempt++) {
       try {
-        const model = graderProvider.asLanguageModel?.();
-        if (model) {
-          const modelOptions = {
-            ...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
-            ...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
-          };
-          const hasImages = images && images.length > 0;
-          const result = hasImages ? await (0, import_ai2.generateText)({
-            model,
-            system: systemPrompt,
-            messages: [
-              {
-                role: "user",
-                content: [
-                  { type: "text", text: userPrompt },
-                  ...toAiSdkImageParts(images)
-                ]
-              }
-            ],
-            ...modelOptions
-          }) : await (0, import_ai2.generateText)({
-            model,
-            system: systemPrompt,
-            prompt: userPrompt,
-            ...modelOptions
-          });
-          const data2 = schema.parse(parseJsonFromText(result.text));
-          const rawUsage = result.usage;
-          const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
-          return { data: data2, tokenUsage };
+        const result = await this.generateStructuredResponse({
+          context: context2,
+          graderProvider,
+          systemPrompt,
+          userPrompt,
+          images
+        });
+        const canRepairResponse = result.text.trim().length > 0;
+        lastInvalidResponse = canRepairResponse ? result : void 0;
+        let data;
+        try {
+          data = schema.parse(parseJsonFromText(result.text));
+        } catch (e) {
+          lastError = e instanceof Error ? e : new Error(String(e));
+          shouldAttemptStructureFix = canRepairResponse;
+          continue;
         }
-        const response = await graderProvider.invoke({
-          question: userPrompt,
+        return {
+          data,
+          providerResponse: result.providerResponse,
+          tokenUsage: result.tokenUsage
+        };
+      } catch (e) {
+        lastError = e instanceof Error ? e : new Error(String(e));
+      }
+    }
+    if (shouldAttemptStructureFix && lastInvalidResponse) {
+      try {
+        const repaired = await this.generateStructuredResponse({
+          context: context2,
+          graderProvider,
           systemPrompt,
-          evalCaseId: context2.evalCase.id,
-          attempt: context2.attempt,
-          maxOutputTokens: this.maxOutputTokens,
-          temperature: this.temperature
+          userPrompt: buildStructureRepairPrompt({
+            validationError: lastError?.message ?? "Schema validation failed",
+            invalidResponse: lastInvalidResponse.text
+          })
         });
-        const data = schema.parse(parseJsonFromText(extractLastAssistantContent(response.output)));
-        return { data, providerResponse: response, tokenUsage: response.tokenUsage };
+        const data = schema.parse(parseJsonFromText(repaired.text));
+        return {
+          data,
+          providerResponse: repaired.providerResponse,
+          tokenUsage: sumTokenUsage(lastInvalidResponse.tokenUsage, repaired.tokenUsage)
+        };
       } catch (e) {
         lastError = e instanceof Error ? e : new Error(String(e));
       }
     }
-    throw new Error(`Failed to parse evaluator response after 3 attempts: ${lastError?.message}`);
+    throw new Error(
+      `Failed to parse evaluator response after 3 attempts and 1 structure-fix attempt: ${lastError?.message}`
+    );
+  }
+  async generateStructuredResponse(options) {
+    const { context: context2, graderProvider, systemPrompt, userPrompt, images } = options;
+    const model = graderProvider.asLanguageModel?.();
+    if (model) {
+      const modelOptions = {
+        ...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
+        ...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
+      };
+      const hasImages = images && images.length > 0;
+      const result = hasImages ? await (0, import_ai2.generateText)({
+        model,
+        system: systemPrompt,
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: userPrompt },
+              ...toAiSdkImageParts(images)
+            ]
+          }
+        ],
+        ...modelOptions
+      }) : await (0, import_ai2.generateText)({
+        model,
+        system: systemPrompt,
+        prompt: userPrompt,
+        ...modelOptions
+      });
+      const rawUsage = result.usage;
+      const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
+      return { text: result.text, tokenUsage };
+    }
+    const response = await graderProvider.invoke({
+      question: userPrompt,
+      systemPrompt,
+      evalCaseId: context2.evalCase.id,
+      attempt: context2.attempt,
+      maxOutputTokens: this.maxOutputTokens,
+      temperature: this.temperature
+    });
+    return {
+      text: extractLastAssistantContent(response.output),
+      providerResponse: response,
+      tokenUsage: response.tokenUsage
+    };
   }
 };
 function buildOutputSchema() {
@@ -15028,6 +15639,29 @@ function buildOutputSchema() {
     "}"
   ].join("\n");
 }
+function buildStructureRepairPrompt(options) {
+  const { validationError, invalidResponse } = options;
+  return [
+    "The following evaluation response has useful grading content but invalid JSON structure.",
+    "Repair it to satisfy the schema in the system prompt.",
+    "Preserve the evaluation meaning, do not re-grade the answer, and return only a single JSON object.",
+    "",
+    "Validation error:",
+    validationError,
+    "",
+    "Invalid response:",
+    invalidResponse
+  ].join("\n");
+}
+function sumTokenUsage(first, second) {
+  if (!first && !second) {
+    return void 0;
+  }
+  return {
+    input: (first?.input ?? 0) + (second?.input ?? 0),
+    output: (first?.output ?? 0) + (second?.output ?? 0)
+  };
+}
 function buildRubricOutputSchema() {
   return `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
 You must return a valid JSON object matching this schema:
@@ -15127,19 +15761,21 @@ function calculateScoreRangeResult(result, rubrics) {
     rawScores[rubric.id] = rawScore;
     totalWeight += rubric.weight;
     weightedScoreSum += normalizedScore * rubric.weight;
-    let requiredMinScore;
-    if (rubric.required_min_score !== void 0) {
-      requiredMinScore = rubric.required_min_score;
+    let minScoreThreshold;
+    if (rubric.min_score !== void 0) {
+      minScoreThreshold = rubric.min_score;
+    } else if (rubric.required_min_score !== void 0) {
+      minScoreThreshold = rubric.required_min_score / 10;
     } else if (rubric.required === true) {
-      requiredMinScore = 10;
+      minScoreThreshold = 1;
     }
     const matchingRange = rubric.score_ranges?.find(
       (r) => rawScore >= r.score_range[0] && rawScore <= r.score_range[1]
     );
     const rangeDescription = matchingRange?.outcome ?? "";
     const criterionLabel = rubric.outcome ?? rubric.id;
-    const passed = !(requiredMinScore !== void 0 && rawScore < requiredMinScore) && rawScore >= 7;
-    if (requiredMinScore !== void 0 && rawScore < requiredMinScore) {
+    const passed = !(minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) && rawScore >= 7;
+    if (minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) {
       failedRequired = true;
     }
     assertions.push({
@@ -15216,11 +15852,11 @@ function createFilesystemTools(workspacePath) {
       execute: async (input) => {
         try {
           const resolved = resolveSandboxed(workspacePath, input.path);
-          const stat10 = await import_promises29.default.stat(resolved);
-          if (stat10.isDirectory()) {
+          const stat11 = await import_promises29.default.stat(resolved);
+          if (stat11.isDirectory()) {
             return { error: `'${input.path}' is a directory, not a file` };
           }
-          const buffer = Buffer.alloc(Math.min(stat10.size, MAX_FILE_SIZE));
+          const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
           const fd = await import_promises29.default.open(resolved, "r");
           try {
             await fd.read(buffer, 0, buffer.length, 0);
@@ -15228,8 +15864,8 @@ function createFilesystemTools(workspacePath) {
             await fd.close();
           }
           const content = buffer.toString("utf-8");
-          const truncated = stat10.size > MAX_FILE_SIZE;
-          return { content, truncated, size: stat10.size };
+          const truncated = stat11.size > MAX_FILE_SIZE;
+          return { content, truncated, size: stat11.size };
         } catch (error) {
           return { error: error instanceof Error ? error.message : String(error) };
         }
@@ -15280,8 +15916,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
       const ext = import_node_path39.default.extname(entry.name).toLowerCase();
       if (BINARY_EXTENSIONS.has(ext)) continue;
       try {
-        const stat10 = await import_promises29.default.stat(fullPath);
-        if (stat10.size > MAX_FILE_SIZE) continue;
+        const stat11 = await import_promises29.default.stat(fullPath);
+        if (stat11.size > MAX_FILE_SIZE) continue;
         const content = await import_promises29.default.readFile(fullPath, "utf-8");
         const lines = content.split("\n");
         for (let i = 0; i < lines.length; i++) {
@@ -15925,115 +16561,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
     // `required` defaults to true and `weight` to 1 when the field config omits them.
-    const { path: path52, match, required = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path52);
-    const expectedValue = resolvePath(expectedData, path52);
+    const { path: path53, match, required = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path53);
+    const expectedValue = resolvePath(expectedData, path53);
     // The expected side is checked first: with nothing to compare against, the
     // field counts as a hit regardless of what the candidate contains.
     if (expectedValue === void 0) {
       return {
-        path: path52,
+        path: path53,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path52}: no expected value`
+        message: `${path53}: no expected value`
       };
     }
     // Expected value exists but the candidate has none: a miss for required
     // fields, a zero-weight hit for optional ones.
     if (candidateValue === void 0) {
       if (required) {
         return {
-          path: path52,
+          path: path53,
           score: 0,
           weight,
           hit: false,
-          message: `${path52} (required, missing)`
+          message: `${path53} (required, missing)`
         };
       }
       return {
-        path: path52,
+        path: path53,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path52}: optional field missing`
+        message: `${path53}: optional field missing`
       };
     }
     // Both values are present: dispatch on the configured match strategy.
     switch (match) {
       case "exact":
-        return this.compareExact(path52, candidateValue, expectedValue, weight);
+        return this.compareExact(path53, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path52,
+          path53,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path52, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path53, candidateValue, expectedValue, fieldConfig, weight);
       default:
         // An unrecognized match type is reported as a failed field (score 0)
         // instead of throwing.
         return {
-          path: path52,
+          path: path53,
           score: 0,
           weight,
           hit: false,
-          message: `${path52}: unknown match type "${match}"`
+          message: `${path53}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    *
    * Returns a hit (score 1) when `deepEqual(candidateValue, expectedValue)` holds.
    * Otherwise returns score 0, with a message that distinguishes a `typeof`
    * mismatch from a plain value mismatch. The caller-supplied weight is passed
    * through unchanged in every result.
    */
-  compareExact(path52, candidateValue, expectedValue, weight) {
+  compareExact(path53, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path52,
+        path: path53,
         score: 1,
         weight,
         hit: true,
-        message: path52
+        message: path53
       };
     }
     // Not equal: report differing `typeof` results separately so the message
     // points at a type problem rather than a wrong value.
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path52,
+        path: path53,
         score: 0,
         weight,
         hit: false,
-        message: `${path52} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path53} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path52,
+      path: path53,
       score: 0,
       weight,
       hit: false,
-      message: `${path52} (value mismatch)`
+      message: `${path53} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path52, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path53, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber(candidateValue);
     const expectedNum = toNumber(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path52,
+        path: path53,
         score: 0,
         weight,
         hit: false,
-        message: `${path52} (non-numeric value)`
+        message: `${path53} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path52,
+        path: path53,
         score: 0,
         weight,
         hit: false,
-        message: `${path52} (invalid numeric value)`
+        message: `${path53} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -16046,61 +16682,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path52,
+        path: path53,
         score: 1,
         weight,
         hit: true,
-        message: `${path52} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path53} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path52,
+      path: path53,
       score: 0,
       weight,
       hit: false,
-      message: `${path52} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path53} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    *
    * Both values are converted with `String(...)` and parsed by `parseDate` using
    * `fieldConfig.formats`, falling back to `DEFAULT_DATE_FORMATS` when that is
    * null or undefined. A value that `parseDate` returns `null` for scores 0;
    * the candidate is checked before the expected value.
    */
-  compareDate(path52, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path53, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path52,
+        path: path53,
         score: 0,
         weight,
         hit: false,
-        message: `${path52} (unparseable candidate date)`
+        message: `${path53} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path52,
+        path: path53,
         score: 0,
         weight,
         hit: false,
-        message: `${path52} (unparseable expected date)`
+        message: `${path53} (unparseable expected date)`
       };
     }
     // Only year, month and day-of-month are compared (via the local-time
     // getters), so any time-of-day component is ignored.
     // NOTE(review): correctness across time zones depends on how parseDate
     // constructs its Date objects — confirm against its implementation.
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path52,
+        path: path53,
         score: 1,
         weight,
         hit: true,
-        message: path52
+        message: path53
       };
     }
     return {
-      path: path52,
+      path: path53,
       score: 0,
       weight,
       hit: false,
-      message: `${path52} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path53} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -16133,11 +16769,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path52) {
-  if (!path52 || !obj) {
+function resolvePath(obj, path53) {
+  if (!path53 || !obj) {
     return void 0;
   }
-  const parts = path52.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path53.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -16634,8 +17270,8 @@ var TokenUsageEvaluator = class {
 // src/evaluation/evaluators/tool-trajectory.ts
 init_cjs_shims();
-function getNestedValue(obj, path52) {
-  const parts = path52.split(".");
+function getNestedValue(obj, path53) {
+  const parts = path53.split(".");
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0 || typeof current !== "object") {
@@ -18428,7 +19064,7 @@ var WorkspacePoolManager = class {
   }
   /**
    * Reset an existing slot for reuse:
-   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
+   * 1. Reset repos (fetch from origin when resolve=remote, then git reset --hard && git clean per repo)
    * 2. Re-copy template files (skip repo directories)
    */
   async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
@@ -18441,7 +19077,17 @@ var WorkspacePoolManager = class {
         continue;
       }
       const ref = repo.checkout?.ref ?? "HEAD";
-      await git(["reset", "--hard", ref], { cwd: repoDir });
+      const resolve = repo.checkout?.resolve ?? "remote";
+      if (resolve === "remote") {
+        const fetchArgs = ["fetch", "origin", ref];
+        if (repo.clone?.depth) {
+          fetchArgs.splice(1, 0, "--depth", String(repo.clone.depth));
+        }
+        await git(fetchArgs, { cwd: repoDir });
+        await git(["reset", "--hard", "FETCH_HEAD"], { cwd: repoDir });
+      } else {
+        await git(["reset", "--hard", ref], { cwd: repoDir });
+      }
       const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
       await git(["clean", cleanFlag], { cwd: repoDir });
     }
@@ -18741,7 +19387,7 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
 }
 // src/evaluation/orchestrator.ts
-function classifyQualityStatus(score, threshold = PASS_THRESHOLD) {
+function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
   // A score equal to the threshold counts as "ok"; anything below it is
   // reported as "quality_failure".
   return score >= threshold ? "ok" : "quality_failure";
 }
 function buildSkippedEvaluatorError(scores) {
@@ -18833,7 +19479,7 @@ async function runEvaluation(options) {
   const filteredEvalCases = filterEvalCases(evalCases, filter);
   if (filteredEvalCases.length === 0) {
     if (filter) {
-      throw new Error(`No tests matched filter '${filter}' in ${evalFilePath}`);
+      throw new Error(`No tests matched filter '${formatFilter(filter)}' in ${evalFilePath}`);
     }
     return [];
   }
@@ -18859,20 +19505,10 @@ async function runEvaluation(options) {
     if (resolvedTargetsByName.has(name)) {
       return resolvedTargetsByName.get(name);
     }
-    let definition = targetDefinitions.get(name);
+    const definition = resolveDelegatedTargetDefinition(name, targetDefinitions, envLookup);
     if (!definition) {
       return void 0;
     }
-    for (let depth = 0; depth < 5; depth++) {
-      const useTarget = definition.use_target;
-      if (typeof useTarget !== "string" || useTarget.trim().length === 0) break;
-      const envMatch = useTarget.trim().match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
-      const resolvedName = envMatch ? envLookup[envMatch[1]] ?? "" : useTarget.trim();
-      if (resolvedName.length === 0) break;
-      const next = targetDefinitions.get(resolvedName);
-      if (!next) break;
-      definition = next;
-    }
     const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
     resolvedTargetsByName.set(name, resolved);
     return resolved;
@@ -18895,6 +19531,9 @@ async function runEvaluation(options) {
     const graderName = targetContext.graderTarget ?? targetContext.name;
     const resolvedGrader = resolveTargetByName(graderName);
     if (!resolvedGrader) {
+      if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
+        return void 0;
+      }
       return getOrCreateProvider(targetContext);
     }
     return getOrCreateProvider(resolvedGrader);
@@ -19225,7 +19864,7 @@ async function runEvaluation(options) {
           const budgetResult = {
             timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
             testId: evalCase.id,
-            dataset: evalCase.dataset,
+            suite: evalCase.suite,
             category: evalCase.category,
             score: 0,
             assertions: [],
@@ -19262,7 +19901,7 @@ async function runEvaluation(options) {
           const haltResult = {
             timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
             testId: evalCase.id,
-            dataset: evalCase.dataset,
+            suite: evalCase.suite,
             category: evalCase.category,
             score: 0,
             assertions: [],
@@ -19574,7 +20213,7 @@ async function runBatchEvaluation(options) {
         targetResolver,
         availableTargets,
         verbose,
-        threshold: batchThreshold
+        threshold: evalCase.threshold ?? batchThreshold
       });
       if (providerError) {
         result = {
@@ -20036,8 +20675,9 @@ async function runEvalCase(options) {
       fileChanges,
       workspacePath,
       verbose,
-      threshold: caseThreshold
+      threshold: evalCase.threshold ?? caseThreshold
     });
+    const effectiveThreshold = evalCase.threshold ?? caseThreshold;
     const totalDurationMs = Date.now() - caseStartMs;
     const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
     const evalRunTokenUsage = tokenUsage || graderTokens ? {
@@ -20051,7 +20691,7 @@ async function runEvalCase(options) {
       ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
     };
     const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
-    const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
+    const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, effectiveThreshold);
     const targetUsedField = targetUsed ? { targetUsed } : {};
     const finalResult = providerError ? {
       ...result,
@@ -20252,7 +20892,8 @@ async function evaluateCandidate(options) {
     targetResolver,
     availableTargets,
     fileChanges,
-    workspacePath
+    workspacePath,
+    threshold: evalThreshold
   });
   const completedAt = nowFn();
   let agentRequest;
@@ -20283,7 +20924,7 @@ async function evaluateCandidate(options) {
   return {
     timestamp: completedAt.toISOString(),
     testId: evalCase.id,
-    dataset: evalCase.dataset,
+    suite: evalCase.suite,
     category: evalCase.category,
     conversationId: evalCase.conversation_id,
     score: score.score,
@@ -20326,7 +20967,8 @@ async function runEvaluatorsForCase(options) {
     targetResolver,
     availableTargets,
     fileChanges,
-    workspacePath
+    workspacePath,
+    threshold
   } = options;
   if (evalCase.assertions && evalCase.assertions.length > 0) {
     return runEvaluatorList({
@@ -20352,7 +20994,8 @@ async function runEvaluatorsForCase(options) {
       targetResolver,
       availableTargets,
       fileChanges,
-      workspacePath
+      workspacePath,
+      threshold
     });
   }
   const evaluatorKind = evalCase.evaluator ?? "llm-grader";
@@ -20454,7 +21097,8 @@ async function runEvaluatorList(options) {
         name: evaluatorConfig.name,
         type: evaluatorConfig.type,
         weight,
-        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
+        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
+        ...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
       });
       scores.push({
         name: evaluatorConfig.name,
@@ -20489,7 +21133,8 @@ async function runEvaluatorList(options) {
         name: evaluatorConfig.name ?? "unknown",
         type: evaluatorConfig.type ?? "llm-grader",
         weight,
-        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
+        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
+        ...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
       });
       scores.push({
         name: evaluatorConfig.name ?? "unknown",
@@ -20523,9 +21168,10 @@ async function runEvaluatorList(options) {
       }
     }
   }
+  const effectiveThreshold = options.threshold ?? DEFAULT_THRESHOLD;
   const hasRequiredFailure = scored.some((entry) => {
     if (!entry.required) return false;
-    const minScore = typeof entry.required === "number" ? entry.required : PASS_THRESHOLD;
+    const minScore = entry.min_score ?? (typeof entry.required === "number" ? entry.required : effectiveThreshold);
     return entry.score.score < minScore;
   });
   const scorable = scored.filter((entry) => entry.score.verdict !== "skip");
@@ -20536,17 +21182,23 @@ async function runEvaluatorList(options) {
   const expectedAspectCount = assertions.length || 1;
   const score = {
     score: aggregateScore,
-    verdict: scoreToVerdict(aggregateScore),
+    verdict: scoreToVerdict(aggregateScore, effectiveThreshold),
     assertions,
     expectedAspectCount
   };
   return { score, scores };
 }
+/**
+ * Render a test-id filter for display in messages.
+ *
+ * @param filter - A single pattern string, or a list of pattern strings.
+ * @returns The pattern itself, or the patterns joined with ", ".
+ */
+function formatFilter(filter) {
+  if (typeof filter === "string") {
+    return filter;
+  }
+  return filter.join(", ");
+}
+/**
+ * Test an id against a filter that is either one glob pattern or a list of them.
+ *
+ * @param id - The identifier to test.
+ * @param filter - A pattern string, or an array of pattern strings.
+ * @returns true when at least one pattern matches the id.
+ */
+function matchesFilter3(id, filter) {
+  // Normalize to a list so both shapes share a single code path.
+  const patterns = typeof filter === "string" ? [filter] : filter;
+  return patterns.some((pattern) => import_micromatch3.default.isMatch(id, pattern));
+}
 // Narrow the list of eval cases to those whose `id` matches `filter`.
 // A falsy filter returns the input array itself, unfiltered.
 function filterEvalCases(evalCases, filter) {
   if (!filter) {
     return evalCases;
   }
-  return evalCases.filter((evalCase) => import_micromatch3.default.isMatch(evalCase.id, filter));
+  return evalCases.filter((evalCase) => matchesFilter3(evalCase.id, filter));
 }
 function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
   const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
@@ -20633,7 +21285,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
   return {
     timestamp: timestamp.toISOString(),
     testId: evalCase.id,
-    dataset: evalCase.dataset,
+    suite: evalCase.suite,
     category: evalCase.category,
     conversationId: evalCase.conversation_id,
     score: 0,
@@ -20907,6 +21559,7 @@ async function evaluate(config) {
     verbose: config.verbose,
     maxConcurrency: config.workers ?? 3,
     filter: config.filter,
+    threshold: config.threshold,
     evalCases,
     onResult: async (result) => {
       collectedResults.push(result);
@@ -20917,19 +21570,19 @@ async function evaluate(config) {
   const durationMs = Date.now() - startTime;
   return {
     results: allResults,
-    summary: computeSummary(allResults, durationMs)
+    summary: computeSummary(allResults, durationMs, config.threshold)
   };
 }
 /**
  * Convert an assertion type name from snake_case to kebab-case by replacing
  * every underscore with a hyphen.
  */
 function mapAssertionType(type) {
   const segments = type.split("_");
   return segments.join("-");
 }
-function computeSummary(results, durationMs) {
+function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
   const total = results.length;
   let passed = 0;
   let scoreSum = 0;
   for (const r of results) {
     scoreSum += r.score;
-    if (r.score >= PASS_THRESHOLD) {
+    if (r.score >= threshold) {
       passed++;
     }
   }
@@ -20960,7 +21613,7 @@ async function discoverDefaultTarget(repoRoot) {
   return null;
 }
 async function loadEnvHierarchy(repoRoot, startPath) {
-  const { readFileSync: readFileSync3 } = await import("fs");
+  const { readFileSync: readFileSync4 } = await import("fs");
   const chain = buildDirectoryChain2(startPath, repoRoot);
   const envFiles = [];
   for (const dir of chain) {
@@ -20969,7 +21622,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
   }
   for (let i = 0; i < envFiles.length; i++) {
     try {
-      const content = readFileSync3(envFiles[i], "utf8");
+      const content = readFileSync4(envFiles[i], "utf8");
       for (const line of content.split("\n")) {
         const trimmed = line.trim();
         if (!trimmed || trimmed.startsWith("#")) continue;
@@ -21043,7 +21696,7 @@ var CONFIG_FILE_NAMES = [
 ];
 async function loadTsConfig(projectRoot) {
   const { existsSync: existsSync7 } = await import("fs");
-  const { pathToFileURL } = await import("url");
+  const { pathToFileURL: pathToFileURL2 } = await import("url");
   const { join: join2 } = await import("path");
   for (const fileName of CONFIG_FILE_NAMES) {
     const filePath = join2(projectRoot, fileName);
@@ -21051,7 +21704,7 @@ async function loadTsConfig(projectRoot) {
       continue;
     }
     try {
-      const fileUrl = pathToFileURL(filePath).href;
+      const fileUrl = pathToFileURL2(filePath).href;
       const mod = await import(fileUrl);
       const config = mod.default ?? mod;
       return AgentVConfigSchema.parse(config);
@@ -21492,7 +22145,7 @@ var OtelTraceExporter = class {
         rootSpan.setAttribute("gen_ai.system", "agentv");
         rootSpan.setAttribute("agentv.test_id", result.testId);
         rootSpan.setAttribute("agentv.target", result.target);
-        if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
+        if (result.suite) rootSpan.setAttribute("agentv.suite", result.suite);
         rootSpan.setAttribute("agentv.score", result.score);
         if (captureContent && result.output.length > 0) {
           const lastMsg = result.output[result.output.length - 1];
@@ -21701,7 +22354,7 @@ var OtelStreamingObserver = class {
     this.rootSpan.setAttribute("gen_ai.system", "agentv");
     this.rootSpan.setAttribute("agentv.test_id", testId);
     this.rootSpan.setAttribute("agentv.target", target);
-    if (evalSet) this.rootSpan.setAttribute("agentv.dataset", evalSet);
+    if (evalSet) this.rootSpan.setAttribute("agentv.suite", evalSet);
     this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
   }
   /** Create and immediately export a tool span */
@@ -22057,12 +22710,244 @@ function extractToolResultContent(content) {
   return parts.length > 0 ? parts.join("") : void 0;
 }
-// src/import/session-discovery.ts
+// src/import/codex-parser.ts
+init_cjs_shims();
+/**
+ * Parse the JSONL text of a Codex session rollout into a normalized transcript.
+ *
+ * Lines that are blank, are not valid JSON, or have no `type` are skipped.
+ * Handled entry types:
+ *  - `session_meta`  -> session id, cwd, CLI version, model
+ *  - `turn_context`  -> model / cwd, only when not already known
+ *  - `response_item` -> user/assistant messages, tool calls and their outputs
+ *    (`developer` messages and `reasoning` items are dropped)
+ *
+ * @param jsonl - Raw rollout file contents, one JSON object per line.
+ * @returns `{ messages, source, tokenUsage, durationMs, costUsd }`. `tokenUsage`
+ *   is always undefined and `costUsd` always null. `durationMs` is the span
+ *   between the first and last timestamped entries, or undefined when it
+ *   cannot be computed.
+ */
+function parseCodexSession(jsonl) {
+  const messages = [];
+  let sessionId = "";
+  let cwd;
+  let model;
+  let version;
+  let startTimestamp;
+  let endTimestamp;
+  // call_id -> position of the tool call still waiting for its output item.
+  const pendingCalls = /* @__PURE__ */ new Map();
+  // Shared handler for "function_call" and "custom_tool_call" items.
+  const recordToolCall = (payload) => {
+    const callId = String(payload.call_id ?? "");
+    // Function calls carry their payload in `arguments`; custom tool calls in
+    // the Responses item format carry it in `input`, so fall back to that.
+    const rawInput = payload.arguments !== void 0 ? payload.arguments : payload.input;
+    let input = rawInput;
+    if (typeof rawInput === "string") {
+      try {
+        input = JSON.parse(rawInput);
+      } catch {
+        // Not JSON (e.g. free-form tool text): keep the raw string as-is.
+      }
+    }
+    const msgIdx = messages.length;
+    messages.push({
+      role: "assistant",
+      toolCalls: [{ tool: String(payload.name ?? ""), input, id: callId }]
+    });
+    if (callId) {
+      pendingCalls.set(callId, { msgIdx, toolIdx: 0 });
+    }
+  };
+  const lines = jsonl.split("\n").filter((l) => l.trim().length > 0);
+  for (const line of lines) {
+    let entry;
+    try {
+      entry = JSON.parse(line);
+    } catch {
+      continue;
+    }
+    // `?.` guards against a line that parses to JSON `null`.
+    if (!entry?.type) continue;
+    if (entry.timestamp) {
+      if (!startTimestamp) startTimestamp = entry.timestamp;
+      endTimestamp = entry.timestamp;
+    }
+    const payload = entry.payload ?? {};
+    switch (entry.type) {
+      case "session_meta": {
+        sessionId = String(payload.id ?? "");
+        cwd = payload.cwd ? String(payload.cwd) : void 0;
+        version = payload.cli_version ? String(payload.cli_version) : void 0;
+        if (payload.model && !model) {
+          model = String(payload.model);
+        }
+        break;
+      }
+      case "turn_context": {
+        // First value wins: later turns never overwrite model or cwd.
+        if (payload.model && !model) {
+          model = String(payload.model);
+        }
+        if (payload.cwd && !cwd) {
+          cwd = String(payload.cwd);
+        }
+        break;
+      }
+      case "response_item": {
+        const itemType = String(payload.type ?? "");
+        const role = String(payload.role ?? "");
+        switch (itemType) {
+          case "message": {
+            if (role === "developer") break;
+            const content = extractResponseItemContent(payload.content);
+            if (role === "user" && content) {
+              messages.push({ role: "user", content });
+            } else if (role === "assistant" && content) {
+              messages.push({ role: "assistant", content });
+            }
+            break;
+          }
+          case "function_call":
+          case "custom_tool_call":
+            recordToolCall(payload);
+            break;
+          case "function_call_output":
+          case "custom_tool_call_output": {
+            const callId = String(payload.call_id ?? "");
+            const pending = pendingCalls.get(callId);
+            if (pending) {
+              // Rebuild the message instead of mutating it so the tool call
+              // gains its `output` without touching the original object.
+              const existingMsg = messages[pending.msgIdx];
+              const existingCalls = [...existingMsg.toolCalls ?? []];
+              existingCalls[pending.toolIdx] = {
+                ...existingCalls[pending.toolIdx],
+                output: payload.output
+              };
+              messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls };
+              pendingCalls.delete(callId);
+            }
+            break;
+          }
+          // Skip reasoning blocks (thinking tokens)
+          case "reasoning":
+            break;
+        }
+        break;
+      }
+    }
+  }
+  let durationMs;
+  if (startTimestamp && endTimestamp) {
+    const elapsed = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
+    // Unparseable timestamps produce NaN; report "unknown" instead.
+    durationMs = Number.isFinite(elapsed) ? elapsed : void 0;
+  }
+  const source = {
+    provider: "codex",
+    sessionId,
+    cwd,
+    startedAt: startTimestamp,
+    model,
+    version
+  };
+  return {
+    messages,
+    source,
+    // Codex rollout files don't include token counts (only rate limit info)
+    tokenUsage: void 0,
+    durationMs,
+    costUsd: null
+  };
+}
+/**
+ * Flatten the `content` of a response item into a single string.
+ *
+ * @param content - A plain string, or an array of content blocks.
+ * @returns The string itself; or the concatenation of every block's string
+ *   `text` property; or undefined when `content` is neither a string nor an
+ *   array, or when no block has a string `text`.
+ */
+function extractResponseItemContent(content) {
+  if (typeof content === "string") return content;
+  if (!Array.isArray(content)) return void 0;
+  // Only non-null object blocks with a string `text` contribute.
+  const texts = content.filter((block) => typeof block === "object" && block !== null && typeof block.text === "string").map((block) => block.text);
+  return texts.length === 0 ? void 0 : texts.join("");
+}
+// src/import/codex-session-discovery.ts
 init_cjs_shims();
 var import_promises36 = require("fs/promises");
 var import_node_os8 = require("os");
 var import_node_path53 = __toESM(require("path"), 1);
-var DEFAULT_PROJECTS_DIR = () => import_node_path53.default.join((0, import_node_os8.homedir)(), ".claude", "projects");
+// Default Codex sessions root: "<home directory>/.codex/sessions". Written as a
+// function so the home directory is looked up on each call.
+var DEFAULT_SESSIONS_DIR = () => import_node_path53.default.join((0, import_node_os8.homedir)(), ".codex", "sessions");
+/**
+ * Find Codex rollout files under a sessions directory laid out as
+ * `<sessionsDir>/<year>/<month>/<day>/rollout-*.jsonl`.
+ *
+ * @param opts - Optional settings:
+ *   - `sessionsDir`: root to scan (defaults to `DEFAULT_SESSIONS_DIR()`).
+ *   - `date`: only scan the day directory whose `year-month-day` equals this.
+ *   - `latest`: when truthy, return only the most recent session.
+ *   - `limit`: maximum number of sessions to return (default 10).
+ * @returns Sessions sorted by file mtime, newest first, truncated to the limit.
+ *   Directories that cannot be read are treated as empty.
+ */
+async function discoverCodexSessions(opts) {
+  const root = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
+  const maxResults = opts?.latest ? 1 : opts?.limit ?? 10;
+  // Unreadable or non-directory entries simply contribute nothing.
+  const listOrEmpty = async (dir) => {
+    try {
+      return await (0, import_promises36.readdir)(dir);
+    } catch {
+      return [];
+    }
+  };
+  const found = [];
+  for (const year of await listOrEmpty(root)) {
+    const yearPath = import_node_path53.default.join(root, year);
+    for (const month of await listOrEmpty(yearPath)) {
+      const monthPath = import_node_path53.default.join(yearPath, month);
+      for (const day of await listOrEmpty(monthPath)) {
+        if (opts?.date && `${year}-${month}-${day}` !== opts.date) continue;
+        const dayPath = import_node_path53.default.join(monthPath, day);
+        for (const file of await listOrEmpty(dayPath)) {
+          if (!(file.startsWith("rollout-") && file.endsWith(".jsonl"))) continue;
+          const filePath = import_node_path53.default.join(dayPath, file);
+          const baseName = file.slice(0, -".jsonl".length);
+          // The session id is the last five dash-separated segments of the
+          // name; shorter names are used whole.
+          const segments = baseName.split("-");
+          const sessionId = segments.length >= 6 ? segments.slice(-5).join("-") : baseName;
+          let updatedAt;
+          try {
+            updatedAt = (await (0, import_promises36.stat)(filePath)).mtime;
+          } catch {
+            // A file that cannot be stat'ed sorts last (epoch 0).
+            updatedAt = /* @__PURE__ */ new Date(0);
+          }
+          found.push({ sessionId, filePath, filename: file, updatedAt });
+        }
+      }
+    }
+  }
+  found.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
+  return found.slice(0, maxResults);
+}
+// src/import/session-discovery.ts
+init_cjs_shims();
+var import_promises37 = require("fs/promises");
+var import_node_os9 = require("os");
+var import_node_path54 = __toESM(require("path"), 1);
+// Default Claude projects root: "<home directory>/.claude/projects". Written as
+// a function so the home directory is looked up on each call.
+var DEFAULT_PROJECTS_DIR = () => import_node_path54.default.join((0, import_node_os9.homedir)(), ".claude", "projects");
 /**
  * Encode a project path as a single directory-name segment by replacing every
  * forward slash with a hyphen.
  */
 function encodeProjectPath(projectPath) {
   const segments = projectPath.split("/");
   return segments.join("-");
 }
@@ -22071,7 +22956,7 @@ async function discoverClaudeSessions(opts) {
   const limit = opts?.latest ? 1 : opts?.limit ?? 10;
   let projectDirs;
   try {
-    projectDirs = await (0, import_promises36.readdir)(projectsDir);
+    projectDirs = await (0, import_promises37.readdir)(projectsDir);
   } catch {
     return [];
   }
@@ -22081,10 +22966,10 @@ async function discoverClaudeSessions(opts) {
   }
   const sessions = [];
   for (const projectDir of projectDirs) {
-    const dirPath = import_node_path53.default.join(projectsDir, projectDir);
+    const dirPath = import_node_path54.default.join(projectsDir, projectDir);
     let entries;
     try {
-      entries = await (0, import_promises36.readdir)(dirPath);
+      entries = await (0, import_promises37.readdir)(dirPath);
     } catch {
       continue;
     }
@@ -22092,10 +22977,10 @@ async function discoverClaudeSessions(opts) {
       if (!entry.endsWith(".jsonl")) continue;
       const sessionId = entry.replace(/\.jsonl$/, "");
       if (opts?.sessionId && sessionId !== opts.sessionId) continue;
-      const filePath = import_node_path53.default.join(dirPath, entry);
+      const filePath = import_node_path54.default.join(dirPath, entry);
       let updatedAt;
       try {
-        const fileStat = await (0, import_promises36.stat)(filePath);
+        const fileStat = await (0, import_promises37.stat)(filePath);
         updatedAt = fileStat.mtime;
       } catch {
         updatedAt = /* @__PURE__ */ new Date(0);
@@ -22112,13 +22997,91 @@ async function discoverClaudeSessions(opts) {
   return sessions.slice(0, limit);
 }
+// src/import/transcript-provider.ts
+init_cjs_shims();
 // src/import/types.ts
 init_cjs_shims();
-var import_promises37 = require("fs/promises");
+var import_promises38 = require("fs/promises");
+function toTranscriptJsonLine(entry) { // Maps a camelCase transcript entry to a snake_case record object (one record per entry).
+  const firstUserMessage = entry.messages.find((m) => m.role === "user"); // first message whose role is "user", or undefined
+  const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : ""; // missing or non-string content yields ""
+  return {
+    input,
+    output: entry.messages, // the full message list, passed through by reference (not copied)
+    token_usage: entry.tokenUsage ? { // undefined when entry.tokenUsage is falsy
+      input: entry.tokenUsage.input,
+      output: entry.tokenUsage.output,
+      cached: entry.tokenUsage.cached
+    } : void 0,
+    duration_ms: entry.durationMs,
+    cost_usd: entry.costUsd,
+    source: { // entry.source must be defined; its fields are read unconditionally
+      provider: entry.source.provider,
+      session_id: entry.source.sessionId,
+      model: entry.source.model,
+      timestamp: entry.source.startedAt,
+      git_branch: entry.source.gitBranch,
+      cwd: entry.source.cwd ?? entry.source.projectPath, // projectPath is used only when cwd is null/undefined
+      version: entry.source.version
+    }
+  };
+}
+async function readTranscriptJsonl(filePath) { // Reads a UTF-8 file and resolves to an array with one JSON.parse result per non-blank line.
+  const text = await (0, import_promises38.readFile)(filePath, "utf8"); // rejects if the file cannot be read
+  return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line)); // whitespace-only lines are dropped; a malformed line makes JSON.parse throw, with no line number attached
+}
 async function readTranscriptFile(filePath) { // Resolves to the file's entire contents decoded as a UTF-8 string; no parsing is done here.
-  return (0, import_promises37.readFile)(filePath, "utf8");
+  return (0, import_promises38.readFile)(filePath, "utf8");
 }
+// src/import/transcript-provider.ts
+var TranscriptProvider = class _TranscriptProvider { // Replays pre-parsed transcript records: each invoke() call returns the next record in order.
+  id; // set in the constructor to `transcript:${targetName}`
+  kind = "transcript";
+  targetName;
+  lines; // array of transcript records (snake_case fields), as passed to the constructor
+  cursor = 0; // index of the next record invoke() will return
+  constructor(targetName, lines) {
+    this.targetName = targetName;
+    this.id = `transcript:${targetName}`;
+    this.lines = lines; // stored by reference, not copied
+  }
+  /**
+   * Create a TranscriptProvider from a JSONL file path.
+   */
+  static async fromFile(filePath) {
+    const lines = await readTranscriptJsonl(filePath);
+    if (lines.length === 0) { // a file with no non-blank lines is rejected
+      throw new Error(`Transcript file is empty: ${filePath}`);
+    }
+    const providerName = lines[0].source.provider ?? "transcript"; // named after the first record's provider; "transcript" when that is null/undefined. Throws if the first record has no `source`.
+    return new _TranscriptProvider(providerName, lines);
+  }
+  get lineCount() { // total number of records, regardless of how many have been consumed
+    return this.lines.length;
+  }
+  async invoke(_request) { // _request is never read; the result depends only on how many calls came before
+    if (this.cursor >= this.lines.length) { // all records consumed: reject instead of wrapping around
+      throw new Error(
+        `Transcript exhausted: ${this.lines.length} line(s) available but ${this.cursor + 1} invocations attempted. Each transcript line maps to one test case.`
+      );
+    }
+    const line = this.lines[this.cursor++]; // post-increment: read the current record, then advance
+    return { // snake_case record fields mapped to a camelCase response object
+      output: line.output,
+      tokenUsage: line.token_usage ? { // undefined when the record has no token_usage
+        input: line.token_usage.input,
+        output: line.token_usage.output,
+        cached: line.token_usage.cached
+      } : void 0,
+      durationMs: line.duration_ms,
+      costUsd: line.cost_usd ?? void 0, // a null cost_usd becomes undefined
+      startTime: line.source.timestamp // throws if the record has no `source`
+    };
+  }
+};
 // src/index.ts
 function createAgentKernel() { // Takes no arguments and returns a fresh object literal on every call.
   return { status: "stub" }; // the only field is the fixed string "stub"
 }
   DEFAULT_EVALUATOR_TEMPLATE,
   DEFAULT_EVAL_PATTERNS,
   DEFAULT_EXPLORATION_TOOLS,
+  DEFAULT_THRESHOLD,
   DeterministicAssertionEvaluator,
   EvaluatorRegistry,
   ExecutionMetricsEvaluator,
@@ -22154,6 +23118,7 @@ function createAgentKernel() {
   TemplateNotFoundError,
   TokenUsageEvaluator,
   ToolTrajectoryEvaluator,
+  TranscriptProvider,
   WorkspaceCreationError,
   WorkspacePoolManager,
   addProject,
@@ -22190,6 +23155,7 @@ function createAgentKernel() {
   detectFormat,
   discoverAssertions,
   discoverClaudeSessions,
+  discoverCodexSessions,
   discoverCopilotSessions,
   discoverGraders,
   discoverJudges,
@@ -22250,6 +23216,8 @@ function createAgentKernel() {
   normalizeLineEndings,
   parseAgentSkillsEvals,
   parseClaudeSession,
+  parseCodexSession,
+  parseCopilotEvents,
   parseJsonFromText,
   parseJsonSafe,
   readJsonFile,
@@ -22257,8 +23225,10 @@ function createAgentKernel() {
   readTestSuiteMetadata,
   readTextFile,
   readTranscriptFile,
+  readTranscriptJsonl,
   removeProject,
   resolveAndCreateProvider,
+  resolveDelegatedTargetDefinition,
   resolveFileReference,
   resolveTargetDefinition,
   resolveWorkspaceTemplate,
@@ -22288,6 +23258,7 @@ function createAgentKernel() {
   substituteVariables,
   toCamelCaseDeep,
   toSnakeCaseDeep,
+  toTranscriptJsonLine,
   tokensPerTool,
   touchProject,
   transpileEvalYaml,