npm - agentv - Versions diffs - 4.6.1 → 4.7.0 - Mend

agentv 4.6.1 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

package/README.md +1 -1
package/dist/{chunk-MHWYA4CS.js → chunk-AX4CQS45.js} +300 -283
package/dist/chunk-AX4CQS45.js.map +1 -0
package/dist/{chunk-YXXD27OK.js → chunk-I6UE4LHZ.js} +1232 -439
package/dist/chunk-I6UE4LHZ.js.map +1 -0
package/dist/{chunk-NSVFUL27.js → chunk-VEAOMKNS.js} +4420 -3603
package/dist/chunk-VEAOMKNS.js.map +1 -0
package/dist/cli.js +3 -3
package/dist/{dist-BN5NUVAB.js → dist-XRVHRBJF.js} +16 -2
package/dist/index.js +3 -3
package/dist/{interactive-DMSVE6CS.js → interactive-UBEMNJZG.js} +10 -47
package/dist/interactive-UBEMNJZG.js.map +1 -0
package/dist/studio/assets/index-DHxVz6M9.css +1 -0
package/dist/studio/assets/{index-C7TnyYee.js → index-DcwjOyrk.js} +1 -1
package/dist/studio/assets/index-Y5InSvcS.js +65 -0
package/dist/studio/index.html +2 -2
package/package.json +1 -1
package/dist/chunk-MHWYA4CS.js.map +0 -1
package/dist/chunk-NSVFUL27.js.map +0 -1
package/dist/chunk-YXXD27OK.js.map +0 -1
package/dist/interactive-DMSVE6CS.js.map +0 -1
package/dist/studio/assets/index-jJVIJh8b.css +0 -1
package/dist/studio/assets/index-vn54AYtS.js +0 -65
package/dist/{dist-BN5NUVAB.js.map → dist-XRVHRBJF.js.map} +0 -0

package/dist/{chunk-YXXD27OK.js → chunk-I6UE4LHZ.js} RENAMED Viewed

@@ -301,7 +301,7 @@ var require_dist = __commonJS({
   }
 });
-// ../../packages/core/dist/chunk-ZK4GG7PR.js
+// ../../packages/core/dist/chunk-75RFVESM.js
 import { constants } from "node:fs";
 import { access, readFile } from "node:fs/promises";
 import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
   void: () => voidType
 });
-// ../../packages/core/dist/chunk-ZK4GG7PR.js
+// ../../packages/core/dist/chunk-75RFVESM.js
 import { readFile as readFile2 } from "node:fs/promises";
 import path3 from "node:path";
 import fg from "fast-glob";
@@ -633,15 +633,13 @@ async function resolveFileReference(rawValue, searchRoots) {
 }
 var CliHealthcheckHttpInputSchema = external_exports2.object({
   url: external_exports2.string().min(1, "healthcheck URL is required"),
-  timeout_seconds: external_exports2.number().positive().optional(),
-  timeoutSeconds: external_exports2.number().positive().optional()
-});
+  timeout_seconds: external_exports2.number().positive().optional()
+}).passthrough();
 var CliHealthcheckCommandInputSchema = external_exports2.object({
   command: external_exports2.string().min(1, "healthcheck command is required"),
   cwd: external_exports2.string().optional(),
-  timeout_seconds: external_exports2.number().positive().optional(),
-  timeoutSeconds: external_exports2.number().positive().optional()
-});
+  timeout_seconds: external_exports2.number().positive().optional()
+}).passthrough();
 var CliHealthcheckInputSchema = external_exports2.union([
   CliHealthcheckHttpInputSchema,
   CliHealthcheckCommandInputSchema
@@ -653,36 +651,28 @@ var CliTargetInputSchema = external_exports2.object({
   command: external_exports2.string(),
   // Files format - optional
   files_format: external_exports2.string().optional(),
-  filesFormat: external_exports2.string().optional(),
   attachments_format: external_exports2.string().optional(),
-  attachmentsFormat: external_exports2.string().optional(),
   // Working directory - optional
   cwd: external_exports2.string().optional(),
   // Workspace template directory - optional (mutually exclusive with cwd)
   workspace_template: external_exports2.string().optional(),
-  workspaceTemplate: external_exports2.string().optional(),
   // Timeout in seconds - optional
   timeout_seconds: external_exports2.number().positive().optional(),
-  timeoutSeconds: external_exports2.number().positive().optional(),
   // Healthcheck configuration - optional
   healthcheck: CliHealthcheckInputSchema.optional(),
   // Verbose mode - optional
   verbose: external_exports2.boolean().optional(),
   cli_verbose: external_exports2.boolean().optional(),
-  cliVerbose: external_exports2.boolean().optional(),
   // Keep temp files - optional
   keep_temp_files: external_exports2.boolean().optional(),
-  keepTempFiles: external_exports2.boolean().optional(),
   keep_output_files: external_exports2.boolean().optional(),
-  keepOutputFiles: external_exports2.boolean().optional(),
   // Common target fields
   grader_target: external_exports2.string().optional(),
   judge_target: external_exports2.string().optional(),
   // backward compat
   workers: external_exports2.number().int().min(1).optional(),
-  provider_batching: external_exports2.boolean().optional(),
-  providerBatching: external_exports2.boolean().optional()
-});
+  provider_batching: external_exports2.boolean().optional()
+}).passthrough();
 var CliHealthcheckHttpSchema = external_exports2.object({
   url: external_exports2.string().min(1),
   timeoutMs: external_exports2.number().positive().optional()
@@ -707,7 +697,7 @@ var CliTargetConfigSchema = external_exports2.object({
   keepTempFiles: external_exports2.boolean().optional()
 }).strict();
 function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
-  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutSeconds = input.timeout_seconds;
   const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
   if ("url" in input && input.url) {
     const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
@@ -741,9 +731,9 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
 function normalizeCliTargetInput(input, env, evalFilePath) {
   const targetName = input.name;
   const command = resolveString(input.command, env, `${targetName} CLI command`, true);
-  const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
+  const filesFormatSource = input.files_format ?? input.attachments_format;
   const filesFormat = resolveOptionalLiteralString(filesFormatSource);
-  const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
+  const workspaceTemplateSource = input.workspace_template;
   let workspaceTemplate = resolveOptionalString(
     workspaceTemplateSource,
     env,
@@ -771,12 +761,10 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
   if (!cwd && !workspaceTemplate && evalFilePath) {
     cwd = path2.dirname(path2.resolve(evalFilePath));
   }
-  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
+  const timeoutSeconds = input.timeout_seconds;
   const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
-  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
-  const keepTempFiles = resolveOptionalBoolean(
-    input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
-  );
+  const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose);
+  const keepTempFiles = resolveOptionalBoolean(input.keep_temp_files ?? input.keep_output_files);
   const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
   return {
     command,
@@ -797,14 +785,104 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
   "FILES",
   "OUTPUT_FILE"
 ]);
+var DEPRECATED_TARGET_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
+  ["providerBatching", "provider_batching"],
+  ["subagentModeAllowed", "subagent_mode_allowed"],
+  ["fallbackTargets", "fallback_targets"],
+  ["resourceName", "endpoint"],
+  ["baseUrl", "base_url"],
+  ["apiKey", "api_key"],
+  ["deploymentName", "model"],
+  ["thinkingBudget", "thinking_budget"],
+  ["maxTokens", "max_output_tokens"],
+  ["apiFormat", "api_format"],
+  ["timeoutSeconds", "timeout_seconds"],
+  ["logDir", "log_dir"],
+  ["logDirectory", "log_directory"],
+  ["logFormat", "log_format"],
+  ["logOutputFormat", "log_output_format"],
+  ["systemPrompt", "system_prompt"],
+  ["maxTurns", "max_turns"],
+  ["maxBudgetUsd", "max_budget_usd"],
+  ["dryRun", "dry_run"],
+  ["subagentRoot", "subagent_root"],
+  ["filesFormat", "files_format"],
+  ["attachmentsFormat", "attachments_format"],
+  ["cliUrl", "cli_url"],
+  ["cliPath", "cli_path"],
+  ["githubToken", "github_token"],
+  ["sessionDir", "session_dir"],
+  ["sessionId", "session_id"],
+  ["sessionStateDir", "session_state_dir"],
+  ["maxRetries", "max_retries"],
+  ["retryInitialDelayMs", "retry_initial_delay_ms"],
+  ["retryMaxDelayMs", "retry_max_delay_ms"],
+  ["retryBackoffFactor", "retry_backoff_factor"],
+  ["retryStatusCodes", "retry_status_codes"]
+]);
+var DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
+  ["timeoutSeconds", "timeout_seconds"]
+]);
+function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
+  if (typeof value !== "object" || value === null || Array.isArray(value)) {
+    return [];
+  }
+  const warnings = [];
+  for (const [camelCaseField, snakeCaseField] of aliases) {
+    if (Object.prototype.hasOwnProperty.call(value, camelCaseField)) {
+      warnings.push({
+        location: `${location}.${camelCaseField}`,
+        message: `camelCase field '${camelCaseField}' is no longer supported in targets.yaml. Use '${snakeCaseField}' instead.`
+      });
+    }
+  }
+  return warnings;
+}
+function assertNoDeprecatedCamelCaseTargetFields(definition) {
+  if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
+    throw new Error(
+      `${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
+    );
+  }
+  const warning = findDeprecatedCamelCaseTargetWarnings(
+    definition,
+    `target "${definition.name}"`
+  )[0];
+  if (!warning) {
+    return;
+  }
+  const fieldMatch = warning.message.match(/field '([^']+)'/);
+  const replacementMatch = warning.message.match(/Use '([^']+)' instead/);
+  const field = fieldMatch?.[1] ?? "unknown";
+  const replacement = replacementMatch?.[1] ?? "snake_case";
+  throw new Error(
+    `${warning.location}: camelCase field '${field}' is no longer supported in targets.yaml. Use '${replacement}' instead.`
+  );
+}
+function findDeprecatedCamelCaseTargetWarnings(target, location) {
+  const warnings = collectDeprecatedCamelCaseWarnings(
+    target,
+    location,
+    DEPRECATED_TARGET_CAMEL_CASE_FIELDS
+  );
+  if (typeof target !== "object" || target === null || Array.isArray(target)) {
+    return warnings;
+  }
+  const healthcheck = target.healthcheck;
+  warnings.push(
+    ...collectDeprecatedCamelCaseWarnings(
+      healthcheck,
+      `${location}.healthcheck`,
+      DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS
+    )
+  );
+  return warnings;
+}
 var COMMON_TARGET_SETTINGS = [
   "use_target",
   "provider_batching",
-  "providerBatching",
   "subagent_mode_allowed",
-  "subagentModeAllowed",
-  "fallback_targets",
-  "fallbackTargets"
+  "fallback_targets"
 ];
 var USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
 var BASE_TARGET_SCHEMA = external_exports2.object({
@@ -816,43 +894,40 @@ var BASE_TARGET_SCHEMA = external_exports2.object({
   // backward compat
   workers: external_exports2.number().int().min(1).optional(),
   workspace_template: external_exports2.string().optional(),
-  workspaceTemplate: external_exports2.string().optional(),
   subagent_mode_allowed: external_exports2.boolean().optional(),
-  fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional(),
-  fallbackTargets: external_exports2.array(external_exports2.string().min(1)).optional()
+  fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional()
 }).passthrough();
 var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
+var DEFAULT_AZURE_RESPONSES_API_VERSION = "v1";
 var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
-function normalizeAzureApiVersion(value) {
+function normalizeAzureApiVersion(value, apiFormat) {
+  const defaultVersion = apiFormat === "responses" ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION;
   if (!value) {
-    return DEFAULT_AZURE_API_VERSION;
+    return defaultVersion;
   }
   const trimmed = value.trim();
   if (trimmed.length === 0) {
-    return DEFAULT_AZURE_API_VERSION;
+    return defaultVersion;
   }
   const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
-  return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
+  return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion;
 }
 function resolveRetryConfig(target) {
-  const maxRetries = resolveOptionalNumber(
-    target.max_retries ?? target.maxRetries,
-    `${target.name} max retries`
-  );
+  const maxRetries = resolveOptionalNumber(target.max_retries, `${target.name} max retries`);
   const initialDelayMs = resolveOptionalNumber(
-    target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
+    target.retry_initial_delay_ms,
     `${target.name} retry initial delay`
   );
   const maxDelayMs = resolveOptionalNumber(
-    target.retry_max_delay_ms ?? target.retryMaxDelayMs,
+    target.retry_max_delay_ms,
     `${target.name} retry max delay`
   );
   const backoffFactor = resolveOptionalNumber(
-    target.retry_backoff_factor ?? target.retryBackoffFactor,
+    target.retry_backoff_factor,
     `${target.name} retry backoff factor`
   );
   const retryableStatusCodes = resolveOptionalNumberArray(
-    target.retry_status_codes ?? target.retryStatusCodes,
+    target.retry_status_codes,
     `${target.name} retry status codes`
   );
   if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
@@ -912,9 +987,10 @@ function resolveDelegatedTargetDefinition(name21, definitions, env = process.env
     `Target "${name21}" exceeded the maximum use_target resolution depth (10). Check for a delegation loop or overly deep alias chain.`
   );
 }
-function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
+function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
+  assertNoDeprecatedCamelCaseTargetFields(definition);
   const parsed = BASE_TARGET_SCHEMA.parse(definition);
-  if (parsed.workspace_template !== void 0 || parsed.workspaceTemplate !== void 0) {
+  if (parsed.workspace_template !== void 0) {
     throw new Error(
       `${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
     );
@@ -930,13 +1006,9 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
     `${parsed.name} provider`,
     true
   ).toLowerCase();
-  const providerBatching = resolveOptionalBoolean(
-    parsed.provider_batching ?? parsed.providerBatching
-  );
-  const subagentModeAllowed = resolveOptionalBoolean(
-    parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
-  );
-  const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
+  const providerBatching = resolveOptionalBoolean(parsed.provider_batching);
+  const subagentModeAllowed = resolveOptionalBoolean(parsed.subagent_mode_allowed);
+  const fallbackTargets = parsed.fallback_targets;
   const base = {
     name: parsed.name,
     graderTarget: parsed.grader_target ?? parsed.judge_target,
@@ -1086,20 +1158,22 @@ function normalizeOpenAIBaseUrl(value) {
   return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
 }
 function resolveAzureConfig(target, env) {
-  const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
+  const endpointSource = target.endpoint ?? target.resource;
+  const apiKeySource = target.api_key;
+  const deploymentSource = target.deployment ?? target.model;
   const versionSource = target.version ?? target.api_version;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
   const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
   const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
+  const apiFormat = resolveApiFormat(target, env, target.name);
   const version = normalizeAzureApiVersion(
     resolveOptionalString(versionSource, env, `${target.name} api version`, {
       allowLiteral: true,
       optionalEnv: true
-    })
+    }),
+    apiFormat
   );
   const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
   const maxOutputTokens = resolveOptionalNumber(
@@ -1112,13 +1186,17 @@ function resolveAzureConfig(target, env) {
     deploymentName,
     apiKey,
     version,
+    apiFormat,
     temperature,
     maxOutputTokens,
     retry
   };
 }
-function resolveApiFormat(target, targetName) {
-  const raw = target.api_format ?? target.apiFormat;
+function resolveApiFormat(target, env, targetName) {
+  const raw = resolveOptionalString(target.api_format, env, `${targetName} api format`, {
+    allowLiteral: true,
+    optionalEnv: true
+  });
   if (raw === void 0) return void 0;
   if (raw === "chat" || raw === "responses") return raw;
   throw new Error(
@@ -1126,11 +1204,11 @@ function resolveApiFormat(target, targetName) {
   );
 }
 function resolveOpenAIConfig(target, env) {
-  const endpointSource = target.endpoint ?? target.base_url ?? target.baseUrl;
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const endpointSource = target.endpoint ?? target.base_url;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const baseURL = normalizeOpenAIBaseUrl(
     resolveOptionalString(endpointSource, env, `${target.name} endpoint`, {
       allowLiteral: true,
@@ -1144,17 +1222,17 @@ function resolveOpenAIConfig(target, env) {
     baseURL,
     apiKey,
     model,
-    apiFormat: resolveApiFormat(target, target.name),
+    apiFormat: resolveApiFormat(target, env, target.name),
     temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
     maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
     retry
   };
 }
 function resolveOpenRouterConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const retry = resolveRetryConfig(target);
   return {
     apiKey: resolveString(apiKeySource, env, `${target.name} OpenRouter api key`),
@@ -1165,11 +1243,11 @@ function resolveOpenRouterConfig(target, env) {
   };
 }
 function resolveAnthropicConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
-  const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
+  const maxTokensSource = target.max_output_tokens;
+  const thinkingBudgetSource = target.thinking_budget;
   const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
   const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
   const retry = resolveRetryConfig(target);
@@ -1183,10 +1261,10 @@ function resolveAnthropicConfig(target, env) {
   };
 }
 function resolveGeminiConfig(target, env) {
-  const apiKeySource = target.api_key ?? target.apiKey;
+  const apiKeySource = target.api_key;
   const modelSource = target.model ?? target.deployment ?? target.variant;
   const temperatureSource = target.temperature;
-  const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
+  const maxTokensSource = target.max_output_tokens;
   const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
   const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
     allowLiteral: true,
@@ -1206,11 +1284,11 @@ function resolveCodexConfig(target, env, evalFilePath) {
   const executableSource = target.executable ?? target.command ?? target.binary;
   const argsSource = target.args ?? target.arguments;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt;
   const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1274,16 +1352,16 @@ function normalizeCodexLogFormat(value) {
   throw new Error("codex log format must be 'summary' or 'json'");
 }
 function resolveCopilotSdkConfig(target, env, evalFilePath) {
-  const cliUrlSource = target.cli_url ?? target.cliUrl;
-  const cliPathSource = target.cli_path ?? target.cliPath;
-  const githubTokenSource = target.github_token ?? target.githubToken;
+  const cliUrlSource = target.cli_url;
+  const cliPathSource = target.cli_path;
+  const githubTokenSource = target.github_token;
   const modelSource = target.model;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1356,11 +1434,11 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
   const modelSource = target.model;
   const argsSource = target.args ?? target.arguments;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1424,16 +1502,16 @@ function normalizeCopilotLogFormat(value) {
 }
 function resolvePiCodingAgentConfig(target, env, evalFilePath) {
   const subproviderSource = target.subprovider;
-  const modelSource = target.model ?? target.pi_model ?? target.piModel;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
-  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const modelSource = target.model ?? target.pi_model;
+  const apiKeySource = target.api_key;
+  const toolsSource = target.tools ?? target.pi_tools;
+  const thinkingSource = target.thinking ?? target.pi_thinking;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const subprovider = resolveOptionalString(
     subproviderSource,
     env,
@@ -1451,7 +1529,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
     allowLiteral: false,
     optionalEnv: true
   });
-  const baseUrlSource = target.base_url ?? target.baseUrl ?? target.endpoint;
+  const baseUrlSource = target.base_url ?? target.endpoint;
   const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi base url`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1510,16 +1588,16 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
 function resolvePiCliConfig(target, env, evalFilePath) {
   const executableSource = target.executable ?? target.command ?? target.binary;
   const subproviderSource = target.subprovider;
-  const modelSource = target.model ?? target.pi_model ?? target.piModel;
-  const apiKeySource = target.api_key ?? target.apiKey;
-  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
-  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
+  const modelSource = target.model ?? target.pi_model;
+  const apiKeySource = target.api_key;
+  const toolsSource = target.tools ?? target.pi_tools;
+  const thinkingSource = target.thinking ?? target.pi_thinking;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format;
+  const systemPromptSource = target.system_prompt;
   const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1538,7 +1616,7 @@ function resolvePiCliConfig(target, env, evalFilePath) {
     allowLiteral: false,
     optionalEnv: true
   });
-  const baseUrlSource = target.base_url ?? target.baseUrl ?? target.endpoint;
+  const baseUrlSource = target.base_url ?? target.endpoint;
   const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi-cli base url`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1596,11 +1674,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
 function resolveClaudeConfig(target, env, evalFilePath) {
   const modelSource = target.model;
   const cwdSource = target.cwd;
-  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
-  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
-  const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CLAUDE_LOG_FORMAT;
-  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
+  const workspaceTemplateSource = target.workspace_template;
+  const timeoutSource = target.timeout_seconds;
+  const logDirSource = target.log_dir ?? target.log_directory;
+  const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
+  const systemPromptSource = target.system_prompt;
   const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
     allowLiteral: true,
     optionalEnv: true
@@ -1633,8 +1711,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
   });
   const logFormat = normalizeClaudeLogFormat(logFormatSource);
   const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
-  const maxTurns = typeof target.max_turns === "number" ? target.max_turns : typeof target.maxTurns === "number" ? target.maxTurns : void 0;
-  const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : typeof target.maxBudgetUsd === "number" ? target.maxBudgetUsd : void 0;
+  const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
+  const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
   return {
     model,
     systemPrompt,
@@ -1665,9 +1743,7 @@ function resolveMockConfig(target) {
   return { response };
 }
 function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
-  const workspaceTemplateEnvVar = resolveOptionalLiteralString(
-    target.workspace_template ?? target.workspaceTemplate
-  );
+  const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
   let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
     workspaceTemplateEnvVar,
     env,
@@ -1682,9 +1758,9 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
   }
   const executableSource = target.executable;
   const waitSource = target.wait;
-  const dryRunSource = target.dry_run ?? target.dryRun;
-  const subagentRootSource = target.subagent_root ?? target.subagentRoot;
-  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
+  const dryRunSource = target.dry_run;
+  const subagentRootSource = target.subagent_root;
+  const timeoutSource = target.timeout_seconds;
   const defaultCommand = insiders ? "code-insiders" : "code";
   const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
     allowLiteral: true,
@@ -1719,8 +1795,8 @@ function resolveCliConfig(target, env, evalFilePath) {
   const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
   if (!parseResult.success) {
     const firstError = parseResult.error.errors[0];
-    const path49 = firstError?.path.join(".") || "";
-    const prefix = path49 ? `${target.name} ${path49}: ` : `${target.name}: `;
+    const path410 = firstError?.path.join(".") || "";
+    const prefix = path410 ? `${target.name} ${path410}: ` : `${target.name}: `;
     throw new Error(`${prefix}${firstError?.message}`);
   }
   const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -1735,7 +1811,7 @@ function resolveCliConfig(target, env, evalFilePath) {
 }
 function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
   const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
-  const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
+  const timeoutSeconds = target.timeout_seconds;
   const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
   let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
     allowLiteral: true,
@@ -1799,10 +1875,10 @@ function resolveDiscover(value, targetName) {
   throw new Error(`Target "${targetName}": discover must be "latest" (got "${String(value)}")`);
 }
 function resolveCopilotLogConfig(target, env) {
-  const sessionDirSource = target.session_dir ?? target.sessionDir;
-  const sessionIdSource = target.session_id ?? target.sessionId;
+  const sessionDirSource = target.session_dir;
+  const sessionIdSource = target.session_id;
   const discoverSource = target.discover;
-  const sessionStateDirSource = target.session_state_dir ?? target.sessionStateDir;
+  const sessionStateDirSource = target.session_state_dir;
   const cwdSource = target.cwd;
   return {
     sessionDir: resolveOptionalString(
@@ -1975,6 +2051,15 @@ var AGENT_PROVIDER_KINDS = [
   "vscode",
   "vscode-insiders"
 ];
+var LLM_GRADER_CAPABLE_KINDS = [
+  "openai",
+  "openrouter",
+  "azure",
+  "anthropic",
+  "gemini",
+  "agentv",
+  "mock"
+];
 var KNOWN_PROVIDERS = [
   "openai",
   "openrouter",
@@ -1994,7 +2079,8 @@ var KNOWN_PROVIDERS = [
   "mock",
   "vscode",
   "vscode-insiders",
-  "agentv"
+  "agentv",
+  "transcript"
 ];
 var PROVIDER_ALIASES = [
   "azure-openai",
@@ -6803,7 +6889,7 @@ function createOpenRouter(options = {}) {
   );
   const createChatModel = (modelId, settings = {}) => new OpenRouterChatLanguageModel(modelId, settings, {
     provider: "openrouter.chat",
-    url: ({ path: path49 }) => `${baseURL}${path49}`,
+    url: ({ path: path50 }) => `${baseURL}${path50}`,
     headers: getHeaders,
     compatibility,
     fetch: options.fetch,
@@ -6811,7 +6897,7 @@ function createOpenRouter(options = {}) {
   });
   const createCompletionModel = (modelId, settings = {}) => new OpenRouterCompletionLanguageModel(modelId, settings, {
     provider: "openrouter.completion",
-    url: ({ path: path49 }) => `${baseURL}${path49}`,
+    url: ({ path: path50 }) => `${baseURL}${path50}`,
     headers: getHeaders,
     compatibility,
     fetch: options.fetch,
@@ -6819,14 +6905,14 @@ function createOpenRouter(options = {}) {
   });
   const createEmbeddingModel = (modelId, settings = {}) => new OpenRouterEmbeddingModel(modelId, settings, {
     provider: "openrouter.embedding",
-    url: ({ path: path49 }) => `${baseURL}${path49}`,
+    url: ({ path: path50 }) => `${baseURL}${path50}`,
     headers: getHeaders,
     fetch: options.fetch,
     extraBody: options.extraBody
   });
   const createImageModel = (modelId, settings = {}) => new OpenRouterImageModel(modelId, settings, {
     provider: "openrouter.image",
-    url: ({ path: path49 }) => `${baseURL}${path49}`,
+    url: ({ path: path50 }) => `${baseURL}${path50}`,
     headers: getHeaders,
     fetch: options.fetch,
     extraBody: options.extraBody
@@ -14345,11 +14431,13 @@ import { tmpdir } from "node:os";
 import path19 from "node:path";
 import { execSync as execSync2 } from "node:child_process";
 import { randomUUID as randomUUID8 } from "node:crypto";
-import { accessSync as accessSync2, createWriteStream as createWriteStream6 } from "node:fs";
+import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
 import { mkdir as mkdir7 } from "node:fs/promises";
-import path20 from "node:path";
+import path21 from "node:path";
 import { createInterface } from "node:readline";
-import { fileURLToPath as fileURLToPath3 } from "node:url";
+import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
+import os2 from "node:os";
+import path20 from "node:path";
 import { exec as exec2 } from "node:child_process";
 import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
 import path322 from "node:path";
@@ -14358,18 +14446,16 @@ import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
 import path30 from "node:path";
 import { constants as constants22 } from "node:fs";
 import { access as access22, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
-import path21 from "node:path";
 import path222 from "node:path";
 import path23 from "node:path";
-import { readFile as readFile9 } from "node:fs/promises";
 import path24 from "node:path";
+import { readFile as readFile9 } from "node:fs/promises";
+import path25 from "node:path";
 import { exec, spawn as spawn4 } from "node:child_process";
 import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
 import path27 from "node:path";
 import { promisify as promisify2 } from "node:util";
 import path26 from "node:path";
-import os2 from "node:os";
-import path25 from "node:path";
 import { copyFile, mkdir as mkdir10, readFile as readFile10, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
 import path29 from "node:path";
 import path28 from "node:path";
@@ -14420,12 +14506,15 @@ import { existsSync as existsSync5 } from "node:fs";
 import path45 from "node:path";
 import { mkdir as mkdir15, readFile as readFile13, writeFile as writeFile8 } from "node:fs/promises";
 import path46 from "node:path";
-import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
+import { existsSync as existsSync6, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
 import path47 from "node:path";
 import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
 import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
 import { homedir as homedir3 } from "node:os";
 import path48 from "node:path";
+import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
+import { homedir as homedir4 } from "node:os";
+import path49 from "node:path";
 import { readFile as readFile14 } from "node:fs/promises";
 function computeTraceSummary(messages) {
   const toolCallCounts = {};
@@ -15213,8 +15302,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
     const negate = rawEvaluator.negate === true ? true : void 0;
     if (isCustomType) {
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
-      const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "negate"]);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
+      const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "min_score", "negate"]);
       const config2 = {};
       for (const [key, value] of Object.entries(rawEvaluator)) {
         if (!knownProps2.has(key) && value !== void 0) {
@@ -15226,6 +15320,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         type: customTypeName,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {},
         ...Object.keys(config2).length > 0 ? { config: config2 } : {}
       });
@@ -15295,7 +15390,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
           );
         }
       }
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       const knownProps2 = /* @__PURE__ */ new Set([
         "name",
         "type",
@@ -15321,6 +15421,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         resolvedCwd,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {},
         ...Object.keys(config2).length > 0 ? { config: config2 } : {},
         ...targetConfig !== void 0 ? { target: targetConfig } : {}
@@ -15449,7 +15550,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         };
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "composite",
@@ -15457,6 +15563,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         aggregator,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15567,7 +15674,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       const config2 = {
         name: name21,
         type: "tool-trajectory",
@@ -15576,6 +15688,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...expected ? { expected } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {},
         ...argsMatch2 !== void 0 ? { argsMatch: argsMatch2 } : {}
       };
@@ -15638,7 +15751,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       const aggregation = asString(rawEvaluator.aggregation);
       const validAggregation = isValidFieldAggregationType(aggregation) ? aggregation : void 0;
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "field-accuracy",
@@ -15646,6 +15764,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...validAggregation ? { aggregation: validAggregation } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15659,13 +15778,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "latency",
         threshold,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15679,13 +15804,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "cost",
         budget,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15717,13 +15848,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "token-usage",
         ...validLimits,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15769,13 +15906,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "execution-metrics",
         ...validThresholds,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15789,7 +15932,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       const rawShouldTrigger = rawEvaluator.should_trigger;
       const shouldTrigger = typeof rawShouldTrigger === "boolean" ? rawShouldTrigger : void 0;
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "skill-trigger",
@@ -15797,6 +15945,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...shouldTrigger !== void 0 ? { should_trigger: shouldTrigger } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15808,13 +15957,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "contains",
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15828,13 +15983,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: typeValue,
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15846,13 +16007,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "icontains",
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15866,13 +16033,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: typeValue,
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15884,13 +16057,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: typeValue,
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15903,7 +16082,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       }
       const flags = asString(rawEvaluator.flags);
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "regex",
@@ -15911,18 +16095,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...flags !== void 0 ? { flags } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
     }
     if (typeValue === "is-json") {
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "is-json",
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15934,13 +16125,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "equals",
         value,
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -15976,7 +16173,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "llm-grader",
@@ -15984,6 +16186,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...graderTargetName ? { target: graderTargetName } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
@@ -16053,7 +16256,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         continue;
       }
       const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
-      const required2 = parseRequired(rawEvaluator.required);
+      const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
+        rawEvaluator.required,
+        rawEvaluator.min_score,
+        name21,
+        evalId
+      );
       evaluators.push({
         name: name21,
         type: "llm-grader",
@@ -16061,12 +16269,18 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
         ...graderTargetName ? { target: graderTargetName } : {},
         ...weight2 !== void 0 ? { weight: weight2 } : {},
         ...required2 !== void 0 ? { required: required2 } : {},
+        ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
         ...negate !== void 0 ? { negate } : {}
       });
       continue;
     }
     const weight = validateWeight(rawEvaluator.weight, name21, evalId);
-    const required = parseRequired(rawEvaluator.required);
+    const { required, min_score } = parseRequiredAndMinScore(
+      rawEvaluator.required,
+      rawEvaluator.min_score,
+      name21,
+      evalId
+    );
     const knownProps = /* @__PURE__ */ new Set([
       "name",
       "type",
@@ -16077,6 +16291,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       "weight",
       "config",
       "required",
+      "min_score",
       "negate",
       "max_steps",
       "maxSteps",
@@ -16106,6 +16321,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
       ...graderTargetName ? { target: graderTargetName } : {},
       ...weight !== void 0 ? { weight } : {},
       ...required !== void 0 ? { required } : {},
+      ...min_score !== void 0 ? { min_score } : {},
       ...negate !== void 0 ? { negate } : {},
       ...finalConfig ? { config: finalConfig } : {},
       ...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
@@ -16237,10 +16453,23 @@ ${detailBlock}${ANSI_RESET4}`);
     console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
   }
 }
-function parseRequired(value) {
-  if (value === true) return true;
-  if (typeof value === "number" && value > 0 && value <= 1) return value;
-  return void 0;
+function parseRequiredAndMinScore(rawRequired, rawMinScore, evaluatorName, evalId) {
+  const result = {};
+  if (typeof rawMinScore === "number" && rawMinScore > 0 && rawMinScore <= 1) {
+    result.min_score = rawMinScore;
+  }
+  if (rawRequired === true) {
+    result.required = true;
+  } else if (typeof rawRequired === "number" && rawRequired > 0 && rawRequired <= 1) {
+    if (result.min_score === void 0) {
+      result.min_score = rawRequired;
+    }
+    result.required = rawRequired;
+    logWarning2(
+      `Evaluator '${evaluatorName}' in '${evalId}': 'required: ${rawRequired}' is deprecated. Use 'required: true' + 'min_score: ${rawRequired}' instead.`
+    );
+  }
+  return result;
 }
 function validateWeight(rawWeight, evaluatorName, evalId) {
   if (rawWeight === void 0) {
@@ -16283,16 +16512,30 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
     const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
     const expectedOutcome = asString(rawRubric.outcome) ?? "";
     const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
+    let minScore;
     let requiredMinScore;
     let required;
-    if (typeof rawRubric.required_min_score === "number") {
-      const minScore = rawRubric.required_min_score;
-      if (!Number.isInteger(minScore) || minScore < 0 || minScore > 10) {
+    if (typeof rawRubric.min_score === "number") {
+      const ms = rawRubric.min_score;
+      if (ms <= 0 || ms > 1) {
         throw new Error(
-          `Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${minScore})`
+          `Invalid min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be in (0, 1] (got ${ms})`
         );
       }
-      requiredMinScore = minScore;
+      minScore = ms;
+      requiredMinScore = Math.round(ms * 10);
+    } else if (typeof rawRubric.required_min_score === "number") {
+      const rms = rawRubric.required_min_score;
+      if (!Number.isInteger(rms) || rms < 0 || rms > 10) {
+        throw new Error(
+          `Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${rms})`
+        );
+      }
+      requiredMinScore = rms;
+      minScore = rms / 10;
+      logWarning2(
+        `Rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': 'required_min_score: ${rms}' is deprecated. Use 'min_score: ${rms / 10}' (0-1 scale) instead.`
+      );
     }
     if (typeof rawRubric.required === "boolean") {
       required = rawRubric.required;
@@ -16312,6 +16555,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
         weight,
         ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
         ...required !== void 0 ? { required } : {},
+        ...minScore !== void 0 ? { min_score: minScore } : {},
         ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
         score_ranges: scoreRanges
       });
@@ -16328,6 +16572,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
         weight,
         // Default to required: true if not specified (backward compatibility)
         required: required ?? true,
+        ...minScore !== void 0 ? { min_score: minScore } : {},
         ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {}
       });
     }
@@ -16456,12 +16701,22 @@ function parseInlineRubrics(rawRubrics) {
       id: asString(rubric.id) ?? `rubric-${index + 1}`,
       weight: typeof rubric.weight === "number" ? rubric.weight : 1
     };
+    let inlineMinScore;
+    let inlineRequiredMinScore;
+    if (typeof rubric.min_score === "number") {
+      inlineMinScore = rubric.min_score;
+      inlineRequiredMinScore = Math.round(inlineMinScore * 10);
+    } else if (typeof rubric.required_min_score === "number") {
+      inlineRequiredMinScore = rubric.required_min_score;
+      inlineMinScore = inlineRequiredMinScore / 10;
+    }
     if (scoreRanges && scoreRanges.length > 0) {
       return {
         ...baseRubric,
         ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
         ...typeof rubric.required === "boolean" ? { required: rubric.required } : {},
-        ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {},
+        ...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
+        ...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {},
         score_ranges: scoreRanges
       };
     }
@@ -16469,7 +16724,8 @@ function parseInlineRubrics(rawRubrics) {
       ...baseRubric,
       outcome: expectedOutcome,
       required: typeof rubric.required === "boolean" ? rubric.required : true,
-      ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {}
+      ...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
+      ...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {}
     };
   }).filter((r) => r.outcome && r.outcome.length > 0 || "score_ranges" in r && r.score_ranges);
   if (rubricItems.length === 0) {
@@ -16851,6 +17107,9 @@ function resolveExpectedMessages(raw) {
 var ANSI_YELLOW5 = "\x1B[33m";
 var ANSI_RED2 = "\x1B[31m";
 var ANSI_RESET6 = "\x1B[0m";
+function matchesFilter(id, filter2) {
+  return typeof filter2 === "string" ? micromatch.isMatch(id, filter2) : filter2.some((pattern) => micromatch.isMatch(id, pattern));
+}
 function detectFormat(filePath) {
   const ext = path6.extname(filePath).toLowerCase();
   if (ext === ".jsonl") return "jsonl";
@@ -16918,40 +17177,40 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
   const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
   const rawFile = await readFile5(absoluteTestPath, "utf8");
   const rawCases = parseJsonlContent(rawFile, evalFilePath);
-  const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
-  const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
+  const fallbackSuiteName = path6.basename(absoluteTestPath, ".jsonl") || "eval";
+  const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
   const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
   const globalExecution = sidecar.execution;
   if (verbose) {
     console.log(`
-[JSONL Dataset: ${evalFilePath}]`);
+[JSONL Suite: ${evalFilePath}]`);
     console.log(`  Cases: ${rawCases.length}`);
-    console.log(`  Eval set: ${evalSetName}`);
+    console.log(`  Suite: ${suiteName}`);
     if (sidecar.description) {
       console.log(`  Description: ${sidecar.description}`);
     }
   }
   const results = [];
   for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
-    const evalcase = rawCases[lineIndex];
+    const testCaseConfig = rawCases[lineIndex];
     const lineNumber = lineIndex + 1;
-    const id = asString4(evalcase.id);
-    if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
+    const id = asString4(testCaseConfig.id);
+    if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
       continue;
     }
-    const conversationId = asString4(evalcase.conversation_id);
-    let outcome = asString4(evalcase.criteria);
-    if (!outcome && evalcase.expected_outcome !== void 0) {
-      outcome = asString4(evalcase.expected_outcome);
+    const conversationId = asString4(testCaseConfig.conversation_id);
+    let outcome = asString4(testCaseConfig.criteria);
+    if (!outcome && testCaseConfig.expected_outcome !== void 0) {
+      outcome = asString4(testCaseConfig.expected_outcome);
       if (outcome) {
         logWarning4(
-          `Test '${asString4(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
+          `Test '${asString4(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
         );
       }
     }
-    const rawInputMessages = resolveInputMessages(evalcase);
-    const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
-    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
+    const rawInputMessages = resolveInputMessages(testCaseConfig);
+    const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
+    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assert !== void 0;
     if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
       logError2(
         `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
@@ -16988,18 +17247,23 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
       }
     }
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
+    const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
     const mergedExecution = caseExecution ?? globalExecution;
-    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
     let evaluators;
     try {
-      evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
+      evaluators = await parseEvaluators(
+        testCaseConfig,
+        mergedExecution,
+        searchRoots,
+        id ?? "unknown"
+      );
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       logError2(`Skipping test '${id}' at line ${lineNumber}: ${message}`);
       continue;
     }
-    const inlineRubrics = evalcase.rubrics;
+    const inlineRubrics = testCaseConfig.rubrics;
     if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
       const rubricEvaluator = parseInlineRubrics(inlineRubrics);
       if (rubricEvaluator) {
@@ -17010,7 +17274,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
     const userFilePaths = collectResolvedInputFilePaths(inputMessages);
     const testCase = {
       id,
-      dataset: evalSetName,
+      suite: suiteName,
       conversation_id: conversationId,
       question,
       input: inputMessages,
@@ -17018,7 +17282,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
       criteria: outcome ?? "",
-      evaluator: evalCaseEvaluatorKind,
+      evaluator: testCaseEvaluatorKind,
       assertions: evaluators
     };
     results.push(testCase);
@@ -17194,6 +17458,9 @@ function buildChatPromptFromSegments(options) {
 var ANSI_YELLOW6 = "\x1B[33m";
 var ANSI_RED3 = "\x1B[31m";
 var ANSI_RESET7 = "\x1B[0m";
+function matchesFilter2(id, filter2) {
+  return typeof filter2 === "string" ? micromatch2.isMatch(id, filter2) : filter2.some((pattern) => micromatch2.isMatch(id, pattern));
+}
 function resolveTests(suite) {
   if (suite.tests !== void 0) return suite.tests;
   if (suite.eval_cases !== void 0) {
@@ -17273,18 +17540,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     throw new Error(`Invalid test file format: ${evalFilePath}`);
   }
   const suite = interpolated;
-  const evalSetNameFromSuite = asString5(suite.name)?.trim();
-  const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
-  const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
-  const rawTestcases = resolveTests(suite);
+  const suiteNameFromFile = asString5(suite.name)?.trim();
+  const fallbackSuiteName = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
+  const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
+  const rawTestCases = resolveTests(suite);
   const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
   const evalFileDir = path7.dirname(absoluteTestPath);
-  let expandedTestcases;
-  if (typeof rawTestcases === "string") {
-    const externalPath = path7.resolve(evalFileDir, rawTestcases);
-    expandedTestcases = await loadCasesFromFile(externalPath);
-  } else if (Array.isArray(rawTestcases)) {
-    expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
+  let expandedTestCases;
+  if (typeof rawTestCases === "string") {
+    const externalPath = path7.resolve(evalFileDir, rawTestCases);
+    expandedTestCases = await loadCasesFromFile(externalPath);
+  } else if (Array.isArray(rawTestCases)) {
+    expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
   } else {
     throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
   }
@@ -17299,32 +17566,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
   }
   const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
   const results = [];
-  for (const rawEvalcase of expandedTestcases) {
-    if (!isJsonObject(rawEvalcase)) {
+  for (const rawTestCase of expandedTestCases) {
+    if (!isJsonObject(rawTestCase)) {
       logWarning5("Skipping invalid test entry (expected object)");
       continue;
     }
-    const evalcase = rawEvalcase;
-    const id = asString5(evalcase.id);
-    if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
+    const testCaseConfig = rawTestCase;
+    const id = asString5(testCaseConfig.id);
+    if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
       continue;
     }
-    const conversationId = asString5(evalcase.conversation_id);
-    let outcome = asString5(evalcase.criteria);
-    if (!outcome && evalcase.expected_outcome !== void 0) {
-      outcome = asString5(evalcase.expected_outcome);
+    const conversationId = asString5(testCaseConfig.conversation_id);
+    let outcome = asString5(testCaseConfig.criteria);
+    if (!outcome && testCaseConfig.expected_outcome !== void 0) {
+      outcome = asString5(testCaseConfig.expected_outcome);
       if (outcome) {
         logWarning5(
-          `Test '${asString5(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
+          `Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
         );
       }
     }
-    const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
+    const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
     const skipDefaults = caseExecution?.skip_defaults === true;
+    const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
     const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
-    const testInputMessages = resolveInputMessages(evalcase, effectiveSuiteInputFiles);
-    const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
-    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assertions !== void 0 || evalcase.assert !== void 0;
+    const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
+    const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
+    const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0;
     if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
       logError3(
         `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions`
@@ -17371,16 +17639,21 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       }
     }
     const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
-    const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
+    const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
     let evaluators;
     try {
-      evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
+      evaluators = await parseEvaluators(
+        testCaseConfig,
+        globalExecution,
+        searchRoots,
+        id ?? "unknown"
+      );
     } catch (error) {
       const message = error instanceof Error ? error.message : String(error);
       logError3(`Skipping test '${id}': ${message}`);
       continue;
     }
-    const inlineRubrics = evalcase.rubrics;
+    const inlineRubrics = testCaseConfig.rubrics;
     if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
       const rubricEvaluator = parseInlineRubrics(inlineRubrics);
       if (rubricEvaluator) {
@@ -17389,13 +17662,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
     }
     warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
     const userFilePaths = collectResolvedInputFilePaths(inputMessages);
-    const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
+    const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
     const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
-    const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
-    const caseTargets = extractTargetsFromTestCase(evalcase);
+    const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
+    const caseTargets = extractTargetsFromTestCase(testCaseConfig);
     const testCase = {
       id,
-      dataset: evalSetName,
+      suite: suiteName,
       category: options?.category,
       conversation_id: conversationId,
       question,
@@ -17404,11 +17677,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
       reference_answer: referenceAnswer,
       file_paths: userFilePaths,
       criteria: outcome ?? "",
-      evaluator: evalCaseEvaluatorKind,
+      evaluator: testCaseEvaluatorKind,
       assertions: evaluators,
       workspace: mergedWorkspace,
       metadata,
-      targets: caseTargets
+      targets: caseTargets,
+      ...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
     };
     results.push(testCase);
   }
@@ -17939,7 +18213,7 @@ var AzureProvider = class {
     };
     this.retryConfig = config.retry;
     const azure = createAzure(buildAzureOptions(config));
-    this.model = azure.chat(config.deploymentName);
+    this.model = config.apiFormat === "responses" ? azure(config.deploymentName) : azure.chat(config.deploymentName);
   }
   id;
   kind = "azure";
@@ -18065,7 +18339,9 @@ function buildAzureOptions(config) {
   const options = {
     apiKey: config.apiKey,
     apiVersion: config.version,
-    useDeploymentBasedUrls: true
+    // Chat completions still use deployment-scoped Azure URLs for compatibility
+    // with existing deployments. Responses API should use the SDK's v1 path.
+    useDeploymentBasedUrls: config.apiFormat !== "responses"
   };
   const baseURL = normalizeAzureBaseUrl(config.resourceName);
   if (baseURL) {
@@ -21575,6 +21851,22 @@ function extractAzureResourceName(baseUrl) {
   if (urlMatch) return urlMatch[1];
   return baseUrl;
 }
+function normalizeAzureSdkBaseUrl(baseUrl) {
+  const trimmed = baseUrl.trim().replace(/\/+$/, "");
+  if (!trimmed) {
+    return trimmed;
+  }
+  if (!/^https?:\/\//i.test(trimmed)) {
+    return `https://${trimmed}.openai.azure.com/openai/v1`;
+  }
+  if (/\/openai\/v1$/i.test(trimmed)) {
+    return trimmed;
+  }
+  if (/\/openai$/i.test(trimmed)) {
+    return `${trimmed}/v1`;
+  }
+  return `${trimmed}/openai/v1`;
+}
 function extractPiTextContent(content) {
   if (typeof content === "string") {
     return content;
@@ -22397,6 +22689,30 @@ async function defaultPiRunner(options) {
     });
   });
 }
+var logged = false;
+function getAgentvHome() {
+  const envHome = process.env.AGENTV_HOME;
+  if (envHome && envHome !== "undefined") {
+    if (!logged) {
+      logged = true;
+      console.warn(`Using AGENTV_HOME: ${envHome}`);
+    }
+    return envHome;
+  }
+  return path20.join(os2.homedir(), ".agentv");
+}
+function getWorkspacesRoot() {
+  return path20.join(getAgentvHome(), "workspaces");
+}
+function getSubagentsRoot() {
+  return path20.join(getAgentvHome(), "subagents");
+}
+function getTraceStateRoot() {
+  return path20.join(getAgentvHome(), "trace-state");
+}
+function getWorkspacePoolRoot() {
+  return path20.join(getAgentvHome(), "workspace-pool");
+}
 var piCodingAgentModule = null;
 var piAiModule = null;
 var loadingPromise = null;
@@ -22414,46 +22730,126 @@ async function promptInstall() {
     rl.close();
   }
 }
-function findAgentvRoot() {
-  const thisFile = fileURLToPath3(import.meta.url);
-  let dir = path20.dirname(thisFile);
-  for (let i = 0; i < 10; i++) {
+function findManagedSdkInstallRoot() {
+  return path21.join(getAgentvHome(), "deps", "pi-sdk");
+}
+function resolveGlobalNpmRoot() {
+  try {
+    const root = execSync2("npm root -g", {
+      encoding: "utf-8",
+      stdio: ["ignore", "pipe", "ignore"]
+    }).trim();
+    return root.length > 0 ? root : void 0;
+  } catch {
+    return void 0;
+  }
+}
+function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
+  return path21.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
+}
+function findAccessiblePath(paths) {
+  for (const candidate of paths) {
     try {
-      const pkg = path20.join(dir, "package.json");
-      accessSync2(pkg);
-      return dir;
+      accessSync2(candidate);
+      return candidate;
     } catch {
-      const parent = path20.dirname(dir);
-      if (parent === dir) break;
-      dir = parent;
     }
   }
-  return path20.dirname(thisFile);
+  return void 0;
 }
-async function doLoadSdkModules() {
+async function tryImportLocalSdkModules() {
   try {
     [piCodingAgentModule, piAiModule] = await Promise.all([
       import("@mariozechner/pi-coding-agent"),
       import("@mariozechner/pi-ai")
     ]);
+    return true;
   } catch {
-    if (await promptInstall()) {
-      const installDir = findAgentvRoot();
-      console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
-      execSync2("bun add @mariozechner/pi-coding-agent", {
-        cwd: installDir,
-        stdio: "inherit"
-      });
-      [piCodingAgentModule, piAiModule] = await Promise.all([
-        import("@mariozechner/pi-coding-agent"),
-        import("@mariozechner/pi-ai")
-      ]);
-    } else {
-      throw new Error(
-        "pi-coding-agent SDK is not installed. Install it with:\n  bun add @mariozechner/pi-coding-agent"
-      );
+    return false;
+  }
+}
+async function tryImportManagedSdkModules() {
+  const managedRoot = findManagedSdkInstallRoot();
+  const piCodingAgentEntry = findAccessiblePath([
+    path21.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
+  ]);
+  const piAiEntry = findAccessiblePath([
+    path21.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
+    path21.join(
+      managedRoot,
+      "node_modules",
+      "@mariozechner",
+      "pi-coding-agent",
+      "node_modules",
+      "@mariozechner",
+      "pi-ai",
+      "dist",
+      "index.js"
+    )
+  ]);
+  if (!piCodingAgentEntry || !piAiEntry) return false;
+  try {
+    [piCodingAgentModule, piAiModule] = await Promise.all([
+      import(pathToFileURL(piCodingAgentEntry).href),
+      import(pathToFileURL(piAiEntry).href)
+    ]);
+    return true;
+  } catch {
+    return false;
+  }
+}
+async function tryImportGlobalSdkModules() {
+  const globalNpmRoot = resolveGlobalNpmRoot();
+  if (!globalNpmRoot) return false;
+  const piCodingAgentEntry = findAccessiblePath([
+    buildGlobalModuleEntry("@mariozechner/pi-coding-agent", globalNpmRoot)
+  ]);
+  const piAiEntry = findAccessiblePath([
+    buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
+    path21.join(
+      globalNpmRoot,
+      "@mariozechner",
+      "pi-coding-agent",
+      "node_modules",
+      "@mariozechner",
+      "pi-ai",
+      "dist",
+      "index.js"
+    )
+  ]);
+  if (!piCodingAgentEntry || !piAiEntry) return false;
+  try {
+    [piCodingAgentModule, piAiModule] = await Promise.all([
+      import(pathToFileURL(piCodingAgentEntry).href),
+      import(pathToFileURL(piAiEntry).href)
+    ]);
+    return true;
+  } catch {
+    return false;
+  }
+}
+function installSdkModules(installDir) {
+  console.error(`Installing @mariozechner/pi-coding-agent into ${installDir} via npm...`);
+  mkdirSync(installDir, { recursive: true });
+  execSync2("npm install --no-save --no-package-lock @mariozechner/pi-coding-agent", {
+    cwd: installDir,
+    stdio: "inherit"
+  });
+}
+async function doLoadSdkModules() {
+  if (await tryImportLocalSdkModules() || await tryImportManagedSdkModules() || await tryImportGlobalSdkModules()) {
+    return;
+  }
+  if (await promptInstall()) {
+    const installDir = findManagedSdkInstallRoot();
+    installSdkModules(installDir);
+    if (await tryImportManagedSdkModules()) {
+      return;
     }
   }
+  throw new Error(
+    "pi-coding-agent SDK is not installed. Install it with:\n  npm install @mariozechner/pi-coding-agent"
+  );
 }
 async function loadSdkModules() {
   if (!piCodingAgentModule || !piAiModule) {
@@ -22510,12 +22906,16 @@ var PiCodingAgentProvider = class {
     try {
       const cwd = this.resolveCwd(request.cwd);
       const rawProvider = this.config.subprovider ?? "google";
-      const hasBaseUrl = !!this.config.baseUrl;
+      const normalizedBaseUrl = this.normalizeSdkBaseUrl(rawProvider, this.config.baseUrl);
+      const hasBaseUrl = !!normalizedBaseUrl;
       const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
       const modelId = this.config.model ?? "gemini-2.5-flash";
       this.setApiKeyEnv(rawProvider, hasBaseUrl);
-      this.setBaseUrlEnv(rawProvider, hasBaseUrl);
+      this.setBaseUrlEnv(rawProvider, normalizedBaseUrl, hasBaseUrl);
       let model = sdk.getModel(providerName, modelId);
+      if (model && normalizedBaseUrl) {
+        model = { ...model, baseUrl: normalizedBaseUrl };
+      }
       if (!model) {
         const envProvider = providerName.replace(/-responses$/, "");
         model = {
@@ -22523,7 +22923,7 @@ var PiCodingAgentProvider = class {
           name: modelId,
           api: providerName,
           provider: envProvider,
-          baseUrl: this.config.baseUrl ?? "",
+          baseUrl: normalizedBaseUrl ?? "",
           reasoning: false,
           input: ["text"],
           cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -22690,19 +23090,27 @@ ${fileList}`;
     }
   }
   /** Maps config baseUrl to the provider-specific env var the SDK reads. */
-  setBaseUrlEnv(providerName, hasBaseUrl = false) {
-    if (!this.config.baseUrl) return;
+  setBaseUrlEnv(providerName, baseUrl = this.config.baseUrl, hasBaseUrl = false) {
+    const normalizedBaseUrl = this.normalizeSdkBaseUrl(providerName, baseUrl);
+    if (!normalizedBaseUrl) return;
     const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
     if (envKey) {
-      process.env[envKey] = this.config.baseUrl;
+      process.env[envKey] = normalizedBaseUrl;
     }
   }
+  normalizeSdkBaseUrl(providerName, baseUrl) {
+    if (!baseUrl) return void 0;
+    if (providerName.toLowerCase() === "azure") {
+      return normalizeAzureSdkBaseUrl(baseUrl);
+    }
+    return baseUrl;
+  }
   resolveCwd(cwdOverride) {
     if (cwdOverride) {
-      return path20.resolve(cwdOverride);
+      return path21.resolve(cwdOverride);
     }
     if (this.config.cwd) {
-      return path20.resolve(this.config.cwd);
+      return path21.resolve(this.config.cwd);
     }
     return process.cwd();
   }
@@ -22721,9 +23129,9 @@ ${fileList}`;
   }
   resolveLogDirectory() {
     if (this.config.logDir) {
-      return path20.resolve(this.config.logDir);
+      return path21.resolve(this.config.logDir);
     }
-    return path20.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
+    return path21.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
   }
   async createStreamLogger(request) {
     const logDir = this.resolveLogDirectory();
@@ -22737,7 +23145,7 @@ ${fileList}`;
       console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
       return void 0;
     }
-    const filePath = path20.join(logDir, buildLogFilename6(request, this.targetName));
+    const filePath = path21.join(logDir, buildLogFilename6(request, this.targetName));
     try {
       const logger = await PiStreamLogger2.create({
         filePath,
@@ -22961,7 +23369,7 @@ async function readDirEntries(target) {
   const entries = await readdir2(target, { withFileTypes: true });
   return entries.map((entry) => ({
     name: entry.name,
-    absolutePath: path21.join(target, entry.name),
+    absolutePath: path222.join(target, entry.name),
     isDirectory: entry.isDirectory()
   }));
 }
@@ -22975,7 +23383,7 @@ async function removeIfExists(target) {
   }
 }
 function pathToFileUri2(filePath) {
-  const absolutePath = path222.isAbsolute(filePath) ? filePath : path222.resolve(filePath);
+  const absolutePath = path23.isAbsolute(filePath) ? filePath : path23.resolve(filePath);
   const normalizedPath = absolutePath.replace(/\\/g, "/");
   if (/^[a-zA-Z]:\//.test(normalizedPath)) {
     return `file:///${normalizedPath}`;
@@ -23067,8 +23475,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
   });
 }
 function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
-  const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path23.basename(file)}`).join("\n");
-  const responseList = responseFiles.map((file) => `"${path23.basename(file)}"`).join(", ");
+  const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path24.basename(file)}`).join("\n");
+  const responseList = responseFiles.map((file) => `"${path24.basename(file)}"`).join(", ");
   return renderTemplate2(templateContent, {
     requestFiles: requestLines,
     responseList
@@ -23128,7 +23536,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
 }
 async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
   if (!silent) {
-    const fileList = responseFilesFinal.map((file) => path24.basename(file)).join(", ");
+    const fileList = responseFilesFinal.map((file) => path25.basename(file)).join(", ");
     console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
   }
   const deadline = Date.now() + timeoutMs;
@@ -23137,7 +23545,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
     while (pending.size > 0) {
       if (Date.now() >= deadline) {
         if (!silent) {
-          const remaining = [...pending].map((f) => path24.basename(f)).join(", ");
+          const remaining = [...pending].map((f) => path25.basename(f)).join(", ");
           console.error(
             `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
           );
@@ -23184,30 +23592,6 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
   }
   return true;
 }
-var logged = false;
-function getAgentvHome() {
-  const envHome = process.env.AGENTV_HOME;
-  if (envHome && envHome !== "undefined") {
-    if (!logged) {
-      logged = true;
-      console.warn(`Using AGENTV_HOME: ${envHome}`);
-    }
-    return envHome;
-  }
-  return path25.join(os2.homedir(), ".agentv");
-}
-function getWorkspacesRoot() {
-  return path25.join(getAgentvHome(), "workspaces");
-}
-function getSubagentsRoot() {
-  return path25.join(getAgentvHome(), "subagents");
-}
-function getTraceStateRoot() {
-  return path25.join(getAgentvHome(), "trace-state");
-}
-function getWorkspacePoolRoot() {
-  return path25.join(getAgentvHome(), "workspace-pool");
-}
 var DEFAULT_LOCK_NAME = "subagent.lock";
 var DEFAULT_ALIVE_FILENAME = ".alive";
 function getDefaultSubagentRoot(vscodeCmd = "code") {
@@ -24428,9 +24812,10 @@ function resolveAndCreateProvider(definition, env = process.env) {
   const resolved = resolveTargetDefinition(definition, env);
   return createProvider(resolved);
 }
-var PASS_THRESHOLD = 0.8;
-function scoreToVerdict(score) {
-  return score >= PASS_THRESHOLD ? "pass" : "fail";
+var DEFAULT_THRESHOLD = 0.8;
+var PASS_THRESHOLD = DEFAULT_THRESHOLD;
+function scoreToVerdict(score, threshold = DEFAULT_THRESHOLD) {
+  return score >= threshold ? "pass" : "fail";
 }
 function clampScore(value) {
   if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -24612,13 +24997,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
 async function execShellWithStdin(command, stdinPayload, options = {}) {
   const { mkdir: mkdir16, readFile: readFile15, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
   const { tmpdir: tmpdir3 } = await import("node:os");
-  const path49 = await import("node:path");
+  const path50 = await import("node:path");
   const { randomUUID: randomUUID10 } = await import("node:crypto");
-  const dir = path49.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
+  const dir = path50.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
   await mkdir16(dir, { recursive: true });
-  const stdinPath = path49.join(dir, "stdin.txt");
-  const stdoutPath = path49.join(dir, "stdout.txt");
-  const stderrPath = path49.join(dir, "stderr.txt");
+  const stdinPath = path50.join(dir, "stdin.txt");
+  const stdoutPath = path50.join(dir, "stdout.txt");
+  const stderrPath = path50.join(dir, "stderr.txt");
   await writeFile9(stdinPath, stdinPayload, "utf8");
   const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
   const { spawn: spawn5 } = await import("node:child_process");
@@ -25799,7 +26184,7 @@ ${outputSchema2}`;
     parts.push("[[ ## scoring_criteria ## ]]");
     for (const rubric of rubrics) {
       const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
-      const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
+      const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
       parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
       if (rubric.outcome) {
         parts.push(`Description: ${rubric.outcome}`);
@@ -25853,54 +26238,106 @@ ${outputSchema2}`;
   async runWithRetry(options) {
     const { context: context2, graderProvider, systemPrompt, userPrompt, schema, images } = options;
     let lastError;
+    let lastInvalidResponse;
+    let shouldAttemptStructureFix = false;
     for (let attempt = 1; attempt <= 3; attempt++) {
       try {
-        const model = graderProvider.asLanguageModel?.();
-        if (model) {
-          const modelOptions = {
-            ...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
-            ...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
-          };
-          const hasImages = images && images.length > 0;
-          const result = hasImages ? await generateText({
-            model,
-            system: systemPrompt,
-            messages: [
-              {
-                role: "user",
-                content: [
-                  { type: "text", text: userPrompt },
-                  ...toAiSdkImageParts(images)
-                ]
-              }
-            ],
-            ...modelOptions
-          }) : await generateText({
-            model,
-            system: systemPrompt,
-            prompt: userPrompt,
-            ...modelOptions
-          });
-          const data2 = schema.parse(parseJsonFromText(result.text));
-          const rawUsage = result.usage;
-          const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
-          return { data: data2, tokenUsage };
+        const result = await this.generateStructuredResponse({
+          context: context2,
+          graderProvider,
+          systemPrompt,
+          userPrompt,
+          images
+        });
+        const canRepairResponse = result.text.trim().length > 0;
+        lastInvalidResponse = canRepairResponse ? result : void 0;
+        let data;
+        try {
+          data = schema.parse(parseJsonFromText(result.text));
+        } catch (e) {
+          lastError = e instanceof Error ? e : new Error(String(e));
+          shouldAttemptStructureFix = canRepairResponse;
+          continue;
         }
-        const response = await graderProvider.invoke({
-          question: userPrompt,
+        return {
+          data,
+          providerResponse: result.providerResponse,
+          tokenUsage: result.tokenUsage
+        };
+      } catch (e) {
+        lastError = e instanceof Error ? e : new Error(String(e));
+      }
+    }
+    if (shouldAttemptStructureFix && lastInvalidResponse) {
+      try {
+        const repaired = await this.generateStructuredResponse({
+          context: context2,
+          graderProvider,
           systemPrompt,
-          evalCaseId: context2.evalCase.id,
-          attempt: context2.attempt,
-          maxOutputTokens: this.maxOutputTokens,
-          temperature: this.temperature
+          userPrompt: buildStructureRepairPrompt({
+            validationError: lastError?.message ?? "Schema validation failed",
+            invalidResponse: lastInvalidResponse.text
+          })
         });
-        const data = schema.parse(parseJsonFromText(extractLastAssistantContent(response.output)));
-        return { data, providerResponse: response, tokenUsage: response.tokenUsage };
+        const data = schema.parse(parseJsonFromText(repaired.text));
+        return {
+          data,
+          providerResponse: repaired.providerResponse,
+          tokenUsage: sumTokenUsage(lastInvalidResponse.tokenUsage, repaired.tokenUsage)
+        };
       } catch (e) {
         lastError = e instanceof Error ? e : new Error(String(e));
       }
     }
-    throw new Error(`Failed to parse evaluator response after 3 attempts: ${lastError?.message}`);
+    throw new Error(
+      `Failed to parse evaluator response after 3 attempts and 1 structure-fix attempt: ${lastError?.message}`
+    );
+  }
+  async generateStructuredResponse(options) { // Sends one grading request and returns the raw reply text with usage; no JSON parsing or schema validation happens here.
+    const { context: context2, graderProvider, systemPrompt, userPrompt, images } = options;
+    const model = graderProvider.asLanguageModel?.(); // Optional hook: when the provider exposes a language model, the call goes through generateText.
+    if (model) {
+      const modelOptions = {
+        ...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {}, // Forwarded only when maxOutputTokens is truthy.
+        ...typeof this.temperature === "number" ? { temperature: this.temperature } : {} // Forwarded for any number, including 0.
+      };
+      const hasImages = images && images.length > 0;
+      const result = hasImages ? await generateText({ // With images: one user message holding the text prompt followed by the image parts.
+        model,
+        system: systemPrompt,
+        messages: [
+          {
+            role: "user",
+            content: [
+              { type: "text", text: userPrompt },
+              ...toAiSdkImageParts(images)
+            ]
+          }
+        ],
+        ...modelOptions
+      }) : await generateText({ // Without images: plain system + prompt call.
+        model,
+        system: systemPrompt,
+        prompt: userPrompt,
+        ...modelOptions
+      });
+      const rawUsage = result.usage;
+      const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0; // Usage is reported only when both counts are present.
+      return { text: result.text, tokenUsage }; // This path returns no providerResponse.
+    }
+    const response = await graderProvider.invoke({ // Fallback for providers without asLanguageModel().
+      question: userPrompt,
+      systemPrompt,
+      evalCaseId: context2.evalCase.id,
+      attempt: context2.attempt,
+      maxOutputTokens: this.maxOutputTokens,
+      temperature: this.temperature
+    });
+    return {
+      text: extractLastAssistantContent(response.output), // Reply text is taken from the provider output via extractLastAssistantContent.
+      providerResponse: response,
+      tokenUsage: response.tokenUsage
+    };
   }
 };
 function buildOutputSchema() {
@@ -25920,6 +26357,29 @@ function buildOutputSchema() {
     "}"
   ].join("\n");
 }
+function buildStructureRepairPrompt(options) { // Builds the newline-joined prompt asking the model to fix the JSON structure of an earlier reply.
+  const { validationError, invalidResponse } = options; // Both values are inserted verbatim below.
+  return [
+    "The following evaluation response has useful grading content but invalid JSON structure.",
+    "Repair it to satisfy the schema in the system prompt.", // The schema is not repeated here; the prompt relies on the system prompt carrying it.
+    "Preserve the evaluation meaning, do not re-grade the answer, and return only a single JSON object.",
+    "", // Empty entries become blank separator lines after the join.
+    "Validation error:",
+    validationError,
+    "",
+    "Invalid response:",
+    invalidResponse
+  ].join("\n");
+}
+function sumTokenUsage(first, second) { // Adds two optional { input, output } usage records field by field.
+  if (!first && !second) { // Neither record present: return undefined instead of a zeroed record.
+    return void 0;
+  }
+  return {
+    input: (first?.input ?? 0) + (second?.input ?? 0), // A missing record or field counts as 0.
+    output: (first?.output ?? 0) + (second?.output ?? 0)
+  };
+}
 function buildRubricOutputSchema() {
   return `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
 You must return a valid JSON object matching this schema:
@@ -26019,19 +26479,21 @@ function calculateScoreRangeResult(result, rubrics) {
     rawScores[rubric.id] = rawScore;
     totalWeight += rubric.weight;
     weightedScoreSum += normalizedScore * rubric.weight;
-    let requiredMinScore;
-    if (rubric.required_min_score !== void 0) {
-      requiredMinScore = rubric.required_min_score;
+    let minScoreThreshold;
+    if (rubric.min_score !== void 0) {
+      minScoreThreshold = rubric.min_score;
+    } else if (rubric.required_min_score !== void 0) {
+      minScoreThreshold = rubric.required_min_score / 10;
     } else if (rubric.required === true) {
-      requiredMinScore = 10;
+      minScoreThreshold = 1;
     }
     const matchingRange = rubric.score_ranges?.find(
       (r) => rawScore >= r.score_range[0] && rawScore <= r.score_range[1]
     );
     const rangeDescription = matchingRange?.outcome ?? "";
     const criterionLabel = rubric.outcome ?? rubric.id;
-    const passed = !(requiredMinScore !== void 0 && rawScore < requiredMinScore) && rawScore >= 7;
-    if (requiredMinScore !== void 0 && rawScore < requiredMinScore) {
+    const passed = !(minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) && rawScore >= 7;
+    if (minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) {
       failedRequired = true;
     }
     assertions.push({
@@ -26108,11 +26570,11 @@ function createFilesystemTools(workspacePath) {
       execute: async (input) => {
         try {
           const resolved = resolveSandboxed(workspacePath, input.path);
-          const stat10 = await fs2.stat(resolved);
-          if (stat10.isDirectory()) {
+          const stat11 = await fs2.stat(resolved);
+          if (stat11.isDirectory()) {
             return { error: `'${input.path}' is a directory, not a file` };
           }
-          const buffer = Buffer.alloc(Math.min(stat10.size, MAX_FILE_SIZE));
+          const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
           const fd = await fs2.open(resolved, "r");
           try {
             await fd.read(buffer, 0, buffer.length, 0);
@@ -26120,8 +26582,8 @@ function createFilesystemTools(workspacePath) {
             await fd.close();
           }
           const content = buffer.toString("utf-8");
-          const truncated = stat10.size > MAX_FILE_SIZE;
-          return { content, truncated, size: stat10.size };
+          const truncated = stat11.size > MAX_FILE_SIZE;
+          return { content, truncated, size: stat11.size };
         } catch (error) {
           return { error: error instanceof Error ? error.message : String(error) };
         }
@@ -26172,8 +26634,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
       const ext = path35.extname(entry.name).toLowerCase();
       if (BINARY_EXTENSIONS.has(ext)) continue;
       try {
-        const stat10 = await fs2.stat(fullPath);
-        if (stat10.size > MAX_FILE_SIZE) continue;
+        const stat11 = await fs2.stat(fullPath);
+        if (stat11.size > MAX_FILE_SIZE) continue;
         const content = await fs2.readFile(fullPath, "utf-8");
         const lines = content.split("\n");
         for (let i = 0; i < lines.length; i++) {
@@ -26806,115 +27268,115 @@ var FieldAccuracyEvaluator = class {
    * Evaluate a single field against the expected value.
    */
   evaluateField(fieldConfig, candidateData, expectedData) {
-    const { path: path49, match, required = true, weight = 1 } = fieldConfig;
-    const candidateValue = resolvePath(candidateData, path49);
-    const expectedValue = resolvePath(expectedData, path49);
+    const { path: path50, match, required = true, weight = 1 } = fieldConfig;
+    const candidateValue = resolvePath(candidateData, path50);
+    const expectedValue = resolvePath(expectedData, path50);
     if (expectedValue === void 0) {
       return {
-        path: path49,
+        path: path50,
         score: 1,
         // No expected value means no comparison needed
         weight,
         hit: true,
-        message: `${path49}: no expected value`
+        message: `${path50}: no expected value`
       };
     }
     if (candidateValue === void 0) {
       if (required) {
         return {
-          path: path49,
+          path: path50,
           score: 0,
           weight,
           hit: false,
-          message: `${path49} (required, missing)`
+          message: `${path50} (required, missing)`
         };
       }
       return {
-        path: path49,
+        path: path50,
         score: 1,
         // Don't penalize missing optional fields
         weight: 0,
         // Zero weight means it won't affect the score
         hit: true,
-        message: `${path49}: optional field missing`
+        message: `${path50}: optional field missing`
       };
     }
     switch (match) {
       case "exact":
-        return this.compareExact(path49, candidateValue, expectedValue, weight);
+        return this.compareExact(path50, candidateValue, expectedValue, weight);
       case "numeric_tolerance":
         return this.compareNumericTolerance(
-          path49,
+          path50,
           candidateValue,
           expectedValue,
           fieldConfig,
           weight
         );
       case "date":
-        return this.compareDate(path49, candidateValue, expectedValue, fieldConfig, weight);
+        return this.compareDate(path50, candidateValue, expectedValue, fieldConfig, weight);
       default:
         return {
-          path: path49,
+          path: path50,
           score: 0,
           weight,
           hit: false,
-          message: `${path49}: unknown match type "${match}"`
+          message: `${path50}: unknown match type "${match}"`
         };
     }
   }
   /**
    * Exact equality comparison.
    */
-  compareExact(path49, candidateValue, expectedValue, weight) {
+  compareExact(path50, candidateValue, expectedValue, weight) {
     if (deepEqual(candidateValue, expectedValue)) {
       return {
-        path: path49,
+        path: path50,
         score: 1,
         weight,
         hit: true,
-        message: path49
+        message: path50
       };
     }
     if (typeof candidateValue !== typeof expectedValue) {
       return {
-        path: path49,
+        path: path50,
         score: 0,
         weight,
         hit: false,
-        message: `${path49} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
+        message: `${path50} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
       };
     }
     return {
-      path: path49,
+      path: path50,
       score: 0,
       weight,
       hit: false,
-      message: `${path49} (value mismatch)`
+      message: `${path50} (value mismatch)`
     };
   }
   /**
    * Numeric comparison with absolute or relative tolerance.
    */
-  compareNumericTolerance(path49, candidateValue, expectedValue, fieldConfig, weight) {
+  compareNumericTolerance(path50, candidateValue, expectedValue, fieldConfig, weight) {
     const { tolerance = 0, relative = false } = fieldConfig;
     const candidateNum = toNumber(candidateValue);
     const expectedNum = toNumber(expectedValue);
     if (candidateNum === null || expectedNum === null) {
       return {
-        path: path49,
+        path: path50,
         score: 0,
         weight,
         hit: false,
-        message: `${path49} (non-numeric value)`
+        message: `${path50} (non-numeric value)`
       };
     }
     if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
       return {
-        path: path49,
+        path: path50,
         score: 0,
         weight,
         hit: false,
-        message: `${path49} (invalid numeric value)`
+        message: `${path50} (invalid numeric value)`
       };
     }
     const diff = Math.abs(candidateNum - expectedNum);
@@ -26927,61 +27389,61 @@ var FieldAccuracyEvaluator = class {
     }
     if (withinTolerance) {
       return {
-        path: path49,
+        path: path50,
         score: 1,
         weight,
         hit: true,
-        message: `${path49} (within tolerance: diff=${diff.toFixed(2)})`
+        message: `${path50} (within tolerance: diff=${diff.toFixed(2)})`
       };
     }
     return {
-      path: path49,
+      path: path50,
       score: 0,
       weight,
       hit: false,
-      message: `${path49} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
+      message: `${path50} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
     };
   }
   /**
    * Date comparison with format normalization.
    */
-  compareDate(path49, candidateValue, expectedValue, fieldConfig, weight) {
+  compareDate(path50, candidateValue, expectedValue, fieldConfig, weight) {
     const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
     const candidateDate = parseDate(String(candidateValue), formats);
     const expectedDate = parseDate(String(expectedValue), formats);
     if (candidateDate === null) {
       return {
-        path: path49,
+        path: path50,
         score: 0,
         weight,
         hit: false,
-        message: `${path49} (unparseable candidate date)`
+        message: `${path50} (unparseable candidate date)`
       };
     }
     if (expectedDate === null) {
       return {
-        path: path49,
+        path: path50,
         score: 0,
         weight,
         hit: false,
-        message: `${path49} (unparseable expected date)`
+        message: `${path50} (unparseable expected date)`
       };
     }
     if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
       return {
-        path: path49,
+        path: path50,
         score: 1,
         weight,
         hit: true,
-        message: path49
+        message: path50
       };
     }
     return {
-      path: path49,
+      path: path50,
       score: 0,
       weight,
       hit: false,
-      message: `${path49} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
+      message: `${path50} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
     };
   }
   /**
@@ -27014,11 +27476,11 @@ var FieldAccuracyEvaluator = class {
     };
   }
 };
-function resolvePath(obj, path49) {
-  if (!path49 || !obj) {
+function resolvePath(obj, path50) {
+  if (!path50 || !obj) {
     return void 0;
   }
-  const parts = path49.split(/\.|\[|\]/).filter((p) => p.length > 0);
+  const parts = path50.split(/\.|\[|\]/).filter((p) => p.length > 0);
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0) {
@@ -27500,8 +27962,8 @@ var TokenUsageEvaluator = class {
     };
   }
 };
-function getNestedValue(obj, path49) {
-  const parts = path49.split(".");
+function getNestedValue(obj, path50) {
+  const parts = path50.split(".");
   let current = obj;
   for (const part of parts) {
     if (current === null || current === void 0 || typeof current !== "object") {
@@ -29224,7 +29686,7 @@ var WorkspacePoolManager = class {
   }
   /**
    * Reset an existing slot for reuse:
-   * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
+   * 1. Reset repos (fetch from origin when resolve=remote, then git reset --hard && git clean per repo)
    * 2. Re-copy template files (skip repo directories)
    */
   async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
@@ -29237,7 +29699,17 @@ var WorkspacePoolManager = class {
         continue;
       }
       const ref = repo.checkout?.ref ?? "HEAD";
-      await git(["reset", "--hard", ref], { cwd: repoDir });
+      const resolve2 = repo.checkout?.resolve ?? "remote";
+      if (resolve2 === "remote") {
+        const fetchArgs = ["fetch", "origin", ref];
+        if (repo.clone?.depth) {
+          fetchArgs.splice(1, 0, "--depth", String(repo.clone.depth));
+        }
+        await git(fetchArgs, { cwd: repoDir });
+        await git(["reset", "--hard", "FETCH_HEAD"], { cwd: repoDir });
+      } else {
+        await git(["reset", "--hard", ref], { cwd: repoDir });
+      }
       const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
       await git(["clean", cleanFlag], { cwd: repoDir });
     }
@@ -29520,7 +29992,7 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
   }
   return result.stdout;
 }
-function classifyQualityStatus(score, threshold = PASS_THRESHOLD) {
+function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
   return score >= threshold ? "ok" : "quality_failure";
 }
 function buildSkippedEvaluatorError(scores) {
@@ -29612,7 +30084,7 @@ async function runEvaluation(options) {
   const filteredEvalCases = filterEvalCases(evalCases, filter2);
   if (filteredEvalCases.length === 0) {
     if (filter2) {
-      throw new Error(`No tests matched filter '${filter2}' in ${evalFilePath}`);
+      throw new Error(`No tests matched filter '${formatFilter(filter2)}' in ${evalFilePath}`);
     }
     return [];
   }
@@ -29664,6 +30136,9 @@ async function runEvaluation(options) {
     const graderName = targetContext.graderTarget ?? targetContext.name;
     const resolvedGrader = resolveTargetByName(graderName);
     if (!resolvedGrader) {
+      if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
+        return void 0;
+      }
       return getOrCreateProvider(targetContext);
     }
     return getOrCreateProvider(resolvedGrader);
@@ -29994,7 +30469,7 @@ async function runEvaluation(options) {
           const budgetResult = {
             timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
             testId: evalCase.id,
-            dataset: evalCase.dataset,
+            suite: evalCase.suite,
             category: evalCase.category,
             score: 0,
             assertions: [],
@@ -30031,7 +30506,7 @@ async function runEvaluation(options) {
           const haltResult = {
             timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
             testId: evalCase.id,
-            dataset: evalCase.dataset,
+            suite: evalCase.suite,
             category: evalCase.category,
             score: 0,
             assertions: [],
@@ -30343,7 +30818,7 @@ async function runBatchEvaluation(options) {
         targetResolver,
         availableTargets,
         verbose,
-        threshold: batchThreshold
+        threshold: evalCase.threshold ?? batchThreshold
       });
       if (providerError) {
         result = {
@@ -30805,8 +31280,9 @@ async function runEvalCase(options) {
       fileChanges,
       workspacePath,
       verbose,
-      threshold: caseThreshold
+      threshold: evalCase.threshold ?? caseThreshold
     });
+    const effectiveThreshold = evalCase.threshold ?? caseThreshold;
     const totalDurationMs = Date.now() - caseStartMs;
     const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
     const evalRunTokenUsage = tokenUsage || graderTokens ? {
@@ -30820,7 +31296,7 @@ async function runEvalCase(options) {
       ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
     };
     const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
-    const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
+    const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, effectiveThreshold);
     const targetUsedField = targetUsed ? { targetUsed } : {};
     const finalResult = providerError ? {
       ...result,
@@ -31021,7 +31497,8 @@ async function evaluateCandidate(options) {
     targetResolver,
     availableTargets,
     fileChanges,
-    workspacePath
+    workspacePath,
+    threshold: evalThreshold
   });
   const completedAt = nowFn();
   let agentRequest;
@@ -31052,7 +31529,7 @@ async function evaluateCandidate(options) {
   return {
     timestamp: completedAt.toISOString(),
     testId: evalCase.id,
-    dataset: evalCase.dataset,
+    suite: evalCase.suite,
     category: evalCase.category,
     conversationId: evalCase.conversation_id,
     score: score.score,
@@ -31095,7 +31572,8 @@ async function runEvaluatorsForCase(options) {
     targetResolver,
     availableTargets,
     fileChanges,
-    workspacePath
+    workspacePath,
+    threshold
   } = options;
   if (evalCase.assertions && evalCase.assertions.length > 0) {
     return runEvaluatorList({
@@ -31121,7 +31599,8 @@ async function runEvaluatorsForCase(options) {
       targetResolver,
       availableTargets,
       fileChanges,
-      workspacePath
+      workspacePath,
+      threshold
     });
   }
   const evaluatorKind = evalCase.evaluator ?? "llm-grader";
@@ -31223,7 +31702,8 @@ async function runEvaluatorList(options) {
         name: evaluatorConfig.name,
         type: evaluatorConfig.type,
         weight,
-        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
+        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
+        ...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
       });
       scores.push({
         name: evaluatorConfig.name,
@@ -31258,7 +31738,8 @@ async function runEvaluatorList(options) {
         name: evaluatorConfig.name ?? "unknown",
         type: evaluatorConfig.type ?? "llm-grader",
         weight,
-        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
+        ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
+        ...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
       });
       scores.push({
         name: evaluatorConfig.name ?? "unknown",
@@ -31292,9 +31773,10 @@ async function runEvaluatorList(options) {
       }
     }
   }
+  const effectiveThreshold = options.threshold ?? DEFAULT_THRESHOLD;
   const hasRequiredFailure = scored.some((entry) => {
     if (!entry.required) return false;
-    const minScore = typeof entry.required === "number" ? entry.required : PASS_THRESHOLD;
+    const minScore = entry.min_score ?? (typeof entry.required === "number" ? entry.required : effectiveThreshold);
     return entry.score.score < minScore;
   });
   const scorable = scored.filter((entry) => entry.score.verdict !== "skip");
@@ -31305,17 +31787,23 @@ async function runEvaluatorList(options) {
   const expectedAspectCount = assertions.length || 1;
   const score = {
     score: aggregateScore,
-    verdict: scoreToVerdict(aggregateScore),
+    verdict: scoreToVerdict(aggregateScore, effectiveThreshold),
     assertions,
     expectedAspectCount
   };
   return { score, scores };
 }
+function formatFilter(filter2) { // Renders a filter as text for messages.
+  return typeof filter2 === "string" ? filter2 : filter2.join(", "); // A string passes through unchanged; otherwise the value is joined with ", " (presumably an array of patterns — confirm against callers).
+}
+function matchesFilter3(id, filter2) { // Tests an id against a single pattern or a list of patterns using micromatch3.isMatch.
+  return typeof filter2 === "string" ? micromatch3.isMatch(id, filter2) : filter2.some((pattern) => micromatch3.isMatch(id, pattern)); // For a non-string filter, one matching pattern is enough.
+}
 function filterEvalCases(evalCases, filter2) {
   if (!filter2) {
     return evalCases;
   }
-  return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter2));
+  return evalCases.filter((evalCase) => matchesFilter3(evalCase.id, filter2));
 }
 function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
   const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
@@ -31402,7 +31890,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
   return {
     timestamp: timestamp.toISOString(),
     testId: evalCase.id,
-    dataset: evalCase.dataset,
+    suite: evalCase.suite,
     category: evalCase.category,
     conversationId: evalCase.conversation_id,
     score: 0,
@@ -31666,6 +32154,7 @@ async function evaluate(config) {
     verbose: config.verbose,
     maxConcurrency: config.workers ?? 3,
     filter: config.filter,
+    threshold: config.threshold,
     evalCases,
     onResult: async (result) => {
       collectedResults.push(result);
@@ -31676,19 +32165,19 @@ async function evaluate(config) {
   const durationMs = Date.now() - startTime;
   return {
     results: allResults,
-    summary: computeSummary(allResults, durationMs)
+    summary: computeSummary(allResults, durationMs, config.threshold)
   };
 }
 function mapAssertionType(type) {
   // Swap every underscore in the type name for a hyphen.
   const segments = type.split("_");
   return segments.join("-");
 }
-function computeSummary(results, durationMs) {
+function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
   const total = results.length;
   let passed = 0;
   let scoreSum = 0;
   for (const r of results) {
     scoreSum += r.score;
-    if (r.score >= PASS_THRESHOLD) {
+    if (r.score >= threshold) {
       passed++;
     }
   }
@@ -31798,7 +32287,7 @@ var CONFIG_FILE_NAMES = [
 ];
 async function loadTsConfig(projectRoot) {
   const { existsSync: existsSync7 } = await import("node:fs");
-  const { pathToFileURL } = await import("node:url");
+  const { pathToFileURL: pathToFileURL2 } = await import("node:url");
   const { join: join2 } = await import("node:path");
   for (const fileName of CONFIG_FILE_NAMES) {
     const filePath = join2(projectRoot, fileName);
@@ -31806,7 +32295,7 @@ async function loadTsConfig(projectRoot) {
       continue;
     }
     try {
-      const fileUrl = pathToFileURL(filePath).href;
+      const fileUrl = pathToFileURL2(filePath).href;
       const mod = await import(fileUrl);
       const config = mod.default ?? mod;
       return AgentVConfigSchema.parse(config);
@@ -31953,7 +32442,7 @@ function saveProjectRegistry(registry) {
   const registryPath = getProjectsRegistryPath();
   const dir = path47.dirname(registryPath);
   if (!existsSync6(dir)) {
-    mkdirSync(dir, { recursive: true });
+    mkdirSync2(dir, { recursive: true });
   }
   writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
 }
@@ -32213,7 +32702,7 @@ var OtelTraceExporter = class {
         rootSpan.setAttribute("gen_ai.system", "agentv");
         rootSpan.setAttribute("agentv.test_id", result.testId);
         rootSpan.setAttribute("agentv.target", result.target);
-        if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
+        if (result.suite) rootSpan.setAttribute("agentv.suite", result.suite);
         rootSpan.setAttribute("agentv.score", result.score);
         if (captureContent && result.output.length > 0) {
           const lastMsg = result.output[result.output.length - 1];
@@ -32422,7 +32911,7 @@ var OtelStreamingObserver = class {
     this.rootSpan.setAttribute("gen_ai.system", "agentv");
     this.rootSpan.setAttribute("agentv.test_id", testId);
     this.rootSpan.setAttribute("agentv.target", target);
-    if (evalSet) this.rootSpan.setAttribute("agentv.dataset", evalSet);
+    if (evalSet) this.rootSpan.setAttribute("agentv.suite", evalSet);
     this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
   }
   /** Create and immediately export a tool span */
@@ -32768,7 +33257,230 @@ function extractToolResultContent(content) {
   }
   return parts.length > 0 ? parts.join("") : void 0;
 }
-var DEFAULT_PROJECTS_DIR = () => path48.join(homedir3(), ".claude", "projects");
+/**
+ * Parse the text of a Codex rollout JSONL file into a normalized transcript.
+ *
+ * Lines that are blank, are not valid JSON, or carry no `type` are skipped.
+ * `session_meta` and `turn_context` entries supply the session metadata;
+ * `response_item` entries become messages. Every tool call is emitted as its own
+ * assistant message, and the matching `*_output` item (paired by `call_id`) is
+ * attached to that call afterwards. Developer messages and reasoning items are
+ * dropped.
+ *
+ * @param {string} jsonl - Raw rollout file contents, one JSON object per line.
+ * @returns {{messages: object[], source: object, tokenUsage: undefined, durationMs: (number|undefined), costUsd: null}}
+ *   `durationMs` spans the first to the last timestamped entry and is undefined
+ *   when either timestamp is missing or cannot be parsed as a date.
+ */
+function parseCodexSession(jsonl) {
+  const messages = [];
+  let sessionId = "";
+  let cwd;
+  let model;
+  let version;
+  let startTimestamp;
+  let endTimestamp;
+  // call_id -> location of the tool call that is still waiting for its output.
+  const pendingCalls = /* @__PURE__ */ new Map();
+  // String inputs are decoded as JSON when possible; anything else is kept verbatim.
+  const decodeToolInput = (raw) => {
+    if (typeof raw !== "string") return raw;
+    try {
+      return JSON.parse(raw);
+    } catch {
+      return raw;
+    }
+  };
+  const lines = jsonl.split("\n").filter((l) => l.trim().length > 0);
+  for (const line of lines) {
+    let entry;
+    try {
+      entry = JSON.parse(line);
+    } catch {
+      // Best effort: a corrupt line must not discard the rest of the session.
+      continue;
+    }
+    if (!entry.type) continue;
+    if (entry.timestamp) {
+      if (!startTimestamp) startTimestamp = entry.timestamp;
+      endTimestamp = entry.timestamp;
+    }
+    const payload = entry.payload ?? {};
+    switch (entry.type) {
+      case "session_meta": {
+        sessionId = String(payload.id ?? "");
+        cwd = payload.cwd ? String(payload.cwd) : void 0;
+        version = payload.cli_version ? String(payload.cli_version) : void 0;
+        if (payload.model && !model) {
+          model = String(payload.model);
+        }
+        break;
+      }
+      case "turn_context": {
+        // Only fills values that earlier entries did not already provide.
+        if (payload.model && !model) {
+          model = String(payload.model);
+        }
+        if (payload.cwd && !cwd) {
+          cwd = String(payload.cwd);
+        }
+        break;
+      }
+      case "response_item": {
+        const itemType = String(payload.type ?? "");
+        const role = String(payload.role ?? "");
+        switch (itemType) {
+          case "message": {
+            if (role === "developer") break;
+            const content = extractResponseItemContent(payload.content);
+            if (role === "user" && content) {
+              messages.push({ role: "user", content });
+            } else if (role === "assistant" && content) {
+              messages.push({ role: "assistant", content });
+            }
+            break;
+          }
+          case "function_call":
+          case "custom_tool_call": {
+            const callId = String(payload.call_id ?? "");
+            const toolCall = {
+              tool: String(payload.name ?? ""),
+              // NOTE(review): custom tool calls appear to carry their payload in
+              // `input` rather than `arguments` - confirm against the Codex rollout
+              // schema. `arguments` still takes precedence when it is present.
+              input: decodeToolInput(payload.arguments ?? payload.input),
+              id: callId
+            };
+            const msgIdx = messages.length;
+            messages.push({
+              role: "assistant",
+              toolCalls: [toolCall]
+            });
+            if (callId) {
+              pendingCalls.set(callId, { msgIdx, toolIdx: 0 });
+            }
+            break;
+          }
+          case "function_call_output":
+          case "custom_tool_call_output": {
+            const callId = String(payload.call_id ?? "");
+            const pending = pendingCalls.get(callId);
+            if (pending) {
+              // Rebuild the message instead of mutating it in place.
+              const existingMsg = messages[pending.msgIdx];
+              const existingCalls = [...existingMsg.toolCalls ?? []];
+              existingCalls[pending.toolIdx] = {
+                ...existingCalls[pending.toolIdx],
+                output: payload.output
+              };
+              messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls };
+              pendingCalls.delete(callId);
+            }
+            break;
+          }
+          // Skip reasoning blocks (thinking tokens)
+          case "reasoning":
+            break;
+        }
+        break;
+      }
+    }
+  }
+  let durationMs;
+  if (startTimestamp && endTimestamp) {
+    const elapsed = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
+    // Unparseable timestamps produce NaN; report "unknown" rather than a NaN duration.
+    if (Number.isFinite(elapsed)) durationMs = elapsed;
+  }
+  const source = {
+    provider: "codex",
+    sessionId,
+    cwd,
+    startedAt: startTimestamp,
+    model,
+    version
+  };
+  return {
+    messages,
+    source,
+    // Codex rollout files don't include token counts (only rate limit info)
+    tokenUsage: void 0,
+    durationMs,
+    costUsd: null
+  };
+}
+/**
+ * Flatten the `content` of a response item into one string.
+ *
+ * A string is returned unchanged. For an array, the `text` of every non-null
+ * object element whose `text` is a string is concatenated in order with no
+ * separator. Any other input, or an array with no such element, yields undefined.
+ *
+ * @param {unknown} content - Raw `content` value of a response item.
+ * @returns {string|undefined} The combined text, or undefined when none was found.
+ */
+function extractResponseItemContent(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return void 0;
+  }
+  const texts = content
+    .filter((item) => typeof item === "object" && item !== null && typeof item.text === "string")
+    .map((item) => item.text);
+  return texts.length === 0 ? void 0 : texts.join("");
+}
+var DEFAULT_SESSIONS_DIR = () => path48.join(homedir3(), ".codex", "sessions");
+/**
+ * Find Codex rollout files under `<sessionsDir>/<year>/<month>/<day>/`.
+ *
+ * Only files named `rollout-*.jsonl` are considered. The session id is the last
+ * five dash-separated segments of the file name (without extension) when the
+ * name has at least six segments, otherwise the whole name. Directories that
+ * cannot be listed are treated as empty, and a file that cannot be stat'ed is
+ * reported with an epoch `updatedAt`.
+ *
+ * @param {object} [opts]
+ * @param {string} [opts.sessionsDir] - Root to scan; defaults to `~/.codex/sessions`.
+ * @param {string} [opts.date] - Keep only the day directory whose
+ *   `<year>-<month>-<day>` name equals this value.
+ * @param {boolean} [opts.latest] - When truthy, return only the newest session.
+ * @param {number} [opts.limit] - Maximum number of sessions (default 10).
+ * @returns {Promise<Array<{sessionId: string, filePath: string, filename: string, updatedAt: Date}>>}
+ *   Sessions ordered newest first by modification time.
+ */
+async function discoverCodexSessions(opts) {
+  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
+  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
+  const dateFilter = opts?.date;
+  // An unreadable or missing directory simply contributes no entries.
+  const listDir = async (dirPath) => {
+    try {
+      return await readdir8(dirPath);
+    } catch {
+      return [];
+    }
+  };
+  // A day directory can only equal the filter if the filter starts with the
+  // year / year-month prefix, so other subtrees are skipped without listing them.
+  const mayContainDate = (prefix) => !dateFilter || typeof dateFilter !== "string" || dateFilter.startsWith(prefix);
+  const sessions = [];
+  for (const year of await listDir(sessionsDir)) {
+    if (!mayContainDate(`${year}-`)) continue;
+    const yearPath = path48.join(sessionsDir, year);
+    for (const month of await listDir(yearPath)) {
+      if (!mayContainDate(`${year}-${month}-`)) continue;
+      const monthPath = path48.join(yearPath, month);
+      for (const day of await listDir(monthPath)) {
+        if (dateFilter && `${year}-${month}-${day}` !== dateFilter) continue;
+        const dayPath = path48.join(monthPath, day);
+        for (const file of await listDir(dayPath)) {
+          if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
+          const filePath = path48.join(dayPath, file);
+          const nameWithoutExt = file.replace(/\.jsonl$/, "");
+          const parts = nameWithoutExt.split("-");
+          const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
+          let updatedAt;
+          try {
+            const fileStat = await stat9(filePath);
+            updatedAt = fileStat.mtime;
+          } catch {
+            // Keep the session listed, but sort it last.
+            updatedAt = /* @__PURE__ */ new Date(0);
+          }
+          sessions.push({ sessionId, filePath, filename: file, updatedAt });
+        }
+      }
+    }
+  }
+  sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
+  return sessions.slice(0, limit);
+}
+var DEFAULT_PROJECTS_DIR = () => path49.join(homedir4(), ".claude", "projects");
 /**
  * Turn a project path into a directory-name form by replacing every "/"
  * with "-".
  */
 function encodeProjectPath(projectPath) {
   const segments = projectPath.split("/");
   return segments.join("-");
 }
@@ -32777,7 +33489,7 @@ async function discoverClaudeSessions(opts) {
   const limit = opts?.latest ? 1 : opts?.limit ?? 10;
   let projectDirs;
   try {
-    projectDirs = await readdir8(projectsDir);
+    projectDirs = await readdir9(projectsDir);
   } catch {
     return [];
   }
@@ -32787,10 +33499,10 @@ async function discoverClaudeSessions(opts) {
   }
   const sessions = [];
   for (const projectDir of projectDirs) {
-    const dirPath = path48.join(projectsDir, projectDir);
+    const dirPath = path49.join(projectsDir, projectDir);
     let entries;
     try {
-      entries = await readdir8(dirPath);
+      entries = await readdir9(dirPath);
     } catch {
       continue;
     }
@@ -32798,10 +33510,10 @@ async function discoverClaudeSessions(opts) {
       if (!entry.endsWith(".jsonl")) continue;
       const sessionId = entry.replace(/\.jsonl$/, "");
       if (opts?.sessionId && sessionId !== opts.sessionId) continue;
-      const filePath = path48.join(dirPath, entry);
+      const filePath = path49.join(dirPath, entry);
       let updatedAt;
       try {
-        const fileStat = await stat9(filePath);
+        const fileStat = await stat10(filePath);
         updatedAt = fileStat.mtime;
       } catch {
         updatedAt = /* @__PURE__ */ new Date(0);
@@ -32817,9 +33529,82 @@ async function discoverClaudeSessions(opts) {
   sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
   return sessions.slice(0, limit);
 }
+/**
+ * Convert a parsed transcript entry into the snake_case record written as one
+ * JSONL line.
+ *
+ * `input` is the content of the first user message when that content is a
+ * string, otherwise the empty string. `output` is the full message list.
+ * `source.cwd` falls back to the entry's `projectPath` when no `cwd` is set.
+ *
+ * @param {object} entry - Entry with `messages`, `source`, and optional
+ *   `tokenUsage`, `durationMs` and `costUsd`.
+ * @returns {object} The serializable transcript record.
+ */
+function toTranscriptJsonLine(entry) {
+  const { messages, tokenUsage, source } = entry;
+  const firstUserContent = messages.find((message) => message.role === "user")?.content;
+  let tokenUsageRecord;
+  if (tokenUsage) {
+    tokenUsageRecord = {
+      input: tokenUsage.input,
+      output: tokenUsage.output,
+      cached: tokenUsage.cached
+    };
+  }
+  const sourceRecord = {
+    provider: source.provider,
+    session_id: source.sessionId,
+    model: source.model,
+    timestamp: source.startedAt,
+    git_branch: source.gitBranch,
+    cwd: source.cwd ?? source.projectPath,
+    version: source.version
+  };
+  return {
+    input: typeof firstUserContent === "string" ? firstUserContent : "",
+    output: messages,
+    token_usage: tokenUsageRecord,
+    duration_ms: entry.durationMs,
+    cost_usd: entry.costUsd,
+    source: sourceRecord
+  };
+}
+/**
+ * Read a transcript JSONL file and parse each non-blank line as JSON.
+ *
+ * @param {string} filePath - Path of the JSONL file to read as UTF-8.
+ * @returns {Promise<unknown[]>} Parsed records in file order.
+ * @throws {SyntaxError} When a non-blank line is not valid JSON. The message
+ *   names the file and the 1-based line number, and the original parse error
+ *   is attached as `cause`.
+ */
+async function readTranscriptJsonl(filePath) {
+  const text2 = await readFile14(filePath, "utf8");
+  const rawLines = text2.split("\n");
+  const records = [];
+  // Indexing the unfiltered lines keeps reported line numbers true to the file.
+  for (let index = 0; index < rawLines.length; index++) {
+    const line = rawLines[index];
+    if (line.trim().length === 0) continue;
+    try {
+      records.push(JSON.parse(line));
+    } catch (error) {
+      const reason = error instanceof Error ? error.message : String(error);
+      throw new SyntaxError(`Invalid JSON on line ${index + 1} of ${filePath}: ${reason}`, { cause: error });
+    }
+  }
+  return records;
+}
 /**
  * Read a transcript file and resolve with its contents decoded as UTF-8 text.
  */
 async function readTranscriptFile(filePath) {
   const contents = await readFile14(filePath, "utf8");
   return contents;
 }
+/**
+ * Provider that replays pre-recorded transcript lines instead of calling a
+ * live agent. Each `invoke` call consumes the next line in order; the request
+ * argument is ignored.
+ */
+var TranscriptProvider = class _TranscriptProvider {
+  id;
+  kind = "transcript";
+  targetName;
+  lines;
+  // Index of the next transcript line to hand out.
+  cursor = 0;
+  /**
+   * @param {string} targetName - Name used to build the provider id (`transcript:<name>`).
+   * @param {object[]} lines - Parsed transcript records, one per test case.
+   */
+  constructor(targetName, lines) {
+    this.targetName = targetName;
+    this.id = `transcript:${targetName}`;
+    this.lines = lines;
+  }
+  /**
+   * Create a TranscriptProvider from a JSONL file path.
+   *
+   * The provider is named after `source.provider` of the first line, or
+   * "transcript" when that line carries no such value.
+   *
+   * @throws {Error} When the file contains no transcript lines.
+   */
+  static async fromFile(filePath) {
+    const lines = await readTranscriptJsonl(filePath);
+    if (lines.length === 0) {
+      throw new Error(`Transcript file is empty: ${filePath}`);
+    }
+    // `source` is optional: a line without it must not crash the loader.
+    const providerName = lines[0]?.source?.provider ?? "transcript";
+    return new _TranscriptProvider(providerName, lines);
+  }
+  /** Number of transcript lines available for replay. */
+  get lineCount() {
+    return this.lines.length;
+  }
+  /**
+   * Return the next recorded response.
+   *
+   * @throws {Error} When every transcript line has already been consumed.
+   */
+  async invoke(_request) {
+    if (this.cursor >= this.lines.length) {
+      throw new Error(
+        `Transcript exhausted: ${this.lines.length} line(s) available but ${this.cursor + 1} invocations attempted. Each transcript line maps to one test case.`
+      );
+    }
+    const line = this.lines[this.cursor++];
+    return {
+      output: line.output,
+      tokenUsage: line.token_usage ? {
+        input: line.token_usage.input,
+        output: line.token_usage.output,
+        cached: line.token_usage.cached
+      } : void 0,
+      durationMs: line.duration_ms,
+      // A null cost in the file means "unknown"; expose it as undefined.
+      costUsd: line.cost_usd ?? void 0,
+      // Lines without a `source` object simply have no start time.
+      startTime: line.source?.timestamp
+    };
+  }
+};
 /**
  * Build the placeholder agent kernel descriptor; its status is always "stub".
  */
 function createAgentKernel() {
   const kernel = { status: "stub" };
   return kernel;
 }
@@ -32843,6 +33628,7 @@ export {
   buildSearchRoots,
   resolveFileReference,
   CLI_PLACEHOLDERS,
+  findDeprecatedCamelCaseTargetWarnings,
   COMMON_TARGET_SETTINGS,
   resolveDelegatedTargetDefinition,
   resolveTargetDefinition,
@@ -32887,17 +33673,18 @@ export {
   subscribeToCodexLogEntries,
   consumeCopilotCliLogEntries,
   subscribeToCopilotCliLogEntries,
+  parseCopilotEvents,
   discoverCopilotSessions,
   consumeCopilotSdkLogEntries,
   subscribeToCopilotSdkLogEntries,
   consumePiLogEntries,
   subscribeToPiLogEntries,
-  ProviderRegistry,
   getAgentvHome,
   getWorkspacesRoot,
   getSubagentsRoot,
   getTraceStateRoot,
   getWorkspacePoolRoot,
+  ProviderRegistry,
   ensureVSCodeSubagents,
   readTargetDefinitions,
   listTargetNames,
@@ -32905,6 +33692,7 @@ export {
   createBuiltinProviderRegistry,
   createProvider,
   resolveAndCreateProvider,
+  DEFAULT_THRESHOLD,
   PASS_THRESHOLD,
   scoreToVerdict,
   clampScore,
@@ -32992,8 +33780,13 @@ export {
   OtelTraceExporter,
   OtelStreamingObserver,
   parseClaudeSession,
+  parseCodexSession,
+  discoverCodexSessions,
   discoverClaudeSessions,
+  toTranscriptJsonLine,
+  readTranscriptJsonl,
   readTranscriptFile,
+  TranscriptProvider,
   createAgentKernel
 };
-//# sourceMappingURL=chunk-YXXD27OK.js.map
+//# sourceMappingURL=chunk-I6UE4LHZ.js.map