agentv 4.6.1 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-ZK4GG7PR.js
304
+ // ../../packages/core/dist/chunk-VCVVKCC4.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-ZK4GG7PR.js
422
+ // ../../packages/core/dist/chunk-VCVVKCC4.js
423
423
  import { readFile as readFile2 } from "node:fs/promises";
424
424
  import path3 from "node:path";
425
425
  import fg from "fast-glob";
@@ -633,15 +633,13 @@ async function resolveFileReference(rawValue, searchRoots) {
633
633
  }
634
634
  var CliHealthcheckHttpInputSchema = external_exports2.object({
635
635
  url: external_exports2.string().min(1, "healthcheck URL is required"),
636
- timeout_seconds: external_exports2.number().positive().optional(),
637
- timeoutSeconds: external_exports2.number().positive().optional()
638
- });
636
+ timeout_seconds: external_exports2.number().positive().optional()
637
+ }).passthrough();
639
638
  var CliHealthcheckCommandInputSchema = external_exports2.object({
640
639
  command: external_exports2.string().min(1, "healthcheck command is required"),
641
640
  cwd: external_exports2.string().optional(),
642
- timeout_seconds: external_exports2.number().positive().optional(),
643
- timeoutSeconds: external_exports2.number().positive().optional()
644
- });
641
+ timeout_seconds: external_exports2.number().positive().optional()
642
+ }).passthrough();
645
643
  var CliHealthcheckInputSchema = external_exports2.union([
646
644
  CliHealthcheckHttpInputSchema,
647
645
  CliHealthcheckCommandInputSchema
@@ -653,36 +651,28 @@ var CliTargetInputSchema = external_exports2.object({
653
651
  command: external_exports2.string(),
654
652
  // Files format - optional
655
653
  files_format: external_exports2.string().optional(),
656
- filesFormat: external_exports2.string().optional(),
657
654
  attachments_format: external_exports2.string().optional(),
658
- attachmentsFormat: external_exports2.string().optional(),
659
655
  // Working directory - optional
660
656
  cwd: external_exports2.string().optional(),
661
657
  // Workspace template directory - optional (mutually exclusive with cwd)
662
658
  workspace_template: external_exports2.string().optional(),
663
- workspaceTemplate: external_exports2.string().optional(),
664
659
  // Timeout in seconds - optional
665
660
  timeout_seconds: external_exports2.number().positive().optional(),
666
- timeoutSeconds: external_exports2.number().positive().optional(),
667
661
  // Healthcheck configuration - optional
668
662
  healthcheck: CliHealthcheckInputSchema.optional(),
669
663
  // Verbose mode - optional
670
664
  verbose: external_exports2.boolean().optional(),
671
665
  cli_verbose: external_exports2.boolean().optional(),
672
- cliVerbose: external_exports2.boolean().optional(),
673
666
  // Keep temp files - optional
674
667
  keep_temp_files: external_exports2.boolean().optional(),
675
- keepTempFiles: external_exports2.boolean().optional(),
676
668
  keep_output_files: external_exports2.boolean().optional(),
677
- keepOutputFiles: external_exports2.boolean().optional(),
678
669
  // Common target fields
679
670
  grader_target: external_exports2.string().optional(),
680
671
  judge_target: external_exports2.string().optional(),
681
672
  // backward compat
682
673
  workers: external_exports2.number().int().min(1).optional(),
683
- provider_batching: external_exports2.boolean().optional(),
684
- providerBatching: external_exports2.boolean().optional()
685
- });
674
+ provider_batching: external_exports2.boolean().optional()
675
+ }).passthrough();
686
676
  var CliHealthcheckHttpSchema = external_exports2.object({
687
677
  url: external_exports2.string().min(1),
688
678
  timeoutMs: external_exports2.number().positive().optional()
@@ -707,7 +697,7 @@ var CliTargetConfigSchema = external_exports2.object({
707
697
  keepTempFiles: external_exports2.boolean().optional()
708
698
  }).strict();
709
699
  function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
710
- const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
700
+ const timeoutSeconds = input.timeout_seconds;
711
701
  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
712
702
  if ("url" in input && input.url) {
713
703
  const url = resolveString(input.url, env, `${targetName} healthcheck URL`);
@@ -741,9 +731,9 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
741
731
  function normalizeCliTargetInput(input, env, evalFilePath) {
742
732
  const targetName = input.name;
743
733
  const command = resolveString(input.command, env, `${targetName} CLI command`, true);
744
- const filesFormatSource = input.files_format ?? input.filesFormat ?? input.attachments_format ?? input.attachmentsFormat;
734
+ const filesFormatSource = input.files_format ?? input.attachments_format;
745
735
  const filesFormat = resolveOptionalLiteralString(filesFormatSource);
746
- const workspaceTemplateSource = input.workspace_template ?? input.workspaceTemplate;
736
+ const workspaceTemplateSource = input.workspace_template;
747
737
  let workspaceTemplate = resolveOptionalString(
748
738
  workspaceTemplateSource,
749
739
  env,
@@ -771,12 +761,10 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
771
761
  if (!cwd && !workspaceTemplate && evalFilePath) {
772
762
  cwd = path2.dirname(path2.resolve(evalFilePath));
773
763
  }
774
- const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
764
+ const timeoutSeconds = input.timeout_seconds;
775
765
  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
776
- const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose ?? input.cliVerbose);
777
- const keepTempFiles = resolveOptionalBoolean(
778
- input.keep_temp_files ?? input.keepTempFiles ?? input.keep_output_files ?? input.keepOutputFiles
779
- );
766
+ const verbose = resolveOptionalBoolean(input.verbose ?? input.cli_verbose);
767
+ const keepTempFiles = resolveOptionalBoolean(input.keep_temp_files ?? input.keep_output_files);
780
768
  const healthcheck = input.healthcheck ? normalizeCliHealthcheck(input.healthcheck, env, targetName, evalFilePath) : void 0;
781
769
  return {
782
770
  command,
@@ -797,14 +785,104 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
797
785
  "FILES",
798
786
  "OUTPUT_FILE"
799
787
  ]);
788
+ var DEPRECATED_TARGET_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
789
+ ["providerBatching", "provider_batching"],
790
+ ["subagentModeAllowed", "subagent_mode_allowed"],
791
+ ["fallbackTargets", "fallback_targets"],
792
+ ["resourceName", "endpoint"],
793
+ ["baseUrl", "base_url"],
794
+ ["apiKey", "api_key"],
795
+ ["deploymentName", "model"],
796
+ ["thinkingBudget", "thinking_budget"],
797
+ ["maxTokens", "max_output_tokens"],
798
+ ["apiFormat", "api_format"],
799
+ ["timeoutSeconds", "timeout_seconds"],
800
+ ["logDir", "log_dir"],
801
+ ["logDirectory", "log_directory"],
802
+ ["logFormat", "log_format"],
803
+ ["logOutputFormat", "log_output_format"],
804
+ ["systemPrompt", "system_prompt"],
805
+ ["maxTurns", "max_turns"],
806
+ ["maxBudgetUsd", "max_budget_usd"],
807
+ ["dryRun", "dry_run"],
808
+ ["subagentRoot", "subagent_root"],
809
+ ["filesFormat", "files_format"],
810
+ ["attachmentsFormat", "attachments_format"],
811
+ ["cliUrl", "cli_url"],
812
+ ["cliPath", "cli_path"],
813
+ ["githubToken", "github_token"],
814
+ ["sessionDir", "session_dir"],
815
+ ["sessionId", "session_id"],
816
+ ["sessionStateDir", "session_state_dir"],
817
+ ["maxRetries", "max_retries"],
818
+ ["retryInitialDelayMs", "retry_initial_delay_ms"],
819
+ ["retryMaxDelayMs", "retry_max_delay_ms"],
820
+ ["retryBackoffFactor", "retry_backoff_factor"],
821
+ ["retryStatusCodes", "retry_status_codes"]
822
+ ]);
823
+ var DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS = /* @__PURE__ */ new Map([
824
+ ["timeoutSeconds", "timeout_seconds"]
825
+ ]);
826
+ function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
827
+ if (typeof value !== "object" || value === null || Array.isArray(value)) {
828
+ return [];
829
+ }
830
+ const warnings = [];
831
+ for (const [camelCaseField, snakeCaseField] of aliases) {
832
+ if (Object.prototype.hasOwnProperty.call(value, camelCaseField)) {
833
+ warnings.push({
834
+ location: `${location}.${camelCaseField}`,
835
+ message: `camelCase field '${camelCaseField}' is no longer supported in targets.yaml. Use '${snakeCaseField}' instead.`
836
+ });
837
+ }
838
+ }
839
+ return warnings;
840
+ }
841
+ function assertNoDeprecatedCamelCaseTargetFields(definition) {
842
+ if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
843
+ throw new Error(
844
+ `${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
845
+ );
846
+ }
847
+ const warning = findDeprecatedCamelCaseTargetWarnings(
848
+ definition,
849
+ `target "${definition.name}"`
850
+ )[0];
851
+ if (!warning) {
852
+ return;
853
+ }
854
+ const fieldMatch = warning.message.match(/field '([^']+)'/);
855
+ const replacementMatch = warning.message.match(/Use '([^']+)' instead/);
856
+ const field = fieldMatch?.[1] ?? "unknown";
857
+ const replacement = replacementMatch?.[1] ?? "snake_case";
858
+ throw new Error(
859
+ `${warning.location}: camelCase field '${field}' is no longer supported in targets.yaml. Use '${replacement}' instead.`
860
+ );
861
+ }
862
+ function findDeprecatedCamelCaseTargetWarnings(target, location) {
863
+ const warnings = collectDeprecatedCamelCaseWarnings(
864
+ target,
865
+ location,
866
+ DEPRECATED_TARGET_CAMEL_CASE_FIELDS
867
+ );
868
+ if (typeof target !== "object" || target === null || Array.isArray(target)) {
869
+ return warnings;
870
+ }
871
+ const healthcheck = target.healthcheck;
872
+ warnings.push(
873
+ ...collectDeprecatedCamelCaseWarnings(
874
+ healthcheck,
875
+ `${location}.healthcheck`,
876
+ DEPRECATED_HEALTHCHECK_CAMEL_CASE_FIELDS
877
+ )
878
+ );
879
+ return warnings;
880
+ }
800
881
  var COMMON_TARGET_SETTINGS = [
801
882
  "use_target",
802
883
  "provider_batching",
803
- "providerBatching",
804
884
  "subagent_mode_allowed",
805
- "subagentModeAllowed",
806
- "fallback_targets",
807
- "fallbackTargets"
885
+ "fallback_targets"
808
886
  ];
809
887
  var USE_TARGET_ENV_PATTERN = /^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i;
810
888
  var BASE_TARGET_SCHEMA = external_exports2.object({
@@ -816,43 +894,40 @@ var BASE_TARGET_SCHEMA = external_exports2.object({
816
894
  // backward compat
817
895
  workers: external_exports2.number().int().min(1).optional(),
818
896
  workspace_template: external_exports2.string().optional(),
819
- workspaceTemplate: external_exports2.string().optional(),
820
897
  subagent_mode_allowed: external_exports2.boolean().optional(),
821
- fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional(),
822
- fallbackTargets: external_exports2.array(external_exports2.string().min(1)).optional()
898
+ fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional()
823
899
  }).passthrough();
824
900
  var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
901
+ var DEFAULT_AZURE_RESPONSES_API_VERSION = "v1";
825
902
  var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
826
- function normalizeAzureApiVersion(value) {
903
+ function normalizeAzureApiVersion(value, apiFormat) {
904
+ const defaultVersion = apiFormat === "responses" ? DEFAULT_AZURE_RESPONSES_API_VERSION : DEFAULT_AZURE_API_VERSION;
827
905
  if (!value) {
828
- return DEFAULT_AZURE_API_VERSION;
906
+ return defaultVersion;
829
907
  }
830
908
  const trimmed = value.trim();
831
909
  if (trimmed.length === 0) {
832
- return DEFAULT_AZURE_API_VERSION;
910
+ return defaultVersion;
833
911
  }
834
912
  const withoutPrefix = trimmed.replace(/^api[-_]?version\s*=\s*/i, "").trim();
835
- return withoutPrefix.length > 0 ? withoutPrefix : DEFAULT_AZURE_API_VERSION;
913
+ return withoutPrefix.length > 0 ? withoutPrefix : defaultVersion;
836
914
  }
837
915
  function resolveRetryConfig(target) {
838
- const maxRetries = resolveOptionalNumber(
839
- target.max_retries ?? target.maxRetries,
840
- `${target.name} max retries`
841
- );
916
+ const maxRetries = resolveOptionalNumber(target.max_retries, `${target.name} max retries`);
842
917
  const initialDelayMs = resolveOptionalNumber(
843
- target.retry_initial_delay_ms ?? target.retryInitialDelayMs,
918
+ target.retry_initial_delay_ms,
844
919
  `${target.name} retry initial delay`
845
920
  );
846
921
  const maxDelayMs = resolveOptionalNumber(
847
- target.retry_max_delay_ms ?? target.retryMaxDelayMs,
922
+ target.retry_max_delay_ms,
848
923
  `${target.name} retry max delay`
849
924
  );
850
925
  const backoffFactor = resolveOptionalNumber(
851
- target.retry_backoff_factor ?? target.retryBackoffFactor,
926
+ target.retry_backoff_factor,
852
927
  `${target.name} retry backoff factor`
853
928
  );
854
929
  const retryableStatusCodes = resolveOptionalNumberArray(
855
- target.retry_status_codes ?? target.retryStatusCodes,
930
+ target.retry_status_codes,
856
931
  `${target.name} retry status codes`
857
932
  );
858
933
  if (maxRetries === void 0 && initialDelayMs === void 0 && maxDelayMs === void 0 && backoffFactor === void 0 && retryableStatusCodes === void 0) {
@@ -912,9 +987,10 @@ function resolveDelegatedTargetDefinition(name21, definitions, env = process.env
912
987
  `Target "${name21}" exceeded the maximum use_target resolution depth (10). Check for a delegation loop or overly deep alias chain.`
913
988
  );
914
989
  }
915
- function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
990
+ function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
991
+ assertNoDeprecatedCamelCaseTargetFields(definition);
916
992
  const parsed = BASE_TARGET_SCHEMA.parse(definition);
917
- if (parsed.workspace_template !== void 0 || parsed.workspaceTemplate !== void 0) {
993
+ if (parsed.workspace_template !== void 0) {
918
994
  throw new Error(
919
995
  `${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
920
996
  );
@@ -930,13 +1006,9 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
930
1006
  `${parsed.name} provider`,
931
1007
  true
932
1008
  ).toLowerCase();
933
- const providerBatching = resolveOptionalBoolean(
934
- parsed.provider_batching ?? parsed.providerBatching
935
- );
936
- const subagentModeAllowed = resolveOptionalBoolean(
937
- parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
938
- );
939
- const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
1009
+ const providerBatching = resolveOptionalBoolean(parsed.provider_batching);
1010
+ const subagentModeAllowed = resolveOptionalBoolean(parsed.subagent_mode_allowed);
1011
+ const fallbackTargets = parsed.fallback_targets;
940
1012
  const base = {
941
1013
  name: parsed.name,
942
1014
  graderTarget: parsed.grader_target ?? parsed.judge_target,
@@ -1086,20 +1158,22 @@ function normalizeOpenAIBaseUrl(value) {
1086
1158
  return trimmed.endsWith("/v1") ? trimmed : `${trimmed}/v1`;
1087
1159
  }
1088
1160
  function resolveAzureConfig(target, env) {
1089
- const endpointSource = target.endpoint ?? target.resource ?? target.resourceName;
1090
- const apiKeySource = target.api_key ?? target.apiKey;
1091
- const deploymentSource = target.deployment ?? target.deploymentName ?? target.model;
1161
+ const endpointSource = target.endpoint ?? target.resource;
1162
+ const apiKeySource = target.api_key;
1163
+ const deploymentSource = target.deployment ?? target.model;
1092
1164
  const versionSource = target.version ?? target.api_version;
1093
1165
  const temperatureSource = target.temperature;
1094
- const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
1166
+ const maxTokensSource = target.max_output_tokens;
1095
1167
  const resourceName = resolveString(endpointSource, env, `${target.name} endpoint`);
1096
1168
  const apiKey = resolveString(apiKeySource, env, `${target.name} api key`);
1097
1169
  const deploymentName = resolveString(deploymentSource, env, `${target.name} deployment`);
1170
+ const apiFormat = resolveApiFormat(target, env, target.name);
1098
1171
  const version = normalizeAzureApiVersion(
1099
1172
  resolveOptionalString(versionSource, env, `${target.name} api version`, {
1100
1173
  allowLiteral: true,
1101
1174
  optionalEnv: true
1102
- })
1175
+ }),
1176
+ apiFormat
1103
1177
  );
1104
1178
  const temperature = resolveOptionalNumber(temperatureSource, `${target.name} temperature`);
1105
1179
  const maxOutputTokens = resolveOptionalNumber(
@@ -1112,13 +1186,17 @@ function resolveAzureConfig(target, env) {
1112
1186
  deploymentName,
1113
1187
  apiKey,
1114
1188
  version,
1189
+ apiFormat,
1115
1190
  temperature,
1116
1191
  maxOutputTokens,
1117
1192
  retry
1118
1193
  };
1119
1194
  }
1120
- function resolveApiFormat(target, targetName) {
1121
- const raw = target.api_format ?? target.apiFormat;
1195
+ function resolveApiFormat(target, env, targetName) {
1196
+ const raw = resolveOptionalString(target.api_format, env, `${targetName} api format`, {
1197
+ allowLiteral: true,
1198
+ optionalEnv: true
1199
+ });
1122
1200
  if (raw === void 0) return void 0;
1123
1201
  if (raw === "chat" || raw === "responses") return raw;
1124
1202
  throw new Error(
@@ -1126,11 +1204,11 @@ function resolveApiFormat(target, targetName) {
1126
1204
  );
1127
1205
  }
1128
1206
  function resolveOpenAIConfig(target, env) {
1129
- const endpointSource = target.endpoint ?? target.base_url ?? target.baseUrl;
1130
- const apiKeySource = target.api_key ?? target.apiKey;
1207
+ const endpointSource = target.endpoint ?? target.base_url;
1208
+ const apiKeySource = target.api_key;
1131
1209
  const modelSource = target.model ?? target.deployment ?? target.variant;
1132
1210
  const temperatureSource = target.temperature;
1133
- const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
1211
+ const maxTokensSource = target.max_output_tokens;
1134
1212
  const baseURL = normalizeOpenAIBaseUrl(
1135
1213
  resolveOptionalString(endpointSource, env, `${target.name} endpoint`, {
1136
1214
  allowLiteral: true,
@@ -1144,17 +1222,17 @@ function resolveOpenAIConfig(target, env) {
1144
1222
  baseURL,
1145
1223
  apiKey,
1146
1224
  model,
1147
- apiFormat: resolveApiFormat(target, target.name),
1225
+ apiFormat: resolveApiFormat(target, env, target.name),
1148
1226
  temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
1149
1227
  maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
1150
1228
  retry
1151
1229
  };
1152
1230
  }
1153
1231
  function resolveOpenRouterConfig(target, env) {
1154
- const apiKeySource = target.api_key ?? target.apiKey;
1232
+ const apiKeySource = target.api_key;
1155
1233
  const modelSource = target.model ?? target.deployment ?? target.variant;
1156
1234
  const temperatureSource = target.temperature;
1157
- const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
1235
+ const maxTokensSource = target.max_output_tokens;
1158
1236
  const retry = resolveRetryConfig(target);
1159
1237
  return {
1160
1238
  apiKey: resolveString(apiKeySource, env, `${target.name} OpenRouter api key`),
@@ -1165,11 +1243,11 @@ function resolveOpenRouterConfig(target, env) {
1165
1243
  };
1166
1244
  }
1167
1245
  function resolveAnthropicConfig(target, env) {
1168
- const apiKeySource = target.api_key ?? target.apiKey;
1246
+ const apiKeySource = target.api_key;
1169
1247
  const modelSource = target.model ?? target.deployment ?? target.variant;
1170
1248
  const temperatureSource = target.temperature;
1171
- const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
1172
- const thinkingBudgetSource = target.thinking_budget ?? target.thinkingBudget;
1249
+ const maxTokensSource = target.max_output_tokens;
1250
+ const thinkingBudgetSource = target.thinking_budget;
1173
1251
  const apiKey = resolveString(apiKeySource, env, `${target.name} Anthropic api key`);
1174
1252
  const model = resolveString(modelSource, env, `${target.name} Anthropic model`);
1175
1253
  const retry = resolveRetryConfig(target);
@@ -1183,10 +1261,10 @@ function resolveAnthropicConfig(target, env) {
1183
1261
  };
1184
1262
  }
1185
1263
  function resolveGeminiConfig(target, env) {
1186
- const apiKeySource = target.api_key ?? target.apiKey;
1264
+ const apiKeySource = target.api_key;
1187
1265
  const modelSource = target.model ?? target.deployment ?? target.variant;
1188
1266
  const temperatureSource = target.temperature;
1189
- const maxTokensSource = target.max_output_tokens ?? target.maxTokens;
1267
+ const maxTokensSource = target.max_output_tokens;
1190
1268
  const apiKey = resolveString(apiKeySource, env, `${target.name} Google API key`);
1191
1269
  const model = resolveOptionalString(modelSource, env, `${target.name} Gemini model`, {
1192
1270
  allowLiteral: true,
@@ -1206,11 +1284,11 @@ function resolveCodexConfig(target, env, evalFilePath) {
1206
1284
  const executableSource = target.executable ?? target.command ?? target.binary;
1207
1285
  const argsSource = target.args ?? target.arguments;
1208
1286
  const cwdSource = target.cwd;
1209
- const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
1210
- const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
1211
- const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
1212
- const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CODEX_LOG_FORMAT;
1213
- const systemPromptSource = target.system_prompt ?? target.systemPrompt;
1287
+ const workspaceTemplateSource = target.workspace_template;
1288
+ const timeoutSource = target.timeout_seconds;
1289
+ const logDirSource = target.log_dir ?? target.log_directory;
1290
+ const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
1291
+ const systemPromptSource = target.system_prompt;
1214
1292
  const model = resolveOptionalString(modelSource, env, `${target.name} codex model`, {
1215
1293
  allowLiteral: true,
1216
1294
  optionalEnv: true
@@ -1274,16 +1352,16 @@ function normalizeCodexLogFormat(value) {
1274
1352
  throw new Error("codex log format must be 'summary' or 'json'");
1275
1353
  }
1276
1354
  function resolveCopilotSdkConfig(target, env, evalFilePath) {
1277
- const cliUrlSource = target.cli_url ?? target.cliUrl;
1278
- const cliPathSource = target.cli_path ?? target.cliPath;
1279
- const githubTokenSource = target.github_token ?? target.githubToken;
1355
+ const cliUrlSource = target.cli_url;
1356
+ const cliPathSource = target.cli_path;
1357
+ const githubTokenSource = target.github_token;
1280
1358
  const modelSource = target.model;
1281
1359
  const cwdSource = target.cwd;
1282
- const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
1283
- const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
1284
- const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
1285
- const logFormatSource = target.log_format ?? target.logFormat;
1286
- const systemPromptSource = target.system_prompt ?? target.systemPrompt;
1360
+ const workspaceTemplateSource = target.workspace_template;
1361
+ const timeoutSource = target.timeout_seconds;
1362
+ const logDirSource = target.log_dir ?? target.log_directory;
1363
+ const logFormatSource = target.log_format;
1364
+ const systemPromptSource = target.system_prompt;
1287
1365
  const cliUrl = resolveOptionalString(cliUrlSource, env, `${target.name} copilot-sdk cli URL`, {
1288
1366
  allowLiteral: true,
1289
1367
  optionalEnv: true
@@ -1338,6 +1416,52 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
1338
1416
  );
1339
1417
  const logFormat = normalizeCopilotLogFormat(logFormatSource);
1340
1418
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
1419
+ const byok = target.byok;
1420
+ let byokType;
1421
+ let byokBaseUrl;
1422
+ let byokApiKey;
1423
+ let byokBearerToken;
1424
+ let byokApiVersion;
1425
+ let byokWireApi;
1426
+ if (byok && typeof byok === "object") {
1427
+ byokType = resolveOptionalString(byok.type, env, `${target.name} byok type`, {
1428
+ allowLiteral: true,
1429
+ optionalEnv: true
1430
+ });
1431
+ byokBaseUrl = resolveOptionalString(byok.base_url, env, `${target.name} byok base URL`, {
1432
+ allowLiteral: true,
1433
+ optionalEnv: true
1434
+ });
1435
+ byokApiKey = resolveOptionalString(byok.api_key, env, `${target.name} byok API key`, {
1436
+ allowLiteral: false,
1437
+ optionalEnv: true
1438
+ });
1439
+ byokBearerToken = resolveOptionalString(
1440
+ byok.bearer_token,
1441
+ env,
1442
+ `${target.name} byok bearer token`,
1443
+ {
1444
+ allowLiteral: false,
1445
+ optionalEnv: true
1446
+ }
1447
+ );
1448
+ byokApiVersion = resolveOptionalString(
1449
+ byok.api_version,
1450
+ env,
1451
+ `${target.name} byok API version`,
1452
+ {
1453
+ allowLiteral: true,
1454
+ optionalEnv: true
1455
+ }
1456
+ );
1457
+ byokWireApi = resolveOptionalString(byok.wire_api, env, `${target.name} byok wire API`, {
1458
+ allowLiteral: true,
1459
+ optionalEnv: true
1460
+ });
1461
+ if (!byokBaseUrl) {
1462
+ throw new Error(`${target.name}: 'byok.base_url' is required when 'byok' is specified`);
1463
+ }
1464
+ }
1341
1465
  return {
1342
1466
  cliUrl,
1343
1467
  cliPath,
@@ -1348,7 +1472,13 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
1348
1472
  timeoutMs,
1349
1473
  logDir,
1350
1474
  logFormat,
1351
- systemPrompt
1475
+ systemPrompt,
1476
+ byokType,
1477
+ byokBaseUrl,
1478
+ byokApiKey,
1479
+ byokBearerToken,
1480
+ byokApiVersion,
1481
+ byokWireApi
1352
1482
  };
1353
1483
  }
1354
1484
  function resolveCopilotCliConfig(target, env, evalFilePath) {
@@ -1356,11 +1486,11 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
1356
1486
  const modelSource = target.model;
1357
1487
  const argsSource = target.args ?? target.arguments;
1358
1488
  const cwdSource = target.cwd;
1359
- const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
1360
- const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
1361
- const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
1362
- const logFormatSource = target.log_format ?? target.logFormat;
1363
- const systemPromptSource = target.system_prompt ?? target.systemPrompt;
1489
+ const workspaceTemplateSource = target.workspace_template;
1490
+ const timeoutSource = target.timeout_seconds;
1491
+ const logDirSource = target.log_dir ?? target.log_directory;
1492
+ const logFormatSource = target.log_format;
1493
+ const systemPromptSource = target.system_prompt;
1364
1494
  const executable = resolveOptionalString(executableSource, env, `${target.name} copilot-cli executable`, {
1365
1495
  allowLiteral: true,
1366
1496
  optionalEnv: true
@@ -1424,16 +1554,16 @@ function normalizeCopilotLogFormat(value) {
1424
1554
  }
1425
1555
  function resolvePiCodingAgentConfig(target, env, evalFilePath) {
1426
1556
  const subproviderSource = target.subprovider;
1427
- const modelSource = target.model ?? target.pi_model ?? target.piModel;
1428
- const apiKeySource = target.api_key ?? target.apiKey;
1429
- const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
1430
- const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
1557
+ const modelSource = target.model ?? target.pi_model;
1558
+ const apiKeySource = target.api_key;
1559
+ const toolsSource = target.tools ?? target.pi_tools;
1560
+ const thinkingSource = target.thinking ?? target.pi_thinking;
1431
1561
  const cwdSource = target.cwd;
1432
- const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
1433
- const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
1434
- const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
1435
- const logFormatSource = target.log_format ?? target.logFormat;
1436
- const systemPromptSource = target.system_prompt ?? target.systemPrompt;
1562
+ const workspaceTemplateSource = target.workspace_template;
1563
+ const timeoutSource = target.timeout_seconds;
1564
+ const logDirSource = target.log_dir ?? target.log_directory;
1565
+ const logFormatSource = target.log_format;
1566
+ const systemPromptSource = target.system_prompt;
1437
1567
  const subprovider = resolveOptionalString(
1438
1568
  subproviderSource,
1439
1569
  env,
@@ -1451,7 +1581,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
1451
1581
  allowLiteral: false,
1452
1582
  optionalEnv: true
1453
1583
  });
1454
- const baseUrlSource = target.base_url ?? target.baseUrl ?? target.endpoint;
1584
+ const baseUrlSource = target.base_url ?? target.endpoint;
1455
1585
  const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi base url`, {
1456
1586
  allowLiteral: true,
1457
1587
  optionalEnv: true
@@ -1510,16 +1640,16 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
1510
1640
  function resolvePiCliConfig(target, env, evalFilePath) {
1511
1641
  const executableSource = target.executable ?? target.command ?? target.binary;
1512
1642
  const subproviderSource = target.subprovider;
1513
- const modelSource = target.model ?? target.pi_model ?? target.piModel;
1514
- const apiKeySource = target.api_key ?? target.apiKey;
1515
- const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
1516
- const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
1643
+ const modelSource = target.model ?? target.pi_model;
1644
+ const apiKeySource = target.api_key;
1645
+ const toolsSource = target.tools ?? target.pi_tools;
1646
+ const thinkingSource = target.thinking ?? target.pi_thinking;
1517
1647
  const cwdSource = target.cwd;
1518
- const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
1519
- const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
1520
- const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
1521
- const logFormatSource = target.log_format ?? target.logFormat;
1522
- const systemPromptSource = target.system_prompt ?? target.systemPrompt;
1648
+ const workspaceTemplateSource = target.workspace_template;
1649
+ const timeoutSource = target.timeout_seconds;
1650
+ const logDirSource = target.log_dir ?? target.log_directory;
1651
+ const logFormatSource = target.log_format;
1652
+ const systemPromptSource = target.system_prompt;
1523
1653
  const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
1524
1654
  allowLiteral: true,
1525
1655
  optionalEnv: true
@@ -1538,7 +1668,7 @@ function resolvePiCliConfig(target, env, evalFilePath) {
1538
1668
  allowLiteral: false,
1539
1669
  optionalEnv: true
1540
1670
  });
1541
- const baseUrlSource = target.base_url ?? target.baseUrl ?? target.endpoint;
1671
+ const baseUrlSource = target.base_url ?? target.endpoint;
1542
1672
  const baseUrl = resolveOptionalString(baseUrlSource, env, `${target.name} pi-cli base url`, {
1543
1673
  allowLiteral: true,
1544
1674
  optionalEnv: true
@@ -1596,11 +1726,11 @@ function resolvePiCliConfig(target, env, evalFilePath) {
1596
1726
  function resolveClaudeConfig(target, env, evalFilePath) {
1597
1727
  const modelSource = target.model;
1598
1728
  const cwdSource = target.cwd;
1599
- const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
1600
- const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
1601
- const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
1602
- const logFormatSource = target.log_format ?? target.logFormat ?? target.log_output_format ?? target.logOutputFormat ?? env.AGENTV_CLAUDE_LOG_FORMAT;
1603
- const systemPromptSource = target.system_prompt ?? target.systemPrompt;
1729
+ const workspaceTemplateSource = target.workspace_template;
1730
+ const timeoutSource = target.timeout_seconds;
1731
+ const logDirSource = target.log_dir ?? target.log_directory;
1732
+ const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
1733
+ const systemPromptSource = target.system_prompt;
1604
1734
  const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
1605
1735
  allowLiteral: true,
1606
1736
  optionalEnv: true
@@ -1633,8 +1763,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
1633
1763
  });
1634
1764
  const logFormat = normalizeClaudeLogFormat(logFormatSource);
1635
1765
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
1636
- const maxTurns = typeof target.max_turns === "number" ? target.max_turns : typeof target.maxTurns === "number" ? target.maxTurns : void 0;
1637
- const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : typeof target.maxBudgetUsd === "number" ? target.maxBudgetUsd : void 0;
1766
+ const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
1767
+ const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
1638
1768
  return {
1639
1769
  model,
1640
1770
  systemPrompt,
@@ -1665,9 +1795,7 @@ function resolveMockConfig(target) {
1665
1795
  return { response };
1666
1796
  }
1667
1797
  function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
1668
- const workspaceTemplateEnvVar = resolveOptionalLiteralString(
1669
- target.workspace_template ?? target.workspaceTemplate
1670
- );
1798
+ const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
1671
1799
  let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
1672
1800
  workspaceTemplateEnvVar,
1673
1801
  env,
@@ -1682,9 +1810,9 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
1682
1810
  }
1683
1811
  const executableSource = target.executable;
1684
1812
  const waitSource = target.wait;
1685
- const dryRunSource = target.dry_run ?? target.dryRun;
1686
- const subagentRootSource = target.subagent_root ?? target.subagentRoot;
1687
- const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
1813
+ const dryRunSource = target.dry_run;
1814
+ const subagentRootSource = target.subagent_root;
1815
+ const timeoutSource = target.timeout_seconds;
1688
1816
  const defaultCommand = insiders ? "code-insiders" : "code";
1689
1817
  const executable = resolveOptionalString(executableSource, env, `${target.name} vscode executable`, {
1690
1818
  allowLiteral: true,
@@ -1719,8 +1847,8 @@ function resolveCliConfig(target, env, evalFilePath) {
1719
1847
  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
1720
1848
  if (!parseResult.success) {
1721
1849
  const firstError = parseResult.error.errors[0];
1722
- const path49 = firstError?.path.join(".") || "";
1723
- const prefix = path49 ? `${target.name} ${path49}: ` : `${target.name}: `;
1850
+ const path410 = firstError?.path.join(".") || "";
1851
+ const prefix = path410 ? `${target.name} ${path410}: ` : `${target.name}: `;
1724
1852
  throw new Error(`${prefix}${firstError?.message}`);
1725
1853
  }
1726
1854
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -1735,7 +1863,7 @@ function resolveCliConfig(target, env, evalFilePath) {
1735
1863
  }
1736
1864
  function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath) {
1737
1865
  const command = target.command ? resolveString(target.command, env, `${target.name} command`, true) : `bun run .agentv/providers/${providerKind}.ts {PROMPT}`;
1738
- const timeoutSeconds = target.timeout_seconds ?? target.timeoutSeconds;
1866
+ const timeoutSeconds = target.timeout_seconds;
1739
1867
  const timeoutMs = resolveTimeoutMs(timeoutSeconds, `${target.name} timeout`);
1740
1868
  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
1741
1869
  allowLiteral: true,
@@ -1799,10 +1927,10 @@ function resolveDiscover(value, targetName) {
1799
1927
  throw new Error(`Target "${targetName}": discover must be "latest" (got "${String(value)}")`);
1800
1928
  }
1801
1929
  function resolveCopilotLogConfig(target, env) {
1802
- const sessionDirSource = target.session_dir ?? target.sessionDir;
1803
- const sessionIdSource = target.session_id ?? target.sessionId;
1930
+ const sessionDirSource = target.session_dir;
1931
+ const sessionIdSource = target.session_id;
1804
1932
  const discoverSource = target.discover;
1805
- const sessionStateDirSource = target.session_state_dir ?? target.sessionStateDir;
1933
+ const sessionStateDirSource = target.session_state_dir;
1806
1934
  const cwdSource = target.cwd;
1807
1935
  return {
1808
1936
  sessionDir: resolveOptionalString(
@@ -1975,6 +2103,15 @@ var AGENT_PROVIDER_KINDS = [
1975
2103
  "vscode",
1976
2104
  "vscode-insiders"
1977
2105
  ];
2106
+ var LLM_GRADER_CAPABLE_KINDS = [
2107
+ "openai",
2108
+ "openrouter",
2109
+ "azure",
2110
+ "anthropic",
2111
+ "gemini",
2112
+ "agentv",
2113
+ "mock"
2114
+ ];
1978
2115
  var KNOWN_PROVIDERS = [
1979
2116
  "openai",
1980
2117
  "openrouter",
@@ -1994,7 +2131,8 @@ var KNOWN_PROVIDERS = [
1994
2131
  "mock",
1995
2132
  "vscode",
1996
2133
  "vscode-insiders",
1997
- "agentv"
2134
+ "agentv",
2135
+ "transcript"
1998
2136
  ];
1999
2137
  var PROVIDER_ALIASES = [
2000
2138
  "azure-openai",
@@ -6803,7 +6941,7 @@ function createOpenRouter(options = {}) {
6803
6941
  );
6804
6942
  const createChatModel = (modelId, settings = {}) => new OpenRouterChatLanguageModel(modelId, settings, {
6805
6943
  provider: "openrouter.chat",
6806
- url: ({ path: path49 }) => `${baseURL}${path49}`,
6944
+ url: ({ path: path50 }) => `${baseURL}${path50}`,
6807
6945
  headers: getHeaders,
6808
6946
  compatibility,
6809
6947
  fetch: options.fetch,
@@ -6811,7 +6949,7 @@ function createOpenRouter(options = {}) {
6811
6949
  });
6812
6950
  const createCompletionModel = (modelId, settings = {}) => new OpenRouterCompletionLanguageModel(modelId, settings, {
6813
6951
  provider: "openrouter.completion",
6814
- url: ({ path: path49 }) => `${baseURL}${path49}`,
6952
+ url: ({ path: path50 }) => `${baseURL}${path50}`,
6815
6953
  headers: getHeaders,
6816
6954
  compatibility,
6817
6955
  fetch: options.fetch,
@@ -6819,14 +6957,14 @@ function createOpenRouter(options = {}) {
6819
6957
  });
6820
6958
  const createEmbeddingModel = (modelId, settings = {}) => new OpenRouterEmbeddingModel(modelId, settings, {
6821
6959
  provider: "openrouter.embedding",
6822
- url: ({ path: path49 }) => `${baseURL}${path49}`,
6960
+ url: ({ path: path50 }) => `${baseURL}${path50}`,
6823
6961
  headers: getHeaders,
6824
6962
  fetch: options.fetch,
6825
6963
  extraBody: options.extraBody
6826
6964
  });
6827
6965
  const createImageModel = (modelId, settings = {}) => new OpenRouterImageModel(modelId, settings, {
6828
6966
  provider: "openrouter.image",
6829
- url: ({ path: path49 }) => `${baseURL}${path49}`,
6967
+ url: ({ path: path50 }) => `${baseURL}${path50}`,
6830
6968
  headers: getHeaders,
6831
6969
  fetch: options.fetch,
6832
6970
  extraBody: options.extraBody
@@ -14345,11 +14483,13 @@ import { tmpdir } from "node:os";
14345
14483
  import path19 from "node:path";
14346
14484
  import { execSync as execSync2 } from "node:child_process";
14347
14485
  import { randomUUID as randomUUID8 } from "node:crypto";
14348
- import { accessSync as accessSync2, createWriteStream as createWriteStream6 } from "node:fs";
14486
+ import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
14349
14487
  import { mkdir as mkdir7 } from "node:fs/promises";
14350
- import path20 from "node:path";
14488
+ import path21 from "node:path";
14351
14489
  import { createInterface } from "node:readline";
14352
- import { fileURLToPath as fileURLToPath3 } from "node:url";
14490
+ import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
14491
+ import os2 from "node:os";
14492
+ import path20 from "node:path";
14353
14493
  import { exec as exec2 } from "node:child_process";
14354
14494
  import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
14355
14495
  import path322 from "node:path";
@@ -14358,18 +14498,16 @@ import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
14358
14498
  import path30 from "node:path";
14359
14499
  import { constants as constants22 } from "node:fs";
14360
14500
  import { access as access22, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
14361
- import path21 from "node:path";
14362
14501
  import path222 from "node:path";
14363
14502
  import path23 from "node:path";
14364
- import { readFile as readFile9 } from "node:fs/promises";
14365
14503
  import path24 from "node:path";
14504
+ import { readFile as readFile9 } from "node:fs/promises";
14505
+ import path25 from "node:path";
14366
14506
  import { exec, spawn as spawn4 } from "node:child_process";
14367
14507
  import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
14368
14508
  import path27 from "node:path";
14369
14509
  import { promisify as promisify2 } from "node:util";
14370
14510
  import path26 from "node:path";
14371
- import os2 from "node:os";
14372
- import path25 from "node:path";
14373
14511
  import { copyFile, mkdir as mkdir10, readFile as readFile10, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
14374
14512
  import path29 from "node:path";
14375
14513
  import path28 from "node:path";
@@ -14420,12 +14558,15 @@ import { existsSync as existsSync5 } from "node:fs";
14420
14558
  import path45 from "node:path";
14421
14559
  import { mkdir as mkdir15, readFile as readFile13, writeFile as writeFile8 } from "node:fs/promises";
14422
14560
  import path46 from "node:path";
14423
- import { existsSync as existsSync6, mkdirSync, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
14561
+ import { existsSync as existsSync6, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
14424
14562
  import path47 from "node:path";
14425
14563
  import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
14426
14564
  import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
14427
14565
  import { homedir as homedir3 } from "node:os";
14428
14566
  import path48 from "node:path";
14567
+ import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
14568
+ import { homedir as homedir4 } from "node:os";
14569
+ import path49 from "node:path";
14429
14570
  import { readFile as readFile14 } from "node:fs/promises";
14430
14571
  function computeTraceSummary(messages) {
14431
14572
  const toolCallCounts = {};
@@ -15213,8 +15354,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15213
15354
  const negate = rawEvaluator.negate === true ? true : void 0;
15214
15355
  if (isCustomType) {
15215
15356
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15216
- const required2 = parseRequired(rawEvaluator.required);
15217
- const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "negate"]);
15357
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15358
+ rawEvaluator.required,
15359
+ rawEvaluator.min_score,
15360
+ name21,
15361
+ evalId
15362
+ );
15363
+ const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "weight", "required", "min_score", "negate"]);
15218
15364
  const config2 = {};
15219
15365
  for (const [key, value] of Object.entries(rawEvaluator)) {
15220
15366
  if (!knownProps2.has(key) && value !== void 0) {
@@ -15226,6 +15372,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15226
15372
  type: customTypeName,
15227
15373
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15228
15374
  ...required2 !== void 0 ? { required: required2 } : {},
15375
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15229
15376
  ...negate !== void 0 ? { negate } : {},
15230
15377
  ...Object.keys(config2).length > 0 ? { config: config2 } : {}
15231
15378
  });
@@ -15295,7 +15442,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15295
15442
  );
15296
15443
  }
15297
15444
  }
15298
- const required2 = parseRequired(rawEvaluator.required);
15445
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15446
+ rawEvaluator.required,
15447
+ rawEvaluator.min_score,
15448
+ name21,
15449
+ evalId
15450
+ );
15299
15451
  const knownProps2 = /* @__PURE__ */ new Set([
15300
15452
  "name",
15301
15453
  "type",
@@ -15321,6 +15473,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15321
15473
  resolvedCwd,
15322
15474
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15323
15475
  ...required2 !== void 0 ? { required: required2 } : {},
15476
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15324
15477
  ...negate !== void 0 ? { negate } : {},
15325
15478
  ...Object.keys(config2).length > 0 ? { config: config2 } : {},
15326
15479
  ...targetConfig !== void 0 ? { target: targetConfig } : {}
@@ -15449,7 +15602,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15449
15602
  };
15450
15603
  }
15451
15604
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15452
- const required2 = parseRequired(rawEvaluator.required);
15605
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15606
+ rawEvaluator.required,
15607
+ rawEvaluator.min_score,
15608
+ name21,
15609
+ evalId
15610
+ );
15453
15611
  evaluators.push({
15454
15612
  name: name21,
15455
15613
  type: "composite",
@@ -15457,6 +15615,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15457
15615
  aggregator,
15458
15616
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15459
15617
  ...required2 !== void 0 ? { required: required2 } : {},
15618
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15460
15619
  ...negate !== void 0 ? { negate } : {}
15461
15620
  });
15462
15621
  continue;
@@ -15567,7 +15726,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15567
15726
  continue;
15568
15727
  }
15569
15728
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15570
- const required2 = parseRequired(rawEvaluator.required);
15729
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15730
+ rawEvaluator.required,
15731
+ rawEvaluator.min_score,
15732
+ name21,
15733
+ evalId
15734
+ );
15571
15735
  const config2 = {
15572
15736
  name: name21,
15573
15737
  type: "tool-trajectory",
@@ -15576,6 +15740,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15576
15740
  ...expected ? { expected } : {},
15577
15741
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15578
15742
  ...required2 !== void 0 ? { required: required2 } : {},
15743
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15579
15744
  ...negate !== void 0 ? { negate } : {},
15580
15745
  ...argsMatch2 !== void 0 ? { argsMatch: argsMatch2 } : {}
15581
15746
  };
@@ -15638,7 +15803,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15638
15803
  const aggregation = asString(rawEvaluator.aggregation);
15639
15804
  const validAggregation = isValidFieldAggregationType(aggregation) ? aggregation : void 0;
15640
15805
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15641
- const required2 = parseRequired(rawEvaluator.required);
15806
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15807
+ rawEvaluator.required,
15808
+ rawEvaluator.min_score,
15809
+ name21,
15810
+ evalId
15811
+ );
15642
15812
  evaluators.push({
15643
15813
  name: name21,
15644
15814
  type: "field-accuracy",
@@ -15646,6 +15816,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15646
15816
  ...validAggregation ? { aggregation: validAggregation } : {},
15647
15817
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15648
15818
  ...required2 !== void 0 ? { required: required2 } : {},
15819
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15649
15820
  ...negate !== void 0 ? { negate } : {}
15650
15821
  });
15651
15822
  continue;
@@ -15659,13 +15830,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15659
15830
  continue;
15660
15831
  }
15661
15832
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15662
- const required2 = parseRequired(rawEvaluator.required);
15833
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15834
+ rawEvaluator.required,
15835
+ rawEvaluator.min_score,
15836
+ name21,
15837
+ evalId
15838
+ );
15663
15839
  evaluators.push({
15664
15840
  name: name21,
15665
15841
  type: "latency",
15666
15842
  threshold,
15667
15843
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15668
15844
  ...required2 !== void 0 ? { required: required2 } : {},
15845
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15669
15846
  ...negate !== void 0 ? { negate } : {}
15670
15847
  });
15671
15848
  continue;
@@ -15679,13 +15856,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15679
15856
  continue;
15680
15857
  }
15681
15858
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15682
- const required2 = parseRequired(rawEvaluator.required);
15859
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15860
+ rawEvaluator.required,
15861
+ rawEvaluator.min_score,
15862
+ name21,
15863
+ evalId
15864
+ );
15683
15865
  evaluators.push({
15684
15866
  name: name21,
15685
15867
  type: "cost",
15686
15868
  budget,
15687
15869
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15688
15870
  ...required2 !== void 0 ? { required: required2 } : {},
15871
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15689
15872
  ...negate !== void 0 ? { negate } : {}
15690
15873
  });
15691
15874
  continue;
@@ -15717,13 +15900,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15717
15900
  continue;
15718
15901
  }
15719
15902
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15720
- const required2 = parseRequired(rawEvaluator.required);
15903
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15904
+ rawEvaluator.required,
15905
+ rawEvaluator.min_score,
15906
+ name21,
15907
+ evalId
15908
+ );
15721
15909
  evaluators.push({
15722
15910
  name: name21,
15723
15911
  type: "token-usage",
15724
15912
  ...validLimits,
15725
15913
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15726
15914
  ...required2 !== void 0 ? { required: required2 } : {},
15915
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15727
15916
  ...negate !== void 0 ? { negate } : {}
15728
15917
  });
15729
15918
  continue;
@@ -15769,13 +15958,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15769
15958
  continue;
15770
15959
  }
15771
15960
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15772
- const required2 = parseRequired(rawEvaluator.required);
15961
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15962
+ rawEvaluator.required,
15963
+ rawEvaluator.min_score,
15964
+ name21,
15965
+ evalId
15966
+ );
15773
15967
  evaluators.push({
15774
15968
  name: name21,
15775
15969
  type: "execution-metrics",
15776
15970
  ...validThresholds,
15777
15971
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15778
15972
  ...required2 !== void 0 ? { required: required2 } : {},
15973
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15779
15974
  ...negate !== void 0 ? { negate } : {}
15780
15975
  });
15781
15976
  continue;
@@ -15789,7 +15984,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15789
15984
  const rawShouldTrigger = rawEvaluator.should_trigger;
15790
15985
  const shouldTrigger = typeof rawShouldTrigger === "boolean" ? rawShouldTrigger : void 0;
15791
15986
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15792
- const required2 = parseRequired(rawEvaluator.required);
15987
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
15988
+ rawEvaluator.required,
15989
+ rawEvaluator.min_score,
15990
+ name21,
15991
+ evalId
15992
+ );
15793
15993
  evaluators.push({
15794
15994
  name: name21,
15795
15995
  type: "skill-trigger",
@@ -15797,6 +15997,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15797
15997
  ...shouldTrigger !== void 0 ? { should_trigger: shouldTrigger } : {},
15798
15998
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15799
15999
  ...required2 !== void 0 ? { required: required2 } : {},
16000
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15800
16001
  ...negate !== void 0 ? { negate } : {}
15801
16002
  });
15802
16003
  continue;
@@ -15808,13 +16009,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15808
16009
  continue;
15809
16010
  }
15810
16011
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15811
- const required2 = parseRequired(rawEvaluator.required);
16012
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16013
+ rawEvaluator.required,
16014
+ rawEvaluator.min_score,
16015
+ name21,
16016
+ evalId
16017
+ );
15812
16018
  evaluators.push({
15813
16019
  name: name21,
15814
16020
  type: "contains",
15815
16021
  value,
15816
16022
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15817
16023
  ...required2 !== void 0 ? { required: required2 } : {},
16024
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15818
16025
  ...negate !== void 0 ? { negate } : {}
15819
16026
  });
15820
16027
  continue;
@@ -15828,13 +16035,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15828
16035
  continue;
15829
16036
  }
15830
16037
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15831
- const required2 = parseRequired(rawEvaluator.required);
16038
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16039
+ rawEvaluator.required,
16040
+ rawEvaluator.min_score,
16041
+ name21,
16042
+ evalId
16043
+ );
15832
16044
  evaluators.push({
15833
16045
  name: name21,
15834
16046
  type: typeValue,
15835
16047
  value,
15836
16048
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15837
16049
  ...required2 !== void 0 ? { required: required2 } : {},
16050
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15838
16051
  ...negate !== void 0 ? { negate } : {}
15839
16052
  });
15840
16053
  continue;
@@ -15846,13 +16059,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15846
16059
  continue;
15847
16060
  }
15848
16061
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15849
- const required2 = parseRequired(rawEvaluator.required);
16062
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16063
+ rawEvaluator.required,
16064
+ rawEvaluator.min_score,
16065
+ name21,
16066
+ evalId
16067
+ );
15850
16068
  evaluators.push({
15851
16069
  name: name21,
15852
16070
  type: "icontains",
15853
16071
  value,
15854
16072
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15855
16073
  ...required2 !== void 0 ? { required: required2 } : {},
16074
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15856
16075
  ...negate !== void 0 ? { negate } : {}
15857
16076
  });
15858
16077
  continue;
@@ -15866,13 +16085,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15866
16085
  continue;
15867
16086
  }
15868
16087
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15869
- const required2 = parseRequired(rawEvaluator.required);
16088
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16089
+ rawEvaluator.required,
16090
+ rawEvaluator.min_score,
16091
+ name21,
16092
+ evalId
16093
+ );
15870
16094
  evaluators.push({
15871
16095
  name: name21,
15872
16096
  type: typeValue,
15873
16097
  value,
15874
16098
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15875
16099
  ...required2 !== void 0 ? { required: required2 } : {},
16100
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15876
16101
  ...negate !== void 0 ? { negate } : {}
15877
16102
  });
15878
16103
  continue;
@@ -15884,13 +16109,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15884
16109
  continue;
15885
16110
  }
15886
16111
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15887
- const required2 = parseRequired(rawEvaluator.required);
16112
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16113
+ rawEvaluator.required,
16114
+ rawEvaluator.min_score,
16115
+ name21,
16116
+ evalId
16117
+ );
15888
16118
  evaluators.push({
15889
16119
  name: name21,
15890
16120
  type: typeValue,
15891
16121
  value,
15892
16122
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15893
16123
  ...required2 !== void 0 ? { required: required2 } : {},
16124
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15894
16125
  ...negate !== void 0 ? { negate } : {}
15895
16126
  });
15896
16127
  continue;
@@ -15903,7 +16134,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15903
16134
  }
15904
16135
  const flags = asString(rawEvaluator.flags);
15905
16136
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15906
- const required2 = parseRequired(rawEvaluator.required);
16137
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16138
+ rawEvaluator.required,
16139
+ rawEvaluator.min_score,
16140
+ name21,
16141
+ evalId
16142
+ );
15907
16143
  evaluators.push({
15908
16144
  name: name21,
15909
16145
  type: "regex",
@@ -15911,18 +16147,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15911
16147
  ...flags !== void 0 ? { flags } : {},
15912
16148
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15913
16149
  ...required2 !== void 0 ? { required: required2 } : {},
16150
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15914
16151
  ...negate !== void 0 ? { negate } : {}
15915
16152
  });
15916
16153
  continue;
15917
16154
  }
15918
16155
  if (typeValue === "is-json") {
15919
16156
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15920
- const required2 = parseRequired(rawEvaluator.required);
16157
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16158
+ rawEvaluator.required,
16159
+ rawEvaluator.min_score,
16160
+ name21,
16161
+ evalId
16162
+ );
15921
16163
  evaluators.push({
15922
16164
  name: name21,
15923
16165
  type: "is-json",
15924
16166
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15925
16167
  ...required2 !== void 0 ? { required: required2 } : {},
16168
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15926
16169
  ...negate !== void 0 ? { negate } : {}
15927
16170
  });
15928
16171
  continue;
@@ -15934,13 +16177,19 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15934
16177
  continue;
15935
16178
  }
15936
16179
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15937
- const required2 = parseRequired(rawEvaluator.required);
16180
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16181
+ rawEvaluator.required,
16182
+ rawEvaluator.min_score,
16183
+ name21,
16184
+ evalId
16185
+ );
15938
16186
  evaluators.push({
15939
16187
  name: name21,
15940
16188
  type: "equals",
15941
16189
  value,
15942
16190
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15943
16191
  ...required2 !== void 0 ? { required: required2 } : {},
16192
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15944
16193
  ...negate !== void 0 ? { negate } : {}
15945
16194
  });
15946
16195
  continue;
@@ -15976,7 +16225,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15976
16225
  continue;
15977
16226
  }
15978
16227
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
15979
- const required2 = parseRequired(rawEvaluator.required);
16228
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16229
+ rawEvaluator.required,
16230
+ rawEvaluator.min_score,
16231
+ name21,
16232
+ evalId
16233
+ );
15980
16234
  evaluators.push({
15981
16235
  name: name21,
15982
16236
  type: "llm-grader",
@@ -15984,6 +16238,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15984
16238
  ...graderTargetName ? { target: graderTargetName } : {},
15985
16239
  ...weight2 !== void 0 ? { weight: weight2 } : {},
15986
16240
  ...required2 !== void 0 ? { required: required2 } : {},
16241
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
15987
16242
  ...negate !== void 0 ? { negate } : {}
15988
16243
  });
15989
16244
  continue;
@@ -16053,7 +16308,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
16053
16308
  continue;
16054
16309
  }
16055
16310
  const weight2 = validateWeight(rawEvaluator.weight, name21, evalId);
16056
- const required2 = parseRequired(rawEvaluator.required);
16311
+ const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
16312
+ rawEvaluator.required,
16313
+ rawEvaluator.min_score,
16314
+ name21,
16315
+ evalId
16316
+ );
16057
16317
  evaluators.push({
16058
16318
  name: name21,
16059
16319
  type: "llm-grader",
@@ -16061,12 +16321,18 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
16061
16321
  ...graderTargetName ? { target: graderTargetName } : {},
16062
16322
  ...weight2 !== void 0 ? { weight: weight2 } : {},
16063
16323
  ...required2 !== void 0 ? { required: required2 } : {},
16324
+ ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
16064
16325
  ...negate !== void 0 ? { negate } : {}
16065
16326
  });
16066
16327
  continue;
16067
16328
  }
16068
16329
  const weight = validateWeight(rawEvaluator.weight, name21, evalId);
16069
- const required = parseRequired(rawEvaluator.required);
16330
+ const { required, min_score } = parseRequiredAndMinScore(
16331
+ rawEvaluator.required,
16332
+ rawEvaluator.min_score,
16333
+ name21,
16334
+ evalId
16335
+ );
16070
16336
  const knownProps = /* @__PURE__ */ new Set([
16071
16337
  "name",
16072
16338
  "type",
@@ -16077,6 +16343,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
16077
16343
  "weight",
16078
16344
  "config",
16079
16345
  "required",
16346
+ "min_score",
16080
16347
  "negate",
16081
16348
  "max_steps",
16082
16349
  "maxSteps",
@@ -16106,6 +16373,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
16106
16373
  ...graderTargetName ? { target: graderTargetName } : {},
16107
16374
  ...weight !== void 0 ? { weight } : {},
16108
16375
  ...required !== void 0 ? { required } : {},
16376
+ ...min_score !== void 0 ? { min_score } : {},
16109
16377
  ...negate !== void 0 ? { negate } : {},
16110
16378
  ...finalConfig ? { config: finalConfig } : {},
16111
16379
  ...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
@@ -16237,10 +16505,23 @@ ${detailBlock}${ANSI_RESET4}`);
16237
16505
  console.warn(`${ANSI_YELLOW3}Warning: ${message}${ANSI_RESET4}`);
16238
16506
  }
16239
16507
  }
16240
- function parseRequired(value) {
16241
- if (value === true) return true;
16242
- if (typeof value === "number" && value > 0 && value <= 1) return value;
16243
- return void 0;
16508
+ function parseRequiredAndMinScore(rawRequired, rawMinScore, evaluatorName, evalId) {
16509
+ const result = {};
16510
+ if (typeof rawMinScore === "number" && rawMinScore > 0 && rawMinScore <= 1) {
16511
+ result.min_score = rawMinScore;
16512
+ }
16513
+ if (rawRequired === true) {
16514
+ result.required = true;
16515
+ } else if (typeof rawRequired === "number" && rawRequired > 0 && rawRequired <= 1) {
16516
+ if (result.min_score === void 0) {
16517
+ result.min_score = rawRequired;
16518
+ }
16519
+ result.required = rawRequired;
16520
+ logWarning2(
16521
+ `Evaluator '${evaluatorName}' in '${evalId}': 'required: ${rawRequired}' is deprecated. Use 'required: true' + 'min_score: ${rawRequired}' instead.`
16522
+ );
16523
+ }
16524
+ return result;
16244
16525
  }
16245
16526
  function validateWeight(rawWeight, evaluatorName, evalId) {
16246
16527
  if (rawWeight === void 0) {
@@ -16283,16 +16564,30 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
16283
16564
  const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
16284
16565
  const expectedOutcome = asString(rawRubric.outcome) ?? "";
16285
16566
  const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
16567
+ let minScore;
16286
16568
  let requiredMinScore;
16287
16569
  let required;
16288
- if (typeof rawRubric.required_min_score === "number") {
16289
- const minScore = rawRubric.required_min_score;
16290
- if (!Number.isInteger(minScore) || minScore < 0 || minScore > 10) {
16570
+ if (typeof rawRubric.min_score === "number") {
16571
+ const ms = rawRubric.min_score;
16572
+ if (ms <= 0 || ms > 1) {
16573
+ throw new Error(
16574
+ `Invalid min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be in (0, 1] (got ${ms})`
16575
+ );
16576
+ }
16577
+ minScore = ms;
16578
+ requiredMinScore = Math.round(ms * 10);
16579
+ } else if (typeof rawRubric.required_min_score === "number") {
16580
+ const rms = rawRubric.required_min_score;
16581
+ if (!Number.isInteger(rms) || rms < 0 || rms > 10) {
16291
16582
  throw new Error(
16292
- `Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${minScore})`
16583
+ `Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${rms})`
16293
16584
  );
16294
16585
  }
16295
- requiredMinScore = minScore;
16586
+ requiredMinScore = rms;
16587
+ minScore = rms / 10;
16588
+ logWarning2(
16589
+ `Rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': 'required_min_score: ${rms}' is deprecated. Use 'min_score: ${rms / 10}' (0-1 scale) instead.`
16590
+ );
16296
16591
  }
16297
16592
  if (typeof rawRubric.required === "boolean") {
16298
16593
  required = rawRubric.required;
@@ -16312,6 +16607,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
16312
16607
  weight,
16313
16608
  ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
16314
16609
  ...required !== void 0 ? { required } : {},
16610
+ ...minScore !== void 0 ? { min_score: minScore } : {},
16315
16611
  ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
16316
16612
  score_ranges: scoreRanges
16317
16613
  });
@@ -16328,6 +16624,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
16328
16624
  weight,
16329
16625
  // Default to required: true if not specified (backward compatibility)
16330
16626
  required: required ?? true,
16627
+ ...minScore !== void 0 ? { min_score: minScore } : {},
16331
16628
  ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {}
16332
16629
  });
16333
16630
  }
@@ -16456,12 +16753,22 @@ function parseInlineRubrics(rawRubrics) {
16456
16753
  id: asString(rubric.id) ?? `rubric-${index + 1}`,
16457
16754
  weight: typeof rubric.weight === "number" ? rubric.weight : 1
16458
16755
  };
16756
+ let inlineMinScore;
16757
+ let inlineRequiredMinScore;
16758
+ if (typeof rubric.min_score === "number") {
16759
+ inlineMinScore = rubric.min_score;
16760
+ inlineRequiredMinScore = Math.round(inlineMinScore * 10);
16761
+ } else if (typeof rubric.required_min_score === "number") {
16762
+ inlineRequiredMinScore = rubric.required_min_score;
16763
+ inlineMinScore = inlineRequiredMinScore / 10;
16764
+ }
16459
16765
  if (scoreRanges && scoreRanges.length > 0) {
16460
16766
  return {
16461
16767
  ...baseRubric,
16462
16768
  ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
16463
16769
  ...typeof rubric.required === "boolean" ? { required: rubric.required } : {},
16464
- ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {},
16770
+ ...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
16771
+ ...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {},
16465
16772
  score_ranges: scoreRanges
16466
16773
  };
16467
16774
  }
@@ -16469,7 +16776,8 @@ function parseInlineRubrics(rawRubrics) {
16469
16776
  ...baseRubric,
16470
16777
  outcome: expectedOutcome,
16471
16778
  required: typeof rubric.required === "boolean" ? rubric.required : true,
16472
- ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {}
16779
+ ...inlineMinScore !== void 0 ? { min_score: inlineMinScore } : {},
16780
+ ...inlineRequiredMinScore !== void 0 ? { required_min_score: inlineRequiredMinScore } : {}
16473
16781
  };
16474
16782
  }).filter((r) => r.outcome && r.outcome.length > 0 || "score_ranges" in r && r.score_ranges);
16475
16783
  if (rubricItems.length === 0) {
@@ -16851,6 +17159,9 @@ function resolveExpectedMessages(raw) {
16851
17159
  var ANSI_YELLOW5 = "\x1B[33m";
16852
17160
  var ANSI_RED2 = "\x1B[31m";
16853
17161
  var ANSI_RESET6 = "\x1B[0m";
17162
+ function matchesFilter(id, filter2) {
17163
+ return typeof filter2 === "string" ? micromatch.isMatch(id, filter2) : filter2.some((pattern) => micromatch.isMatch(id, pattern));
17164
+ }
16854
17165
  function detectFormat(filePath) {
16855
17166
  const ext = path6.extname(filePath).toLowerCase();
16856
17167
  if (ext === ".jsonl") return "jsonl";
@@ -16918,40 +17229,40 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
16918
17229
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
16919
17230
  const rawFile = await readFile5(absoluteTestPath, "utf8");
16920
17231
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
16921
- const fallbackEvalSet = path6.basename(absoluteTestPath, ".jsonl") || "eval";
16922
- const evalSetName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackEvalSet;
17232
+ const fallbackSuiteName = path6.basename(absoluteTestPath, ".jsonl") || "eval";
17233
+ const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
16923
17234
  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
16924
17235
  const globalExecution = sidecar.execution;
16925
17236
  if (verbose) {
16926
17237
  console.log(`
16927
- [JSONL Dataset: ${evalFilePath}]`);
17238
+ [JSONL Suite: ${evalFilePath}]`);
16928
17239
  console.log(` Cases: ${rawCases.length}`);
16929
- console.log(` Eval set: ${evalSetName}`);
17240
+ console.log(` Suite: ${suiteName}`);
16930
17241
  if (sidecar.description) {
16931
17242
  console.log(` Description: ${sidecar.description}`);
16932
17243
  }
16933
17244
  }
16934
17245
  const results = [];
16935
17246
  for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
16936
- const evalcase = rawCases[lineIndex];
17247
+ const testCaseConfig = rawCases[lineIndex];
16937
17248
  const lineNumber = lineIndex + 1;
16938
- const id = asString4(evalcase.id);
16939
- if (filterPattern && (!id || !micromatch.isMatch(id, filterPattern))) {
17249
+ const id = asString4(testCaseConfig.id);
17250
+ if (filterPattern && (!id || !matchesFilter(id, filterPattern))) {
16940
17251
  continue;
16941
17252
  }
16942
- const conversationId = asString4(evalcase.conversation_id);
16943
- let outcome = asString4(evalcase.criteria);
16944
- if (!outcome && evalcase.expected_outcome !== void 0) {
16945
- outcome = asString4(evalcase.expected_outcome);
17253
+ const conversationId = asString4(testCaseConfig.conversation_id);
17254
+ let outcome = asString4(testCaseConfig.criteria);
17255
+ if (!outcome && testCaseConfig.expected_outcome !== void 0) {
17256
+ outcome = asString4(testCaseConfig.expected_outcome);
16946
17257
  if (outcome) {
16947
17258
  logWarning4(
16948
- `Test '${asString4(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
17259
+ `Test '${asString4(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
16949
17260
  );
16950
17261
  }
16951
17262
  }
16952
- const rawInputMessages = resolveInputMessages(evalcase);
16953
- const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
16954
- const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assert !== void 0;
17263
+ const rawInputMessages = resolveInputMessages(testCaseConfig);
17264
+ const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
17265
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assert !== void 0;
16955
17266
  if (!id || !hasEvaluationSpec || !rawInputMessages || rawInputMessages.length === 0) {
16956
17267
  logError2(
16957
17268
  `Skipping incomplete test at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assert`
@@ -16988,18 +17299,23 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
16988
17299
  }
16989
17300
  }
16990
17301
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
16991
- const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
17302
+ const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
16992
17303
  const mergedExecution = caseExecution ?? globalExecution;
16993
- const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
17304
+ const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
16994
17305
  let evaluators;
16995
17306
  try {
16996
- evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
17307
+ evaluators = await parseEvaluators(
17308
+ testCaseConfig,
17309
+ mergedExecution,
17310
+ searchRoots,
17311
+ id ?? "unknown"
17312
+ );
16997
17313
  } catch (error) {
16998
17314
  const message = error instanceof Error ? error.message : String(error);
16999
17315
  logError2(`Skipping test '${id}' at line ${lineNumber}: ${message}`);
17000
17316
  continue;
17001
17317
  }
17002
- const inlineRubrics = evalcase.rubrics;
17318
+ const inlineRubrics = testCaseConfig.rubrics;
17003
17319
  if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
17004
17320
  const rubricEvaluator = parseInlineRubrics(inlineRubrics);
17005
17321
  if (rubricEvaluator) {
@@ -17010,7 +17326,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
17010
17326
  const userFilePaths = collectResolvedInputFilePaths(inputMessages);
17011
17327
  const testCase = {
17012
17328
  id,
17013
- dataset: evalSetName,
17329
+ suite: suiteName,
17014
17330
  conversation_id: conversationId,
17015
17331
  question,
17016
17332
  input: inputMessages,
@@ -17018,7 +17334,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
17018
17334
  reference_answer: referenceAnswer,
17019
17335
  file_paths: userFilePaths,
17020
17336
  criteria: outcome ?? "",
17021
- evaluator: evalCaseEvaluatorKind,
17337
+ evaluator: testCaseEvaluatorKind,
17022
17338
  assertions: evaluators
17023
17339
  };
17024
17340
  results.push(testCase);
@@ -17194,6 +17510,9 @@ function buildChatPromptFromSegments(options) {
17194
17510
  var ANSI_YELLOW6 = "\x1B[33m";
17195
17511
  var ANSI_RED3 = "\x1B[31m";
17196
17512
  var ANSI_RESET7 = "\x1B[0m";
17513
+ function matchesFilter2(id, filter2) {
17514
+ return typeof filter2 === "string" ? micromatch2.isMatch(id, filter2) : filter2.some((pattern) => micromatch2.isMatch(id, pattern));
17515
+ }
17197
17516
  function resolveTests(suite) {
17198
17517
  if (suite.tests !== void 0) return suite.tests;
17199
17518
  if (suite.eval_cases !== void 0) {
@@ -17273,18 +17592,18 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
17273
17592
  throw new Error(`Invalid test file format: ${evalFilePath}`);
17274
17593
  }
17275
17594
  const suite = interpolated;
17276
- const evalSetNameFromSuite = asString5(suite.name)?.trim();
17277
- const fallbackEvalSet = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
17278
- const evalSetName = evalSetNameFromSuite && evalSetNameFromSuite.length > 0 ? evalSetNameFromSuite : fallbackEvalSet;
17279
- const rawTestcases = resolveTests(suite);
17595
+ const suiteNameFromFile = asString5(suite.name)?.trim();
17596
+ const fallbackSuiteName = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
17597
+ const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
17598
+ const rawTestCases = resolveTests(suite);
17280
17599
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
17281
17600
  const evalFileDir = path7.dirname(absoluteTestPath);
17282
- let expandedTestcases;
17283
- if (typeof rawTestcases === "string") {
17284
- const externalPath = path7.resolve(evalFileDir, rawTestcases);
17285
- expandedTestcases = await loadCasesFromFile(externalPath);
17286
- } else if (Array.isArray(rawTestcases)) {
17287
- expandedTestcases = await expandFileReferences(rawTestcases, evalFileDir);
17601
+ let expandedTestCases;
17602
+ if (typeof rawTestCases === "string") {
17603
+ const externalPath = path7.resolve(evalFileDir, rawTestCases);
17604
+ expandedTestCases = await loadCasesFromFile(externalPath);
17605
+ } else if (Array.isArray(rawTestCases)) {
17606
+ expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
17288
17607
  } else {
17289
17608
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
17290
17609
  }
@@ -17299,32 +17618,33 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
17299
17618
  }
17300
17619
  const globalExecution = suiteAssertions !== void 0 ? { ...rawGlobalExecution ?? {}, assertions: suiteAssertions } : rawGlobalExecution;
17301
17620
  const results = [];
17302
- for (const rawEvalcase of expandedTestcases) {
17303
- if (!isJsonObject(rawEvalcase)) {
17621
+ for (const rawTestCase of expandedTestCases) {
17622
+ if (!isJsonObject(rawTestCase)) {
17304
17623
  logWarning5("Skipping invalid test entry (expected object)");
17305
17624
  continue;
17306
17625
  }
17307
- const evalcase = rawEvalcase;
17308
- const id = asString5(evalcase.id);
17309
- if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
17626
+ const testCaseConfig = rawTestCase;
17627
+ const id = asString5(testCaseConfig.id);
17628
+ if (filterPattern && (!id || !matchesFilter2(id, filterPattern))) {
17310
17629
  continue;
17311
17630
  }
17312
- const conversationId = asString5(evalcase.conversation_id);
17313
- let outcome = asString5(evalcase.criteria);
17314
- if (!outcome && evalcase.expected_outcome !== void 0) {
17315
- outcome = asString5(evalcase.expected_outcome);
17631
+ const conversationId = asString5(testCaseConfig.conversation_id);
17632
+ let outcome = asString5(testCaseConfig.criteria);
17633
+ if (!outcome && testCaseConfig.expected_outcome !== void 0) {
17634
+ outcome = asString5(testCaseConfig.expected_outcome);
17316
17635
  if (outcome) {
17317
17636
  logWarning5(
17318
- `Test '${asString5(evalcase.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
17637
+ `Test '${asString5(testCaseConfig.id) ?? "unknown"}': 'expected_outcome' is deprecated. Use 'criteria' instead.`
17319
17638
  );
17320
17639
  }
17321
17640
  }
17322
- const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
17641
+ const caseExecution = isJsonObject(testCaseConfig.execution) ? testCaseConfig.execution : void 0;
17323
17642
  const skipDefaults = caseExecution?.skip_defaults === true;
17643
+ const caseThreshold = typeof caseExecution?.threshold === "number" && caseExecution.threshold >= 0 && caseExecution.threshold <= 1 ? caseExecution.threshold : void 0;
17324
17644
  const effectiveSuiteInputFiles = suiteInputFiles && !skipDefaults ? suiteInputFiles : void 0;
17325
- const testInputMessages = resolveInputMessages(evalcase, effectiveSuiteInputFiles);
17326
- const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
17327
- const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || evalcase.assertions !== void 0 || evalcase.assert !== void 0;
17645
+ const testInputMessages = resolveInputMessages(testCaseConfig, effectiveSuiteInputFiles);
17646
+ const expectedMessages = resolveExpectedMessages(testCaseConfig) ?? [];
17647
+ const hasEvaluationSpec = !!outcome || expectedMessages.length > 0 || testCaseConfig.assertions !== void 0 || testCaseConfig.assert !== void 0;
17328
17648
  if (!id || !hasEvaluationSpec || !testInputMessages || testInputMessages.length === 0) {
17329
17649
  logError3(
17330
17650
  `Skipping incomplete test: ${id ?? "unknown"}. Missing required fields: id, input, and at least one of criteria/expected_output/assertions`
@@ -17371,16 +17691,21 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
17371
17691
  }
17372
17692
  }
17373
17693
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
17374
- const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
17694
+ const testCaseEvaluatorKind = coerceEvaluator(testCaseConfig.evaluator, id) ?? globalEvaluator;
17375
17695
  let evaluators;
17376
17696
  try {
17377
- evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
17697
+ evaluators = await parseEvaluators(
17698
+ testCaseConfig,
17699
+ globalExecution,
17700
+ searchRoots,
17701
+ id ?? "unknown"
17702
+ );
17378
17703
  } catch (error) {
17379
17704
  const message = error instanceof Error ? error.message : String(error);
17380
17705
  logError3(`Skipping test '${id}': ${message}`);
17381
17706
  continue;
17382
17707
  }
17383
- const inlineRubrics = evalcase.rubrics;
17708
+ const inlineRubrics = testCaseConfig.rubrics;
17384
17709
  if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
17385
17710
  const rubricEvaluator = parseInlineRubrics(inlineRubrics);
17386
17711
  if (rubricEvaluator) {
@@ -17389,13 +17714,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
17389
17714
  }
17390
17715
  warnUnconsumedCriteria(outcome, evaluators, id ?? "unknown");
17391
17716
  const userFilePaths = collectResolvedInputFilePaths(inputMessages);
17392
- const caseWorkspace = await resolveWorkspaceConfig(evalcase.workspace, evalFileDir);
17717
+ const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
17393
17718
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
17394
- const metadata = isJsonObject(evalcase.metadata) ? evalcase.metadata : void 0;
17395
- const caseTargets = extractTargetsFromTestCase(evalcase);
17719
+ const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
17720
+ const caseTargets = extractTargetsFromTestCase(testCaseConfig);
17396
17721
  const testCase = {
17397
17722
  id,
17398
- dataset: evalSetName,
17723
+ suite: suiteName,
17399
17724
  category: options?.category,
17400
17725
  conversation_id: conversationId,
17401
17726
  question,
@@ -17404,11 +17729,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
17404
17729
  reference_answer: referenceAnswer,
17405
17730
  file_paths: userFilePaths,
17406
17731
  criteria: outcome ?? "",
17407
- evaluator: evalCaseEvaluatorKind,
17732
+ evaluator: testCaseEvaluatorKind,
17408
17733
  assertions: evaluators,
17409
17734
  workspace: mergedWorkspace,
17410
17735
  metadata,
17411
- targets: caseTargets
17736
+ targets: caseTargets,
17737
+ ...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
17412
17738
  };
17413
17739
  results.push(testCase);
17414
17740
  }
@@ -17939,7 +18265,7 @@ var AzureProvider = class {
17939
18265
  };
17940
18266
  this.retryConfig = config.retry;
17941
18267
  const azure = createAzure(buildAzureOptions(config));
17942
- this.model = azure.chat(config.deploymentName);
18268
+ this.model = config.apiFormat === "responses" ? azure(config.deploymentName) : azure.chat(config.deploymentName);
17943
18269
  }
17944
18270
  id;
17945
18271
  kind = "azure";
@@ -18065,7 +18391,9 @@ function buildAzureOptions(config) {
18065
18391
  const options = {
18066
18392
  apiKey: config.apiKey,
18067
18393
  apiVersion: config.version,
18068
- useDeploymentBasedUrls: true
18394
+ // Chat completions still use deployment-scoped Azure URLs for compatibility
18395
+ // with existing deployments. Responses API should use the SDK's v1 path.
18396
+ useDeploymentBasedUrls: config.apiFormat !== "responses"
18069
18397
  };
18070
18398
  const baseURL = normalizeAzureBaseUrl(config.resourceName);
18071
18399
  if (baseURL) {
@@ -21181,6 +21509,25 @@ var CopilotSdkProvider = class {
21181
21509
  content: systemPrompt
21182
21510
  };
21183
21511
  }
21512
+ if (this.config.byokBaseUrl) {
21513
+ const byokType = this.config.byokType ?? "openai";
21514
+ const provider = {
21515
+ type: byokType,
21516
+ baseUrl: normalizeByokBaseUrl(this.config.byokBaseUrl, byokType)
21517
+ };
21518
+ if (this.config.byokBearerToken) {
21519
+ provider.bearerToken = this.config.byokBearerToken;
21520
+ } else if (this.config.byokApiKey) {
21521
+ provider.apiKey = this.config.byokApiKey;
21522
+ }
21523
+ if (this.config.byokWireApi) {
21524
+ provider.wireApi = this.config.byokWireApi;
21525
+ }
21526
+ if (this.config.byokType === "azure" && this.config.byokApiVersion) {
21527
+ provider.azure = { apiVersion: this.config.byokApiVersion };
21528
+ }
21529
+ sessionOptions.provider = provider;
21530
+ }
21184
21531
  let session;
21185
21532
  try {
21186
21533
  session = await client.createSession(sessionOptions);
@@ -21412,6 +21759,16 @@ function resolveSkillDirectories(cwd) {
21412
21759
  ];
21413
21760
  return candidates.filter((dir) => existsSync2(dir));
21414
21761
  }
21762
+ function normalizeByokBaseUrl(baseUrl, type) {
21763
+ const trimmed = baseUrl.trim().replace(/\/+$/, "");
21764
+ if (/^https?:\/\//i.test(trimmed)) {
21765
+ return trimmed;
21766
+ }
21767
+ if (type === "azure") {
21768
+ return `https://${trimmed}.openai.azure.com`;
21769
+ }
21770
+ return trimmed;
21771
+ }
21415
21772
  function summarizeSdkEvent(eventType, data) {
21416
21773
  if (!data || typeof data !== "object") {
21417
21774
  return eventType;
@@ -21575,6 +21932,22 @@ function extractAzureResourceName(baseUrl) {
21575
21932
  if (urlMatch) return urlMatch[1];
21576
21933
  return baseUrl;
21577
21934
  }
21935
+ function normalizeAzureSdkBaseUrl(baseUrl) {
21936
+ const trimmed = baseUrl.trim().replace(/\/+$/, "");
21937
+ if (!trimmed) {
21938
+ return trimmed;
21939
+ }
21940
+ if (!/^https?:\/\//i.test(trimmed)) {
21941
+ return `https://${trimmed}.openai.azure.com/openai/v1`;
21942
+ }
21943
+ if (/\/openai\/v1$/i.test(trimmed)) {
21944
+ return trimmed;
21945
+ }
21946
+ if (/\/openai$/i.test(trimmed)) {
21947
+ return `${trimmed}/v1`;
21948
+ }
21949
+ return `${trimmed}/openai/v1`;
21950
+ }
21578
21951
  function extractPiTextContent(content) {
21579
21952
  if (typeof content === "string") {
21580
21953
  return content;
@@ -22397,6 +22770,30 @@ async function defaultPiRunner(options) {
22397
22770
  });
22398
22771
  });
22399
22772
  }
22773
+ var logged = false;
22774
+ function getAgentvHome() {
22775
+ const envHome = process.env.AGENTV_HOME;
22776
+ if (envHome && envHome !== "undefined") {
22777
+ if (!logged) {
22778
+ logged = true;
22779
+ console.warn(`Using AGENTV_HOME: ${envHome}`);
22780
+ }
22781
+ return envHome;
22782
+ }
22783
+ return path20.join(os2.homedir(), ".agentv");
22784
+ }
22785
+ function getWorkspacesRoot() {
22786
+ return path20.join(getAgentvHome(), "workspaces");
22787
+ }
22788
+ function getSubagentsRoot() {
22789
+ return path20.join(getAgentvHome(), "subagents");
22790
+ }
22791
+ function getTraceStateRoot() {
22792
+ return path20.join(getAgentvHome(), "trace-state");
22793
+ }
22794
+ function getWorkspacePoolRoot() {
22795
+ return path20.join(getAgentvHome(), "workspace-pool");
22796
+ }
22400
22797
  var piCodingAgentModule = null;
22401
22798
  var piAiModule = null;
22402
22799
  var loadingPromise = null;
@@ -22414,46 +22811,126 @@ async function promptInstall() {
22414
22811
  rl.close();
22415
22812
  }
22416
22813
  }
22417
- function findAgentvRoot() {
22418
- const thisFile = fileURLToPath3(import.meta.url);
22419
- let dir = path20.dirname(thisFile);
22420
- for (let i = 0; i < 10; i++) {
22814
+ function findManagedSdkInstallRoot() {
22815
+ return path21.join(getAgentvHome(), "deps", "pi-sdk");
22816
+ }
22817
+ function resolveGlobalNpmRoot() {
22818
+ try {
22819
+ const root = execSync2("npm root -g", {
22820
+ encoding: "utf-8",
22821
+ stdio: ["ignore", "pipe", "ignore"]
22822
+ }).trim();
22823
+ return root.length > 0 ? root : void 0;
22824
+ } catch {
22825
+ return void 0;
22826
+ }
22827
+ }
22828
+ function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
22829
+ return path21.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
22830
+ }
22831
+ function findAccessiblePath(paths) {
22832
+ for (const candidate of paths) {
22421
22833
  try {
22422
- const pkg = path20.join(dir, "package.json");
22423
- accessSync2(pkg);
22424
- return dir;
22834
+ accessSync2(candidate);
22835
+ return candidate;
22425
22836
  } catch {
22426
- const parent = path20.dirname(dir);
22427
- if (parent === dir) break;
22428
- dir = parent;
22429
22837
  }
22430
22838
  }
22431
- return path20.dirname(thisFile);
22839
+ return void 0;
22432
22840
  }
22433
- async function doLoadSdkModules() {
22841
+ async function tryImportLocalSdkModules() {
22434
22842
  try {
22435
22843
  [piCodingAgentModule, piAiModule] = await Promise.all([
22436
22844
  import("@mariozechner/pi-coding-agent"),
22437
22845
  import("@mariozechner/pi-ai")
22438
22846
  ]);
22847
+ return true;
22439
22848
  } catch {
22440
- if (await promptInstall()) {
22441
- const installDir = findAgentvRoot();
22442
- console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
22443
- execSync2("bun add @mariozechner/pi-coding-agent", {
22444
- cwd: installDir,
22445
- stdio: "inherit"
22446
- });
22447
- [piCodingAgentModule, piAiModule] = await Promise.all([
22448
- import("@mariozechner/pi-coding-agent"),
22449
- import("@mariozechner/pi-ai")
22450
- ]);
22451
- } else {
22452
- throw new Error(
22453
- "pi-coding-agent SDK is not installed. Install it with:\n bun add @mariozechner/pi-coding-agent"
22454
- );
22849
+ return false;
22850
+ }
22851
+ }
22852
+ async function tryImportManagedSdkModules() {
22853
+ const managedRoot = findManagedSdkInstallRoot();
22854
+ const piCodingAgentEntry = findAccessiblePath([
22855
+ path21.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
22856
+ ]);
22857
+ const piAiEntry = findAccessiblePath([
22858
+ path21.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
22859
+ path21.join(
22860
+ managedRoot,
22861
+ "node_modules",
22862
+ "@mariozechner",
22863
+ "pi-coding-agent",
22864
+ "node_modules",
22865
+ "@mariozechner",
22866
+ "pi-ai",
22867
+ "dist",
22868
+ "index.js"
22869
+ )
22870
+ ]);
22871
+ if (!piCodingAgentEntry || !piAiEntry) return false;
22872
+ try {
22873
+ [piCodingAgentModule, piAiModule] = await Promise.all([
22874
+ import(pathToFileURL(piCodingAgentEntry).href),
22875
+ import(pathToFileURL(piAiEntry).href)
22876
+ ]);
22877
+ return true;
22878
+ } catch {
22879
+ return false;
22880
+ }
22881
+ }
22882
+ async function tryImportGlobalSdkModules() {
22883
+ const globalNpmRoot = resolveGlobalNpmRoot();
22884
+ if (!globalNpmRoot) return false;
22885
+ const piCodingAgentEntry = findAccessiblePath([
22886
+ buildGlobalModuleEntry("@mariozechner/pi-coding-agent", globalNpmRoot)
22887
+ ]);
22888
+ const piAiEntry = findAccessiblePath([
22889
+ buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
22890
+ path21.join(
22891
+ globalNpmRoot,
22892
+ "@mariozechner",
22893
+ "pi-coding-agent",
22894
+ "node_modules",
22895
+ "@mariozechner",
22896
+ "pi-ai",
22897
+ "dist",
22898
+ "index.js"
22899
+ )
22900
+ ]);
22901
+ if (!piCodingAgentEntry || !piAiEntry) return false;
22902
+ try {
22903
+ [piCodingAgentModule, piAiModule] = await Promise.all([
22904
+ import(pathToFileURL(piCodingAgentEntry).href),
22905
+ import(pathToFileURL(piAiEntry).href)
22906
+ ]);
22907
+ return true;
22908
+ } catch {
22909
+ return false;
22910
+ }
22911
+ }
22912
+ function installSdkModules(installDir) {
22913
+ console.error(`Installing @mariozechner/pi-coding-agent into ${installDir} via npm...`);
22914
+ mkdirSync(installDir, { recursive: true });
22915
+ execSync2("npm install --no-save --no-package-lock @mariozechner/pi-coding-agent", {
22916
+ cwd: installDir,
22917
+ stdio: "inherit"
22918
+ });
22919
+ }
22920
+ async function doLoadSdkModules() {
22921
+ if (await tryImportLocalSdkModules() || await tryImportManagedSdkModules() || await tryImportGlobalSdkModules()) {
22922
+ return;
22923
+ }
22924
+ if (await promptInstall()) {
22925
+ const installDir = findManagedSdkInstallRoot();
22926
+ installSdkModules(installDir);
22927
+ if (await tryImportManagedSdkModules()) {
22928
+ return;
22455
22929
  }
22456
22930
  }
22931
+ throw new Error(
22932
+ "pi-coding-agent SDK is not installed. Install it with:\n npm install @mariozechner/pi-coding-agent"
22933
+ );
22457
22934
  }
22458
22935
  async function loadSdkModules() {
22459
22936
  if (!piCodingAgentModule || !piAiModule) {
@@ -22510,12 +22987,16 @@ var PiCodingAgentProvider = class {
22510
22987
  try {
22511
22988
  const cwd = this.resolveCwd(request.cwd);
22512
22989
  const rawProvider = this.config.subprovider ?? "google";
22513
- const hasBaseUrl = !!this.config.baseUrl;
22990
+ const normalizedBaseUrl = this.normalizeSdkBaseUrl(rawProvider, this.config.baseUrl);
22991
+ const hasBaseUrl = !!normalizedBaseUrl;
22514
22992
  const providerName = resolveSubprovider(rawProvider, hasBaseUrl);
22515
22993
  const modelId = this.config.model ?? "gemini-2.5-flash";
22516
22994
  this.setApiKeyEnv(rawProvider, hasBaseUrl);
22517
- this.setBaseUrlEnv(rawProvider, hasBaseUrl);
22995
+ this.setBaseUrlEnv(rawProvider, normalizedBaseUrl, hasBaseUrl);
22518
22996
  let model = sdk.getModel(providerName, modelId);
22997
+ if (model && normalizedBaseUrl) {
22998
+ model = { ...model, baseUrl: normalizedBaseUrl };
22999
+ }
22519
23000
  if (!model) {
22520
23001
  const envProvider = providerName.replace(/-responses$/, "");
22521
23002
  model = {
@@ -22523,7 +23004,7 @@ var PiCodingAgentProvider = class {
22523
23004
  name: modelId,
22524
23005
  api: providerName,
22525
23006
  provider: envProvider,
22526
- baseUrl: this.config.baseUrl ?? "",
23007
+ baseUrl: normalizedBaseUrl ?? "",
22527
23008
  reasoning: false,
22528
23009
  input: ["text"],
22529
23010
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
@@ -22690,19 +23171,27 @@ ${fileList}`;
22690
23171
  }
22691
23172
  }
22692
23173
  /** Maps config baseUrl to the provider-specific env var the SDK reads. */
22693
- setBaseUrlEnv(providerName, hasBaseUrl = false) {
22694
- if (!this.config.baseUrl) return;
23174
+ setBaseUrlEnv(providerName, baseUrl = this.config.baseUrl, hasBaseUrl = false) {
23175
+ const normalizedBaseUrl = this.normalizeSdkBaseUrl(providerName, baseUrl);
23176
+ if (!normalizedBaseUrl) return;
22695
23177
  const envKey = resolveEnvBaseUrlName(providerName, hasBaseUrl);
22696
23178
  if (envKey) {
22697
- process.env[envKey] = this.config.baseUrl;
23179
+ process.env[envKey] = normalizedBaseUrl;
22698
23180
  }
22699
23181
  }
23182
+ normalizeSdkBaseUrl(providerName, baseUrl) {
23183
+ if (!baseUrl) return void 0;
23184
+ if (providerName.toLowerCase() === "azure") {
23185
+ return normalizeAzureSdkBaseUrl(baseUrl);
23186
+ }
23187
+ return baseUrl;
23188
+ }
22700
23189
  resolveCwd(cwdOverride) {
22701
23190
  if (cwdOverride) {
22702
- return path20.resolve(cwdOverride);
23191
+ return path21.resolve(cwdOverride);
22703
23192
  }
22704
23193
  if (this.config.cwd) {
22705
- return path20.resolve(this.config.cwd);
23194
+ return path21.resolve(this.config.cwd);
22706
23195
  }
22707
23196
  return process.cwd();
22708
23197
  }
@@ -22721,9 +23210,9 @@ ${fileList}`;
22721
23210
  }
22722
23211
  resolveLogDirectory() {
22723
23212
  if (this.config.logDir) {
22724
- return path20.resolve(this.config.logDir);
23213
+ return path21.resolve(this.config.logDir);
22725
23214
  }
22726
- return path20.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
23215
+ return path21.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
22727
23216
  }
22728
23217
  async createStreamLogger(request) {
22729
23218
  const logDir = this.resolveLogDirectory();
@@ -22737,7 +23226,7 @@ ${fileList}`;
22737
23226
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
22738
23227
  return void 0;
22739
23228
  }
22740
- const filePath = path20.join(logDir, buildLogFilename6(request, this.targetName));
23229
+ const filePath = path21.join(logDir, buildLogFilename6(request, this.targetName));
22741
23230
  try {
22742
23231
  const logger = await PiStreamLogger2.create({
22743
23232
  filePath,
@@ -22961,7 +23450,7 @@ async function readDirEntries(target) {
22961
23450
  const entries = await readdir2(target, { withFileTypes: true });
22962
23451
  return entries.map((entry) => ({
22963
23452
  name: entry.name,
22964
- absolutePath: path21.join(target, entry.name),
23453
+ absolutePath: path222.join(target, entry.name),
22965
23454
  isDirectory: entry.isDirectory()
22966
23455
  }));
22967
23456
  }
@@ -22975,7 +23464,7 @@ async function removeIfExists(target) {
22975
23464
  }
22976
23465
  }
22977
23466
  function pathToFileUri2(filePath) {
22978
- const absolutePath = path222.isAbsolute(filePath) ? filePath : path222.resolve(filePath);
23467
+ const absolutePath = path23.isAbsolute(filePath) ? filePath : path23.resolve(filePath);
22979
23468
  const normalizedPath = absolutePath.replace(/\\/g, "/");
22980
23469
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
22981
23470
  return `file:///${normalizedPath}`;
@@ -23067,8 +23556,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
23067
23556
  });
23068
23557
  }
23069
23558
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
23070
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path23.basename(file)}`).join("\n");
23071
- const responseList = responseFiles.map((file) => `"${path23.basename(file)}"`).join(", ");
23559
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path24.basename(file)}`).join("\n");
23560
+ const responseList = responseFiles.map((file) => `"${path24.basename(file)}"`).join(", ");
23072
23561
  return renderTemplate2(templateContent, {
23073
23562
  requestFiles: requestLines,
23074
23563
  responseList
@@ -23128,7 +23617,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
23128
23617
  }
23129
23618
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
23130
23619
  if (!silent) {
23131
- const fileList = responseFilesFinal.map((file) => path24.basename(file)).join(", ");
23620
+ const fileList = responseFilesFinal.map((file) => path25.basename(file)).join(", ");
23132
23621
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
23133
23622
  }
23134
23623
  const deadline = Date.now() + timeoutMs;
@@ -23137,7 +23626,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
23137
23626
  while (pending.size > 0) {
23138
23627
  if (Date.now() >= deadline) {
23139
23628
  if (!silent) {
23140
- const remaining = [...pending].map((f) => path24.basename(f)).join(", ");
23629
+ const remaining = [...pending].map((f) => path25.basename(f)).join(", ");
23141
23630
  console.error(
23142
23631
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
23143
23632
  );
@@ -23184,30 +23673,6 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
23184
23673
  }
23185
23674
  return true;
23186
23675
  }
23187
- var logged = false;
23188
- function getAgentvHome() {
23189
- const envHome = process.env.AGENTV_HOME;
23190
- if (envHome && envHome !== "undefined") {
23191
- if (!logged) {
23192
- logged = true;
23193
- console.warn(`Using AGENTV_HOME: ${envHome}`);
23194
- }
23195
- return envHome;
23196
- }
23197
- return path25.join(os2.homedir(), ".agentv");
23198
- }
23199
- function getWorkspacesRoot() {
23200
- return path25.join(getAgentvHome(), "workspaces");
23201
- }
23202
- function getSubagentsRoot() {
23203
- return path25.join(getAgentvHome(), "subagents");
23204
- }
23205
- function getTraceStateRoot() {
23206
- return path25.join(getAgentvHome(), "trace-state");
23207
- }
23208
- function getWorkspacePoolRoot() {
23209
- return path25.join(getAgentvHome(), "workspace-pool");
23210
- }
23211
23676
  var DEFAULT_LOCK_NAME = "subagent.lock";
23212
23677
  var DEFAULT_ALIVE_FILENAME = ".alive";
23213
23678
  function getDefaultSubagentRoot(vscodeCmd = "code") {
@@ -24428,9 +24893,10 @@ function resolveAndCreateProvider(definition, env = process.env) {
24428
24893
  const resolved = resolveTargetDefinition(definition, env);
24429
24894
  return createProvider(resolved);
24430
24895
  }
24431
- var PASS_THRESHOLD = 0.8;
24432
- function scoreToVerdict(score) {
24433
- return score >= PASS_THRESHOLD ? "pass" : "fail";
24896
+ var DEFAULT_THRESHOLD = 0.8;
24897
+ var PASS_THRESHOLD = DEFAULT_THRESHOLD;
24898
+ function scoreToVerdict(score, threshold = DEFAULT_THRESHOLD) {
24899
+ return score >= threshold ? "pass" : "fail";
24434
24900
  }
24435
24901
  function clampScore(value) {
24436
24902
  if (Number.isNaN(value) || !Number.isFinite(value)) {
@@ -24612,13 +25078,13 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
24612
25078
  async function execShellWithStdin(command, stdinPayload, options = {}) {
24613
25079
  const { mkdir: mkdir16, readFile: readFile15, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
24614
25080
  const { tmpdir: tmpdir3 } = await import("node:os");
24615
- const path49 = await import("node:path");
25081
+ const path50 = await import("node:path");
24616
25082
  const { randomUUID: randomUUID10 } = await import("node:crypto");
24617
- const dir = path49.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
25083
+ const dir = path50.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
24618
25084
  await mkdir16(dir, { recursive: true });
24619
- const stdinPath = path49.join(dir, "stdin.txt");
24620
- const stdoutPath = path49.join(dir, "stdout.txt");
24621
- const stderrPath = path49.join(dir, "stderr.txt");
25085
+ const stdinPath = path50.join(dir, "stdin.txt");
25086
+ const stdoutPath = path50.join(dir, "stdout.txt");
25087
+ const stderrPath = path50.join(dir, "stderr.txt");
24622
25088
  await writeFile9(stdinPath, stdinPayload, "utf8");
24623
25089
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
24624
25090
  const { spawn: spawn5 } = await import("node:child_process");
@@ -25799,7 +26265,7 @@ ${outputSchema2}`;
25799
26265
  parts.push("[[ ## scoring_criteria ## ]]");
25800
26266
  for (const rubric of rubrics) {
25801
26267
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
25802
- const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
26268
+ const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
25803
26269
  parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
25804
26270
  if (rubric.outcome) {
25805
26271
  parts.push(`Description: ${rubric.outcome}`);
@@ -25853,54 +26319,106 @@ ${outputSchema2}`;
25853
26319
  async runWithRetry(options) {
25854
26320
  const { context: context2, graderProvider, systemPrompt, userPrompt, schema, images } = options;
25855
26321
  let lastError;
26322
+ let lastInvalidResponse;
26323
+ let shouldAttemptStructureFix = false;
25856
26324
  for (let attempt = 1; attempt <= 3; attempt++) {
25857
26325
  try {
25858
- const model = graderProvider.asLanguageModel?.();
25859
- if (model) {
25860
- const modelOptions = {
25861
- ...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
25862
- ...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
25863
- };
25864
- const hasImages = images && images.length > 0;
25865
- const result = hasImages ? await generateText({
25866
- model,
25867
- system: systemPrompt,
25868
- messages: [
25869
- {
25870
- role: "user",
25871
- content: [
25872
- { type: "text", text: userPrompt },
25873
- ...toAiSdkImageParts(images)
25874
- ]
25875
- }
25876
- ],
25877
- ...modelOptions
25878
- }) : await generateText({
25879
- model,
25880
- system: systemPrompt,
25881
- prompt: userPrompt,
25882
- ...modelOptions
25883
- });
25884
- const data2 = schema.parse(parseJsonFromText(result.text));
25885
- const rawUsage = result.usage;
25886
- const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
25887
- return { data: data2, tokenUsage };
26326
+ const result = await this.generateStructuredResponse({
26327
+ context: context2,
26328
+ graderProvider,
26329
+ systemPrompt,
26330
+ userPrompt,
26331
+ images
26332
+ });
26333
+ const canRepairResponse = result.text.trim().length > 0;
26334
+ lastInvalidResponse = canRepairResponse ? result : void 0;
26335
+ let data;
26336
+ try {
26337
+ data = schema.parse(parseJsonFromText(result.text));
26338
+ } catch (e) {
26339
+ lastError = e instanceof Error ? e : new Error(String(e));
26340
+ shouldAttemptStructureFix = canRepairResponse;
26341
+ continue;
25888
26342
  }
25889
- const response = await graderProvider.invoke({
25890
- question: userPrompt,
26343
+ return {
26344
+ data,
26345
+ providerResponse: result.providerResponse,
26346
+ tokenUsage: result.tokenUsage
26347
+ };
26348
+ } catch (e) {
26349
+ lastError = e instanceof Error ? e : new Error(String(e));
26350
+ }
26351
+ }
26352
+ if (shouldAttemptStructureFix && lastInvalidResponse) {
26353
+ try {
26354
+ const repaired = await this.generateStructuredResponse({
26355
+ context: context2,
26356
+ graderProvider,
25891
26357
  systemPrompt,
25892
- evalCaseId: context2.evalCase.id,
25893
- attempt: context2.attempt,
25894
- maxOutputTokens: this.maxOutputTokens,
25895
- temperature: this.temperature
26358
+ userPrompt: buildStructureRepairPrompt({
26359
+ validationError: lastError?.message ?? "Schema validation failed",
26360
+ invalidResponse: lastInvalidResponse.text
26361
+ })
25896
26362
  });
25897
- const data = schema.parse(parseJsonFromText(extractLastAssistantContent(response.output)));
25898
- return { data, providerResponse: response, tokenUsage: response.tokenUsage };
26363
+ const data = schema.parse(parseJsonFromText(repaired.text));
26364
+ return {
26365
+ data,
26366
+ providerResponse: repaired.providerResponse,
26367
+ tokenUsage: sumTokenUsage(lastInvalidResponse.tokenUsage, repaired.tokenUsage)
26368
+ };
25899
26369
  } catch (e) {
25900
26370
  lastError = e instanceof Error ? e : new Error(String(e));
25901
26371
  }
25902
26372
  }
25903
- throw new Error(`Failed to parse evaluator response after 3 attempts: ${lastError?.message}`);
26373
+ throw new Error(
26374
+ `Failed to parse evaluator response after 3 attempts and 1 structure-fix attempt: ${lastError?.message}`
26375
+ );
26376
+ }
26377
+ async generateStructuredResponse(options) {
26378
+ const { context: context2, graderProvider, systemPrompt, userPrompt, images } = options;
26379
+ const model = graderProvider.asLanguageModel?.();
26380
+ if (model) {
26381
+ const modelOptions = {
26382
+ ...this.maxOutputTokens ? { maxTokens: this.maxOutputTokens } : {},
26383
+ ...typeof this.temperature === "number" ? { temperature: this.temperature } : {}
26384
+ };
26385
+ const hasImages = images && images.length > 0;
26386
+ const result = hasImages ? await generateText({
26387
+ model,
26388
+ system: systemPrompt,
26389
+ messages: [
26390
+ {
26391
+ role: "user",
26392
+ content: [
26393
+ { type: "text", text: userPrompt },
26394
+ ...toAiSdkImageParts(images)
26395
+ ]
26396
+ }
26397
+ ],
26398
+ ...modelOptions
26399
+ }) : await generateText({
26400
+ model,
26401
+ system: systemPrompt,
26402
+ prompt: userPrompt,
26403
+ ...modelOptions
26404
+ });
26405
+ const rawUsage = result.usage;
26406
+ const tokenUsage = rawUsage?.inputTokens != null && rawUsage?.outputTokens != null ? { input: rawUsage.inputTokens, output: rawUsage.outputTokens } : void 0;
26407
+ return { text: result.text, tokenUsage };
26408
+ }
26409
+ const response = await graderProvider.invoke({
26410
+ question: userPrompt,
26411
+ systemPrompt,
26412
+ evalCaseId: context2.evalCase.id,
26413
+ attempt: context2.attempt,
26414
+ maxOutputTokens: this.maxOutputTokens,
26415
+ temperature: this.temperature
26416
+ });
26417
+ return {
26418
+ text: extractLastAssistantContent(response.output),
26419
+ providerResponse: response,
26420
+ tokenUsage: response.tokenUsage
26421
+ };
25904
26422
  }
25905
26423
  };
25906
26424
  function buildOutputSchema() {
@@ -25920,6 +26438,29 @@ function buildOutputSchema() {
25920
26438
  "}"
25921
26439
  ].join("\n");
25922
26440
  }
26441
+ function buildStructureRepairPrompt(options) {
26442
+ const { validationError, invalidResponse } = options;
26443
+ return [
26444
+ "The following evaluation response has useful grading content but invalid JSON structure.",
26445
+ "Repair it to satisfy the schema in the system prompt.",
26446
+ "Preserve the evaluation meaning, do not re-grade the answer, and return only a single JSON object.",
26447
+ "",
26448
+ "Validation error:",
26449
+ validationError,
26450
+ "",
26451
+ "Invalid response:",
26452
+ invalidResponse
26453
+ ].join("\n");
26454
+ }
26455
+ function sumTokenUsage(first, second) {
26456
+ if (!first && !second) {
26457
+ return void 0;
26458
+ }
26459
+ return {
26460
+ input: (first?.input ?? 0) + (second?.input ?? 0),
26461
+ output: (first?.output ?? 0) + (second?.output ?? 0)
26462
+ };
26463
+ }
25923
26464
  function buildRubricOutputSchema() {
25924
26465
  return `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
25925
26466
  You must return a valid JSON object matching this schema:
@@ -26019,19 +26560,21 @@ function calculateScoreRangeResult(result, rubrics) {
26019
26560
  rawScores[rubric.id] = rawScore;
26020
26561
  totalWeight += rubric.weight;
26021
26562
  weightedScoreSum += normalizedScore * rubric.weight;
26022
- let requiredMinScore;
26023
- if (rubric.required_min_score !== void 0) {
26024
- requiredMinScore = rubric.required_min_score;
26563
+ let minScoreThreshold;
26564
+ if (rubric.min_score !== void 0) {
26565
+ minScoreThreshold = rubric.min_score;
26566
+ } else if (rubric.required_min_score !== void 0) {
26567
+ minScoreThreshold = rubric.required_min_score / 10;
26025
26568
  } else if (rubric.required === true) {
26026
- requiredMinScore = 10;
26569
+ minScoreThreshold = 1;
26027
26570
  }
26028
26571
  const matchingRange = rubric.score_ranges?.find(
26029
26572
  (r) => rawScore >= r.score_range[0] && rawScore <= r.score_range[1]
26030
26573
  );
26031
26574
  const rangeDescription = matchingRange?.outcome ?? "";
26032
26575
  const criterionLabel = rubric.outcome ?? rubric.id;
26033
- const passed = !(requiredMinScore !== void 0 && rawScore < requiredMinScore) && rawScore >= 7;
26034
- if (requiredMinScore !== void 0 && rawScore < requiredMinScore) {
26576
+ const passed = !(minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) && rawScore >= 7;
26577
+ if (minScoreThreshold !== void 0 && normalizedScore < minScoreThreshold) {
26035
26578
  failedRequired = true;
26036
26579
  }
26037
26580
  assertions.push({
@@ -26108,11 +26651,11 @@ function createFilesystemTools(workspacePath) {
26108
26651
  execute: async (input) => {
26109
26652
  try {
26110
26653
  const resolved = resolveSandboxed(workspacePath, input.path);
26111
- const stat10 = await fs2.stat(resolved);
26112
- if (stat10.isDirectory()) {
26654
+ const stat11 = await fs2.stat(resolved);
26655
+ if (stat11.isDirectory()) {
26113
26656
  return { error: `'${input.path}' is a directory, not a file` };
26114
26657
  }
26115
- const buffer = Buffer.alloc(Math.min(stat10.size, MAX_FILE_SIZE));
26658
+ const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
26116
26659
  const fd = await fs2.open(resolved, "r");
26117
26660
  try {
26118
26661
  await fd.read(buffer, 0, buffer.length, 0);
@@ -26120,8 +26663,8 @@ function createFilesystemTools(workspacePath) {
26120
26663
  await fd.close();
26121
26664
  }
26122
26665
  const content = buffer.toString("utf-8");
26123
- const truncated = stat10.size > MAX_FILE_SIZE;
26124
- return { content, truncated, size: stat10.size };
26666
+ const truncated = stat11.size > MAX_FILE_SIZE;
26667
+ return { content, truncated, size: stat11.size };
26125
26668
  } catch (error) {
26126
26669
  return { error: error instanceof Error ? error.message : String(error) };
26127
26670
  }
@@ -26172,8 +26715,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
26172
26715
  const ext = path35.extname(entry.name).toLowerCase();
26173
26716
  if (BINARY_EXTENSIONS.has(ext)) continue;
26174
26717
  try {
26175
- const stat10 = await fs2.stat(fullPath);
26176
- if (stat10.size > MAX_FILE_SIZE) continue;
26718
+ const stat11 = await fs2.stat(fullPath);
26719
+ if (stat11.size > MAX_FILE_SIZE) continue;
26177
26720
  const content = await fs2.readFile(fullPath, "utf-8");
26178
26721
  const lines = content.split("\n");
26179
26722
  for (let i = 0; i < lines.length; i++) {
@@ -26806,115 +27349,115 @@ var FieldAccuracyEvaluator = class {
26806
27349
  * Evaluate a single field against the expected value.
26807
27350
  */
26808
27351
  evaluateField(fieldConfig, candidateData, expectedData) {
26809
- const { path: path49, match, required = true, weight = 1 } = fieldConfig;
26810
- const candidateValue = resolvePath(candidateData, path49);
26811
- const expectedValue = resolvePath(expectedData, path49);
27352
+ const { path: path50, match, required = true, weight = 1 } = fieldConfig;
27353
+ const candidateValue = resolvePath(candidateData, path50);
27354
+ const expectedValue = resolvePath(expectedData, path50);
26812
27355
  if (expectedValue === void 0) {
26813
27356
  return {
26814
- path: path49,
27357
+ path: path50,
26815
27358
  score: 1,
26816
27359
  // No expected value means no comparison needed
26817
27360
  weight,
26818
27361
  hit: true,
26819
- message: `${path49}: no expected value`
27362
+ message: `${path50}: no expected value`
26820
27363
  };
26821
27364
  }
26822
27365
  if (candidateValue === void 0) {
26823
27366
  if (required) {
26824
27367
  return {
26825
- path: path49,
27368
+ path: path50,
26826
27369
  score: 0,
26827
27370
  weight,
26828
27371
  hit: false,
26829
- message: `${path49} (required, missing)`
27372
+ message: `${path50} (required, missing)`
26830
27373
  };
26831
27374
  }
26832
27375
  return {
26833
- path: path49,
27376
+ path: path50,
26834
27377
  score: 1,
26835
27378
  // Don't penalize missing optional fields
26836
27379
  weight: 0,
26837
27380
  // Zero weight means it won't affect the score
26838
27381
  hit: true,
26839
- message: `${path49}: optional field missing`
27382
+ message: `${path50}: optional field missing`
26840
27383
  };
26841
27384
  }
26842
27385
  switch (match) {
26843
27386
  case "exact":
26844
- return this.compareExact(path49, candidateValue, expectedValue, weight);
27387
+ return this.compareExact(path50, candidateValue, expectedValue, weight);
26845
27388
  case "numeric_tolerance":
26846
27389
  return this.compareNumericTolerance(
26847
- path49,
27390
+ path50,
26848
27391
  candidateValue,
26849
27392
  expectedValue,
26850
27393
  fieldConfig,
26851
27394
  weight
26852
27395
  );
26853
27396
  case "date":
26854
- return this.compareDate(path49, candidateValue, expectedValue, fieldConfig, weight);
27397
+ return this.compareDate(path50, candidateValue, expectedValue, fieldConfig, weight);
26855
27398
  default:
26856
27399
  return {
26857
- path: path49,
27400
+ path: path50,
26858
27401
  score: 0,
26859
27402
  weight,
26860
27403
  hit: false,
26861
- message: `${path49}: unknown match type "${match}"`
27404
+ message: `${path50}: unknown match type "${match}"`
26862
27405
  };
26863
27406
  }
26864
27407
  }
26865
27408
  /**
26866
27409
  * Exact equality comparison.
26867
27410
  */
26868
- compareExact(path49, candidateValue, expectedValue, weight) {
27411
+ compareExact(path50, candidateValue, expectedValue, weight) {
26869
27412
  if (deepEqual(candidateValue, expectedValue)) {
26870
27413
  return {
26871
- path: path49,
27414
+ path: path50,
26872
27415
  score: 1,
26873
27416
  weight,
26874
27417
  hit: true,
26875
- message: path49
27418
+ message: path50
26876
27419
  };
26877
27420
  }
26878
27421
  if (typeof candidateValue !== typeof expectedValue) {
26879
27422
  return {
26880
- path: path49,
27423
+ path: path50,
26881
27424
  score: 0,
26882
27425
  weight,
26883
27426
  hit: false,
26884
- message: `${path49} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
27427
+ message: `${path50} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
26885
27428
  };
26886
27429
  }
26887
27430
  return {
26888
- path: path49,
27431
+ path: path50,
26889
27432
  score: 0,
26890
27433
  weight,
26891
27434
  hit: false,
26892
- message: `${path49} (value mismatch)`
27435
+ message: `${path50} (value mismatch)`
26893
27436
  };
26894
27437
  }
26895
27438
  /**
26896
27439
  * Numeric comparison with absolute or relative tolerance.
26897
27440
  */
26898
- compareNumericTolerance(path49, candidateValue, expectedValue, fieldConfig, weight) {
27441
+ compareNumericTolerance(path50, candidateValue, expectedValue, fieldConfig, weight) {
26899
27442
  const { tolerance = 0, relative = false } = fieldConfig;
26900
27443
  const candidateNum = toNumber(candidateValue);
26901
27444
  const expectedNum = toNumber(expectedValue);
26902
27445
  if (candidateNum === null || expectedNum === null) {
26903
27446
  return {
26904
- path: path49,
27447
+ path: path50,
26905
27448
  score: 0,
26906
27449
  weight,
26907
27450
  hit: false,
26908
- message: `${path49} (non-numeric value)`
27451
+ message: `${path50} (non-numeric value)`
26909
27452
  };
26910
27453
  }
26911
27454
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
26912
27455
  return {
26913
- path: path49,
27456
+ path: path50,
26914
27457
  score: 0,
26915
27458
  weight,
26916
27459
  hit: false,
26917
- message: `${path49} (invalid numeric value)`
27460
+ message: `${path50} (invalid numeric value)`
26918
27461
  };
26919
27462
  }
26920
27463
  const diff = Math.abs(candidateNum - expectedNum);
@@ -26927,61 +27470,61 @@ var FieldAccuracyEvaluator = class {
26927
27470
  }
26928
27471
  if (withinTolerance) {
26929
27472
  return {
26930
- path: path49,
27473
+ path: path50,
26931
27474
  score: 1,
26932
27475
  weight,
26933
27476
  hit: true,
26934
- message: `${path49} (within tolerance: diff=${diff.toFixed(2)})`
27477
+ message: `${path50} (within tolerance: diff=${diff.toFixed(2)})`
26935
27478
  };
26936
27479
  }
26937
27480
  return {
26938
- path: path49,
27481
+ path: path50,
26939
27482
  score: 0,
26940
27483
  weight,
26941
27484
  hit: false,
26942
- message: `${path49} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
27485
+ message: `${path50} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
26943
27486
  };
26944
27487
  }
26945
27488
  /**
26946
27489
  * Date comparison with format normalization.
26947
27490
  */
26948
- compareDate(path49, candidateValue, expectedValue, fieldConfig, weight) {
27491
+ compareDate(path50, candidateValue, expectedValue, fieldConfig, weight) {
26949
27492
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
26950
27493
  const candidateDate = parseDate(String(candidateValue), formats);
26951
27494
  const expectedDate = parseDate(String(expectedValue), formats);
26952
27495
  if (candidateDate === null) {
26953
27496
  return {
26954
- path: path49,
27497
+ path: path50,
26955
27498
  score: 0,
26956
27499
  weight,
26957
27500
  hit: false,
26958
- message: `${path49} (unparseable candidate date)`
27501
+ message: `${path50} (unparseable candidate date)`
26959
27502
  };
26960
27503
  }
26961
27504
  if (expectedDate === null) {
26962
27505
  return {
26963
- path: path49,
27506
+ path: path50,
26964
27507
  score: 0,
26965
27508
  weight,
26966
27509
  hit: false,
26967
- message: `${path49} (unparseable expected date)`
27510
+ message: `${path50} (unparseable expected date)`
26968
27511
  };
26969
27512
  }
26970
27513
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
26971
27514
  return {
26972
- path: path49,
27515
+ path: path50,
26973
27516
  score: 1,
26974
27517
  weight,
26975
27518
  hit: true,
26976
- message: path49
27519
+ message: path50
26977
27520
  };
26978
27521
  }
26979
27522
  return {
26980
- path: path49,
27523
+ path: path50,
26981
27524
  score: 0,
26982
27525
  weight,
26983
27526
  hit: false,
26984
- message: `${path49} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
27527
+ message: `${path50} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
26985
27528
  };
26986
27529
  }
26987
27530
  /**
@@ -27014,11 +27557,11 @@ var FieldAccuracyEvaluator = class {
27014
27557
  };
27015
27558
  }
27016
27559
  };
27017
- function resolvePath(obj, path49) {
27018
- if (!path49 || !obj) {
27560
+ function resolvePath(obj, path50) {
27561
+ if (!path50 || !obj) {
27019
27562
  return void 0;
27020
27563
  }
27021
- const parts = path49.split(/\.|\[|\]/).filter((p) => p.length > 0);
27564
+ const parts = path50.split(/\.|\[|\]/).filter((p) => p.length > 0);
27022
27565
  let current = obj;
27023
27566
  for (const part of parts) {
27024
27567
  if (current === null || current === void 0) {
@@ -27500,8 +28043,8 @@ var TokenUsageEvaluator = class {
27500
28043
  };
27501
28044
  }
27502
28045
  };
27503
- function getNestedValue(obj, path49) {
27504
- const parts = path49.split(".");
28046
+ function getNestedValue(obj, path50) {
28047
+ const parts = path50.split(".");
27505
28048
  let current = obj;
27506
28049
  for (const part of parts) {
27507
28050
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -29224,7 +29767,7 @@ var WorkspacePoolManager = class {
29224
29767
  }
29225
29768
  /**
29226
29769
  * Reset an existing slot for reuse:
29227
- * 1. Reset repos (git reset --hard {ref} && git clean -fd per repo)
29770
+ * 1. Reset repos (fetch from origin when resolve=remote, then git reset --hard && git clean per repo)
29228
29771
  * 2. Re-copy template files (skip repo directories)
29229
29772
  */
29230
29773
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
@@ -29237,7 +29780,17 @@ var WorkspacePoolManager = class {
29237
29780
  continue;
29238
29781
  }
29239
29782
  const ref = repo.checkout?.ref ?? "HEAD";
29240
- await git(["reset", "--hard", ref], { cwd: repoDir });
29783
+ const resolve2 = repo.checkout?.resolve ?? "remote";
29784
+ if (resolve2 === "remote") {
29785
+ const fetchArgs = ["fetch", "origin", ref];
29786
+ if (repo.clone?.depth) {
29787
+ fetchArgs.splice(1, 0, "--depth", String(repo.clone.depth));
29788
+ }
29789
+ await git(fetchArgs, { cwd: repoDir });
29790
+ await git(["reset", "--hard", "FETCH_HEAD"], { cwd: repoDir });
29791
+ } else {
29792
+ await git(["reset", "--hard", ref], { cwd: repoDir });
29793
+ }
29241
29794
  const cleanFlag = poolReset === "strict" ? "-fdx" : "-fd";
29242
29795
  await git(["clean", cleanFlag], { cwd: repoDir });
29243
29796
  }
@@ -29520,7 +30073,7 @@ async function executeWorkspaceScript(config, context2, failureMode = "fatal") {
29520
30073
  }
29521
30074
  return result.stdout;
29522
30075
  }
29523
- function classifyQualityStatus(score, threshold = PASS_THRESHOLD) {
30076
+ function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
29524
30077
  return score >= threshold ? "ok" : "quality_failure";
29525
30078
  }
29526
30079
  function buildSkippedEvaluatorError(scores) {
@@ -29612,7 +30165,7 @@ async function runEvaluation(options) {
29612
30165
  const filteredEvalCases = filterEvalCases(evalCases, filter2);
29613
30166
  if (filteredEvalCases.length === 0) {
29614
30167
  if (filter2) {
29615
- throw new Error(`No tests matched filter '${filter2}' in ${evalFilePath}`);
30168
+ throw new Error(`No tests matched filter '${formatFilter(filter2)}' in ${evalFilePath}`);
29616
30169
  }
29617
30170
  return [];
29618
30171
  }
@@ -29664,6 +30217,9 @@ async function runEvaluation(options) {
29664
30217
  const graderName = targetContext.graderTarget ?? targetContext.name;
29665
30218
  const resolvedGrader = resolveTargetByName(graderName);
29666
30219
  if (!resolvedGrader) {
30220
+ if (!LLM_GRADER_CAPABLE_KINDS.includes(targetContext.kind)) {
30221
+ return void 0;
30222
+ }
29667
30223
  return getOrCreateProvider(targetContext);
29668
30224
  }
29669
30225
  return getOrCreateProvider(resolvedGrader);
@@ -29994,7 +30550,7 @@ async function runEvaluation(options) {
29994
30550
  const budgetResult = {
29995
30551
  timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
29996
30552
  testId: evalCase.id,
29997
- dataset: evalCase.dataset,
30553
+ suite: evalCase.suite,
29998
30554
  category: evalCase.category,
29999
30555
  score: 0,
30000
30556
  assertions: [],
@@ -30031,7 +30587,7 @@ async function runEvaluation(options) {
30031
30587
  const haltResult = {
30032
30588
  timestamp: (now2 ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
30033
30589
  testId: evalCase.id,
30034
- dataset: evalCase.dataset,
30590
+ suite: evalCase.suite,
30035
30591
  category: evalCase.category,
30036
30592
  score: 0,
30037
30593
  assertions: [],
@@ -30343,7 +30899,7 @@ async function runBatchEvaluation(options) {
30343
30899
  targetResolver,
30344
30900
  availableTargets,
30345
30901
  verbose,
30346
- threshold: batchThreshold
30902
+ threshold: evalCase.threshold ?? batchThreshold
30347
30903
  });
30348
30904
  if (providerError) {
30349
30905
  result = {
@@ -30805,8 +31361,9 @@ async function runEvalCase(options) {
30805
31361
  fileChanges,
30806
31362
  workspacePath,
30807
31363
  verbose,
30808
- threshold: caseThreshold
31364
+ threshold: evalCase.threshold ?? caseThreshold
30809
31365
  });
31366
+ const effectiveThreshold = evalCase.threshold ?? caseThreshold;
30810
31367
  const totalDurationMs = Date.now() - caseStartMs;
30811
31368
  const graderTokens = aggregateEvaluatorTokenUsage(result.scores);
30812
31369
  const evalRunTokenUsage = tokenUsage || graderTokens ? {
@@ -30820,7 +31377,7 @@ async function runEvalCase(options) {
30820
31377
  ...evalRunTokenUsage ? { tokenUsage: evalRunTokenUsage } : {}
30821
31378
  };
30822
31379
  const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
30823
- const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
31380
+ const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, effectiveThreshold);
30824
31381
  const targetUsedField = targetUsed ? { targetUsed } : {};
30825
31382
  const finalResult = providerError ? {
30826
31383
  ...result,
@@ -31021,7 +31578,8 @@ async function evaluateCandidate(options) {
31021
31578
  targetResolver,
31022
31579
  availableTargets,
31023
31580
  fileChanges,
31024
- workspacePath
31581
+ workspacePath,
31582
+ threshold: evalThreshold
31025
31583
  });
31026
31584
  const completedAt = nowFn();
31027
31585
  let agentRequest;
@@ -31052,7 +31610,7 @@ async function evaluateCandidate(options) {
31052
31610
  return {
31053
31611
  timestamp: completedAt.toISOString(),
31054
31612
  testId: evalCase.id,
31055
- dataset: evalCase.dataset,
31613
+ suite: evalCase.suite,
31056
31614
  category: evalCase.category,
31057
31615
  conversationId: evalCase.conversation_id,
31058
31616
  score: score.score,
@@ -31095,7 +31653,8 @@ async function runEvaluatorsForCase(options) {
31095
31653
  targetResolver,
31096
31654
  availableTargets,
31097
31655
  fileChanges,
31098
- workspacePath
31656
+ workspacePath,
31657
+ threshold
31099
31658
  } = options;
31100
31659
  if (evalCase.assertions && evalCase.assertions.length > 0) {
31101
31660
  return runEvaluatorList({
@@ -31121,7 +31680,8 @@ async function runEvaluatorsForCase(options) {
31121
31680
  targetResolver,
31122
31681
  availableTargets,
31123
31682
  fileChanges,
31124
- workspacePath
31683
+ workspacePath,
31684
+ threshold
31125
31685
  });
31126
31686
  }
31127
31687
  const evaluatorKind = evalCase.evaluator ?? "llm-grader";
@@ -31223,7 +31783,8 @@ async function runEvaluatorList(options) {
31223
31783
  name: evaluatorConfig.name,
31224
31784
  type: evaluatorConfig.type,
31225
31785
  weight,
31226
- ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
31786
+ ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
31787
+ ...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
31227
31788
  });
31228
31789
  scores.push({
31229
31790
  name: evaluatorConfig.name,
@@ -31258,7 +31819,8 @@ async function runEvaluatorList(options) {
31258
31819
  name: evaluatorConfig.name ?? "unknown",
31259
31820
  type: evaluatorConfig.type ?? "llm-grader",
31260
31821
  weight,
31261
- ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {}
31822
+ ...evaluatorConfig.required !== void 0 ? { required: evaluatorConfig.required } : {},
31823
+ ...evaluatorConfig.min_score !== void 0 ? { min_score: evaluatorConfig.min_score } : {}
31262
31824
  });
31263
31825
  scores.push({
31264
31826
  name: evaluatorConfig.name ?? "unknown",
@@ -31292,9 +31854,10 @@ async function runEvaluatorList(options) {
31292
31854
  }
31293
31855
  }
31294
31856
  }
31857
+ const effectiveThreshold = options.threshold ?? DEFAULT_THRESHOLD;
31295
31858
  const hasRequiredFailure = scored.some((entry) => {
31296
31859
  if (!entry.required) return false;
31297
- const minScore = typeof entry.required === "number" ? entry.required : PASS_THRESHOLD;
31860
+ const minScore = entry.min_score ?? (typeof entry.required === "number" ? entry.required : effectiveThreshold);
31298
31861
  return entry.score.score < minScore;
31299
31862
  });
31300
31863
  const scorable = scored.filter((entry) => entry.score.verdict !== "skip");
@@ -31305,17 +31868,23 @@ async function runEvaluatorList(options) {
31305
31868
  const expectedAspectCount = assertions.length || 1;
31306
31869
  const score = {
31307
31870
  score: aggregateScore,
31308
- verdict: scoreToVerdict(aggregateScore),
31871
+ verdict: scoreToVerdict(aggregateScore, effectiveThreshold),
31309
31872
  assertions,
31310
31873
  expectedAspectCount
31311
31874
  };
31312
31875
  return { score, scores };
31313
31876
  }
31877
/**
 * Render a test-id filter for display (e.g. in error messages).
 *
 * @param {string | string[]} filter2 - A single pattern or a list of patterns.
 * @returns {string} The pattern itself, or the patterns joined with ", ".
 */
function formatFilter(filter2) {
  if (typeof filter2 === "string") {
    return filter2;
  }
  return filter2.join(", ");
}
31880
/**
 * Test whether an id matches a filter.
 *
 * @param {string} id - The identifier to test.
 * @param {string | string[]} filter2 - One pattern, or several; with several,
 *   a match against any single pattern is enough.
 * @returns {boolean} Result of `micromatch3.isMatch` for the pattern(s).
 */
function matchesFilter3(id, filter2) {
  if (typeof filter2 === "string") {
    return micromatch3.isMatch(id, filter2);
  }
  return filter2.some((pattern) => micromatch3.isMatch(id, pattern));
}
31314
31883
  function filterEvalCases(evalCases, filter2) {
31315
31884
  if (!filter2) {
31316
31885
  return evalCases;
31317
31886
  }
31318
- return evalCases.filter((evalCase) => micromatch3.isMatch(evalCase.id, filter2));
31887
+ return evalCases.filter((evalCase) => matchesFilter3(evalCase.id, filter2));
31319
31888
  }
31320
31889
  function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
31321
31890
  const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
@@ -31402,7 +31971,7 @@ function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs,
31402
31971
  return {
31403
31972
  timestamp: timestamp.toISOString(),
31404
31973
  testId: evalCase.id,
31405
- dataset: evalCase.dataset,
31974
+ suite: evalCase.suite,
31406
31975
  category: evalCase.category,
31407
31976
  conversationId: evalCase.conversation_id,
31408
31977
  score: 0,
@@ -31666,6 +32235,7 @@ async function evaluate(config) {
31666
32235
  verbose: config.verbose,
31667
32236
  maxConcurrency: config.workers ?? 3,
31668
32237
  filter: config.filter,
32238
+ threshold: config.threshold,
31669
32239
  evalCases,
31670
32240
  onResult: async (result) => {
31671
32241
  collectedResults.push(result);
@@ -31676,19 +32246,19 @@ async function evaluate(config) {
31676
32246
  const durationMs = Date.now() - startTime;
31677
32247
  return {
31678
32248
  results: allResults,
31679
- summary: computeSummary(allResults, durationMs)
32249
+ summary: computeSummary(allResults, durationMs, config.threshold)
31680
32250
  };
31681
32251
  }
31682
32252
  function mapAssertionType(type) {
31683
32253
  return type.replace(/_/g, "-");
31684
32254
  }
31685
- function computeSummary(results, durationMs) {
32255
+ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
31686
32256
  const total = results.length;
31687
32257
  let passed = 0;
31688
32258
  let scoreSum = 0;
31689
32259
  for (const r of results) {
31690
32260
  scoreSum += r.score;
31691
- if (r.score >= PASS_THRESHOLD) {
32261
+ if (r.score >= threshold) {
31692
32262
  passed++;
31693
32263
  }
31694
32264
  }
@@ -31798,7 +32368,7 @@ var CONFIG_FILE_NAMES = [
31798
32368
  ];
31799
32369
  async function loadTsConfig(projectRoot) {
31800
32370
  const { existsSync: existsSync7 } = await import("node:fs");
31801
- const { pathToFileURL } = await import("node:url");
32371
+ const { pathToFileURL: pathToFileURL2 } = await import("node:url");
31802
32372
  const { join: join2 } = await import("node:path");
31803
32373
  for (const fileName of CONFIG_FILE_NAMES) {
31804
32374
  const filePath = join2(projectRoot, fileName);
@@ -31806,7 +32376,7 @@ async function loadTsConfig(projectRoot) {
31806
32376
  continue;
31807
32377
  }
31808
32378
  try {
31809
- const fileUrl = pathToFileURL(filePath).href;
32379
+ const fileUrl = pathToFileURL2(filePath).href;
31810
32380
  const mod = await import(fileUrl);
31811
32381
  const config = mod.default ?? mod;
31812
32382
  return AgentVConfigSchema.parse(config);
@@ -31953,7 +32523,7 @@ function saveProjectRegistry(registry) {
31953
32523
  const registryPath = getProjectsRegistryPath();
31954
32524
  const dir = path47.dirname(registryPath);
31955
32525
  if (!existsSync6(dir)) {
31956
- mkdirSync(dir, { recursive: true });
32526
+ mkdirSync2(dir, { recursive: true });
31957
32527
  }
31958
32528
  writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
31959
32529
  }
@@ -32213,7 +32783,7 @@ var OtelTraceExporter = class {
32213
32783
  rootSpan.setAttribute("gen_ai.system", "agentv");
32214
32784
  rootSpan.setAttribute("agentv.test_id", result.testId);
32215
32785
  rootSpan.setAttribute("agentv.target", result.target);
32216
- if (result.dataset) rootSpan.setAttribute("agentv.dataset", result.dataset);
32786
+ if (result.suite) rootSpan.setAttribute("agentv.suite", result.suite);
32217
32787
  rootSpan.setAttribute("agentv.score", result.score);
32218
32788
  if (captureContent && result.output.length > 0) {
32219
32789
  const lastMsg = result.output[result.output.length - 1];
@@ -32422,7 +32992,7 @@ var OtelStreamingObserver = class {
32422
32992
  this.rootSpan.setAttribute("gen_ai.system", "agentv");
32423
32993
  this.rootSpan.setAttribute("agentv.test_id", testId);
32424
32994
  this.rootSpan.setAttribute("agentv.target", target);
32425
- if (evalSet) this.rootSpan.setAttribute("agentv.dataset", evalSet);
32995
+ if (evalSet) this.rootSpan.setAttribute("agentv.suite", evalSet);
32426
32996
  this.rootCtx = this.api.trace.setSpan(this.api.context.active(), this.rootSpan);
32427
32997
  }
32428
32998
  /** Create and immediately export a tool span */
@@ -32768,7 +33338,230 @@ function extractToolResultContent(content) {
32768
33338
  }
32769
33339
  return parts.length > 0 ? parts.join("") : void 0;
32770
33340
  }
32771
- var DEFAULT_PROJECTS_DIR = () => path48.join(homedir3(), ".claude", "projects");
33341
/**
 * Parse the text of a Codex rollout file (JSONL) into a normalized transcript.
 *
 * Every non-blank line is parsed as JSON. Lines that are not valid JSON, that
 * do not decode to an object, or that have no `type` are skipped. Handled
 * entry types:
 *   - `session_meta`:  session id, cwd, CLI version and (if not yet known) model.
 *   - `turn_context`:  model and cwd, each only when not already known.
 *   - `response_item`: user/assistant messages, tool calls and tool outputs.
 *     `developer` messages and any other item types (e.g. `reasoning`) are
 *     ignored.
 *
 * Each tool call becomes its own assistant message with a single `toolCalls`
 * entry; a later `*_output` item with the same `call_id` is attached to that
 * entry as `output`.
 *
 * @param {string} jsonl - Full contents of the rollout file.
 * @returns {{messages: object[], source: object, tokenUsage: undefined,
 *   durationMs: (number|undefined), costUsd: null}} Normalized transcript.
 *   `durationMs` is the span between the first and last entry timestamps and
 *   is left undefined when either is missing or cannot be parsed as a date.
 */
function parseCodexSession(jsonl) {
  const messages = [];
  // call_id -> location of the tool call that is still waiting for its output.
  const pendingCalls = /* @__PURE__ */ new Map();
  let sessionId = "";
  let cwd;
  let model;
  let version;
  let startTimestamp;
  let endTimestamp;

  // Shared handler for `function_call` and `custom_tool_call` items.
  const recordToolCall = (payload) => {
    const toolName = String(payload.name ?? "");
    const callId = String(payload.call_id ?? "");
    // NOTE(review): custom tool calls appear to carry their payload under
    // `input` rather than `arguments`; accept either, preferring `arguments`.
    const rawInput = payload.arguments !== void 0 ? payload.arguments : payload.input;
    let input = rawInput;
    if (typeof rawInput === "string") {
      try {
        input = JSON.parse(rawInput);
      } catch {
        // Not JSON (e.g. free-form text): keep the raw string.
        input = rawInput;
      }
    }
    const msgIdx = messages.length;
    messages.push({
      role: "assistant",
      toolCalls: [{ tool: toolName, input, id: callId }]
    });
    if (callId) {
      pendingCalls.set(callId, { msgIdx, toolIdx: 0 });
    }
  };

  // Shared handler for `function_call_output` and `custom_tool_call_output`.
  const attachToolOutput = (payload) => {
    const callId = String(payload.call_id ?? "");
    const pending = pendingCalls.get(callId);
    if (!pending) {
      return;
    }
    const existingMsg = messages[pending.msgIdx];
    const existingCalls = [...existingMsg.toolCalls ?? []];
    existingCalls[pending.toolIdx] = {
      ...existingCalls[pending.toolIdx],
      output: payload.output
    };
    // Replace rather than mutate the stored message.
    messages[pending.msgIdx] = { ...existingMsg, toolCalls: existingCalls };
    pendingCalls.delete(callId);
  };

  for (const line of jsonl.split("\n")) {
    if (line.trim().length === 0) continue;
    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      continue;
    }
    // `null` is valid JSON; guard before reading properties from the entry.
    if (entry === null || typeof entry !== "object" || !entry.type) continue;
    if (entry.timestamp) {
      if (!startTimestamp) startTimestamp = entry.timestamp;
      endTimestamp = entry.timestamp;
    }
    const payload = entry.payload ?? {};
    switch (entry.type) {
      case "session_meta": {
        sessionId = String(payload.id ?? "");
        cwd = payload.cwd ? String(payload.cwd) : void 0;
        version = payload.cli_version ? String(payload.cli_version) : void 0;
        if (payload.model && !model) {
          model = String(payload.model);
        }
        break;
      }
      case "turn_context": {
        if (payload.model && !model) {
          model = String(payload.model);
        }
        if (payload.cwd && !cwd) {
          cwd = String(payload.cwd);
        }
        break;
      }
      case "response_item": {
        switch (String(payload.type ?? "")) {
          case "message": {
            const role = String(payload.role ?? "");
            if (role === "developer") break;
            const content = extractResponseItemContent(payload.content);
            if ((role === "user" || role === "assistant") && content) {
              messages.push({ role, content });
            }
            break;
          }
          case "function_call":
          case "custom_tool_call":
            recordToolCall(payload);
            break;
          case "function_call_output":
          case "custom_tool_call_output":
            attachToolOutput(payload);
            break;
          // Everything else (including "reasoning" blocks) is skipped.
          default:
            break;
        }
        break;
      }
    }
  }

  let durationMs;
  if (startTimestamp && endTimestamp) {
    const elapsed = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
    // Unparsable timestamps produce NaN; report "unknown" instead.
    if (Number.isFinite(elapsed)) {
      durationMs = elapsed;
    }
  }
  const source = {
    provider: "codex",
    sessionId,
    cwd,
    startedAt: startTimestamp,
    model,
    version
  };
  return {
    messages,
    source,
    // Codex rollout files don't include token counts (only rate limit info)
    tokenUsage: void 0,
    durationMs,
    costUsd: null
  };
}
33490
/**
 * Flatten the `content` of a response item into plain text.
 *
 * @param {unknown} content - Either a string, or an array of content blocks.
 * @returns {string | undefined} A string input is returned unchanged. For an
 *   array, the `text` of every non-null object block whose `text` is a string
 *   is concatenated in order; undefined is returned when no block qualifies.
 *   Any other input yields undefined.
 */
function extractResponseItemContent(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return void 0;
  }
  const hasText = (block) => typeof block === "object" && block !== null && typeof block.text === "string";
  const texts = content.filter(hasText).map((block) => block.text);
  if (texts.length === 0) {
    return void 0;
  }
  return texts.join("");
}
33504
// Default location of Codex session rollouts: <home>/.codex/sessions.
var DEFAULT_SESSIONS_DIR = () => path48.join(homedir3(), ".codex", "sessions");
/**
 * Find Codex rollout files under a sessions directory laid out as
 * `<sessionsDir>/<year>/<month>/<day>/rollout-*.jsonl`.
 *
 * Directories that cannot be listed are skipped; if the sessions directory
 * itself cannot be listed the result is an empty array. A file whose stat
 * fails is still reported, with `updatedAt` set to the epoch.
 *
 * @param {object} [opts]
 * @param {string} [opts.sessionsDir] - Root to scan; defaults to DEFAULT_SESSIONS_DIR().
 * @param {boolean} [opts.latest] - When truthy, return at most one session.
 * @param {number} [opts.limit] - Maximum number of sessions (default 10); ignored when `latest` is set.
 * @param {string} [opts.date] - Only include day directories where `<year>-<month>-<day>` equals this string.
 * @returns {Promise<Array<{sessionId: string, filePath: string, filename: string, updatedAt: Date}>>}
 *   Sessions sorted by modification time, newest first, truncated to the limit.
 */
async function discoverCodexSessions(opts) {
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;

  // List a directory, or null when it cannot be read.
  const listOrNull = async (dir) => {
    try {
      return await readdir8(dir);
    } catch {
      return null;
    }
  };
  // Modification time of a file, or the epoch when it cannot be stat'ed.
  const mtimeOrEpoch = async (target) => {
    try {
      const fileStat = await stat9(target);
      return fileStat.mtime;
    } catch {
      return /* @__PURE__ */ new Date(0);
    }
  };

  const yearDirs = await listOrNull(sessionsDir);
  if (yearDirs === null) {
    return [];
  }
  const found = [];
  for (const year of yearDirs) {
    const yearPath = path48.join(sessionsDir, year);
    const monthDirs = await listOrNull(yearPath);
    if (monthDirs === null) continue;
    for (const month of monthDirs) {
      const monthPath = path48.join(yearPath, month);
      const dayDirs = await listOrNull(monthPath);
      if (dayDirs === null) continue;
      for (const day of dayDirs) {
        if (opts?.date && `${year}-${month}-${day}` !== opts.date) continue;
        const dayPath = path48.join(monthPath, day);
        const files = await listOrNull(dayPath);
        if (files === null) continue;
        for (const file of files) {
          const isRollout = file.startsWith("rollout-") && file.endsWith(".jsonl");
          if (!isRollout) continue;
          const filePath = path48.join(dayPath, file);
          const stem = file.replace(/\.jsonl$/, "");
          const pieces = stem.split("-");
          // With six or more dash-separated pieces, the last five form the id;
          // otherwise the whole stem is used.
          const sessionId = pieces.length >= 6 ? pieces.slice(-5).join("-") : stem;
          const updatedAt = await mtimeOrEpoch(filePath);
          found.push({ sessionId, filePath, filename: file, updatedAt });
        }
      }
    }
  }
  found.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
  return found.slice(0, limit);
}
33564
+ var DEFAULT_PROJECTS_DIR = () => path49.join(homedir4(), ".claude", "projects");
32772
33565
  function encodeProjectPath(projectPath) {
32773
33566
  return projectPath.replace(/\//g, "-");
32774
33567
  }
@@ -32777,7 +33570,7 @@ async function discoverClaudeSessions(opts) {
32777
33570
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
32778
33571
  let projectDirs;
32779
33572
  try {
32780
- projectDirs = await readdir8(projectsDir);
33573
+ projectDirs = await readdir9(projectsDir);
32781
33574
  } catch {
32782
33575
  return [];
32783
33576
  }
@@ -32787,10 +33580,10 @@ async function discoverClaudeSessions(opts) {
32787
33580
  }
32788
33581
  const sessions = [];
32789
33582
  for (const projectDir of projectDirs) {
32790
- const dirPath = path48.join(projectsDir, projectDir);
33583
+ const dirPath = path49.join(projectsDir, projectDir);
32791
33584
  let entries;
32792
33585
  try {
32793
- entries = await readdir8(dirPath);
33586
+ entries = await readdir9(dirPath);
32794
33587
  } catch {
32795
33588
  continue;
32796
33589
  }
@@ -32798,10 +33591,10 @@ async function discoverClaudeSessions(opts) {
32798
33591
  if (!entry.endsWith(".jsonl")) continue;
32799
33592
  const sessionId = entry.replace(/\.jsonl$/, "");
32800
33593
  if (opts?.sessionId && sessionId !== opts.sessionId) continue;
32801
- const filePath = path48.join(dirPath, entry);
33594
+ const filePath = path49.join(dirPath, entry);
32802
33595
  let updatedAt;
32803
33596
  try {
32804
- const fileStat = await stat9(filePath);
33597
+ const fileStat = await stat10(filePath);
32805
33598
  updatedAt = fileStat.mtime;
32806
33599
  } catch {
32807
33600
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -32817,9 +33610,82 @@ async function discoverClaudeSessions(opts) {
32817
33610
  sessions.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
32818
33611
  return sessions.slice(0, limit);
32819
33612
  }
33613
/**
 * Convert a parsed transcript entry into the snake_case record written to a
 * transcript JSONL file.
 *
 * @param {object} entry - Parsed transcript with `messages`, `source`, and
 *   optional `tokenUsage`, `durationMs` and `costUsd`.
 * @returns {object} Record with:
 *   - `input`: content of the first message whose role is "user" when that
 *     content is a string, otherwise "".
 *   - `output`: the full `messages` array (same reference).
 *   - `token_usage`: `{input, output, cached}` copied from `tokenUsage`, or
 *     undefined when `tokenUsage` is falsy.
 *   - `duration_ms`, `cost_usd`: passed through unchanged.
 *   - `source`: provider/session metadata; `cwd` falls back to
 *     `source.projectPath` when `source.cwd` is null or undefined.
 */
function toTranscriptJsonLine(entry) {
  const { messages, tokenUsage, source } = entry;
  const firstUserMessage = messages.find((message) => message.role === "user");

  let input = "";
  if (typeof firstUserMessage?.content === "string") {
    input = firstUserMessage.content;
  }

  let tokenUsageRecord = void 0;
  if (tokenUsage) {
    tokenUsageRecord = {
      input: tokenUsage.input,
      output: tokenUsage.output,
      cached: tokenUsage.cached
    };
  }

  const sourceRecord = {
    provider: source.provider,
    session_id: source.sessionId,
    model: source.model,
    timestamp: source.startedAt,
    git_branch: source.gitBranch,
    cwd: source.cwd ?? source.projectPath,
    version: source.version
  };

  return {
    input,
    output: messages,
    token_usage: tokenUsageRecord,
    duration_ms: entry.durationMs,
    cost_usd: entry.costUsd,
    source: sourceRecord
  };
}
33637
/**
 * Read a transcript JSONL file and parse each non-blank line as JSON.
 *
 * @param {string} filePath - Path of the JSONL file (read as UTF-8).
 * @returns {Promise<unknown[]>} Parsed values, one per non-blank line, in file order.
 * @throws {SyntaxError} When a line is not valid JSON. The message names the
 *   file and the 1-based line number; the original parse error is attached
 *   as `cause`.
 */
async function readTranscriptJsonl(filePath) {
  const text2 = await readFile14(filePath, "utf8");
  const records = [];
  const rawLines = text2.split("\n");
  for (let index = 0; index < rawLines.length; index++) {
    const line = rawLines[index];
    // Blank lines are skipped but still counted, so reported line numbers
    // match the file as seen in an editor.
    if (line.trim().length === 0) continue;
    try {
      records.push(JSON.parse(line));
    } catch (error) {
      throw new SyntaxError(
        `Invalid JSON on line ${index + 1} of ${filePath}: ${error.message}`,
        { cause: error }
      );
    }
  }
  return records;
}
32820
33641
  async function readTranscriptFile(filePath) {
32821
33642
  return readFile14(filePath, "utf8");
32822
33643
  }
33644
/**
 * Provider that replays pre-recorded transcript lines instead of calling a
 * live target. Each `invoke()` call consumes the next line, in order.
 */
var TranscriptProvider = class _TranscriptProvider {
  id;
  kind = "transcript";
  targetName;
  lines;
  // Index of the next transcript line that invoke() will return.
  cursor = 0;
  /**
   * @param {string} targetName - Name used as the target and in `id` (`transcript:<targetName>`).
   * @param {object[]} lines - Parsed transcript records, one per expected invocation.
   */
  constructor(targetName, lines) {
    this.targetName = targetName;
    this.id = `transcript:${targetName}`;
    this.lines = lines;
  }
  /**
   * Create a TranscriptProvider from a JSONL file path.
   *
   * The target name is taken from the first line's `source.provider`, falling
   * back to "transcript" when that is missing.
   *
   * @param {string} filePath - Path of the transcript JSONL file.
   * @returns {Promise<TranscriptProvider>}
   * @throws {Error} When the file contains no transcript lines.
   */
  static async fromFile(filePath) {
    const lines = await readTranscriptJsonl(filePath);
    if (lines.length === 0) {
      throw new Error(`Transcript file is empty: ${filePath}`);
    }
    // `source` may be absent on hand-written lines; don't crash on it.
    const providerName = lines[0]?.source?.provider ?? "transcript";
    return new _TranscriptProvider(providerName, lines);
  }
  /** Number of transcript lines held by this provider. */
  get lineCount() {
    return this.lines.length;
  }
  /**
   * Return the next recorded response. The request itself is not inspected.
   *
   * @param {unknown} _request - Ignored.
   * @returns {Promise<object>} `{output, tokenUsage, durationMs, costUsd, startTime}`
   *   built from the next line; a null/undefined `cost_usd` becomes undefined.
   * @throws {Error} When every line has already been consumed.
   */
  async invoke(_request) {
    if (this.cursor >= this.lines.length) {
      throw new Error(
        `Transcript exhausted: ${this.lines.length} line(s) available but ${this.cursor + 1} invocations attempted. Each transcript line maps to one test case.`
      );
    }
    const line = this.lines[this.cursor++];
    const usage = line.token_usage;
    return {
      output: line.output,
      tokenUsage: usage ? {
        input: usage.input,
        output: usage.output,
        cached: usage.cached
      } : void 0,
      durationMs: line.duration_ms,
      costUsd: line.cost_usd ?? void 0,
      // Lines without a `source` object simply have no start time.
      startTime: line.source?.timestamp
    };
  }
};
32823
33689
  function createAgentKernel() {
32824
33690
  return { status: "stub" };
32825
33691
  }
@@ -32843,6 +33709,7 @@ export {
32843
33709
  buildSearchRoots,
32844
33710
  resolveFileReference,
32845
33711
  CLI_PLACEHOLDERS,
33712
+ findDeprecatedCamelCaseTargetWarnings,
32846
33713
  COMMON_TARGET_SETTINGS,
32847
33714
  resolveDelegatedTargetDefinition,
32848
33715
  resolveTargetDefinition,
@@ -32887,17 +33754,18 @@ export {
32887
33754
  subscribeToCodexLogEntries,
32888
33755
  consumeCopilotCliLogEntries,
32889
33756
  subscribeToCopilotCliLogEntries,
33757
+ parseCopilotEvents,
32890
33758
  discoverCopilotSessions,
32891
33759
  consumeCopilotSdkLogEntries,
32892
33760
  subscribeToCopilotSdkLogEntries,
32893
33761
  consumePiLogEntries,
32894
33762
  subscribeToPiLogEntries,
32895
- ProviderRegistry,
32896
33763
  getAgentvHome,
32897
33764
  getWorkspacesRoot,
32898
33765
  getSubagentsRoot,
32899
33766
  getTraceStateRoot,
32900
33767
  getWorkspacePoolRoot,
33768
+ ProviderRegistry,
32901
33769
  ensureVSCodeSubagents,
32902
33770
  readTargetDefinitions,
32903
33771
  listTargetNames,
@@ -32905,6 +33773,7 @@ export {
32905
33773
  createBuiltinProviderRegistry,
32906
33774
  createProvider,
32907
33775
  resolveAndCreateProvider,
33776
+ DEFAULT_THRESHOLD,
32908
33777
  PASS_THRESHOLD,
32909
33778
  scoreToVerdict,
32910
33779
  clampScore,
@@ -32992,8 +33861,13 @@ export {
32992
33861
  OtelTraceExporter,
32993
33862
  OtelStreamingObserver,
32994
33863
  parseClaudeSession,
33864
+ parseCodexSession,
33865
+ discoverCodexSessions,
32995
33866
  discoverClaudeSessions,
33867
+ toTranscriptJsonLine,
33868
+ readTranscriptJsonl,
32996
33869
  readTranscriptFile,
33870
+ TranscriptProvider,
32997
33871
  createAgentKernel
32998
33872
  };
32999
- //# sourceMappingURL=chunk-YXXD27OK.js.map
33873
+ //# sourceMappingURL=chunk-H4GQXK5M.js.map