@agentv/core 4.5.1 → 4.6.0

This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -2585,6 +2585,7 @@ function validateTemplateVariables(content, source) {
2585
2585
  // src/evaluation/loaders/evaluator-parser.ts
2586
2586
  var ANSI_YELLOW4 = "\x1B[33m";
2587
2587
  var ANSI_RESET5 = "\x1B[0m";
2588
+ var PROMPT_FILE_PREFIX = "file://";
2588
2589
  function normalizeEvaluatorType(type) {
2589
2590
  return type.replace(/_/g, "-");
2590
2591
  }
@@ -2883,12 +2884,23 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2883
2884
  threshold: thresholdValue
2884
2885
  };
2885
2886
  } else {
2886
- const aggregatorPrompt = asString(rawAggregator.prompt);
2887
+ const rawAggPrompt = asString(rawAggregator.prompt);
2888
+ let aggregatorPrompt;
2887
2889
  let promptPath2;
2888
- if (aggregatorPrompt) {
2889
- const resolved = await resolveFileReference2(aggregatorPrompt, searchRoots);
2890
- if (resolved.resolvedPath) {
2891
- promptPath2 = import_node_path5.default.resolve(resolved.resolvedPath);
2890
+ if (rawAggPrompt) {
2891
+ if (rawAggPrompt.startsWith(PROMPT_FILE_PREFIX)) {
2892
+ const fileRef = rawAggPrompt.slice(PROMPT_FILE_PREFIX.length);
2893
+ aggregatorPrompt = fileRef;
2894
+ const resolved = await resolveFileReference2(fileRef, searchRoots);
2895
+ if (resolved.resolvedPath) {
2896
+ promptPath2 = import_node_path5.default.resolve(resolved.resolvedPath);
2897
+ } else {
2898
+ throw new Error(
2899
+ `Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
2900
+ );
2901
+ }
2902
+ } else {
2903
+ aggregatorPrompt = rawAggPrompt;
2892
2904
  }
2893
2905
  }
2894
2906
  aggregator = {
@@ -3468,21 +3480,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
3468
3480
  promptScriptConfig = rawPrompt.config;
3469
3481
  }
3470
3482
  } else if (typeof rawPrompt === "string") {
3471
- prompt = rawPrompt;
3472
- const resolved = await resolveFileReference2(prompt, searchRoots);
3473
- if (resolved.resolvedPath) {
3474
- promptPath = import_node_path5.default.resolve(resolved.resolvedPath);
3475
- try {
3476
- await validateCustomPromptContent(promptPath);
3477
- } catch (error) {
3478
- const message = error instanceof Error ? error.message : String(error);
3479
- throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
3483
+ if (rawPrompt.startsWith(PROMPT_FILE_PREFIX)) {
3484
+ const fileRef = rawPrompt.slice(PROMPT_FILE_PREFIX.length);
3485
+ prompt = fileRef;
3486
+ const resolved = await resolveFileReference2(fileRef, searchRoots);
3487
+ if (resolved.resolvedPath) {
3488
+ promptPath = import_node_path5.default.resolve(resolved.resolvedPath);
3489
+ try {
3490
+ await validateCustomPromptContent(promptPath);
3491
+ } catch (error) {
3492
+ const message = error instanceof Error ? error.message : String(error);
3493
+ throw new Error(`Evaluator '${name}' template (${promptPath}): ${message}`);
3494
+ }
3495
+ } else {
3496
+ throw new Error(
3497
+ `Evaluator '${name}' in '${evalId}': prompt file not found: ${resolved.displayPath}`
3498
+ );
3480
3499
  }
3481
3500
  } else {
3482
- logWarning2(
3483
- `Inline prompt used for evaluator '${name}' in '${evalId}' (file not found: ${resolved.displayPath})`,
3484
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
3485
- );
3501
+ prompt = rawPrompt;
3486
3502
  }
3487
3503
  }
3488
3504
  const _model = asString(rawEvaluator.model);
@@ -5519,7 +5535,7 @@ var OpenAIProvider = class {
5519
5535
  apiKey: config.apiKey,
5520
5536
  baseURL: config.baseURL
5521
5537
  });
5522
- this.model = openai(config.model);
5538
+ this.model = config.apiFormat === "responses" ? openai(config.model) : openai.chat(config.model);
5523
5539
  }
5524
5540
  id;
5525
5541
  kind = "openai";
@@ -10752,21 +10768,27 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
10752
10768
  "OUTPUT_FILE"
10753
10769
  ]);
10754
10770
  var COMMON_TARGET_SETTINGS = [
10771
+ "use_target",
10755
10772
  "provider_batching",
10756
10773
  "providerBatching",
10757
10774
  "subagent_mode_allowed",
10758
- "subagentModeAllowed"
10775
+ "subagentModeAllowed",
10776
+ "fallback_targets",
10777
+ "fallbackTargets"
10759
10778
  ];
10760
10779
  var BASE_TARGET_SCHEMA = import_zod3.z.object({
10761
10780
  name: import_zod3.z.string().min(1, "target name is required"),
10762
- provider: import_zod3.z.string().min(1, "provider is required"),
10781
+ provider: import_zod3.z.string().optional(),
10782
+ use_target: import_zod3.z.string().optional(),
10763
10783
  grader_target: import_zod3.z.string().optional(),
10764
10784
  judge_target: import_zod3.z.string().optional(),
10765
10785
  // backward compat
10766
10786
  workers: import_zod3.z.number().int().min(1).optional(),
10767
10787
  workspace_template: import_zod3.z.string().optional(),
10768
10788
  workspaceTemplate: import_zod3.z.string().optional(),
10769
- subagent_mode_allowed: import_zod3.z.boolean().optional()
10789
+ subagent_mode_allowed: import_zod3.z.boolean().optional(),
10790
+ fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional(),
10791
+ fallbackTargets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
10770
10792
  }).passthrough();
10771
10793
  var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
10772
10794
  var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
@@ -10820,6 +10842,11 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
10820
10842
  `${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
10821
10843
  );
10822
10844
  }
10845
+ if (!parsed.provider) {
10846
+ throw new Error(
10847
+ `${parsed.name}: 'provider' is required (targets with use_target must be resolved before calling resolveTargetDefinition)`
10848
+ );
10849
+ }
10823
10850
  const provider = resolveString(
10824
10851
  parsed.provider,
10825
10852
  env,
@@ -10832,12 +10859,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
10832
10859
  const subagentModeAllowed = resolveOptionalBoolean(
10833
10860
  parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
10834
10861
  );
10862
+ const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
10835
10863
  const base = {
10836
10864
  name: parsed.name,
10837
10865
  graderTarget: parsed.grader_target ?? parsed.judge_target,
10838
10866
  workers: parsed.workers,
10839
10867
  providerBatching,
10840
- subagentModeAllowed
10868
+ subagentModeAllowed,
10869
+ ...fallbackTargets ? { fallbackTargets } : {}
10841
10870
  };
10842
10871
  switch (provider) {
10843
10872
  case "openai":
@@ -11011,6 +11040,14 @@ function resolveAzureConfig(target, env) {
11011
11040
  retry
11012
11041
  };
11013
11042
  }
11043
+ function resolveApiFormat(target, targetName) {
11044
+ const raw = target.api_format ?? target.apiFormat;
11045
+ if (raw === void 0) return void 0;
11046
+ if (raw === "chat" || raw === "responses") return raw;
11047
+ throw new Error(
11048
+ `Invalid api_format '${raw}' for target '${targetName}'. Must be 'chat' or 'responses'.`
11049
+ );
11050
+ }
11014
11051
  function resolveOpenAIConfig(target, env) {
11015
11052
  const endpointSource = target.endpoint ?? target.base_url ?? target.baseUrl;
11016
11053
  const apiKeySource = target.api_key ?? target.apiKey;
@@ -11030,6 +11067,7 @@ function resolveOpenAIConfig(target, env) {
11030
11067
  baseURL,
11031
11068
  apiKey,
11032
11069
  model,
11070
+ apiFormat: resolveApiFormat(target, target.name),
11033
11071
  temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
11034
11072
  maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
11035
11073
  retry
@@ -13364,8 +13402,11 @@ function assertTargetDefinition(value, index, filePath) {
13364
13402
  `targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
13365
13403
  );
13366
13404
  }
13367
- if (typeof provider !== "string" || provider.trim().length === 0) {
13368
- throw new Error(`targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider'`);
13405
+ const hasUseTarget = typeof value.use_target === "string" && value.use_target.trim().length > 0;
13406
+ if (!hasUseTarget && (typeof provider !== "string" || provider.trim().length === 0)) {
13407
+ throw new Error(
13408
+ `targets.yaml entry '${name}' in ${filePath} is missing a valid 'provider' (or use use_target for delegation)`
13409
+ );
13369
13410
  }
13370
13411
  return value;
13371
13412
  }
@@ -18818,10 +18859,20 @@ async function runEvaluation(options) {
18818
18859
  if (resolvedTargetsByName.has(name)) {
18819
18860
  return resolvedTargetsByName.get(name);
18820
18861
  }
18821
- const definition = targetDefinitions.get(name);
18862
+ let definition = targetDefinitions.get(name);
18822
18863
  if (!definition) {
18823
18864
  return void 0;
18824
18865
  }
18866
+ for (let depth = 0; depth < 5; depth++) {
18867
+ const useTarget = definition.use_target;
18868
+ if (typeof useTarget !== "string" || useTarget.trim().length === 0) break;
18869
+ const envMatch = useTarget.trim().match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
18870
+ const resolvedName = envMatch ? envLookup[envMatch[1]] ?? "" : useTarget.trim();
18871
+ if (resolvedName.length === 0) break;
18872
+ const next = targetDefinitions.get(resolvedName);
18873
+ if (!next) break;
18874
+ definition = next;
18875
+ }
18825
18876
  const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
18826
18877
  resolvedTargetsByName.set(name, resolved);
18827
18878
  return resolved;
@@ -19826,6 +19877,7 @@ async function runEvalCase(options) {
19826
19877
  let attempt = 0;
19827
19878
  let providerResponse = cachedResponse;
19828
19879
  let lastError;
19880
+ let targetUsed;
19829
19881
  while (!providerResponse && attempt < attemptBudget) {
19830
19882
  try {
19831
19883
  providerResponse = await invokeProvider(provider, {
@@ -19848,25 +19900,33 @@ async function runEvalCase(options) {
19848
19900
  attempt += 1;
19849
19901
  continue;
19850
19902
  }
19851
- const errorResult = buildErrorResult(
19852
- evalCase,
19853
- target.name,
19854
- nowFn(),
19855
- error,
19856
- promptInputs,
19857
- provider,
19858
- "agent",
19859
- "provider_error",
19860
- verbose
19861
- );
19862
- if (workspacePath) {
19863
- if (forceCleanup) {
19864
- await cleanupWorkspace(workspacePath).catch(() => {
19865
- });
19866
- }
19867
- return { ...errorResult, workspacePath };
19903
+ break;
19904
+ }
19905
+ }
19906
+ if (!providerResponse && target.fallbackTargets?.length && targetResolver) {
19907
+ for (const fallbackName of target.fallbackTargets) {
19908
+ const fallbackProvider = targetResolver(fallbackName);
19909
+ if (!fallbackProvider) {
19910
+ continue;
19911
+ }
19912
+ try {
19913
+ providerResponse = await invokeProvider(fallbackProvider, {
19914
+ evalCase,
19915
+ target,
19916
+ promptInputs,
19917
+ attempt: 0,
19918
+ agentTimeoutMs,
19919
+ signal,
19920
+ cwd: workspacePath,
19921
+ workspaceFile: caseWorkspaceFile ?? suiteWorkspaceFile,
19922
+ captureFileChanges: !!baselineCommit,
19923
+ streamCallbacks: options.streamCallbacks
19924
+ });
19925
+ targetUsed = fallbackName;
19926
+ break;
19927
+ } catch (error) {
19928
+ lastError = error;
19868
19929
  }
19869
- return errorResult;
19870
19930
  }
19871
19931
  }
19872
19932
  if (!providerResponse) {
@@ -19992,8 +20052,10 @@ async function runEvalCase(options) {
19992
20052
  };
19993
20053
  const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
19994
20054
  const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
20055
+ const targetUsedField = targetUsed ? { targetUsed } : {};
19995
20056
  const finalResult = providerError ? {
19996
20057
  ...result,
20058
+ ...targetUsedField,
19997
20059
  evalRun,
19998
20060
  error: providerError,
19999
20061
  executionStatus,
@@ -20005,6 +20067,7 @@ async function runEvalCase(options) {
20005
20067
  afterEachOutput
20006
20068
  } : skippedEvaluatorError ? {
20007
20069
  ...result,
20070
+ ...targetUsedField,
20008
20071
  score: 0,
20009
20072
  evalRun,
20010
20073
  error: skippedEvaluatorError,
@@ -20017,6 +20080,7 @@ async function runEvalCase(options) {
20017
20080
  afterEachOutput
20018
20081
  } : {
20019
20082
  ...result,
20083
+ ...targetUsedField,
20020
20084
  evalRun,
20021
20085
  executionStatus,
20022
20086
  beforeAllOutput,