agentv 4.5.1 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-M65PVDQ5.js
304
+ // ../../packages/core/dist/chunk-AIQ5FO4G.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-M65PVDQ5.js
422
+ // ../../packages/core/dist/chunk-AIQ5FO4G.js
423
423
  import { readFile as readFile2 } from "node:fs/promises";
424
424
  import path3 from "node:path";
425
425
  import fg from "fast-glob";
@@ -798,21 +798,27 @@ var CLI_PLACEHOLDERS = /* @__PURE__ */ new Set([
798
798
  "OUTPUT_FILE"
799
799
  ]);
800
800
  var COMMON_TARGET_SETTINGS = [
801
+ "use_target",
801
802
  "provider_batching",
802
803
  "providerBatching",
803
804
  "subagent_mode_allowed",
804
- "subagentModeAllowed"
805
+ "subagentModeAllowed",
806
+ "fallback_targets",
807
+ "fallbackTargets"
805
808
  ];
806
809
  var BASE_TARGET_SCHEMA = external_exports2.object({
807
810
  name: external_exports2.string().min(1, "target name is required"),
808
- provider: external_exports2.string().min(1, "provider is required"),
811
+ provider: external_exports2.string().optional(),
812
+ use_target: external_exports2.string().optional(),
809
813
  grader_target: external_exports2.string().optional(),
810
814
  judge_target: external_exports2.string().optional(),
811
815
  // backward compat
812
816
  workers: external_exports2.number().int().min(1).optional(),
813
817
  workspace_template: external_exports2.string().optional(),
814
818
  workspaceTemplate: external_exports2.string().optional(),
815
- subagent_mode_allowed: external_exports2.boolean().optional()
819
+ subagent_mode_allowed: external_exports2.boolean().optional(),
820
+ fallback_targets: external_exports2.array(external_exports2.string().min(1)).optional(),
821
+ fallbackTargets: external_exports2.array(external_exports2.string().min(1)).optional()
816
822
  }).passthrough();
817
823
  var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
818
824
  var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
@@ -866,6 +872,11 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
866
872
  `${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
867
873
  );
868
874
  }
875
+ if (!parsed.provider) {
876
+ throw new Error(
877
+ `${parsed.name}: 'provider' is required (targets with use_target must be resolved before calling resolveTargetDefinition)`
878
+ );
879
+ }
869
880
  const provider = resolveString(
870
881
  parsed.provider,
871
882
  env,
@@ -878,12 +889,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
878
889
  const subagentModeAllowed = resolveOptionalBoolean(
879
890
  parsed.subagent_mode_allowed ?? parsed.subagentModeAllowed
880
891
  );
892
+ const fallbackTargets = parsed.fallback_targets ?? parsed.fallbackTargets;
881
893
  const base = {
882
894
  name: parsed.name,
883
895
  graderTarget: parsed.grader_target ?? parsed.judge_target,
884
896
  workers: parsed.workers,
885
897
  providerBatching,
886
- subagentModeAllowed
898
+ subagentModeAllowed,
899
+ ...fallbackTargets ? { fallbackTargets } : {}
887
900
  };
888
901
  switch (provider) {
889
902
  case "openai":
@@ -1057,6 +1070,14 @@ function resolveAzureConfig(target, env) {
1057
1070
  retry
1058
1071
  };
1059
1072
  }
1073
+ function resolveApiFormat(target, targetName) {
1074
+ const raw = target.api_format ?? target.apiFormat;
1075
+ if (raw === void 0) return void 0;
1076
+ if (raw === "chat" || raw === "responses") return raw;
1077
+ throw new Error(
1078
+ `Invalid api_format '${raw}' for target '${targetName}'. Must be 'chat' or 'responses'.`
1079
+ );
1080
+ }
1060
1081
  function resolveOpenAIConfig(target, env) {
1061
1082
  const endpointSource = target.endpoint ?? target.base_url ?? target.baseUrl;
1062
1083
  const apiKeySource = target.api_key ?? target.apiKey;
@@ -1076,6 +1097,7 @@ function resolveOpenAIConfig(target, env) {
1076
1097
  baseURL,
1077
1098
  apiKey,
1078
1099
  model,
1100
+ apiFormat: resolveApiFormat(target, target.name),
1079
1101
  temperature: resolveOptionalNumber(temperatureSource, `${target.name} temperature`),
1080
1102
  maxOutputTokens: resolveOptionalNumber(maxTokensSource, `${target.name} max output tokens`),
1081
1103
  retry
@@ -15043,6 +15065,7 @@ function validateTemplateVariables(content, source) {
15043
15065
  }
15044
15066
  var ANSI_YELLOW3 = "\x1B[33m";
15045
15067
  var ANSI_RESET4 = "\x1B[0m";
15068
+ var PROMPT_FILE_PREFIX = "file://";
15046
15069
  function normalizeEvaluatorType(type) {
15047
15070
  return type.replace(/_/g, "-");
15048
15071
  }
@@ -15341,12 +15364,23 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15341
15364
  threshold: thresholdValue
15342
15365
  };
15343
15366
  } else {
15344
- const aggregatorPrompt = asString(rawAggregator.prompt);
15367
+ const rawAggPrompt = asString(rawAggregator.prompt);
15368
+ let aggregatorPrompt;
15345
15369
  let promptPath2;
15346
- if (aggregatorPrompt) {
15347
- const resolved = await resolveFileReference22(aggregatorPrompt, searchRoots);
15348
- if (resolved.resolvedPath) {
15349
- promptPath2 = path42.resolve(resolved.resolvedPath);
15370
+ if (rawAggPrompt) {
15371
+ if (rawAggPrompt.startsWith(PROMPT_FILE_PREFIX)) {
15372
+ const fileRef = rawAggPrompt.slice(PROMPT_FILE_PREFIX.length);
15373
+ aggregatorPrompt = fileRef;
15374
+ const resolved = await resolveFileReference22(fileRef, searchRoots);
15375
+ if (resolved.resolvedPath) {
15376
+ promptPath2 = path42.resolve(resolved.resolvedPath);
15377
+ } else {
15378
+ throw new Error(
15379
+ `Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
15380
+ );
15381
+ }
15382
+ } else {
15383
+ aggregatorPrompt = rawAggPrompt;
15350
15384
  }
15351
15385
  }
15352
15386
  aggregator = {
@@ -15926,21 +15960,25 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
15926
15960
  promptScriptConfig = rawPrompt.config;
15927
15961
  }
15928
15962
  } else if (typeof rawPrompt === "string") {
15929
- prompt = rawPrompt;
15930
- const resolved = await resolveFileReference22(prompt, searchRoots);
15931
- if (resolved.resolvedPath) {
15932
- promptPath = path42.resolve(resolved.resolvedPath);
15933
- try {
15934
- await validateCustomPromptContent(promptPath);
15935
- } catch (error) {
15936
- const message = error instanceof Error ? error.message : String(error);
15937
- throw new Error(`Evaluator '${name21}' template (${promptPath}): ${message}`);
15963
+ if (rawPrompt.startsWith(PROMPT_FILE_PREFIX)) {
15964
+ const fileRef = rawPrompt.slice(PROMPT_FILE_PREFIX.length);
15965
+ prompt = fileRef;
15966
+ const resolved = await resolveFileReference22(fileRef, searchRoots);
15967
+ if (resolved.resolvedPath) {
15968
+ promptPath = path42.resolve(resolved.resolvedPath);
15969
+ try {
15970
+ await validateCustomPromptContent(promptPath);
15971
+ } catch (error) {
15972
+ const message = error instanceof Error ? error.message : String(error);
15973
+ throw new Error(`Evaluator '${name21}' template (${promptPath}): ${message}`);
15974
+ }
15975
+ } else {
15976
+ throw new Error(
15977
+ `Evaluator '${name21}' in '${evalId}': prompt file not found: ${resolved.displayPath}`
15978
+ );
15938
15979
  }
15939
15980
  } else {
15940
- logWarning2(
15941
- `Inline prompt used for evaluator '${name21}' in '${evalId}' (file not found: ${resolved.displayPath})`,
15942
- resolved.attempted.length > 0 ? resolved.attempted.map((attempt) => ` Tried: ${attempt}`) : void 0
15943
- );
15981
+ prompt = rawPrompt;
15944
15982
  }
15945
15983
  }
15946
15984
  const _model = asString(rawEvaluator.model);
@@ -17811,7 +17849,7 @@ var OpenAIProvider = class {
17811
17849
  apiKey: config.apiKey,
17812
17850
  baseURL: config.baseURL
17813
17851
  });
17814
- this.model = openai(config.model);
17852
+ this.model = config.apiFormat === "responses" ? openai(config.model) : openai.chat(config.model);
17815
17853
  }
17816
17854
  id;
17817
17855
  kind = "openai";
@@ -24132,8 +24170,11 @@ function assertTargetDefinition(value, index, filePath) {
24132
24170
  `targets.yaml entry at index ${index} in ${filePath} is missing a valid 'name'`
24133
24171
  );
24134
24172
  }
24135
- if (typeof provider !== "string" || provider.trim().length === 0) {
24136
- throw new Error(`targets.yaml entry '${name21}' in ${filePath} is missing a valid 'provider'`);
24173
+ const hasUseTarget = typeof value.use_target === "string" && value.use_target.trim().length > 0;
24174
+ if (!hasUseTarget && (typeof provider !== "string" || provider.trim().length === 0)) {
24175
+ throw new Error(
24176
+ `targets.yaml entry '${name21}' in ${filePath} is missing a valid 'provider' (or use use_target for delegation)`
24177
+ );
24137
24178
  }
24138
24179
  return value;
24139
24180
  }
@@ -29427,10 +29468,20 @@ async function runEvaluation(options) {
29427
29468
  if (resolvedTargetsByName.has(name21)) {
29428
29469
  return resolvedTargetsByName.get(name21);
29429
29470
  }
29430
- const definition = targetDefinitions.get(name21);
29471
+ let definition = targetDefinitions.get(name21);
29431
29472
  if (!definition) {
29432
29473
  return void 0;
29433
29474
  }
29475
+ for (let depth = 0; depth < 5; depth++) {
29476
+ const useTarget = definition.use_target;
29477
+ if (typeof useTarget !== "string" || useTarget.trim().length === 0) break;
29478
+ const envMatch = useTarget.trim().match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
29479
+ const resolvedName = envMatch ? envLookup[envMatch[1]] ?? "" : useTarget.trim();
29480
+ if (resolvedName.length === 0) break;
29481
+ const next = targetDefinitions.get(resolvedName);
29482
+ if (!next) break;
29483
+ definition = next;
29484
+ }
29434
29485
  const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
29435
29486
  resolvedTargetsByName.set(name21, resolved);
29436
29487
  return resolved;
@@ -30435,6 +30486,7 @@ async function runEvalCase(options) {
30435
30486
  let attempt = 0;
30436
30487
  let providerResponse = cachedResponse;
30437
30488
  let lastError;
30489
+ let targetUsed;
30438
30490
  while (!providerResponse && attempt < attemptBudget) {
30439
30491
  try {
30440
30492
  providerResponse = await invokeProvider(provider, {
@@ -30457,25 +30509,33 @@ async function runEvalCase(options) {
30457
30509
  attempt += 1;
30458
30510
  continue;
30459
30511
  }
30460
- const errorResult = buildErrorResult(
30461
- evalCase,
30462
- target.name,
30463
- nowFn(),
30464
- error,
30465
- promptInputs,
30466
- provider,
30467
- "agent",
30468
- "provider_error",
30469
- verbose
30470
- );
30471
- if (workspacePath) {
30472
- if (forceCleanup) {
30473
- await cleanupWorkspace(workspacePath).catch(() => {
30474
- });
30475
- }
30476
- return { ...errorResult, workspacePath };
30512
+ break;
30513
+ }
30514
+ }
30515
+ if (!providerResponse && target.fallbackTargets?.length && targetResolver) {
30516
+ for (const fallbackName of target.fallbackTargets) {
30517
+ const fallbackProvider = targetResolver(fallbackName);
30518
+ if (!fallbackProvider) {
30519
+ continue;
30520
+ }
30521
+ try {
30522
+ providerResponse = await invokeProvider(fallbackProvider, {
30523
+ evalCase,
30524
+ target,
30525
+ promptInputs,
30526
+ attempt: 0,
30527
+ agentTimeoutMs,
30528
+ signal,
30529
+ cwd: workspacePath,
30530
+ workspaceFile: caseWorkspaceFile ?? suiteWorkspaceFile,
30531
+ captureFileChanges: !!baselineCommit,
30532
+ streamCallbacks: options.streamCallbacks
30533
+ });
30534
+ targetUsed = fallbackName;
30535
+ break;
30536
+ } catch (error) {
30537
+ lastError = error;
30477
30538
  }
30478
- return errorResult;
30479
30539
  }
30480
30540
  }
30481
30541
  if (!providerResponse) {
@@ -30601,8 +30661,10 @@ async function runEvalCase(options) {
30601
30661
  };
30602
30662
  const skippedEvaluatorError = buildSkippedEvaluatorError(result.scores);
30603
30663
  const executionStatus = providerError || skippedEvaluatorError ? "execution_error" : classifyQualityStatus(result.score, caseThreshold);
30664
+ const targetUsedField = targetUsed ? { targetUsed } : {};
30604
30665
  const finalResult = providerError ? {
30605
30666
  ...result,
30667
+ ...targetUsedField,
30606
30668
  evalRun,
30607
30669
  error: providerError,
30608
30670
  executionStatus,
@@ -30614,6 +30676,7 @@ async function runEvalCase(options) {
30614
30676
  afterEachOutput
30615
30677
  } : skippedEvaluatorError ? {
30616
30678
  ...result,
30679
+ ...targetUsedField,
30617
30680
  score: 0,
30618
30681
  evalRun,
30619
30682
  error: skippedEvaluatorError,
@@ -30626,6 +30689,7 @@ async function runEvalCase(options) {
30626
30689
  afterEachOutput
30627
30690
  } : {
30628
30691
  ...result,
30692
+ ...targetUsedField,
30629
30693
  evalRun,
30630
30694
  executionStatus,
30631
30695
  beforeAllOutput,
@@ -32771,4 +32835,4 @@ export {
32771
32835
  readTranscriptFile,
32772
32836
  createAgentKernel
32773
32837
  };
32774
- //# sourceMappingURL=chunk-7DRAXDVC.js.map
32838
+ //# sourceMappingURL=chunk-KQQTEWZF.js.map