agentv 4.5.1 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,12 +29,12 @@ import {
29
29
  subscribeToCopilotCliLogEntries,
30
30
  subscribeToCopilotSdkLogEntries,
31
31
  subscribeToPiLogEntries
32
- } from "./chunk-7DRAXDVC.js";
32
+ } from "./chunk-KQQTEWZF.js";
33
33
 
34
34
  // package.json
35
35
  var package_default = {
36
36
  name: "agentv",
37
- version: "4.5.1",
37
+ version: "4.6.0",
38
38
  description: "CLI entry point for AgentV",
39
39
  type: "module",
40
40
  repository: {
@@ -102,9 +102,21 @@ async function resolveEvalPaths(evalPaths, cwd) {
102
102
  if (normalizedInputs.length === 0) {
103
103
  throw new Error("No eval paths provided.");
104
104
  }
105
+ const includePatterns = [];
106
+ const ignorePatterns = [];
107
+ for (const input of normalizedInputs) {
108
+ if (input.startsWith("!")) {
109
+ ignorePatterns.push(input.slice(1));
110
+ } else {
111
+ includePatterns.push(input);
112
+ }
113
+ }
114
+ if (includePatterns.length === 0) {
115
+ throw new Error("No eval paths provided (only negation patterns found).");
116
+ }
105
117
  const unmatched = [];
106
118
  const results = /* @__PURE__ */ new Set();
107
- for (const pattern of normalizedInputs) {
119
+ for (const pattern of includePatterns) {
108
120
  const candidatePath = path.isAbsolute(pattern) ? path.normalize(pattern) : path.resolve(cwd, pattern);
109
121
  try {
110
122
  const stats = await stat(candidatePath);
@@ -119,7 +131,8 @@ async function resolveEvalPaths(evalPaths, cwd) {
119
131
  onlyFiles: true,
120
132
  unique: true,
121
133
  dot: true,
122
- followSymbolicLinks: true
134
+ followSymbolicLinks: true,
135
+ ignore: ignorePatterns
123
136
  });
124
137
  if (dirMatches.length === 0) {
125
138
  unmatched.push(pattern);
@@ -139,7 +152,8 @@ async function resolveEvalPaths(evalPaths, cwd) {
139
152
  onlyFiles: true,
140
153
  unique: true,
141
154
  dot: true,
142
- followSymbolicLinks: true
155
+ followSymbolicLinks: true,
156
+ ignore: ignorePatterns
143
157
  });
144
158
  const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl|json)$/i.test(filePath));
145
159
  if (yamlMatches.length === 0) {
@@ -3114,6 +3128,8 @@ var OPENAI_SETTINGS = /* @__PURE__ */ new Set([
3114
3128
  "model",
3115
3129
  "deployment",
3116
3130
  "variant",
3131
+ "api_format",
3132
+ "apiFormat",
3117
3133
  "temperature",
3118
3134
  "max_output_tokens",
3119
3135
  "maxTokens"
@@ -3506,16 +3522,17 @@ async function validateTargetsFile(filePath) {
3506
3522
  });
3507
3523
  }
3508
3524
  const provider = target.provider;
3525
+ const hasUseTarget = typeof target.use_target === "string" && target.use_target.trim().length > 0;
3509
3526
  const providerValue = typeof provider === "string" ? provider.trim().toLowerCase() : void 0;
3510
3527
  const isTemplated = typeof provider === "string" && /^\$\{\{.+\}\}$/.test(provider.trim());
3511
- if (typeof provider !== "string" || provider.trim().length === 0) {
3528
+ if (!hasUseTarget && (typeof provider !== "string" || provider.trim().length === 0)) {
3512
3529
  errors.push({
3513
3530
  severity: "error",
3514
3531
  filePath: absolutePath,
3515
3532
  location: `${location}.provider`,
3516
- message: "Missing or invalid 'provider' field (must be a non-empty string)"
3533
+ message: "Missing or invalid 'provider' field (must be a non-empty string, or use use_target for delegation)"
3517
3534
  });
3518
- } else if (!isTemplated && !knownProviders.includes(provider)) {
3535
+ } else if (typeof provider === "string" && !isTemplated && !knownProviders.includes(provider)) {
3519
3536
  errors.push({
3520
3537
  severity: "warning",
3521
3538
  filePath: absolutePath,
@@ -3761,6 +3778,34 @@ var ANSI_RESET3 = "\x1B[0m";
3761
3778
  function isTTY() {
3762
3779
  return process.stdout.isTTY ?? false;
3763
3780
  }
3781
/**
 * Look up a target definition by name and follow its `use_target` delegation
 * chain until a definition that does not delegate is reached.
 *
 * A `use_target` value is either a literal target name or a single
 * `${{ NAME }}` placeholder, which is replaced with `env[NAME]`. Resolution
 * stops at the current definition when `use_target` is missing, null, blank,
 * or a placeholder whose variable is unset or blank.
 *
 * @param {string} name - Name of the target to start from.
 * @param {Array<object>} definitions - Target definitions; each is matched on `name`.
 * @param {object} env - Key/value map used to expand `${{ NAME }}` placeholders.
 * @param {string} targetsFilePath - Path of the targets file, used in error messages only.
 * @returns {object} The definition at the end of the delegation chain.
 * @throws {Error} If `name` or a delegated name is not defined, if the chain
 *   loops back on itself, or if it is still delegating after 5 hops.
 */
function resolveUseTarget(name, definitions, env, targetsFilePath) {
  const maxDepth = 5;
  const findDefinition = (targetName) => definitions.find((d) => d.name === targetName);
  // Computed lazily: only needed when building a "not found" message.
  const availableNames = () => listTargetNames(definitions).join(", ");

  let current = findDefinition(name);
  if (!current) {
    throw new Error(
      `Target '${name}' not found in ${targetsFilePath}. Available targets: ${availableNames()}`
    );
  }
  // Names visited so far, in order, so a loop can be reported with its full path.
  const chain = [current.name];
  for (let depth = 0; ; depth++) {
    const useTarget = current.use_target;
    if (useTarget === void 0 || useTarget === null) break;
    const raw = String(useTarget).trim();
    if (raw.length === 0) break;
    const envMatch = raw.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
    const resolved = (envMatch ? (env[envMatch[1]] ?? "") : raw).trim();
    if (resolved.length === 0) break;
    if (chain.includes(resolved)) {
      // Without this check a cycle would spin until the depth limit and then
      // hand back a definition that still delegates.
      throw new Error(
        `Target '${name}' has a circular use_target chain in ${targetsFilePath}: ${[...chain, resolved].join(" -> ")}`
      );
    }
    if (depth >= maxDepth) {
      // Fail loudly instead of returning a half-resolved definition.
      throw new Error(
        `Target '${name}' use_target chain exceeds ${maxDepth} levels in ${targetsFilePath}: ${[...chain, resolved].join(" -> ")}`
      );
    }
    const next = findDefinition(resolved);
    if (!next) {
      throw new Error(
        `Target '${name}' use_target '${resolved}' not found in ${targetsFilePath}. Available targets: ${availableNames()}`
      );
    }
    chain.push(resolved);
    current = next;
  }
  return current;
}
3764
3809
  async function readTestSuiteTarget(testFilePath) {
3765
3810
  const metadata = await readTestSuiteMetadata(testFilePath);
3766
3811
  return metadata.target;
@@ -3824,15 +3869,7 @@ Errors in ${targetsFilePath}:`);
3824
3869
  const definitions = await readTargetDefinitions(targetsFilePath);
3825
3870
  const fileTargetName = await readTestSuiteTarget(testFilePath);
3826
3871
  const targetChoice = pickTargetName({ cliTargetName, fileTargetName });
3827
- const targetDefinition = definitions.find(
3828
- (definition) => definition.name === targetChoice.name
3829
- );
3830
- if (!targetDefinition) {
3831
- const available = listTargetNames(definitions).join(", ");
3832
- throw new Error(
3833
- `Target '${targetChoice.name}' not found in ${targetsFilePath}. Available targets: ${available}`
3834
- );
3835
- }
3872
+ const targetDefinition = resolveUseTarget(targetChoice.name, definitions, env, targetsFilePath);
3836
3873
  if (dryRun) {
3837
3874
  const mockTarget = {
3838
3875
  kind: "mock",
@@ -3915,15 +3952,7 @@ Errors in ${targetsFilePath}:`);
3915
3952
  const definitions = await readTargetDefinitions(targetsFilePath);
3916
3953
  const results = [];
3917
3954
  for (const name of targetNames) {
3918
- const targetDefinition = definitions.find(
3919
- (definition) => definition.name === name
3920
- );
3921
- if (!targetDefinition) {
3922
- const available = listTargetNames(definitions).join(", ");
3923
- throw new Error(
3924
- `Target '${name}' not found in ${targetsFilePath}. Available targets: ${available}`
3925
- );
3926
- }
3955
+ const targetDefinition = resolveUseTarget(name, definitions, env, targetsFilePath);
3927
3956
  if (dryRun) {
3928
3957
  const mockTarget = {
3929
3958
  kind: "mock",
@@ -4008,6 +4037,24 @@ function normalizeOptionalNumber(value) {
4008
4037
  function normalizeWorkspaceMode(value) {
4009
4038
  return value === "pooled" || value === "temp" || value === "static" ? value : void 0;
4010
4039
  }
4040
/**
 * Coerce an option value into a list of non-blank strings.
 *
 * Arrays are filtered down to their string entries that contain at least one
 * non-whitespace character (entries are kept as-is, not trimmed). A single
 * non-blank string is accepted as a one-element list so that a scalar value
 * is not silently discarded. Anything else yields an empty list.
 *
 * @param {*} value - Raw option value.
 * @returns {string[]} The usable string entries, possibly empty.
 */
function normalizeStringArray(value) {
  let candidates;
  if (Array.isArray(value)) {
    candidates = value;
  } else if (typeof value === "string") {
    candidates = [value];
  } else {
    return [];
  }
  return candidates.filter((entry) => typeof entry === "string" && entry.trim().length > 0);
}
4046
/**
 * Decide whether a set of declared tags passes the include/exclude filters.
 *
 * The tags pass when every entry of `includeTags` is present AND no entry of
 * `excludeTags` is present. Empty filter lists impose no constraint.
 *
 * @param {string[]|string|Iterable<string>|null|undefined} fileTags - Declared
 *   tags. A single non-blank string counts as one tag; null, undefined, blank
 *   strings and non-iterable values count as "no tags".
 * @param {string[]} includeTags - Tags that must all be present.
 * @param {string[]} excludeTags - Tags of which none may be present.
 * @returns {boolean} `true` when the tags satisfy both filters.
 */
function matchesTagFilters(fileTags, includeTags, excludeTags) {
  let declared;
  if (typeof fileTags === "string") {
    // Passing a string straight to `new Set` would split it into characters.
    declared = fileTags.trim().length > 0 ? [fileTags] : [];
  } else if (fileTags != null && typeof fileTags[Symbol.iterator] === "function") {
    declared = fileTags;
  } else {
    // null/undefined, or a non-iterable value that `new Set` would throw on.
    declared = [];
  }
  const tags = new Set(declared);
  const hasAllRequired = includeTags.every((required) => tags.has(required));
  const hasNoExcluded = !excludeTags.some((excluded) => tags.has(excluded));
  return hasAllRequired && hasNoExcluded;
}
4011
4058
  function normalizeOutputMessages(cliValue) {
4012
4059
  if (cliValue === void 0) {
4013
4060
  return 1;
@@ -4116,7 +4163,9 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
4116
4163
  graderTarget: normalizeString(rawOptions.graderTarget),
4117
4164
  model: normalizeString(rawOptions.model),
4118
4165
  outputMessages: normalizeOutputMessages(normalizeString(rawOptions.outputMessages)),
4119
- threshold: normalizeOptionalNumber(rawOptions.threshold)
4166
+ threshold: normalizeOptionalNumber(rawOptions.threshold),
4167
+ tags: normalizeStringArray(rawOptions.tag),
4168
+ excludeTags: normalizeStringArray(rawOptions.excludeTag)
4120
4169
  };
4121
4170
  }
4122
4171
  async function ensureFileExists(filePath, description) {
@@ -4266,7 +4315,8 @@ async function prepareFileMetadata(params) {
4266
4315
  yamlCachePath: suite.cacheConfig?.cachePath,
4267
4316
  totalBudgetUsd: suite.totalBudgetUsd,
4268
4317
  failOnError: suite.failOnError,
4269
- threshold: suite.threshold
4318
+ threshold: suite.threshold,
4319
+ tags: suite.metadata?.tags
4270
4320
  };
4271
4321
  }
4272
4322
  async function runWithLimit(items, limit, task) {
@@ -4484,7 +4534,7 @@ async function runEvalCommand(input) {
4484
4534
  const useFileExport = !!options.otelFile;
4485
4535
  if (options.exportOtel || useFileExport) {
4486
4536
  try {
4487
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-VWMHFUXR.js");
4537
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-FBPCDLOY.js");
4488
4538
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4489
4539
  let headers = {};
4490
4540
  if (options.otelBackend) {
@@ -4578,6 +4628,25 @@ async function runEvalCommand(input) {
4578
4628
  });
4579
4629
  fileMetadata.set(testFilePath, meta);
4580
4630
  }
4631
+ const hasTagFilters = options.tags.length > 0 || options.excludeTags.length > 0;
4632
+ if (hasTagFilters) {
4633
+ const skippedFiles = [];
4634
+ for (const [testFilePath, meta] of fileMetadata.entries()) {
4635
+ if (!matchesTagFilters(meta.tags, options.tags, options.excludeTags)) {
4636
+ fileMetadata.delete(testFilePath);
4637
+ skippedFiles.push(path15.relative(cwd, testFilePath));
4638
+ }
4639
+ }
4640
+ if (skippedFiles.length > 0 && options.verbose) {
4641
+ console.log(
4642
+ `Skipped ${skippedFiles.length} eval file(s) by tag filter: ${skippedFiles.join(", ")}`
4643
+ );
4644
+ }
4645
+ if (fileMetadata.size === 0) {
4646
+ console.log("No eval files matched the tag filters. Nothing to run.");
4647
+ return;
4648
+ }
4649
+ }
4581
4650
  const firstMeta = fileMetadata.values().next().value;
4582
4651
  const yamlCacheEnabled = firstMeta?.yamlCache;
4583
4652
  const yamlCachePath = firstMeta?.yamlCachePath;
@@ -4667,8 +4736,9 @@ async function runEvalCommand(input) {
4667
4736
  }
4668
4737
  }
4669
4738
  }
4739
+ const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
4670
4740
  try {
4671
- await runWithLimit(resolvedTestFiles, fileConcurrency, async (testFilePath) => {
4741
+ await runWithLimit(activeTestFiles, fileConcurrency, async (testFilePath) => {
4672
4742
  const targetPrep = fileMetadata.get(testFilePath);
4673
4743
  if (!targetPrep) {
4674
4744
  throw new Error(`Missing metadata for ${testFilePath}`);
@@ -4685,30 +4755,56 @@ async function runEvalCommand(input) {
4685
4755
  if (applicableEvalCases.length === 0) {
4686
4756
  return [];
4687
4757
  }
4688
- const result = await runSingleEvalFile({
4689
- testFilePath,
4690
- cwd,
4691
- repoRoot,
4692
- options,
4693
- outputWriter,
4694
- otelExporter,
4695
- cache,
4696
- evaluationRunner,
4697
- workersOverride: perFileWorkers,
4698
- yamlWorkers: targetPrep.yamlWorkers,
4699
- progressReporter,
4700
- seenEvalCases,
4701
- displayIdTracker,
4702
- selection,
4703
- inlineTargetLabel,
4704
- evalCases: applicableEvalCases,
4705
- trialsConfig: targetPrep.trialsConfig,
4706
- matrixMode: targetPrep.selections.length > 1,
4707
- totalBudgetUsd: targetPrep.totalBudgetUsd,
4708
- failOnError: targetPrep.failOnError,
4709
- threshold: resolvedThreshold
4710
- });
4711
- return result.results;
4758
+ try {
4759
+ const result = await runSingleEvalFile({
4760
+ testFilePath,
4761
+ cwd,
4762
+ repoRoot,
4763
+ options,
4764
+ outputWriter,
4765
+ otelExporter,
4766
+ cache,
4767
+ evaluationRunner,
4768
+ workersOverride: perFileWorkers,
4769
+ yamlWorkers: targetPrep.yamlWorkers,
4770
+ progressReporter,
4771
+ seenEvalCases,
4772
+ displayIdTracker,
4773
+ selection,
4774
+ inlineTargetLabel,
4775
+ evalCases: applicableEvalCases,
4776
+ trialsConfig: targetPrep.trialsConfig,
4777
+ matrixMode: targetPrep.selections.length > 1,
4778
+ totalBudgetUsd: targetPrep.totalBudgetUsd,
4779
+ failOnError: targetPrep.failOnError,
4780
+ threshold: resolvedThreshold
4781
+ });
4782
+ return result.results;
4783
+ } catch (fileError) {
4784
+ const message = fileError instanceof Error ? fileError.message : String(fileError);
4785
+ console.error(`
4786
+ \u26A0 Eval file failed: ${path15.basename(testFilePath)} \u2014 ${message}
4787
+ `);
4788
+ const errorResults = applicableEvalCases.map((evalCase) => ({
4789
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
4790
+ testId: evalCase.id,
4791
+ score: 0,
4792
+ assertions: [],
4793
+ output: [],
4794
+ scores: [],
4795
+ error: message,
4796
+ executionStatus: "execution_error",
4797
+ failureStage: "setup",
4798
+ failureReasonCode: "setup_error",
4799
+ durationMs: 0,
4800
+ tokenUsage: { input: 0, output: 0, inputTokens: 0, outputTokens: 0 },
4801
+ target: selection.targetName
4802
+ }));
4803
+ for (const errResult of errorResults) {
4804
+ await outputWriter.append(errResult);
4805
+ }
4806
+ return errorResults;
4807
+ }
4712
4808
  })
4713
4809
  );
4714
4810
  for (const results of targetResults) {
@@ -4738,7 +4834,7 @@ async function runEvalCommand(input) {
4738
4834
  console.log(`Benchmark written to: ${benchmarkPath}`);
4739
4835
  }
4740
4836
  if (usesDefaultArtifactWorkspace) {
4741
- const evalFile = resolvedTestFiles.length === 1 ? resolvedTestFiles[0] : "";
4837
+ const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
4742
4838
  const workspaceDir = path15.dirname(outputPath);
4743
4839
  const {
4744
4840
  testArtifactDir,
@@ -4758,7 +4854,7 @@ async function runEvalCommand(input) {
4758
4854
  }
4759
4855
  if (options.artifacts) {
4760
4856
  const artifactsDir = path15.resolve(options.artifacts);
4761
- const evalFile = resolvedTestFiles.length === 1 ? resolvedTestFiles[0] : "";
4857
+ const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
4762
4858
  const {
4763
4859
  testArtifactDir,
4764
4860
  indexPath,
@@ -4797,7 +4893,7 @@ Results written to: ${outputPath}`);
4797
4893
  await saveRunCache(cwd, outputPath).catch(() => void 0);
4798
4894
  }
4799
4895
  if (summary.executionErrorCount > 0 && !options.retryErrors) {
4800
- const evalFileArgs = resolvedTestFiles.map((f) => path15.relative(cwd, f)).join(" ");
4896
+ const evalFileArgs = activeTestFiles.map((f) => path15.relative(cwd, f)).join(" ");
4801
4897
  const targetFlag = options.target ? ` --target ${options.target}` : "";
4802
4898
  const relativeOutputPath = path15.relative(cwd, outputPath);
4803
4899
  console.log(
@@ -4809,7 +4905,7 @@ Tip: ${summary.executionErrorCount} execution error(s) detected. Re-run failed t
4809
4905
  return {
4810
4906
  executionErrorCount: summary.executionErrorCount,
4811
4907
  outputPath,
4812
- testFiles: resolvedTestFiles,
4908
+ testFiles: activeTestFiles,
4813
4909
  target: options.target,
4814
4910
  thresholdFailed
4815
4911
  };
@@ -4872,4 +4968,4 @@ export {
4872
4968
  selectTarget,
4873
4969
  runEvalCommand
4874
4970
  };
4875
- //# sourceMappingURL=chunk-5DEZ72J3.js.map
4971
+ //# sourceMappingURL=chunk-5GZJIXTY.js.map