agentv 2.13.0 → 2.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@ import {
11
11
  validateEvalFile,
12
12
  validateFileReferences,
13
13
  validateTargetsFile
14
- } from "./chunk-UWDI4UVN.js";
14
+ } from "./chunk-5646K2XJ.js";
15
15
  import {
16
16
  RepoManager,
17
17
  assembleLlmJudgePrompt,
@@ -26,7 +26,7 @@ import {
26
26
  toCamelCaseDeep,
27
27
  toSnakeCaseDeep,
28
28
  trimBaselineResult
29
- } from "./chunk-FSBZM3HT.js";
29
+ } from "./chunk-OQN2GDEU.js";
30
30
  import {
31
31
  __commonJS,
32
32
  __esm,
@@ -3484,7 +3484,7 @@ tests:
3484
3484
  input: "Hello, how are you?"
3485
3485
  expected_output: "I'm doing well, thank you for asking!"
3486
3486
  assert:
3487
- - type: llm_judge
3487
+ - type: llm-judge
3488
3488
  rubric:
3489
3489
  accuracy:
3490
3490
  weight: 0.6
@@ -3763,7 +3763,7 @@ var evalPromptJudgeCommand = command({
3763
3763
  });
3764
3764
  outputs.push({
3765
3765
  name: "default_llm_judge",
3766
- type: "llm_judge",
3766
+ type: "llm-judge",
3767
3767
  status: "prompt_ready",
3768
3768
  prompt: {
3769
3769
  system_prompt: assembly.systemPrompt,
@@ -3781,7 +3781,7 @@ var evalPromptJudgeCommand = command({
3781
3781
  });
3782
3782
  async function processEvaluator(config, evalCase, candidate, promptInputs) {
3783
3783
  switch (config.type) {
3784
- case "code": {
3784
+ case "code-judge": {
3785
3785
  const codeConfig = config;
3786
3786
  const script = codeConfig.command ?? codeConfig.script ?? [];
3787
3787
  const scriptCwd = codeConfig.resolvedCwd ?? codeConfig.cwd;
@@ -3806,14 +3806,14 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
3806
3806
  const parsed = JSON.parse(stdout);
3807
3807
  return {
3808
3808
  name: codeConfig.name,
3809
- type: "code_judge",
3809
+ type: "code-judge",
3810
3810
  status: "completed",
3811
3811
  result: parsed
3812
3812
  };
3813
3813
  } catch (error) {
3814
3814
  return {
3815
3815
  name: codeConfig.name,
3816
- type: "code_judge",
3816
+ type: "code-judge",
3817
3817
  status: "completed",
3818
3818
  result: {
3819
3819
  score: 0,
@@ -3822,7 +3822,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
3822
3822
  };
3823
3823
  }
3824
3824
  }
3825
- case "llm_judge": {
3825
+ case "llm-judge": {
3826
3826
  const llmConfig = config;
3827
3827
  const assembly = assembleLlmJudgePrompt({
3828
3828
  evalCase,
@@ -3832,7 +3832,7 @@ async function processEvaluator(config, evalCase, candidate, promptInputs) {
3832
3832
  });
3833
3833
  return {
3834
3834
  name: llmConfig.name,
3835
- type: "llm_judge",
3835
+ type: "llm-judge",
3836
3836
  status: "prompt_ready",
3837
3837
  prompt: {
3838
3838
  system_prompt: assembly.systemPrompt,
@@ -3898,7 +3898,7 @@ var evalPromptOverviewCommand = command({
3898
3898
  "",
3899
3899
  "The output contains an `evaluators` array. Each evaluator has a `status`:",
3900
3900
  "",
3901
- '- **`"completed"`** \u2014 Score is final (code_judge ran deterministically). Read `result.score` (0.0\u20131.0).',
3901
+ '- **`"completed"`** \u2014 Score is final (code-judge ran deterministically). Read `result.score` (0.0\u20131.0).',
3902
3902
  '- **`"prompt_ready"`** \u2014 LLM grading required. Send `prompt.system_prompt` as system and',
3903
3903
  " `prompt.user_prompt` as user to your LLM. Parse the JSON response to get `score`, `hits`, `misses`.",
3904
3904
  ""
@@ -4087,7 +4087,7 @@ var evalRunCommand = command({
4087
4087
  },
4088
4088
  handler: async (args) => {
4089
4089
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4090
- const { launchInteractiveWizard } = await import("./interactive-P3D5O673.js");
4090
+ const { launchInteractiveWizard } = await import("./interactive-Z6ZV5OGM.js");
4091
4091
  await launchInteractiveWizard();
4092
4092
  return;
4093
4093
  }
@@ -4720,12 +4720,12 @@ var traceListCommand = command({
4720
4720
  var SUPPORTED_TYPES = [
4721
4721
  "contains",
4722
4722
  "regex",
4723
- "is_json",
4723
+ "is-json",
4724
4724
  "equals",
4725
4725
  "latency",
4726
4726
  "cost",
4727
- "token_usage",
4728
- "execution_metrics"
4727
+ "token-usage",
4728
+ "execution-metrics"
4729
4729
  ];
4730
4730
  function parseKeyValues(s) {
4731
4731
  const result = {};
@@ -4739,7 +4739,7 @@ function parseKeyValues(s) {
4739
4739
  }
4740
4740
  function parseAssertSpec(spec) {
4741
4741
  const colonIdx = spec.indexOf(":");
4742
- const type = colonIdx === -1 ? spec : spec.slice(0, colonIdx);
4742
+ const type = (colonIdx === -1 ? spec : spec.slice(0, colonIdx)).replace(/_/g, "-");
4743
4743
  const params = colonIdx === -1 ? "" : spec.slice(colonIdx + 1);
4744
4744
  switch (type) {
4745
4745
  case "contains":
@@ -4748,8 +4748,8 @@ function parseAssertSpec(spec) {
4748
4748
  case "regex":
4749
4749
  if (!params) throw new Error("regex requires a pattern: regex:<pattern>");
4750
4750
  return { name: "regex", type: "regex", value: params };
4751
- case "is_json":
4752
- return { name: "is_json", type: "is_json" };
4751
+ case "is-json":
4752
+ return { name: "is-json", type: "is-json" };
4753
4753
  case "equals":
4754
4754
  if (!params) throw new Error("equals requires a value: equals:<value>");
4755
4755
  return { name: "equals", type: "equals", value: params };
@@ -4765,19 +4765,19 @@ function parseAssertSpec(spec) {
4765
4765
  throw new Error("cost requires a budget in USD: cost:<usd>");
4766
4766
  return { name: "cost", type: "cost", budget };
4767
4767
  }
4768
- case "token_usage": {
4768
+ case "token-usage": {
4769
4769
  const kv = parseKeyValues(params);
4770
- const config = { name: "token_usage", type: "token_usage" };
4770
+ const config = { name: "token-usage", type: "token-usage" };
4771
4771
  if (kv.max_total) config.max_total = Number(kv.max_total);
4772
4772
  if (kv.max_input) config.max_input = Number(kv.max_input);
4773
4773
  if (kv.max_output) config.max_output = Number(kv.max_output);
4774
4774
  return config;
4775
4775
  }
4776
- case "execution_metrics": {
4776
+ case "execution-metrics": {
4777
4777
  const kv = parseKeyValues(params);
4778
4778
  const config = {
4779
- name: "execution_metrics",
4780
- type: "execution_metrics"
4779
+ name: "execution-metrics",
4780
+ type: "execution-metrics"
4781
4781
  };
4782
4782
  if (kv.max_tool_calls) config.max_tool_calls = Number(kv.max_tool_calls);
4783
4783
  if (kv.max_llm_calls) config.max_llm_calls = Number(kv.max_llm_calls);
@@ -4823,7 +4823,7 @@ var stubProvider = {
4823
4823
  }
4824
4824
  };
4825
4825
  var stubLlmJudge = {
4826
- kind: "llm_judge",
4826
+ kind: "llm-judge",
4827
4827
  evaluate() {
4828
4828
  throw new Error("trace score does not support LLM-based evaluators");
4829
4829
  }
@@ -4917,7 +4917,7 @@ var traceScoreCommand = command({
4917
4917
  type: string,
4918
4918
  long: "assert",
4919
4919
  short: "a",
4920
- description: "Evaluator spec: contains:<val>, regex:<pat>, is_json, equals:<val>, latency:<ms>, cost:<usd>, token_usage:<params>, execution_metrics:<params>"
4920
+ description: "Evaluator spec: contains:<val>, regex:<pat>, is-json, equals:<val>, latency:<ms>, cost:<usd>, token-usage:<params>, execution-metrics:<params>"
4921
4921
  }),
4922
4922
  testId: option({
4923
4923
  type: optional(string),
@@ -4952,7 +4952,7 @@ var traceScoreCommand = command({
4952
4952
  console.error(`${c2.yellow}Warning:${c2.reset} No results found in ${file}`);
4953
4953
  process.exit(0);
4954
4954
  }
4955
- const traceRequired = ["latency", "cost", "token_usage", "execution_metrics"].includes(
4955
+ const traceRequired = ["latency", "cost", "token-usage", "execution-metrics"].includes(
4956
4956
  evaluatorConfig.type
4957
4957
  );
4958
4958
  if (traceRequired) {
@@ -5793,4 +5793,4 @@ export {
5793
5793
  preprocessArgv,
5794
5794
  runCli
5795
5795
  };
5796
- //# sourceMappingURL=chunk-M6JYP6A6.js.map
5796
+ //# sourceMappingURL=chunk-YVWP4Z3W.js.map