agentv 3.5.0 → 3.7.0

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ import {
16
16
  validateEvalFile,
17
17
  validateFileReferences,
18
18
  validateTargetsFile
19
- } from "./chunk-RLL4QGNL.js";
19
+ } from "./chunk-7YS6YNJZ.js";
20
20
  import {
21
21
  createBuiltinRegistry,
22
22
  createProvider,
@@ -34,7 +34,7 @@ import {
34
34
  toSnakeCaseDeep as toSnakeCaseDeep2,
35
35
  transpileEvalYamlFile,
36
36
  trimBaselineResult
37
- } from "./chunk-D6G4N2H2.js";
37
+ } from "./chunk-XGG64VIY.js";
38
38
  import {
39
39
  __commonJS,
40
40
  __esm,
@@ -3493,9 +3493,9 @@ var ASSERTION_TEMPLATES = {
3493
3493
  default: `#!/usr/bin/env bun
3494
3494
  import { defineAssertion } from '@agentv/eval';
3495
3495
 
3496
- export default defineAssertion(({ answer }) => {
3496
+ export default defineAssertion(({ outputText }) => {
3497
3497
  // TODO: Implement your assertion logic
3498
- const pass = answer.length > 0;
3498
+ const pass = outputText.length > 0;
3499
3499
  return {
3500
3500
  pass,
3501
3501
  reasoning: pass ? 'Output has content' : 'Output is empty',
@@ -3505,9 +3505,9 @@ export default defineAssertion(({ answer }) => {
3505
3505
  score: `#!/usr/bin/env bun
3506
3506
  import { defineAssertion } from '@agentv/eval';
3507
3507
 
3508
- export default defineAssertion(({ answer }) => {
3508
+ export default defineAssertion(({ outputText }) => {
3509
3509
  // TODO: Implement your scoring logic (0.0 to 1.0)
3510
- const score = answer.length > 0 ? 1.0 : 0.0;
3510
+ const score = outputText.length > 0 ? 1.0 : 0.0;
3511
3511
  return {
3512
3512
  pass: score >= 0.5,
3513
3513
  score,
@@ -3967,7 +3967,6 @@ var evalAssertCommand = command({
3967
3967
  }
3968
3968
  const payload = JSON.stringify(
3969
3969
  {
3970
- answer: resolvedOutput,
3971
3970
  output: [{ role: "assistant", content: resolvedOutput }],
3972
3971
  input: [{ role: "user", content: resolvedInput }],
3973
3972
  question: resolvedInput,
@@ -4185,7 +4184,7 @@ var evalRunCommand = command({
4185
4184
  },
4186
4185
  handler: async (args) => {
4187
4186
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4188
- const { launchInteractiveWizard } = await import("./interactive-J7SUWZH2.js");
4187
+ const { launchInteractiveWizard } = await import("./interactive-F6XECJ33.js");
4189
4188
  await launchInteractiveWizard();
4190
4189
  return;
4191
4190
  }
@@ -4707,10 +4706,10 @@ function exportResults(sourceFile, content, outputDir) {
4707
4706
  const outputsDir = path8.join(outputDir, "outputs");
4708
4707
  mkdirSync2(outputsDir, { recursive: true });
4709
4708
  for (const result of patched) {
4710
- const answer = result.answer;
4711
- if (answer) {
4709
+ const outputText = result.outputText;
4710
+ if (outputText) {
4712
4711
  const id = safeTestId(result);
4713
- writeFileSync3(path8.join(outputsDir, `${id}.txt`), answer);
4712
+ writeFileSync3(path8.join(outputsDir, `${id}.txt`), outputText);
4714
4713
  }
4715
4714
  }
4716
4715
  }
@@ -5022,7 +5021,7 @@ function toTraceSummary(raw) {
5022
5021
  return toCamelCaseDeep(raw.trace);
5023
5022
  }
5024
5023
  function extractCandidate(raw) {
5025
- if (raw.answer !== void 0) return raw.answer;
5024
+ if (raw.output_text !== void 0) return raw.output_text;
5026
5025
  if (raw.output !== void 0)
5027
5026
  return typeof raw.output === "string" ? raw.output : JSON.stringify(raw.output);
5028
5027
  return "";
@@ -5221,9 +5220,8 @@ var traceScoreCommand = command({
5221
5220
  function renderFlatTrace(result) {
5222
5221
  const trace = result.trace;
5223
5222
  const parts = [];
5224
- if (trace?.tool_names && trace.tool_names.length > 0) {
5225
- const toolParts = trace.tool_names.map((name) => {
5226
- const count = trace.tool_calls_by_name?.[name] ?? 0;
5223
+ if (trace?.tool_calls && Object.keys(trace.tool_calls).length > 0) {
5224
+ const toolParts = Object.entries(trace.tool_calls).map(([name, count]) => {
5227
5225
  return count > 1 ? `${name} \xD7${count}` : name;
5228
5226
  });
5229
5227
  parts.push(`Tools: ${toolParts.join(", ")}`);
@@ -6278,4 +6276,4 @@ export {
6278
6276
  preprocessArgv,
6279
6277
  runCli
6280
6278
  };
6281
- //# sourceMappingURL=chunk-5GG6DDP5.js.map
6279
+ //# sourceMappingURL=chunk-TR6H437M.js.map