@agentv/core 3.12.0 → 3.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,7 @@ import {
8
8
  isEvaluatorKind,
9
9
  loadCasesFromFile,
10
10
  resolveFileReference
11
- } from "../../chunk-4XWPXNQM.js";
11
+ } from "../../chunk-ZB3AUPES.js";
12
12
 
13
13
  // src/evaluation/validation/file-type.ts
14
14
  import { readFile } from "node:fs/promises";
package/dist/index.cjs CHANGED
@@ -1380,132 +1380,6 @@ var init_otlp_json_file_exporter = __esm({
1380
1380
  }
1381
1381
  });
1382
1382
 
1383
- // src/observability/simple-trace-file-exporter.ts
1384
- var simple_trace_file_exporter_exports = {};
1385
- __export(simple_trace_file_exporter_exports, {
1386
- SimpleTraceFileExporter: () => SimpleTraceFileExporter
1387
- });
1388
- function hrTimeDiffMs(start, end) {
1389
- const diffSec = end[0] - start[0];
1390
- const diffNano = end[1] - start[1];
1391
- return Math.round(diffSec * 1e3 + diffNano / 1e6);
1392
- }
1393
- var import_node_fs16, import_promises34, import_node_path50, SimpleTraceFileExporter;
1394
- var init_simple_trace_file_exporter = __esm({
1395
- "src/observability/simple-trace-file-exporter.ts"() {
1396
- "use strict";
1397
- import_node_fs16 = require("fs");
1398
- import_promises34 = require("fs/promises");
1399
- import_node_path50 = require("path");
1400
- SimpleTraceFileExporter = class {
1401
- stream = null;
1402
- filePath;
1403
- streamReady = null;
1404
- pendingWrites = [];
1405
- _shuttingDown = false;
1406
- spansByTraceId = /* @__PURE__ */ new Map();
1407
- constructor(filePath) {
1408
- this.filePath = filePath;
1409
- }
1410
- async ensureStream() {
1411
- if (!this.streamReady) {
1412
- this.streamReady = (async () => {
1413
- await (0, import_promises34.mkdir)((0, import_node_path50.dirname)(this.filePath), { recursive: true });
1414
- this.stream = (0, import_node_fs16.createWriteStream)(this.filePath, { flags: "w" });
1415
- return this.stream;
1416
- })();
1417
- }
1418
- return this.streamReady;
1419
- }
1420
- export(spans, resultCallback) {
1421
- if (this._shuttingDown) {
1422
- resultCallback({ code: 0 });
1423
- return;
1424
- }
1425
- const rootSpans = [];
1426
- for (const span of spans) {
1427
- const traceId = span.spanContext().traceId;
1428
- const existing = this.spansByTraceId.get(traceId) ?? [];
1429
- existing.push(span);
1430
- this.spansByTraceId.set(traceId, existing);
1431
- if (span.name === "agentv.eval") {
1432
- rootSpans.push(span);
1433
- }
1434
- }
1435
- const writePromise = this.ensureStream().then((stream) => {
1436
- for (const root of rootSpans) {
1437
- const traceId = root.spanContext().traceId;
1438
- const traceSpans = this.spansByTraceId.get(traceId) ?? [root];
1439
- const children = traceSpans.filter(
1440
- (span) => span.spanContext().spanId !== root.spanContext().spanId
1441
- );
1442
- const record = this.buildSimpleRecord(root, children);
1443
- stream.write(`${JSON.stringify(record)}
1444
- `);
1445
- this.spansByTraceId.delete(traceId);
1446
- }
1447
- });
1448
- this.pendingWrites.push(writePromise);
1449
- resultCallback({ code: 0 });
1450
- }
1451
- async shutdown() {
1452
- this._shuttingDown = true;
1453
- await Promise.all(this.pendingWrites);
1454
- this.pendingWrites = [];
1455
- this.spansByTraceId.clear();
1456
- return new Promise((resolve) => {
1457
- if (this.stream) {
1458
- this.stream.end(() => resolve());
1459
- } else {
1460
- resolve();
1461
- }
1462
- });
1463
- }
1464
- async forceFlush() {
1465
- await Promise.all(this.pendingWrites);
1466
- this.pendingWrites = [];
1467
- }
1468
- buildSimpleRecord(root, children) {
1469
- const attrs = root.attributes || {};
1470
- const durationMs = typeof attrs["agentv.trace.duration_ms"] === "number" ? attrs["agentv.trace.duration_ms"] : hrTimeDiffMs(root.startTime, root.endTime);
1471
- let inputTokens = 0;
1472
- let outputTokens = 0;
1473
- for (const child of children) {
1474
- const ca = child.attributes || {};
1475
- if (ca["gen_ai.usage.input_tokens"]) inputTokens += ca["gen_ai.usage.input_tokens"];
1476
- if (ca["gen_ai.usage.output_tokens"]) outputTokens += ca["gen_ai.usage.output_tokens"];
1477
- }
1478
- const rootInputTokens = typeof attrs["agentv.trace.token_input"] === "number" ? attrs["agentv.trace.token_input"] : 0;
1479
- const rootOutputTokens = typeof attrs["agentv.trace.token_output"] === "number" ? attrs["agentv.trace.token_output"] : 0;
1480
- const rootCachedTokens = typeof attrs["agentv.trace.token_cached"] === "number" ? attrs["agentv.trace.token_cached"] : void 0;
1481
- const llmSpans = children.filter((s) => s.attributes?.["gen_ai.operation.name"] === "chat").map((s) => ({
1482
- type: "llm",
1483
- name: s.name,
1484
- duration_ms: hrTimeDiffMs(s.startTime, s.endTime)
1485
- }));
1486
- const toolSpans = children.filter((s) => s.attributes?.["gen_ai.tool.name"]).map((s) => ({
1487
- type: "tool",
1488
- name: s.attributes["gen_ai.tool.name"],
1489
- duration_ms: hrTimeDiffMs(s.startTime, s.endTime)
1490
- }));
1491
- return {
1492
- test_id: attrs["agentv.test_id"],
1493
- target: attrs["agentv.target"],
1494
- score: attrs["agentv.score"],
1495
- duration_ms: durationMs,
1496
- cost_usd: attrs["agentv.trace.cost_usd"],
1497
- token_usage: inputTokens || outputTokens || rootInputTokens || rootOutputTokens || rootCachedTokens ? {
1498
- input: inputTokens || rootInputTokens,
1499
- output: outputTokens || rootOutputTokens,
1500
- ...rootCachedTokens ? { cached: rootCachedTokens } : {}
1501
- } : void 0,
1502
- spans: [...llmSpans, ...toolSpans].length > 0 ? [...llmSpans, ...toolSpans] : void 0
1503
- };
1504
- }
1505
- };
1506
- }
1507
- });
1508
-
1509
1383
  // src/index.ts
1510
1384
  var index_exports = {};
1511
1385
  __export(index_exports, {
@@ -1529,7 +1403,6 @@ __export(index_exports, {
1529
1403
  ProviderRegistry: () => ProviderRegistry,
1530
1404
  RepoManager: () => RepoManager,
1531
1405
  ResponseCache: () => ResponseCache,
1532
- SimpleTraceFileExporter: () => SimpleTraceFileExporter,
1533
1406
  SkillTriggerEvaluator: () => SkillTriggerEvaluator,
1534
1407
  TEST_MESSAGE_ROLES: () => TEST_MESSAGE_ROLES,
1535
1408
  TemplateNotDirectoryError: () => TemplateNotDirectoryError,
@@ -1708,8 +1581,6 @@ function isTestMessage(value) {
1708
1581
  var EVALUATOR_KIND_VALUES = [
1709
1582
  "code-grader",
1710
1583
  "llm-grader",
1711
- "code-judge",
1712
- "llm-judge",
1713
1584
  "rubric",
1714
1585
  "composite",
1715
1586
  "tool-trajectory",
@@ -2460,12 +2331,6 @@ function parseExecutionDefaults(raw, configPath) {
2460
2331
  } else if (obj.verbose !== void 0) {
2461
2332
  logWarning(`Invalid execution.verbose in ${configPath}, expected boolean`);
2462
2333
  }
2463
- const traceFile = obj.trace_file;
2464
- if (typeof traceFile === "string" && traceFile.trim().length > 0) {
2465
- result.trace_file = traceFile.trim();
2466
- } else if (traceFile !== void 0) {
2467
- logWarning(`Invalid execution.trace_file in ${configPath}, expected non-empty string`);
2468
- }
2469
2334
  if (typeof obj.keep_workspaces === "boolean") {
2470
2335
  result.keep_workspaces = obj.keep_workspaces;
2471
2336
  } else if (obj.keep_workspaces !== void 0) {
@@ -2582,6 +2447,9 @@ var ANSI_RESET5 = "\x1B[0m";
2582
2447
  function normalizeEvaluatorType(type) {
2583
2448
  return type.replace(/_/g, "-");
2584
2449
  }
2450
+ function isDeprecatedJudgeType(type) {
2451
+ return type === "code-judge" || type === "llm-judge";
2452
+ }
2585
2453
  async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
2586
2454
  const execution = rawEvalCase.execution;
2587
2455
  const executionObject = isJsonObject2(execution) ? execution : void 0;
@@ -2644,6 +2512,12 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2644
2512
  const rawName = asString(rawEvaluator.name);
2645
2513
  const rawType = rawEvaluator.type;
2646
2514
  const typeValue = typeof rawType === "string" ? normalizeEvaluatorType(rawType) : rawType;
2515
+ if (typeof typeValue === "string" && isDeprecatedJudgeType(typeValue)) {
2516
+ logWarning2(
2517
+ `Skipping evaluator '${rawName ?? "<unnamed>"}' in '${evalId}': '${rawType}' is deprecated. Use '${typeValue.replace("-judge", "-grader")}' instead`
2518
+ );
2519
+ continue;
2520
+ }
2647
2521
  const isCustomType = typeof typeValue === "string" && !isEvaluatorKind(typeValue);
2648
2522
  if (typeof typeValue !== "string") {
2649
2523
  logWarning2(`Skipping evaluator with invalid type in '${evalId}'`);
@@ -2676,7 +2550,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2676
2550
  });
2677
2551
  continue;
2678
2552
  }
2679
- if (typeValue === "code-grader" || typeValue === "code-judge") {
2553
+ if (typeValue === "code-grader") {
2680
2554
  let command;
2681
2555
  if (rawEvaluator.script !== void 0 && rawEvaluator.command === void 0) {
2682
2556
  console.warn(
@@ -2786,7 +2660,14 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2786
2660
  continue;
2787
2661
  }
2788
2662
  const aggregatorType = asString(rawAggregator.type);
2789
- if (aggregatorType !== "weighted_average" && aggregatorType !== "code-grader" && aggregatorType !== "code-judge" && aggregatorType !== "llm-grader" && aggregatorType !== "llm-judge" && aggregatorType !== "threshold") {
2663
+ const normalizedAggregatorType = typeof aggregatorType === "string" ? aggregatorType === "weighted_average" || aggregatorType === "threshold" ? aggregatorType : normalizeEvaluatorType(aggregatorType) : aggregatorType;
2664
+ if (typeof normalizedAggregatorType === "string" && isDeprecatedJudgeType(normalizedAggregatorType)) {
2665
+ logWarning2(
2666
+ `Skipping composite evaluator '${name}' in '${evalId}': aggregator type '${aggregatorType}' is deprecated. Use '${normalizedAggregatorType.replace("-judge", "-grader")}' instead`
2667
+ );
2668
+ continue;
2669
+ }
2670
+ if (normalizedAggregatorType !== "weighted_average" && normalizedAggregatorType !== "code-grader" && normalizedAggregatorType !== "llm-grader" && normalizedAggregatorType !== "threshold") {
2790
2671
  logWarning2(
2791
2672
  `Skipping composite evaluator '${name}' in '${evalId}': invalid aggregator type '${aggregatorType}'`
2792
2673
  );
@@ -2821,7 +2702,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2821
2702
  continue;
2822
2703
  }
2823
2704
  let aggregator;
2824
- if (aggregatorType === "weighted_average") {
2705
+ if (normalizedAggregatorType === "weighted_average") {
2825
2706
  const weights = isJsonObject2(rawAggregator.weights) ? rawAggregator.weights : void 0;
2826
2707
  const parsedWeights = {};
2827
2708
  if (weights) {
@@ -2835,7 +2716,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2835
2716
  type: "weighted_average",
2836
2717
  ...Object.keys(parsedWeights).length > 0 ? { weights: parsedWeights } : {}
2837
2718
  };
2838
- } else if (aggregatorType === "code-grader" || aggregatorType === "code-judge") {
2719
+ } else if (normalizedAggregatorType === "code-grader") {
2839
2720
  const aggregatorPath = asString(rawAggregator.path);
2840
2721
  if (!aggregatorPath) {
2841
2722
  logWarning2(
@@ -2848,7 +2729,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
2848
2729
  path: aggregatorPath,
2849
2730
  cwd: searchRoots[0]
2850
2731
  };
2851
- } else if (aggregatorType === "threshold") {
2732
+ } else if (normalizedAggregatorType === "threshold") {
2852
2733
  const thresholdValue = rawAggregator.threshold;
2853
2734
  if (typeof thresholdValue !== "number" || thresholdValue < 0 || thresholdValue > 1) {
2854
2735
  logWarning2(
@@ -3596,10 +3477,15 @@ function coerceEvaluator(candidate, contextId) {
3596
3477
  return void 0;
3597
3478
  }
3598
3479
  const normalized = normalizeEvaluatorType(candidate);
3480
+ if (isDeprecatedJudgeType(normalized)) {
3481
+ throw new Error(
3482
+ `Unsupported grader '${candidate}' in ${contextId}. Use '${normalized.replace("-judge", "-grader")}' instead.`
3483
+ );
3484
+ }
3599
3485
  if (isEvaluatorKind(normalized)) {
3600
3486
  return normalized;
3601
3487
  }
3602
- logWarning2(`Unknown evaluator '${candidate}' in ${contextId}, falling back to default`);
3488
+ logWarning2(`Unknown grader '${candidate}' in ${contextId}, falling back to default`);
3603
3489
  return void 0;
3604
3490
  }
3605
3491
  function asString(value) {
@@ -5032,9 +4918,7 @@ function assertionToNaturalLanguage(entry) {
5032
4918
  case "ends_with":
5033
4919
  return `Output ends with '${entry.value}'`;
5034
4920
  case "llm-grader":
5035
- case "llm_grader":
5036
- case "llm-judge":
5037
- case "llm_judge": {
4921
+ case "llm_grader": {
5038
4922
  if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
5039
4923
  return null;
5040
4924
  }
@@ -5047,9 +4931,7 @@ function assertionToNaturalLanguage(entry) {
5047
4931
  return tools ? `Agent called tools in order: ${tools}` : "Agent followed expected tool trajectory";
5048
4932
  }
5049
4933
  case "code-grader":
5050
- case "code_grader":
5051
- case "code-judge":
5052
- case "code_judge": {
4934
+ case "code_grader": {
5053
4935
  const graderName = entry.name ?? deriveGraderNameFromCommand(entry.command) ?? "code-grader";
5054
4936
  const desc = typeof entry.description === "string" ? entry.description : void 0;
5055
4937
  return codeGraderInstruction(graderName, desc);
@@ -5080,7 +4962,7 @@ function assertionToNaturalLanguage(entry) {
5080
4962
  }
5081
4963
  }
5082
4964
  function assertionToNaturalLanguageList(entry) {
5083
- if (entry.type === "llm-grader" || entry.type === "llm_grader" || entry.type === "llm-judge" || entry.type === "llm_judge") {
4965
+ if (entry.type === "llm-grader" || entry.type === "llm_grader") {
5084
4966
  if (Array.isArray(entry.rubrics) && entry.rubrics.length > 0) {
5085
4967
  return entry.rubrics.map((r) => r.outcome ?? r.criteria ?? r.id).filter((s) => typeof s === "string");
5086
4968
  }
@@ -12810,10 +12692,26 @@ function extractJsonBlob(text) {
12810
12692
  const match = text.match(/\{[\s\S]*\}/);
12811
12693
  return match?.[0];
12812
12694
  }
12695
+ function repairSchemaNearBooleanFields(text) {
12696
+ return text.replace(
12697
+ /("passed"\s*:\s*)(?:"([^"]+)"|([A-Za-z_][A-Za-z0-9_-]*))/gi,
12698
+ (_match, prefix, quotedValue, bareValue) => {
12699
+ const value = (quotedValue ?? bareValue ?? "").trim().toLowerCase();
12700
+ if (value === "true") {
12701
+ return `${prefix}true`;
12702
+ }
12703
+ if (value === "false") {
12704
+ return `${prefix}false`;
12705
+ }
12706
+ return `${prefix}false`;
12707
+ }
12708
+ );
12709
+ }
12813
12710
  function parseJsonFromText(text) {
12814
12711
  const cleaned = typeof text === "string" ? text.replace(/```json\n?|```/g, "").trim() : "";
12815
12712
  const blob = extractJsonBlob(cleaned) ?? cleaned;
12816
- return JSON.parse(blob);
12713
+ const repaired = repairSchemaNearBooleanFields(blob);
12714
+ return JSON.parse(repaired);
12817
12715
  }
12818
12716
  function isNonEmptyString(value) {
12819
12717
  return typeof value === "string" && value.trim().length > 0;
@@ -12960,12 +12858,12 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
12960
12858
  });
12961
12859
  }
12962
12860
  async function execShellWithStdin(command, stdinPayload, options = {}) {
12963
- const { mkdir: mkdir18, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
12861
+ const { mkdir: mkdir17, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
12964
12862
  const { tmpdir: tmpdir3 } = await import("os");
12965
12863
  const path48 = await import("path");
12966
12864
  const { randomUUID: randomUUID10 } = await import("crypto");
12967
12865
  const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
12968
- await mkdir18(dir, { recursive: true });
12866
+ await mkdir17(dir, { recursive: true });
12969
12867
  const stdinPath = path48.join(dir, "stdin.txt");
12970
12868
  const stdoutPath = path48.join(dir, "stdout.txt");
12971
12869
  const stderrPath = path48.join(dir, "stderr.txt");
@@ -13285,7 +13183,7 @@ function toCamelCaseDeep(obj) {
13285
13183
  // src/evaluation/evaluators/code-evaluator.ts
13286
13184
  var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
13287
13185
  var CodeEvaluator = class {
13288
- kind = "code-judge";
13186
+ kind = "code-grader";
13289
13187
  command;
13290
13188
  cwd;
13291
13189
  agentTimeoutMs;
@@ -13304,7 +13202,7 @@ var CodeEvaluator = class {
13304
13202
  if (outputForPayload) {
13305
13203
  const serialized = JSON.stringify(outputForPayload);
13306
13204
  if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
13307
- const tmpDir = await (0, import_promises26.mkdtemp)((0, import_node_path36.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
13205
+ const tmpDir = await (0, import_promises26.mkdtemp)((0, import_node_path36.join)((0, import_node_os5.tmpdir)(), "agentv-grader-"));
13308
13206
  outputPath = (0, import_node_path36.join)(tmpDir, "output.json");
13309
13207
  await (0, import_promises26.writeFile)(outputPath, serialized);
13310
13208
  outputForPayload = null;
@@ -13594,7 +13492,7 @@ var LlmGraderEvaluator = class {
13594
13492
  return this.evaluateWithDelegatedAgent(context2, graderProvider);
13595
13493
  }
13596
13494
  const config = context2.evaluator;
13597
- if ((config?.type === "llm-grader" || config?.type === "llm-judge") && config.rubrics && config.rubrics.length > 0) {
13495
+ if (config?.type === "llm-grader" && config.rubrics && config.rubrics.length > 0) {
13598
13496
  return this.evaluateWithRubrics(context2, graderProvider, config.rubrics);
13599
13497
  }
13600
13498
  return this.evaluateFreeform(context2, graderProvider);
@@ -13779,7 +13677,7 @@ ${context2.fileChanges}`;
13779
13677
  const systemPrompt = this.buildAgentSystemPrompt(context2);
13780
13678
  const userPrompt = this.buildAgentUserPrompt(context2);
13781
13679
  const config = context2.evaluator;
13782
- const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
13680
+ const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
13783
13681
  const fsTools = createFilesystemTools(workspacePath);
13784
13682
  const evaluatorRawRequest = {
13785
13683
  mode: "built-in",
@@ -13875,7 +13773,7 @@ ${context2.fileChanges}`;
13875
13773
  };
13876
13774
  }
13877
13775
  const config = context2.evaluator;
13878
- const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
13776
+ const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
13879
13777
  const details = {
13880
13778
  mode: modeLabel,
13881
13779
  grader_target: provider.targetName
@@ -13915,7 +13813,7 @@ ${context2.fileChanges}`;
13915
13813
  */
13916
13814
  buildAgentSystemPrompt(context2) {
13917
13815
  const config = context2.evaluator;
13918
- const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
13816
+ const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
13919
13817
  const parts = [
13920
13818
  "You are an expert evaluator with access to the workspace filesystem.",
13921
13819
  "Use the provided tools to investigate the workspace and verify the criteria are met.",
@@ -13946,7 +13844,7 @@ ${context2.fileChanges}`;
13946
13844
  return substituteVariables(this.evaluatorTemplate, variables);
13947
13845
  }
13948
13846
  const config = context2.evaluator;
13949
- const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
13847
+ const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
13950
13848
  const parts = [
13951
13849
  "Evaluate the candidate answer by investigating the workspace.",
13952
13850
  "",
@@ -13989,7 +13887,7 @@ ${context2.fileChanges}`;
13989
13887
  buildDelegatedPrompt(context2) {
13990
13888
  const formattedQuestion = context2.promptInputs.question && context2.promptInputs.question.trim().length > 0 ? context2.promptInputs.question : context2.evalCase.question;
13991
13889
  const config = context2.evaluator;
13992
- const rubrics = config?.type === "llm-grader" || config?.type === "llm-judge" ? config.rubrics : void 0;
13890
+ const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
13993
13891
  if (this.evaluatorTemplate) {
13994
13892
  const variables = {
13995
13893
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
@@ -14486,10 +14384,8 @@ var CompositeEvaluator = class {
14486
14384
  const aggregator = this.config.aggregator;
14487
14385
  switch (aggregator.type) {
14488
14386
  case "code-grader":
14489
- case "code-judge":
14490
14387
  return this.runCodeAggregator(results, aggregator.path, aggregator.cwd ?? this.cwd);
14491
14388
  case "llm-grader":
14492
- case "llm-judge":
14493
14389
  return this.runLlmAggregator(results, context2, aggregator);
14494
14390
  case "threshold":
14495
14391
  return this.runThreshold(results, aggregator.threshold);
@@ -16911,7 +16807,7 @@ var endsWithFactory = (config) => {
16911
16807
  };
16912
16808
  function createBuiltinRegistry() {
16913
16809
  const registry = new EvaluatorRegistry();
16914
- registry.register("llm-grader", llmGraderFactory).register("llm-judge", llmGraderFactory).register("code-grader", codeFactory).register("code-judge", codeFactory).register("composite", compositeFactory).register("tool-trajectory", toolTrajectoryFactory).register("field-accuracy", fieldAccuracyFactory).register("latency", latencyFactory).register("cost", costFactory).register("token-usage", tokenUsageFactory).register("execution-metrics", executionMetricsFactory).register("skill-trigger", skillTriggerFactory).register("contains", containsFactory).register("contains-any", containsAnyFactory).register("contains-all", containsAllFactory).register("icontains", icontainsFactory).register("icontains-any", icontainsAnyFactory).register("icontains-all", icontainsAllFactory).register("starts-with", startsWithFactory).register("ends-with", endsWithFactory).register("regex", regexFactory).register("is-json", isJsonFactory).register("equals", equalsFactory).register("inline-assert", (config) => {
16810
+ registry.register("llm-grader", llmGraderFactory).register("code-grader", codeFactory).register("composite", compositeFactory).register("tool-trajectory", toolTrajectoryFactory).register("field-accuracy", fieldAccuracyFactory).register("latency", latencyFactory).register("cost", costFactory).register("token-usage", tokenUsageFactory).register("execution-metrics", executionMetricsFactory).register("skill-trigger", skillTriggerFactory).register("contains", containsFactory).register("contains-any", containsAnyFactory).register("contains-all", containsAllFactory).register("icontains", icontainsFactory).register("icontains-any", icontainsAnyFactory).register("icontains-all", icontainsAllFactory).register("starts-with", startsWithFactory).register("ends-with", endsWithFactory).register("regex", regexFactory).register("is-json", isJsonFactory).register("equals", equalsFactory).register("inline-assert", (config) => {
16915
16811
  const fn = config[INLINE_ASSERT_FN];
16916
16812
  if (!fn) {
16917
16813
  throw new Error(
@@ -19629,7 +19525,7 @@ function filterEvalCases(evalCases, filter) {
19629
19525
  return evalCases.filter((evalCase) => import_micromatch3.default.isMatch(evalCase.id, filter));
19630
19526
  }
19631
19527
  function buildEvaluatorRegistry(overrides, resolveGraderProvider) {
19632
- const llmGrader = overrides?.["llm-grader"] ?? overrides?.["llm-judge"] ?? new LlmGraderEvaluator({
19528
+ const llmGrader = overrides?.["llm-grader"] ?? new LlmGraderEvaluator({
19633
19529
  resolveGraderProvider: async (context2) => {
19634
19530
  if (context2.graderProvider) {
19635
19531
  return context2.graderProvider;
@@ -20061,8 +19957,6 @@ var AgentVConfigSchema = import_zod5.z.object({
20061
19957
  agentTimeoutMs: import_zod5.z.number().int().min(0).optional(),
20062
19958
  /** Enable verbose logging */
20063
19959
  verbose: import_zod5.z.boolean().optional(),
20064
- /** Write human-readable trace JSONL to this path (supports {timestamp} placeholder) */
20065
- traceFile: import_zod5.z.string().optional(),
20066
19960
  /** Always keep temp workspaces after eval */
20067
19961
  keepWorkspaces: import_zod5.z.boolean().optional(),
20068
19962
  /** Write OTLP JSON trace to this path (supports {timestamp} placeholder) */
@@ -20362,12 +20256,6 @@ var OtelTraceExporter = class {
20362
20256
  new SimpleSpanProcessor(new OtlpJsonFileExporter2(this.options.otlpFilePath))
20363
20257
  );
20364
20258
  }
20365
- if (this.options.traceFilePath) {
20366
- const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await Promise.resolve().then(() => (init_simple_trace_file_exporter(), simple_trace_file_exporter_exports));
20367
- processors.push(
20368
- new SimpleSpanProcessor(new SimpleTraceFileExporter2(this.options.traceFilePath))
20369
- );
20370
- }
20371
20259
  if (processors.length === 0) {
20372
20260
  return false;
20373
20261
  }
@@ -20481,10 +20369,10 @@ var OtelTraceExporter = class {
20481
20369
  }
20482
20370
  if (result.scores) {
20483
20371
  for (const score of result.scores) {
20484
- rootSpan.addEvent(`agentv.evaluator.${score.name}`, {
20485
- "agentv.evaluator.score": score.score,
20486
- "agentv.evaluator.type": score.type,
20487
- ...score.verdict ? { "agentv.evaluator.verdict": score.verdict } : {}
20372
+ rootSpan.addEvent(`agentv.grader.${score.name}`, {
20373
+ "agentv.grader.score": score.score,
20374
+ "agentv.grader.type": score.type,
20375
+ ...score.verdict ? { "agentv.grader.verdict": score.verdict } : {}
20488
20376
  });
20489
20377
  }
20490
20378
  }
@@ -20795,7 +20683,6 @@ function toHrTime(iso) {
20795
20683
 
20796
20684
  // src/observability/index.ts
20797
20685
  init_otlp_json_file_exporter();
20798
- init_simple_trace_file_exporter();
20799
20686
 
20800
20687
  // src/index.ts
20801
20688
  function createAgentKernel() {
@@ -20823,7 +20710,6 @@ function createAgentKernel() {
20823
20710
  ProviderRegistry,
20824
20711
  RepoManager,
20825
20712
  ResponseCache,
20826
- SimpleTraceFileExporter,
20827
20713
  SkillTriggerEvaluator,
20828
20714
  TEST_MESSAGE_ROLES,
20829
20715
  TemplateNotDirectoryError,