@agentv/core 4.32.0-next.1 → 4.34.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,16 @@
1
1
  import {
2
2
  LLM_GRADER_CAPABLE_KINDS,
3
+ RUBRIC_OPERATOR_VALUES,
3
4
  buildDirectoryChain,
4
5
  expandFileReferences,
5
6
  extractLastAssistantContent,
6
7
  fileExists,
7
8
  findGitRoot,
9
+ getAgentvConfigDir,
10
+ getAgentvDataDir,
11
+ getSubagentsRoot,
12
+ getWorkspacePoolRoot,
13
+ getWorkspacesRoot,
8
14
  interpolateEnv,
9
15
  interpolateTemplateVars,
10
16
  isAgentProvider,
@@ -18,7 +24,7 @@ import {
18
24
  readTextFile,
19
25
  resolveDelegatedTargetDefinition,
20
26
  resolveTargetDefinition
21
- } from "./chunk-5RQMJZDJ.js";
27
+ } from "./chunk-EW5X2RGJ.js";
22
28
  import {
23
29
  execFileWithStdin,
24
30
  execShellWithStdin
@@ -41,6 +47,49 @@ import { existsSync as existsSync6 } from "node:fs";
41
47
  import path45 from "node:path";
42
48
  import micromatch4 from "micromatch";
43
49
 
50
+ // src/evaluation/cache/response-cache.ts
51
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
52
+ import path from "node:path";
53
+ var DEFAULT_CACHE_PATH = ".agentv/cache";
54
+ var ResponseCache = class {
55
+ cachePath;
56
+ constructor(cachePath) {
57
+ this.cachePath = cachePath ?? DEFAULT_CACHE_PATH;
58
+ }
59
+ async get(key) {
60
+ const filePath = this.keyToPath(key);
61
+ try {
62
+ const data = await readFile(filePath, "utf8");
63
+ return JSON.parse(data);
64
+ } catch {
65
+ return void 0;
66
+ }
67
+ }
68
+ async set(key, value) {
69
+ const filePath = this.keyToPath(key);
70
+ const dir = path.dirname(filePath);
71
+ await mkdir(dir, { recursive: true });
72
+ await writeFile(filePath, JSON.stringify(value, null, 2), "utf8");
73
+ }
74
+ keyToPath(key) {
75
+ const prefix = key.slice(0, 2);
76
+ return path.join(this.cachePath, prefix, `${key}.json`);
77
+ }
78
+ };
79
+ function shouldEnableCache(params) {
80
+ if (params.cliNoCache) return false;
81
+ if (params.cliCache) return true;
82
+ if (params.yamlCache !== void 0) return params.yamlCache;
83
+ return params.tsConfigCache === true;
84
+ }
85
+ function shouldSkipCacheForTemperature(targetConfig) {
86
+ const temp = targetConfig.temperature;
87
+ if (typeof temp === "number" && temp > 0) {
88
+ return true;
89
+ }
90
+ return false;
91
+ }
92
+
44
93
  // src/evaluation/graders/scoring.ts
45
94
  var DEFAULT_THRESHOLD = 0.8;
46
95
  var PASS_THRESHOLD = DEFAULT_THRESHOLD;
@@ -133,7 +182,7 @@ function negateScore(score) {
133
182
  import { execFile as execFile3 } from "node:child_process";
134
183
  import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
135
184
  import { existsSync as existsSync5 } from "node:fs";
136
- import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir8, stat as stat9 } from "node:fs/promises";
185
+ import { copyFile as copyFile2, mkdir as mkdir15, readdir as readdir8, stat as stat9 } from "node:fs/promises";
137
186
  import path44 from "node:path";
138
187
  import { promisify as promisify7 } from "node:util";
139
188
  import micromatch3 from "micromatch";
@@ -277,38 +326,8 @@ function validateConcurrency(concurrency) {
277
326
  }
278
327
  }
279
328
 
280
- // src/paths.ts
281
- import os from "node:os";
282
- import path from "node:path";
283
- function readEnvPath(name) {
284
- const value = process.env[name];
285
- if (!value || value === "undefined") return void 0;
286
- return value;
287
- }
288
- function getAgentvConfigDir() {
289
- return readEnvPath("AGENTV_HOME") ?? path.join(os.homedir(), ".agentv");
290
- }
291
- function getAgentvHome() {
292
- return getAgentvConfigDir();
293
- }
294
- function getAgentvDataDir() {
295
- return readEnvPath("AGENTV_DATA_DIR") ?? getAgentvConfigDir();
296
- }
297
- function getWorkspacesRoot() {
298
- return path.join(getAgentvDataDir(), "workspaces");
299
- }
300
- function getSubagentsRoot() {
301
- return path.join(getAgentvDataDir(), "subagents");
302
- }
303
- function getTraceStateRoot() {
304
- return path.join(getAgentvDataDir(), "trace-state");
305
- }
306
- function getWorkspacePoolRoot() {
307
- return path.join(getAgentvDataDir(), "workspace-pool");
308
- }
309
-
310
329
  // src/evaluation/graders/code-grader.ts
311
- import { mkdtemp, rm, writeFile } from "node:fs/promises";
330
+ import { mkdtemp, rm, writeFile as writeFile2 } from "node:fs/promises";
312
331
  import { tmpdir } from "node:os";
313
332
  import { dirname, join } from "node:path";
314
333
 
@@ -642,7 +661,7 @@ async function materializeContentForGrader(messages, getWorkDir) {
642
661
  const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
643
662
  const dir = await getWorkDir();
644
663
  const filePath = join(dir, `img-${counter++}.${ext}`);
645
- await writeFile(filePath, Buffer.from(base64Data, "base64"));
664
+ await writeFile2(filePath, Buffer.from(base64Data, "base64"));
646
665
  blocks.push({ type: "image", media_type: img.media_type, path: filePath });
647
666
  } else {
648
667
  blocks.push({ type: "image", media_type: img.media_type, path: img.source });
@@ -685,7 +704,7 @@ var CodeGrader = class {
685
704
  if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
686
705
  const tmpDir = await mkdtemp(join(tmpdir(), "agentv-grader-"));
687
706
  outputPath = join(tmpDir, "output.json");
688
- await writeFile(outputPath, serialized);
707
+ await writeFile2(outputPath, serialized);
689
708
  outputForPayload = null;
690
709
  }
691
710
  }
@@ -702,6 +721,7 @@ var CodeGrader = class {
702
721
  context.evalCase.input,
703
722
  getImageDir
704
723
  ),
724
+ metadata: context.evalCase.metadata ?? null,
705
725
  trace: context.trace ?? null,
706
726
  tokenUsage: context.tokenUsage ?? null,
707
727
  costUsd: context.costUsd ?? null,
@@ -874,7 +894,7 @@ import path3 from "node:path";
874
894
  import { z } from "zod";
875
895
 
876
896
  // src/evaluation/content-preprocessor.ts
877
- import { readFile } from "node:fs/promises";
897
+ import { readFile as readFile2 } from "node:fs/promises";
878
898
  import path2 from "node:path";
879
899
  import { fileURLToPath } from "node:url";
880
900
  var MIME_TYPE_ALIASES = {
@@ -943,7 +963,7 @@ async function preprocessContentFile(block, preprocessors, basePath) {
943
963
  return runContentPreprocessor(block, resolvedPath, preprocessor);
944
964
  }
945
965
  try {
946
- const buffer = await readFile(resolvedPath);
966
+ const buffer = await readFile2(resolvedPath);
947
967
  const text = buffer.toString("utf8").replace(/\r\n/g, "\n");
948
968
  if (buffer.includes(0) || text.includes(REPLACEMENT_CHAR)) {
949
969
  return {
@@ -1039,6 +1059,10 @@ ${text}`;
1039
1059
  var TEMPLATE_VARIABLES = {
1040
1060
  EXPECTED_OUTPUT: "expected_output",
1041
1061
  CRITERIA: "criteria",
1062
+ METADATA: "metadata",
1063
+ METADATA_JSON: "metadata_json",
1064
+ RUBRICS: "rubrics",
1065
+ RUBRICS_JSON: "rubrics_json",
1042
1066
  INPUT: "input",
1043
1067
  OUTPUT: "output",
1044
1068
  FILE_CHANGES: "file_changes",
@@ -1061,6 +1085,27 @@ var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
1061
1085
  [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT, TEMPLATE_VARIABLES.EXPECTED_OUTPUT]
1062
1086
  ]);
1063
1087
 
1088
+ // src/evaluation/graders/rubric-operators.ts
1089
+ var OPERATOR_GUIDANCE = {
1090
+ correctness: "Correctness: mark satisfied only when the answer positively supports or fulfills the outcome. Omission or contradiction should not satisfy it.",
1091
+ contradiction: "Contradiction guard: mark satisfied when the answer does not make a claim that contradicts the outcome. Do not require the answer to mention the outcome; mark unsatisfied only for incompatible claims."
1092
+ };
1093
+ function formatRubricOperatorLabel(operator) {
1094
+ return operator ? ` (operator: ${operator})` : "";
1095
+ }
1096
+ function formatRubricOperatorGuidance(rubrics) {
1097
+ const operators = /* @__PURE__ */ new Set();
1098
+ for (const rubric of rubrics) {
1099
+ if (rubric.operator) {
1100
+ operators.add(rubric.operator);
1101
+ }
1102
+ }
1103
+ if (operators.size === 0) {
1104
+ return [];
1105
+ }
1106
+ return [...operators].map((operator) => OPERATOR_GUIDANCE[operator]);
1107
+ }
1108
+
1064
1109
  // src/evaluation/graders/llm-grader.ts
1065
1110
  var DEFAULT_MAX_STEPS = 10;
1066
1111
  var MAX_STEPS_LIMIT = 50;
@@ -1143,6 +1188,32 @@ var scoreRangeEvaluationSchema = z.object({
1143
1188
  checks: z.array(scoreRangeCheckResultSchema).describe("Scores for each rubric criterion"),
1144
1189
  overall_reasoning: z.string().describe("Overall assessment summary (1-2 sentences)").optional()
1145
1190
  });
1191
+ function stringifyPretty(value) {
1192
+ return value === void 0 ? "" : JSON.stringify(value, null, 2);
1193
+ }
1194
+ function stringifyCompact(value) {
1195
+ return value === void 0 ? "" : JSON.stringify(value);
1196
+ }
1197
+ function buildTemplateVariables(context) {
1198
+ const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
1199
+ const rubrics = context.evaluator?.type === "llm-grader" ? context.evaluator.rubrics : void 0;
1200
+ return {
1201
+ [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
1202
+ [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
1203
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
1204
+ [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
1205
+ [TEMPLATE_VARIABLES.METADATA]: stringifyPretty(context.evalCase.metadata),
1206
+ [TEMPLATE_VARIABLES.METADATA_JSON]: stringifyCompact(context.evalCase.metadata),
1207
+ [TEMPLATE_VARIABLES.RUBRICS]: stringifyPretty(rubrics),
1208
+ [TEMPLATE_VARIABLES.RUBRICS_JSON]: stringifyCompact(rubrics),
1209
+ [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
1210
+ [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
1211
+ // Deprecated aliases — same values as the primary variables above
1212
+ [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
1213
+ [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
1214
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
1215
+ };
1216
+ }
1146
1217
  function resolveContentBasePath(context) {
1147
1218
  if (context.workspacePath) {
1148
1219
  return context.workspacePath;
@@ -1214,19 +1285,7 @@ var LlmGrader = class {
1214
1285
  // LLM mode (existing)
1215
1286
  // ---------------------------------------------------------------------------
1216
1287
  async evaluateFreeform(context, graderProvider) {
1217
- const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
1218
- const variables = {
1219
- [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
1220
- [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
1221
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
1222
- [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
1223
- [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
1224
- [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
1225
- // Deprecated aliases — same values as the primary variables above
1226
- [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
1227
- [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
1228
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
1229
- };
1288
+ const variables = buildTemplateVariables(context);
1230
1289
  const systemPrompt = buildOutputSchema();
1231
1290
  const graderTemplate = context.graderTemplateOverride ?? this.graderTemplate ?? DEFAULT_GRADER_TEMPLATE;
1232
1291
  warnDeprecatedTemplateVars(graderTemplate);
@@ -1293,7 +1352,7 @@ ${context.toolCalls}`;
1293
1352
  if (hasScoreRanges) {
1294
1353
  return this.evaluateWithScoreRanges(context, graderProvider, rubrics);
1295
1354
  }
1296
- const prompt = this.buildRubricPrompt(context, rubrics);
1355
+ const prompt = context.graderTemplateOverride || this.graderTemplate ? this.buildCustomPrompt(context) : this.buildRubricPrompt(context, rubrics);
1297
1356
  const systemPrompt = buildRubricOutputSchema();
1298
1357
  const graderRawRequest = {
1299
1358
  userPrompt: prompt,
@@ -1338,7 +1397,7 @@ ${context.toolCalls}`;
1338
1397
  * Each criterion is scored 0-10 and normalized to 0-1.
1339
1398
  */
1340
1399
  async evaluateWithScoreRanges(context, graderProvider, rubrics) {
1341
- const prompt = this.buildScoreRangePrompt(context, rubrics);
1400
+ const prompt = context.graderTemplateOverride || this.graderTemplate ? this.buildCustomPrompt(context) : this.buildScoreRangePrompt(context, rubrics);
1342
1401
  const systemPrompt = buildScoreRangeOutputSchema();
1343
1402
  const graderRawRequest = {
1344
1403
  userPrompt: prompt,
@@ -1557,21 +1616,11 @@ ${context.toolCalls}`;
1557
1616
  */
1558
1617
  buildAgentUserPrompt(context) {
1559
1618
  const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
1560
- const variables = {
1561
- [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
1562
- [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
1563
- [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
1564
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
1565
- [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
1566
- [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
1567
- // Deprecated aliases
1568
- [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
1569
- [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
1570
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
1571
- };
1572
- if (this.graderTemplate) {
1573
- warnDeprecatedTemplateVars(this.graderTemplate);
1574
- return substituteVariables(this.graderTemplate, variables);
1619
+ const variables = buildTemplateVariables(context);
1620
+ const template = context.graderTemplateOverride ?? this.graderTemplate;
1621
+ if (template) {
1622
+ warnDeprecatedTemplateVars(template);
1623
+ return substituteVariables(template, variables);
1575
1624
  }
1576
1625
  const config = context.evaluator;
1577
1626
  const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
@@ -1621,21 +1670,11 @@ ${context.toolCalls}`;
1621
1670
  const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
1622
1671
  const config = context.evaluator;
1623
1672
  const rubrics = config?.type === "llm-grader" ? config.rubrics : void 0;
1624
- if (this.graderTemplate) {
1625
- const variables = {
1626
- [TEMPLATE_VARIABLES.CRITERIA]: context.evalCase.criteria.trim(),
1627
- [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
1628
- [TEMPLATE_VARIABLES.OUTPUT]: context.candidate.trim(),
1629
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (context.evalCase.reference_answer ?? "").trim(),
1630
- [TEMPLATE_VARIABLES.FILE_CHANGES]: context.fileChanges ?? "",
1631
- [TEMPLATE_VARIABLES.TOOL_CALLS]: context.toolCalls ?? "",
1632
- // Deprecated aliases
1633
- [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
1634
- [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context.candidate.trim(),
1635
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context.evalCase.reference_answer ?? "").trim()
1636
- };
1637
- warnDeprecatedTemplateVars(this.graderTemplate);
1638
- const customPrompt = substituteVariables(this.graderTemplate, variables);
1673
+ const template = context.graderTemplateOverride ?? this.graderTemplate;
1674
+ if (template) {
1675
+ const variables = buildTemplateVariables(context);
1676
+ warnDeprecatedTemplateVars(template);
1677
+ const customPrompt = substituteVariables(template, variables);
1639
1678
  const outputSchema = rubrics && rubrics.length > 0 ? buildRubricOutputSchema() : buildOutputSchema();
1640
1679
  return `${customPrompt}
1641
1680
 
@@ -1761,6 +1800,9 @@ ${outputSchema}`;
1761
1800
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
1762
1801
  const minScoreLabel = rubric.min_score !== void 0 ? ` [REQUIRED: min score ${rubric.min_score}]` : rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
1763
1802
  parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
1803
+ if (rubric.operator) {
1804
+ parts.push(`Operator: ${rubric.operator}`);
1805
+ }
1764
1806
  if (rubric.outcome) {
1765
1807
  parts.push(`Description: ${rubric.outcome}`);
1766
1808
  }
@@ -1773,12 +1815,21 @@ ${outputSchema}`;
1773
1815
  }
1774
1816
  }
1775
1817
  }
1818
+ const operatorGuidance = formatRubricOperatorGuidance(rubrics);
1819
+ if (operatorGuidance.length > 0) {
1820
+ parts.push("", ...operatorGuidance);
1821
+ }
1776
1822
  parts.push(
1777
1823
  "",
1778
1824
  "For each criterion, provide an integer score 0-10 that matches one of its defined score ranges."
1779
1825
  );
1780
1826
  return parts.join("\n");
1781
1827
  }
1828
+ buildCustomPrompt(context) {
1829
+ const template = context.graderTemplateOverride ?? this.graderTemplate ?? "";
1830
+ warnDeprecatedTemplateVars(template);
1831
+ return substituteVariables(template, buildTemplateVariables(context));
1832
+ }
1782
1833
  buildRubricPrompt(context, rubrics) {
1783
1834
  const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
1784
1835
  const parts = [
@@ -1802,10 +1853,21 @@ ${outputSchema}`;
1802
1853
  parts.push("[[ ## tool_calls ## ]]", context.toolCalls, "");
1803
1854
  }
1804
1855
  parts.push("[[ ## rubrics ## ]]");
1856
+ const operatorGuidance = formatRubricOperatorGuidance(rubrics);
1857
+ if (operatorGuidance.length > 0) {
1858
+ parts.push("", "Operator guidance:");
1859
+ for (const guidance of operatorGuidance) {
1860
+ parts.push(`- ${guidance}`);
1861
+ }
1862
+ parts.push("");
1863
+ }
1805
1864
  for (const rubric of rubrics) {
1806
1865
  const requiredLabel = rubric.required ? " (REQUIRED)" : "";
1807
1866
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
1808
- parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.outcome}`);
1867
+ const operatorLabel = formatRubricOperatorLabel(rubric.operator);
1868
+ parts.push(
1869
+ `- [${rubric.id}]${requiredLabel}${weightLabel}${operatorLabel}: ${rubric.outcome}`
1870
+ );
1809
1871
  }
1810
1872
  parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
1811
1873
  return parts.join("\n");
@@ -2537,6 +2599,385 @@ var CostGrader = class {
2537
2599
  };
2538
2600
 
2539
2601
  // src/evaluation/trace.ts
2602
+ import { z as z2 } from "zod";
2603
+ var NORMALIZED_TRAJECTORY_SCHEMA_VERSION = "agentv.trace.v1";
2604
+ var NORMALIZED_TRACE_SOURCE_KINDS = [
2605
+ "agentv_run",
2606
+ "otlp",
2607
+ "phoenix",
2608
+ "langfuse",
2609
+ "pi_session",
2610
+ "imported_transcript",
2611
+ "compact_transcript"
2612
+ ];
2613
+ var NORMALIZED_TRACE_EVENT_TYPES = [
2614
+ "message",
2615
+ "model_turn",
2616
+ "tool_call",
2617
+ "tool_result"
2618
+ ];
2619
+ var NORMALIZED_TOOL_STATUSES = ["ok", "error", "timeout", "cancelled", "unknown"];
2620
+ var NORMALIZED_REDACTION_LEVELS = ["none", "partial", "full"];
2621
+ function omitUndefinedProperties(value) {
2622
+ return Object.fromEntries(
2623
+ Object.entries(value).filter(([, property]) => property !== void 0)
2624
+ );
2625
+ }
2626
+ var MetadataWireSchema = z2.record(z2.string(), z2.unknown());
2627
+ var TokenUsageWireSchema = z2.object({
2628
+ input: z2.number(),
2629
+ output: z2.number(),
2630
+ cached: z2.number().optional(),
2631
+ reasoning: z2.number().optional()
2632
+ });
2633
+ var NormalizedRedactionStateWireSchema = z2.object({
2634
+ level: z2.enum(NORMALIZED_REDACTION_LEVELS),
2635
+ fields: z2.array(z2.string()).optional(),
2636
+ reason: z2.string().optional()
2637
+ });
2638
+ var NormalizedTraceErrorWireSchema = z2.object({
2639
+ message: z2.string(),
2640
+ name: z2.string().optional(),
2641
+ code: z2.string().optional(),
2642
+ stack: z2.string().optional(),
2643
+ metadata: MetadataWireSchema.optional()
2644
+ });
2645
+ var NormalizedTraceSourceWireSchema = z2.object({
2646
+ kind: z2.enum(NORMALIZED_TRACE_SOURCE_KINDS),
2647
+ path: z2.string().optional(),
2648
+ url: z2.string().optional(),
2649
+ provider: z2.string().optional(),
2650
+ format: z2.string().optional(),
2651
+ version: z2.string().optional(),
2652
+ metadata: MetadataWireSchema.optional()
2653
+ });
2654
+ var NormalizedTraceSessionWireSchema = z2.object({
2655
+ session_id: z2.string().optional(),
2656
+ conversation_id: z2.string().optional(),
2657
+ cwd: z2.string().optional(),
2658
+ started_at: z2.string().optional(),
2659
+ ended_at: z2.string().optional(),
2660
+ metadata: MetadataWireSchema.optional()
2661
+ });
2662
+ var NormalizedTraceBranchWireSchema = z2.object({
2663
+ selected_leaf_id: z2.string().optional(),
2664
+ selected_path_ids: z2.array(z2.string()).optional(),
2665
+ included_event_ids: z2.array(z2.string()).optional(),
2666
+ omitted_event_ids: z2.array(z2.string()).optional(),
2667
+ selection_reason: z2.string().optional()
2668
+ });
2669
+ var NormalizedTraceSourceRefWireSchema = z2.object({
2670
+ event_id: z2.string().optional(),
2671
+ message_id: z2.string().optional(),
2672
+ span_id: z2.string().optional(),
2673
+ trace_id: z2.string().optional(),
2674
+ raw_kind: z2.string().optional(),
2675
+ path: z2.string().optional(),
2676
+ line: z2.number().int().nonnegative().optional(),
2677
+ metadata: MetadataWireSchema.optional()
2678
+ });
2679
+ var NormalizedRawEvidenceWireSchema = z2.object({
2680
+ kind: z2.string(),
2681
+ ref: z2.string().optional(),
2682
+ media_type: z2.string().optional(),
2683
+ content: z2.unknown().optional(),
2684
+ redacted: z2.boolean().optional(),
2685
+ metadata: MetadataWireSchema.optional()
2686
+ });
2687
+ var NormalizedTraceMessageWireSchema = z2.object({
2688
+ role: z2.string(),
2689
+ name: z2.string().optional(),
2690
+ content: z2.unknown().optional(),
2691
+ redaction: NormalizedRedactionStateWireSchema.optional(),
2692
+ token_usage: TokenUsageWireSchema.optional(),
2693
+ metadata: MetadataWireSchema.optional()
2694
+ });
2695
+ var NormalizedTraceModelWireSchema = z2.object({
2696
+ provider: z2.string().optional(),
2697
+ name: z2.string().optional(),
2698
+ invocation_id: z2.string().optional(),
2699
+ token_usage: TokenUsageWireSchema.optional(),
2700
+ metadata: MetadataWireSchema.optional()
2701
+ });
2702
+ var NormalizedTraceToolWireSchema = z2.object({
2703
+ name: z2.string(),
2704
+ call_id: z2.string().optional(),
2705
+ input: z2.unknown().optional(),
2706
+ output: z2.unknown().optional(),
2707
+ status: z2.enum(NORMALIZED_TOOL_STATUSES).optional(),
2708
+ error: NormalizedTraceErrorWireSchema.optional(),
2709
+ redaction: NormalizedRedactionStateWireSchema.optional(),
2710
+ metadata: MetadataWireSchema.optional()
2711
+ });
2712
+ var NormalizedTraceEventWireSchema = z2.object({
2713
+ event_id: z2.string(),
2714
+ parent_event_id: z2.string().optional(),
2715
+ ordinal: z2.number().int().nonnegative(),
2716
+ type: z2.enum(NORMALIZED_TRACE_EVENT_TYPES),
2717
+ timestamp: z2.string().optional(),
2718
+ duration_ms: z2.number().nonnegative().optional(),
2719
+ duration_inferred: z2.boolean().optional(),
2720
+ turn_index: z2.number().int().nonnegative().optional(),
2721
+ message: NormalizedTraceMessageWireSchema.optional(),
2722
+ model: NormalizedTraceModelWireSchema.optional(),
2723
+ tool: NormalizedTraceToolWireSchema.optional(),
2724
+ source_ref: NormalizedTraceSourceRefWireSchema.optional(),
2725
+ raw_evidence: z2.array(NormalizedRawEvidenceWireSchema).optional(),
2726
+ redaction: NormalizedRedactionStateWireSchema.optional(),
2727
+ metadata: MetadataWireSchema.optional()
2728
+ });
2729
+ var NormalizedTrajectoryWireSchema = z2.object({
2730
+ schema_version: z2.literal(NORMALIZED_TRAJECTORY_SCHEMA_VERSION),
2731
+ source: NormalizedTraceSourceWireSchema,
2732
+ session: NormalizedTraceSessionWireSchema,
2733
+ branch: NormalizedTraceBranchWireSchema.optional(),
2734
+ events: z2.array(NormalizedTraceEventWireSchema),
2735
+ token_usage: TokenUsageWireSchema.optional(),
2736
+ cost_usd: z2.number().optional(),
2737
+ duration_ms: z2.number().optional(),
2738
+ started_at: z2.string().optional(),
2739
+ ended_at: z2.string().optional(),
2740
+ metadata: MetadataWireSchema.optional()
2741
+ });
2742
+ function toNormalizedTrajectoryWire(trajectory) {
2743
+ return NormalizedTrajectoryWireSchema.parse(
2744
+ omitUndefinedProperties({
2745
+ schema_version: trajectory.schemaVersion,
2746
+ source: toNormalizedTraceSourceWire(trajectory.source),
2747
+ session: toNormalizedTraceSessionWire(trajectory.session),
2748
+ branch: trajectory.branch ? toNormalizedTraceBranchWire(trajectory.branch) : void 0,
2749
+ events: trajectory.events.map(toNormalizedTraceEventWire),
2750
+ token_usage: trajectory.tokenUsage,
2751
+ cost_usd: trajectory.costUsd,
2752
+ duration_ms: trajectory.durationMs,
2753
+ started_at: trajectory.startedAt,
2754
+ ended_at: trajectory.endedAt,
2755
+ metadata: trajectory.metadata
2756
+ })
2757
+ );
2758
+ }
2759
+ function fromNormalizedTrajectoryWire(input) {
2760
+ const wire = NormalizedTrajectoryWireSchema.parse(input);
2761
+ return {
2762
+ schemaVersion: wire.schema_version,
2763
+ source: fromNormalizedTraceSourceWire(wire.source),
2764
+ session: fromNormalizedTraceSessionWire(wire.session),
2765
+ branch: wire.branch ? fromNormalizedTraceBranchWire(wire.branch) : void 0,
2766
+ events: wire.events.map(fromNormalizedTraceEventWire),
2767
+ tokenUsage: wire.token_usage,
2768
+ costUsd: wire.cost_usd,
2769
+ durationMs: wire.duration_ms,
2770
+ startedAt: wire.started_at,
2771
+ endedAt: wire.ended_at,
2772
+ metadata: wire.metadata
2773
+ };
2774
+ }
2775
+ function toNormalizedTraceSourceWire(source) {
2776
+ return omitUndefinedProperties({
2777
+ kind: source.kind,
2778
+ path: source.path,
2779
+ url: source.url,
2780
+ provider: source.provider,
2781
+ format: source.format,
2782
+ version: source.version,
2783
+ metadata: source.metadata
2784
+ });
2785
+ }
2786
+ function fromNormalizedTraceSourceWire(source) {
2787
+ return {
2788
+ kind: source.kind,
2789
+ path: source.path,
2790
+ url: source.url,
2791
+ provider: source.provider,
2792
+ format: source.format,
2793
+ version: source.version,
2794
+ metadata: source.metadata
2795
+ };
2796
+ }
2797
+ function toNormalizedTraceSessionWire(session) {
2798
+ return omitUndefinedProperties({
2799
+ session_id: session.sessionId,
2800
+ conversation_id: session.conversationId,
2801
+ cwd: session.cwd,
2802
+ started_at: session.startedAt,
2803
+ ended_at: session.endedAt,
2804
+ metadata: session.metadata
2805
+ });
2806
+ }
2807
+ function fromNormalizedTraceSessionWire(session) {
2808
+ return {
2809
+ sessionId: session.session_id,
2810
+ conversationId: session.conversation_id,
2811
+ cwd: session.cwd,
2812
+ startedAt: session.started_at,
2813
+ endedAt: session.ended_at,
2814
+ metadata: session.metadata
2815
+ };
2816
+ }
2817
+ function toNormalizedTraceBranchWire(branch) {
2818
+ return omitUndefinedProperties({
2819
+ selected_leaf_id: branch.selectedLeafId,
2820
+ selected_path_ids: branch.selectedPathIds,
2821
+ included_event_ids: branch.includedEventIds,
2822
+ omitted_event_ids: branch.omittedEventIds,
2823
+ selection_reason: branch.selectionReason
2824
+ });
2825
+ }
2826
+ function fromNormalizedTraceBranchWire(branch) {
2827
+ return {
2828
+ selectedLeafId: branch.selected_leaf_id,
2829
+ selectedPathIds: branch.selected_path_ids,
2830
+ includedEventIds: branch.included_event_ids,
2831
+ omittedEventIds: branch.omitted_event_ids,
2832
+ selectionReason: branch.selection_reason
2833
+ };
2834
+ }
2835
+ function toNormalizedTraceEventWire(event) {
2836
+ return NormalizedTraceEventWireSchema.parse(
2837
+ omitUndefinedProperties({
2838
+ event_id: event.eventId,
2839
+ parent_event_id: event.parentEventId,
2840
+ ordinal: event.ordinal,
2841
+ type: event.type,
2842
+ timestamp: event.timestamp,
2843
+ duration_ms: event.durationMs,
2844
+ duration_inferred: event.durationInferred,
2845
+ turn_index: event.turnIndex,
2846
+ message: event.message ? toNormalizedTraceMessageWire(event.message) : void 0,
2847
+ model: event.model ? toNormalizedTraceModelWire(event.model) : void 0,
2848
+ tool: event.tool ? toNormalizedTraceToolWire(event.tool) : void 0,
2849
+ source_ref: event.sourceRef ? toNormalizedTraceSourceRefWire(event.sourceRef) : void 0,
2850
+ raw_evidence: event.rawEvidence?.map(toNormalizedRawEvidenceWire),
2851
+ redaction: event.redaction,
2852
+ metadata: event.metadata
2853
+ })
2854
+ );
2855
+ }
2856
+ function fromNormalizedTraceEventWire(event) {
2857
+ return {
2858
+ eventId: event.event_id,
2859
+ parentEventId: event.parent_event_id,
2860
+ ordinal: event.ordinal,
2861
+ type: event.type,
2862
+ timestamp: event.timestamp,
2863
+ durationMs: event.duration_ms,
2864
+ durationInferred: event.duration_inferred,
2865
+ turnIndex: event.turn_index,
2866
+ message: event.message ? fromNormalizedTraceMessageWire(event.message) : void 0,
2867
+ model: event.model ? fromNormalizedTraceModelWire(event.model) : void 0,
2868
+ tool: event.tool ? fromNormalizedTraceToolWire(event.tool) : void 0,
2869
+ sourceRef: event.source_ref ? fromNormalizedTraceSourceRefWire(event.source_ref) : void 0,
2870
+ rawEvidence: event.raw_evidence?.map(fromNormalizedRawEvidenceWire),
2871
+ redaction: event.redaction,
2872
+ metadata: event.metadata
2873
+ };
2874
+ }
2875
+ function toNormalizedTraceMessageWire(message) {
2876
+ return omitUndefinedProperties({
2877
+ role: message.role,
2878
+ name: message.name,
2879
+ content: message.content,
2880
+ redaction: message.redaction,
2881
+ token_usage: message.tokenUsage,
2882
+ metadata: message.metadata
2883
+ });
2884
+ }
2885
+ function fromNormalizedTraceMessageWire(message) {
2886
+ return {
2887
+ role: message.role,
2888
+ name: message.name,
2889
+ content: message.content,
2890
+ redaction: message.redaction,
2891
+ tokenUsage: message.token_usage,
2892
+ metadata: message.metadata
2893
+ };
2894
+ }
2895
+ function toNormalizedTraceModelWire(model) {
2896
+ return omitUndefinedProperties({
2897
+ provider: model.provider,
2898
+ name: model.name,
2899
+ invocation_id: model.invocationId,
2900
+ token_usage: model.tokenUsage,
2901
+ metadata: model.metadata
2902
+ });
2903
+ }
2904
+ function fromNormalizedTraceModelWire(model) {
2905
+ return {
2906
+ provider: model.provider,
2907
+ name: model.name,
2908
+ invocationId: model.invocation_id,
2909
+ tokenUsage: model.token_usage,
2910
+ metadata: model.metadata
2911
+ };
2912
+ }
2913
+ function toNormalizedTraceToolWire(tool) {
2914
+ return omitUndefinedProperties({
2915
+ name: tool.name,
2916
+ call_id: tool.callId,
2917
+ input: tool.input,
2918
+ output: tool.output,
2919
+ status: tool.status,
2920
+ error: tool.error,
2921
+ redaction: tool.redaction,
2922
+ metadata: tool.metadata
2923
+ });
2924
+ }
2925
+ function fromNormalizedTraceToolWire(tool) {
2926
+ return {
2927
+ name: tool.name,
2928
+ callId: tool.call_id,
2929
+ input: tool.input,
2930
+ output: tool.output,
2931
+ status: tool.status,
2932
+ error: tool.error,
2933
+ redaction: tool.redaction,
2934
+ metadata: tool.metadata
2935
+ };
2936
+ }
2937
+ function toNormalizedTraceSourceRefWire(sourceRef) {
2938
+ return omitUndefinedProperties({
2939
+ event_id: sourceRef.eventId,
2940
+ message_id: sourceRef.messageId,
2941
+ span_id: sourceRef.spanId,
2942
+ trace_id: sourceRef.traceId,
2943
+ raw_kind: sourceRef.rawKind,
2944
+ path: sourceRef.path,
2945
+ line: sourceRef.line,
2946
+ metadata: sourceRef.metadata
2947
+ });
2948
+ }
2949
+ function fromNormalizedTraceSourceRefWire(sourceRef) {
2950
+ return {
2951
+ eventId: sourceRef.event_id,
2952
+ messageId: sourceRef.message_id,
2953
+ spanId: sourceRef.span_id,
2954
+ traceId: sourceRef.trace_id,
2955
+ rawKind: sourceRef.raw_kind,
2956
+ path: sourceRef.path,
2957
+ line: sourceRef.line,
2958
+ metadata: sourceRef.metadata
2959
+ };
2960
+ }
2961
+ function toNormalizedRawEvidenceWire(evidence) {
2962
+ return omitUndefinedProperties({
2963
+ kind: evidence.kind,
2964
+ ref: evidence.ref,
2965
+ media_type: evidence.mediaType,
2966
+ content: evidence.content,
2967
+ redacted: evidence.redacted,
2968
+ metadata: evidence.metadata
2969
+ });
2970
+ }
2971
+ function fromNormalizedRawEvidenceWire(evidence) {
2972
+ return {
2973
+ kind: evidence.kind,
2974
+ ref: evidence.ref,
2975
+ mediaType: evidence.media_type,
2976
+ content: evidence.content,
2977
+ redacted: evidence.redacted,
2978
+ metadata: evidence.metadata
2979
+ };
2980
+ }
2540
2981
  function computeTraceSummary(messages) {
2541
2982
  const toolCallCounts = {};
2542
2983
  const toolDurations = {};
@@ -2604,6 +3045,82 @@ function computeTraceSummary(messages) {
2604
3045
  endTime: latestEnd?.toISOString()
2605
3046
  };
2606
3047
  }
3048
/**
 * Returns the trajectory events selected by the trajectory's branch.
 *
 * When the branch lists no included event ids (missing branch, missing list,
 * or empty list) the original `events` array is returned as-is. Otherwise a
 * new array is returned containing only the events whose `eventId` appears in
 * `branch.includedEventIds`, in their original order.
 *
 * @param {object} trajectory - Trajectory with `events` and optional `branch`.
 * @returns {Array<object>} The selected events.
 */
function getSelectedTrajectoryEvents(trajectory) {
  const selectedIds = trajectory.branch?.includedEventIds;
  if (selectedIds && selectedIds.length !== 0) {
    const lookup = new Set(selectedIds);
    return trajectory.events.filter(({ eventId }) => lookup.has(eventId));
  }
  return trajectory.events;
}
3055
/**
 * Summarizes a normalized trajectory: per-tool call counts, tool error count,
 * LLM call count, optional per-tool durations, and the overall time window.
 *
 * Only the events returned by `getSelectedTrajectoryEvents(trajectory)` are
 * considered.
 *
 * LLM calls are counted from `model_turn` events. When the selection contains
 * no `model_turn` event at all, assistant `message` events are counted
 * instead.
 *
 * @param {object} trajectory - Trajectory with `events` and optional `branch`,
 *   `tokenUsage`, `costUsd`, `durationMs`, `startedAt` and `endedAt`.
 * @returns {object} Summary with a `trace` section (`eventCount`, `toolCalls`,
 *   `errorCount`, `llmCallCount`, and `toolDurations` when at least one tool
 *   call reported a duration) plus `tokenUsage`, `costUsd`, `durationMs`,
 *   `startTime` and `endTime`.
 */
function computeTraceSummaryFromTrajectory(trajectory) {
  const selectedEvents = getSelectedTrajectoryEvents(trajectory);
  const hasModelTurnEvents = selectedEvents.some((event) => event.type === "model_turn");
  // Accumulate in Maps rather than plain objects: tool names are arbitrary
  // strings, and a name such as "constructor" or "toString" would otherwise
  // read an inherited Object.prototype member instead of a missing entry.
  const toolCallCounts = new Map();
  const toolDurations = new Map();
  let totalToolCalls = 0;
  let errorCount = 0;
  let llmCallCount = 0;
  let earliestStart;
  let latestEnd;
  for (const event of selectedEvents) {
    const isAssistantMessage = event.type === "message" && event.message?.role === "assistant";
    if (event.type === "model_turn" || (!hasModelTurnEvents && isAssistantMessage)) {
      llmCallCount++;
    }
    // Every selected event (not only tool calls) contributes to the time window.
    const eventStart = parseTimestamp(event.timestamp);
    if (eventStart && (!earliestStart || eventStart < earliestStart)) {
      earliestStart = eventStart;
    }
    const eventEnd = deriveEventEnd(eventStart, event.durationMs);
    if (eventEnd && (!latestEnd || eventEnd > latestEnd)) {
      latestEnd = eventEnd;
    }
    if (event.type !== "tool_call" || !event.tool) {
      continue;
    }
    // String() keeps the same key coercion a plain-object property would apply.
    const toolName = String(event.tool.name);
    toolCallCounts.set(toolName, (toolCallCounts.get(toolName) ?? 0) + 1);
    totalToolCalls++;
    if (isErrorToolEvent(event)) {
      errorCount++;
    }
    if (event.durationMs !== void 0) {
      const durations = toolDurations.get(toolName);
      if (durations) {
        durations.push(event.durationMs);
      } else {
        toolDurations.set(toolName, [event.durationMs]);
      }
    }
  }
  return {
    trace: {
      // eventCount reports the number of tool-call events.
      eventCount: totalToolCalls,
      toolCalls: Object.fromEntries(toolCallCounts),
      errorCount,
      llmCallCount,
      // toolDurations is present only when some tool call carried a duration.
      ...(toolDurations.size > 0 ? { toolDurations: Object.fromEntries(toolDurations) } : {})
    },
    tokenUsage: trajectory.tokenUsage,
    costUsd: trajectory.costUsd,
    durationMs: trajectory.durationMs,
    // Explicit trajectory bounds win over the bounds derived from events.
    startTime: trajectory.startedAt ?? earliestStart?.toISOString(),
    endTime: trajectory.endedAt ?? latestEnd?.toISOString()
  };
}
3109
/**
 * Parses a timestamp into a `Date`.
 *
 * @param {*} timestamp - Value accepted by the `Date` constructor.
 * @returns {Date|undefined} The parsed date, or `undefined` when the input is
 *   falsy or does not produce a valid date.
 */
function parseTimestamp(timestamp) {
  if (timestamp) {
    const parsed = new Date(timestamp);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed;
    }
  }
  return undefined;
}
3114
/**
 * Derives the end time of an event from its start time and duration.
 *
 * @param {Date|undefined} start - Event start time.
 * @param {number|undefined} durationMs - Event duration in milliseconds.
 * @returns {Date|undefined} `undefined` when there is no start time; `start`
 *   itself when the duration is missing or does not yield a valid date;
 *   otherwise a new `Date` offset from `start` by `durationMs`.
 */
function deriveEventEnd(start, durationMs) {
  if (!start) return void 0;
  if (durationMs === void 0) return start;
  const end = new Date(start.getTime() + durationMs);
  // A NaN, infinite or out-of-range duration produces an Invalid Date, and
  // calling toISOString() on it throws a RangeError. Treat such a duration as
  // unknown and fall back to the start time.
  return Number.isNaN(end.getTime()) ? start : end;
}
3119
/**
 * Tells whether a tool event counts as an error.
 *
 * An event is an error when its tool carries a truthy `error`, or when its
 * tool `status` is "error", "timeout" or "cancelled".
 *
 * @param {object} event - Event with an optional `tool` record.
 * @returns {boolean} `true` when the event is an error, `false` otherwise.
 */
function isErrorToolEvent(event) {
  const tool = event.tool;
  if (!tool) {
    return false;
  }
  if (tool.error) {
    return true;
  }
  return ["error", "timeout", "cancelled"].includes(tool.status);
}
2607
3124
  var DEFAULT_EXPLORATION_TOOLS = [
2608
3125
  "read",
2609
3126
  "grep",
@@ -3400,6 +3917,30 @@ var SkillTriggerGrader = class {
3400
3917
  };
3401
3918
 
3402
3919
  // src/evaluation/graders/llm-grader-prompt.ts
3920
/**
 * Serializes a value as JSON indented by two spaces.
 *
 * @param {*} value - Value to serialize.
 * @returns {string} An empty string when `value` is `undefined`; otherwise the
 *   result of `JSON.stringify(value, null, 2)`.
 */
function stringifyPretty2(value) {
  if (value === undefined) {
    return "";
  }
  return JSON.stringify(value, null, 2);
}
3923
/**
 * Serializes a value as compact (unindented) JSON.
 *
 * @param {*} value - Value to serialize.
 * @returns {string} An empty string when `value` is `undefined`; otherwise the
 *   result of `JSON.stringify(value)`.
 */
function stringifyCompact2(value) {
  if (value === undefined) {
    return "";
  }
  return JSON.stringify(value);
}
3926
/**
 * Builds the variable map used to fill a grader prompt template.
 *
 * The question comes from `promptInputs.question` when it is non-blank,
 * otherwise from `evalCase.question`. Text values are trimmed; metadata and
 * rubrics are provided both pretty-printed and as compact JSON; file changes
 * and tool calls default to an empty string.
 *
 * @param {object} input - Object with `evalCase`, `candidate`, `promptInputs`
 *   and optional `rubrics`, `fileChanges` and `toolCalls`.
 * @returns {object} Map from `TEMPLATE_VARIABLES` keys to string values.
 */
function buildTemplateVariables2(input) {
  const { evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls } = input;
  const promptQuestion = promptInputs.question;
  const hasPromptQuestion = promptQuestion && promptQuestion.trim().length > 0;
  const questionText = (hasPromptQuestion ? promptQuestion : evalCase.question).trim();
  const outputText = candidate.trim();
  const expectedText = (evalCase.reference_answer ?? "").trim();
  const criteriaText = evalCase.criteria.trim();
  return {
    [TEMPLATE_VARIABLES.INPUT]: questionText,
    [TEMPLATE_VARIABLES.OUTPUT]: outputText,
    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: expectedText,
    [TEMPLATE_VARIABLES.CRITERIA]: criteriaText,
    [TEMPLATE_VARIABLES.METADATA]: stringifyPretty2(evalCase.metadata),
    [TEMPLATE_VARIABLES.METADATA_JSON]: stringifyCompact2(evalCase.metadata),
    [TEMPLATE_VARIABLES.RUBRICS]: stringifyPretty2(rubrics),
    [TEMPLATE_VARIABLES.RUBRICS_JSON]: stringifyCompact2(rubrics),
    [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
    [TEMPLATE_VARIABLES.TOOL_CALLS]: toolCalls ?? "",
    // The *_TEXT keys carry the same values as INPUT / OUTPUT / EXPECTED_OUTPUT.
    [TEMPLATE_VARIABLES.INPUT_TEXT]: questionText,
    [TEMPLATE_VARIABLES.OUTPUT_TEXT]: outputText,
    [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: expectedText
  };
}
3403
3944
  function assembleLlmGraderPrompt(input) {
3404
3945
  const {
3405
3946
  evalCase,
@@ -3412,6 +3953,17 @@ function assembleLlmGraderPrompt(input) {
3412
3953
  } = input;
3413
3954
  const rubrics = evaluatorConfig?.rubrics;
3414
3955
  if (rubrics && rubrics.length > 0) {
3956
+ if (graderTemplateOverride) {
3957
+ return assembleCustom(
3958
+ evalCase,
3959
+ candidate,
3960
+ promptInputs,
3961
+ rubrics,
3962
+ fileChanges,
3963
+ toolCalls,
3964
+ graderTemplateOverride
3965
+ );
3966
+ }
3415
3967
  const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
3416
3968
  if (hasScoreRanges) {
3417
3969
  return assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls);
@@ -3428,19 +3980,13 @@ function assembleLlmGraderPrompt(input) {
3428
3980
  );
3429
3981
  }
3430
3982
  function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, toolCalls, graderTemplateOverride) {
3431
- const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
3432
- const variables = {
3433
- [TEMPLATE_VARIABLES.INPUT]: formattedQuestion.trim(),
3434
- [TEMPLATE_VARIABLES.OUTPUT]: candidate.trim(),
3435
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT]: (evalCase.reference_answer ?? "").trim(),
3436
- [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
3437
- [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
3438
- [TEMPLATE_VARIABLES.TOOL_CALLS]: toolCalls ?? "",
3439
- // Deprecated aliases
3440
- [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
3441
- [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
3442
- [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
3443
- };
3983
+ const variables = buildTemplateVariables2({
3984
+ evalCase,
3985
+ candidate,
3986
+ promptInputs,
3987
+ fileChanges,
3988
+ toolCalls
3989
+ });
3444
3990
  const systemPrompt = buildOutputSchema();
3445
3991
  const template = graderTemplateOverride ?? DEFAULT_GRADER_TEMPLATE;
3446
3992
  let userPrompt = substituteVariables(template, variables);
@@ -3463,6 +4009,27 @@ ${toolCalls}`;
3463
4009
  mode: "freeform"
3464
4010
  };
3465
4011
  }
4012
/**
 * Assembles a grader prompt from a caller-supplied template for an eval case
 * that has rubrics.
 *
 * The system prompt is the score-range output schema when any rubric defines
 * non-empty `score_ranges`, otherwise the rubric output schema. The user
 * prompt is the override template with the template variables substituted.
 *
 * @param {object} evalCase - Eval case being graded.
 * @param {string} candidate - Candidate answer text.
 * @param {object} promptInputs - Prompt inputs (may carry a `question`).
 * @param {Array<object>} rubrics - Rubrics for this grader.
 * @param {string|undefined} fileChanges - File-change text, if any.
 * @param {string|undefined} toolCalls - Tool-call text, if any.
 * @param {string} graderTemplateOverride - Template to substitute into.
 * @returns {object} `{ systemPrompt, userPrompt, responseSchema, mode }` where
 *   `responseSchema` is the same value as `systemPrompt` and `mode` is
 *   "score_range" or "checklist".
 */
function assembleCustom(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls, graderTemplateOverride) {
  const usesScoreRanges = rubrics.some(
    ({ score_ranges: ranges }) => ranges && ranges.length > 0
  );
  let systemPrompt;
  let mode;
  if (usesScoreRanges) {
    systemPrompt = buildScoreRangeOutputSchema();
    mode = "score_range";
  } else {
    systemPrompt = buildRubricOutputSchema();
    mode = "checklist";
  }
  const variables = buildTemplateVariables2({
    evalCase,
    candidate,
    promptInputs,
    rubrics,
    fileChanges,
    toolCalls
  });
  const userPrompt = substituteVariables(graderTemplateOverride, variables);
  return {
    systemPrompt,
    userPrompt,
    responseSchema: systemPrompt,
    mode
  };
}
3466
4033
  function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChanges, toolCalls) {
3467
4034
  const formattedQuestion = promptInputs.question && promptInputs.question.trim().length > 0 ? promptInputs.question : evalCase.question;
3468
4035
  const parts = [
@@ -3486,10 +4053,19 @@ function assembleChecklist(evalCase, candidate, promptInputs, rubrics, fileChang
3486
4053
  parts.push("[[ ## tool_calls ## ]]", toolCalls, "");
3487
4054
  }
3488
4055
  parts.push("[[ ## rubrics ## ]]");
4056
+ const operatorGuidance = formatRubricOperatorGuidance(rubrics);
4057
+ if (operatorGuidance.length > 0) {
4058
+ parts.push("", "Operator guidance:");
4059
+ for (const guidance of operatorGuidance) {
4060
+ parts.push(`- ${guidance}`);
4061
+ }
4062
+ parts.push("");
4063
+ }
3489
4064
  for (const rubric of rubrics) {
3490
4065
  const requiredLabel = rubric.required ? " (REQUIRED)" : "";
3491
4066
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
3492
- parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.outcome}`);
4067
+ const operatorLabel = formatRubricOperatorLabel(rubric.operator);
4068
+ parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}${operatorLabel}: ${rubric.outcome}`);
3493
4069
  }
3494
4070
  parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
3495
4071
  const systemPrompt = buildRubricOutputSchema();
@@ -3529,6 +4105,9 @@ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChan
3529
4105
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
3530
4106
  const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
3531
4107
  parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
4108
+ if (rubric.operator) {
4109
+ parts.push(`Operator: ${rubric.operator}`);
4110
+ }
3532
4111
  if (rubric.outcome) {
3533
4112
  parts.push(`Description: ${rubric.outcome}`);
3534
4113
  }
@@ -3541,6 +4120,10 @@ function assembleScoreRange(evalCase, candidate, promptInputs, rubrics, fileChan
3541
4120
  }
3542
4121
  }
3543
4122
  }
4123
+ const operatorGuidance = formatRubricOperatorGuidance(rubrics);
4124
+ if (operatorGuidance.length > 0) {
4125
+ parts.push("", ...operatorGuidance);
4126
+ }
3544
4127
  parts.push(
3545
4128
  "",
3546
4129
  "For each criterion, provide an integer score 0-10 that matches one of its defined score ranges."
@@ -4259,7 +4842,7 @@ function runEqualsAssertion(output, value) {
4259
4842
  import { spawn } from "node:child_process";
4260
4843
  import { randomUUID } from "node:crypto";
4261
4844
  import { createWriteStream } from "node:fs";
4262
- import { mkdir } from "node:fs/promises";
4845
+ import { mkdir as mkdir2 } from "node:fs/promises";
4263
4846
  import path5 from "node:path";
4264
4847
 
4265
4848
  // src/runtime/child-tracker.ts
@@ -4759,7 +5342,7 @@ var ClaudeCliProvider = class {
4759
5342
  return void 0;
4760
5343
  }
4761
5344
  try {
4762
- await mkdir(logDir, { recursive: true });
5345
+ await mkdir2(logDir, { recursive: true });
4763
5346
  } catch (error) {
4764
5347
  const message = error instanceof Error ? error.message : String(error);
4765
5348
  console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
@@ -5069,7 +5652,7 @@ function tryParseJson(line) {
5069
5652
  // src/evaluation/providers/claude-sdk.ts
5070
5653
  import { randomUUID as randomUUID2 } from "node:crypto";
5071
5654
  import { createWriteStream as createWriteStream2 } from "node:fs";
5072
- import { mkdir as mkdir2 } from "node:fs/promises";
5655
+ import { mkdir as mkdir3 } from "node:fs/promises";
5073
5656
  import path6 from "node:path";
5074
5657
  var claudeSdkModule = null;
5075
5658
  async function loadClaudeSdk() {
@@ -5254,7 +5837,7 @@ var ClaudeSdkProvider = class {
5254
5837
  return void 0;
5255
5838
  }
5256
5839
  try {
5257
- await mkdir2(logDir, { recursive: true });
5840
+ await mkdir3(logDir, { recursive: true });
5258
5841
  } catch (error) {
5259
5842
  const message = error instanceof Error ? error.message : String(error);
5260
5843
  console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
@@ -5449,44 +6032,44 @@ function formatElapsed2(startedAt) {
5449
6032
  // src/evaluation/providers/cli.ts
5450
6033
  import { exec as execWithCallback } from "node:child_process";
5451
6034
  import fs2 from "node:fs/promises";
5452
- import os2 from "node:os";
6035
+ import os from "node:os";
5453
6036
  import path7 from "node:path";
5454
6037
  import { promisify } from "node:util";
5455
- import { z as z2 } from "zod";
5456
- var ToolCallSchema = z2.object({
5457
- tool: z2.string(),
5458
- input: z2.unknown().optional(),
5459
- output: z2.unknown().optional(),
5460
- id: z2.string().optional(),
5461
- start_time: z2.string().optional(),
5462
- end_time: z2.string().optional(),
5463
- duration_ms: z2.number().optional()
6038
+ import { z as z3 } from "zod";
6039
+ var ToolCallSchema = z3.object({
6040
+ tool: z3.string(),
6041
+ input: z3.unknown().optional(),
6042
+ output: z3.unknown().optional(),
6043
+ id: z3.string().optional(),
6044
+ start_time: z3.string().optional(),
6045
+ end_time: z3.string().optional(),
6046
+ duration_ms: z3.number().optional()
5464
6047
  });
5465
- var MessageInputSchema = z2.object({
5466
- role: z2.string(),
5467
- name: z2.string().optional(),
5468
- content: z2.unknown().optional(),
5469
- tool_calls: z2.array(ToolCallSchema).optional(),
5470
- start_time: z2.string().optional(),
5471
- end_time: z2.string().optional(),
5472
- duration_ms: z2.number().optional(),
5473
- metadata: z2.record(z2.unknown()).optional()
6048
+ var MessageInputSchema = z3.object({
6049
+ role: z3.string(),
6050
+ name: z3.string().optional(),
6051
+ content: z3.unknown().optional(),
6052
+ tool_calls: z3.array(ToolCallSchema).optional(),
6053
+ start_time: z3.string().optional(),
6054
+ end_time: z3.string().optional(),
6055
+ duration_ms: z3.number().optional(),
6056
+ metadata: z3.record(z3.unknown()).optional()
5474
6057
  });
5475
- var TokenUsageSchema = z2.object({
5476
- input: z2.number(),
5477
- output: z2.number(),
5478
- cached: z2.number().optional()
6058
+ var TokenUsageSchema = z3.object({
6059
+ input: z3.number(),
6060
+ output: z3.number(),
6061
+ cached: z3.number().optional()
5479
6062
  });
5480
- var CliOutputSchema = z2.object({
5481
- text: z2.unknown().optional(),
5482
- output: z2.array(MessageInputSchema).optional(),
5483
- output_messages: z2.array(MessageInputSchema).optional(),
6063
+ var CliOutputSchema = z3.object({
6064
+ text: z3.unknown().optional(),
6065
+ output: z3.array(MessageInputSchema).optional(),
6066
+ output_messages: z3.array(MessageInputSchema).optional(),
5484
6067
  token_usage: TokenUsageSchema.optional(),
5485
- cost_usd: z2.number().optional(),
5486
- duration_ms: z2.number().optional()
6068
+ cost_usd: z3.number().optional(),
6069
+ duration_ms: z3.number().optional()
5487
6070
  });
5488
6071
  var CliJsonlRecordSchema = CliOutputSchema.extend({
5489
- id: z2.string().min(1)
6072
+ id: z3.string().min(1)
5490
6073
  });
5491
6074
  function validateMetrics(costUsd, durationMs, context) {
5492
6075
  let validCostUsd = costUsd;
@@ -5991,7 +6574,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
5991
6574
  const safeEvalId = evalCaseId || "unknown";
5992
6575
  const timestamp = Date.now();
5993
6576
  const random = Math.random().toString(36).substring(2, 9);
5994
- return path7.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
6577
+ return path7.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
5995
6578
  }
5996
6579
  function formatTimeoutSuffix2(timeoutMs) {
5997
6580
  if (!timeoutMs || timeoutMs <= 0) {
@@ -6004,7 +6587,7 @@ function formatTimeoutSuffix2(timeoutMs) {
6004
6587
  // src/evaluation/providers/codex.ts
6005
6588
  import { randomUUID as randomUUID3 } from "node:crypto";
6006
6589
  import { createWriteStream as createWriteStream3 } from "node:fs";
6007
- import { mkdir as mkdir3 } from "node:fs/promises";
6590
+ import { mkdir as mkdir4 } from "node:fs/promises";
6008
6591
  import path8 from "node:path";
6009
6592
 
6010
6593
  // src/evaluation/providers/codex-log-tracker.ts
@@ -6097,6 +6680,9 @@ var CodexProvider = class {
6097
6680
  const startMs = Date.now();
6098
6681
  const logger = await this.createStreamLogger(request).catch(() => void 0);
6099
6682
  const codexOptions = {};
6683
+ if (this.config.executable) {
6684
+ codexOptions.codexPathOverride = this.config.executable;
6685
+ }
6100
6686
  if (this.config.model) {
6101
6687
  codexOptions.config = { model: this.config.model };
6102
6688
  }
@@ -6108,6 +6694,9 @@ var CodexProvider = class {
6108
6694
  if (cwd) {
6109
6695
  threadOptions.workingDirectory = cwd;
6110
6696
  }
6697
+ if (this.config.modelReasoningEffort) {
6698
+ threadOptions.modelReasoningEffort = this.config.modelReasoningEffort;
6699
+ }
6111
6700
  const thread = codex.startThread(threadOptions);
6112
6701
  const inputFiles = normalizeInputFiles(request.inputFiles);
6113
6702
  const basePrompt = buildPromptDocument(request, inputFiles);
@@ -6255,7 +6844,7 @@ ${basePrompt}` : basePrompt;
6255
6844
  }
6256
6845
  resolveLogDirectory() {
6257
6846
  const disabled = isCodexLogStreamingDisabled();
6258
- if (disabled) {
6847
+ if (disabled || this.config.streamLog === false) {
6259
6848
  return void 0;
6260
6849
  }
6261
6850
  if (this.config.logDir) {
@@ -6269,7 +6858,7 @@ ${basePrompt}` : basePrompt;
6269
6858
  return void 0;
6270
6859
  }
6271
6860
  try {
6272
- await mkdir3(logDir, { recursive: true });
6861
+ await mkdir4(logDir, { recursive: true });
6273
6862
  } catch (error) {
6274
6863
  const message = error instanceof Error ? error.message : String(error);
6275
6864
  console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
@@ -6282,7 +6871,7 @@ ${basePrompt}` : basePrompt;
6282
6871
  targetName: this.targetName,
6283
6872
  evalCaseId: request.evalCaseId,
6284
6873
  attempt: request.attempt,
6285
- format: this.config.logFormat ?? "summary"
6874
+ format: this.config.streamLog === "raw" ? "json" : "summary"
6286
6875
  });
6287
6876
  recordCodexLogEntry({
6288
6877
  filePath,
@@ -6418,7 +7007,7 @@ function formatElapsed3(startedAt) {
6418
7007
 
6419
7008
  // src/evaluation/providers/copilot-cli.ts
6420
7009
  import { randomUUID as randomUUID5 } from "node:crypto";
6421
- import { mkdir as mkdir4 } from "node:fs/promises";
7010
+ import { mkdir as mkdir5 } from "node:fs/promises";
6422
7011
  import { homedir as homedir2 } from "node:os";
6423
7012
  import path11 from "node:path";
6424
7013
  import { Readable, Writable } from "node:stream";
@@ -6428,7 +7017,7 @@ import * as acp from "@agentclientprotocol/sdk";
6428
7017
  // src/evaluation/workspace/file-changes.ts
6429
7018
  import { exec as execCallback } from "node:child_process";
6430
7019
  import { readdirSync, statSync } from "node:fs";
6431
- import { readFile as readFile2, readdir, stat } from "node:fs/promises";
7020
+ import { readFile as readFile3, readdir, stat } from "node:fs/promises";
6432
7021
  import path9 from "node:path";
6433
7022
  import { promisify as promisify2 } from "node:util";
6434
7023
  var execAsync2 = promisify2(execCallback);
@@ -6503,7 +7092,7 @@ async function walkDir(rootDir, currentDir, snapshot) {
6503
7092
  if (fileStat.size > SNAPSHOT_MAX_FILE_BYTES) continue;
6504
7093
  let content;
6505
7094
  try {
6506
- content = await readFile2(fullPath, "utf8");
7095
+ content = await readFile3(fullPath, "utf8");
6507
7096
  if (content.includes("\0")) continue;
6508
7097
  } catch {
6509
7098
  continue;
@@ -6596,7 +7185,7 @@ import { arch, homedir, platform } from "node:os";
6596
7185
  import path10 from "node:path";
6597
7186
  import { fileURLToPath as fileURLToPath2 } from "node:url";
6598
7187
  function resolvePlatformCliPath() {
6599
- const os3 = platform();
7188
+ const os2 = platform();
6600
7189
  const cpu = arch();
6601
7190
  const platformMap = {
6602
7191
  linux: "linux",
@@ -6607,13 +7196,13 @@ function resolvePlatformCliPath() {
6607
7196
  x64: "x64",
6608
7197
  arm64: "arm64"
6609
7198
  };
6610
- const osPart = platformMap[os3];
7199
+ const osPart = platformMap[os2];
6611
7200
  const archPart = archMap[cpu];
6612
7201
  if (!osPart || !archPart) {
6613
7202
  return void 0;
6614
7203
  }
6615
7204
  const packageName = `@github/copilot-${osPart}-${archPart}`;
6616
- const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
7205
+ const binaryName = os2 === "win32" ? "copilot.exe" : "copilot";
6617
7206
  try {
6618
7207
  const resolved = import.meta.resolve(`${packageName}/package.json`);
6619
7208
  const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
@@ -6681,9 +7270,9 @@ function resolvePlatformCliPath() {
6681
7270
  }
6682
7271
  function globalNpmRoots() {
6683
7272
  const roots = [];
6684
- const os3 = platform();
7273
+ const os2 = platform();
6685
7274
  const home = homedir();
6686
- if (os3 === "win32") {
7275
+ if (os2 === "win32") {
6687
7276
  if (process.env.APPDATA) {
6688
7277
  roots.push(path10.join(process.env.APPDATA, "npm", "node_modules"));
6689
7278
  }
@@ -6698,7 +7287,7 @@ function globalNpmRoots() {
6698
7287
  if (process.env.npm_config_prefix) {
6699
7288
  const prefix = process.env.npm_config_prefix;
6700
7289
  roots.push(
6701
- os3 === "win32" ? path10.join(prefix, "node_modules") : path10.join(prefix, "lib", "node_modules")
7290
+ os2 === "win32" ? path10.join(prefix, "node_modules") : path10.join(prefix, "lib", "node_modules")
6702
7291
  );
6703
7292
  }
6704
7293
  return Array.from(new Set(roots));
@@ -7119,7 +7708,7 @@ var CopilotCliProvider = class {
7119
7708
  return void 0;
7120
7709
  }
7121
7710
  try {
7122
- await mkdir4(logDir, { recursive: true });
7711
+ await mkdir5(logDir, { recursive: true });
7123
7712
  } catch (error) {
7124
7713
  const message = error instanceof Error ? error.message : String(error);
7125
7714
  console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
@@ -7227,7 +7816,7 @@ function summarizeAcpEvent(eventType, data) {
7227
7816
  }
7228
7817
 
7229
7818
  // src/evaluation/providers/copilot-log.ts
7230
- import { readFile as readFile4 } from "node:fs/promises";
7819
+ import { readFile as readFile5 } from "node:fs/promises";
7231
7820
  import { homedir as homedir4 } from "node:os";
7232
7821
  import path13 from "node:path";
7233
7822
 
@@ -7363,7 +7952,7 @@ function parseCopilotEvents(eventsJsonl) {
7363
7952
  }
7364
7953
 
7365
7954
  // src/evaluation/providers/copilot-session-discovery.ts
7366
- import { readFile as readFile3, readdir as readdir2, stat as stat2 } from "node:fs/promises";
7955
+ import { readFile as readFile4, readdir as readdir2, stat as stat2 } from "node:fs/promises";
7367
7956
  import { homedir as homedir3 } from "node:os";
7368
7957
  import path12 from "node:path";
7369
7958
  var DEFAULT_SESSION_STATE_DIR = () => path12.join(homedir3(), ".copilot", "session-state");
@@ -7382,7 +7971,7 @@ async function discoverCopilotSessions(opts) {
7382
7971
  const workspacePath = path12.join(sessionDir, "workspace.yaml");
7383
7972
  const eventsPath = path12.join(sessionDir, "events.jsonl");
7384
7973
  try {
7385
- const workspaceContent = await readFile3(workspacePath, "utf8");
7974
+ const workspaceContent = await readFile4(workspacePath, "utf8");
7386
7975
  const workspace = parseYamlValue(workspaceContent) ?? {};
7387
7976
  const cwd = String(workspace.cwd ?? "");
7388
7977
  let updatedAt;
@@ -7444,7 +8033,7 @@ var CopilotLogProvider = class {
7444
8033
  const eventsPath = path13.join(sessionDir, "events.jsonl");
7445
8034
  let eventsContent;
7446
8035
  try {
7447
- eventsContent = await readFile4(eventsPath, "utf8");
8036
+ eventsContent = await readFile5(eventsPath, "utf8");
7448
8037
  } catch (err) {
7449
8038
  throw new Error(
7450
8039
  `Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
@@ -7491,7 +8080,7 @@ var CopilotLogProvider = class {
7491
8080
  // src/evaluation/providers/copilot-sdk.ts
7492
8081
  import { randomUUID as randomUUID6 } from "node:crypto";
7493
8082
  import { existsSync as existsSync2 } from "node:fs";
7494
- import { mkdir as mkdir5 } from "node:fs/promises";
8083
+ import { mkdir as mkdir6 } from "node:fs/promises";
7495
8084
  import path14 from "node:path";
7496
8085
 
7497
8086
  // src/evaluation/providers/copilot-sdk-log-tracker.ts
@@ -7831,7 +8420,7 @@ var CopilotSdkProvider = class {
7831
8420
  return void 0;
7832
8421
  }
7833
8422
  try {
7834
- await mkdir5(logDir, { recursive: true });
8423
+ await mkdir6(logDir, { recursive: true });
7835
8424
  } catch (error) {
7836
8425
  const message = error instanceof Error ? error.message : String(error);
7837
8426
  console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
@@ -7957,7 +8546,7 @@ var MockProvider = class {
7957
8546
  import { execSync, spawn as spawn3 } from "node:child_process";
7958
8547
  import { randomUUID as randomUUID7 } from "node:crypto";
7959
8548
  import { accessSync, createWriteStream as createWriteStream5, readFileSync } from "node:fs";
7960
- import { mkdir as mkdir6, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile2 } from "node:fs/promises";
8549
+ import { mkdir as mkdir7, mkdtemp as mkdtemp2, rm as rm2, writeFile as writeFile3 } from "node:fs/promises";
7961
8550
  import { tmpdir as tmpdir2 } from "node:os";
7962
8551
  import path15 from "node:path";
7963
8552
 
@@ -8166,7 +8755,7 @@ var PiCliProvider = class {
8166
8755
  const logger = await this.createStreamLogger(request).catch(() => void 0);
8167
8756
  try {
8168
8757
  const promptFile = path15.join(cwd, PROMPT_FILENAME);
8169
- await writeFile2(promptFile, request.question, "utf8");
8758
+ await writeFile3(promptFile, request.question, "utf8");
8170
8759
  const args = this.buildPiArgs(request.question, inputFiles);
8171
8760
  const result = await this.executePi(args, cwd, request.signal, logger);
8172
8761
  if (result.timedOut) {
@@ -8357,7 +8946,7 @@ ${prompt}` : prompt;
8357
8946
  return void 0;
8358
8947
  }
8359
8948
  try {
8360
- await mkdir6(logDir, { recursive: true });
8949
+ await mkdir7(logDir, { recursive: true });
8361
8950
  } catch (error) {
8362
8951
  const message = error instanceof Error ? error.message : String(error);
8363
8952
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
@@ -8920,7 +9509,7 @@ async function defaultPiRunner(options) {
8920
9509
  import { execSync as execSync2 } from "node:child_process";
8921
9510
  import { randomUUID as randomUUID8 } from "node:crypto";
8922
9511
  import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
8923
- import { mkdir as mkdir7 } from "node:fs/promises";
9512
+ import { mkdir as mkdir8 } from "node:fs/promises";
8924
9513
  import path16 from "node:path";
8925
9514
  import { createInterface } from "node:readline";
8926
9515
  import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
@@ -9357,7 +9946,7 @@ ${fileList}`;
9357
9946
  return void 0;
9358
9947
  }
9359
9948
  try {
9360
- await mkdir7(logDir, { recursive: true });
9949
+ await mkdir8(logDir, { recursive: true });
9361
9950
  } catch (error) {
9362
9951
  const message = error instanceof Error ? error.message : String(error);
9363
9952
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
@@ -9582,12 +10171,12 @@ import path27 from "node:path";
9582
10171
  import { promisify as promisify4 } from "node:util";
9583
10172
 
9584
10173
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
9585
- import { stat as stat5, writeFile as writeFile5 } from "node:fs/promises";
10174
+ import { stat as stat5, writeFile as writeFile6 } from "node:fs/promises";
9586
10175
  import path25 from "node:path";
9587
10176
 
9588
10177
  // src/evaluation/providers/vscode/utils/fs.ts
9589
10178
  import { constants } from "node:fs";
9590
- import { access, mkdir as mkdir8, readdir as readdir3, rm as rm3, stat as stat3 } from "node:fs/promises";
10179
+ import { access, mkdir as mkdir9, readdir as readdir3, rm as rm3, stat as stat3 } from "node:fs/promises";
9591
10180
  import path17 from "node:path";
9592
10181
  async function pathExists(target) {
9593
10182
  try {
@@ -9598,7 +10187,7 @@ async function pathExists(target) {
9598
10187
  }
9599
10188
  }
9600
10189
  async function ensureDir(target) {
9601
- await mkdir8(target, { recursive: true });
10190
+ await mkdir9(target, { recursive: true });
9602
10191
  }
9603
10192
  async function readDirEntries(target) {
9604
10193
  const entries = await readdir3(target, { withFileTypes: true });
@@ -9731,7 +10320,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
9731
10320
  }
9732
10321
 
9733
10322
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
9734
- import { readFile as readFile5 } from "node:fs/promises";
10323
+ import { readFile as readFile6 } from "node:fs/promises";
9735
10324
  import path20 from "node:path";
9736
10325
 
9737
10326
  // src/evaluation/providers/vscode/utils/time.ts
@@ -9770,7 +10359,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
9770
10359
  const maxAttempts = 10;
9771
10360
  while (attempts < maxAttempts) {
9772
10361
  try {
9773
- const content = await readFile5(responseFileFinal, { encoding: "utf8" });
10362
+ const content = await readFile6(responseFileFinal, { encoding: "utf8" });
9774
10363
  if (!silent) {
9775
10364
  process.stdout.write(`${content}
9776
10365
  `);
@@ -9827,7 +10416,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9827
10416
  const maxAttempts = 10;
9828
10417
  while (attempts < maxAttempts) {
9829
10418
  try {
9830
- const content = await readFile5(file, { encoding: "utf8" });
10419
+ const content = await readFile6(file, { encoding: "utf8" });
9831
10420
  if (!silent) {
9832
10421
  process.stdout.write(`${content}
9833
10422
  `);
@@ -9850,7 +10439,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9850
10439
 
9851
10440
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
9852
10441
  import { exec, spawn as spawn4 } from "node:child_process";
9853
- import { mkdir as mkdir9, writeFile as writeFile3 } from "node:fs/promises";
10442
+ import { mkdir as mkdir10, writeFile as writeFile4 } from "node:fs/promises";
9854
10443
  import path22 from "node:path";
9855
10444
  import { promisify as promisify3 } from "node:util";
9856
10445
 
@@ -9931,9 +10520,9 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9931
10520
  const aliveFile = path22.join(subagentDir, DEFAULT_ALIVE_FILENAME);
9932
10521
  await removeIfExists(aliveFile);
9933
10522
  const githubAgentsDir = path22.join(subagentDir, ".github", "agents");
9934
- await mkdir9(githubAgentsDir, { recursive: true });
10523
+ await mkdir10(githubAgentsDir, { recursive: true });
9935
10524
  const wakeupDst = path22.join(githubAgentsDir, "wakeup.md");
9936
- await writeFile3(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
10525
+ await writeFile4(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
9937
10526
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
9938
10527
  label: "open-workspace"
9939
10528
  });
@@ -9962,9 +10551,9 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9962
10551
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
9963
10552
  const workspacePath = path22.join(subagentDir, `${path22.basename(subagentDir)}.code-workspace`);
9964
10553
  const messagesDir = path22.join(subagentDir, "messages");
9965
- await mkdir9(messagesDir, { recursive: true });
10554
+ await mkdir10(messagesDir, { recursive: true });
9966
10555
  const reqFile = path22.join(messagesDir, `${timestamp}_req.md`);
9967
- await writeFile3(reqFile, requestInstructions, { encoding: "utf8" });
10556
+ await writeFile4(reqFile, requestInstructions, { encoding: "utf8" });
9968
10557
  const reqUri = pathToFileUri2(reqFile);
9969
10558
  const chatArgs = ["-r", "chat", "-m", chatId];
9970
10559
  for (const attachment of attachmentPaths) {
@@ -9990,7 +10579,7 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
9990
10579
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
9991
10580
  const workspacePath = path22.join(subagentDir, `${path22.basename(subagentDir)}.code-workspace`);
9992
10581
  const messagesDir = path22.join(subagentDir, "messages");
9993
- await mkdir9(messagesDir, { recursive: true });
10582
+ await mkdir10(messagesDir, { recursive: true });
9994
10583
  const chatArgs = ["-r", "chat", "-m", chatId];
9995
10584
  for (const attachment of attachmentPaths) {
9996
10585
  chatArgs.push("-a", attachment);
@@ -10013,7 +10602,7 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
10013
10602
  }
10014
10603
 
10015
10604
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
10016
- import { copyFile, mkdir as mkdir10, readFile as readFile6, readdir as readdir4, stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
10605
+ import { copyFile, mkdir as mkdir11, readFile as readFile7, readdir as readdir4, stat as stat4, writeFile as writeFile5 } from "node:fs/promises";
10017
10606
  import path24 from "node:path";
10018
10607
 
10019
10608
  // src/evaluation/providers/vscode/utils/workspace.ts
@@ -10130,7 +10719,7 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
10130
10719
  if (!stats.isFile()) {
10131
10720
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
10132
10721
  }
10133
- const templateText = await readFile6(workspaceSrc, "utf8");
10722
+ const templateText = await readFile7(workspaceSrc, "utf8");
10134
10723
  workspaceContent = JSON.parse(templateText);
10135
10724
  } else {
10136
10725
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
@@ -10149,9 +10738,9 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
10149
10738
  transformedContent = JSON.stringify(parsed, null, 2);
10150
10739
  }
10151
10740
  }
10152
- await writeFile4(workspaceDst, transformedContent, "utf8");
10741
+ await writeFile5(workspaceDst, transformedContent, "utf8");
10153
10742
  const messagesDir = path24.join(subagentDir, "messages");
10154
- await mkdir10(messagesDir, { recursive: true });
10743
+ await mkdir11(messagesDir, { recursive: true });
10155
10744
  return { workspace: workspaceDst, messagesDir };
10156
10745
  }
10157
10746
  async function createSubagentLock(subagentDir) {
@@ -10174,7 +10763,7 @@ async function createSubagentLock(subagentDir) {
10174
10763
  );
10175
10764
  }
10176
10765
  const lockFile = path24.join(subagentDir, DEFAULT_LOCK_NAME);
10177
- await writeFile4(lockFile, "", { encoding: "utf8" });
10766
+ await writeFile5(lockFile, "", { encoding: "utf8" });
10178
10767
  return lockFile;
10179
10768
  }
10180
10769
  async function removeSubagentLock(subagentDir) {
@@ -10199,7 +10788,7 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
10199
10788
  }
10200
10789
  if (promptFile) {
10201
10790
  const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
10202
- await mkdir10(githubAgentsDir, { recursive: true });
10791
+ await mkdir11(githubAgentsDir, { recursive: true });
10203
10792
  const agentFile = path24.join(githubAgentsDir, `${chatId}.md`);
10204
10793
  try {
10205
10794
  await copyFile(promptFile, agentFile);
@@ -10460,7 +11049,7 @@ async function dispatchBatchAgent(options) {
10460
11049
  const reqFile = requestFiles[index];
10461
11050
  const tmpFile = responseTmpFiles[index];
10462
11051
  const finalFile = responseFilesFinal[index];
10463
- return writeFile5(
11052
+ return writeFile6(
10464
11053
  reqFile,
10465
11054
  createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
10466
11055
  { encoding: "utf8" }
@@ -10472,7 +11061,7 @@ async function dispatchBatchAgent(options) {
10472
11061
  responseFilesFinal,
10473
11062
  orchestratorTemplateContent
10474
11063
  );
10475
- await writeFile5(orchestratorFile, orchestratorContent, { encoding: "utf8" });
11064
+ await writeFile6(orchestratorFile, orchestratorContent, { encoding: "utf8" });
10476
11065
  }
10477
11066
  const chatAttachments = [orchestratorFile, ...attachments];
10478
11067
  const orchestratorUri = pathToFileUri2(orchestratorFile);
@@ -10538,7 +11127,7 @@ async function dispatchBatchAgent(options) {
10538
11127
  }
10539
11128
 
10540
11129
  // src/evaluation/providers/vscode/dispatch/provision.ts
10541
- import { writeFile as writeFile6 } from "node:fs/promises";
11130
+ import { writeFile as writeFile7 } from "node:fs/promises";
10542
11131
  import path26 from "node:path";
10543
11132
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
10544
11133
  folders: [
@@ -10619,8 +11208,8 @@ async function provisionSubagents(options) {
10619
11208
  if (!dryRun) {
10620
11209
  await removeIfExists(lockFile);
10621
11210
  await ensureDir(githubAgentsDir);
10622
- await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
10623
- await writeFile6(wakeupDst, wakeupContent, "utf8");
11211
+ await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
11212
+ await writeFile7(wakeupDst, wakeupContent, "utf8");
10624
11213
  }
10625
11214
  created.push(subagentDir);
10626
11215
  lockedSubagents.delete(subagentDir);
@@ -10630,8 +11219,8 @@ async function provisionSubagents(options) {
10630
11219
  if (!isLocked && force) {
10631
11220
  if (!dryRun) {
10632
11221
  await ensureDir(githubAgentsDir);
10633
- await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
10634
- await writeFile6(wakeupDst, wakeupContent, "utf8");
11222
+ await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
11223
+ await writeFile7(wakeupDst, wakeupContent, "utf8");
10635
11224
  }
10636
11225
  created.push(subagentDir);
10637
11226
  subagentsProvisioned += 1;
@@ -10639,8 +11228,8 @@ async function provisionSubagents(options) {
10639
11228
  }
10640
11229
  if (!dryRun && !await pathExists(workspaceDst)) {
10641
11230
  await ensureDir(githubAgentsDir);
10642
- await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
10643
- await writeFile6(wakeupDst, wakeupContent, "utf8");
11231
+ await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
11232
+ await writeFile7(wakeupDst, wakeupContent, "utf8");
10644
11233
  }
10645
11234
  skippedExisting.push(subagentDir);
10646
11235
  subagentsProvisioned += 1;
@@ -10655,8 +11244,8 @@ async function provisionSubagents(options) {
10655
11244
  if (!dryRun) {
10656
11245
  await ensureDir(subagentDir);
10657
11246
  await ensureDir(githubAgentsDir);
10658
- await writeFile6(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
10659
- await writeFile6(wakeupDst, wakeupContent, "utf8");
11247
+ await writeFile7(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
11248
+ await writeFile7(wakeupDst, wakeupContent, "utf8");
10660
11249
  }
10661
11250
  created.push(subagentDir);
10662
11251
  subagentsProvisioned += 1;
@@ -10981,7 +11570,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
10981
11570
 
10982
11571
  // src/evaluation/providers/targets-file.ts
10983
11572
  import { constants as constants3 } from "node:fs";
10984
- import { access as access3, readFile as readFile7 } from "node:fs/promises";
11573
+ import { access as access3, readFile as readFile8 } from "node:fs/promises";
10985
11574
  import path28 from "node:path";
10986
11575
  function isRecord(value) {
10987
11576
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -11025,7 +11614,7 @@ async function readTargetDefinitions(filePath) {
11025
11614
  if (!await fileExists2(absolutePath)) {
11026
11615
  throw new Error(`targets.yaml not found at ${absolutePath}`);
11027
11616
  }
11028
- const raw = await readFile7(absolutePath, "utf8");
11617
+ const raw = await readFile8(absolutePath, "utf8");
11029
11618
  const parsed = parseYamlValue(raw);
11030
11619
  if (!isRecord(parsed)) {
11031
11620
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -11216,6 +11805,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
11216
11805
  output: context.output ?? null,
11217
11806
  inputFiles: context.evalCase.file_paths,
11218
11807
  input: context.evalCase.input,
11808
+ metadata: context.evalCase.metadata ?? null,
11219
11809
  trace: context.trace ?? null,
11220
11810
  fileChanges: context.fileChanges ?? null,
11221
11811
  workspacePath: context.workspacePath ?? null,
@@ -11733,7 +12323,7 @@ function getTCritical(df) {
11733
12323
  }
11734
12324
 
11735
12325
  // src/evaluation/workspace/manager.ts
11736
- import { cp, mkdir as mkdir12, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
12326
+ import { cp, mkdir as mkdir13, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
11737
12327
  import path33 from "node:path";
11738
12328
  var TemplateNotFoundError = class extends Error {
11739
12329
  constructor(templatePath) {
@@ -11767,7 +12357,7 @@ function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
11767
12357
  return path33.join(root, evalRunId, caseId);
11768
12358
  }
11769
12359
  async function copyDirectoryRecursive(src, dest) {
11770
- await mkdir12(dest, { recursive: true });
12360
+ await mkdir13(dest, { recursive: true });
11771
12361
  const entries = await readdir5(src, { withFileTypes: true });
11772
12362
  for (const entry of entries) {
11773
12363
  const srcPath = path33.join(src, entry.name);
@@ -11842,7 +12432,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
11842
12432
  import { execFile } from "node:child_process";
11843
12433
  import { createHash } from "node:crypto";
11844
12434
  import { existsSync as existsSync3 } from "node:fs";
11845
- import { cp as cp2, mkdir as mkdir13, readFile as readFile8, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
12435
+ import { cp as cp2, mkdir as mkdir14, readFile as readFile9, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile8 } from "node:fs/promises";
11846
12436
  import path34 from "node:path";
11847
12437
  import { promisify as promisify5 } from "node:util";
11848
12438
  var execFileAsync = promisify5(execFile);
@@ -11896,7 +12486,7 @@ function computeWorkspaceFingerprint(repos) {
11896
12486
  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
11897
12487
  }
11898
12488
  async function copyDirectoryRecursive2(src, dest, skipDirs) {
11899
- await mkdir13(dest, { recursive: true });
12489
+ await mkdir14(dest, { recursive: true });
11900
12490
  const entries = await readdir6(src, { withFileTypes: true });
11901
12491
  for (const entry of entries) {
11902
12492
  const srcPath = path34.join(src, entry.name);
@@ -11934,7 +12524,7 @@ var WorkspacePoolManager = class {
11934
12524
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
11935
12525
  const fingerprint = computeWorkspaceFingerprint(repos);
11936
12526
  const poolDir = path34.join(this.poolRoot, fingerprint);
11937
- await mkdir13(poolDir, { recursive: true });
12527
+ await mkdir14(poolDir, { recursive: true });
11938
12528
  const drifted = await this.checkDrift(poolDir, fingerprint);
11939
12529
  if (drifted) {
11940
12530
  console.warn(
@@ -11961,7 +12551,7 @@ var WorkspacePoolManager = class {
11961
12551
  poolDir
11962
12552
  };
11963
12553
  }
11964
- await mkdir13(slotPath, { recursive: true });
12554
+ await mkdir14(slotPath, { recursive: true });
11965
12555
  if (templatePath) {
11966
12556
  await copyDirectoryRecursive2(templatePath, slotPath);
11967
12557
  }
@@ -11998,14 +12588,14 @@ var WorkspacePoolManager = class {
11998
12588
  async tryLock(lockPath) {
11999
12589
  for (let attempt = 0; attempt < 3; attempt++) {
12000
12590
  try {
12001
- await writeFile7(lockPath, String(process.pid), { flag: "wx" });
12591
+ await writeFile8(lockPath, String(process.pid), { flag: "wx" });
12002
12592
  return true;
12003
12593
  } catch (err) {
12004
12594
  if (err.code !== "EEXIST") {
12005
12595
  throw err;
12006
12596
  }
12007
12597
  try {
12008
- const pidStr = await readFile8(lockPath, "utf-8");
12598
+ const pidStr = await readFile9(lockPath, "utf-8");
12009
12599
  const pid = Number.parseInt(pidStr.trim(), 10);
12010
12600
  if (!Number.isNaN(pid)) {
12011
12601
  try {
@@ -12032,7 +12622,7 @@ var WorkspacePoolManager = class {
12032
12622
  async checkDrift(poolDir, fingerprint) {
12033
12623
  const metadataPath = path34.join(poolDir, "metadata.json");
12034
12624
  try {
12035
- const raw = await readFile8(metadataPath, "utf-8");
12625
+ const raw = await readFile9(metadataPath, "utf-8");
12036
12626
  const metadata = JSON.parse(raw);
12037
12627
  return metadata.fingerprint !== fingerprint;
12038
12628
  } catch {
@@ -12047,7 +12637,7 @@ var WorkspacePoolManager = class {
12047
12637
  repos,
12048
12638
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
12049
12639
  };
12050
- await writeFile7(path34.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
12640
+ await writeFile8(path34.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
12051
12641
  }
12052
12642
  /** Remove all slot directories and their lock files from a pool directory. */
12053
12643
  async removeAllSlots(poolDir) {
@@ -12057,7 +12647,7 @@ var WorkspacePoolManager = class {
12057
12647
  const lockPath = path34.join(poolDir, `${entry}.lock`);
12058
12648
  if (existsSync3(lockPath)) {
12059
12649
  try {
12060
- const pidStr = await readFile8(lockPath, "utf-8");
12650
+ const pidStr = await readFile9(lockPath, "utf-8");
12061
12651
  const pid = Number.parseInt(pidStr.trim(), 10);
12062
12652
  if (!Number.isNaN(pid)) {
12063
12653
  try {
@@ -12416,9 +13006,10 @@ async function executeWorkspaceScript(config, context, failureMode = "fatal") {
12416
13006
  }
12417
13007
 
12418
13008
  // src/evaluation/yaml-parser.ts
12419
- import { readFile as readFile15, stat as stat8 } from "node:fs/promises";
13009
+ import { readFile as readFile16, stat as stat8 } from "node:fs/promises";
12420
13010
  import path43 from "node:path";
12421
13011
  import micromatch2 from "micromatch";
13012
+ import { stringify as stringifyYaml } from "yaml";
12422
13013
 
12423
13014
  // src/evaluation/input-message-utils.ts
12424
13015
  function flattenInputMessages(messages) {
@@ -12485,7 +13076,7 @@ function cloneJsonValue(value) {
12485
13076
  }
12486
13077
 
12487
13078
  // src/evaluation/loaders/agent-skills-parser.ts
12488
- import { readFile as readFile9 } from "node:fs/promises";
13079
+ import { readFile as readFile10 } from "node:fs/promises";
12489
13080
  import path37 from "node:path";
12490
13081
  var ANSI_RED = "\x1B[31m";
12491
13082
  var ANSI_RESET2 = "\x1B[0m";
@@ -12498,7 +13089,7 @@ function isAgentSkillsFormat(parsed) {
12498
13089
  return Array.isArray(obj.evals);
12499
13090
  }
12500
13091
  async function loadTestsFromAgentSkills(filePath) {
12501
- const raw = await readFile9(filePath, "utf8");
13092
+ const raw = await readFile10(filePath, "utf8");
12502
13093
  let parsed;
12503
13094
  try {
12504
13095
  parsed = JSON.parse(raw);
@@ -12565,7 +13156,7 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
12565
13156
  }
12566
13157
 
12567
13158
  // src/evaluation/loaders/config-loader.ts
12568
- import { readFile as readFile10 } from "node:fs/promises";
13159
+ import { readFile as readFile11 } from "node:fs/promises";
12569
13160
  import path39 from "node:path";
12570
13161
 
12571
13162
  // src/evaluation/loaders/file-resolver.ts
@@ -12679,20 +13270,22 @@ var DEFAULT_EVAL_PATTERNS = [
12679
13270
  ];
12680
13271
  async function loadConfig(evalFilePath, repoRoot) {
12681
13272
  const directories = buildDirectoryChain2(evalFilePath, repoRoot);
13273
+ const globalConfigPath = path39.join(getAgentvConfigDir(), "config.yaml");
12682
13274
  for (const directory of directories) {
12683
13275
  const configPath = path39.join(directory, ".agentv", "config.yaml");
12684
13276
  if (!await fileExists3(configPath)) {
12685
13277
  continue;
12686
13278
  }
12687
13279
  const config = await readConfigFile(configPath);
12688
- if (config) return config;
13280
+ if (config) {
13281
+ return config;
13282
+ }
12689
13283
  }
12690
- const globalConfigPath = path39.join(getAgentvConfigDir(), "config.yaml");
12691
13284
  return await fileExists3(globalConfigPath) ? readConfigFile(globalConfigPath) : null;
12692
13285
  }
12693
13286
  async function readConfigFile(configPath) {
12694
13287
  try {
12695
- const rawConfig = await readFile10(configPath, "utf8");
13288
+ const rawConfig = await readFile11(configPath, "utf8");
12696
13289
  const parsed = interpolateEnv(parseYamlValue(rawConfig), process.env);
12697
13290
  if (!isJsonObject(parsed)) {
12698
13291
  logWarning(`Invalid config.yaml format at ${configPath}`);
@@ -12905,7 +13498,10 @@ function extractCacheConfig(suite) {
12905
13498
  logWarning(`Invalid execution.cache: ${cache}. Must be a boolean. Ignoring.`);
12906
13499
  return void 0;
12907
13500
  }
12908
- const cachePath = executionObj.cache_path ?? executionObj.cachePath;
13501
+ if (executionObj.cachePath !== void 0) {
13502
+ logWarning("Invalid execution.cachePath: use snake_case execution.cache_path in YAML.");
13503
+ }
13504
+ const cachePath = executionObj.cache_path;
12909
13505
  const resolvedCachePath = typeof cachePath === "string" && cachePath.trim().length > 0 ? cachePath.trim() : void 0;
12910
13506
  return { enabled: cache, cachePath: resolvedCachePath };
12911
13507
  }
@@ -13074,6 +13670,12 @@ function parseResultsConfig(raw, configPath) {
13074
13670
  ...branchPrefix && { branch_prefix: branchPrefix }
13075
13671
  };
13076
13672
  }
13673
/**
 * Returns the `results` section of a loaded config.
 *
 * @param {object | null | undefined} config - Loaded config object, if any.
 * @param {string} [_projectId] - Accepted for interface purposes; the body does not read it.
 * @returns {unknown} `config.results`, or `undefined` when `config` is falsy.
 */
function resolveResultsConfigForProject(config, _projectId) {
  return config ? config.results : void 0;
}
13077
13679
  function parseHooksConfig(raw, configPath) {
13078
13680
  if (raw === void 0 || raw === null) {
13079
13681
  return void 0;
@@ -13098,15 +13700,15 @@ function logWarning(message) {
13098
13700
  }
13099
13701
 
13100
13702
  // src/evaluation/loaders/grader-parser.ts
13101
- import { readFile as readFile12 } from "node:fs/promises";
13703
+ import { readFile as readFile13 } from "node:fs/promises";
13102
13704
  import path40 from "node:path";
13103
13705
 
13104
13706
  // src/evaluation/validation/prompt-validator.ts
13105
- import { readFile as readFile11 } from "node:fs/promises";
13707
+ import { readFile as readFile12 } from "node:fs/promises";
13106
13708
  var ANSI_YELLOW3 = "\x1B[33m";
13107
13709
  var ANSI_RESET4 = "\x1B[0m";
13108
13710
  async function validateCustomPromptContent(promptPath) {
13109
- const content = await readFile11(promptPath, "utf8");
13711
+ const content = await readFile12(promptPath, "utf8");
13110
13712
  validateTemplateVariables(content, promptPath);
13111
13713
  }
13112
13714
  function validateTemplateVariables(content, source) {
@@ -13238,7 +13840,7 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
13238
13840
  const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
13239
13841
  throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
13240
13842
  }
13241
- const content = await readFile12(resolved.resolvedPath, "utf8");
13843
+ const content = await readFile13(resolved.resolvedPath, "utf8");
13242
13844
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
13243
13845
  if (!isJsonObject2(parsed)) {
13244
13846
  throw new Error(
@@ -13285,6 +13887,103 @@ async function expandGraderEntries(candidateEvaluators, searchRoots, evalId, inc
13285
13887
  }
13286
13888
  return expanded;
13287
13889
  }
13890
/**
 * Gathers assertion-template references for one eval case: first those reachable
 * from the case's own evaluator list, then those from the suite-level defaults.
 *
 * Case-level evaluators are taken from the first non-nullish of
 * `assertions`, `assert`, `execution.evaluators`, `evaluators`.
 * Suite-level evaluators are ignored when `execution.skip_defaults === true`.
 *
 * @param {object} rawEvalCase - Raw eval case as parsed from the eval file.
 * @param {object | undefined} globalExecution - Suite-level execution block, if any.
 * @param {string[]} searchRoots - Directories used to resolve template includes.
 * @param {string} evalId - Eval identifier, used in error messages.
 * @returns {Promise<object[]>} Case-level references followed by suite-level references.
 */
async function collectAssertionTemplateSourceReferences(rawEvalCase, globalExecution, searchRoots, evalId) {
  const rawExecution = rawEvalCase.execution;
  const caseExecution = isJsonObject2(rawExecution) ? rawExecution : void 0;

  let caseLevel = rawEvalCase.assertions ?? rawEvalCase.assert;
  if ((caseLevel === void 0 || caseLevel === null) && caseExecution) {
    caseLevel = caseExecution.evaluators;
  }
  caseLevel = caseLevel ?? rawEvalCase.evaluators;

  let suiteLevel;
  if (caseExecution?.skip_defaults !== true) {
    suiteLevel = globalExecution?.assertions ?? globalExecution?.assert ?? globalExecution?.evaluators;
  }

  // Collected one after the other so that case-level errors surface first.
  const fromCase = await collectAssertionTemplateReferencesFromValue(caseLevel, searchRoots, evalId);
  const fromSuite = await collectAssertionTemplateReferencesFromValue(suiteLevel, searchRoots, evalId);
  return [...fromCase, ...fromSuite];
}
13901
/**
 * Recursively collects assertion-template references from an evaluator value.
 *
 * - `undefined` and any non-array, non-object value yield no references.
 * - A plain object is scanned through its nested evaluator keys.
 * - In an array, each include entry produces one reference and, when the
 *   template resolves and parses to an object with an `assertions` array,
 *   the references found inside that template as well.
 *
 * @param {unknown} value - Evaluator list, evaluator object, or undefined.
 * @param {string[]} searchRoots - Directories used to resolve include paths.
 * @param {string} evalId - Eval identifier, used in error messages.
 * @param {{ depth: number, chain: string[] }} [includeContext] - Include nesting
 *   depth so far and the resolved template paths already on the include chain.
 * @returns {Promise<object[]>} References in discovery order.
 * @throws {Error} When include nesting exceeds MAX_ASSERTION_INCLUDE_DEPTH or a
 *   template includes a path already on its own chain.
 */
async function collectAssertionTemplateReferencesFromValue(value, searchRoots, evalId, includeContext = { depth: 0, chain: [] }) {
  if (value === void 0) {
    return [];
  }
  if (!Array.isArray(value)) {
    if (!isJsonObject2(value)) {
      return [];
    }
    const fromObject = await collectAssertionTemplateReferencesFromObject(
      value,
      searchRoots,
      evalId,
      includeContext
    );
    return [...fromObject];
  }

  const collected = [];
  for (const entry of value) {
    if (!isIncludeEntry(entry)) {
      if (isJsonObject2(entry)) {
        const fromEntry = await collectAssertionTemplateReferencesFromObject(
          entry,
          searchRoots,
          evalId,
          includeContext
        );
        collected.push(...fromEntry);
      }
      continue;
    }

    const depth = includeContext.depth + 1;
    if (depth > MAX_ASSERTION_INCLUDE_DEPTH) {
      const chain = [...includeContext.chain, entry.include].join(" -> ");
      throw new Error(
        `Assertion template include depth exceeded ${MAX_ASSERTION_INCLUDE_DEPTH} in '${evalId}'. Include chain: ${chain}`
      );
    }

    const resolved = await resolveAssertionTemplateReference(entry.include, searchRoots);
    const templatePath = resolved.resolvedPath;

    // The reference is recorded even when the template could not be resolved.
    const reference = { kind: "assertion_template", displayPath: resolved.displayPath };
    if (templatePath) {
      reference.resolvedPath = path40.resolve(templatePath);
    }
    collected.push(reference);
    if (!templatePath) {
      continue;
    }

    if (includeContext.chain.includes(templatePath)) {
      const cycle = [...includeContext.chain, templatePath].join(" -> ");
      throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
    }

    const content = await readFile13(templatePath, "utf8");
    const parsed = interpolateEnv(parseYamlValue(content), process.env);
    if (!isJsonObject2(parsed) || !Array.isArray(parsed.assertions)) {
      continue;
    }

    // Nested includes are resolved relative to the template's own directory first.
    const templateDir = path40.dirname(templatePath);
    const otherRoots = searchRoots.filter((root) => path40.resolve(root) !== templateDir);
    const nested = await collectAssertionTemplateReferencesFromValue(
      parsed.assertions,
      [templateDir, ...otherRoots],
      evalId,
      { depth, chain: [...includeContext.chain, templatePath] }
    );
    collected.push(...nested);
  }
  return collected;
}
13973
/**
 * Collects assertion-template references from the nested evaluator lists of a
 * single object, looking under `assertions`, `assert`, and `evaluators` in
 * that order.
 *
 * @param {object} value - Evaluator-like object to inspect.
 * @param {string[]} searchRoots - Directories used to resolve include paths.
 * @param {string} evalId - Eval identifier, used in error messages.
 * @param {{ depth: number, chain: string[] }} includeContext - Current include state,
 *   passed through unchanged.
 * @returns {Promise<object[]>} References found under the three keys, concatenated.
 */
async function collectAssertionTemplateReferencesFromObject(value, searchRoots, evalId, includeContext) {
  const nestedKeys = ["assertions", "assert", "evaluators"];
  const collected = [];
  for (const nestedKey of nestedKeys) {
    const found = await collectAssertionTemplateReferencesFromValue(
      value[nestedKey],
      searchRoots,
      evalId,
      includeContext
    );
    collected.push(...found);
  }
  return collected;
}
13288
13987
  async function parseGraderList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
13289
13988
  const expandedEvaluators = await expandGraderEntries(candidateEvaluators, searchRoots, evalId);
13290
13989
  if (!expandedEvaluators) {
@@ -13411,6 +14110,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
13411
14110
  continue;
13412
14111
  }
13413
14112
  const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
14113
+ const resolvedScriptPath = await resolveOptionalCommandSource(command, searchRoots);
13414
14114
  const cwd = asString(rawEvaluator.cwd);
13415
14115
  let resolvedCwd;
13416
14116
  if (cwd) {
@@ -13476,6 +14176,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
13476
14176
  name,
13477
14177
  type: "code-grader",
13478
14178
  command,
14179
+ ...resolvedScriptPath ? { resolvedScriptPath } : {},
13479
14180
  cwd,
13480
14181
  resolvedCwd,
13481
14182
  ...weight2 !== void 0 ? { weight: weight2 } : {},
@@ -14543,6 +15244,17 @@ function asStringArray(value, description) {
14543
15244
  }
14544
15245
  return result;
14545
15246
  }
15247
/**
 * Tries to resolve the script file behind a grader command.
 *
 * Only the last element of `command` is considered, and only when it looks
 * like a file path according to `looksLikeFilePath`.
 *
 * @param {string[]} command - Command argv.
 * @param {string[]} searchRoots - Directories used to resolve the file reference.
 * @returns {Promise<string | undefined>} Absolute path of the resolved file, or
 *   `undefined` when there is no path-like last argument or it did not resolve.
 */
async function resolveOptionalCommandSource(command, searchRoots) {
  const lastArg = command.at(-1);
  if (!lastArg) {
    return void 0;
  }
  if (!looksLikeFilePath(lastArg)) {
    return void 0;
  }
  const { resolvedPath } = await resolveFileReference(lastArg, searchRoots);
  if (!resolvedPath) {
    return void 0;
  }
  return path40.resolve(resolvedPath);
}
15255
/**
 * Heuristic check for whether a command argument refers to a file.
 *
 * Returns true when the value is an absolute path, starts with ".", contains a
 * forward or back slash, or ends with one of the recognised script extensions
 * (js/ts variants, py, sh, bash, rb, go, rs; case-insensitive).
 *
 * @param {string} value - Command argument to inspect.
 * @returns {boolean} Whether the argument looks like a file path.
 */
function looksLikeFilePath(value) {
  if (path40.isAbsolute(value)) {
    return true;
  }
  if (value.startsWith(".")) {
    return true;
  }
  if (value.includes("/") || value.includes("\\")) {
    return true;
  }
  return /\.[cm]?[jt]sx?$|\.py$|\.sh$|\.bash$|\.rb$|\.go$|\.rs$/i.test(value);
}
14546
15258
  function parseCommandToArgv(command) {
14547
15259
  if (process.platform === "win32") {
14548
15260
  return ["cmd.exe", "/c", command];
@@ -14611,6 +15323,19 @@ var VALID_FIELD_AGGREGATION_TYPES = /* @__PURE__ */ new Set(["weighted_average",
14611
15323
  function isValidFieldAggregationType(value) {
14612
15324
  return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
14613
15325
  }
15326
+ var VALID_RUBRIC_OPERATORS = new Set(RUBRIC_OPERATOR_VALUES);
15327
/**
 * Validates the optional `operator` field of a rubric.
 *
 * @param {unknown} value - Raw operator value from the rubric definition.
 * @param {string} rubricId - Rubric identifier, used in the warning message.
 * @param {string} evaluatorName - Evaluator name, used in the warning message.
 * @param {string} evalId - Eval identifier, used in the warning message.
 * @returns {string | undefined} The operator when it is a string contained in
 *   VALID_RUBRIC_OPERATORS; otherwise `undefined`. A warning is logged for any
 *   value other than `undefined` that fails validation.
 */
function parseRubricOperator(value, rubricId, evaluatorName, evalId) {
  if (value === void 0) {
    return void 0;
  }
  const isKnownOperator = typeof value === "string" && VALID_RUBRIC_OPERATORS.has(value);
  if (!isKnownOperator) {
    logWarning2(
      `Ignoring invalid operator for rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': must be one of ${RUBRIC_OPERATOR_VALUES.join(", ")}`
    );
    return void 0;
  }
  return value;
}
14614
15339
  function parseRubricItems(rawRubrics, evaluatorName, evalId) {
14615
15340
  const items = [];
14616
15341
  for (const [index, rawRubric] of rawRubrics.entries()) {
@@ -14621,7 +15346,8 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
14621
15346
  continue;
14622
15347
  }
14623
15348
  const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
14624
- const expectedOutcome = asString(rawRubric.outcome) ?? "";
15349
+ const expectedOutcome = asString(rawRubric.outcome) ?? asString(rawRubric.criteria) ?? "";
15350
+ const operator = parseRubricOperator(rawRubric.operator, id, evaluatorName, evalId);
14625
15351
  const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
14626
15352
  let minScore;
14627
15353
  let requiredMinScore;
@@ -14665,6 +15391,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
14665
15391
  id,
14666
15392
  weight,
14667
15393
  ...expectedOutcome.length > 0 ? { outcome: expectedOutcome } : {},
15394
+ ...operator !== void 0 ? { operator } : {},
14668
15395
  ...required !== void 0 ? { required } : {},
14669
15396
  ...minScore !== void 0 ? { min_score: minScore } : {},
14670
15397
  ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
@@ -14680,6 +15407,7 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
14680
15407
  items.push({
14681
15408
  id,
14682
15409
  outcome: expectedOutcome,
15410
+ ...operator !== void 0 ? { operator } : {},
14683
15411
  weight,
14684
15412
  // Default to required: true if not specified (backward compatibility)
14685
15413
  required: required ?? true,
@@ -14802,6 +15530,8 @@ function parseInlineRubrics(rawRubrics) {
14802
15530
  };
14803
15531
  }
14804
15532
  const expectedOutcome = asString(rubric.outcome) ?? "";
15533
+ const id = asString(rubric.id) ?? `rubric-${index + 1}`;
15534
+ const operator = parseRubricOperator(rubric.operator, id, "rubrics", "<inline>");
14805
15535
  const rawScoreRanges = rubric.score_ranges;
14806
15536
  const normalizedScoreRanges = rawScoreRanges !== void 0 ? normalizeScoreRangesShorthand(rawScoreRanges) : void 0;
14807
15537
  const scoreRanges = Array.isArray(normalizedScoreRanges) && normalizedScoreRanges.length > 0 ? normalizedScoreRanges.filter((r) => isJsonObject2(r)).map((range) => ({
@@ -14809,7 +15539,8 @@ function parseInlineRubrics(rawRubrics) {
14809
15539
  outcome: asString(range.outcome) ?? ""
14810
15540
  })).filter((r) => r.outcome.length > 0) : void 0;
14811
15541
  const baseRubric = {
14812
- id: asString(rubric.id) ?? `rubric-${index + 1}`,
15542
+ id,
15543
+ ...operator !== void 0 ? { operator } : {},
14813
15544
  weight: typeof rubric.weight === "number" ? rubric.weight : 1
14814
15545
  };
14815
15546
  let inlineMinScore;
@@ -14850,12 +15581,12 @@ function parseInlineRubrics(rawRubrics) {
14850
15581
  }
14851
15582
 
14852
15583
  // src/evaluation/loaders/jsonl-parser.ts
14853
- import { readFile as readFile14 } from "node:fs/promises";
15584
+ import { readFile as readFile15 } from "node:fs/promises";
14854
15585
  import path42 from "node:path";
14855
15586
  import micromatch from "micromatch";
14856
15587
 
14857
15588
  // src/evaluation/loaders/message-processor.ts
14858
- import { readFile as readFile13 } from "node:fs/promises";
15589
+ import { readFile as readFile14 } from "node:fs/promises";
14859
15590
  import path41 from "node:path";
14860
15591
 
14861
15592
  // src/evaluation/formatting/segment-formatter.ts
@@ -14982,7 +15713,7 @@ async function processMessages(options) {
14982
15713
  continue;
14983
15714
  }
14984
15715
  try {
14985
- const fileContent = (await readFile13(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
15716
+ const fileContent = (await readFile14(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
14986
15717
  processedContent.push({
14987
15718
  ...cloneJsonObject(rawSegment),
14988
15719
  path: displayPath,
@@ -15023,7 +15754,7 @@ async function processMessages(options) {
15023
15754
  continue;
15024
15755
  }
15025
15756
  try {
15026
- const imageBuffer = await readFile13(resolvedPath);
15757
+ const imageBuffer = await readFile14(resolvedPath);
15027
15758
  const base64 = imageBuffer.toString("base64");
15028
15759
  processedContent.push({
15029
15760
  type: "image",
@@ -15106,7 +15837,7 @@ async function processExpectedMessages(options) {
15106
15837
  continue;
15107
15838
  }
15108
15839
  try {
15109
- const fileContent = (await readFile13(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
15840
+ const fileContent = (await readFile14(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
15110
15841
  processedContent.push({
15111
15842
  type: "file",
15112
15843
  path: displayPath,
@@ -15146,7 +15877,7 @@ async function processExpectedMessages(options) {
15146
15877
  continue;
15147
15878
  }
15148
15879
  try {
15149
- const imageBuffer = await readFile13(resolvedPath);
15880
+ const imageBuffer = await readFile14(resolvedPath);
15150
15881
  const base64 = imageBuffer.toString("base64");
15151
15882
  processedContent.push({
15152
15883
  type: "image",
@@ -15188,6 +15919,12 @@ function expandInputShorthand(value) {
15188
15919
  if (typeof value === "string") {
15189
15920
  return [{ role: "user", content: value }];
15190
15921
  }
15922
+ if (isJsonObject(value)) {
15923
+ if ("role" in value) {
15924
+ return isTestMessage(value) ? [value] : void 0;
15925
+ }
15926
+ return [{ role: "user", content: value }];
15927
+ }
15191
15928
  if (Array.isArray(value)) {
15192
15929
  const messages = value.filter((msg) => isTestMessage(msg));
15193
15930
  return messages.length > 0 ? messages : void 0;
@@ -15275,7 +16012,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
15275
16012
  return {};
15276
16013
  }
15277
16014
  try {
15278
- const content = await readFile14(sidecarPath, "utf8");
16015
+ const content = await readFile15(sidecarPath, "utf8");
15279
16016
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
15280
16017
  if (!isJsonObject(parsed)) {
15281
16018
  logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
@@ -15320,7 +16057,7 @@ async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
15320
16057
  const repoRootPath = resolveToAbsolutePath(repoRoot);
15321
16058
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
15322
16059
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
15323
- const rawFile = await readFile14(absoluteTestPath, "utf8");
16060
+ const rawFile = await readFile15(absoluteTestPath, "utf8");
15324
16061
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
15325
16062
  const fallbackSuiteName = path42.basename(absoluteTestPath, ".jsonl") || "eval";
15326
16063
  const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
@@ -15457,16 +16194,16 @@ ${detailBlock}${ANSI_RESET7}`);
15457
16194
  }
15458
16195
 
15459
16196
  // src/evaluation/metadata.ts
15460
- import { z as z3 } from "zod";
15461
- var MetadataSchema = z3.object({
15462
- name: z3.string().min(1).max(64).regex(/^[a-z0-9-]+$/).optional(),
15463
- description: z3.string().min(1).max(1024).optional(),
15464
- version: z3.string().optional(),
15465
- author: z3.string().optional(),
15466
- tags: z3.array(z3.string()).optional(),
15467
- license: z3.string().optional(),
15468
- requires: z3.object({
15469
- agentv: z3.string().optional()
16197
+ import { z as z4 } from "zod";
16198
+ var MetadataSchema = z4.object({
16199
+ name: z4.string().min(1).max(64).regex(/^[a-z0-9-]+$/).optional(),
16200
+ description: z4.string().min(1).max(1024).optional(),
16201
+ version: z4.string().optional(),
16202
+ author: z4.string().optional(),
16203
+ tags: z4.array(z4.string()).optional(),
16204
+ license: z4.string().optional(),
16205
+ requires: z4.object({
16206
+ agentv: z4.string().optional()
15470
16207
  }).optional()
15471
16208
  });
15472
16209
  function parseMetadata(suite) {
@@ -15738,7 +16475,7 @@ function interpolateRawEvalCase(raw, vars) {
15738
16475
  async function readTestSuiteMetadata(testFilePath) {
15739
16476
  try {
15740
16477
  const absolutePath = path43.resolve(testFilePath);
15741
- const content = await readFile15(absolutePath, "utf8");
16478
+ const content = await readFile16(absolutePath, "utf8");
15742
16479
  const parsed = interpolateEnv(parseYamlValue(content), process.env);
15743
16480
  if (!isJsonObject(parsed)) {
15744
16481
  return {};
@@ -15762,7 +16499,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
15762
16499
  return { tests: await loadTestsFromAgentSkills(evalFilePath) };
15763
16500
  }
15764
16501
  if (format === "typescript") {
15765
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-Z6IUSDNA.js");
16502
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EQJX3OLT.js");
15766
16503
  return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
15767
16504
  }
15768
16505
  const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
@@ -15797,7 +16534,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
15797
16534
  return loadTestsFromAgentSkills(evalFilePath);
15798
16535
  }
15799
16536
  if (format === "typescript") {
15800
- const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-Z6IUSDNA.js");
16537
+ const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-EQJX3OLT.js");
15801
16538
  const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
15802
16539
  return suite.tests;
15803
16540
  }
@@ -15812,8 +16549,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15812
16549
  const repoRootPath = resolveToAbsolutePath(repoRoot);
15813
16550
  const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
15814
16551
  const config = await loadConfig(absoluteTestPath, repoRootPath);
15815
- const rawFile = await readFile15(absoluteTestPath, "utf8");
15816
- const interpolated = interpolateEnv(parseYamlValue(rawFile), process.env);
16552
+ const rawFile = await readFile16(absoluteTestPath, "utf8");
16553
+ const rawParsed = parseYamlValue(rawFile);
16554
+ const rawCaseSnapshots = buildRawInlineTestSnapshots(rawParsed);
16555
+ const interpolated = interpolateEnv(rawParsed, process.env);
15817
16556
  if (!isJsonObject(interpolated)) {
15818
16557
  throw new Error(`Invalid test file format: ${evalFilePath}`);
15819
16558
  }
@@ -15850,7 +16589,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15850
16589
  throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
15851
16590
  }
15852
16591
  const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
15853
- const suiteGovernance = extractSuiteGovernance(suite);
16592
+ const suiteMetadataPayload = extractSuiteMetadataPayload(suite);
15854
16593
  const rawSuiteInput = suite.input;
15855
16594
  const rawSuiteInputFiles = suite.input_files;
15856
16595
  const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
@@ -15952,6 +16691,12 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15952
16691
  logError3(`Skipping test '${id}': ${message}`);
15953
16692
  continue;
15954
16693
  }
16694
+ const assertionTemplateReferences = await collectAssertionTemplateSourceReferences(
16695
+ renderedCase,
16696
+ globalExecution,
16697
+ searchRoots,
16698
+ id ?? "unknown"
16699
+ );
15955
16700
  const inlineRubrics = renderedCase.rubrics;
15956
16701
  if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
15957
16702
  const rubricEvaluator = parseInlineRubrics(inlineRubrics);
@@ -15964,8 +16709,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
15964
16709
  const caseWorkspace = await resolveWorkspaceConfig(renderedCase.workspace, evalFileDir);
15965
16710
  const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
15966
16711
  const rawCaseMetadata = isJsonObject(renderedCase.metadata) ? renderedCase.metadata : void 0;
15967
- const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
15968
- const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
16712
+ const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suiteMetadataPayload);
15969
16713
  const caseTargets = extractTargetsFromTestCase(renderedCase);
15970
16714
  const dependsOn = Array.isArray(renderedCase.depends_on) ? renderedCase.depends_on.filter(
15971
16715
  (v) => typeof v === "string"
@@ -16004,12 +16748,245 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
16004
16748
  ...onTurnFailure ? { on_turn_failure: onTurnFailure } : {},
16005
16749
  ...windowSize !== void 0 ? { window_size: windowSize } : {},
16006
16750
  ...dependsOn && dependsOn.length > 0 ? { depends_on: dependsOn } : {},
16007
- ...onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {}
16751
+ ...onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {},
16752
+ source: buildEvalTestSource({
16753
+ evalFilePath,
16754
+ absoluteTestPath,
16755
+ repoRootPath,
16756
+ id,
16757
+ renderedCase,
16758
+ rawCaseSnapshots,
16759
+ inputMessages,
16760
+ evaluators,
16761
+ assertionTemplateReferences
16762
+ })
16008
16763
  };
16009
16764
  results.push(testCase);
16010
16765
  }
16011
16766
  return { tests: results, parsed: suite, suiteWorkspacePath: suiteWorkspace?.path };
16012
16767
  }
16768
/**
 * Case-insensitive, unanchored matcher for mapping keys whose values are
 * considered sensitive. A key matches when it contains any of the listed
 * fragments (for example "api_key", "apiKey", "Authorization", "token").
 */
var SOURCE_SECRET_KEY_PATTERN = /(api[_-]?key|authorization|bearer|credential|password|private[_-]?key|secret|token)/i;

/**
 * Placeholder written in place of any value stored under a key that matches
 * SOURCE_SECRET_KEY_PATTERN.
 */
var REDACTED_SOURCE_VALUE = "[redacted]";
16770
/**
 * Index the inline tests of a parsed eval file by id, storing a sanitized
 * YAML rendering of each test.
 *
 * The test list is taken from the first defined of `tests`, `eval_cases`
 * or `evalcases`. Entries that are not objects, or whose `id` is not a
 * string, are ignored. When two entries share an id, the later one wins.
 *
 * @param {unknown} rawParsed - Parsed eval file content.
 * @returns {Map<string, string>} Test id -> YAML snapshot; empty when the
 *   input is not an object or has no test array.
 */
function buildRawInlineTestSnapshots(rawParsed) {
  const snapshotById = new Map();
  const testList = isJsonObject(rawParsed)
    ? rawParsed.tests ?? rawParsed.eval_cases ?? rawParsed.evalcases
    : void 0;
  if (Array.isArray(testList)) {
    for (const entry of testList) {
      if (isJsonObject(entry) && typeof entry.id === "string") {
        snapshotById.set(entry.id, stringifySourceYaml(entry));
      }
    }
  }
  return snapshotById;
}
16787
/**
 * Assemble the `source` record for one loaded test case: where the test came
 * from, a YAML snapshot of it, its grader definitions and the de-duplicated
 * list of files/commands it references.
 *
 * @param {object} params
 * @param {string} params.evalFilePath - Eval file path as supplied by the caller.
 * @param {string} params.absoluteTestPath - Absolute path of the eval file.
 * @param {string} params.repoRootPath - Root used to derive the repo-relative path.
 * @param {string} params.id - Test id.
 * @param {object} params.renderedCase - Test case used for the snapshot when no
 *   raw snapshot exists for `id`.
 * @param {Map<string, string>} params.rawCaseSnapshots - Snapshots keyed by test id.
 * @param {Array} params.inputMessages - Input messages scanned for file segments.
 * @param {Array|undefined} params.evaluators - Grader configurations.
 * @param {Array} params.assertionTemplateReferences - Extra references to include.
 * @returns {object} Source record; `evalFileRepoPath` is present only when the
 *   eval file lies under `repoRootPath`.
 */
function buildEvalTestSource(params) {
  const {
    evalFilePath,
    absoluteTestPath,
    repoRootPath,
    id,
    renderedCase,
    rawCaseSnapshots,
    inputMessages,
    evaluators,
    assertionTemplateReferences
  } = params;

  const repoRelativePath = toPortableRelativePath(repoRootPath, absoluteTestPath);
  // Prefer the snapshot captured from the raw file; fall back to the rendered case.
  const snapshot = rawCaseSnapshots.get(id) ?? stringifySourceYaml(renderedCase);
  const fromGraders = collectGraderSourceReferences(evaluators);
  const fromInputs = collectInputSourceReferences(inputMessages);
  const allReferences = dedupeSourceReferences([
    ...fromInputs,
    ...fromGraders,
    ...assertionTemplateReferences
  ]);

  const source = {
    evalFilePath,
    evalFileAbsolutePath: absoluteTestPath
  };
  if (repoRelativePath) {
    source.evalFileRepoPath = repoRelativePath;
  }
  source.testId = id;
  source.testSnapshotYaml = snapshot;
  source.graderDefinitions = buildGraderSourceDefinitions(evaluators);
  source.references = allReferences;
  return source;
}
16807
/**
 * Render a value as YAML after passing it through sanitizeSourceValue.
 * `lineWidth: 0` is passed to the YAML stringifier and trailing whitespace
 * is trimmed from the result.
 *
 * @param {unknown} value - Value to serialize.
 * @returns {string} YAML text without trailing whitespace.
 */
function stringifySourceYaml(value) {
  const sanitized = sanitizeSourceValue(value);
  const yamlText = stringifyYaml(sanitized, { lineWidth: 0 });
  return yamlText.trimEnd();
}
16810
/**
 * Produce a copy of `value` that is safe to persist in source snapshots.
 *
 * - Any value whose owning key matches SOURCE_SECRET_KEY_PATTERN is replaced
 *   wholesale by REDACTED_SOURCE_VALUE (including objects and arrays).
 * - null, strings, numbers and booleans are returned unchanged.
 * - Arrays are mapped element by element; elements carry no key hint.
 * - Objects are rebuilt from their own enumerable entries, recursing with the
 *   entry key as the hint. Entries whose value is `undefined` are omitted so
 *   they do not surface as the literal string "undefined".
 * - Anything else is converted with String().
 *
 * @param {unknown} value - Value to sanitize.
 * @param {string} [keyHint] - Key under which `value` is stored, if any.
 * @returns {unknown} Sanitized copy.
 */
function sanitizeSourceValue(value, keyHint) {
  if (keyHint && SOURCE_SECRET_KEY_PATTERN.test(keyHint)) {
    return REDACTED_SOURCE_VALUE;
  }
  if (value === null) {
    return value;
  }
  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
      return value;
    case "object": {
      if (Array.isArray(value)) {
        // Arrow wrapper keeps the array index from being passed as keyHint.
        return value.map((item) => sanitizeSourceValue(item));
      }
      const sanitizedEntries = Object.entries(value)
        // Skip unset optional fields instead of stringifying them to "undefined".
        .filter(([, entryValue]) => entryValue !== void 0)
        .map(([key, entryValue]) => [key, sanitizeSourceValue(entryValue, key)]);
      return Object.fromEntries(sanitizedEntries);
    }
    default:
      return String(value);
  }
}
16832
/**
 * Summarize each grader configuration for the test's source record.
 *
 * Every summary carries `name`, `type` and a sanitized `definition`;
 * `weight`, `required` and `minScore` (read from `min_score`) are included
 * only when defined on the grader.
 *
 * @param {Array|undefined|null} evaluators - Grader configurations.
 * @returns {Array<object>} One summary per grader; empty when none are given.
 */
function buildGraderSourceDefinitions(evaluators) {
  const graders = evaluators ?? [];
  return graders.map((grader) => {
    const summary = { name: grader.name, type: grader.type };
    if (grader.weight !== void 0) {
      summary.weight = grader.weight;
    }
    if (grader.required !== void 0) {
      summary.required = grader.required;
    }
    if ("min_score" in grader && grader.min_score !== void 0) {
      summary.minScore = grader.min_score;
    }
    summary.definition = sanitizeGraderDefinition(grader);
    return summary;
  });
}
16842
/**
 * Sanitize a grader configuration with sanitizeSourceValue, then remove the
 * runtime resolution fields from the sanitized copy.
 *
 * @param {object} evaluator - Grader configuration.
 * @returns {object} Sanitized definition without runtime resolution fields.
 */
function sanitizeGraderDefinition(evaluator) {
  return stripRuntimeResolutionFields(sanitizeSourceValue(evaluator));
}
16846
/**
 * Return a copy of `value` without the fields `resolvedPromptPath`,
 * `promptPath`, `resolvedPromptScript`, `resolvedScriptPath`, `resolvedCwd`
 * and `resolvedCommand`, at any object depth.
 *
 * Nested objects are processed recursively. Arrays are processed one level
 * deep: object elements are recursed into, other elements are kept as-is.
 *
 * @param {object} value - Object to copy.
 * @returns {object} New object without the listed fields.
 */
function stripRuntimeResolutionFields(value) {
  const droppedKeys = [
    "resolvedPromptPath",
    "promptPath",
    "resolvedPromptScript",
    "resolvedScriptPath",
    "resolvedCwd",
    "resolvedCommand"
  ];
  const stripIfObject = (candidate) => isJsonObject(candidate) ? stripRuntimeResolutionFields(candidate) : candidate;

  const result = {};
  for (const [field, fieldValue] of Object.entries(value)) {
    if (droppedKeys.includes(field)) {
      continue;
    }
    result[field] = Array.isArray(fieldValue)
      ? fieldValue.map((item) => stripIfObject(item))
      : stripIfObject(fieldValue);
  }
  return result;
}
16864
/**
 * Collect an `input_file` reference for every `type: "file"` segment found
 * in the array content of the given messages.
 *
 * `displayPath` is the segment's string `path`, else its string `value`,
 * else the literal "input file". `resolvedPath` is added (normalized through
 * path.resolve) only when the segment has a string `resolvedPath`.
 *
 * @param {Array<object>} inputMessages - Messages to scan; messages whose
 *   `content` is not an array are skipped.
 * @returns {Array<object>} References in encounter order.
 */
function collectInputSourceReferences(inputMessages) {
  const collected = [];
  for (const message of inputMessages) {
    const segments = message.content;
    if (!Array.isArray(segments)) {
      continue;
    }
    for (const part of segments) {
      const isFileSegment = isJsonObject(part) && part.type === "file";
      if (!isFileSegment) {
        continue;
      }
      let label = "input file";
      if (typeof part.path === "string") {
        label = part.path;
      } else if (typeof part.value === "string") {
        label = part.value;
      }
      const reference = { kind: "input_file", displayPath: label };
      if (typeof part.resolvedPath === "string") {
        reference.resolvedPath = path43.resolve(part.resolvedPath);
      }
      collected.push(reference);
    }
  }
  return collected;
}
16884
/**
 * Concatenate the source references of every grader, in grader order.
 *
 * @param {Array|undefined|null} evaluators - Grader configurations.
 * @returns {Array<object>} All references; empty when no graders are given.
 */
function collectGraderSourceReferences(evaluators) {
  const graders = evaluators ?? [];
  return graders.flatMap((grader) => collectSingleGraderSourceReferences(grader));
}
16891
/**
 * List the files and commands a single grader configuration points at.
 *
 * Emitted in this order:
 *  1. code-grader: its command (`command`, else `script`, else `[]`) and,
 *     when `resolvedCwd` is set, its working directory;
 *     llm-grader: its prompt file (when a prompt path is known) and its
 *     prompt script (when a non-empty `resolvedPromptScript` exists).
 *  2. One entry per preprocessor that has a non-empty `resolvedCommand`.
 *  3. composite: the references of every member assertion (recursively),
 *     followed by the aggregator's script or prompt file.
 *
 * @param {object} evaluator - Grader configuration.
 * @returns {Array<object>} References for this grader.
 */
function collectSingleGraderSourceReferences(evaluator) {
  const found = [];
  const graderName = evaluator.name;
  const graderType = evaluator.type;

  if (graderType === "code-grader") {
    const command = evaluator.command ?? evaluator.script ?? [];
    const scriptPath = evaluator.resolvedScriptPath;
    const commandReference = {
      kind: "code_grader_command",
      displayPath: scriptPath ?? command.join(" ")
    };
    if (scriptPath) {
      commandReference.resolvedPath = scriptPath;
    }
    commandReference.graderName = graderName;
    commandReference.command = command;
    found.push(commandReference);

    const workingDir = evaluator.resolvedCwd;
    if (workingDir) {
      found.push({
        kind: "code_grader_cwd",
        displayPath: evaluator.cwd ?? workingDir,
        resolvedPath: workingDir,
        graderName
      });
    }
  }

  if (graderType === "llm-grader") {
    const promptFile = evaluator.resolvedPromptPath ?? evaluator.promptPath;
    if (promptFile) {
      // Show the prompt as the author wrote it when it is a plain string.
      const shownAs = typeof evaluator.prompt === "string" ? evaluator.prompt : promptFile;
      found.push({
        kind: "llm_grader_prompt",
        displayPath: shownAs,
        resolvedPath: promptFile,
        graderName
      });
    }
    const promptScript = evaluator.resolvedPromptScript;
    if (promptScript && promptScript.length > 0) {
      const scriptFile = promptScript.at(-1);
      found.push({
        kind: "prompt_script",
        displayPath: scriptFile ?? evaluator.name,
        resolvedPath: scriptFile,
        graderName,
        command: promptScript
      });
    }
  }

  const preprocessorList = ("preprocessors" in evaluator ? evaluator.preprocessors : void 0) ?? [];
  for (const step of preprocessorList) {
    const stepCommand = step.resolvedCommand;
    if (!stepCommand || stepCommand.length === 0) {
      continue;
    }
    const stepFile = stepCommand.at(-1);
    found.push({
      kind: "preprocessor_command",
      displayPath: stepFile ?? step.type,
      resolvedPath: stepFile,
      graderName,
      command: stepCommand
    });
  }

  if (graderType === "composite") {
    for (const member of evaluator.assertions) {
      found.push(...collectSingleGraderSourceReferences(member));
    }
    const aggregator = evaluator.aggregator;
    if (aggregator.type === "code-grader") {
      found.push({
        kind: "code_grader_command",
        displayPath: aggregator.path,
        resolvedPath: path43.resolve(aggregator.cwd ?? "", aggregator.path),
        graderName
      });
    } else if (aggregator.type === "llm-grader" && aggregator.promptPath) {
      found.push({
        kind: "llm_grader_prompt",
        displayPath: aggregator.prompt ?? aggregator.promptPath,
        resolvedPath: aggregator.promptPath,
        graderName
      });
    }
  }

  return found;
}
16965
/**
 * Drop repeated references, keeping the first occurrence of each.
 *
 * Two references count as the same when they agree on `kind`, on
 * `resolvedPath` (or `displayPath` when there is no resolved path), on
 * `graderName` and on the full `command` sequence.
 *
 * @param {Array<object>} references - References in priority order.
 * @returns {Array<object>} New array with duplicates removed; the kept
 *   elements are the original objects.
 */
function dedupeSourceReferences(references) {
  const seenFingerprints = new Set();
  return references.filter((ref) => {
    const fingerprint = JSON.stringify([
      ref.kind,
      ref.resolvedPath ?? ref.displayPath,
      ref.graderName ?? "",
      ref.command?.join("\0") ?? ""
    ]);
    const isNew = !seenFingerprints.has(fingerprint);
    if (isNew) {
      seenFingerprints.add(fingerprint);
    }
    return isNew;
  });
}
16983
/**
 * Express `candidate` relative to `root` using forward slashes.
 *
 * @param {string} root - Directory the result should be relative to.
 * @param {string} candidate - Path to convert.
 * @returns {string|undefined} The "/"-separated relative path, or undefined
 *   when `candidate` equals `root`, lies outside `root`, or cannot be
 *   expressed relatively (path.relative returned an absolute path).
 */
function toPortableRelativePath(root, candidate) {
  const relative = path43.relative(root, candidate);
  if (relative.length === 0 || path43.isAbsolute(relative)) {
    return void 0;
  }
  const segments = relative.split(path43.sep);
  // Only a leading ".." segment means "outside root"; a name that merely
  // starts with two dots (e.g. "..cache") is still inside it.
  if (segments[0] === "..") {
    return void 0;
  }
  return segments.join("/");
}
16013
16990
  async function loadTestById(evalFilePath, repoRoot, evalId) {
16014
16991
  const tests = await loadTests(evalFilePath, repoRoot);
16015
16992
  const match = tests.find((c) => c.id === evalId);
@@ -16102,7 +17079,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
16102
17079
  const workspaceFilePath = path43.resolve(evalFileDir, raw);
16103
17080
  let content;
16104
17081
  try {
16105
- content = await readFile15(workspaceFilePath, "utf8");
17082
+ content = await readFile16(workspaceFilePath, "utf8");
16106
17083
  } catch {
16107
17084
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
16108
17085
  }
@@ -16226,19 +17203,18 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
16226
17203
  function asString5(value) {
16227
17204
  return typeof value === "string" ? value : void 0;
16228
17205
  }
16229
- function extractSuiteGovernance(suite) {
17206
+ function extractSuiteMetadataPayload(suite) {
17207
+ const payload = isJsonObject(suite.metadata) ? { ...suite.metadata } : {};
16230
17208
  const top = suite.governance;
16231
17209
  if (isJsonObject(top)) {
16232
- return top;
16233
- }
16234
- const wrapper = suite.metadata;
16235
- if (isJsonObject(wrapper)) {
16236
- const nested = wrapper.governance;
17210
+ payload.governance = top;
17211
+ } else {
17212
+ const nested = payload.governance;
16237
17213
  if (isJsonObject(nested)) {
16238
- return nested;
17214
+ payload.governance = nested;
16239
17215
  }
16240
17216
  }
16241
- return void 0;
17217
+ return Object.keys(payload).length > 0 ? payload : void 0;
16242
17218
  }
16243
17219
  function mergeSuiteMetadataPayload(caseMetadata, suitePayload) {
16244
17220
  if (!suitePayload) return caseMetadata;
@@ -16729,7 +17705,7 @@ async function runEvaluation(options) {
16729
17705
  const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
16730
17706
  if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
16731
17707
  if (!dirExists) {
16732
- await mkdir14(configuredStaticPath, { recursive: true });
17708
+ await mkdir15(configuredStaticPath, { recursive: true });
16733
17709
  }
16734
17710
  if (workspaceTemplate) {
16735
17711
  await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
@@ -16774,7 +17750,7 @@ async function runEvaluation(options) {
16774
17750
  }
16775
17751
  } else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
16776
17752
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
16777
- await mkdir14(sharedWorkspacePath, { recursive: true });
17753
+ await mkdir15(sharedWorkspacePath, { recursive: true });
16778
17754
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
16779
17755
  }
16780
17756
  try {
@@ -17624,7 +18600,7 @@ async function runEvalCase(options) {
17624
18600
  }
17625
18601
  if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
17626
18602
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
17627
- await mkdir14(workspacePath, { recursive: true });
18603
+ await mkdir15(workspacePath, { recursive: true });
17628
18604
  }
17629
18605
  if (evalCase.workspace?.repos?.length && workspacePath) {
17630
18606
  const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
@@ -17679,7 +18655,7 @@ async function runEvalCase(options) {
17679
18655
  const srcPath = path44.resolve(baseDir, relPath);
17680
18656
  const destPath = path44.resolve(workspacePath, relPath);
17681
18657
  try {
17682
- await mkdir14(path44.dirname(destPath), { recursive: true });
18658
+ await mkdir15(path44.dirname(destPath), { recursive: true });
17683
18659
  await copyFile2(srcPath, destPath);
17684
18660
  } catch (error) {
17685
18661
  const message = error instanceof Error ? error.message : String(error);
@@ -19247,6 +20223,12 @@ async function evaluate(config) {
19247
20223
  resolvedTarget = resolveTargetDefinition(targetDef);
19248
20224
  }
19249
20225
  const collectedResults = [];
20226
+ const cacheEnabled = shouldEnableCache({
20227
+ cliCache: config.cache === true,
20228
+ cliNoCache: false,
20229
+ yamlCache: config.cache === void 0 ? materialized.cache : void 0
20230
+ });
20231
+ const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ? path45.resolve(materialized.cachePath) : void 0) : void 0;
19250
20232
  const results = await runEvaluation({
19251
20233
  testFilePath,
19252
20234
  repoRoot,
@@ -19259,6 +20241,8 @@ async function evaluate(config) {
19259
20241
  filter: config.filter,
19260
20242
  threshold: config.threshold,
19261
20243
  evalCases: materialized.tests,
20244
+ cache,
20245
+ useCache: !!cache && !shouldSkipCacheForTemperature(resolvedTarget.config),
19262
20246
  ...materialized.budgetUsd !== void 0 && { budgetUsd: materialized.budgetUsd },
19263
20247
  onResult: async (result) => {
19264
20248
  collectedResults.push(result);
@@ -19289,6 +20273,7 @@ async function materializeEvalConfig(config, options) {
19289
20273
  tests: tests2,
19290
20274
  workers: config.workers ?? suite.workers,
19291
20275
  cache: config.cache ?? suite.cacheConfig?.enabled,
20276
+ cachePath: config.cachePath ?? suite.cacheConfig?.cachePath,
19292
20277
  budgetUsd: config.budgetUsd ?? suite.budgetUsd,
19293
20278
  threshold: config.threshold ?? suite.threshold,
19294
20279
  metadata: config.metadata ?? suite.metadata,
@@ -19307,6 +20292,7 @@ async function materializeEvalConfig(config, options) {
19307
20292
  tests,
19308
20293
  workers: config.workers,
19309
20294
  cache: config.cache,
20295
+ cachePath: config.cachePath,
19310
20296
  budgetUsd: config.budgetUsd,
19311
20297
  threshold: config.threshold,
19312
20298
  metadata: config.metadata,
@@ -19424,9 +20410,11 @@ function mapAssertionType(type) {
19424
20410
  }
19425
20411
  function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
19426
20412
  const total = results.length;
20413
+ const qualityResults = results.filter((r) => r.executionStatus !== "execution_error");
20414
+ const executionErrors = total - qualityResults.length;
19427
20415
  let passed = 0;
19428
20416
  let scoreSum = 0;
19429
- for (const r of results) {
20417
+ for (const r of qualityResults) {
19430
20418
  scoreSum += r.score;
19431
20419
  if (r.score >= threshold) {
19432
20420
  passed++;
@@ -19435,9 +20423,10 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
19435
20423
  return {
19436
20424
  total,
19437
20425
  passed,
19438
- failed: total - passed,
20426
+ failed: qualityResults.length - passed,
20427
+ executionErrors,
19439
20428
  durationMs,
19440
- meanScore: total > 0 ? scoreSum / total : 0
20429
+ meanScore: qualityResults.length > 0 ? scoreSum / qualityResults.length : 0
19441
20430
  };
19442
20431
  }
19443
20432
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
@@ -19520,7 +20509,12 @@ async function loadTsEvalSuite(filePath, repoRoot, options) {
19520
20509
  return {
19521
20510
  tests: materialized.tests,
19522
20511
  ...materialized.workers !== void 0 && { workers: materialized.workers },
19523
- ...materialized.cache !== void 0 && { cacheConfig: { enabled: materialized.cache } },
20512
+ ...materialized.cache !== void 0 && {
20513
+ cacheConfig: {
20514
+ enabled: materialized.cache,
20515
+ ...materialized.cachePath !== void 0 && { cachePath: materialized.cachePath }
20516
+ }
20517
+ },
19524
20518
  ...materialized.budgetUsd !== void 0 && { budgetUsd: materialized.budgetUsd },
19525
20519
  ...materialized.threshold !== void 0 && { threshold: materialized.threshold },
19526
20520
  ...materialized.metadata !== void 0 && { metadata: materialized.metadata },
@@ -19543,7 +20537,28 @@ function isEvalConfigLike(value) {
19543
20537
  }
19544
20538
 
19545
20539
  export {
20540
+ NORMALIZED_TRAJECTORY_SCHEMA_VERSION,
20541
+ NORMALIZED_TRACE_SOURCE_KINDS,
20542
+ NORMALIZED_TRACE_EVENT_TYPES,
20543
+ NORMALIZED_TOOL_STATUSES,
20544
+ NORMALIZED_REDACTION_LEVELS,
20545
+ NormalizedRedactionStateWireSchema,
20546
+ NormalizedTraceErrorWireSchema,
20547
+ NormalizedTraceSourceWireSchema,
20548
+ NormalizedTraceSessionWireSchema,
20549
+ NormalizedTraceBranchWireSchema,
20550
+ NormalizedTraceSourceRefWireSchema,
20551
+ NormalizedRawEvidenceWireSchema,
20552
+ NormalizedTraceMessageWireSchema,
20553
+ NormalizedTraceModelWireSchema,
20554
+ NormalizedTraceToolWireSchema,
20555
+ NormalizedTraceEventWireSchema,
20556
+ NormalizedTrajectoryWireSchema,
20557
+ toNormalizedTrajectoryWire,
20558
+ fromNormalizedTrajectoryWire,
19546
20559
  computeTraceSummary,
20560
+ getSelectedTrajectoryEvents,
20561
+ computeTraceSummaryFromTrajectory,
19547
20562
  DEFAULT_EXPLORATION_TOOLS,
19548
20563
  explorationRatio,
19549
20564
  tokensPerTool,
@@ -19551,13 +20566,6 @@ export {
19551
20566
  mergeExecutionMetrics,
19552
20567
  isAgentSkillsFormat,
19553
20568
  parseAgentSkillsEvals,
19554
- getAgentvConfigDir,
19555
- getAgentvHome,
19556
- getAgentvDataDir,
19557
- getWorkspacesRoot,
19558
- getSubagentsRoot,
19559
- getTraceStateRoot,
19560
- getWorkspacePoolRoot,
19561
20569
  DEFAULT_EVAL_PATTERNS,
19562
20570
  loadConfig,
19563
20571
  extractTargetFromSuite,
@@ -19569,11 +20577,15 @@ export {
19569
20577
  extractCacheConfig,
19570
20578
  extractFailOnError,
19571
20579
  extractThreshold,
20580
+ resolveResultsConfigForProject,
19572
20581
  detectFormat,
19573
20582
  parseRepoSource,
19574
20583
  parseRepoCheckout,
19575
20584
  parseRepoClone,
19576
20585
  buildPromptInputs,
20586
+ ResponseCache,
20587
+ shouldEnableCache,
20588
+ shouldSkipCacheForTemperature,
19577
20589
  DEFAULT_THRESHOLD,
19578
20590
  PASS_THRESHOLD,
19579
20591
  scoreToVerdict,
@@ -19676,4 +20688,4 @@ export {
19676
20688
  loadTestById,
19677
20689
  loadEvalCaseById
19678
20690
  };
19679
- //# sourceMappingURL=chunk-N5EU446L.js.map
20691
+ //# sourceMappingURL=chunk-7QB53OPK.js.map