@agentv/core 4.9.1 → 4.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -25,10 +25,17 @@ import {
25
25
  resolveDelegatedTargetDefinition,
26
26
  resolveFileReference,
27
27
  resolveTargetDefinition
28
- } from "./chunk-VCVVKCC4.js";
28
+ } from "./chunk-5POFMJJ7.js";
29
+ import {
30
+ execFileWithStdin,
31
+ execShellWithStdin
32
+ } from "./chunk-3WGHC7LC.js";
29
33
  import {
30
34
  AgentvProvider
31
35
  } from "./chunk-PRNXHNLF.js";
36
+ import {
37
+ DockerWorkspaceProvider
38
+ } from "./chunk-SDIANPEY.js";
32
39
  import {
33
40
  OtlpJsonFileExporter
34
41
  } from "./chunk-KPSI5CSL.js";
@@ -152,10 +159,10 @@ function mergeExecutionMetrics(computed, metrics) {
152
159
  }
153
160
 
154
161
  // src/evaluation/yaml-parser.ts
155
- import { readFile as readFile6 } from "node:fs/promises";
156
- import path7 from "node:path";
162
+ import { readFile as readFile8 } from "node:fs/promises";
163
+ import path8 from "node:path";
157
164
  import micromatch2 from "micromatch";
158
- import { parse as parse2 } from "yaml";
165
+ import { parse as parse3 } from "yaml";
159
166
 
160
167
  // src/evaluation/input-message-utils.ts
161
168
  function flattenInputMessages(messages) {
@@ -441,10 +448,12 @@ async function loadConfig(evalFilePath, repoRoot) {
441
448
  parsed.execution,
442
449
  configPath
443
450
  );
451
+ const results = parseResultsConfig(parsed.results, configPath);
444
452
  return {
445
453
  required_version: requiredVersion,
446
454
  eval_patterns: evalPatterns,
447
- execution: executionDefaults
455
+ execution: executionDefaults,
456
+ results
448
457
  };
449
458
  } catch (error) {
450
459
  logWarning(
@@ -679,15 +688,234 @@ function parseExecutionDefaults(raw, configPath) {
679
688
  }
680
689
  return Object.keys(result).length > 0 ? result : void 0;
681
690
  }
691
+ function parseResultsConfig(raw, configPath) {
692
+ if (raw === void 0 || raw === null) {
693
+ return void 0;
694
+ }
695
+ if (typeof raw !== "object" || Array.isArray(raw)) {
696
+ logWarning(`Invalid results in ${configPath}, expected object`);
697
+ return void 0;
698
+ }
699
+ const obj = raw;
700
+ const exportConfig = parseResultsExportConfig(obj.export, configPath);
701
+ if (!exportConfig) {
702
+ return void 0;
703
+ }
704
+ return { export: exportConfig };
705
+ }
706
+ function parseResultsExportConfig(raw, configPath) {
707
+ if (raw === void 0 || raw === null) {
708
+ return void 0;
709
+ }
710
+ if (typeof raw !== "object" || Array.isArray(raw)) {
711
+ logWarning(`Invalid results.export in ${configPath}, expected object`);
712
+ return void 0;
713
+ }
714
+ const obj = raw;
715
+ const repo = typeof obj.repo === "string" ? obj.repo.trim() : "";
716
+ const exportPath = typeof obj.path === "string" ? obj.path.trim() : "";
717
+ if (!repo) {
718
+ logWarning(`Invalid results.export.repo in ${configPath}, expected non-empty string`);
719
+ return void 0;
720
+ }
721
+ if (!exportPath) {
722
+ logWarning(`Invalid results.export.path in ${configPath}, expected non-empty string`);
723
+ return void 0;
724
+ }
725
+ if (obj.auto_push !== void 0 && typeof obj.auto_push !== "boolean") {
726
+ logWarning(`Invalid results.export.auto_push in ${configPath}, expected boolean`);
727
+ return void 0;
728
+ }
729
+ let branchPrefix;
730
+ if (obj.branch_prefix !== void 0) {
731
+ if (typeof obj.branch_prefix !== "string" || obj.branch_prefix.trim().length === 0) {
732
+ logWarning(
733
+ `Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`
734
+ );
735
+ return void 0;
736
+ }
737
+ branchPrefix = obj.branch_prefix.trim();
738
+ }
739
+ return {
740
+ repo,
741
+ path: exportPath,
742
+ ...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
743
+ ...branchPrefix && { branch_prefix: branchPrefix }
744
+ };
745
+ }
682
746
  function logWarning(message) {
683
747
  console.warn(`${ANSI_YELLOW}Warning: ${message}${ANSI_RESET2}`);
684
748
  }
685
749
 
686
750
  // src/evaluation/loaders/evaluator-parser.ts
751
+ import { readFile as readFile5 } from "node:fs/promises";
752
+ import path5 from "node:path";
753
+ import { parse as parse2 } from "yaml";
754
+
755
+ // src/evaluation/content-preprocessor.ts
756
+ import { readFile as readFile3 } from "node:fs/promises";
687
757
  import path4 from "node:path";
758
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
759
+ var MIME_TYPE_ALIASES = {
760
+ csv: "text/csv",
761
+ docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
762
+ htm: "text/html",
763
+ html: "text/html",
764
+ json: "application/json",
765
+ markdown: "text/markdown",
766
+ md: "text/markdown",
767
+ pdf: "application/pdf",
768
+ sql: "application/sql",
769
+ txt: "text/plain",
770
+ xhtml: "application/xhtml+xml",
771
+ xls: "application/vnd.ms-excel",
772
+ xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
773
+ xml: "application/xml",
774
+ yaml: "application/yaml",
775
+ yml: "application/yaml"
776
+ };
777
+ var REPLACEMENT_CHAR = "\uFFFD";
778
+ async function extractTextWithPreprocessors(content, preprocessors, options = {}) {
779
+ if (typeof content === "string") {
780
+ return { text: content, warnings: [] };
781
+ }
782
+ if (!content || content.length === 0) {
783
+ return { text: "", warnings: [] };
784
+ }
785
+ const parts = [];
786
+ const warnings = [];
787
+ for (const block of content) {
788
+ if (block.type === "text") {
789
+ parts.push(block.text);
790
+ continue;
791
+ }
792
+ if (block.type !== "file") {
793
+ continue;
794
+ }
795
+ const result = await preprocessContentFile(block, preprocessors, options.basePath);
796
+ if (result.text) {
797
+ parts.push(result.text);
798
+ }
799
+ warnings.push(...result.warnings);
800
+ }
801
+ return { text: parts.join("\n"), warnings };
802
+ }
803
+ async function preprocessContentFile(block, preprocessors, basePath) {
804
+ const mediaType = normalizePreprocessorType(block.media_type);
805
+ const resolvedPath = resolveLocalFilePath(block.path, basePath);
806
+ if (!resolvedPath) {
807
+ return {
808
+ text: "",
809
+ warnings: [
810
+ {
811
+ file: block.path,
812
+ mediaType: block.media_type,
813
+ reason: "remote file paths are not supported for preprocessing"
814
+ }
815
+ ]
816
+ };
817
+ }
818
+ const preprocessor = preprocessors?.find(
819
+ (entry) => normalizePreprocessorType(entry.type) === mediaType
820
+ );
821
+ if (preprocessor) {
822
+ return runContentPreprocessor(block, resolvedPath, preprocessor);
823
+ }
824
+ try {
825
+ const buffer = await readFile3(resolvedPath);
826
+ const text = buffer.toString("utf8").replace(/\r\n/g, "\n");
827
+ if (buffer.includes(0) || text.includes(REPLACEMENT_CHAR)) {
828
+ return {
829
+ text: "",
830
+ warnings: [
831
+ {
832
+ file: block.path,
833
+ mediaType: block.media_type,
834
+ reason: "default UTF-8 read produced binary or invalid text; configure a preprocessor"
835
+ }
836
+ ]
837
+ };
838
+ }
839
+ return { text: formatFileText(block.path, text), warnings: [] };
840
+ } catch (error) {
841
+ return {
842
+ text: "",
843
+ warnings: [
844
+ {
845
+ file: block.path,
846
+ mediaType: block.media_type,
847
+ reason: error instanceof Error ? error.message : String(error)
848
+ }
849
+ ]
850
+ };
851
+ }
852
+ }
853
+ async function runContentPreprocessor(block, resolvedPath, preprocessor) {
854
+ try {
855
+ const argv = preprocessor.resolvedCommand ?? preprocessor.command;
856
+ const { stdout, stderr, exitCode } = await execFileWithStdin(
857
+ argv,
858
+ JSON.stringify({
859
+ path: resolvedPath,
860
+ original_path: block.path,
861
+ media_type: block.media_type
862
+ })
863
+ );
864
+ if (exitCode !== 0) {
865
+ return {
866
+ text: "",
867
+ warnings: [
868
+ {
869
+ file: block.path,
870
+ mediaType: block.media_type,
871
+ reason: stderr.trim() || `preprocessor exited with code ${exitCode}`
872
+ }
873
+ ]
874
+ };
875
+ }
876
+ return { text: formatFileText(block.path, stdout.trim()), warnings: [] };
877
+ } catch (error) {
878
+ return {
879
+ text: "",
880
+ warnings: [
881
+ {
882
+ file: block.path,
883
+ mediaType: block.media_type,
884
+ reason: error instanceof Error ? error.message : String(error)
885
+ }
886
+ ]
887
+ };
888
+ }
889
+ }
890
+ function appendPreprocessingWarnings(text, warnings) {
891
+ if (warnings.length === 0) {
892
+ return text;
893
+ }
894
+ const notes = warnings.map(
895
+ (warning) => `[file preprocessing warning] ${warning.file} (${warning.mediaType}): ${warning.reason}`
896
+ );
897
+ return [text, ...notes].filter((part) => part.length > 0).join("\n");
898
+ }
899
+ function normalizePreprocessorType(value) {
900
+ const normalized = value.trim().toLowerCase();
901
+ return MIME_TYPE_ALIASES[normalized] ?? normalized;
902
+ }
903
+ function resolveLocalFilePath(value, basePath) {
904
+ if (value.startsWith("file://")) {
905
+ return fileURLToPath2(value);
906
+ }
907
+ if (/^[a-z]+:\/\//i.test(value)) {
908
+ return void 0;
909
+ }
910
+ return basePath ? path4.resolve(basePath, value) : path4.resolve(value);
911
+ }
912
+ function formatFileText(filePath, text) {
913
+ return `[[ file: ${filePath} ]]
914
+ ${text}`;
915
+ }
688
916
 
689
917
  // src/evaluation/validation/prompt-validator.ts
690
- import { readFile as readFile3 } from "node:fs/promises";
918
+ import { readFile as readFile4 } from "node:fs/promises";
691
919
 
692
920
  // src/evaluation/template-variables.ts
693
921
  var TEMPLATE_VARIABLES = {
@@ -718,7 +946,7 @@ var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
718
946
  var ANSI_YELLOW2 = "\x1B[33m";
719
947
  var ANSI_RESET3 = "\x1B[0m";
720
948
  async function validateCustomPromptContent(promptPath) {
721
- const content = await readFile3(promptPath, "utf8");
949
+ const content = await readFile4(promptPath, "utf8");
722
950
  validateTemplateVariables(content, promptPath);
723
951
  }
724
952
  function validateTemplateVariables(content, source) {
@@ -768,6 +996,7 @@ function validateTemplateVariables(content, source) {
768
996
  // src/evaluation/loaders/evaluator-parser.ts
769
997
  var ANSI_YELLOW3 = "\x1B[33m";
770
998
  var ANSI_RESET4 = "\x1B[0m";
999
+ var MAX_ASSERTION_INCLUDE_DEPTH = 3;
771
1000
  var PROMPT_FILE_PREFIX = "file://";
772
1001
  function normalizeEvaluatorType(type) {
773
1002
  return type.replace(/_/g, "-");
@@ -775,22 +1004,104 @@ function normalizeEvaluatorType(type) {
775
1004
  function isDeprecatedJudgeType(type) {
776
1005
  return type === "code-judge" || type === "llm-judge";
777
1006
  }
778
- async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
1007
+ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId, defaultPreprocessors) {
779
1008
  const execution = rawEvalCase.execution;
780
1009
  const executionObject = isJsonObject2(execution) ? execution : void 0;
781
1010
  const caseEvaluators = rawEvalCase.assertions ?? rawEvalCase.assert ?? (executionObject ? executionObject.evaluators : void 0) ?? // deprecated: use assertions
782
1011
  rawEvalCase.evaluators;
783
1012
  const skipDefaults = executionObject?.skip_defaults === true;
784
1013
  const rootEvaluators = skipDefaults ? void 0 : globalExecution?.assertions ?? globalExecution?.assert ?? globalExecution?.evaluators;
785
- const parsedCase = await parseEvaluatorList(caseEvaluators, searchRoots, evalId);
786
- const parsedRoot = await parseEvaluatorList(rootEvaluators, searchRoots, evalId);
1014
+ const parsedCase = await parseEvaluatorList(
1015
+ caseEvaluators,
1016
+ searchRoots,
1017
+ evalId,
1018
+ defaultPreprocessors
1019
+ );
1020
+ const parsedRoot = await parseEvaluatorList(
1021
+ rootEvaluators,
1022
+ searchRoots,
1023
+ evalId,
1024
+ defaultPreprocessors
1025
+ );
787
1026
  if (!parsedCase && !parsedRoot) {
788
1027
  return void 0;
789
1028
  }
790
1029
  const evaluators = [...parsedCase ?? [], ...parsedRoot ?? []];
791
1030
  return evaluators.length > 0 ? evaluators : void 0;
792
1031
  }
793
- async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1032
+ function isIncludeEntry(value) {
1033
+ return isJsonObject2(value) && typeof value.include === "string" && Object.keys(value).length === 1;
1034
+ }
1035
+ function isTemplateReference(value) {
1036
+ return !value.startsWith(".") && !value.includes("/") && !value.includes("\\");
1037
+ }
1038
+ async function resolveAssertionTemplateReference(include, searchRoots) {
1039
+ const templateCandidates = isTemplateReference(include) ? [
1040
+ path5.join(".agentv", "templates", `${include}.yaml`),
1041
+ path5.join(".agentv", "templates", `${include}.yml`)
1042
+ ] : [include];
1043
+ const attempted = [];
1044
+ for (const candidate of templateCandidates) {
1045
+ const resolved = await resolveFileReference2(candidate, searchRoots);
1046
+ attempted.push(...resolved.attempted);
1047
+ if (resolved.resolvedPath) {
1048
+ return {
1049
+ displayPath: resolved.displayPath,
1050
+ resolvedPath: resolved.resolvedPath,
1051
+ attempted
1052
+ };
1053
+ }
1054
+ }
1055
+ return {
1056
+ displayPath: templateCandidates[0] ?? include,
1057
+ resolvedPath: "",
1058
+ attempted
1059
+ };
1060
+ }
1061
+ async function loadAssertionTemplateEntries(include, searchRoots, evalId, includeContext) {
1062
+ const nextDepth = includeContext.depth + 1;
1063
+ if (nextDepth > MAX_ASSERTION_INCLUDE_DEPTH) {
1064
+ const chain = [...includeContext.chain, include].join(" -> ");
1065
+ throw new Error(
1066
+ `Assertion template include depth exceeded ${MAX_ASSERTION_INCLUDE_DEPTH} in '${evalId}'. Include chain: ${chain}`
1067
+ );
1068
+ }
1069
+ const resolved = await resolveAssertionTemplateReference(include, searchRoots);
1070
+ if (!resolved.resolvedPath) {
1071
+ const attempted = resolved.attempted.length > 0 ? `
1072
+ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
1073
+ throw new Error(
1074
+ `Assertion template not found in '${evalId}': ${resolved.displayPath}${attempted}`
1075
+ );
1076
+ }
1077
+ if (includeContext.chain.includes(resolved.resolvedPath)) {
1078
+ const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
1079
+ throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
1080
+ }
1081
+ const content = await readFile5(resolved.resolvedPath, "utf8");
1082
+ const parsed = interpolateEnv(parse2(content), process.env);
1083
+ if (!isJsonObject2(parsed)) {
1084
+ throw new Error(
1085
+ `Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} (expected a YAML object with an assertions array)`
1086
+ );
1087
+ }
1088
+ const assertions = parsed.assertions;
1089
+ if (!Array.isArray(assertions)) {
1090
+ throw new Error(
1091
+ `Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
1092
+ );
1093
+ }
1094
+ const templateDir = path5.dirname(resolved.resolvedPath);
1095
+ const nestedSearchRoots = [
1096
+ templateDir,
1097
+ ...searchRoots.filter((root) => path5.resolve(root) !== templateDir)
1098
+ ];
1099
+ return await expandEvaluatorEntries(assertions, nestedSearchRoots, evalId, {
1100
+ depth: nextDepth,
1101
+ chain: [...includeContext.chain, resolved.resolvedPath]
1102
+ }) ?? [];
1103
+ }
1104
+ async function expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId, includeContext = { depth: 0, chain: [] }) {
794
1105
  if (candidateEvaluators === void 0) {
795
1106
  return void 0;
796
1107
  }
@@ -798,13 +1109,34 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
798
1109
  logWarning2(`Skipping evaluators for '${evalId}': expected array`);
799
1110
  return void 0;
800
1111
  }
801
- const firstStringIndex = candidateEvaluators.findIndex((e) => typeof e === "string");
802
- const processedEvaluators = firstStringIndex === -1 ? [...candidateEvaluators] : (() => {
1112
+ const expanded = [];
1113
+ for (const rawEvaluator of candidateEvaluators) {
1114
+ if (isIncludeEntry(rawEvaluator)) {
1115
+ const included = await loadAssertionTemplateEntries(
1116
+ rawEvaluator.include,
1117
+ searchRoots,
1118
+ evalId,
1119
+ includeContext
1120
+ );
1121
+ expanded.push(...included);
1122
+ continue;
1123
+ }
1124
+ expanded.push(rawEvaluator);
1125
+ }
1126
+ return expanded;
1127
+ }
1128
+ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
1129
+ const expandedEvaluators = await expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId);
1130
+ if (!expandedEvaluators) {
1131
+ return void 0;
1132
+ }
1133
+ const firstStringIndex = expandedEvaluators.findIndex((e) => typeof e === "string");
1134
+ const processedEvaluators = firstStringIndex === -1 ? [...expandedEvaluators] : (() => {
803
1135
  const PLACEHOLDER = Symbol("rubric-placeholder");
804
1136
  const strings = [];
805
1137
  const result = [];
806
1138
  let rubricInserted = false;
807
- for (const item of candidateEvaluators) {
1139
+ for (const item of expandedEvaluators) {
808
1140
  if (typeof item === "string") {
809
1141
  const trimmed = item.trim();
810
1142
  if (trimmed.length === 0) {
@@ -855,6 +1187,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
855
1187
  continue;
856
1188
  }
857
1189
  const negate = rawEvaluator.negate === true ? true : void 0;
1190
+ const mergedPreprocessors = await parseMergedPreprocessors(
1191
+ rawEvaluator.preprocessors,
1192
+ defaultPreprocessors,
1193
+ searchRoots,
1194
+ name,
1195
+ evalId
1196
+ );
858
1197
  if (isCustomType) {
859
1198
  const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
860
1199
  const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
@@ -913,7 +1252,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
913
1252
  if (cwd) {
914
1253
  const resolved = await resolveFileReference2(cwd, searchRoots);
915
1254
  if (resolved.resolvedPath) {
916
- resolvedCwd = path4.resolve(resolved.resolvedPath);
1255
+ resolvedCwd = path5.resolve(resolved.resolvedPath);
917
1256
  } else {
918
1257
  logWarning2(
919
1258
  `Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
@@ -959,6 +1298,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
959
1298
  "cwd",
960
1299
  "weight",
961
1300
  "target",
1301
+ "preprocessors",
962
1302
  "required",
963
1303
  "negate"
964
1304
  ]);
@@ -979,6 +1319,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
979
1319
  ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
980
1320
  ...negate !== void 0 ? { negate } : {},
981
1321
  ...Object.keys(config2).length > 0 ? { config: config2 } : {},
1322
+ ...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {},
982
1323
  ...targetConfig !== void 0 ? { target: targetConfig } : {}
983
1324
  });
984
1325
  continue;
@@ -1010,8 +1351,16 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1010
1351
  );
1011
1352
  continue;
1012
1353
  }
1354
+ const expandedMembers = await expandEvaluatorEntries(
1355
+ rawMembers,
1356
+ searchRoots,
1357
+ `${evalId}:${name}`
1358
+ );
1359
+ if (!expandedMembers) {
1360
+ continue;
1361
+ }
1013
1362
  const memberEvaluators = [];
1014
- for (const rawMember of rawMembers) {
1363
+ for (const rawMember of expandedMembers) {
1015
1364
  if (!isJsonObject2(rawMember)) {
1016
1365
  logWarning2(`Skipping invalid member evaluator in composite '${name}' (expected object)`);
1017
1366
  continue;
@@ -1088,7 +1437,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1088
1437
  aggregatorPrompt = fileRef;
1089
1438
  const resolved = await resolveFileReference2(fileRef, searchRoots);
1090
1439
  if (resolved.resolvedPath) {
1091
- promptPath2 = path4.resolve(resolved.resolvedPath);
1440
+ promptPath2 = path5.resolve(resolved.resolvedPath);
1092
1441
  } else {
1093
1442
  throw new Error(
1094
1443
  `Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
@@ -1742,7 +2091,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1742
2091
  ...weight2 !== void 0 ? { weight: weight2 } : {},
1743
2092
  ...required2 !== void 0 ? { required: required2 } : {},
1744
2093
  ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
1745
- ...negate !== void 0 ? { negate } : {}
2094
+ ...negate !== void 0 ? { negate } : {},
2095
+ ...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
1746
2096
  });
1747
2097
  continue;
1748
2098
  }
@@ -1767,7 +2117,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1767
2117
  const commandPath = commandArray[commandArray.length - 1];
1768
2118
  const resolved = await resolveFileReference2(commandPath, searchRoots);
1769
2119
  if (resolved.resolvedPath) {
1770
- resolvedPromptScript = [...commandArray.slice(0, -1), path4.resolve(resolved.resolvedPath)];
2120
+ resolvedPromptScript = [...commandArray.slice(0, -1), path5.resolve(resolved.resolvedPath)];
1771
2121
  } else {
1772
2122
  throw new Error(
1773
2123
  `Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
@@ -1782,7 +2132,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1782
2132
  prompt = fileRef;
1783
2133
  const resolved = await resolveFileReference2(fileRef, searchRoots);
1784
2134
  if (resolved.resolvedPath) {
1785
- promptPath = path4.resolve(resolved.resolvedPath);
2135
+ promptPath = path5.resolve(resolved.resolvedPath);
1786
2136
  try {
1787
2137
  await validateCustomPromptContent(promptPath);
1788
2138
  } catch (error) {
@@ -1825,7 +2175,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1825
2175
  ...weight2 !== void 0 ? { weight: weight2 } : {},
1826
2176
  ...required2 !== void 0 ? { required: required2 } : {},
1827
2177
  ...min_score2 !== void 0 ? { min_score: min_score2 } : {},
1828
- ...negate !== void 0 ? { negate } : {}
2178
+ ...negate !== void 0 ? { negate } : {},
2179
+ ...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
1829
2180
  });
1830
2181
  continue;
1831
2182
  }
@@ -1850,7 +2201,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1850
2201
  "negate",
1851
2202
  "max_steps",
1852
2203
  "maxSteps",
1853
- "temperature"
2204
+ "temperature",
2205
+ "preprocessors"
1854
2206
  ]);
1855
2207
  const config = {};
1856
2208
  for (const [key, value] of Object.entries(rawEvaluator)) {
@@ -1880,30 +2232,70 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
1880
2232
  ...negate !== void 0 ? { negate } : {},
1881
2233
  ...finalConfig ? { config: finalConfig } : {},
1882
2234
  ...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
1883
- ...llmTemperature !== void 0 ? { temperature: llmTemperature } : {}
2235
+ ...llmTemperature !== void 0 ? { temperature: llmTemperature } : {},
2236
+ ...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
1884
2237
  });
1885
2238
  }
1886
2239
  return evaluators.length > 0 ? evaluators : void 0;
1887
2240
  }
1888
- var ASSERTION_TYPES = /* @__PURE__ */ new Set([
1889
- "skill-trigger",
1890
- "contains",
1891
- "contains-any",
1892
- "contains-all",
1893
- "icontains",
1894
- "icontains-any",
1895
- "icontains-all",
1896
- "starts-with",
1897
- "ends-with",
1898
- "regex",
1899
- "is-json",
1900
- "equals",
1901
- "rubrics"
1902
- ]);
1903
- function generateAssertionName(typeValue, rawEvaluator) {
1904
- if (!ASSERTION_TYPES.has(typeValue)) {
2241
+ async function parseMergedPreprocessors(rawValue, defaultPreprocessors, searchRoots, evaluatorName, evalId) {
2242
+ const parsedDefaults = defaultPreprocessors ?? [];
2243
+ const parsedOverrides = await parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId);
2244
+ if (parsedDefaults.length === 0 && (!parsedOverrides || parsedOverrides.length === 0)) {
2245
+ return void 0;
2246
+ }
2247
+ const merged = /* @__PURE__ */ new Map();
2248
+ for (const entry of parsedDefaults) {
2249
+ merged.set(normalizePreprocessorType(entry.type), entry);
2250
+ }
2251
+ for (const entry of parsedOverrides ?? []) {
2252
+ merged.set(normalizePreprocessorType(entry.type), entry);
2253
+ }
2254
+ return [...merged.values()];
2255
+ }
2256
+ async function parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId) {
2257
+ if (rawValue === void 0) {
1905
2258
  return void 0;
1906
2259
  }
2260
+ if (!Array.isArray(rawValue)) {
2261
+ throw new Error(`Evaluator '${evaluatorName}' in '${evalId}': preprocessors must be an array`);
2262
+ }
2263
+ const preprocessors = [];
2264
+ for (const rawEntry of rawValue) {
2265
+ if (!isJsonObject2(rawEntry)) {
2266
+ throw new Error(
2267
+ `Evaluator '${evaluatorName}' in '${evalId}': each preprocessor must be an object`
2268
+ );
2269
+ }
2270
+ const type = asString(rawEntry.type)?.trim();
2271
+ if (!type) {
2272
+ throw new Error(`Evaluator '${evaluatorName}' in '${evalId}': preprocessor.type is required`);
2273
+ }
2274
+ const command = asStringArray(
2275
+ rawEntry.command,
2276
+ `preprocessor command for evaluator '${evaluatorName}' in '${evalId}'`
2277
+ );
2278
+ if (!command || command.length === 0) {
2279
+ throw new Error(
2280
+ `Evaluator '${evaluatorName}' in '${evalId}': preprocessor '${type}' requires command`
2281
+ );
2282
+ }
2283
+ const commandPath = command[command.length - 1];
2284
+ const resolved = await resolveFileReference2(commandPath, searchRoots);
2285
+ if (!resolved.resolvedPath) {
2286
+ throw new Error(
2287
+ `Evaluator '${evaluatorName}' in '${evalId}': preprocessor command file not found: ${resolved.displayPath}`
2288
+ );
2289
+ }
2290
+ preprocessors.push({
2291
+ type,
2292
+ command,
2293
+ resolvedCommand: [...command.slice(0, -1), path5.resolve(resolved.resolvedPath)]
2294
+ });
2295
+ }
2296
+ return preprocessors;
2297
+ }
2298
+ function generateAssertionName(typeValue, rawEvaluator) {
1907
2299
  const value = asString(rawEvaluator.value);
1908
2300
  const arrayValue = Array.isArray(rawEvaluator.value) ? rawEvaluator.value : void 0;
1909
2301
  switch (typeValue) {
@@ -1936,7 +2328,7 @@ function generateAssertionName(typeValue, rawEvaluator) {
1936
2328
  case "rubrics":
1937
2329
  return "rubrics";
1938
2330
  default:
1939
- return void 0;
2331
+ return typeValue;
1940
2332
  }
1941
2333
  }
1942
2334
  function coerceEvaluator(candidate, contextId) {
@@ -2294,14 +2686,14 @@ function parseInlineRubrics(rawRubrics) {
2294
2686
  }
2295
2687
 
2296
2688
  // src/evaluation/loaders/jsonl-parser.ts
2297
- import { readFile as readFile5 } from "node:fs/promises";
2298
- import path6 from "node:path";
2689
+ import { readFile as readFile7 } from "node:fs/promises";
2690
+ import path7 from "node:path";
2299
2691
  import micromatch from "micromatch";
2300
2692
  import { parse as parseYaml } from "yaml";
2301
2693
 
2302
2694
  // src/evaluation/loaders/message-processor.ts
2303
- import { readFile as readFile4 } from "node:fs/promises";
2304
- import path5 from "node:path";
2695
+ import { readFile as readFile6 } from "node:fs/promises";
2696
+ import path6 from "node:path";
2305
2697
 
2306
2698
  // src/evaluation/formatting/segment-formatter.ts
2307
2699
  function formatFileContents(parts) {
@@ -2367,7 +2759,7 @@ var IMAGE_MEDIA_TYPES = {
2367
2759
  ".bmp": "image/bmp"
2368
2760
  };
2369
2761
  function detectImageMediaType(filePath) {
2370
- const ext = path5.extname(filePath).toLowerCase();
2762
+ const ext = path6.extname(filePath).toLowerCase();
2371
2763
  return IMAGE_MEDIA_TYPES[ext];
2372
2764
  }
2373
2765
  var ANSI_YELLOW4 = "\x1B[33m";
@@ -2417,12 +2809,12 @@ async function processMessages(options) {
2417
2809
  continue;
2418
2810
  }
2419
2811
  try {
2420
- const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2812
+ const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2421
2813
  processedContent.push({
2422
2814
  ...cloneJsonObject(rawSegment),
2423
2815
  path: displayPath,
2424
2816
  text: fileContent,
2425
- resolvedPath: path5.resolve(resolvedPath)
2817
+ resolvedPath: path6.resolve(resolvedPath)
2426
2818
  });
2427
2819
  if (verbose) {
2428
2820
  const label = messageType === "input" ? "[File]" : "[Expected Output File]";
@@ -2458,7 +2850,7 @@ async function processMessages(options) {
2458
2850
  continue;
2459
2851
  }
2460
2852
  try {
2461
- const imageBuffer = await readFile4(resolvedPath);
2853
+ const imageBuffer = await readFile6(resolvedPath);
2462
2854
  const base64 = imageBuffer.toString("base64");
2463
2855
  processedContent.push({
2464
2856
  type: "image",
@@ -2535,12 +2927,12 @@ async function processExpectedMessages(options) {
2535
2927
  continue;
2536
2928
  }
2537
2929
  try {
2538
- const fileContent = (await readFile4(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2930
+ const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
2539
2931
  processedContent.push({
2540
2932
  type: "file",
2541
2933
  path: displayPath,
2542
2934
  text: fileContent,
2543
- resolvedPath: path5.resolve(resolvedPath)
2935
+ resolvedPath: path6.resolve(resolvedPath)
2544
2936
  });
2545
2937
  if (verbose) {
2546
2938
  console.log(` [Expected Output File] Found: ${displayPath}`);
@@ -2575,7 +2967,7 @@ async function processExpectedMessages(options) {
2575
2967
  continue;
2576
2968
  }
2577
2969
  try {
2578
- const imageBuffer = await readFile4(resolvedPath);
2970
+ const imageBuffer = await readFile6(resolvedPath);
2579
2971
  const base64 = imageBuffer.toString("base64");
2580
2972
  processedContent.push({
2581
2973
  type: "image",
@@ -2684,7 +3076,7 @@ function matchesFilter(id, filter) {
2684
3076
  return typeof filter === "string" ? micromatch.isMatch(id, filter) : filter.some((pattern) => micromatch.isMatch(id, pattern));
2685
3077
  }
2686
3078
  function detectFormat(filePath) {
2687
- const ext = path6.extname(filePath).toLowerCase();
3079
+ const ext = path7.extname(filePath).toLowerCase();
2688
3080
  if (ext === ".jsonl") return "jsonl";
2689
3081
  if (ext === ".yaml" || ext === ".yml") return "yaml";
2690
3082
  if (ext === ".json") return "agent-skills-json";
@@ -2693,9 +3085,9 @@ function detectFormat(filePath) {
2693
3085
  );
2694
3086
  }
2695
3087
  async function loadSidecarMetadata(jsonlPath, verbose) {
2696
- const dir = path6.dirname(jsonlPath);
2697
- const base = path6.basename(jsonlPath, ".jsonl");
2698
- const sidecarPath = path6.join(dir, `${base}.yaml`);
3088
+ const dir = path7.dirname(jsonlPath);
3089
+ const base = path7.basename(jsonlPath, ".jsonl");
3090
+ const sidecarPath = path7.join(dir, `${base}.yaml`);
2699
3091
  if (!await fileExists2(sidecarPath)) {
2700
3092
  if (verbose) {
2701
3093
  logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
@@ -2703,7 +3095,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
2703
3095
  return {};
2704
3096
  }
2705
3097
  try {
2706
- const content = await readFile5(sidecarPath, "utf8");
3098
+ const content = await readFile7(sidecarPath, "utf8");
2707
3099
  const parsed = interpolateEnv(parseYaml(content), process.env);
2708
3100
  if (!isJsonObject(parsed)) {
2709
3101
  logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
@@ -2744,13 +3136,13 @@ function parseJsonlContent(content, filePath) {
2744
3136
  async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
2745
3137
  const verbose = options?.verbose ?? false;
2746
3138
  const filterPattern = options?.filter;
2747
- const absoluteTestPath = path6.resolve(evalFilePath);
3139
+ const absoluteTestPath = path7.resolve(evalFilePath);
2748
3140
  const repoRootPath = resolveToAbsolutePath(repoRoot);
2749
3141
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
2750
3142
  const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
2751
- const rawFile = await readFile5(absoluteTestPath, "utf8");
3143
+ const rawFile = await readFile7(absoluteTestPath, "utf8");
2752
3144
  const rawCases = parseJsonlContent(rawFile, evalFilePath);
2753
- const fallbackSuiteName = path6.basename(absoluteTestPath, ".jsonl") || "eval";
3145
+ const fallbackSuiteName = path7.basename(absoluteTestPath, ".jsonl") || "eval";
2754
3146
  const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
2755
3147
  const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
2756
3148
  const globalExecution = sidecar.execution;
@@ -2930,11 +3322,13 @@ function parseRepoCheckout(raw) {
2930
3322
  if (!isJsonObject(raw)) return void 0;
2931
3323
  const obj = raw;
2932
3324
  const ref = typeof obj.ref === "string" ? obj.ref : void 0;
3325
+ const baseCommit = typeof obj.base_commit === "string" ? obj.base_commit : void 0;
2933
3326
  const resolve = obj.resolve === "remote" || obj.resolve === "local" ? obj.resolve : void 0;
2934
3327
  const ancestor = typeof obj.ancestor === "number" ? obj.ancestor : void 0;
2935
- if (!ref && !resolve && ancestor === void 0) return void 0;
3328
+ if (!ref && !baseCommit && !resolve && ancestor === void 0) return void 0;
2936
3329
  return {
2937
3330
  ...ref !== void 0 && { ref },
3331
+ ...baseCommit !== void 0 && { base_commit: baseCommit },
2938
3332
  ...resolve !== void 0 && { resolve },
2939
3333
  ...ancestor !== void 0 && { ancestor }
2940
3334
  };
@@ -2957,12 +3351,12 @@ function parseRepoConfig(raw) {
2957
3351
  const obj = raw;
2958
3352
  const repoPath = typeof obj.path === "string" ? obj.path : void 0;
2959
3353
  const source = parseRepoSource(obj.source);
2960
- if (!repoPath || !source) return void 0;
2961
3354
  const checkout = parseRepoCheckout(obj.checkout);
2962
3355
  const clone = parseRepoClone(obj.clone);
3356
+ if (!repoPath && !source && !checkout && !clone) return void 0;
2963
3357
  return {
2964
- path: repoPath,
2965
- source,
3358
+ ...repoPath !== void 0 && { path: repoPath },
3359
+ ...source !== void 0 && { source },
2966
3360
  ...checkout !== void 0 && { checkout },
2967
3361
  ...clone !== void 0 && { clone }
2968
3362
  };
@@ -3013,7 +3407,8 @@ ${messageContent}`);
3013
3407
  segmentsByMessage,
3014
3408
  mode
3015
3409
  }) : void 0;
3016
- return { question, chatPrompt };
3410
+ const systemMessage = extractSystemMessage(testCase.input, segmentsByMessage, mode);
3411
+ return { question, chatPrompt, systemMessage };
3017
3412
  }
3018
3413
  function needsRoleMarkers(messages, processedSegmentsByMessage) {
3019
3414
  if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
@@ -3027,6 +3422,26 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
3027
3422
  }
3028
3423
  return messagesWithContent > 1;
3029
3424
  }
3425
+ function extractSystemMessage(messages, segmentsByMessage, mode) {
3426
+ const systemParts = [];
3427
+ for (let i = 0; i < messages.length; i++) {
3428
+ if (messages[i].role !== "system") {
3429
+ break;
3430
+ }
3431
+ const segments = segmentsByMessage[i];
3432
+ const contentParts = [];
3433
+ for (const segment of segments) {
3434
+ const formatted = formatSegment(segment, mode);
3435
+ if (formatted) {
3436
+ contentParts.push(formatted);
3437
+ }
3438
+ }
3439
+ if (contentParts.length > 0) {
3440
+ systemParts.push(contentParts.join("\n"));
3441
+ }
3442
+ }
3443
+ return systemParts.length > 0 ? systemParts.join("\n\n") : void 0;
3444
+ }
3030
3445
  function buildChatPromptFromSegments(options) {
3031
3446
  const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
3032
3447
  if (messages.length === 0) {
@@ -3109,9 +3524,9 @@ function resolveTests(suite) {
3109
3524
  }
3110
3525
  async function readTestSuiteMetadata(testFilePath) {
3111
3526
  try {
3112
- const absolutePath = path7.resolve(testFilePath);
3113
- const content = await readFile6(absolutePath, "utf8");
3114
- const parsed = interpolateEnv(parse2(content), process.env);
3527
+ const absolutePath = path8.resolve(testFilePath);
3528
+ const content = await readFile8(absolutePath, "utf8");
3529
+ const parsed = interpolateEnv(parse3(content), process.env);
3115
3530
  if (!isJsonObject(parsed)) {
3116
3531
  return {};
3117
3532
  }
@@ -3164,25 +3579,31 @@ var loadEvalCases = loadTests;
3164
3579
  async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3165
3580
  const verbose = options?.verbose ?? false;
3166
3581
  const filterPattern = options?.filter;
3167
- const absoluteTestPath = path7.resolve(evalFilePath);
3582
+ const absoluteTestPath = path8.resolve(evalFilePath);
3168
3583
  const repoRootPath = resolveToAbsolutePath(repoRoot);
3169
3584
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
3170
3585
  const config = await loadConfig(absoluteTestPath, repoRootPath);
3171
- const rawFile = await readFile6(absoluteTestPath, "utf8");
3172
- const interpolated = interpolateEnv(parse2(rawFile), process.env);
3586
+ const rawFile = await readFile8(absoluteTestPath, "utf8");
3587
+ const interpolated = interpolateEnv(parse3(rawFile), process.env);
3173
3588
  if (!isJsonObject(interpolated)) {
3174
3589
  throw new Error(`Invalid test file format: ${evalFilePath}`);
3175
3590
  }
3176
3591
  const suite = interpolated;
3177
3592
  const suiteNameFromFile = asString5(suite.name)?.trim();
3178
- const fallbackSuiteName = path7.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
3593
+ const fallbackSuiteName = path8.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
3179
3594
  const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
3180
3595
  const rawTestCases = resolveTests(suite);
3181
3596
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
3182
- const evalFileDir = path7.dirname(absoluteTestPath);
3597
+ const suitePreprocessors = await parsePreprocessors(
3598
+ suite.preprocessors,
3599
+ searchRoots,
3600
+ "<suite>",
3601
+ absoluteTestPath
3602
+ );
3603
+ const evalFileDir = path8.dirname(absoluteTestPath);
3183
3604
  let expandedTestCases;
3184
3605
  if (typeof rawTestCases === "string") {
3185
- const externalPath = path7.resolve(evalFileDir, rawTestCases);
3606
+ const externalPath = path8.resolve(evalFileDir, rawTestCases);
3186
3607
  expandedTestCases = await loadCasesFromFile(externalPath);
3187
3608
  } else if (Array.isArray(rawTestCases)) {
3188
3609
  expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
@@ -3280,7 +3701,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3280
3701
  testCaseConfig,
3281
3702
  globalExecution,
3282
3703
  searchRoots,
3283
- id ?? "unknown"
3704
+ id ?? "unknown",
3705
+ suitePreprocessors
3284
3706
  );
3285
3707
  } catch (error) {
3286
3708
  const message = error instanceof Error ? error.message : String(error);
@@ -3303,7 +3725,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3303
3725
  const testCase = {
3304
3726
  id,
3305
3727
  suite: suiteName,
3306
- category: options?.category,
3728
+ category: suite.category ?? options?.category,
3307
3729
  conversation_id: conversationId,
3308
3730
  question,
3309
3731
  input: inputMessages,
@@ -3313,6 +3735,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
3313
3735
  criteria: outcome ?? "",
3314
3736
  evaluator: testCaseEvaluatorKind,
3315
3737
  assertions: evaluators,
3738
+ ...suitePreprocessors ? { preprocessors: suitePreprocessors } : {},
3316
3739
  workspace: mergedWorkspace,
3317
3740
  metadata,
3318
3741
  targets: caseTargets,
@@ -3353,8 +3776,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
3353
3776
  if (!command) return void 0;
3354
3777
  const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
3355
3778
  let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
3356
- if (cwd && !path7.isAbsolute(cwd)) {
3357
- cwd = path7.resolve(evalFileDir, cwd);
3779
+ if (cwd && !path8.isAbsolute(cwd)) {
3780
+ cwd = path8.resolve(evalFileDir, cwd);
3358
3781
  }
3359
3782
  const config = { command };
3360
3783
  if (timeoutMs !== void 0) {
@@ -3392,20 +3815,20 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
3392
3815
  }
3393
3816
  async function resolveWorkspaceConfig(raw, evalFileDir) {
3394
3817
  if (typeof raw === "string") {
3395
- const workspaceFilePath = path7.resolve(evalFileDir, raw);
3818
+ const workspaceFilePath = path8.resolve(evalFileDir, raw);
3396
3819
  let content;
3397
3820
  try {
3398
- content = await readFile6(workspaceFilePath, "utf8");
3821
+ content = await readFile8(workspaceFilePath, "utf8");
3399
3822
  } catch {
3400
3823
  throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
3401
3824
  }
3402
- const parsed = interpolateEnv(parse2(content), process.env);
3825
+ const parsed = interpolateEnv(parse3(content), process.env);
3403
3826
  if (!isJsonObject(parsed)) {
3404
3827
  throw new Error(
3405
3828
  `Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
3406
3829
  );
3407
3830
  }
3408
- const workspaceFileDir = path7.dirname(workspaceFilePath);
3831
+ const workspaceFileDir = path8.dirname(workspaceFilePath);
3409
3832
  return parseWorkspaceConfig(parsed, workspaceFileDir);
3410
3833
  }
3411
3834
  return parseWorkspaceConfig(raw, evalFileDir);
@@ -3425,8 +3848,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
3425
3848
  throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
3426
3849
  }
3427
3850
  let template = typeof obj.template === "string" ? obj.template : void 0;
3428
- if (template && !path7.isAbsolute(template)) {
3429
- template = path7.resolve(evalFileDir, template);
3851
+ if (template && !path8.isAbsolute(template)) {
3852
+ template = path8.resolve(evalFileDir, template);
3430
3853
  }
3431
3854
  const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
3432
3855
  const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
@@ -3434,14 +3857,28 @@ function parseWorkspaceConfig(raw, evalFileDir) {
3434
3857
  const explicitMode = obj.mode === "pooled" || obj.mode === "temp" || obj.mode === "static" ? obj.mode : void 0;
3435
3858
  const workspacePath = typeof obj.path === "string" ? obj.path : void 0;
3436
3859
  const mode = explicitMode ?? (workspacePath ? "static" : void 0);
3437
- if (!template && !isolation && !repos && !hooks && !mode && !workspacePath) return void 0;
3860
+ const docker = parseDockerWorkspaceConfig(obj.docker);
3861
+ if (!template && !isolation && !repos && !hooks && !mode && !workspacePath && !docker)
3862
+ return void 0;
3438
3863
  return {
3439
3864
  ...template !== void 0 && { template },
3440
3865
  ...isolation !== void 0 && { isolation },
3441
3866
  ...repos !== void 0 && { repos },
3442
3867
  ...hooks !== void 0 && { hooks },
3443
3868
  ...mode !== void 0 && { mode },
3444
- ...workspacePath !== void 0 && { path: workspacePath }
3869
+ ...workspacePath !== void 0 && { path: workspacePath },
3870
+ ...docker !== void 0 && { docker }
3871
+ };
3872
+ }
3873
+ function parseDockerWorkspaceConfig(raw) {
3874
+ if (!isJsonObject(raw)) return void 0;
3875
+ const obj = raw;
3876
+ if (typeof obj.image !== "string") return void 0;
3877
+ return {
3878
+ image: obj.image,
3879
+ ...typeof obj.timeout === "number" && { timeout: obj.timeout },
3880
+ ...typeof obj.memory === "string" && { memory: obj.memory },
3881
+ ...typeof obj.cpus === "number" && { cpus: obj.cpus }
3445
3882
  };
3446
3883
  }
3447
3884
  function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
@@ -3470,7 +3907,8 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
3470
3907
  repos: caseLevel.repos ?? suiteLevel.repos,
3471
3908
  ...hasHooks && { hooks: mergedHooks },
3472
3909
  mode: caseLevel.mode ?? suiteLevel.mode,
3473
- path: caseLevel.path ?? suiteLevel.path
3910
+ path: caseLevel.path ?? suiteLevel.path,
3911
+ docker: caseLevel.docker ?? suiteLevel.docker
3474
3912
  };
3475
3913
  }
3476
3914
  function asString5(value) {
@@ -3497,8 +3935,8 @@ ${detailBlock}${ANSI_RESET7}`);
3497
3935
 
3498
3936
  // src/evaluation/loaders/eval-yaml-transpiler.ts
3499
3937
  import { readFileSync } from "node:fs";
3500
- import path8 from "node:path";
3501
- import { parse as parse3 } from "yaml";
3938
+ import path9 from "node:path";
3939
+ import { parse as parse4 } from "yaml";
3502
3940
  function codeGraderInstruction(graderName, description) {
3503
3941
  const desc = description ? ` This grader: ${description}.` : "";
3504
3942
  return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
@@ -3737,8 +4175,8 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
3737
4175
  }
3738
4176
  function transpileEvalYamlFile(evalYamlPath) {
3739
4177
  const content = readFileSync(evalYamlPath, "utf8");
3740
- const parsed = parse3(content);
3741
- return transpileEvalYaml(parsed, path8.basename(evalYamlPath));
4178
+ const parsed = parse4(content);
4179
+ return transpileEvalYaml(parsed, path9.basename(evalYamlPath));
3742
4180
  }
3743
4181
  function getOutputFilenames(result) {
3744
4182
  const names = /* @__PURE__ */ new Map();
@@ -4176,7 +4614,7 @@ import { spawn } from "node:child_process";
4176
4614
  import { randomUUID } from "node:crypto";
4177
4615
  import { createWriteStream } from "node:fs";
4178
4616
  import { mkdir } from "node:fs/promises";
4179
- import path10 from "node:path";
4617
+ import path11 from "node:path";
4180
4618
 
4181
4619
  // src/evaluation/providers/claude-content.ts
4182
4620
  function toContentArray(content) {
@@ -4275,7 +4713,7 @@ function subscribeToClaudeLogEntries(listener) {
4275
4713
  }
4276
4714
 
4277
4715
  // src/evaluation/providers/preread.ts
4278
- import path9 from "node:path";
4716
+ import path10 from "node:path";
4279
4717
  function buildPromptDocument(request, inputFiles) {
4280
4718
  const parts = [];
4281
4719
  const inputFilesList = collectInputFiles(inputFiles);
@@ -4292,7 +4730,7 @@ function normalizeInputFiles(inputFiles) {
4292
4730
  }
4293
4731
  const deduped = /* @__PURE__ */ new Map();
4294
4732
  for (const inputFile of inputFiles) {
4295
- const absolutePath = path9.resolve(inputFile);
4733
+ const absolutePath = path10.resolve(inputFile);
4296
4734
  if (!deduped.has(absolutePath)) {
4297
4735
  deduped.set(absolutePath, absolutePath);
4298
4736
  }
@@ -4305,7 +4743,7 @@ function collectInputFiles(inputFiles) {
4305
4743
  }
4306
4744
  const unique = /* @__PURE__ */ new Map();
4307
4745
  for (const inputFile of inputFiles) {
4308
- const absolutePath = path9.resolve(inputFile);
4746
+ const absolutePath = path10.resolve(inputFile);
4309
4747
  if (!unique.has(absolutePath)) {
4310
4748
  unique.set(absolutePath, absolutePath);
4311
4749
  }
@@ -4317,7 +4755,7 @@ function buildMandatoryPrereadBlock(inputFiles) {
4317
4755
  return "";
4318
4756
  }
4319
4757
  const buildList = (files) => files.map((absolutePath) => {
4320
- const fileName = path9.basename(absolutePath);
4758
+ const fileName = path10.basename(absolutePath);
4321
4759
  const fileUri = pathToFileUri(absolutePath);
4322
4760
  return `* [${fileName}](${fileUri})`;
4323
4761
  });
@@ -4333,7 +4771,7 @@ ${buildList(inputFiles).join("\n")}.`);
4333
4771
  return sections.join("\n");
4334
4772
  }
4335
4773
  function pathToFileUri(filePath) {
4336
- const absolutePath = path9.isAbsolute(filePath) ? filePath : path9.resolve(filePath);
4774
+ const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
4337
4775
  const normalizedPath = absolutePath.replace(/\\/g, "/");
4338
4776
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
4339
4777
  return `file:///${normalizedPath}`;
@@ -4481,10 +4919,10 @@ var ClaudeCliProvider = class {
4481
4919
  }
4482
4920
  resolveCwd(cwdOverride) {
4483
4921
  if (cwdOverride) {
4484
- return path10.resolve(cwdOverride);
4922
+ return path11.resolve(cwdOverride);
4485
4923
  }
4486
4924
  if (this.config.cwd) {
4487
- return path10.resolve(this.config.cwd);
4925
+ return path11.resolve(this.config.cwd);
4488
4926
  }
4489
4927
  return void 0;
4490
4928
  }
@@ -4494,9 +4932,9 @@ var ClaudeCliProvider = class {
4494
4932
  return void 0;
4495
4933
  }
4496
4934
  if (this.config.logDir) {
4497
- return path10.resolve(this.config.logDir);
4935
+ return path11.resolve(this.config.logDir);
4498
4936
  }
4499
- return path10.join(process.cwd(), ".agentv", "logs", "claude-cli");
4937
+ return path11.join(process.cwd(), ".agentv", "logs", "claude-cli");
4500
4938
  }
4501
4939
  async createStreamLogger(request) {
4502
4940
  const logDir = this.resolveLogDirectory();
@@ -4510,7 +4948,7 @@ var ClaudeCliProvider = class {
4510
4948
  console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
4511
4949
  return void 0;
4512
4950
  }
4513
- const filePath = path10.join(logDir, buildLogFilename(request, this.targetName));
4951
+ const filePath = path11.join(logDir, buildLogFilename(request, this.targetName));
4514
4952
  try {
4515
4953
  const logger = await ClaudeCliStreamLogger.create({
4516
4954
  filePath,
@@ -4812,7 +5250,7 @@ function tryParseJson(line) {
4812
5250
  import { randomUUID as randomUUID2 } from "node:crypto";
4813
5251
  import { createWriteStream as createWriteStream2 } from "node:fs";
4814
5252
  import { mkdir as mkdir2 } from "node:fs/promises";
4815
- import path11 from "node:path";
5253
+ import path12 from "node:path";
4816
5254
  var claudeSdkModule = null;
4817
5255
  async function loadClaudeSdk() {
4818
5256
  if (!claudeSdkModule) {
@@ -4973,10 +5411,10 @@ var ClaudeSdkProvider = class {
4973
5411
  }
4974
5412
  resolveCwd(cwdOverride) {
4975
5413
  if (cwdOverride) {
4976
- return path11.resolve(cwdOverride);
5414
+ return path12.resolve(cwdOverride);
4977
5415
  }
4978
5416
  if (this.config.cwd) {
4979
- return path11.resolve(this.config.cwd);
5417
+ return path12.resolve(this.config.cwd);
4980
5418
  }
4981
5419
  return void 0;
4982
5420
  }
@@ -4986,9 +5424,9 @@ var ClaudeSdkProvider = class {
4986
5424
  return void 0;
4987
5425
  }
4988
5426
  if (this.config.logDir) {
4989
- return path11.resolve(this.config.logDir);
5427
+ return path12.resolve(this.config.logDir);
4990
5428
  }
4991
- return path11.join(process.cwd(), ".agentv", "logs", "claude");
5429
+ return path12.join(process.cwd(), ".agentv", "logs", "claude");
4992
5430
  }
4993
5431
  async createStreamLogger(request) {
4994
5432
  const logDir = this.resolveLogDirectory();
@@ -5002,7 +5440,7 @@ var ClaudeSdkProvider = class {
5002
5440
  console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
5003
5441
  return void 0;
5004
5442
  }
5005
- const filePath = path11.join(logDir, buildLogFilename2(request, this.targetName));
5443
+ const filePath = path12.join(logDir, buildLogFilename2(request, this.targetName));
5006
5444
  try {
5007
5445
  const logger = await ClaudeStreamLogger.create({
5008
5446
  filePath,
@@ -5190,7 +5628,7 @@ function formatElapsed2(startedAt) {
5190
5628
  import { exec as execWithCallback } from "node:child_process";
5191
5629
  import fs from "node:fs/promises";
5192
5630
  import os from "node:os";
5193
- import path12 from "node:path";
5631
+ import path13 from "node:path";
5194
5632
  import { promisify } from "node:util";
5195
5633
  import { z as z2 } from "zod";
5196
5634
  var ToolCallSchema = z2.object({
@@ -5693,7 +6131,7 @@ function normalizeInputFiles2(inputFiles) {
5693
6131
  }
5694
6132
  const unique = /* @__PURE__ */ new Map();
5695
6133
  for (const inputFile of inputFiles) {
5696
- const absolutePath = path12.resolve(inputFile);
6134
+ const absolutePath = path13.resolve(inputFile);
5697
6135
  if (!unique.has(absolutePath)) {
5698
6136
  unique.set(absolutePath, absolutePath);
5699
6137
  }
@@ -5707,7 +6145,7 @@ function formatFileList(files, template) {
5707
6145
  const formatter = template ?? "{path}";
5708
6146
  return files.map((filePath) => {
5709
6147
  const escapedPath = shellEscape(filePath);
5710
- const escapedName = shellEscape(path12.basename(filePath));
6148
+ const escapedName = shellEscape(path13.basename(filePath));
5711
6149
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
5712
6150
  }).join(" ");
5713
6151
  }
@@ -5731,7 +6169,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
5731
6169
  const safeEvalId = evalCaseId || "unknown";
5732
6170
  const timestamp = Date.now();
5733
6171
  const random = Math.random().toString(36).substring(2, 9);
5734
- return path12.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
6172
+ return path13.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
5735
6173
  }
5736
6174
  function formatTimeoutSuffix2(timeoutMs) {
5737
6175
  if (!timeoutMs || timeoutMs <= 0) {
@@ -5745,7 +6183,7 @@ function formatTimeoutSuffix2(timeoutMs) {
5745
6183
  import { randomUUID as randomUUID3 } from "node:crypto";
5746
6184
  import { createWriteStream as createWriteStream3 } from "node:fs";
5747
6185
  import { mkdir as mkdir3 } from "node:fs/promises";
5748
- import path13 from "node:path";
6186
+ import path14 from "node:path";
5749
6187
 
5750
6188
  // src/evaluation/providers/codex-log-tracker.ts
5751
6189
  var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
@@ -5980,10 +6418,10 @@ ${basePrompt}` : basePrompt;
5980
6418
  }
5981
6419
  resolveCwd(cwdOverride) {
5982
6420
  if (cwdOverride) {
5983
- return path13.resolve(cwdOverride);
6421
+ return path14.resolve(cwdOverride);
5984
6422
  }
5985
6423
  if (this.config.cwd) {
5986
- return path13.resolve(this.config.cwd);
6424
+ return path14.resolve(this.config.cwd);
5987
6425
  }
5988
6426
  return void 0;
5989
6427
  }
@@ -5993,9 +6431,9 @@ ${basePrompt}` : basePrompt;
5993
6431
  return void 0;
5994
6432
  }
5995
6433
  if (this.config.logDir) {
5996
- return path13.resolve(this.config.logDir);
6434
+ return path14.resolve(this.config.logDir);
5997
6435
  }
5998
- return path13.join(process.cwd(), ".agentv", "logs", "codex");
6436
+ return path14.join(process.cwd(), ".agentv", "logs", "codex");
5999
6437
  }
6000
6438
  async createStreamLogger(request) {
6001
6439
  const logDir = this.resolveLogDirectory();
@@ -6009,7 +6447,7 @@ ${basePrompt}` : basePrompt;
6009
6447
  console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
6010
6448
  return void 0;
6011
6449
  }
6012
- const filePath = path13.join(logDir, buildLogFilename3(request, this.targetName));
6450
+ const filePath = path14.join(logDir, buildLogFilename3(request, this.targetName));
6013
6451
  try {
6014
6452
  const logger = await CodexSdkStreamLogger.create({
6015
6453
  filePath,
@@ -6153,7 +6591,7 @@ function formatElapsed3(startedAt) {
6153
6591
  // src/evaluation/providers/copilot-cli.ts
6154
6592
  import { randomUUID as randomUUID5 } from "node:crypto";
6155
6593
  import { mkdir as mkdir4 } from "node:fs/promises";
6156
- import path15 from "node:path";
6594
+ import path16 from "node:path";
6157
6595
  import { Readable, Writable } from "node:stream";
6158
6596
  import { spawn as spawn2 } from "node:child_process";
6159
6597
  import * as acp from "@agentclientprotocol/sdk";
@@ -6215,10 +6653,10 @@ function subscribeToCopilotCliLogEntries(listener) {
6215
6653
  import { randomUUID as randomUUID4 } from "node:crypto";
6216
6654
  import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
6217
6655
  import { arch, platform } from "node:os";
6218
- import path14 from "node:path";
6219
- import { fileURLToPath as fileURLToPath2 } from "node:url";
6656
+ import path15 from "node:path";
6657
+ import { fileURLToPath as fileURLToPath3 } from "node:url";
6220
6658
  function resolvePlatformCliPath() {
6221
- const os3 = platform();
6659
+ const os4 = platform();
6222
6660
  const cpu = arch();
6223
6661
  const platformMap = {
6224
6662
  linux: "linux",
@@ -6229,17 +6667,17 @@ function resolvePlatformCliPath() {
6229
6667
  x64: "x64",
6230
6668
  arm64: "arm64"
6231
6669
  };
6232
- const osPart = platformMap[os3];
6670
+ const osPart = platformMap[os4];
6233
6671
  const archPart = archMap[cpu];
6234
6672
  if (!osPart || !archPart) {
6235
6673
  return void 0;
6236
6674
  }
6237
6675
  const packageName = `@github/copilot-${osPart}-${archPart}`;
6238
- const binaryName = os3 === "win32" ? "copilot.exe" : "copilot";
6676
+ const binaryName = os4 === "win32" ? "copilot.exe" : "copilot";
6239
6677
  try {
6240
6678
  const resolved = import.meta.resolve(`${packageName}/package.json`);
6241
- const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath2(resolved) : resolved;
6242
- const binaryPath = path14.join(path14.dirname(packageJsonPath), binaryName);
6679
+ const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath3(resolved) : resolved;
6680
+ const binaryPath = path15.join(path15.dirname(packageJsonPath), binaryName);
6243
6681
  if (existsSync(binaryPath)) {
6244
6682
  return binaryPath;
6245
6683
  }
@@ -6247,7 +6685,7 @@ function resolvePlatformCliPath() {
6247
6685
  }
6248
6686
  let searchDir = process.cwd();
6249
6687
  for (let i = 0; i < 10; i++) {
6250
- const standardPath = path14.join(
6688
+ const standardPath = path15.join(
6251
6689
  searchDir,
6252
6690
  "node_modules",
6253
6691
  ...packageName.split("/"),
@@ -6256,13 +6694,13 @@ function resolvePlatformCliPath() {
6256
6694
  if (existsSync(standardPath)) {
6257
6695
  return standardPath;
6258
6696
  }
6259
- const bunDir = path14.join(searchDir, "node_modules", ".bun");
6697
+ const bunDir = path15.join(searchDir, "node_modules", ".bun");
6260
6698
  const prefix = `@github+copilot-${osPart}-${archPart}@`;
6261
6699
  try {
6262
6700
  const entries = readdirSync(bunDir);
6263
6701
  for (const entry of entries) {
6264
6702
  if (entry.startsWith(prefix)) {
6265
- const candidate = path14.join(
6703
+ const candidate = path15.join(
6266
6704
  bunDir,
6267
6705
  entry,
6268
6706
  "node_modules",
@@ -6277,7 +6715,7 @@ function resolvePlatformCliPath() {
6277
6715
  }
6278
6716
  } catch {
6279
6717
  }
6280
- const parent = path14.dirname(searchDir);
6718
+ const parent = path15.dirname(searchDir);
6281
6719
  if (parent === searchDir) break;
6282
6720
  searchDir = parent;
6283
6721
  }
@@ -6621,10 +7059,10 @@ var CopilotCliProvider = class {
6621
7059
  }
6622
7060
  resolveCwd(cwdOverride) {
6623
7061
  if (cwdOverride) {
6624
- return path15.resolve(cwdOverride);
7062
+ return path16.resolve(cwdOverride);
6625
7063
  }
6626
7064
  if (this.config.cwd) {
6627
- return path15.resolve(this.config.cwd);
7065
+ return path16.resolve(this.config.cwd);
6628
7066
  }
6629
7067
  return void 0;
6630
7068
  }
@@ -6643,9 +7081,9 @@ var CopilotCliProvider = class {
6643
7081
  return void 0;
6644
7082
  }
6645
7083
  if (this.config.logDir) {
6646
- return path15.resolve(this.config.logDir);
7084
+ return path16.resolve(this.config.logDir);
6647
7085
  }
6648
- return path15.join(process.cwd(), ".agentv", "logs", "copilot-cli");
7086
+ return path16.join(process.cwd(), ".agentv", "logs", "copilot-cli");
6649
7087
  }
6650
7088
  async createStreamLogger(request) {
6651
7089
  const logDir = this.resolveLogDirectory();
@@ -6659,7 +7097,7 @@ var CopilotCliProvider = class {
6659
7097
  console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
6660
7098
  return void 0;
6661
7099
  }
6662
- const filePath = path15.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
7100
+ const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
6663
7101
  try {
6664
7102
  const logger = await CopilotStreamLogger.create(
6665
7103
  {
@@ -6752,9 +7190,9 @@ function summarizeAcpEvent(eventType, data) {
6752
7190
  }
6753
7191
 
6754
7192
  // src/evaluation/providers/copilot-log.ts
6755
- import { readFile as readFile8 } from "node:fs/promises";
7193
+ import { readFile as readFile10 } from "node:fs/promises";
6756
7194
  import { homedir as homedir2 } from "node:os";
6757
- import path17 from "node:path";
7195
+ import path18 from "node:path";
6758
7196
 
6759
7197
  // src/evaluation/providers/copilot-log-parser.ts
6760
7198
  function parseCopilotEvents(eventsJsonl) {
@@ -6886,11 +7324,11 @@ function parseCopilotEvents(eventsJsonl) {
6886
7324
  }
6887
7325
 
6888
7326
  // src/evaluation/providers/copilot-session-discovery.ts
6889
- import { readFile as readFile7, readdir, stat } from "node:fs/promises";
7327
+ import { readFile as readFile9, readdir, stat } from "node:fs/promises";
6890
7328
  import { homedir } from "node:os";
6891
- import path16 from "node:path";
7329
+ import path17 from "node:path";
6892
7330
  import { parse as parseYaml2 } from "yaml";
6893
- var DEFAULT_SESSION_STATE_DIR = () => path16.join(homedir(), ".copilot", "session-state");
7331
+ var DEFAULT_SESSION_STATE_DIR = () => path17.join(homedir(), ".copilot", "session-state");
6894
7332
  async function discoverCopilotSessions(opts) {
6895
7333
  const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
6896
7334
  const limit = opts?.limit ?? 10;
@@ -6902,11 +7340,11 @@ async function discoverCopilotSessions(opts) {
6902
7340
  }
6903
7341
  const sessions = [];
6904
7342
  for (const entry of entries) {
6905
- const sessionDir = path16.join(sessionStateDir, entry);
6906
- const workspacePath = path16.join(sessionDir, "workspace.yaml");
6907
- const eventsPath = path16.join(sessionDir, "events.jsonl");
7343
+ const sessionDir = path17.join(sessionStateDir, entry);
7344
+ const workspacePath = path17.join(sessionDir, "workspace.yaml");
7345
+ const eventsPath = path17.join(sessionDir, "events.jsonl");
6908
7346
  try {
6909
- const workspaceContent = await readFile7(workspacePath, "utf8");
7347
+ const workspaceContent = await readFile9(workspacePath, "utf8");
6910
7348
  const workspace = parseYaml2(workspaceContent) ?? {};
6911
7349
  const cwd = String(workspace.cwd ?? "");
6912
7350
  let updatedAt;
@@ -6965,10 +7403,10 @@ var CopilotLogProvider = class {
6965
7403
  }
6966
7404
  async invoke(_request) {
6967
7405
  const sessionDir = await this.resolveSessionDir();
6968
- const eventsPath = path17.join(sessionDir, "events.jsonl");
7406
+ const eventsPath = path18.join(sessionDir, "events.jsonl");
6969
7407
  let eventsContent;
6970
7408
  try {
6971
- eventsContent = await readFile8(eventsPath, "utf8");
7409
+ eventsContent = await readFile10(eventsPath, "utf8");
6972
7410
  } catch (err) {
6973
7411
  throw new Error(
6974
7412
  `Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
@@ -6987,8 +7425,8 @@ var CopilotLogProvider = class {
6987
7425
  return this.config.sessionDir;
6988
7426
  }
6989
7427
  if (this.config.sessionId) {
6990
- const stateDir = this.config.sessionStateDir ?? path17.join(homedir2(), ".copilot", "session-state");
6991
- return path17.join(stateDir, this.config.sessionId);
7428
+ const stateDir = this.config.sessionStateDir ?? path18.join(homedir2(), ".copilot", "session-state");
7429
+ return path18.join(stateDir, this.config.sessionId);
6992
7430
  }
6993
7431
  if (this.config.discover === "latest") {
6994
7432
  const sessions = await discoverCopilotSessions({
@@ -7013,7 +7451,7 @@ var CopilotLogProvider = class {
7013
7451
  import { randomUUID as randomUUID6 } from "node:crypto";
7014
7452
  import { existsSync as existsSync2 } from "node:fs";
7015
7453
  import { mkdir as mkdir5 } from "node:fs/promises";
7016
- import path18 from "node:path";
7454
+ import path19 from "node:path";
7017
7455
 
7018
7456
  // src/evaluation/providers/copilot-sdk-log-tracker.ts
7019
7457
  var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
@@ -7323,10 +7761,10 @@ var CopilotSdkProvider = class {
7323
7761
  }
7324
7762
  resolveCwd(cwdOverride) {
7325
7763
  if (cwdOverride) {
7326
- return path18.resolve(cwdOverride);
7764
+ return path19.resolve(cwdOverride);
7327
7765
  }
7328
7766
  if (this.config.cwd) {
7329
- return path18.resolve(this.config.cwd);
7767
+ return path19.resolve(this.config.cwd);
7330
7768
  }
7331
7769
  return void 0;
7332
7770
  }
@@ -7335,9 +7773,9 @@ var CopilotSdkProvider = class {
7335
7773
  return void 0;
7336
7774
  }
7337
7775
  if (this.config.logDir) {
7338
- return path18.resolve(this.config.logDir);
7776
+ return path19.resolve(this.config.logDir);
7339
7777
  }
7340
- return path18.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
7778
+ return path19.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
7341
7779
  }
7342
7780
  async createStreamLogger(request) {
7343
7781
  const logDir = this.resolveLogDirectory();
@@ -7351,7 +7789,7 @@ var CopilotSdkProvider = class {
7351
7789
  console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
7352
7790
  return void 0;
7353
7791
  }
7354
- const filePath = path18.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
7792
+ const filePath = path19.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
7355
7793
  try {
7356
7794
  const logger = await CopilotStreamLogger.create(
7357
7795
  {
@@ -7380,9 +7818,9 @@ var CopilotSdkProvider = class {
7380
7818
  };
7381
7819
  function resolveSkillDirectories(cwd) {
7382
7820
  const candidates = [
7383
- path18.join(cwd, ".claude", "skills"),
7384
- path18.join(cwd, ".agents", "skills"),
7385
- path18.join(cwd, ".codex", "skills")
7821
+ path19.join(cwd, ".claude", "skills"),
7822
+ path19.join(cwd, ".agents", "skills"),
7823
+ path19.join(cwd, ".codex", "skills")
7386
7824
  ];
7387
7825
  return candidates.filter((dir) => existsSync2(dir));
7388
7826
  }
@@ -7466,7 +7904,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
7466
7904
  import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
7467
7905
  import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
7468
7906
  import { tmpdir } from "node:os";
7469
- import path19 from "node:path";
7907
+ import path20 from "node:path";
7470
7908
 
7471
7909
  // src/evaluation/providers/pi-log-tracker.ts
7472
7910
  var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
@@ -7672,7 +8110,7 @@ var PiCliProvider = class {
7672
8110
  const cwd = this.resolveCwd(workspaceRoot, request.cwd);
7673
8111
  const logger = await this.createStreamLogger(request).catch(() => void 0);
7674
8112
  try {
7675
- const promptFile = path19.join(cwd, PROMPT_FILENAME);
8113
+ const promptFile = path20.join(cwd, PROMPT_FILENAME);
7676
8114
  await writeFile(promptFile, request.question, "utf8");
7677
8115
  const args = this.buildPiArgs(request.question, inputFiles);
7678
8116
  const result = await this.executePi(args, cwd, request.signal, logger);
@@ -7735,10 +8173,10 @@ var PiCliProvider = class {
7735
8173
  }
7736
8174
  resolveCwd(workspaceRoot, cwdOverride) {
7737
8175
  if (cwdOverride) {
7738
- return path19.resolve(cwdOverride);
8176
+ return path20.resolve(cwdOverride);
7739
8177
  }
7740
8178
  if (this.config.cwd) {
7741
- return path19.resolve(this.config.cwd);
8179
+ return path20.resolve(this.config.cwd);
7742
8180
  }
7743
8181
  if (workspaceRoot) {
7744
8182
  return workspaceRoot;
@@ -7844,7 +8282,7 @@ ${prompt}` : prompt;
7844
8282
  return env;
7845
8283
  }
7846
8284
  async createWorkspace() {
7847
- return await mkdtemp(path19.join(tmpdir(), WORKSPACE_PREFIX));
8285
+ return await mkdtemp(path20.join(tmpdir(), WORKSPACE_PREFIX));
7848
8286
  }
7849
8287
  async cleanupWorkspace(workspaceRoot) {
7850
8288
  try {
@@ -7854,9 +8292,9 @@ ${prompt}` : prompt;
7854
8292
  }
7855
8293
  resolveLogDirectory() {
7856
8294
  if (this.config.logDir) {
7857
- return path19.resolve(this.config.logDir);
8295
+ return path20.resolve(this.config.logDir);
7858
8296
  }
7859
- return path19.join(process.cwd(), ".agentv", "logs", "pi-cli");
8297
+ return path20.join(process.cwd(), ".agentv", "logs", "pi-cli");
7860
8298
  }
7861
8299
  async createStreamLogger(request) {
7862
8300
  const logDir = this.resolveLogDirectory();
@@ -7870,7 +8308,7 @@ ${prompt}` : prompt;
7870
8308
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
7871
8309
  return void 0;
7872
8310
  }
7873
- const filePath = path19.join(logDir, buildLogFilename5(request, this.targetName));
8311
+ const filePath = path20.join(logDir, buildLogFilename5(request, this.targetName));
7874
8312
  try {
7875
8313
  const logger = await PiStreamLogger.create({
7876
8314
  filePath,
@@ -8341,8 +8779,8 @@ function resolveWindowsCmd(executable) {
8341
8779
  const content = readFileSync2(cmdPath, "utf-8");
8342
8780
  const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
8343
8781
  if (match) {
8344
- const dp0 = path19.dirname(path19.resolve(cmdPath));
8345
- const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path19.sep}`);
8782
+ const dp0 = path20.dirname(path20.resolve(cmdPath));
8783
+ const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path20.sep}`);
8346
8784
  try {
8347
8785
  accessSync(scriptPath);
8348
8786
  return ["node", [scriptPath]];
@@ -8421,13 +8859,13 @@ import { execSync as execSync2 } from "node:child_process";
8421
8859
  import { randomUUID as randomUUID8 } from "node:crypto";
8422
8860
  import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
8423
8861
  import { mkdir as mkdir7 } from "node:fs/promises";
8424
- import path21 from "node:path";
8862
+ import path22 from "node:path";
8425
8863
  import { createInterface } from "node:readline";
8426
- import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
8864
+ import { fileURLToPath as fileURLToPath4, pathToFileURL } from "node:url";
8427
8865
 
8428
8866
  // src/paths.ts
8429
8867
  import os2 from "node:os";
8430
- import path20 from "node:path";
8868
+ import path21 from "node:path";
8431
8869
  var logged = false;
8432
8870
  function getAgentvHome() {
8433
8871
  const envHome = process.env.AGENTV_HOME;
@@ -8438,19 +8876,19 @@ function getAgentvHome() {
8438
8876
  }
8439
8877
  return envHome;
8440
8878
  }
8441
- return path20.join(os2.homedir(), ".agentv");
8879
+ return path21.join(os2.homedir(), ".agentv");
8442
8880
  }
8443
8881
  function getWorkspacesRoot() {
8444
- return path20.join(getAgentvHome(), "workspaces");
8882
+ return path21.join(getAgentvHome(), "workspaces");
8445
8883
  }
8446
8884
  function getSubagentsRoot() {
8447
- return path20.join(getAgentvHome(), "subagents");
8885
+ return path21.join(getAgentvHome(), "subagents");
8448
8886
  }
8449
8887
  function getTraceStateRoot() {
8450
- return path20.join(getAgentvHome(), "trace-state");
8888
+ return path21.join(getAgentvHome(), "trace-state");
8451
8889
  }
8452
8890
  function getWorkspacePoolRoot() {
8453
- return path20.join(getAgentvHome(), "workspace-pool");
8891
+ return path21.join(getAgentvHome(), "workspace-pool");
8454
8892
  }
8455
8893
 
8456
8894
  // src/evaluation/providers/pi-coding-agent.ts
@@ -8472,7 +8910,7 @@ async function promptInstall() {
8472
8910
  }
8473
8911
  }
8474
8912
  function findManagedSdkInstallRoot() {
8475
- return path21.join(getAgentvHome(), "deps", "pi-sdk");
8913
+ return path22.join(getAgentvHome(), "deps", "pi-sdk");
8476
8914
  }
8477
8915
  function resolveGlobalNpmRoot() {
8478
8916
  try {
@@ -8486,7 +8924,7 @@ function resolveGlobalNpmRoot() {
8486
8924
  }
8487
8925
  }
8488
8926
  function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
8489
- return path21.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
8927
+ return path22.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
8490
8928
  }
8491
8929
  function findAccessiblePath(paths) {
8492
8930
  for (const candidate of paths) {
@@ -8512,11 +8950,11 @@ async function tryImportLocalSdkModules() {
8512
8950
  async function tryImportManagedSdkModules() {
8513
8951
  const managedRoot = findManagedSdkInstallRoot();
8514
8952
  const piCodingAgentEntry = findAccessiblePath([
8515
- path21.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
8953
+ path22.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
8516
8954
  ]);
8517
8955
  const piAiEntry = findAccessiblePath([
8518
- path21.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
8519
- path21.join(
8956
+ path22.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
8957
+ path22.join(
8520
8958
  managedRoot,
8521
8959
  "node_modules",
8522
8960
  "@mariozechner",
@@ -8547,7 +8985,7 @@ async function tryImportGlobalSdkModules() {
8547
8985
  ]);
8548
8986
  const piAiEntry = findAccessiblePath([
8549
8987
  buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
8550
- path21.join(
8988
+ path22.join(
8551
8989
  globalNpmRoot,
8552
8990
  "@mariozechner",
8553
8991
  "pi-coding-agent",
@@ -8848,10 +9286,10 @@ ${fileList}`;
8848
9286
  }
8849
9287
  resolveCwd(cwdOverride) {
8850
9288
  if (cwdOverride) {
8851
- return path21.resolve(cwdOverride);
9289
+ return path22.resolve(cwdOverride);
8852
9290
  }
8853
9291
  if (this.config.cwd) {
8854
- return path21.resolve(this.config.cwd);
9292
+ return path22.resolve(this.config.cwd);
8855
9293
  }
8856
9294
  return process.cwd();
8857
9295
  }
@@ -8870,9 +9308,9 @@ ${fileList}`;
8870
9308
  }
8871
9309
  resolveLogDirectory() {
8872
9310
  if (this.config.logDir) {
8873
- return path21.resolve(this.config.logDir);
9311
+ return path22.resolve(this.config.logDir);
8874
9312
  }
8875
- return path21.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
9313
+ return path22.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
8876
9314
  }
8877
9315
  async createStreamLogger(request) {
8878
9316
  const logDir = this.resolveLogDirectory();
@@ -8886,7 +9324,7 @@ ${fileList}`;
8886
9324
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
8887
9325
  return void 0;
8888
9326
  }
8889
- const filePath = path21.join(logDir, buildLogFilename6(request, this.targetName));
9327
+ const filePath = path22.join(logDir, buildLogFilename6(request, this.targetName));
8890
9328
  try {
8891
9329
  const logger = await PiStreamLogger2.create({
8892
9330
  filePath,
@@ -9101,17 +9539,17 @@ var ProviderRegistry = class {
9101
9539
  // src/evaluation/providers/vscode-provider.ts
9102
9540
  import { exec as exec2 } from "node:child_process";
9103
9541
  import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
9104
- import path32 from "node:path";
9542
+ import path33 from "node:path";
9105
9543
  import { promisify as promisify3 } from "node:util";
9106
9544
 
9107
9545
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
9108
9546
  import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
9109
- import path30 from "node:path";
9547
+ import path31 from "node:path";
9110
9548
 
9111
9549
  // src/evaluation/providers/vscode/utils/fs.ts
9112
9550
  import { constants as constants2 } from "node:fs";
9113
9551
  import { access as access2, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
9114
- import path22 from "node:path";
9552
+ import path23 from "node:path";
9115
9553
  async function pathExists(target) {
9116
9554
  try {
9117
9555
  await access2(target, constants2.F_OK);
@@ -9127,7 +9565,7 @@ async function readDirEntries(target) {
9127
9565
  const entries = await readdir2(target, { withFileTypes: true });
9128
9566
  return entries.map((entry) => ({
9129
9567
  name: entry.name,
9130
- absolutePath: path22.join(target, entry.name),
9568
+ absolutePath: path23.join(target, entry.name),
9131
9569
  isDirectory: entry.isDirectory()
9132
9570
  }));
9133
9571
  }
@@ -9142,9 +9580,9 @@ async function removeIfExists(target) {
9142
9580
  }
9143
9581
 
9144
9582
  // src/evaluation/providers/vscode/utils/path.ts
9145
- import path23 from "node:path";
9583
+ import path24 from "node:path";
9146
9584
  function pathToFileUri2(filePath) {
9147
- const absolutePath = path23.isAbsolute(filePath) ? filePath : path23.resolve(filePath);
9585
+ const absolutePath = path24.isAbsolute(filePath) ? filePath : path24.resolve(filePath);
9148
9586
  const normalizedPath = absolutePath.replace(/\\/g, "/");
9149
9587
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
9150
9588
  return `file:///${normalizedPath}`;
@@ -9153,7 +9591,7 @@ function pathToFileUri2(filePath) {
9153
9591
  }
9154
9592
 
9155
9593
  // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
9156
- import path24 from "node:path";
9594
+ import path25 from "node:path";
9157
9595
 
9158
9596
  // src/evaluation/providers/vscode/utils/template.ts
9159
9597
  function renderTemplate2(content, variables) {
@@ -9245,8 +9683,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
9245
9683
  });
9246
9684
  }
9247
9685
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
9248
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path24.basename(file)}`).join("\n");
9249
- const responseList = responseFiles.map((file) => `"${path24.basename(file)}"`).join(", ");
9686
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path25.basename(file)}`).join("\n");
9687
+ const responseList = responseFiles.map((file) => `"${path25.basename(file)}"`).join(", ");
9250
9688
  return renderTemplate2(templateContent, {
9251
9689
  requestFiles: requestLines,
9252
9690
  responseList
@@ -9254,8 +9692,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
9254
9692
  }
9255
9693
 
9256
9694
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
9257
- import { readFile as readFile9 } from "node:fs/promises";
9258
- import path25 from "node:path";
9695
+ import { readFile as readFile11 } from "node:fs/promises";
9696
+ import path26 from "node:path";
9259
9697
 
9260
9698
  // src/evaluation/providers/vscode/utils/time.ts
9261
9699
  function sleep2(ms) {
@@ -9293,7 +9731,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
9293
9731
  const maxAttempts = 10;
9294
9732
  while (attempts < maxAttempts) {
9295
9733
  try {
9296
- const content = await readFile9(responseFileFinal, { encoding: "utf8" });
9734
+ const content = await readFile11(responseFileFinal, { encoding: "utf8" });
9297
9735
  if (!silent) {
9298
9736
  process.stdout.write(`${content}
9299
9737
  `);
@@ -9314,7 +9752,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
9314
9752
  }
9315
9753
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
9316
9754
  if (!silent) {
9317
- const fileList = responseFilesFinal.map((file) => path25.basename(file)).join(", ");
9755
+ const fileList = responseFilesFinal.map((file) => path26.basename(file)).join(", ");
9318
9756
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
9319
9757
  }
9320
9758
  const deadline = Date.now() + timeoutMs;
@@ -9323,7 +9761,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9323
9761
  while (pending.size > 0) {
9324
9762
  if (Date.now() >= deadline) {
9325
9763
  if (!silent) {
9326
- const remaining = [...pending].map((f) => path25.basename(f)).join(", ");
9764
+ const remaining = [...pending].map((f) => path26.basename(f)).join(", ");
9327
9765
  console.error(
9328
9766
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
9329
9767
  );
@@ -9350,7 +9788,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9350
9788
  const maxAttempts = 10;
9351
9789
  while (attempts < maxAttempts) {
9352
9790
  try {
9353
- const content = await readFile9(file, { encoding: "utf8" });
9791
+ const content = await readFile11(file, { encoding: "utf8" });
9354
9792
  if (!silent) {
9355
9793
  process.stdout.write(`${content}
9356
9794
  `);
@@ -9374,16 +9812,16 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
9374
9812
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
9375
9813
  import { exec, spawn as spawn4 } from "node:child_process";
9376
9814
  import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
9377
- import path27 from "node:path";
9815
+ import path28 from "node:path";
9378
9816
  import { promisify as promisify2 } from "node:util";
9379
9817
 
9380
9818
  // src/evaluation/providers/vscode/dispatch/constants.ts
9381
- import path26 from "node:path";
9819
+ import path27 from "node:path";
9382
9820
  var DEFAULT_LOCK_NAME = "subagent.lock";
9383
9821
  var DEFAULT_ALIVE_FILENAME = ".alive";
9384
9822
  function getDefaultSubagentRoot(vscodeCmd = "code") {
9385
9823
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
9386
- return path26.join(getSubagentsRoot(), folder);
9824
+ return path27.join(getSubagentsRoot(), folder);
9387
9825
  }
9388
9826
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
9389
9827
 
@@ -9450,11 +9888,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9450
9888
  await raceSpawnError(child);
9451
9889
  return true;
9452
9890
  }
9453
- const aliveFile = path27.join(subagentDir, DEFAULT_ALIVE_FILENAME);
9891
+ const aliveFile = path28.join(subagentDir, DEFAULT_ALIVE_FILENAME);
9454
9892
  await removeIfExists(aliveFile);
9455
- const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
9893
+ const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
9456
9894
  await mkdir9(githubAgentsDir, { recursive: true });
9457
- const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
9895
+ const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
9458
9896
  await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
9459
9897
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
9460
9898
  label: "open-workspace"
@@ -9467,7 +9905,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9467
9905
  "chat",
9468
9906
  "-m",
9469
9907
  wakeupChatId,
9470
- `create a file named .alive in the ${path27.basename(subagentDir)} folder`
9908
+ `create a file named .alive in the ${path28.basename(subagentDir)} folder`
9471
9909
  ];
9472
9910
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
9473
9911
  await raceSpawnError(wakeupChild);
@@ -9482,10 +9920,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
9482
9920
  return true;
9483
9921
  }
9484
9922
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
9485
- const workspacePath = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
9486
- const messagesDir = path27.join(subagentDir, "messages");
9923
+ const workspacePath = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
9924
+ const messagesDir = path28.join(subagentDir, "messages");
9487
9925
  await mkdir9(messagesDir, { recursive: true });
9488
- const reqFile = path27.join(messagesDir, `${timestamp}_req.md`);
9926
+ const reqFile = path28.join(messagesDir, `${timestamp}_req.md`);
9489
9927
  await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
9490
9928
  const reqUri = pathToFileUri2(reqFile);
9491
9929
  const chatArgs = ["-r", "chat", "-m", chatId];
@@ -9493,16 +9931,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
9493
9931
  chatArgs.push("-a", attachment);
9494
9932
  }
9495
9933
  chatArgs.push("-a", reqFile);
9496
- chatArgs.push(`Follow instructions in [${path27.basename(reqFile)}](${reqUri})`);
9934
+ chatArgs.push(`Follow instructions in [${path28.basename(reqFile)}](${reqUri})`);
9497
9935
  const workspaceReady = await ensureWorkspaceFocused(
9498
9936
  workspacePath,
9499
- path27.basename(subagentDir),
9937
+ path28.basename(subagentDir),
9500
9938
  subagentDir,
9501
9939
  vscodeCmd
9502
9940
  );
9503
9941
  if (!workspaceReady) {
9504
9942
  throw new Error(
9505
- `VS Code workspace '${path27.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
9943
+ `VS Code workspace '${path28.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
9506
9944
  );
9507
9945
  }
9508
9946
  await sleep2(500);
@@ -9510,8 +9948,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
9510
9948
  await raceSpawnError(child);
9511
9949
  }
9512
9950
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
9513
- const workspacePath = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
9514
- const messagesDir = path27.join(subagentDir, "messages");
9951
+ const workspacePath = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
9952
+ const messagesDir = path28.join(subagentDir, "messages");
9515
9953
  await mkdir9(messagesDir, { recursive: true });
9516
9954
  const chatArgs = ["-r", "chat", "-m", chatId];
9517
9955
  for (const attachment of attachmentPaths) {
@@ -9520,13 +9958,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
9520
9958
  chatArgs.push(chatInstruction);
9521
9959
  const workspaceReady = await ensureWorkspaceFocused(
9522
9960
  workspacePath,
9523
- path27.basename(subagentDir),
9961
+ path28.basename(subagentDir),
9524
9962
  subagentDir,
9525
9963
  vscodeCmd
9526
9964
  );
9527
9965
  if (!workspaceReady) {
9528
9966
  throw new Error(
9529
- `VS Code workspace '${path27.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
9967
+ `VS Code workspace '${path28.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
9530
9968
  );
9531
9969
  }
9532
9970
  await sleep2(500);
@@ -9535,11 +9973,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
9535
9973
  }
9536
9974
 
9537
9975
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
9538
- import { copyFile, mkdir as mkdir10, readFile as readFile10, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
9539
- import path29 from "node:path";
9976
+ import { copyFile, mkdir as mkdir10, readFile as readFile12, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
9977
+ import path30 from "node:path";
9540
9978
 
9541
9979
  // src/evaluation/providers/vscode/utils/workspace.ts
9542
- import path28 from "node:path";
9980
+ import path29 from "node:path";
9543
9981
  import JSON5 from "json5";
9544
9982
  function transformWorkspacePaths(workspaceContent, templateDir) {
9545
9983
  let workspace;
@@ -9556,10 +9994,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
9556
9994
  }
9557
9995
  const transformedFolders = workspace.folders.map((folder) => {
9558
9996
  const folderPath = folder.path;
9559
- if (path28.isAbsolute(folderPath)) {
9997
+ if (path29.isAbsolute(folderPath)) {
9560
9998
  return folder;
9561
9999
  }
9562
- const absolutePath = path28.resolve(templateDir, folderPath);
10000
+ const absolutePath = path29.resolve(templateDir, folderPath);
9563
10001
  return {
9564
10002
  ...folder,
9565
10003
  path: absolutePath
@@ -9581,19 +10019,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
9581
10019
  if (locationMap && typeof locationMap === "object") {
9582
10020
  const transformedMap = {};
9583
10021
  for (const [locationPath, value] of Object.entries(locationMap)) {
9584
- const isAbsolute = path28.isAbsolute(locationPath);
10022
+ const isAbsolute = path29.isAbsolute(locationPath);
9585
10023
  if (isAbsolute) {
9586
10024
  transformedMap[locationPath] = value;
9587
10025
  } else {
9588
10026
  const firstGlobIndex = locationPath.search(/[*]/);
9589
10027
  if (firstGlobIndex === -1) {
9590
- const resolvedPath = path28.resolve(templateDir, locationPath).replace(/\\/g, "/");
10028
+ const resolvedPath = path29.resolve(templateDir, locationPath).replace(/\\/g, "/");
9591
10029
  transformedMap[resolvedPath] = value;
9592
10030
  } else {
9593
10031
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
9594
10032
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
9595
10033
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
9596
- const resolvedPath = (path28.resolve(templateDir, basePath) + patternPath).replace(
10034
+ const resolvedPath = (path29.resolve(templateDir, basePath) + patternPath).replace(
9597
10035
  /\\/g,
9598
10036
  "/"
9599
10037
  );
@@ -9634,7 +10072,7 @@ async function findUnlockedSubagent(subagentRoot) {
9634
10072
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
9635
10073
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
9636
10074
  for (const subagent of subagents) {
9637
- const lockFile = path29.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
10075
+ const lockFile = path30.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
9638
10076
  if (!await pathExists(lockFile)) {
9639
10077
  return subagent.absolutePath;
9640
10078
  }
@@ -9644,7 +10082,7 @@ async function findUnlockedSubagent(subagentRoot) {
9644
10082
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
9645
10083
  let workspaceContent;
9646
10084
  if (workspaceTemplate) {
9647
- const workspaceSrc = path29.resolve(workspaceTemplate);
10085
+ const workspaceSrc = path30.resolve(workspaceTemplate);
9648
10086
  if (!await pathExists(workspaceSrc)) {
9649
10087
  throw new Error(`workspace template not found: ${workspaceSrc}`);
9650
10088
  }
@@ -9652,18 +10090,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
9652
10090
  if (!stats.isFile()) {
9653
10091
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
9654
10092
  }
9655
- const templateText = await readFile10(workspaceSrc, "utf8");
10093
+ const templateText = await readFile12(workspaceSrc, "utf8");
9656
10094
  workspaceContent = JSON.parse(templateText);
9657
10095
  } else {
9658
10096
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
9659
10097
  }
9660
- const workspaceName = `${path29.basename(subagentDir)}.code-workspace`;
9661
- const workspaceDst = path29.join(subagentDir, workspaceName);
9662
- const templateDir = workspaceTemplate ? path29.dirname(path29.resolve(workspaceTemplate)) : subagentDir;
10098
+ const workspaceName = `${path30.basename(subagentDir)}.code-workspace`;
10099
+ const workspaceDst = path30.join(subagentDir, workspaceName);
10100
+ const templateDir = workspaceTemplate ? path30.dirname(path30.resolve(workspaceTemplate)) : subagentDir;
9663
10101
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
9664
10102
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
9665
10103
  if (cwd) {
9666
- const absCwd = path29.resolve(cwd);
10104
+ const absCwd = path30.resolve(cwd);
9667
10105
  const parsed = JSON.parse(transformedContent);
9668
10106
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
9669
10107
  if (!alreadyPresent) {
@@ -9672,35 +10110,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
9672
10110
  }
9673
10111
  }
9674
10112
  await writeFile3(workspaceDst, transformedContent, "utf8");
9675
- const messagesDir = path29.join(subagentDir, "messages");
10113
+ const messagesDir = path30.join(subagentDir, "messages");
9676
10114
  await mkdir10(messagesDir, { recursive: true });
9677
10115
  return { workspace: workspaceDst, messagesDir };
9678
10116
  }
9679
10117
  async function createSubagentLock(subagentDir) {
9680
- const messagesDir = path29.join(subagentDir, "messages");
10118
+ const messagesDir = path30.join(subagentDir, "messages");
9681
10119
  if (await pathExists(messagesDir)) {
9682
10120
  const files = await readdir3(messagesDir);
9683
10121
  await Promise.all(
9684
10122
  files.map(async (file) => {
9685
- const target = path29.join(messagesDir, file);
10123
+ const target = path30.join(messagesDir, file);
9686
10124
  await removeIfExists(target);
9687
10125
  })
9688
10126
  );
9689
10127
  }
9690
- const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
10128
+ const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
9691
10129
  if (await pathExists(githubAgentsDir)) {
9692
10130
  const agentFiles = await readdir3(githubAgentsDir);
9693
10131
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
9694
10132
  await Promise.all(
9695
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path29.join(githubAgentsDir, file)))
10133
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path30.join(githubAgentsDir, file)))
9696
10134
  );
9697
10135
  }
9698
- const lockFile = path29.join(subagentDir, DEFAULT_LOCK_NAME);
10136
+ const lockFile = path30.join(subagentDir, DEFAULT_LOCK_NAME);
9699
10137
  await writeFile3(lockFile, "", { encoding: "utf8" });
9700
10138
  return lockFile;
9701
10139
  }
9702
10140
  async function removeSubagentLock(subagentDir) {
9703
- const lockFile = path29.join(subagentDir, DEFAULT_LOCK_NAME);
10141
+ const lockFile = path30.join(subagentDir, DEFAULT_LOCK_NAME);
9704
10142
  await removeIfExists(lockFile);
9705
10143
  }
9706
10144
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -9720,9 +10158,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
9720
10158
  return 1;
9721
10159
  }
9722
10160
  if (promptFile) {
9723
- const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
10161
+ const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
9724
10162
  await mkdir10(githubAgentsDir, { recursive: true });
9725
- const agentFile = path29.join(githubAgentsDir, `${chatId}.md`);
10163
+ const agentFile = path30.join(githubAgentsDir, `${chatId}.md`);
9726
10164
  try {
9727
10165
  await copyFile(promptFile, agentFile);
9728
10166
  } catch (error) {
@@ -9741,7 +10179,7 @@ async function resolvePromptFile(promptFile) {
9741
10179
  if (!promptFile) {
9742
10180
  return void 0;
9743
10181
  }
9744
- const resolvedPrompt = path30.resolve(promptFile);
10182
+ const resolvedPrompt = path31.resolve(promptFile);
9745
10183
  if (!await pathExists(resolvedPrompt)) {
9746
10184
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
9747
10185
  }
@@ -9757,7 +10195,7 @@ async function resolveAttachments(extraAttachments) {
9757
10195
  }
9758
10196
  const resolved = [];
9759
10197
  for (const attachment of extraAttachments) {
9760
- const resolvedPath = path30.resolve(attachment);
10198
+ const resolvedPath = path31.resolve(attachment);
9761
10199
  if (!await pathExists(resolvedPath)) {
9762
10200
  throw new Error(`Attachment not found: ${resolvedPath}`);
9763
10201
  }
@@ -9799,7 +10237,7 @@ async function dispatchAgentSession(options) {
9799
10237
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
9800
10238
  };
9801
10239
  }
9802
- const subagentName = path30.basename(subagentDir);
10240
+ const subagentName = path31.basename(subagentDir);
9803
10241
  const chatId = Math.random().toString(16).slice(2, 10);
9804
10242
  const preparationResult = await prepareSubagentDirectory(
9805
10243
  subagentDir,
@@ -9827,9 +10265,9 @@ async function dispatchAgentSession(options) {
9827
10265
  };
9828
10266
  }
9829
10267
  const timestamp = generateTimestamp();
9830
- const messagesDir = path30.join(subagentDir, "messages");
9831
- const responseFileTmp = path30.join(messagesDir, `${timestamp}_res.tmp.md`);
9832
- const responseFileFinal = path30.join(messagesDir, `${timestamp}_res.md`);
10268
+ const messagesDir = path31.join(subagentDir, "messages");
10269
+ const responseFileTmp = path31.join(messagesDir, `${timestamp}_res.tmp.md`);
10270
+ const responseFileFinal = path31.join(messagesDir, `${timestamp}_res.md`);
9833
10271
  const requestInstructions = createRequestPrompt(
9834
10272
  userQuery,
9835
10273
  responseFileTmp,
@@ -9934,7 +10372,7 @@ async function dispatchBatchAgent(options) {
9934
10372
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
9935
10373
  };
9936
10374
  }
9937
- subagentName = path30.basename(subagentDir);
10375
+ subagentName = path31.basename(subagentDir);
9938
10376
  const chatId = Math.random().toString(16).slice(2, 10);
9939
10377
  const preparationResult = await prepareSubagentDirectory(
9940
10378
  subagentDir,
@@ -9965,17 +10403,17 @@ async function dispatchBatchAgent(options) {
9965
10403
  };
9966
10404
  }
9967
10405
  const timestamp = generateTimestamp();
9968
- const messagesDir = path30.join(subagentDir, "messages");
10406
+ const messagesDir = path31.join(subagentDir, "messages");
9969
10407
  requestFiles = userQueries.map(
9970
- (_, index) => path30.join(messagesDir, `${timestamp}_${index}_req.md`)
10408
+ (_, index) => path31.join(messagesDir, `${timestamp}_${index}_req.md`)
9971
10409
  );
9972
10410
  const responseTmpFiles = userQueries.map(
9973
- (_, index) => path30.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
10411
+ (_, index) => path31.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
9974
10412
  );
9975
10413
  responseFilesFinal = userQueries.map(
9976
- (_, index) => path30.join(messagesDir, `${timestamp}_${index}_res.md`)
10414
+ (_, index) => path31.join(messagesDir, `${timestamp}_${index}_res.md`)
9977
10415
  );
9978
- const orchestratorFile = path30.join(messagesDir, `${timestamp}_orchestrator.md`);
10416
+ const orchestratorFile = path31.join(messagesDir, `${timestamp}_orchestrator.md`);
9979
10417
  if (!dryRun) {
9980
10418
  await Promise.all(
9981
10419
  userQueries.map((query, index) => {
@@ -10061,7 +10499,7 @@ async function dispatchBatchAgent(options) {
10061
10499
 
10062
10500
  // src/evaluation/providers/vscode/dispatch/provision.ts
10063
10501
  import { writeFile as writeFile5 } from "node:fs/promises";
10064
- import path31 from "node:path";
10502
+ import path32 from "node:path";
10065
10503
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
10066
10504
  folders: [
10067
10505
  {
@@ -10092,7 +10530,7 @@ async function provisionSubagents(options) {
10092
10530
  if (!Number.isInteger(subagents) || subagents < 1) {
10093
10531
  throw new Error("subagents must be a positive integer");
10094
10532
  }
10095
- const targetPath = path31.resolve(targetRoot);
10533
+ const targetPath = path32.resolve(targetRoot);
10096
10534
  if (!dryRun) {
10097
10535
  await ensureDir(targetPath);
10098
10536
  }
@@ -10112,7 +10550,7 @@ async function provisionSubagents(options) {
10112
10550
  continue;
10113
10551
  }
10114
10552
  highestNumber = Math.max(highestNumber, parsed);
10115
- const lockFile = path31.join(entry.absolutePath, lockName);
10553
+ const lockFile = path32.join(entry.absolutePath, lockName);
10116
10554
  const locked = await pathExists(lockFile);
10117
10555
  if (locked) {
10118
10556
  lockedSubagents.add(entry.absolutePath);
@@ -10129,10 +10567,10 @@ async function provisionSubagents(options) {
10129
10567
  break;
10130
10568
  }
10131
10569
  const subagentDir = subagent.absolutePath;
10132
- const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
10133
- const lockFile = path31.join(subagentDir, lockName);
10134
- const workspaceDst = path31.join(subagentDir, `${path31.basename(subagentDir)}.code-workspace`);
10135
- const wakeupDst = path31.join(githubAgentsDir, "wakeup.md");
10570
+ const githubAgentsDir = path32.join(subagentDir, ".github", "agents");
10571
+ const lockFile = path32.join(subagentDir, lockName);
10572
+ const workspaceDst = path32.join(subagentDir, `${path32.basename(subagentDir)}.code-workspace`);
10573
+ const wakeupDst = path32.join(githubAgentsDir, "wakeup.md");
10136
10574
  const isLocked = await pathExists(lockFile);
10137
10575
  if (isLocked && !force) {
10138
10576
  continue;
@@ -10170,10 +10608,10 @@ async function provisionSubagents(options) {
10170
10608
  let nextIndex = highestNumber;
10171
10609
  while (subagentsProvisioned < subagents) {
10172
10610
  nextIndex += 1;
10173
- const subagentDir = path31.join(targetPath, `subagent-${nextIndex}`);
10174
- const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
10175
- const workspaceDst = path31.join(subagentDir, `${path31.basename(subagentDir)}.code-workspace`);
10176
- const wakeupDst = path31.join(githubAgentsDir, "wakeup.md");
10611
+ const subagentDir = path32.join(targetPath, `subagent-${nextIndex}`);
10612
+ const githubAgentsDir = path32.join(subagentDir, ".github", "agents");
10613
+ const workspaceDst = path32.join(subagentDir, `${path32.basename(subagentDir)}.code-workspace`);
10614
+ const wakeupDst = path32.join(githubAgentsDir, "wakeup.md");
10177
10615
  if (!dryRun) {
10178
10616
  await ensureDir(subagentDir);
10179
10617
  await ensureDir(githubAgentsDir);
@@ -10363,7 +10801,7 @@ var VSCodeProvider = class {
10363
10801
  async function locateVSCodeExecutable(candidate) {
10364
10802
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
10365
10803
  if (includesPathSeparator) {
10366
- const resolved = path32.isAbsolute(candidate) ? candidate : path32.resolve(candidate);
10804
+ const resolved = path33.isAbsolute(candidate) ? candidate : path33.resolve(candidate);
10367
10805
  try {
10368
10806
  await access3(resolved, constants3.F_OK);
10369
10807
  return resolved;
@@ -10392,7 +10830,7 @@ async function resolveWorkspaceTemplateFile(template) {
10392
10830
  return void 0;
10393
10831
  }
10394
10832
  try {
10395
- const stats = await stat5(path32.resolve(template));
10833
+ const stats = await stat5(path33.resolve(template));
10396
10834
  return stats.isFile() ? template : void 0;
10397
10835
  } catch {
10398
10836
  return template;
@@ -10416,7 +10854,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
10416
10854
  return "";
10417
10855
  }
10418
10856
  const buildList = (files) => files.map((absolutePath) => {
10419
- const fileName = path32.basename(absolutePath);
10857
+ const fileName = path33.basename(absolutePath);
10420
10858
  const fileUri = pathToFileUri3(absolutePath);
10421
10859
  return `* [${fileName}](${fileUri})`;
10422
10860
  });
@@ -10437,7 +10875,7 @@ function collectAttachmentFiles(attachments) {
10437
10875
  }
10438
10876
  const unique = /* @__PURE__ */ new Map();
10439
10877
  for (const attachment of attachments) {
10440
- const absolutePath = path32.resolve(attachment);
10878
+ const absolutePath = path33.resolve(attachment);
10441
10879
  if (!unique.has(absolutePath)) {
10442
10880
  unique.set(absolutePath, absolutePath);
10443
10881
  }
@@ -10445,7 +10883,7 @@ function collectAttachmentFiles(attachments) {
10445
10883
  return Array.from(unique.values());
10446
10884
  }
10447
10885
  function pathToFileUri3(filePath) {
10448
- const absolutePath = path32.isAbsolute(filePath) ? filePath : path32.resolve(filePath);
10886
+ const absolutePath = path33.isAbsolute(filePath) ? filePath : path33.resolve(filePath);
10449
10887
  const normalizedPath = absolutePath.replace(/\\/g, "/");
10450
10888
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
10451
10889
  return `file:///${normalizedPath}`;
@@ -10458,7 +10896,7 @@ function normalizeAttachments(attachments) {
10458
10896
  }
10459
10897
  const deduped = /* @__PURE__ */ new Set();
10460
10898
  for (const attachment of attachments) {
10461
- deduped.add(path32.resolve(attachment));
10899
+ deduped.add(path33.resolve(attachment));
10462
10900
  }
10463
10901
  return Array.from(deduped);
10464
10902
  }
@@ -10467,7 +10905,7 @@ function mergeAttachments(all) {
10467
10905
  for (const list of all) {
10468
10906
  if (!list) continue;
10469
10907
  for (const inputFile of list) {
10470
- deduped.add(path32.resolve(inputFile));
10908
+ deduped.add(path33.resolve(inputFile));
10471
10909
  }
10472
10910
  }
10473
10911
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -10515,9 +10953,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
10515
10953
 
10516
10954
  // src/evaluation/providers/targets-file.ts
10517
10955
  import { constants as constants4 } from "node:fs";
10518
- import { access as access4, readFile as readFile11 } from "node:fs/promises";
10519
- import path33 from "node:path";
10520
- import { parse as parse4 } from "yaml";
10956
+ import { access as access4, readFile as readFile13 } from "node:fs/promises";
10957
+ import path34 from "node:path";
10958
+ import { parse as parse5 } from "yaml";
10521
10959
  function isRecord(value) {
10522
10960
  return typeof value === "object" && value !== null && !Array.isArray(value);
10523
10961
  }
@@ -10556,12 +10994,12 @@ async function fileExists3(filePath) {
10556
10994
  }
10557
10995
  }
10558
10996
  async function readTargetDefinitions(filePath) {
10559
- const absolutePath = path33.resolve(filePath);
10997
+ const absolutePath = path34.resolve(filePath);
10560
10998
  if (!await fileExists3(absolutePath)) {
10561
10999
  throw new Error(`targets.yaml not found at ${absolutePath}`);
10562
11000
  }
10563
- const raw = await readFile11(absolutePath, "utf8");
10564
- const parsed = parse4(raw);
11001
+ const raw = await readFile13(absolutePath, "utf8");
11002
+ const parsed = parse5(raw);
10565
11003
  if (!isRecord(parsed)) {
10566
11004
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
10567
11005
  }
@@ -10576,16 +11014,16 @@ function listTargetNames(definitions) {
10576
11014
  }
10577
11015
 
10578
11016
  // src/evaluation/providers/provider-discovery.ts
10579
- import path34 from "node:path";
11017
+ import path35 from "node:path";
10580
11018
  import fg from "fast-glob";
10581
11019
  async function discoverProviders(registry, baseDir) {
10582
11020
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
10583
11021
  const candidateDirs = [];
10584
- let dir = path34.resolve(baseDir);
10585
- const root = path34.parse(dir).root;
11022
+ let dir = path35.resolve(baseDir);
11023
+ const root = path35.parse(dir).root;
10586
11024
  while (dir !== root) {
10587
- candidateDirs.push(path34.join(dir, ".agentv", "providers"));
10588
- dir = path34.dirname(dir);
11025
+ candidateDirs.push(path35.join(dir, ".agentv", "providers"));
11026
+ dir = path35.dirname(dir);
10589
11027
  }
10590
11028
  let files = [];
10591
11029
  for (const providersDir of candidateDirs) {
@@ -10601,7 +11039,7 @@ async function discoverProviders(registry, baseDir) {
10601
11039
  }
10602
11040
  const discoveredKinds = [];
10603
11041
  for (const filePath of files) {
10604
- const basename = path34.basename(filePath);
11042
+ const basename = path35.basename(filePath);
10605
11043
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
10606
11044
  if (registry.has(kindName)) {
10607
11045
  continue;
@@ -10727,150 +11165,6 @@ import { mkdtemp as mkdtemp2, rm as rm3, writeFile as writeFile6 } from "node:fs
10727
11165
  import { tmpdir as tmpdir2 } from "node:os";
10728
11166
  import { dirname, join } from "node:path";
10729
11167
 
10730
- // src/runtime/exec.ts
10731
- function shellEscapePath(value) {
10732
- if (process.platform === "win32") {
10733
- return `"${value.replaceAll('"', '""')}"`;
10734
- }
10735
- return `'${value.replaceAll("'", `'"'"'`)}'`;
10736
- }
10737
- async function execFileWithStdin(argv, stdinPayload, options = {}) {
10738
- if (argv.length === 0) {
10739
- throw new Error("Executable argv must include at least one entry");
10740
- }
10741
- if (typeof Bun !== "undefined") {
10742
- return execFileWithStdinBun(argv, stdinPayload, options);
10743
- }
10744
- return execFileWithStdinNode(argv, stdinPayload, options);
10745
- }
10746
- async function execFileWithStdinBun(argv, stdinPayload, options) {
10747
- const command = [...argv];
10748
- const encoder = new TextEncoder();
10749
- const proc = Bun.spawn(command, {
10750
- cwd: options.cwd,
10751
- stdin: encoder.encode(stdinPayload),
10752
- stdout: "pipe",
10753
- stderr: "pipe",
10754
- // Merge additional env vars with process.env
10755
- env: options.env ? { ...process.env, ...options.env } : process.env
10756
- });
10757
- let timedOut = false;
10758
- const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
10759
- timedOut = true;
10760
- proc.kill("SIGKILL");
10761
- }, options.timeoutMs) : void 0;
10762
- try {
10763
- const stdoutPromise = proc.stdout ? new Response(proc.stdout).text() : Promise.resolve("");
10764
- const stderrPromise = proc.stderr ? new Response(proc.stderr).text() : Promise.resolve("");
10765
- const [stdout, stderr, exitCode] = await Promise.all([
10766
- stdoutPromise,
10767
- stderrPromise,
10768
- proc.exited
10769
- ]);
10770
- if (timedOut) {
10771
- throw new Error(`Process timed out after ${options.timeoutMs}ms`);
10772
- }
10773
- return {
10774
- stdout: stdout.replace(/\r\n/g, "\n"),
10775
- stderr: stderr.replace(/\r\n/g, "\n"),
10776
- exitCode
10777
- };
10778
- } finally {
10779
- if (timeout !== void 0) {
10780
- clearTimeout(timeout);
10781
- }
10782
- }
10783
- }
10784
- async function execFileWithStdinNode(argv, stdinPayload, options) {
10785
- const { spawn: spawn5 } = await import("node:child_process");
10786
- return new Promise((resolve, reject) => {
10787
- const [cmd, ...args] = argv;
10788
- const child = spawn5(cmd, args, {
10789
- cwd: options.cwd,
10790
- stdio: ["pipe", "pipe", "pipe"],
10791
- // Merge additional env vars with process.env
10792
- env: options.env ? { ...process.env, ...options.env } : process.env
10793
- });
10794
- const stdoutChunks = [];
10795
- const stderrChunks = [];
10796
- child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
10797
- child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
10798
- let timedOut = false;
10799
- const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
10800
- timedOut = true;
10801
- child.kill("SIGKILL");
10802
- }, options.timeoutMs) : void 0;
10803
- child.on("error", (error) => {
10804
- if (timeout !== void 0) clearTimeout(timeout);
10805
- reject(error);
10806
- });
10807
- child.on("close", (code) => {
10808
- if (timeout !== void 0) clearTimeout(timeout);
10809
- if (timedOut) {
10810
- reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
10811
- return;
10812
- }
10813
- const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
10814
- const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
10815
- resolve({
10816
- stdout,
10817
- stderr,
10818
- exitCode: code ?? 0
10819
- });
10820
- });
10821
- if (child.stdin) {
10822
- child.stdin.write(stdinPayload);
10823
- child.stdin.end();
10824
- }
10825
- });
10826
- }
10827
- async function execShellWithStdin(command, stdinPayload, options = {}) {
10828
- const { mkdir: mkdir16, readFile: readFile16, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
10829
- const { tmpdir: tmpdir3 } = await import("node:os");
10830
- const path51 = await import("node:path");
10831
- const { randomUUID: randomUUID10 } = await import("node:crypto");
10832
- const dir = path51.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
10833
- await mkdir16(dir, { recursive: true });
10834
- const stdinPath = path51.join(dir, "stdin.txt");
10835
- const stdoutPath = path51.join(dir, "stdout.txt");
10836
- const stderrPath = path51.join(dir, "stderr.txt");
10837
- await writeFile9(stdinPath, stdinPayload, "utf8");
10838
- const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
10839
- const { spawn: spawn5 } = await import("node:child_process");
10840
- try {
10841
- const exitCode = await new Promise((resolve, reject) => {
10842
- const child = spawn5(wrappedCommand, {
10843
- shell: true,
10844
- cwd: options.cwd,
10845
- stdio: ["ignore", "ignore", "ignore"],
10846
- // Merge additional env vars with process.env
10847
- env: options.env ? { ...process.env, ...options.env } : process.env
10848
- });
10849
- const timeout = options.timeoutMs ? setTimeout(() => {
10850
- child.kill();
10851
- reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
10852
- }, options.timeoutMs) : void 0;
10853
- child.on("error", (error) => {
10854
- if (timeout !== void 0) {
10855
- clearTimeout(timeout);
10856
- }
10857
- reject(error);
10858
- });
10859
- child.on("exit", (code) => {
10860
- if (timeout !== void 0) {
10861
- clearTimeout(timeout);
10862
- }
10863
- resolve(code ?? 0);
10864
- });
10865
- });
10866
- const stdout = (await readFile16(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
10867
- const stderr = (await readFile16(stderrPath, "utf8")).replace(/\r\n/g, "\n");
10868
- return { stdout, stderr, exitCode };
10869
- } finally {
10870
- await rm6(dir, { recursive: true, force: true });
10871
- }
10872
- }
10873
-
10874
11168
  // src/runtime/target-proxy.ts
10875
11169
  import { randomBytes } from "node:crypto";
10876
11170
  import { createServer } from "node:http";
@@ -11147,6 +11441,18 @@ function toCamelCaseDeep(obj) {
11147
11441
  return obj;
11148
11442
  }
11149
11443
 
11444
+ // src/evaluation/workspace/repo-checkout.ts
11445
+ function getRepoCheckoutRef(checkout) {
11446
+ return checkout?.base_commit ?? checkout?.ref ?? "HEAD";
11447
+ }
11448
+ function getRepoCheckoutTargets(repos) {
11449
+ if (!repos) return [];
11450
+ return repos.filter((repo) => repo.checkout?.base_commit || repo.checkout?.ref).map((repo) => ({
11451
+ path: repo.path,
11452
+ ref: getRepoCheckoutRef(repo.checkout)
11453
+ }));
11454
+ }
11455
+
11150
11456
  // src/evaluation/evaluators/code-evaluator.ts
11151
11457
  var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
11152
11458
  var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
@@ -11281,13 +11587,31 @@ var CodeEvaluator = class {
11281
11587
  const workspaceEnv = context.workspacePath ? { AGENTV_WORKSPACE_PATH: context.workspacePath } : void 0;
11282
11588
  const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
11283
11589
  try {
11284
- const stdout = await executeScript(
11285
- this.command,
11286
- inputPayload,
11287
- this.agentTimeoutMs,
11288
- this.cwd,
11289
- env
11290
- );
11590
+ let stdout;
11591
+ if (context.dockerConfig) {
11592
+ const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27.js");
11593
+ const dockerProvider = new DockerWorkspaceProvider2(context.dockerConfig);
11594
+ const result = await dockerProvider.runGraderInContainer({
11595
+ command: [...this.command],
11596
+ stdin: inputPayload,
11597
+ repoCheckouts: getRepoCheckoutTargets(context.evalCase.workspace?.repos)
11598
+ });
11599
+ if (result.exitCode !== 0) {
11600
+ const trimmedErr = result.stderr.trim();
11601
+ throw new Error(
11602
+ trimmedErr.length > 0 ? `Code evaluator exited with code ${result.exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${result.exitCode}`
11603
+ );
11604
+ }
11605
+ stdout = result.stdout.trim();
11606
+ } else {
11607
+ stdout = await executeScript(
11608
+ this.command,
11609
+ inputPayload,
11610
+ this.agentTimeoutMs,
11611
+ this.cwd,
11612
+ env
11613
+ );
11614
+ }
11291
11615
  const parsed = parseJsonSafe(stdout);
11292
11616
  const score = clampScore(typeof parsed?.score === "number" ? parsed.score : 0);
11293
11617
  const assertions = Array.isArray(parsed?.assertions) ? parsed.assertions.filter(
@@ -11380,7 +11704,7 @@ import { generateText as generateText3 } from "ai";
11380
11704
 
11381
11705
  // src/evaluation/evaluators/llm-grader.ts
11382
11706
  import fs2 from "node:fs/promises";
11383
- import path35 from "node:path";
11707
+ import path36 from "node:path";
11384
11708
  import { generateText as generateText2, stepCountIs, tool } from "ai";
11385
11709
  import { z as z3 } from "zod";
11386
11710
  var DEFAULT_MAX_STEPS = 10;
@@ -11464,6 +11788,15 @@ var scoreRangeEvaluationSchema = z3.object({
11464
11788
  checks: z3.array(scoreRangeCheckResultSchema).describe("Scores for each rubric criterion"),
11465
11789
  overall_reasoning: z3.string().describe("Overall assessment summary (1-2 sentences)").optional()
11466
11790
  });
11791
+ function resolveContentBasePath(context) {
11792
+ if (context.workspacePath) {
11793
+ return context.workspacePath;
11794
+ }
11795
+ if ("config" in context.target && context.target.config && typeof context.target.config === "object" && "cwd" in context.target.config && typeof context.target.config.cwd === "string") {
11796
+ return context.target.config.cwd;
11797
+ }
11798
+ return void 0;
11799
+ }
11467
11800
  var LlmGraderEvaluator = class {
11468
11801
  kind = "llm-grader";
11469
11802
  resolveGraderProvider;
@@ -11481,24 +11814,46 @@ var LlmGraderEvaluator = class {
11481
11814
  this.graderTargetProvider = options.graderTargetProvider ?? options.judgeTargetProvider;
11482
11815
  }
11483
11816
  async evaluate(context) {
11817
+ const preparedContext = await this.prepareContext(context);
11484
11818
  if (this.graderTargetProvider) {
11485
- return this.evaluateWithGraderTarget(context);
11819
+ return this.evaluateWithGraderTarget(preparedContext);
11486
11820
  }
11487
- const graderProvider = await this.resolveGraderProvider(context);
11821
+ const graderProvider = await this.resolveGraderProvider(preparedContext);
11488
11822
  if (!graderProvider) {
11489
11823
  throw new Error("No grader provider available for LLM grading");
11490
11824
  }
11491
11825
  if (graderProvider.kind === "agentv") {
11492
- return this.evaluateBuiltIn(context, graderProvider);
11826
+ return this.evaluateBuiltIn(preparedContext, graderProvider);
11493
11827
  }
11494
11828
  if (isAgentProvider(graderProvider)) {
11495
- return this.evaluateWithDelegatedAgent(context, graderProvider);
11829
+ return this.evaluateWithDelegatedAgent(preparedContext, graderProvider);
11496
11830
  }
11497
- const config = context.evaluator;
11831
+ const config = preparedContext.evaluator;
11498
11832
  if (config?.type === "llm-grader" && config.rubrics && config.rubrics.length > 0) {
11499
- return this.evaluateWithRubrics(context, graderProvider, config.rubrics);
11833
+ return this.evaluateWithRubrics(preparedContext, graderProvider, config.rubrics);
11500
11834
  }
11501
- return this.evaluateFreeform(context, graderProvider);
11835
+ return this.evaluateFreeform(preparedContext, graderProvider);
11836
+ }
11837
+ async prepareContext(context) {
11838
+ const config = context.evaluator;
11839
+ if (config?.type !== "llm-grader" || !context.output) {
11840
+ return context;
11841
+ }
11842
+ const lastAssistant = [...context.output].reverse().find((message) => message.role === "assistant" && message.content !== void 0);
11843
+ if (!lastAssistant || typeof lastAssistant.content === "string") {
11844
+ return context;
11845
+ }
11846
+ const extracted = await extractTextWithPreprocessors(
11847
+ lastAssistant.content,
11848
+ config.preprocessors,
11849
+ {
11850
+ basePath: resolveContentBasePath(context)
11851
+ }
11852
+ );
11853
+ return {
11854
+ ...context,
11855
+ candidate: appendPreprocessingWarnings(extracted.text, extracted.warnings)
11856
+ };
11502
11857
  }
11503
11858
  // ---------------------------------------------------------------------------
11504
11859
  // LLM mode (existing)
@@ -12383,8 +12738,8 @@ function toAiSdkImageParts(images) {
12383
12738
  }));
12384
12739
  }
12385
12740
  function resolveSandboxed(basePath, relativePath) {
12386
- const resolved = path35.resolve(basePath, relativePath);
12387
- if (!resolved.startsWith(basePath + path35.sep) && resolved !== basePath) {
12741
+ const resolved = path36.resolve(basePath, relativePath);
12742
+ if (!resolved.startsWith(basePath + path36.sep) && resolved !== basePath) {
12388
12743
  throw new Error(`Path '${relativePath}' is outside the workspace`);
12389
12744
  }
12390
12745
  return resolved;
@@ -12417,11 +12772,11 @@ function createFilesystemTools(workspacePath) {
12417
12772
  execute: async (input) => {
12418
12773
  try {
12419
12774
  const resolved = resolveSandboxed(workspacePath, input.path);
12420
- const stat11 = await fs2.stat(resolved);
12421
- if (stat11.isDirectory()) {
12775
+ const stat12 = await fs2.stat(resolved);
12776
+ if (stat12.isDirectory()) {
12422
12777
  return { error: `'${input.path}' is a directory, not a file` };
12423
12778
  }
12424
- const buffer = Buffer.alloc(Math.min(stat11.size, MAX_FILE_SIZE));
12779
+ const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
12425
12780
  const fd = await fs2.open(resolved, "r");
12426
12781
  try {
12427
12782
  await fd.read(buffer, 0, buffer.length, 0);
@@ -12429,8 +12784,8 @@ function createFilesystemTools(workspacePath) {
12429
12784
  await fd.close();
12430
12785
  }
12431
12786
  const content = buffer.toString("utf-8");
12432
- const truncated = stat11.size > MAX_FILE_SIZE;
12433
- return { content, truncated, size: stat11.size };
12787
+ const truncated = stat12.size > MAX_FILE_SIZE;
12788
+ return { content, truncated, size: stat12.size };
12434
12789
  } catch (error) {
12435
12790
  return { error: error instanceof Error ? error.message : String(error) };
12436
12791
  }
@@ -12474,15 +12829,15 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
12474
12829
  for (const entry of entries) {
12475
12830
  if (matches.length >= MAX_SEARCH_MATCHES) return;
12476
12831
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
12477
- const fullPath = path35.join(dirPath, entry.name);
12832
+ const fullPath = path36.join(dirPath, entry.name);
12478
12833
  if (entry.isDirectory()) {
12479
12834
  await searchDirectory(fullPath, workspacePath, regex, matches);
12480
12835
  } else if (entry.isFile()) {
12481
- const ext = path35.extname(entry.name).toLowerCase();
12836
+ const ext = path36.extname(entry.name).toLowerCase();
12482
12837
  if (BINARY_EXTENSIONS.has(ext)) continue;
12483
12838
  try {
12484
- const stat11 = await fs2.stat(fullPath);
12485
- if (stat11.size > MAX_FILE_SIZE) continue;
12839
+ const stat12 = await fs2.stat(fullPath);
12840
+ if (stat12.size > MAX_FILE_SIZE) continue;
12486
12841
  const content = await fs2.readFile(fullPath, "utf-8");
12487
12842
  const lines = content.split("\n");
12488
12843
  for (let i = 0; i < lines.length; i++) {
@@ -12490,7 +12845,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
12490
12845
  regex.lastIndex = 0;
12491
12846
  if (regex.test(lines[i])) {
12492
12847
  matches.push({
12493
- file: path35.relative(workspacePath, fullPath),
12848
+ file: path36.relative(workspacePath, fullPath),
12494
12849
  line: i + 1,
12495
12850
  text: lines[i].substring(0, 200)
12496
12851
  });
@@ -13123,115 +13478,115 @@ var FieldAccuracyEvaluator = class {
13123
13478
  * Evaluate a single field against the expected value.
13124
13479
  */
13125
13480
  evaluateField(fieldConfig, candidateData, expectedData) {
13126
- const { path: path51, match, required = true, weight = 1 } = fieldConfig;
13127
- const candidateValue = resolvePath(candidateData, path51);
13128
- const expectedValue = resolvePath(expectedData, path51);
13481
+ const { path: path53, match, required = true, weight = 1 } = fieldConfig;
13482
+ const candidateValue = resolvePath(candidateData, path53);
13483
+ const expectedValue = resolvePath(expectedData, path53);
13129
13484
  if (expectedValue === void 0) {
13130
13485
  return {
13131
- path: path51,
13486
+ path: path53,
13132
13487
  score: 1,
13133
13488
  // No expected value means no comparison needed
13134
13489
  weight,
13135
13490
  hit: true,
13136
- message: `${path51}: no expected value`
13491
+ message: `${path53}: no expected value`
13137
13492
  };
13138
13493
  }
13139
13494
  if (candidateValue === void 0) {
13140
13495
  if (required) {
13141
13496
  return {
13142
- path: path51,
13497
+ path: path53,
13143
13498
  score: 0,
13144
13499
  weight,
13145
13500
  hit: false,
13146
- message: `${path51} (required, missing)`
13501
+ message: `${path53} (required, missing)`
13147
13502
  };
13148
13503
  }
13149
13504
  return {
13150
- path: path51,
13505
+ path: path53,
13151
13506
  score: 1,
13152
13507
  // Don't penalize missing optional fields
13153
13508
  weight: 0,
13154
13509
  // Zero weight means it won't affect the score
13155
13510
  hit: true,
13156
- message: `${path51}: optional field missing`
13511
+ message: `${path53}: optional field missing`
13157
13512
  };
13158
13513
  }
13159
13514
  switch (match) {
13160
13515
  case "exact":
13161
- return this.compareExact(path51, candidateValue, expectedValue, weight);
13516
+ return this.compareExact(path53, candidateValue, expectedValue, weight);
13162
13517
  case "numeric_tolerance":
13163
13518
  return this.compareNumericTolerance(
13164
- path51,
13519
+ path53,
13165
13520
  candidateValue,
13166
13521
  expectedValue,
13167
13522
  fieldConfig,
13168
13523
  weight
13169
13524
  );
13170
13525
  case "date":
13171
- return this.compareDate(path51, candidateValue, expectedValue, fieldConfig, weight);
13526
+ return this.compareDate(path53, candidateValue, expectedValue, fieldConfig, weight);
13172
13527
  default:
13173
13528
  return {
13174
- path: path51,
13529
+ path: path53,
13175
13530
  score: 0,
13176
13531
  weight,
13177
13532
  hit: false,
13178
- message: `${path51}: unknown match type "${match}"`
13533
+ message: `${path53}: unknown match type "${match}"`
13179
13534
  };
13180
13535
  }
13181
13536
  }
13182
13537
  /**
13183
13538
  * Exact equality comparison.
13184
13539
  */
13185
- compareExact(path51, candidateValue, expectedValue, weight) {
13540
+ compareExact(path53, candidateValue, expectedValue, weight) {
13186
13541
  if (deepEqual(candidateValue, expectedValue)) {
13187
13542
  return {
13188
- path: path51,
13543
+ path: path53,
13189
13544
  score: 1,
13190
13545
  weight,
13191
13546
  hit: true,
13192
- message: path51
13547
+ message: path53
13193
13548
  };
13194
13549
  }
13195
13550
  if (typeof candidateValue !== typeof expectedValue) {
13196
13551
  return {
13197
- path: path51,
13552
+ path: path53,
13198
13553
  score: 0,
13199
13554
  weight,
13200
13555
  hit: false,
13201
- message: `${path51} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
13556
+ message: `${path53} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
13202
13557
  };
13203
13558
  }
13204
13559
  return {
13205
- path: path51,
13560
+ path: path53,
13206
13561
  score: 0,
13207
13562
  weight,
13208
13563
  hit: false,
13209
- message: `${path51} (value mismatch)`
13564
+ message: `${path53} (value mismatch)`
13210
13565
  };
13211
13566
  }
13212
13567
  /**
13213
13568
  * Numeric comparison with absolute or relative tolerance.
13214
13569
  */
13215
- compareNumericTolerance(path51, candidateValue, expectedValue, fieldConfig, weight) {
13570
+ compareNumericTolerance(path53, candidateValue, expectedValue, fieldConfig, weight) {
13216
13571
  const { tolerance = 0, relative = false } = fieldConfig;
13217
13572
  const candidateNum = toNumber(candidateValue);
13218
13573
  const expectedNum = toNumber(expectedValue);
13219
13574
  if (candidateNum === null || expectedNum === null) {
13220
13575
  return {
13221
- path: path51,
13576
+ path: path53,
13222
13577
  score: 0,
13223
13578
  weight,
13224
13579
  hit: false,
13225
- message: `${path51} (non-numeric value)`
13580
+ message: `${path53} (non-numeric value)`
13226
13581
  };
13227
13582
  }
13228
13583
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
13229
13584
  return {
13230
- path: path51,
13585
+ path: path53,
13231
13586
  score: 0,
13232
13587
  weight,
13233
13588
  hit: false,
13234
- message: `${path51} (invalid numeric value)`
13589
+ message: `${path53} (invalid numeric value)`
13235
13590
  };
13236
13591
  }
13237
13592
  const diff = Math.abs(candidateNum - expectedNum);
@@ -13244,61 +13599,61 @@ var FieldAccuracyEvaluator = class {
13244
13599
  }
13245
13600
  if (withinTolerance) {
13246
13601
  return {
13247
- path: path51,
13602
+ path: path53,
13248
13603
  score: 1,
13249
13604
  weight,
13250
13605
  hit: true,
13251
- message: `${path51} (within tolerance: diff=${diff.toFixed(2)})`
13606
+ message: `${path53} (within tolerance: diff=${diff.toFixed(2)})`
13252
13607
  };
13253
13608
  }
13254
13609
  return {
13255
- path: path51,
13610
+ path: path53,
13256
13611
  score: 0,
13257
13612
  weight,
13258
13613
  hit: false,
13259
- message: `${path51} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
13614
+ message: `${path53} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
13260
13615
  };
13261
13616
  }
13262
13617
  /**
13263
13618
  * Date comparison with format normalization.
13264
13619
  */
13265
- compareDate(path51, candidateValue, expectedValue, fieldConfig, weight) {
13620
+ compareDate(path53, candidateValue, expectedValue, fieldConfig, weight) {
13266
13621
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
13267
13622
  const candidateDate = parseDate(String(candidateValue), formats);
13268
13623
  const expectedDate = parseDate(String(expectedValue), formats);
13269
13624
  if (candidateDate === null) {
13270
13625
  return {
13271
- path: path51,
13626
+ path: path53,
13272
13627
  score: 0,
13273
13628
  weight,
13274
13629
  hit: false,
13275
- message: `${path51} (unparseable candidate date)`
13630
+ message: `${path53} (unparseable candidate date)`
13276
13631
  };
13277
13632
  }
13278
13633
  if (expectedDate === null) {
13279
13634
  return {
13280
- path: path51,
13635
+ path: path53,
13281
13636
  score: 0,
13282
13637
  weight,
13283
13638
  hit: false,
13284
- message: `${path51} (unparseable expected date)`
13639
+ message: `${path53} (unparseable expected date)`
13285
13640
  };
13286
13641
  }
13287
13642
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
13288
13643
  return {
13289
- path: path51,
13644
+ path: path53,
13290
13645
  score: 1,
13291
13646
  weight,
13292
13647
  hit: true,
13293
- message: path51
13648
+ message: path53
13294
13649
  };
13295
13650
  }
13296
13651
  return {
13297
- path: path51,
13652
+ path: path53,
13298
13653
  score: 0,
13299
13654
  weight,
13300
13655
  hit: false,
13301
- message: `${path51} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
13656
+ message: `${path53} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
13302
13657
  };
13303
13658
  }
13304
13659
  /**
@@ -13331,11 +13686,11 @@ var FieldAccuracyEvaluator = class {
13331
13686
  };
13332
13687
  }
13333
13688
  };
13334
- function resolvePath(obj, path51) {
13335
- if (!path51 || !obj) {
13689
+ function resolvePath(obj, path53) {
13690
+ if (!path53 || !obj) {
13336
13691
  return void 0;
13337
13692
  }
13338
- const parts = path51.split(/\.|\[|\]/).filter((p) => p.length > 0);
13693
+ const parts = path53.split(/\.|\[|\]/).filter((p) => p.length > 0);
13339
13694
  let current = obj;
13340
13695
  for (const part of parts) {
13341
13696
  if (current === null || current === void 0) {
@@ -13827,8 +14182,8 @@ var TokenUsageEvaluator = class {
13827
14182
  };
13828
14183
 
13829
14184
  // src/evaluation/evaluators/tool-trajectory.ts
13830
- function getNestedValue(obj, path51) {
13831
- const parts = path51.split(".");
14185
+ function getNestedValue(obj, path53) {
14186
+ const parts = path53.split(".");
13832
14187
  let current = obj;
13833
14188
  for (const part of parts) {
13834
14189
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -14451,7 +14806,7 @@ function runEqualsAssertion(output, value) {
14451
14806
  import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
14452
14807
  import { existsSync as existsSync5 } from "node:fs";
14453
14808
  import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir7, stat as stat8 } from "node:fs/promises";
14454
- import path44 from "node:path";
14809
+ import path45 from "node:path";
14455
14810
  import micromatch3 from "micromatch";
14456
14811
 
14457
14812
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -14665,7 +15020,7 @@ var InlineAssertEvaluator = class {
14665
15020
  };
14666
15021
 
14667
15022
  // src/evaluation/evaluators/prompt-resolution.ts
14668
- import path36 from "node:path";
15023
+ import path37 from "node:path";
14669
15024
  async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
14670
15025
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
14671
15026
  if (!context) {
@@ -14694,6 +15049,15 @@ async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
14694
15049
  }
14695
15050
  return void 0;
14696
15051
  }
15052
+ function containsTemplateVariables(text) {
15053
+ const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
15054
+ for (const match of text.matchAll(variablePattern)) {
15055
+ if (VALID_TEMPLATE_VARIABLES.has(match[1])) {
15056
+ return true;
15057
+ }
15058
+ }
15059
+ return false;
15060
+ }
14697
15061
  async function executePromptTemplate(script, context, config, timeoutMs) {
14698
15062
  const payload = {
14699
15063
  criteria: context.evalCase.criteria,
@@ -14708,7 +15072,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
14708
15072
  };
14709
15073
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
14710
15074
  const scriptPath = script[script.length - 1];
14711
- const cwd = path36.dirname(scriptPath);
15075
+ const cwd = path37.dirname(scriptPath);
14712
15076
  try {
14713
15077
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
14714
15078
  const prompt = stdout.trim();
@@ -14766,9 +15130,20 @@ var llmGraderFactory = (config, context) => {
14766
15130
  },
14767
15131
  agentTimeoutMs
14768
15132
  );
15133
+ const isFromInlinePrompt = !c.resolvedPromptScript?.length && !c.resolvedPromptPath && !c.promptPath;
15134
+ let evaluatorTemplateOverride;
15135
+ let evalCase = evalContext.evalCase;
15136
+ if (customPrompt) {
15137
+ if (!isFromInlinePrompt || containsTemplateVariables(customPrompt)) {
15138
+ evaluatorTemplateOverride = customPrompt;
15139
+ } else {
15140
+ evalCase = { ...evalCase, criteria: customPrompt };
15141
+ }
15142
+ }
14769
15143
  return evaluator.evaluate({
14770
15144
  ...evalContext,
14771
- evaluatorTemplateOverride: customPrompt,
15145
+ evalCase,
15146
+ evaluatorTemplateOverride,
14772
15147
  evaluator: c
14773
15148
  });
14774
15149
  }
@@ -14980,16 +15355,16 @@ function createBuiltinRegistry() {
14980
15355
  }
14981
15356
 
14982
15357
  // src/evaluation/registry/assertion-discovery.ts
14983
- import path37 from "node:path";
15358
+ import path38 from "node:path";
14984
15359
  import fg2 from "fast-glob";
14985
15360
  async function discoverAssertions(registry, baseDir) {
14986
15361
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
14987
15362
  const candidateDirs = [];
14988
- let dir = path37.resolve(baseDir);
14989
- const root = path37.parse(dir).root;
15363
+ let dir = path38.resolve(baseDir);
15364
+ const root = path38.parse(dir).root;
14990
15365
  while (dir !== root) {
14991
- candidateDirs.push(path37.join(dir, ".agentv", "assertions"));
14992
- dir = path37.dirname(dir);
15366
+ candidateDirs.push(path38.join(dir, ".agentv", "assertions"));
15367
+ dir = path38.dirname(dir);
14993
15368
  }
14994
15369
  let files = [];
14995
15370
  for (const assertionsDir of candidateDirs) {
@@ -15005,7 +15380,7 @@ async function discoverAssertions(registry, baseDir) {
15005
15380
  }
15006
15381
  const discoveredTypes = [];
15007
15382
  for (const filePath of files) {
15008
- const basename = path37.basename(filePath);
15383
+ const basename = path38.basename(filePath);
15009
15384
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
15010
15385
  if (registry.has(typeName)) {
15011
15386
  continue;
@@ -15023,17 +15398,17 @@ async function discoverAssertions(registry, baseDir) {
15023
15398
  }
15024
15399
 
15025
15400
  // src/evaluation/registry/grader-discovery.ts
15026
- import path38 from "node:path";
15401
+ import path39 from "node:path";
15027
15402
  import fg3 from "fast-glob";
15028
15403
  async function discoverGraders(registry, baseDir) {
15029
15404
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
15030
15405
  const candidateDirs = [];
15031
- let dir = path38.resolve(baseDir);
15032
- const root = path38.parse(dir).root;
15406
+ let dir = path39.resolve(baseDir);
15407
+ const root = path39.parse(dir).root;
15033
15408
  while (dir !== root) {
15034
- candidateDirs.push(path38.join(dir, ".agentv", "graders"));
15035
- candidateDirs.push(path38.join(dir, ".agentv", "judges"));
15036
- dir = path38.dirname(dir);
15409
+ candidateDirs.push(path39.join(dir, ".agentv", "graders"));
15410
+ candidateDirs.push(path39.join(dir, ".agentv", "judges"));
15411
+ dir = path39.dirname(dir);
15037
15412
  }
15038
15413
  let files = [];
15039
15414
  for (const gradersDir of candidateDirs) {
@@ -15049,7 +15424,7 @@ async function discoverGraders(registry, baseDir) {
15049
15424
  }
15050
15425
  const discoveredTypes = [];
15051
15426
  for (const filePath of files) {
15052
- const basename = path38.basename(filePath);
15427
+ const basename = path39.basename(filePath);
15053
15428
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
15054
15429
  if (registry.has(typeName)) {
15055
15430
  continue;
@@ -15209,7 +15584,7 @@ function getTCritical(df) {
15209
15584
  // src/evaluation/workspace/file-changes.ts
15210
15585
  import { exec as execCallback } from "node:child_process";
15211
15586
  import { readdirSync as readdirSync2, statSync } from "node:fs";
15212
- import path39 from "node:path";
15587
+ import path40 from "node:path";
15213
15588
  import { promisify as promisify4 } from "node:util";
15214
15589
  var execAsync4 = promisify4(execCallback);
15215
15590
  function gitExecOpts(workspacePath) {
@@ -15243,10 +15618,10 @@ async function stageNestedRepoChanges(workspacePath) {
15243
15618
  }
15244
15619
  for (const entry of entries) {
15245
15620
  if (entry === ".git" || entry === "node_modules") continue;
15246
- const childPath = path39.join(workspacePath, entry);
15621
+ const childPath = path40.join(workspacePath, entry);
15247
15622
  try {
15248
15623
  if (!statSync(childPath).isDirectory()) continue;
15249
- if (!statSync(path39.join(childPath, ".git")).isDirectory()) continue;
15624
+ if (!statSync(path40.join(childPath, ".git")).isDirectory()) continue;
15250
15625
  } catch {
15251
15626
  continue;
15252
15627
  }
@@ -15257,7 +15632,7 @@ async function stageNestedRepoChanges(workspacePath) {
15257
15632
 
15258
15633
  // src/evaluation/workspace/manager.ts
15259
15634
  import { cp, mkdir as mkdir12, readdir as readdir4, rm as rm4, stat as stat6 } from "node:fs/promises";
15260
- import path40 from "node:path";
15635
+ import path41 from "node:path";
15261
15636
  var TemplateNotFoundError = class extends Error {
15262
15637
  constructor(templatePath) {
15263
15638
  super(`Workspace template not found: ${templatePath}`);
@@ -15287,14 +15662,14 @@ async function isDirectory(filePath) {
15287
15662
  }
15288
15663
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
15289
15664
  const root = workspaceRoot ?? getWorkspacesRoot();
15290
- return path40.join(root, evalRunId, caseId);
15665
+ return path41.join(root, evalRunId, caseId);
15291
15666
  }
15292
15667
  async function copyDirectoryRecursive(src, dest) {
15293
15668
  await mkdir12(dest, { recursive: true });
15294
15669
  const entries = await readdir4(src, { withFileTypes: true });
15295
15670
  for (const entry of entries) {
15296
- const srcPath = path40.join(src, entry.name);
15297
- const destPath = path40.join(dest, entry.name);
15671
+ const srcPath = path41.join(src, entry.name);
15672
+ const destPath = path41.join(dest, entry.name);
15298
15673
  if (entry.name === ".git") {
15299
15674
  continue;
15300
15675
  }
@@ -15306,7 +15681,7 @@ async function copyDirectoryRecursive(src, dest) {
15306
15681
  }
15307
15682
  }
15308
15683
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
15309
- const resolvedTemplatePath = path40.resolve(templatePath);
15684
+ const resolvedTemplatePath = path41.resolve(templatePath);
15310
15685
  if (!await fileExists(resolvedTemplatePath)) {
15311
15686
  throw new TemplateNotFoundError(resolvedTemplatePath);
15312
15687
  }
@@ -15355,7 +15730,7 @@ async function cleanupWorkspace(workspacePath) {
15355
15730
  }
15356
15731
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
15357
15732
  const root = workspaceRoot ?? getWorkspacesRoot();
15358
- const evalDir = path40.join(root, evalRunId);
15733
+ const evalDir = path41.join(root, evalRunId);
15359
15734
  if (await fileExists(evalDir)) {
15360
15735
  await rm4(evalDir, { recursive: true, force: true });
15361
15736
  }
@@ -15365,8 +15740,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
15365
15740
  import { execFile } from "node:child_process";
15366
15741
  import { createHash } from "node:crypto";
15367
15742
  import { existsSync as existsSync3 } from "node:fs";
15368
- import { cp as cp2, mkdir as mkdir13, readFile as readFile12, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
15369
- import path41 from "node:path";
15743
+ import { cp as cp2, mkdir as mkdir13, readFile as readFile14, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
15744
+ import path42 from "node:path";
15370
15745
  import { promisify as promisify5 } from "node:util";
15371
15746
  var execFileAsync = promisify5(execFile);
15372
15747
  function gitEnv() {
@@ -15393,12 +15768,14 @@ async function git(args, opts) {
15393
15768
  return stdout.trim();
15394
15769
  }
15395
15770
  function normalizeRepoForFingerprint(repo) {
15396
- const source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
15397
- const result = {
15398
- path: repo.path,
15399
- source,
15400
- ref: repo.checkout?.ref ?? "HEAD"
15401
- };
15771
+ const result = {};
15772
+ if (repo.path) {
15773
+ result.path = repo.path;
15774
+ }
15775
+ if (repo.source) {
15776
+ result.source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
15777
+ }
15778
+ result.ref = getRepoCheckoutRef(repo.checkout);
15402
15779
  if (repo.clone?.depth !== void 0) {
15403
15780
  result.depth = repo.clone.depth;
15404
15781
  }
@@ -15412,7 +15789,7 @@ function normalizeRepoForFingerprint(repo) {
15412
15789
  }
15413
15790
  function computeWorkspaceFingerprint(repos) {
15414
15791
  const canonical = {
15415
- repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
15792
+ repos: [...repos].sort((a, b) => (a.path ?? "").localeCompare(b.path ?? "")).map(normalizeRepoForFingerprint)
15416
15793
  };
15417
15794
  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
15418
15795
  }
@@ -15420,8 +15797,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
15420
15797
  await mkdir13(dest, { recursive: true });
15421
15798
  const entries = await readdir5(src, { withFileTypes: true });
15422
15799
  for (const entry of entries) {
15423
- const srcPath = path41.join(src, entry.name);
15424
- const destPath = path41.join(dest, entry.name);
15800
+ const srcPath = path42.join(src, entry.name);
15801
+ const destPath = path42.join(dest, entry.name);
15425
15802
  if (entry.name === ".git") {
15426
15803
  continue;
15427
15804
  }
@@ -15454,7 +15831,7 @@ var WorkspacePoolManager = class {
15454
15831
  async acquireWorkspace(options) {
15455
15832
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
15456
15833
  const fingerprint = computeWorkspaceFingerprint(repos);
15457
- const poolDir = path41.join(this.poolRoot, fingerprint);
15834
+ const poolDir = path42.join(this.poolRoot, fingerprint);
15458
15835
  await mkdir13(poolDir, { recursive: true });
15459
15836
  const drifted = await this.checkDrift(poolDir, fingerprint);
15460
15837
  if (drifted) {
@@ -15464,7 +15841,7 @@ var WorkspacePoolManager = class {
15464
15841
  await this.removeAllSlots(poolDir);
15465
15842
  }
15466
15843
  for (let i = 0; i < maxSlots; i++) {
15467
- const slotPath = path41.join(poolDir, `slot-${i}`);
15844
+ const slotPath = path42.join(poolDir, `slot-${i}`);
15468
15845
  const lockPath = `${slotPath}.lock`;
15469
15846
  const locked = await this.tryLock(lockPath);
15470
15847
  if (!locked) {
@@ -15526,7 +15903,7 @@ var WorkspacePoolManager = class {
15526
15903
  throw err;
15527
15904
  }
15528
15905
  try {
15529
- const pidStr = await readFile12(lockPath, "utf-8");
15906
+ const pidStr = await readFile14(lockPath, "utf-8");
15530
15907
  const pid = Number.parseInt(pidStr.trim(), 10);
15531
15908
  if (!Number.isNaN(pid)) {
15532
15909
  try {
@@ -15551,9 +15928,9 @@ var WorkspacePoolManager = class {
15551
15928
  * Returns false (no drift) if metadata.json doesn't exist (first use).
15552
15929
  */
15553
15930
  async checkDrift(poolDir, fingerprint) {
15554
- const metadataPath = path41.join(poolDir, "metadata.json");
15931
+ const metadataPath = path42.join(poolDir, "metadata.json");
15555
15932
  try {
15556
- const raw = await readFile12(metadataPath, "utf-8");
15933
+ const raw = await readFile14(metadataPath, "utf-8");
15557
15934
  const metadata = JSON.parse(raw);
15558
15935
  return metadata.fingerprint !== fingerprint;
15559
15936
  } catch {
@@ -15568,17 +15945,17 @@ var WorkspacePoolManager = class {
15568
15945
  repos,
15569
15946
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
15570
15947
  };
15571
- await writeFile7(path41.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
15948
+ await writeFile7(path42.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
15572
15949
  }
15573
15950
  /** Remove all slot directories and their lock files from a pool directory. */
15574
15951
  async removeAllSlots(poolDir) {
15575
15952
  const entries = await readdir5(poolDir);
15576
15953
  for (const entry of entries) {
15577
15954
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
15578
- const lockPath = path41.join(poolDir, `${entry}.lock`);
15955
+ const lockPath = path42.join(poolDir, `${entry}.lock`);
15579
15956
  if (existsSync3(lockPath)) {
15580
15957
  try {
15581
- const pidStr = await readFile12(lockPath, "utf-8");
15958
+ const pidStr = await readFile14(lockPath, "utf-8");
15582
15959
  const pid = Number.parseInt(pidStr.trim(), 10);
15583
15960
  if (!Number.isNaN(pid)) {
15584
15961
  try {
@@ -15591,12 +15968,12 @@ var WorkspacePoolManager = class {
15591
15968
  } catch {
15592
15969
  }
15593
15970
  }
15594
- await rm5(path41.join(poolDir, entry), { recursive: true, force: true });
15971
+ await rm5(path42.join(poolDir, entry), { recursive: true, force: true });
15595
15972
  await rm5(lockPath, { force: true }).catch(() => {
15596
15973
  });
15597
15974
  }
15598
15975
  }
15599
- await rm5(path41.join(poolDir, "metadata.json"), { force: true }).catch(() => {
15976
+ await rm5(path42.join(poolDir, "metadata.json"), { force: true }).catch(() => {
15600
15977
  });
15601
15978
  }
15602
15979
  /**
@@ -15606,14 +15983,15 @@ var WorkspacePoolManager = class {
15606
15983
  */
15607
15984
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
15608
15985
  for (const repo of repos) {
15609
- const repoDir = path41.join(slotPath, repo.path);
15986
+ if (!repo.path || !repo.source) continue;
15987
+ const repoDir = path42.join(slotPath, repo.path);
15610
15988
  if (!existsSync3(repoDir)) {
15611
15989
  continue;
15612
15990
  }
15613
15991
  if (poolReset === "none") {
15614
15992
  continue;
15615
15993
  }
15616
- const ref = repo.checkout?.ref ?? "HEAD";
15994
+ const ref = getRepoCheckoutRef(repo.checkout);
15617
15995
  const resolve = repo.checkout?.resolve ?? "remote";
15618
15996
  if (resolve === "remote") {
15619
15997
  const fetchArgs = ["fetch", "origin", ref];
@@ -15630,8 +16008,8 @@ var WorkspacePoolManager = class {
15630
16008
  }
15631
16009
  if (templatePath) {
15632
16010
  const repoDirNames = new Set(
15633
- repos.map((r) => {
15634
- const normalized = r.path.replace(/^\.\//, "");
16011
+ repos.filter((r) => r.path).map((r) => {
16012
+ const normalized = (r.path ?? "").replace(/^\.\//, "");
15635
16013
  return normalized.split("/")[0];
15636
16014
  })
15637
16015
  );
@@ -15643,7 +16021,7 @@ var WorkspacePoolManager = class {
15643
16021
  // src/evaluation/workspace/repo-manager.ts
15644
16022
  import { execFile as execFile2 } from "node:child_process";
15645
16023
  import { existsSync as existsSync4 } from "node:fs";
15646
- import path42 from "node:path";
16024
+ import path43 from "node:path";
15647
16025
  import { promisify as promisify6 } from "node:util";
15648
16026
  var execFileAsync2 = promisify6(execFile2);
15649
16027
  var DEFAULT_TIMEOUT_MS2 = 3e5;
@@ -15686,17 +16064,17 @@ var RepoManager = class {
15686
16064
  static validateLocalPaths(repos) {
15687
16065
  const errors = [];
15688
16066
  for (const repo of repos) {
15689
- if (repo.source.type !== "local") continue;
16067
+ if (!repo.source || repo.source.type !== "local") continue;
15690
16068
  const sourcePath = repo.source.path;
15691
16069
  if (!sourcePath || sourcePath.trim() === "") {
15692
16070
  errors.push({
15693
- repoPath: repo.path,
16071
+ repoPath: repo.path ?? "(none)",
15694
16072
  resolvedSourcePath: sourcePath ?? "",
15695
16073
  reason: "empty_path"
15696
16074
  });
15697
16075
  } else if (!existsSync4(sourcePath)) {
15698
16076
  errors.push({
15699
- repoPath: repo.path,
16077
+ repoPath: repo.path ?? "(none)",
15700
16078
  resolvedSourcePath: sourcePath,
15701
16079
  reason: "not_found"
15702
16080
  });
@@ -15743,7 +16121,13 @@ ${lines.join("\n")}`;
15743
16121
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
15744
16122
  */
15745
16123
  async materialize(repo, workspacePath) {
15746
- const targetDir = path42.join(workspacePath, repo.path);
16124
+ if (!repo.source || !repo.path) {
16125
+ if (this.verbose) {
16126
+ console.log(`[repo] materialize skip path=${repo.path ?? "(none)"} (no source or path)`);
16127
+ }
16128
+ return;
16129
+ }
16130
+ const targetDir = path43.join(workspacePath, repo.path);
15747
16131
  const sourceUrl = getSourceUrl(repo.source);
15748
16132
  const startedAt = Date.now();
15749
16133
  if (this.verbose) {
@@ -15766,7 +16150,7 @@ ${lines.join("\n")}`;
15766
16150
  await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
15767
16151
  await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
15768
16152
  }
15769
- const ref = repo.checkout?.ref ?? "HEAD";
16153
+ const ref = getRepoCheckoutRef(repo.checkout);
15770
16154
  const resolve = repo.checkout?.resolve ?? "remote";
15771
16155
  let resolvedSha;
15772
16156
  if (resolve === "remote" && repo.source.type === "git") {
@@ -15818,23 +16202,27 @@ ${lines.join("\n")}`;
15818
16202
  );
15819
16203
  }
15820
16204
  }
15821
- /** Materialize all repos into the workspace. */
16205
+ /** Materialize all repos into the workspace. Skips repos without source (Docker-only repos). */
15822
16206
  async materializeAll(repos, workspacePath) {
16207
+ const materializableRepos = repos.filter((r) => r.source);
15823
16208
  if (this.verbose) {
15824
- console.log(`[repo] materializeAll count=${repos.length} workspace=${workspacePath}`);
16209
+ console.log(
16210
+ `[repo] materializeAll count=${materializableRepos.length} (${repos.length - materializableRepos.length} skipped, no source) workspace=${workspacePath}`
16211
+ );
15825
16212
  }
15826
- for (const repo of repos) {
16213
+ for (const repo of materializableRepos) {
15827
16214
  await this.materialize(repo, workspacePath);
15828
16215
  }
15829
16216
  if (this.verbose) {
15830
16217
  console.log("[repo] materializeAll complete");
15831
16218
  }
15832
16219
  }
15833
- /** Reset repos in workspace to their checkout state. */
16220
+ /** Reset repos in workspace to their checkout state. Skips repos without path or source. */
15834
16221
  async reset(repos, workspacePath, reset) {
15835
16222
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
15836
16223
  for (const repo of repos) {
15837
- const targetDir = path42.join(workspacePath, repo.path);
16224
+ if (!repo.path || !repo.source) continue;
16225
+ const targetDir = path43.join(workspacePath, repo.path);
15838
16226
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
15839
16227
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
15840
16228
  }
@@ -15843,16 +16231,16 @@ ${lines.join("\n")}`;
15843
16231
 
15844
16232
  // src/evaluation/workspace/resolve.ts
15845
16233
  import { readdir as readdir6, stat as stat7 } from "node:fs/promises";
15846
- import path43 from "node:path";
16234
+ import path44 from "node:path";
15847
16235
  async function resolveWorkspaceTemplate(templatePath) {
15848
16236
  if (!templatePath) {
15849
16237
  return void 0;
15850
16238
  }
15851
- const resolved = path43.resolve(templatePath);
16239
+ const resolved = path44.resolve(templatePath);
15852
16240
  const stats = await stat7(resolved);
15853
16241
  if (stats.isFile()) {
15854
16242
  return {
15855
- dir: path43.dirname(resolved),
16243
+ dir: path44.dirname(resolved),
15856
16244
  workspaceFile: resolved
15857
16245
  };
15858
16246
  }
@@ -15864,14 +16252,14 @@ async function resolveWorkspaceTemplate(templatePath) {
15864
16252
  if (workspaceFiles.length === 1) {
15865
16253
  return {
15866
16254
  dir: resolved,
15867
- workspaceFile: path43.join(resolved, workspaceFiles[0])
16255
+ workspaceFile: path44.join(resolved, workspaceFiles[0])
15868
16256
  };
15869
16257
  }
15870
16258
  if (workspaceFiles.length > 1) {
15871
16259
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
15872
16260
  return {
15873
16261
  dir: resolved,
15874
- workspaceFile: conventionFile ? path43.join(resolved, conventionFile) : void 0
16262
+ workspaceFile: conventionFile ? path44.join(resolved, conventionFile) : void 0
15875
16263
  };
15876
16264
  }
15877
16265
  return { dir: resolved };
@@ -16090,7 +16478,7 @@ async function runEvaluation(options) {
16090
16478
  ];
16091
16479
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
16092
16480
  const typeRegistry = createBuiltinRegistry();
16093
- const discoveryBaseDir = evalFilePath ? path44.dirname(path44.resolve(evalFilePath)) : process.cwd();
16481
+ const discoveryBaseDir = evalFilePath ? path45.dirname(path45.resolve(evalFilePath)) : process.cwd();
16094
16482
  const evalDir = discoveryBaseDir;
16095
16483
  await discoverAssertions(typeRegistry, discoveryBaseDir);
16096
16484
  await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -16157,7 +16545,8 @@ async function runEvaluation(options) {
16157
16545
  for (const ec of filteredEvalCases) {
16158
16546
  if (ec.workspace?.repos) {
16159
16547
  for (const repo of ec.workspace.repos) {
16160
- const key = `${repo.path}::${repo.source.type === "local" ? repo.source.path : ""}`;
16548
+ if (!repo.source) continue;
16549
+ const key = `${repo.path ?? ""}::${repo.source.type === "local" ? repo.source.path : ""}`;
16161
16550
  if (!allRepos.has(key)) {
16162
16551
  allRepos.set(key, repo);
16163
16552
  }
@@ -16170,7 +16559,7 @@ async function runEvaluation(options) {
16170
16559
  const message = RepoManager.formatValidationErrors(localPathErrors);
16171
16560
  console.warn(`Warning: ${message}`);
16172
16561
  const invalidLocalRepoPaths = new Set(localPathErrors.map((e) => e.repoPath));
16173
- if (suiteWorkspace?.repos?.some((r) => invalidLocalRepoPaths.has(r.path))) {
16562
+ if (suiteWorkspace?.repos?.some((r) => r.path && invalidLocalRepoPaths.has(r.path))) {
16174
16563
  throw new Error(message);
16175
16564
  }
16176
16565
  }
@@ -16288,7 +16677,7 @@ async function runEvaluation(options) {
16288
16677
  }
16289
16678
  try {
16290
16679
  if (suiteWorkspaceFile && sharedWorkspacePath) {
16291
- const copiedWorkspaceFile = path44.join(sharedWorkspacePath, path44.basename(suiteWorkspaceFile));
16680
+ const copiedWorkspaceFile = path45.join(sharedWorkspacePath, path45.basename(suiteWorkspaceFile));
16292
16681
  try {
16293
16682
  await stat8(copiedWorkspaceFile);
16294
16683
  suiteWorkspaceFile = copiedWorkspaceFile;
@@ -16303,7 +16692,8 @@ async function runEvaluation(options) {
16303
16692
  try {
16304
16693
  if (needsPerRepoCheck) {
16305
16694
  for (const repo of suiteWorkspace.repos) {
16306
- const targetDir = path44.join(sharedWorkspacePath, repo.path);
16695
+ if (!repo.path || !repo.source) continue;
16696
+ const targetDir = path45.join(sharedWorkspacePath, repo.path);
16307
16697
  if (existsSync5(targetDir)) {
16308
16698
  setupLog(`reusing existing repo at: ${targetDir}`);
16309
16699
  continue;
@@ -16327,6 +16717,19 @@ async function runEvaluation(options) {
16327
16717
  throw new Error(`Failed to materialize repos: ${message}`);
16328
16718
  }
16329
16719
  }
16720
+ const suiteDockerConfig = suiteWorkspace?.docker;
16721
+ if (suiteDockerConfig) {
16722
+ setupLog(`pulling Docker image: ${suiteDockerConfig.image}`);
16723
+ const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27.js");
16724
+ const dockerSetup = new DockerWorkspaceProvider2(suiteDockerConfig);
16725
+ if (!await dockerSetup.isDockerAvailable()) {
16726
+ throw new Error(
16727
+ "Docker workspace configured but Docker CLI is not available. Install Docker and ensure it is running."
16728
+ );
16729
+ }
16730
+ await dockerSetup.pullImage();
16731
+ setupLog("Docker image pull complete");
16732
+ }
16330
16733
  const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
16331
16734
  const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
16332
16735
  if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
@@ -16687,11 +17090,9 @@ async function runBatchEvaluation(options) {
16687
17090
  const promptInputs = promptInputsList[index];
16688
17091
  return {
16689
17092
  question: promptInputs.question,
17093
+ systemPrompt: promptInputs.systemMessage,
16690
17094
  inputFiles: evalCase.file_paths,
16691
- evalCaseId: evalCase.id,
16692
- metadata: {
16693
- systemPrompt: promptInputs.systemMessage ?? ""
16694
- }
17095
+ evalCaseId: evalCase.id
16695
17096
  };
16696
17097
  });
16697
17098
  const batchResponse = await provider.invokeBatch?.(batchRequests);
@@ -16890,7 +17291,7 @@ async function runEvalCase(options) {
16890
17291
  );
16891
17292
  }
16892
17293
  if (caseWorkspaceFile && workspacePath) {
16893
- const copiedFile = path44.join(workspacePath, path44.basename(caseWorkspaceFile));
17294
+ const copiedFile = path45.join(workspacePath, path45.basename(caseWorkspaceFile));
16894
17295
  try {
16895
17296
  await stat8(copiedFile);
16896
17297
  caseWorkspaceFile = copiedFile;
@@ -16952,10 +17353,10 @@ async function runEvalCase(options) {
16952
17353
  const files = evalCase.metadata.agent_skills_files;
16953
17354
  if (baseDir && files.length > 0) {
16954
17355
  for (const relPath of files) {
16955
- const srcPath = path44.resolve(baseDir, relPath);
16956
- const destPath = path44.resolve(workspacePath, relPath);
17356
+ const srcPath = path45.resolve(baseDir, relPath);
17357
+ const destPath = path45.resolve(workspacePath, relPath);
16957
17358
  try {
16958
- await mkdir14(path44.dirname(destPath), { recursive: true });
17359
+ await mkdir14(path45.dirname(destPath), { recursive: true });
16959
17360
  await copyFile2(srcPath, destPath);
16960
17361
  } catch (error) {
16961
17362
  const message = error instanceof Error ? error.message : String(error);
@@ -17222,6 +17623,7 @@ async function runEvalCase(options) {
17222
17623
  availableTargets,
17223
17624
  fileChanges,
17224
17625
  workspacePath,
17626
+ dockerConfig: evalCase.workspace?.docker,
17225
17627
  verbose,
17226
17628
  threshold: evalCase.threshold ?? caseThreshold
17227
17629
  });
@@ -17415,6 +17817,7 @@ async function evaluateCandidate(options) {
17415
17817
  availableTargets,
17416
17818
  fileChanges,
17417
17819
  workspacePath,
17820
+ dockerConfig,
17418
17821
  threshold: evalThreshold
17419
17822
  } = options;
17420
17823
  const gradeTimestamp = nowFn();
@@ -17441,6 +17844,7 @@ async function evaluateCandidate(options) {
17441
17844
  availableTargets,
17442
17845
  fileChanges,
17443
17846
  workspacePath,
17847
+ dockerConfig,
17444
17848
  threshold: evalThreshold
17445
17849
  });
17446
17850
  const completedAt = nowFn();
@@ -17516,6 +17920,7 @@ async function runEvaluatorsForCase(options) {
17516
17920
  availableTargets,
17517
17921
  fileChanges,
17518
17922
  workspacePath,
17923
+ dockerConfig,
17519
17924
  threshold
17520
17925
  } = options;
17521
17926
  if (evalCase.assertions && evalCase.assertions.length > 0) {
@@ -17543,6 +17948,7 @@ async function runEvaluatorsForCase(options) {
17543
17948
  availableTargets,
17544
17949
  fileChanges,
17545
17950
  workspacePath,
17951
+ dockerConfig,
17546
17952
  threshold
17547
17953
  });
17548
17954
  }
@@ -17551,6 +17957,7 @@ async function runEvaluatorsForCase(options) {
17551
17957
  if (!activeEvaluator) {
17552
17958
  throw new Error(`No evaluator registered for kind '${evaluatorKind}'`);
17553
17959
  }
17960
+ const implicitEvaluator = evaluatorKind === "llm-grader" && !evalCase.assertions ? buildImplicitLlmGraderConfig(evalCase) : void 0;
17554
17961
  const score = await activeEvaluator.evaluate({
17555
17962
  evalCase,
17556
17963
  candidate,
@@ -17570,10 +17977,22 @@ async function runEvaluatorsForCase(options) {
17570
17977
  targetResolver,
17571
17978
  availableTargets,
17572
17979
  fileChanges,
17573
- workspacePath
17980
+ workspacePath,
17981
+ dockerConfig,
17982
+ ...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
17574
17983
  });
17575
17984
  return { score };
17576
17985
  }
17986
+ function buildImplicitLlmGraderConfig(evalCase) {
17987
+ if (!evalCase.preprocessors || evalCase.preprocessors.length === 0) {
17988
+ return void 0;
17989
+ }
17990
+ return {
17991
+ name: "llm-grader",
17992
+ type: "llm-grader",
17993
+ preprocessors: evalCase.preprocessors
17994
+ };
17995
+ }
17577
17996
  async function runEvaluatorList(options) {
17578
17997
  const {
17579
17998
  evalCase,
@@ -17598,7 +18017,8 @@ async function runEvaluatorList(options) {
17598
18017
  targetResolver,
17599
18018
  availableTargets,
17600
18019
  fileChanges,
17601
- workspacePath
18020
+ workspacePath,
18021
+ dockerConfig
17602
18022
  } = options;
17603
18023
  const scored = [];
17604
18024
  const scores = [];
@@ -17621,9 +18041,10 @@ async function runEvaluatorList(options) {
17621
18041
  targetResolver,
17622
18042
  availableTargets,
17623
18043
  fileChanges,
17624
- workspacePath
18044
+ workspacePath,
18045
+ dockerConfig
17625
18046
  };
17626
- const evalFileDir = evalCase.file_paths[0] ? path44.dirname(evalCase.file_paths[0]) : process.cwd();
18047
+ const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
17627
18048
  const dispatchContext = {
17628
18049
  graderProvider,
17629
18050
  targetResolver,
@@ -17783,13 +18204,11 @@ async function invokeProvider(provider, options) {
17783
18204
  const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
17784
18205
  return await provider.invoke({
17785
18206
  question: promptInputs.question,
18207
+ systemPrompt: promptInputs.systemMessage,
17786
18208
  chatPrompt: promptInputs.chatPrompt,
17787
18209
  inputFiles: evalCase.file_paths,
17788
18210
  evalCaseId: evalCase.id,
17789
18211
  attempt,
17790
- metadata: {
17791
- systemPrompt: promptInputs.systemMessage ?? ""
17792
- },
17793
18212
  signal: controller.signal,
17794
18213
  cwd,
17795
18214
  workspaceFile,
@@ -17991,7 +18410,7 @@ function computeWeightedMean(entries) {
17991
18410
 
17992
18411
  // src/evaluation/evaluate.ts
17993
18412
  import { existsSync as existsSync6 } from "node:fs";
17994
- import path45 from "node:path";
18413
+ import path46 from "node:path";
17995
18414
 
17996
18415
  // src/evaluation/providers/function-provider.ts
17997
18416
  function createFunctionProvider(taskFn) {
@@ -18028,7 +18447,7 @@ async function evaluate(config) {
18028
18447
  }
18029
18448
  const gitRoot = await findGitRoot(process.cwd());
18030
18449
  const repoRoot = gitRoot ?? process.cwd();
18031
- const testFilePath = config.specFile ? path45.resolve(config.specFile) : path45.join(process.cwd(), "__programmatic__.yaml");
18450
+ const testFilePath = config.specFile ? path46.resolve(config.specFile) : path46.join(process.cwd(), "__programmatic__.yaml");
18032
18451
  await loadEnvHierarchy(repoRoot, testFilePath);
18033
18452
  let resolvedTarget;
18034
18453
  let taskProvider;
@@ -18143,10 +18562,10 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
18143
18562
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
18144
18563
  async function discoverDefaultTarget(repoRoot) {
18145
18564
  const cwd = process.cwd();
18146
- const chain = buildDirectoryChain(path45.join(cwd, "_placeholder"), repoRoot);
18565
+ const chain = buildDirectoryChain(path46.join(cwd, "_placeholder"), repoRoot);
18147
18566
  for (const dir of chain) {
18148
18567
  for (const candidate of TARGET_FILE_CANDIDATES) {
18149
- const targetsPath = path45.join(dir, candidate);
18568
+ const targetsPath = path46.join(dir, candidate);
18150
18569
  if (!existsSync6(targetsPath)) continue;
18151
18570
  try {
18152
18571
  const definitions = await readTargetDefinitions(targetsPath);
@@ -18159,16 +18578,16 @@ async function discoverDefaultTarget(repoRoot) {
18159
18578
  return null;
18160
18579
  }
18161
18580
  async function loadEnvHierarchy(repoRoot, startPath) {
18162
- const { readFileSync: readFileSync4 } = await import("node:fs");
18581
+ const { readFileSync: readFileSync5 } = await import("node:fs");
18163
18582
  const chain = buildDirectoryChain(startPath, repoRoot);
18164
18583
  const envFiles = [];
18165
18584
  for (const dir of chain) {
18166
- const envPath = path45.join(dir, ".env");
18585
+ const envPath = path46.join(dir, ".env");
18167
18586
  if (existsSync6(envPath)) envFiles.push(envPath);
18168
18587
  }
18169
18588
  for (let i = 0; i < envFiles.length; i++) {
18170
18589
  try {
18171
- const content = readFileSync4(envFiles[i], "utf8");
18590
+ const content = readFileSync5(envFiles[i], "utf8");
18172
18591
  for (const line of content.split("\n")) {
18173
18592
  const trimmed = line.trim();
18174
18593
  if (!trimmed || trimmed.startsWith("#")) continue;
@@ -18240,12 +18659,12 @@ var CONFIG_FILE_NAMES = [
18240
18659
  ".agentv/config.js"
18241
18660
  ];
18242
18661
  async function loadTsConfig(projectRoot) {
18243
- const { existsSync: existsSync8 } = await import("node:fs");
18662
+ const { existsSync: existsSync9 } = await import("node:fs");
18244
18663
  const { pathToFileURL: pathToFileURL2 } = await import("node:url");
18245
18664
  const { join: join2 } = await import("node:path");
18246
18665
  for (const fileName of CONFIG_FILE_NAMES) {
18247
18666
  const filePath = join2(projectRoot, fileName);
18248
- if (!existsSync8(filePath)) {
18667
+ if (!existsSync9(filePath)) {
18249
18668
  continue;
18250
18669
  }
18251
18670
  try {
@@ -18342,9 +18761,9 @@ function buildPrompt(criteria, question, referenceAnswer) {
18342
18761
  }
18343
18762
 
18344
18763
  // src/evaluation/workspace/deps-scanner.ts
18345
- import { readFile as readFile13 } from "node:fs/promises";
18346
- import path46 from "node:path";
18347
- import { parse as parse5 } from "yaml";
18764
+ import { readFile as readFile15 } from "node:fs/promises";
18765
+ import path47 from "node:path";
18766
+ import { parse as parse6 } from "yaml";
18348
18767
  function normalizeGitUrl(url) {
18349
18768
  let normalized = url.replace(/\.git$/, "");
18350
18769
  try {
@@ -18362,7 +18781,7 @@ async function scanRepoDeps(evalFilePaths) {
18362
18781
  try {
18363
18782
  const repos = await extractReposFromEvalFile(filePath);
18364
18783
  for (const repo of repos) {
18365
- if (repo.source.type !== "git") continue;
18784
+ if (!repo.source || repo.source.type !== "git") continue;
18366
18785
  const ref = repo.checkout?.ref;
18367
18786
  const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ""}`;
18368
18787
  const existing = seen.get(key);
@@ -18390,11 +18809,11 @@ async function scanRepoDeps(evalFilePaths) {
18390
18809
  return { repos: [...seen.values()], errors };
18391
18810
  }
18392
18811
  async function extractReposFromEvalFile(filePath) {
18393
- const content = await readFile13(filePath, "utf8");
18394
- const parsed = interpolateEnv(parse5(content), process.env);
18812
+ const content = await readFile15(filePath, "utf8");
18813
+ const parsed = interpolateEnv(parse6(content), process.env);
18395
18814
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
18396
18815
  const obj = parsed;
18397
- const evalFileDir = path46.dirname(path46.resolve(filePath));
18816
+ const evalFileDir = path47.dirname(path47.resolve(filePath));
18398
18817
  const repos = [];
18399
18818
  const suiteRepos = await extractReposFromWorkspaceRaw(obj.workspace, evalFileDir);
18400
18819
  repos.push(...suiteRepos);
@@ -18410,9 +18829,9 @@ async function extractReposFromEvalFile(filePath) {
18410
18829
  }
18411
18830
  async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
18412
18831
  if (typeof raw === "string") {
18413
- const workspaceFilePath = path46.resolve(evalFileDir, raw);
18414
- const content = await readFile13(workspaceFilePath, "utf8");
18415
- const parsed = interpolateEnv(parse5(content), process.env);
18832
+ const workspaceFilePath = path47.resolve(evalFileDir, raw);
18833
+ const content = await readFile15(workspaceFilePath, "utf8");
18834
+ const parsed = interpolateEnv(parse6(content), process.env);
18416
18835
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
18417
18836
  return extractReposFromObject(parsed);
18418
18837
  }
@@ -18439,8 +18858,8 @@ function extractReposFromObject(obj) {
18439
18858
  }
18440
18859
 
18441
18860
  // src/evaluation/cache/response-cache.ts
18442
- import { mkdir as mkdir15, readFile as readFile14, writeFile as writeFile8 } from "node:fs/promises";
18443
- import path47 from "node:path";
18861
+ import { mkdir as mkdir15, readFile as readFile16, writeFile as writeFile8 } from "node:fs/promises";
18862
+ import path48 from "node:path";
18444
18863
  var DEFAULT_CACHE_PATH = ".agentv/cache";
18445
18864
  var ResponseCache = class {
18446
18865
  cachePath;
@@ -18450,7 +18869,7 @@ var ResponseCache = class {
18450
18869
  async get(key) {
18451
18870
  const filePath = this.keyToPath(key);
18452
18871
  try {
18453
- const data = await readFile14(filePath, "utf8");
18872
+ const data = await readFile16(filePath, "utf8");
18454
18873
  return JSON.parse(data);
18455
18874
  } catch {
18456
18875
  return void 0;
@@ -18458,13 +18877,13 @@ var ResponseCache = class {
18458
18877
  }
18459
18878
  async set(key, value) {
18460
18879
  const filePath = this.keyToPath(key);
18461
- const dir = path47.dirname(filePath);
18880
+ const dir = path48.dirname(filePath);
18462
18881
  await mkdir15(dir, { recursive: true });
18463
18882
  await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
18464
18883
  }
18465
18884
  keyToPath(key) {
18466
18885
  const prefix = key.slice(0, 2);
18467
- return path47.join(this.cachePath, prefix, `${key}.json`);
18886
+ return path48.join(this.cachePath, prefix, `${key}.json`);
18468
18887
  }
18469
18888
  };
18470
18889
  function shouldEnableCache(params) {
@@ -18479,20 +18898,301 @@ function shouldSkipCacheForTemperature(targetConfig) {
18479
18898
  return false;
18480
18899
  }
18481
18900
 
18901
+ // src/evaluation/results-repo.ts
18902
+ import { execFile as execFile3 } from "node:child_process";
18903
+ import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync } from "node:fs";
18904
+ import { cp as cp3, mkdtemp as mkdtemp3, readdir as readdir8, rm as rm6, stat as stat9 } from "node:fs/promises";
18905
+ import os3 from "node:os";
18906
+ import path49 from "node:path";
18907
+ import { promisify as promisify7 } from "node:util";
18908
+ var execFileAsync3 = promisify7(execFile3);
18909
+ function sanitizeRepoSlug(repo) {
18910
+ return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
18911
+ }
18912
+ function withFriendlyGitHubAuthError(error) {
18913
+ const message = error instanceof Error ? error.message : String(error);
18914
+ const lower = message.toLowerCase();
18915
+ if (lower.includes("authentication failed") || lower.includes("could not read username") || lower.includes("permission denied") || lower.includes("not logged into any github hosts")) {
18916
+ return new Error(`${message}. Run 'gh auth login' to authenticate.`);
18917
+ }
18918
+ return new Error(message);
18919
+ }
18920
+ function normalizeResultsExportConfig(config) {
18921
+ return {
18922
+ repo: config.repo.trim(),
18923
+ path: config.path.trim().replace(/^\/+|\/+$/g, ""),
18924
+ auto_push: config.auto_push === true,
18925
+ branch_prefix: config.branch_prefix?.trim() || "eval-results"
18926
+ };
18927
+ }
18928
+ function resolveResultsRepoUrl(repo) {
18929
+ if (repo.includes("://") || repo.startsWith("git@")) {
18930
+ return repo;
18931
+ }
18932
+ return `https://github.com/${repo}.git`;
18933
+ }
18934
+ function getResultsRepoCachePaths(repo) {
18935
+ const rootDir = path49.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
18936
+ return {
18937
+ rootDir,
18938
+ repoDir: path49.join(rootDir, "repo"),
18939
+ statusFile: path49.join(rootDir, "status.json")
18940
+ };
18941
+ }
18942
+ function readPersistedStatus(statusFile) {
18943
+ if (!existsSync7(statusFile)) {
18944
+ return {};
18945
+ }
18946
+ try {
18947
+ return JSON.parse(readFileSync3(statusFile, "utf8"));
18948
+ } catch {
18949
+ return {};
18950
+ }
18951
+ }
18952
+ function writePersistedStatus(statusFile, status) {
18953
+ mkdirSync2(path49.dirname(statusFile), { recursive: true });
18954
+ writeFileSync(statusFile, `${JSON.stringify(status, null, 2)}
18955
+ `, "utf8");
18956
+ }
18957
+ async function runCommand(executable, args, options) {
18958
+ try {
18959
+ const { stdout, stderr } = await execFileAsync3(executable, [...args], {
18960
+ cwd: options?.cwd,
18961
+ env: process.env
18962
+ });
18963
+ return { stdout, stderr };
18964
+ } catch (error) {
18965
+ if (options?.check === false && error && typeof error === "object") {
18966
+ const execError = error;
18967
+ return {
18968
+ stdout: execError.stdout ?? "",
18969
+ stderr: execError.stderr ?? ""
18970
+ };
18971
+ }
18972
+ throw withFriendlyGitHubAuthError(error);
18973
+ }
18974
+ }
18975
+ async function runGit(args, options) {
18976
+ return runCommand("git", args, options);
18977
+ }
18978
+ async function runGh(args, options) {
18979
+ return runCommand("gh", args, options);
18980
+ }
18981
+ async function resolveDefaultBranch(repoDir) {
18982
+ try {
18983
+ const { stdout } = await runGit(["symbolic-ref", "refs/remotes/origin/HEAD"], { cwd: repoDir });
18984
+ const ref = stdout.trim();
18985
+ const prefix = "refs/remotes/origin/";
18986
+ if (ref.startsWith(prefix)) {
18987
+ return ref.slice(prefix.length);
18988
+ }
18989
+ } catch {
18990
+ }
18991
+ for (const candidate of ["main", "master"]) {
18992
+ try {
18993
+ await runGit(["rev-parse", "--verify", `origin/${candidate}`], { cwd: repoDir });
18994
+ return candidate;
18995
+ } catch {
18996
+ }
18997
+ }
18998
+ return "main";
18999
+ }
19000
+ async function updateCacheRepo(repoDir, baseBranch) {
19001
+ await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
19002
+ await runGit(["checkout", baseBranch], { cwd: repoDir });
19003
+ await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
19004
+ }
19005
+ function updateStatusFile(config, patch) {
19006
+ const cachePaths = getResultsRepoCachePaths(config.repo);
19007
+ const current = readPersistedStatus(cachePaths.statusFile);
19008
+ writePersistedStatus(cachePaths.statusFile, {
19009
+ ...current,
19010
+ ...patch
19011
+ });
19012
+ }
19013
+ async function ensureResultsRepoClone(config) {
19014
+ const normalized = normalizeResultsExportConfig(config);
19015
+ const cachePaths = getResultsRepoCachePaths(normalized.repo);
19016
+ mkdirSync2(cachePaths.rootDir, { recursive: true });
19017
+ if (!existsSync7(cachePaths.repoDir)) {
19018
+ try {
19019
+ await runGit([
19020
+ "clone",
19021
+ "--filter=blob:none",
19022
+ resolveResultsRepoUrl(normalized.repo),
19023
+ cachePaths.repoDir
19024
+ ]);
19025
+ return cachePaths.repoDir;
19026
+ } catch (error) {
19027
+ updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
19028
+ throw withFriendlyGitHubAuthError(error);
19029
+ }
19030
+ }
19031
+ if (!existsSync7(path49.join(cachePaths.repoDir, ".git"))) {
19032
+ throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
19033
+ }
19034
+ return cachePaths.repoDir;
19035
+ }
19036
+ function getResultsRepoStatus(config) {
19037
+ if (!config) {
19038
+ return {
19039
+ configured: false,
19040
+ available: false,
19041
+ repo: "",
19042
+ cache_dir: ""
19043
+ };
19044
+ }
19045
+ const normalized = normalizeResultsExportConfig(config);
19046
+ const cachePaths = getResultsRepoCachePaths(normalized.repo);
19047
+ const persisted = readPersistedStatus(cachePaths.statusFile);
19048
+ return {
19049
+ configured: true,
19050
+ available: existsSync7(cachePaths.repoDir),
19051
+ repo: normalized.repo,
19052
+ path: normalized.path,
19053
+ auto_push: normalized.auto_push,
19054
+ branch_prefix: normalized.branch_prefix,
19055
+ cache_dir: cachePaths.repoDir,
19056
+ last_synced_at: persisted.last_synced_at,
19057
+ last_error: persisted.last_error
19058
+ };
19059
+ }
19060
+ async function syncResultsRepo(config) {
19061
+ const normalized = normalizeResultsExportConfig(config);
19062
+ try {
19063
+ const repoDir = await ensureResultsRepoClone(normalized);
19064
+ const baseBranch = await resolveDefaultBranch(repoDir);
19065
+ await updateCacheRepo(repoDir, baseBranch);
19066
+ updateStatusFile(normalized, {
19067
+ last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
19068
+ last_error: void 0
19069
+ });
19070
+ } catch (error) {
19071
+ updateStatusFile(normalized, {
19072
+ last_error: withFriendlyGitHubAuthError(error).message
19073
+ });
19074
+ throw withFriendlyGitHubAuthError(error);
19075
+ }
19076
+ return getResultsRepoStatus(normalized);
19077
+ }
19078
+ async function checkoutResultsRepoBranch(config, branchName) {
19079
+ const normalized = normalizeResultsExportConfig(config);
19080
+ const repoDir = await ensureResultsRepoClone(normalized);
19081
+ const baseBranch = await resolveDefaultBranch(repoDir);
19082
+ await updateCacheRepo(repoDir, baseBranch);
19083
+ await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
19084
+ updateStatusFile(normalized, { last_error: void 0 });
19085
+ return {
19086
+ branchName,
19087
+ baseBranch,
19088
+ repoDir
19089
+ };
19090
+ }
19091
+ async function prepareResultsRepoBranch(config, branchName) {
19092
+ const normalized = normalizeResultsExportConfig(config);
19093
+ const cloneDir = await ensureResultsRepoClone(normalized);
19094
+ const baseBranch = await resolveDefaultBranch(cloneDir);
19095
+ await updateCacheRepo(cloneDir, baseBranch);
19096
+ const worktreeRoot = await mkdtemp3(path49.join(os3.tmpdir(), "agentv-results-repo-"));
19097
+ const worktreeDir = path49.join(worktreeRoot, "repo");
19098
+ await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
19099
+ cwd: cloneDir
19100
+ });
19101
+ return {
19102
+ branchName,
19103
+ baseBranch,
19104
+ repoDir: worktreeDir,
19105
+ cleanup: async () => {
19106
+ try {
19107
+ await runGit(["worktree", "remove", "--force", worktreeDir], { cwd: cloneDir });
19108
+ } finally {
19109
+ await rm6(worktreeRoot, { recursive: true, force: true }).catch(() => void 0);
19110
+ }
19111
+ }
19112
+ };
19113
+ }
19114
+ async function stageResultsArtifacts(params) {
19115
+ rmSync(params.destinationDir, { recursive: true, force: true });
19116
+ mkdirSync2(path49.dirname(params.destinationDir), { recursive: true });
19117
+ await cp3(params.sourceDir, params.destinationDir, { recursive: true });
19118
+ }
19119
+ function resolveResultsRepoRunsDir(config) {
19120
+ const normalized = normalizeResultsExportConfig(config);
19121
+ return path49.join(
19122
+ getResultsRepoCachePaths(normalized.repo).repoDir,
19123
+ ...normalized.path.split("/")
19124
+ );
19125
+ }
19126
+ async function directorySizeBytes(targetPath) {
19127
+ const entry = await stat9(targetPath);
19128
+ if (entry.isFile()) {
19129
+ return entry.size;
19130
+ }
19131
+ let total = 0;
19132
+ for (const child of await readdir8(targetPath, { withFileTypes: true })) {
19133
+ total += await directorySizeBytes(path49.join(targetPath, child.name));
19134
+ }
19135
+ return total;
19136
+ }
19137
+ async function commitAndPushResultsBranch(params) {
19138
+ await runGit(["add", "--all"], { cwd: params.repoDir });
19139
+ const { stdout: diffStdout } = await runGit(["status", "--porcelain"], {
19140
+ cwd: params.repoDir,
19141
+ check: false
19142
+ });
19143
+ if (diffStdout.trim().length === 0) {
19144
+ return false;
19145
+ }
19146
+ await runGit(["commit", "-m", params.commitMessage], { cwd: params.repoDir });
19147
+ await runGit(["push", "-u", "origin", params.branchName], { cwd: params.repoDir });
19148
+ return true;
19149
+ }
19150
+ async function pushResultsRepoBranch(config, branchName, cwd) {
19151
+ const normalized = normalizeResultsExportConfig(config);
19152
+ await runGit(["push", "-u", "origin", branchName], {
19153
+ cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir
19154
+ });
19155
+ updateStatusFile(normalized, {
19156
+ last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
19157
+ last_error: void 0
19158
+ });
19159
+ }
19160
+ async function createDraftResultsPr(params) {
19161
+ const { stdout } = await runGh(
19162
+ [
19163
+ "pr",
19164
+ "create",
19165
+ "--draft",
19166
+ "--repo",
19167
+ params.repo,
19168
+ "--base",
19169
+ params.baseBranch,
19170
+ "--head",
19171
+ params.branchName,
19172
+ "--title",
19173
+ params.title,
19174
+ "--body",
19175
+ params.body
19176
+ ],
19177
+ { cwd: params.repoDir }
19178
+ );
19179
+ return stdout.trim();
19180
+ }
19181
+
18482
19182
  // src/projects.ts
18483
- import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
18484
- import path48 from "node:path";
19183
+ import { existsSync as existsSync8, mkdirSync as mkdirSync3, readFileSync as readFileSync4, readdirSync as readdirSync3, statSync as statSync2, writeFileSync as writeFileSync2 } from "node:fs";
19184
+ import path50 from "node:path";
18485
19185
  import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
18486
19186
  function getProjectsRegistryPath() {
18487
- return path48.join(getAgentvHome(), "projects.yaml");
19187
+ return path50.join(getAgentvHome(), "projects.yaml");
18488
19188
  }
18489
19189
  function loadProjectRegistry() {
18490
19190
  const registryPath = getProjectsRegistryPath();
18491
- if (!existsSync7(registryPath)) {
19191
+ if (!existsSync8(registryPath)) {
18492
19192
  return { projects: [] };
18493
19193
  }
18494
19194
  try {
18495
- const raw = readFileSync3(registryPath, "utf-8");
19195
+ const raw = readFileSync4(registryPath, "utf-8");
18496
19196
  const parsed = parseYaml3(raw);
18497
19197
  if (!parsed || !Array.isArray(parsed.projects)) {
18498
19198
  return { projects: [] };
@@ -18504,14 +19204,14 @@ function loadProjectRegistry() {
18504
19204
  }
18505
19205
  function saveProjectRegistry(registry) {
18506
19206
  const registryPath = getProjectsRegistryPath();
18507
- const dir = path48.dirname(registryPath);
18508
- if (!existsSync7(dir)) {
18509
- mkdirSync2(dir, { recursive: true });
19207
+ const dir = path50.dirname(registryPath);
19208
+ if (!existsSync8(dir)) {
19209
+ mkdirSync3(dir, { recursive: true });
18510
19210
  }
18511
- writeFileSync(registryPath, stringifyYaml(registry), "utf-8");
19211
+ writeFileSync2(registryPath, stringifyYaml(registry), "utf-8");
18512
19212
  }
18513
19213
  function deriveProjectId(dirPath, existingIds) {
18514
- const base = path48.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
19214
+ const base = path50.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
18515
19215
  let candidate = base || "project";
18516
19216
  let suffix = 2;
18517
19217
  while (existingIds.includes(candidate)) {
@@ -18521,11 +19221,11 @@ function deriveProjectId(dirPath, existingIds) {
18521
19221
  return candidate;
18522
19222
  }
18523
19223
  function addProject(projectPath) {
18524
- const absPath = path48.resolve(projectPath);
18525
- if (!existsSync7(absPath)) {
19224
+ const absPath = path50.resolve(projectPath);
19225
+ if (!existsSync8(absPath)) {
18526
19226
  throw new Error(`Directory not found: ${absPath}`);
18527
19227
  }
18528
- if (!existsSync7(path48.join(absPath, ".agentv"))) {
19228
+ if (!existsSync8(path50.join(absPath, ".agentv"))) {
18529
19229
  throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
18530
19230
  }
18531
19231
  const registry = loadProjectRegistry();
@@ -18539,7 +19239,7 @@ function addProject(projectPath) {
18539
19239
  absPath,
18540
19240
  registry.projects.map((p) => p.id)
18541
19241
  ),
18542
- name: path48.basename(absPath),
19242
+ name: path50.basename(absPath),
18543
19243
  path: absPath,
18544
19244
  addedAt: now,
18545
19245
  lastOpenedAt: now
@@ -18568,14 +19268,14 @@ function touchProject(projectId) {
18568
19268
  }
18569
19269
  }
18570
19270
  function discoverProjects(rootDir, maxDepth = 2) {
18571
- const absRoot = path48.resolve(rootDir);
18572
- if (!existsSync7(absRoot) || !statSync2(absRoot).isDirectory()) {
19271
+ const absRoot = path50.resolve(rootDir);
19272
+ if (!existsSync8(absRoot) || !statSync2(absRoot).isDirectory()) {
18573
19273
  return [];
18574
19274
  }
18575
19275
  const results = [];
18576
19276
  function scan(dir, depth) {
18577
19277
  if (depth > maxDepth) return;
18578
- if (existsSync7(path48.join(dir, ".agentv"))) {
19278
+ if (existsSync8(path50.join(dir, ".agentv"))) {
18579
19279
  results.push(dir);
18580
19280
  return;
18581
19281
  }
@@ -18585,7 +19285,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
18585
19285
  for (const entry of entries) {
18586
19286
  if (!entry.isDirectory()) continue;
18587
19287
  if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
18588
- scan(path48.join(dir, entry.name), depth + 1);
19288
+ scan(path50.join(dir, entry.name), depth + 1);
18589
19289
  }
18590
19290
  } catch {
18591
19291
  }
@@ -19496,33 +20196,33 @@ function extractResponseItemContent(content) {
19496
20196
  }
19497
20197
 
19498
20198
  // src/import/codex-session-discovery.ts
19499
- import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
20199
+ import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
19500
20200
  import { homedir as homedir3 } from "node:os";
19501
- import path49 from "node:path";
19502
- var DEFAULT_SESSIONS_DIR = () => path49.join(homedir3(), ".codex", "sessions");
20201
+ import path51 from "node:path";
20202
+ var DEFAULT_SESSIONS_DIR = () => path51.join(homedir3(), ".codex", "sessions");
19503
20203
  async function discoverCodexSessions(opts) {
19504
20204
  const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
19505
20205
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
19506
20206
  const sessions = [];
19507
20207
  let yearDirs;
19508
20208
  try {
19509
- yearDirs = await readdir8(sessionsDir);
20209
+ yearDirs = await readdir9(sessionsDir);
19510
20210
  } catch {
19511
20211
  return [];
19512
20212
  }
19513
20213
  for (const year of yearDirs) {
19514
- const yearPath = path49.join(sessionsDir, year);
20214
+ const yearPath = path51.join(sessionsDir, year);
19515
20215
  let monthDirs;
19516
20216
  try {
19517
- monthDirs = await readdir8(yearPath);
20217
+ monthDirs = await readdir9(yearPath);
19518
20218
  } catch {
19519
20219
  continue;
19520
20220
  }
19521
20221
  for (const month of monthDirs) {
19522
- const monthPath = path49.join(yearPath, month);
20222
+ const monthPath = path51.join(yearPath, month);
19523
20223
  let dayDirs;
19524
20224
  try {
19525
- dayDirs = await readdir8(monthPath);
20225
+ dayDirs = await readdir9(monthPath);
19526
20226
  } catch {
19527
20227
  continue;
19528
20228
  }
@@ -19531,22 +20231,22 @@ async function discoverCodexSessions(opts) {
19531
20231
  const dirDate = `${year}-${month}-${day}`;
19532
20232
  if (dirDate !== opts.date) continue;
19533
20233
  }
19534
- const dayPath = path49.join(monthPath, day);
20234
+ const dayPath = path51.join(monthPath, day);
19535
20235
  let files;
19536
20236
  try {
19537
- files = await readdir8(dayPath);
20237
+ files = await readdir9(dayPath);
19538
20238
  } catch {
19539
20239
  continue;
19540
20240
  }
19541
20241
  for (const file of files) {
19542
20242
  if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
19543
- const filePath = path49.join(dayPath, file);
20243
+ const filePath = path51.join(dayPath, file);
19544
20244
  const nameWithoutExt = file.replace(/\.jsonl$/, "");
19545
20245
  const parts = nameWithoutExt.split("-");
19546
20246
  const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
19547
20247
  let updatedAt;
19548
20248
  try {
19549
- const fileStat = await stat9(filePath);
20249
+ const fileStat = await stat10(filePath);
19550
20250
  updatedAt = fileStat.mtime;
19551
20251
  } catch {
19552
20252
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -19561,10 +20261,10 @@ async function discoverCodexSessions(opts) {
19561
20261
  }
19562
20262
 
19563
20263
  // src/import/session-discovery.ts
19564
- import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
20264
+ import { readdir as readdir10, stat as stat11 } from "node:fs/promises";
19565
20265
  import { homedir as homedir4 } from "node:os";
19566
- import path50 from "node:path";
19567
- var DEFAULT_PROJECTS_DIR = () => path50.join(homedir4(), ".claude", "projects");
20266
+ import path52 from "node:path";
20267
+ var DEFAULT_PROJECTS_DIR = () => path52.join(homedir4(), ".claude", "projects");
19568
20268
  function encodeProjectPath(projectPath) {
19569
20269
  return projectPath.replace(/\//g, "-");
19570
20270
  }
@@ -19573,7 +20273,7 @@ async function discoverClaudeSessions(opts) {
19573
20273
  const limit = opts?.latest ? 1 : opts?.limit ?? 10;
19574
20274
  let projectDirs;
19575
20275
  try {
19576
- projectDirs = await readdir9(projectsDir);
20276
+ projectDirs = await readdir10(projectsDir);
19577
20277
  } catch {
19578
20278
  return [];
19579
20279
  }
@@ -19583,10 +20283,10 @@ async function discoverClaudeSessions(opts) {
19583
20283
  }
19584
20284
  const sessions = [];
19585
20285
  for (const projectDir of projectDirs) {
19586
- const dirPath = path50.join(projectsDir, projectDir);
20286
+ const dirPath = path52.join(projectsDir, projectDir);
19587
20287
  let entries;
19588
20288
  try {
19589
- entries = await readdir9(dirPath);
20289
+ entries = await readdir10(dirPath);
19590
20290
  } catch {
19591
20291
  continue;
19592
20292
  }
@@ -19594,10 +20294,10 @@ async function discoverClaudeSessions(opts) {
19594
20294
  if (!entry.endsWith(".jsonl")) continue;
19595
20295
  const sessionId = entry.replace(/\.jsonl$/, "");
19596
20296
  if (opts?.sessionId && sessionId !== opts.sessionId) continue;
19597
- const filePath = path50.join(dirPath, entry);
20297
+ const filePath = path52.join(dirPath, entry);
19598
20298
  let updatedAt;
19599
20299
  try {
19600
- const fileStat = await stat10(filePath);
20300
+ const fileStat = await stat11(filePath);
19601
20301
  updatedAt = fileStat.mtime;
19602
20302
  } catch {
19603
20303
  updatedAt = /* @__PURE__ */ new Date(0);
@@ -19615,7 +20315,7 @@ async function discoverClaudeSessions(opts) {
19615
20315
  }
19616
20316
 
19617
20317
  // src/import/types.ts
19618
- import { readFile as readFile15 } from "node:fs/promises";
20318
+ import { readFile as readFile17 } from "node:fs/promises";
19619
20319
  function toTranscriptJsonLine(entry) {
19620
20320
  const firstUserMessage = entry.messages.find((m) => m.role === "user");
19621
20321
  const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
@@ -19641,11 +20341,11 @@ function toTranscriptJsonLine(entry) {
19641
20341
  };
19642
20342
  }
19643
20343
  async function readTranscriptJsonl(filePath) {
19644
- const text = await readFile15(filePath, "utf8");
20344
+ const text = await readFile17(filePath, "utf8");
19645
20345
  return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
19646
20346
  }
19647
20347
  async function readTranscriptFile(filePath) {
19648
- return readFile15(filePath, "utf8");
20348
+ return readFile17(filePath, "utf8");
19649
20349
  }
19650
20350
 
19651
20351
  // src/import/transcript-provider.ts
@@ -19710,6 +20410,7 @@ export {
19710
20410
  DEFAULT_EXPLORATION_TOOLS,
19711
20411
  DEFAULT_THRESHOLD,
19712
20412
  DeterministicAssertionEvaluator,
20413
+ DockerWorkspaceProvider,
19713
20414
  EvaluatorRegistry,
19714
20415
  ExecutionMetricsEvaluator,
19715
20416
  FieldAccuracyEvaluator,
@@ -19745,9 +20446,11 @@ export {
19745
20446
  buildSearchRoots,
19746
20447
  calculateRubricScore,
19747
20448
  captureFileChanges,
20449
+ checkoutResultsRepoBranch,
19748
20450
  clampScore,
19749
20451
  cleanupEvalWorkspaces,
19750
20452
  cleanupWorkspace,
20453
+ commitAndPushResultsBranch,
19751
20454
  computeTraceSummary,
19752
20455
  computeWorkspaceFingerprint,
19753
20456
  consumeClaudeLogEntries,
@@ -19758,6 +20461,7 @@ export {
19758
20461
  createAgentKernel,
19759
20462
  createBuiltinProviderRegistry,
19760
20463
  createBuiltinRegistry,
20464
+ createDraftResultsPr,
19761
20465
  createProvider,
19762
20466
  createTempWorkspace,
19763
20467
  deepEqual,
@@ -19765,6 +20469,7 @@ export {
19765
20469
  deriveCategory,
19766
20470
  deriveProjectId,
19767
20471
  detectFormat,
20472
+ directorySizeBytes,
19768
20473
  discoverAssertions,
19769
20474
  discoverClaudeSessions,
19770
20475
  discoverCodexSessions,
@@ -19773,6 +20478,7 @@ export {
19773
20478
  discoverGraders as discoverJudges,
19774
20479
  discoverProjects,
19775
20480
  discoverProviders,
20481
+ ensureResultsRepoClone,
19776
20482
  ensureVSCodeSubagents,
19777
20483
  evaluate,
19778
20484
  executeScript,
@@ -19797,6 +20503,8 @@ export {
19797
20503
  getOutputFilenames,
19798
20504
  getProject,
19799
20505
  getProjectsRegistryPath,
20506
+ getResultsRepoCachePaths,
20507
+ getResultsRepoStatus,
19800
20508
  getSubagentsRoot,
19801
20509
  getTextContent,
19802
20510
  getTraceStateRoot,
@@ -19826,12 +20534,15 @@ export {
19826
20534
  mergeExecutionMetrics,
19827
20535
  negateScore,
19828
20536
  normalizeLineEndings,
20537
+ normalizeResultsExportConfig,
19829
20538
  parseAgentSkillsEvals,
19830
20539
  parseClaudeSession,
19831
20540
  parseCodexSession,
19832
20541
  parseCopilotEvents,
19833
20542
  parseJsonFromText,
19834
20543
  parseJsonSafe,
20544
+ prepareResultsRepoBranch,
20545
+ pushResultsRepoBranch,
19835
20546
  readJsonFile,
19836
20547
  readTargetDefinitions,
19837
20548
  readTestSuiteMetadata,
@@ -19842,6 +20553,8 @@ export {
19842
20553
  resolveAndCreateProvider,
19843
20554
  resolveDelegatedTargetDefinition,
19844
20555
  resolveFileReference,
20556
+ resolveResultsRepoRunsDir,
20557
+ resolveResultsRepoUrl,
19845
20558
  resolveTargetDefinition,
19846
20559
  resolveWorkspaceTemplate,
19847
20560
  rubricEvaluationSchema,
@@ -19863,12 +20576,14 @@ export {
19863
20576
  scoreToVerdict,
19864
20577
  shouldEnableCache,
19865
20578
  shouldSkipCacheForTemperature,
20579
+ stageResultsArtifacts,
19866
20580
  subscribeToClaudeLogEntries,
19867
20581
  subscribeToCodexLogEntries,
19868
20582
  subscribeToCopilotCliLogEntries,
19869
20583
  subscribeToCopilotSdkLogEntries,
19870
20584
  subscribeToPiLogEntries,
19871
20585
  substituteVariables,
20586
+ syncResultsRepo,
19872
20587
  toCamelCaseDeep,
19873
20588
  toSnakeCaseDeep,
19874
20589
  toTranscriptJsonLine,