@agentv/core 4.9.1 → 4.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-3WGHC7LC.js +149 -0
- package/dist/chunk-3WGHC7LC.js.map +1 -0
- package/dist/{chunk-VCVVKCC4.js → chunk-5POFMJJ7.js} +1 -1
- package/dist/chunk-5POFMJJ7.js.map +1 -0
- package/dist/chunk-SDIANPEY.js +181 -0
- package/dist/chunk-SDIANPEY.js.map +1 -0
- package/dist/docker-workspace-RPPXBT27.js +9 -0
- package/dist/docker-workspace-RPPXBT27.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +70 -3
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +71 -4
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/exec-AR6JUUN5.js +9 -0
- package/dist/exec-AR6JUUN5.js.map +1 -0
- package/dist/index.cjs +1932 -858
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +210 -9
- package/dist/index.d.ts +210 -9
- package/dist/index.js +1366 -651
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-VCVVKCC4.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -25,10 +25,17 @@ import {
|
|
|
25
25
|
resolveDelegatedTargetDefinition,
|
|
26
26
|
resolveFileReference,
|
|
27
27
|
resolveTargetDefinition
|
|
28
|
-
} from "./chunk-VCVVKCC4.js";
|
|
28
|
+
} from "./chunk-5POFMJJ7.js";
|
|
29
|
+
import {
|
|
30
|
+
execFileWithStdin,
|
|
31
|
+
execShellWithStdin
|
|
32
|
+
} from "./chunk-3WGHC7LC.js";
|
|
29
33
|
import {
|
|
30
34
|
AgentvProvider
|
|
31
35
|
} from "./chunk-PRNXHNLF.js";
|
|
36
|
+
import {
|
|
37
|
+
DockerWorkspaceProvider
|
|
38
|
+
} from "./chunk-SDIANPEY.js";
|
|
32
39
|
import {
|
|
33
40
|
OtlpJsonFileExporter
|
|
34
41
|
} from "./chunk-KPSI5CSL.js";
|
|
@@ -152,10 +159,10 @@ function mergeExecutionMetrics(computed, metrics) {
|
|
|
152
159
|
}
|
|
153
160
|
|
|
154
161
|
// src/evaluation/yaml-parser.ts
|
|
155
|
-
import { readFile as
|
|
156
|
-
import
|
|
162
|
+
import { readFile as readFile8 } from "node:fs/promises";
|
|
163
|
+
import path8 from "node:path";
|
|
157
164
|
import micromatch2 from "micromatch";
|
|
158
|
-
import { parse as
|
|
165
|
+
import { parse as parse3 } from "yaml";
|
|
159
166
|
|
|
160
167
|
// src/evaluation/input-message-utils.ts
|
|
161
168
|
function flattenInputMessages(messages) {
|
|
@@ -441,10 +448,12 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
441
448
|
parsed.execution,
|
|
442
449
|
configPath
|
|
443
450
|
);
|
|
451
|
+
const results = parseResultsConfig(parsed.results, configPath);
|
|
444
452
|
return {
|
|
445
453
|
required_version: requiredVersion,
|
|
446
454
|
eval_patterns: evalPatterns,
|
|
447
|
-
execution: executionDefaults
|
|
455
|
+
execution: executionDefaults,
|
|
456
|
+
results
|
|
448
457
|
};
|
|
449
458
|
} catch (error) {
|
|
450
459
|
logWarning(
|
|
@@ -679,15 +688,234 @@ function parseExecutionDefaults(raw, configPath) {
|
|
|
679
688
|
}
|
|
680
689
|
return Object.keys(result).length > 0 ? result : void 0;
|
|
681
690
|
}
|
|
691
|
+
/**
 * Parses the optional `results` section of a config file.
 *
 * Only the `export` sub-section is interpreted; when it is absent or invalid
 * the whole `results` section is treated as not configured.
 *
 * @param {unknown} raw - Raw `results` value taken from the parsed config.
 * @param {string} configPath - Config file path, used only in warning text.
 * @returns {{ export: object } | undefined} The validated results config, or
 *   `undefined` when nothing usable was provided.
 */
function parseResultsConfig(raw, configPath) {
  // `== null` intentionally matches both null and undefined.
  if (raw == null) {
    return void 0;
  }
  const isPlainObject = typeof raw === "object" && !Array.isArray(raw);
  if (!isPlainObject) {
    logWarning(`Invalid results in ${configPath}, expected object`);
    return void 0;
  }
  const exportSettings = parseResultsExportConfig(raw.export, configPath);
  return exportSettings ? { export: exportSettings } : void 0;
}
|
|
706
|
+
/**
 * Validates the `results.export` config section.
 *
 * `repo` and `path` are required non-empty strings (trimmed). `auto_push`
 * must be a boolean when present, and `branch_prefix` a non-empty string when
 * present. Any violation logs a warning and discards the whole section.
 *
 * @param {unknown} raw - Raw `results.export` value.
 * @param {string} configPath - Config file path, used only in warning text.
 * @returns {{ repo: string, path: string, auto_push?: boolean, branch_prefix?: string } | undefined}
 */
function parseResultsExportConfig(raw, configPath) {
  if (raw == null) {
    return void 0;
  }
  if (typeof raw !== "object" || Array.isArray(raw)) {
    logWarning(`Invalid results.export in ${configPath}, expected object`);
    return void 0;
  }

  const {
    repo: rawRepo,
    path: rawPath,
    auto_push: autoPush,
    branch_prefix: rawBranchPrefix,
  } = raw;

  const repo = typeof rawRepo === "string" ? rawRepo.trim() : "";
  if (!repo) {
    logWarning(`Invalid results.export.repo in ${configPath}, expected non-empty string`);
    return void 0;
  }

  const exportPath = typeof rawPath === "string" ? rawPath.trim() : "";
  if (!exportPath) {
    logWarning(`Invalid results.export.path in ${configPath}, expected non-empty string`);
    return void 0;
  }

  if (autoPush !== void 0 && typeof autoPush !== "boolean") {
    logWarning(`Invalid results.export.auto_push in ${configPath}, expected boolean`);
    return void 0;
  }

  let branchPrefix;
  if (rawBranchPrefix !== void 0) {
    const isUsable = typeof rawBranchPrefix === "string" && rawBranchPrefix.trim().length > 0;
    if (!isUsable) {
      logWarning(
        `Invalid results.export.branch_prefix in ${configPath}, expected non-empty string`
      );
      return void 0;
    }
    branchPrefix = rawBranchPrefix.trim();
  }

  // Optional keys are only emitted when they were actually configured.
  const result = { repo, path: exportPath };
  if (typeof autoPush === "boolean") {
    result.auto_push = autoPush;
  }
  if (branchPrefix) {
    result.branch_prefix = branchPrefix;
  }
  return result;
}
|
|
682
746
|
/**
 * Prints a warning to stderr via `console.warn`, wrapped in the
 * `ANSI_YELLOW` / `ANSI_RESET2` escape sequences.
 *
 * @param {string} message - Warning text (without the "Warning: " prefix).
 */
function logWarning(message) {
  const line = `${ANSI_YELLOW}Warning: ${message}${ANSI_RESET2}`;
  console.warn(line);
}
|
|
685
749
|
|
|
686
750
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
751
|
+
import { readFile as readFile5 } from "node:fs/promises";
|
|
752
|
+
import path5 from "node:path";
|
|
753
|
+
import { parse as parse2 } from "yaml";
|
|
754
|
+
|
|
755
|
+
// src/evaluation/content-preprocessor.ts
|
|
756
|
+
import { readFile as readFile3 } from "node:fs/promises";
|
|
687
757
|
import path4 from "node:path";
|
|
758
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
759
|
+
// Maps short type names / file extensions to canonical MIME types.
// The table has a null prototype so that bracket lookups with arbitrary,
// user-supplied keys (e.g. "constructor") never resolve to members inherited
// from Object.prototype.
var MIME_TYPE_ALIASES = {
  __proto__: null,
  csv: "text/csv",
  docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  htm: "text/html",
  html: "text/html",
  json: "application/json",
  markdown: "text/markdown",
  md: "text/markdown",
  pdf: "application/pdf",
  sql: "application/sql",
  txt: "text/plain",
  xhtml: "application/xhtml+xml",
  xls: "application/vnd.ms-excel",
  xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  xml: "application/xml",
  yaml: "application/yaml",
  yml: "application/yaml"
};
// U+FFFD, the character a lossy UTF-8 decode substitutes for invalid bytes.
var REPLACEMENT_CHAR = "\uFFFD";
|
|
778
|
+
/**
 * Flattens message content into plain text.
 *
 * String content is returned unchanged. For an array of content blocks,
 * `text` blocks contribute their text, `file` blocks are passed through
 * `preprocessContentFile`, and every other block type is ignored. The
 * collected parts are joined with newlines.
 *
 * @param {string | Array<object> | null | undefined} content - Message content.
 * @param {Array<object> | undefined} preprocessors - Preprocessor entries
 *   forwarded to `preprocessContentFile`.
 * @param {{ basePath?: string }} [options] - `basePath` is forwarded for
 *   resolving file paths.
 * @returns {Promise<{ text: string, warnings: Array<object> }>}
 */
async function extractTextWithPreprocessors(content, preprocessors, options = {}) {
  const warnings = [];
  if (typeof content === "string") {
    return { text: content, warnings };
  }
  if (!content || content.length === 0) {
    return { text: "", warnings };
  }

  const parts = [];
  // Blocks are handled one at a time so output order follows input order.
  for (const block of content) {
    switch (block.type) {
      case "text":
        parts.push(block.text);
        break;
      case "file": {
        const fileResult = await preprocessContentFile(block, preprocessors, options.basePath);
        if (fileResult.text) {
          parts.push(fileResult.text);
        }
        warnings.push(...fileResult.warnings);
        break;
      }
      default:
        break;
    }
  }
  return { text: parts.join("\n"), warnings };
}
|
|
803
|
+
/**
 * Converts a single `file` content block into text.
 *
 * If a configured preprocessor matches the block's (normalized) media type,
 * it is run via `runContentPreprocessor`. Otherwise the file is read and
 * decoded as UTF-8, with CRLF normalized to LF. Files containing a NUL byte,
 * or whose decoded text contains the replacement character, are rejected
 * with a warning.
 *
 * @param {{ path: string, media_type: string }} block - File content block.
 * @param {Array<{ type: string }> | undefined} preprocessors - Candidate preprocessors.
 * @param {string | undefined} basePath - Base directory for relative paths.
 * @returns {Promise<{ text: string, warnings: Array<{ file: string, mediaType: string, reason: string }> }>}
 */
async function preprocessContentFile(block, preprocessors, basePath) {
  // Every failure has the same shape: no text plus one warning for this file.
  const failWith = (reason) => ({
    text: "",
    warnings: [{ file: block.path, mediaType: block.media_type, reason }],
  });

  const mediaType = normalizePreprocessorType(block.media_type);
  const resolvedPath = resolveLocalFilePath(block.path, basePath);
  if (!resolvedPath) {
    return failWith("remote file paths are not supported for preprocessing");
  }

  const matching = preprocessors?.find(
    (candidate) => normalizePreprocessorType(candidate.type) === mediaType
  );
  if (matching) {
    return runContentPreprocessor(block, resolvedPath, matching);
  }

  try {
    const bytes = await readFile3(resolvedPath);
    const decoded = bytes.toString("utf8").replace(/\r\n/g, "\n");
    const looksBinary = bytes.includes(0) || decoded.includes(REPLACEMENT_CHAR);
    if (looksBinary) {
      return failWith(
        "default UTF-8 read produced binary or invalid text; configure a preprocessor"
      );
    }
    return { text: formatFileText(block.path, decoded), warnings: [] };
  } catch (error) {
    return failWith(error instanceof Error ? error.message : String(error));
  }
}
|
|
853
|
+
/**
 * Runs an external preprocessor command for a file block.
 *
 * The command (`resolvedCommand` when set, else `command`) is passed to
 * `execFileWithStdin` together with a JSON payload holding `path`,
 * `original_path` and `media_type`. Trimmed stdout becomes the file text. A
 * non-zero exit code or a thrown error is reported as a warning rather than
 * thrown.
 *
 * @param {{ path: string, media_type: string }} block - File content block.
 * @param {string} resolvedPath - Local path of the file.
 * @param {{ command: string[], resolvedCommand?: string[] }} preprocessor
 * @returns {Promise<{ text: string, warnings: Array<{ file: string, mediaType: string, reason: string }> }>}
 */
async function runContentPreprocessor(block, resolvedPath, preprocessor) {
  const failWith = (reason) => ({
    text: "",
    warnings: [{ file: block.path, mediaType: block.media_type, reason }],
  });

  try {
    const argv = preprocessor.resolvedCommand ?? preprocessor.command;
    const payload = JSON.stringify({
      path: resolvedPath,
      original_path: block.path,
      media_type: block.media_type,
    });
    const outcome = await execFileWithStdin(argv, payload);
    if (outcome.exitCode !== 0) {
      // Prefer the command's own stderr; fall back to a generic message.
      return failWith(outcome.stderr.trim() || `preprocessor exited with code ${outcome.exitCode}`);
    }
    return { text: formatFileText(block.path, outcome.stdout.trim()), warnings: [] };
  } catch (error) {
    return failWith(error instanceof Error ? error.message : String(error));
  }
}
|
|
890
|
+
/**
 * Appends one "[file preprocessing warning]" line per warning to `text`.
 *
 * @param {string} text - Text to annotate; left out of the result when empty.
 * @param {Array<{ file: string, mediaType: string, reason: string }>} warnings
 * @returns {string} `text` unchanged when there are no warnings, otherwise
 *   the text followed by the warning lines, newline-separated.
 */
function appendPreprocessingWarnings(text, warnings) {
  if (warnings.length === 0) {
    return text;
  }
  // An empty text must not produce a leading blank line.
  const lines = text.length > 0 ? [text] : [];
  for (const { file, mediaType, reason } of warnings) {
    lines.push(`[file preprocessing warning] ${file} (${mediaType}): ${reason}`);
  }
  return lines.join("\n");
}
|
|
899
|
+
/**
 * Normalizes a preprocessor / media type name for comparison.
 *
 * The value is trimmed and lower-cased, then mapped through
 * `MIME_TYPE_ALIASES` when it is a known alias (e.g. "md" -> "text/markdown").
 *
 * @param {string} value - Type name, alias or MIME type.
 * @returns {string} Canonical lower-case type string.
 */
function normalizePreprocessorType(value) {
  const normalized = value.trim().toLowerCase();
  // Only own entries count as aliases. A plain bracket lookup would also
  // return members inherited from Object.prototype (e.g. "constructor"),
  // yielding a non-string result.
  const isAlias = Object.prototype.hasOwnProperty.call(MIME_TYPE_ALIASES, normalized);
  return isAlias ? MIME_TYPE_ALIASES[normalized] ?? normalized : normalized;
}
|
|
903
|
+
/**
 * Resolves a file reference to an absolute local path.
 *
 * - `file://` URLs (scheme matched case-insensitively) are converted with
 *   `fileURLToPath`.
 * - Any other `scheme://` URL is treated as remote and yields `undefined`.
 * - Everything else is resolved against `basePath` when given, otherwise
 *   against the current working directory.
 *
 * @param {string} value - Path or URL of the file.
 * @param {string} [basePath] - Base directory for relative paths.
 * @returns {string | undefined} Absolute path, or `undefined` for remote URLs.
 */
function resolveLocalFilePath(value, basePath) {
  if (/^file:\/\//i.test(value)) {
    return fileURLToPath2(value);
  }
  // URI schemes may contain digits, "+", "-" and "." after the first letter
  // (RFC 3986 §3.1), e.g. "s3://"; a letters-only pattern would let such URLs
  // through as local paths.
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(value)) {
    return void 0;
  }
  return basePath ? path4.resolve(basePath, value) : path4.resolve(value);
}
|
|
912
|
+
/**
 * Prefixes file text with a `[[ file: <path> ]]` header line.
 *
 * @param {string} filePath - Path shown in the header.
 * @param {string} text - File contents placed on the following line(s).
 * @returns {string}
 */
function formatFileText(filePath, text) {
  const header = `[[ file: ${filePath} ]]`;
  return `${header}\n${text}`;
}
|
|
688
916
|
|
|
689
917
|
// src/evaluation/validation/prompt-validator.ts
|
|
690
|
-
import { readFile as
|
|
918
|
+
import { readFile as readFile4 } from "node:fs/promises";
|
|
691
919
|
|
|
692
920
|
// src/evaluation/template-variables.ts
|
|
693
921
|
var TEMPLATE_VARIABLES = {
|
|
@@ -718,7 +946,7 @@ var DEPRECATED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Map([
|
|
|
718
946
|
var ANSI_YELLOW2 = "\x1B[33m";
|
|
719
947
|
var ANSI_RESET3 = "\x1B[0m";
|
|
720
948
|
/**
 * Reads a custom prompt file as UTF-8 and checks its template variables.
 *
 * @param {string} promptPath - Path of the prompt file; also passed to
 *   `validateTemplateVariables` as the source label.
 * @returns {Promise<void>} Rejects if the file cannot be read or if
 *   `validateTemplateVariables` throws.
 */
async function validateCustomPromptContent(promptPath) {
  const promptText = await readFile4(promptPath, "utf8");
  validateTemplateVariables(promptText, promptPath);
}
|
|
724
952
|
function validateTemplateVariables(content, source) {
|
|
@@ -768,6 +996,7 @@ function validateTemplateVariables(content, source) {
|
|
|
768
996
|
// src/evaluation/loaders/evaluator-parser.ts
|
|
769
997
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
770
998
|
var ANSI_RESET4 = "\x1B[0m";
|
|
999
|
+
var MAX_ASSERTION_INCLUDE_DEPTH = 3;
|
|
771
1000
|
var PROMPT_FILE_PREFIX = "file://";
|
|
772
1001
|
function normalizeEvaluatorType(type) {
|
|
773
1002
|
return type.replace(/_/g, "-");
|
|
@@ -775,22 +1004,104 @@ function normalizeEvaluatorType(type) {
|
|
|
775
1004
|
/**
 * Tells whether an evaluator type is one of the legacy "judge" names.
 *
 * @param {string} type - Evaluator type name.
 * @returns {boolean} `true` for "code-judge" or "llm-judge".
 */
function isDeprecatedJudgeType(type) {
  switch (type) {
    case "code-judge":
    case "llm-judge":
      return true;
    default:
      return false;
  }
}
|
|
778
|
-
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId) {
|
|
1007
|
+
/**
 * Builds the evaluator list for one eval case.
 *
 * Case-level evaluators are taken from the first defined of `assertions`,
 * `assert`, `execution.evaluators` (deprecated: use assertions) and
 * `evaluators`. Root-level evaluators come from the global execution config
 * unless the case sets `execution.skip_defaults: true`. Both lists are parsed
 * with `parseEvaluatorList` and concatenated, case evaluators first.
 *
 * @param {object} rawEvalCase - Raw eval case object.
 * @param {object | undefined} globalExecution - Suite-level execution config.
 * @param {string[]} searchRoots - Directories used to resolve file references.
 * @param {string} evalId - Eval identifier, used in messages.
 * @param {Array<object> | undefined} defaultPreprocessors - Preprocessors
 *   forwarded to `parseEvaluatorList`.
 * @returns {Promise<Array<object> | undefined>} Evaluators, or `undefined`
 *   when none were configured.
 */
async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId, defaultPreprocessors) {
  const { execution } = rawEvalCase;
  const executionObject = isJsonObject2(execution) ? execution : void 0;

  const caseEvaluators =
    rawEvalCase.assertions ??
    rawEvalCase.assert ??
    executionObject?.evaluators ?? // deprecated: use assertions
    rawEvalCase.evaluators;

  let rootEvaluators;
  if (executionObject?.skip_defaults !== true) {
    rootEvaluators =
      globalExecution?.assertions ?? globalExecution?.assert ?? globalExecution?.evaluators;
  }

  // Parsed one after the other so any messages appear in a stable order.
  const parsedCase = await parseEvaluatorList(caseEvaluators, searchRoots, evalId, defaultPreprocessors);
  const parsedRoot = await parseEvaluatorList(rootEvaluators, searchRoots, evalId, defaultPreprocessors);

  const combined = [...(parsedCase ?? []), ...(parsedRoot ?? [])];
  return combined.length > 0 ? combined : void 0;
}
|
|
793
|
-
|
|
1032
|
+
/**
 * Tells whether an assertion entry is a pure include directive, i.e. an
 * object whose only key is `include` with a string value.
 *
 * @param {unknown} value - Raw assertion entry.
 * @returns {boolean}
 */
function isIncludeEntry(value) {
  if (!isJsonObject2(value)) {
    return false;
  }
  if (typeof value.include !== "string") {
    return false;
  }
  return Object.keys(value).length === 1;
}
|
|
1035
|
+
/**
 * Tells whether an include value is a bare template name rather than a path:
 * it must not start with "." and must not contain "/" or "\".
 *
 * @param {string} value - The `include` string.
 * @returns {boolean}
 */
function isTemplateReference(value) {
  if (value.startsWith(".")) {
    return false;
  }
  const hasPathSeparator = /[\\/]/.test(value);
  return !hasPathSeparator;
}
|
|
1038
|
+
/**
 * Locates the file behind an assertion `include` value.
 *
 * Bare template names are looked up as `.agentv/templates/<name>.yaml`, then
 * `.agentv/templates/<name>.yml`. Anything else is used as a path as-is.
 * Candidates are tried in order through `resolveFileReference2`.
 *
 * @param {string} include - Template name or file path.
 * @param {string[]} searchRoots - Directories to search.
 * @returns {Promise<{ displayPath: string, resolvedPath: string, attempted: string[] }>}
 *   `resolvedPath` is an empty string when nothing matched. `attempted`
 *   accumulates the locations tried across all candidates.
 */
async function resolveAssertionTemplateReference(include, searchRoots) {
  let candidates;
  if (isTemplateReference(include)) {
    const templatesDir = [".agentv", "templates"];
    candidates = ["yaml", "yml"].map((ext) => path5.join(...templatesDir, `${include}.${ext}`));
  } else {
    candidates = [include];
  }

  const attempted = [];
  for (const candidate of candidates) {
    const lookup = await resolveFileReference2(candidate, searchRoots);
    attempted.push(...lookup.attempted);
    if (lookup.resolvedPath) {
      const { displayPath, resolvedPath } = lookup;
      return { displayPath, resolvedPath, attempted };
    }
  }

  // Nothing matched: report the first candidate as the path that was wanted.
  return {
    displayPath: candidates[0] ?? include,
    resolvedPath: "",
    attempted,
  };
}
|
|
1061
|
+
/**
 * Loads the assertions of an included template file and expands any nested
 * includes it contains.
 *
 * @param {string} include - Template name or path from the `include` entry.
 * @param {string[]} searchRoots - Directories used to locate the template.
 * @param {string} evalId - Eval identifier, used in error messages.
 * @param {{ depth: number, chain: string[] }} includeContext - Current nesting
 *   depth and the resolved paths of the templates already being expanded.
 * @returns {Promise<Array<unknown>>} The expanded assertion entries.
 * @throws {Error} When the include depth exceeds `MAX_ASSERTION_INCLUDE_DEPTH`,
 *   the template is not found, an include cycle is detected, or the file is
 *   not a YAML object with a top-level `assertions` array.
 */
async function loadAssertionTemplateEntries(include, searchRoots, evalId, includeContext) {
  const { depth, chain } = includeContext;
  const nextDepth = depth + 1;
  if (nextDepth > MAX_ASSERTION_INCLUDE_DEPTH) {
    const includeChain = [...chain, include].join(" -> ");
    throw new Error(
      `Assertion template include depth exceeded ${MAX_ASSERTION_INCLUDE_DEPTH} in '${evalId}'. Include chain: ${includeChain}`
    );
  }

  const located = await resolveAssertionTemplateReference(include, searchRoots);
  const templatePath = located.resolvedPath;
  if (!templatePath) {
    const triedLines = located.attempted.map((attempt) => ` Tried: ${attempt}`);
    const triedSuffix = triedLines.length > 0 ? `\n${triedLines.join("\n")}` : "";
    throw new Error(
      `Assertion template not found in '${evalId}': ${located.displayPath}${triedSuffix}`
    );
  }

  if (chain.includes(templatePath)) {
    const cycle = [...chain, templatePath].join(" -> ");
    throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
  }

  const yamlText = await readFile5(templatePath, "utf8");
  const parsed = interpolateEnv(parse2(yamlText), process.env);
  if (!isJsonObject2(parsed)) {
    throw new Error(
      `Invalid assertion template file in '${evalId}': ${templatePath} (expected a YAML object with an assertions array)`
    );
  }
  const { assertions } = parsed;
  if (!Array.isArray(assertions)) {
    throw new Error(
      `Invalid assertion template file in '${evalId}': ${templatePath} is missing a top-level assertions array`
    );
  }

  // Nested references resolve relative to the template's own directory first,
  // then fall back to the caller's search roots (without duplicating it).
  const templateDir = path5.dirname(templatePath);
  const otherRoots = searchRoots.filter((root) => path5.resolve(root) !== templateDir);
  const nestedSearchRoots = [templateDir, ...otherRoots];

  const expanded = await expandEvaluatorEntries(assertions, nestedSearchRoots, evalId, {
    depth: nextDepth,
    chain: [...chain, templatePath],
  });
  return expanded ?? [];
}
|
|
1104
|
+
async function expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId, includeContext = { depth: 0, chain: [] }) {
|
|
794
1105
|
if (candidateEvaluators === void 0) {
|
|
795
1106
|
return void 0;
|
|
796
1107
|
}
|
|
@@ -798,13 +1109,34 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
798
1109
|
logWarning2(`Skipping evaluators for '${evalId}': expected array`);
|
|
799
1110
|
return void 0;
|
|
800
1111
|
}
|
|
801
|
-
const
|
|
802
|
-
const
|
|
1112
|
+
const expanded = [];
|
|
1113
|
+
for (const rawEvaluator of candidateEvaluators) {
|
|
1114
|
+
if (isIncludeEntry(rawEvaluator)) {
|
|
1115
|
+
const included = await loadAssertionTemplateEntries(
|
|
1116
|
+
rawEvaluator.include,
|
|
1117
|
+
searchRoots,
|
|
1118
|
+
evalId,
|
|
1119
|
+
includeContext
|
|
1120
|
+
);
|
|
1121
|
+
expanded.push(...included);
|
|
1122
|
+
continue;
|
|
1123
|
+
}
|
|
1124
|
+
expanded.push(rawEvaluator);
|
|
1125
|
+
}
|
|
1126
|
+
return expanded;
|
|
1127
|
+
}
|
|
1128
|
+
async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defaultPreprocessors) {
|
|
1129
|
+
const expandedEvaluators = await expandEvaluatorEntries(candidateEvaluators, searchRoots, evalId);
|
|
1130
|
+
if (!expandedEvaluators) {
|
|
1131
|
+
return void 0;
|
|
1132
|
+
}
|
|
1133
|
+
const firstStringIndex = expandedEvaluators.findIndex((e) => typeof e === "string");
|
|
1134
|
+
const processedEvaluators = firstStringIndex === -1 ? [...expandedEvaluators] : (() => {
|
|
803
1135
|
const PLACEHOLDER = Symbol("rubric-placeholder");
|
|
804
1136
|
const strings = [];
|
|
805
1137
|
const result = [];
|
|
806
1138
|
let rubricInserted = false;
|
|
807
|
-
for (const item of
|
|
1139
|
+
for (const item of expandedEvaluators) {
|
|
808
1140
|
if (typeof item === "string") {
|
|
809
1141
|
const trimmed = item.trim();
|
|
810
1142
|
if (trimmed.length === 0) {
|
|
@@ -855,6 +1187,13 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
855
1187
|
continue;
|
|
856
1188
|
}
|
|
857
1189
|
const negate = rawEvaluator.negate === true ? true : void 0;
|
|
1190
|
+
const mergedPreprocessors = await parseMergedPreprocessors(
|
|
1191
|
+
rawEvaluator.preprocessors,
|
|
1192
|
+
defaultPreprocessors,
|
|
1193
|
+
searchRoots,
|
|
1194
|
+
name,
|
|
1195
|
+
evalId
|
|
1196
|
+
);
|
|
858
1197
|
if (isCustomType) {
|
|
859
1198
|
const weight2 = validateWeight(rawEvaluator.weight, name, evalId);
|
|
860
1199
|
const { required: required2, min_score: min_score2 } = parseRequiredAndMinScore(
|
|
@@ -913,7 +1252,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
913
1252
|
if (cwd) {
|
|
914
1253
|
const resolved = await resolveFileReference2(cwd, searchRoots);
|
|
915
1254
|
if (resolved.resolvedPath) {
|
|
916
|
-
resolvedCwd =
|
|
1255
|
+
resolvedCwd = path5.resolve(resolved.resolvedPath);
|
|
917
1256
|
} else {
|
|
918
1257
|
logWarning2(
|
|
919
1258
|
`Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -959,6 +1298,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
959
1298
|
"cwd",
|
|
960
1299
|
"weight",
|
|
961
1300
|
"target",
|
|
1301
|
+
"preprocessors",
|
|
962
1302
|
"required",
|
|
963
1303
|
"negate"
|
|
964
1304
|
]);
|
|
@@ -979,6 +1319,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
979
1319
|
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
980
1320
|
...negate !== void 0 ? { negate } : {},
|
|
981
1321
|
...Object.keys(config2).length > 0 ? { config: config2 } : {},
|
|
1322
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {},
|
|
982
1323
|
...targetConfig !== void 0 ? { target: targetConfig } : {}
|
|
983
1324
|
});
|
|
984
1325
|
continue;
|
|
@@ -1010,8 +1351,16 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1010
1351
|
);
|
|
1011
1352
|
continue;
|
|
1012
1353
|
}
|
|
1354
|
+
const expandedMembers = await expandEvaluatorEntries(
|
|
1355
|
+
rawMembers,
|
|
1356
|
+
searchRoots,
|
|
1357
|
+
`${evalId}:${name}`
|
|
1358
|
+
);
|
|
1359
|
+
if (!expandedMembers) {
|
|
1360
|
+
continue;
|
|
1361
|
+
}
|
|
1013
1362
|
const memberEvaluators = [];
|
|
1014
|
-
for (const rawMember of
|
|
1363
|
+
for (const rawMember of expandedMembers) {
|
|
1015
1364
|
if (!isJsonObject2(rawMember)) {
|
|
1016
1365
|
logWarning2(`Skipping invalid member evaluator in composite '${name}' (expected object)`);
|
|
1017
1366
|
continue;
|
|
@@ -1088,7 +1437,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1088
1437
|
aggregatorPrompt = fileRef;
|
|
1089
1438
|
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
1090
1439
|
if (resolved.resolvedPath) {
|
|
1091
|
-
promptPath2 =
|
|
1440
|
+
promptPath2 = path5.resolve(resolved.resolvedPath);
|
|
1092
1441
|
} else {
|
|
1093
1442
|
throw new Error(
|
|
1094
1443
|
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
@@ -1742,7 +2091,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1742
2091
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
1743
2092
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
1744
2093
|
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
1745
|
-
...negate !== void 0 ? { negate } : {}
|
|
2094
|
+
...negate !== void 0 ? { negate } : {},
|
|
2095
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
|
|
1746
2096
|
});
|
|
1747
2097
|
continue;
|
|
1748
2098
|
}
|
|
@@ -1767,7 +2117,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1767
2117
|
const commandPath = commandArray[commandArray.length - 1];
|
|
1768
2118
|
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
1769
2119
|
if (resolved.resolvedPath) {
|
|
1770
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
2120
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), path5.resolve(resolved.resolvedPath)];
|
|
1771
2121
|
} else {
|
|
1772
2122
|
throw new Error(
|
|
1773
2123
|
`Evaluator '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -1782,7 +2132,7 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1782
2132
|
prompt = fileRef;
|
|
1783
2133
|
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
1784
2134
|
if (resolved.resolvedPath) {
|
|
1785
|
-
promptPath =
|
|
2135
|
+
promptPath = path5.resolve(resolved.resolvedPath);
|
|
1786
2136
|
try {
|
|
1787
2137
|
await validateCustomPromptContent(promptPath);
|
|
1788
2138
|
} catch (error) {
|
|
@@ -1825,7 +2175,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1825
2175
|
...weight2 !== void 0 ? { weight: weight2 } : {},
|
|
1826
2176
|
...required2 !== void 0 ? { required: required2 } : {},
|
|
1827
2177
|
...min_score2 !== void 0 ? { min_score: min_score2 } : {},
|
|
1828
|
-
...negate !== void 0 ? { negate } : {}
|
|
2178
|
+
...negate !== void 0 ? { negate } : {},
|
|
2179
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
|
|
1829
2180
|
});
|
|
1830
2181
|
continue;
|
|
1831
2182
|
}
|
|
@@ -1850,7 +2201,8 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1850
2201
|
"negate",
|
|
1851
2202
|
"max_steps",
|
|
1852
2203
|
"maxSteps",
|
|
1853
|
-
"temperature"
|
|
2204
|
+
"temperature",
|
|
2205
|
+
"preprocessors"
|
|
1854
2206
|
]);
|
|
1855
2207
|
const config = {};
|
|
1856
2208
|
for (const [key, value] of Object.entries(rawEvaluator)) {
|
|
@@ -1880,30 +2232,70 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId) {
|
|
|
1880
2232
|
...negate !== void 0 ? { negate } : {},
|
|
1881
2233
|
...finalConfig ? { config: finalConfig } : {},
|
|
1882
2234
|
...llmMaxSteps !== void 0 ? { max_steps: llmMaxSteps } : {},
|
|
1883
|
-
...llmTemperature !== void 0 ? { temperature: llmTemperature } : {}
|
|
2235
|
+
...llmTemperature !== void 0 ? { temperature: llmTemperature } : {},
|
|
2236
|
+
...mergedPreprocessors ? { preprocessors: mergedPreprocessors } : {}
|
|
1884
2237
|
});
|
|
1885
2238
|
}
|
|
1886
2239
|
return evaluators.length > 0 ? evaluators : void 0;
|
|
1887
2240
|
}
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
function
|
|
1904
|
-
if (
|
|
2241
|
+
/**
 * Combines default preprocessors with an evaluator's own `preprocessors`.
 *
 * Entries are keyed by normalized type. An evaluator-level entry replaces a
 * default of the same type but keeps the position the default had, and new
 * types are appended.
 *
 * @param {unknown} rawValue - Raw `preprocessors` value of the evaluator.
 * @param {Array<{ type: string }> | undefined} defaultPreprocessors - Defaults.
 * @param {string[]} searchRoots - Directories used to resolve command files.
 * @param {string} evaluatorName - Evaluator name, used in error messages.
 * @param {string} evalId - Eval identifier, used in error messages.
 * @returns {Promise<Array<object> | undefined>} Merged list, or `undefined`
 *   when neither defaults nor overrides exist.
 */
async function parseMergedPreprocessors(rawValue, defaultPreprocessors, searchRoots, evaluatorName, evalId) {
  const defaults = defaultPreprocessors ?? [];
  const overrides = (await parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId)) ?? [];
  if (defaults.length === 0 && overrides.length === 0) {
    return void 0;
  }

  // Later entries win, so overrides are inserted after the defaults.
  const byType = /* @__PURE__ */ new Map();
  for (const entry of [...defaults, ...overrides]) {
    byType.set(normalizePreprocessorType(entry.type), entry);
  }
  return Array.from(byType.values());
}
|
|
2256
|
+
/**
 * Validates an evaluator's `preprocessors` list.
 *
 * Each entry needs a non-empty `type` and a non-empty `command` array. The
 * last command element is treated as a file reference and must resolve via
 * `resolveFileReference2`; its absolute path replaces that element in
 * `resolvedCommand`.
 *
 * @param {unknown} rawValue - Raw `preprocessors` value.
 * @param {string[]} searchRoots - Directories used to resolve command files.
 * @param {string} evaluatorName - Evaluator name, used in error messages.
 * @param {string} evalId - Eval identifier, used in error messages.
 * @returns {Promise<Array<{ type: string, command: string[], resolvedCommand: string[] }> | undefined>}
 *   `undefined` when `rawValue` is `undefined`.
 * @throws {Error} On any malformed entry or unresolved command file.
 */
async function parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId) {
  if (rawValue === void 0) {
    return void 0;
  }
  const where = `Evaluator '${evaluatorName}' in '${evalId}'`;
  if (!Array.isArray(rawValue)) {
    throw new Error(`${where}: preprocessors must be an array`);
  }

  const parsedEntries = [];
  for (const entry of rawValue) {
    if (!isJsonObject2(entry)) {
      throw new Error(`${where}: each preprocessor must be an object`);
    }

    const type = asString(entry.type)?.trim();
    if (!type) {
      throw new Error(`${where}: preprocessor.type is required`);
    }

    const command = asStringArray(
      entry.command,
      `preprocessor command for evaluator '${evaluatorName}' in '${evalId}'`
    );
    if (!command || command.length === 0) {
      throw new Error(`${where}: preprocessor '${type}' requires command`);
    }

    // Only the final element is resolved; leading elements pass through as-is.
    const leadingArgs = command.slice(0, -1);
    const scriptRef = command[command.length - 1];
    const located = await resolveFileReference2(scriptRef, searchRoots);
    if (!located.resolvedPath) {
      throw new Error(`${where}: preprocessor command file not found: ${located.displayPath}`);
    }

    parsedEntries.push({
      type,
      command,
      resolvedCommand: [...leadingArgs, path5.resolve(located.resolvedPath)],
    });
  }
  return parsedEntries;
}
|
|
2298
|
+
function generateAssertionName(typeValue, rawEvaluator) {
|
|
1907
2299
|
const value = asString(rawEvaluator.value);
|
|
1908
2300
|
const arrayValue = Array.isArray(rawEvaluator.value) ? rawEvaluator.value : void 0;
|
|
1909
2301
|
switch (typeValue) {
|
|
@@ -1936,7 +2328,7 @@ function generateAssertionName(typeValue, rawEvaluator) {
|
|
|
1936
2328
|
case "rubrics":
|
|
1937
2329
|
return "rubrics";
|
|
1938
2330
|
default:
|
|
1939
|
-
return
|
|
2331
|
+
return typeValue;
|
|
1940
2332
|
}
|
|
1941
2333
|
}
|
|
1942
2334
|
function coerceEvaluator(candidate, contextId) {
|
|
@@ -2294,14 +2686,14 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
2294
2686
|
}
|
|
2295
2687
|
|
|
2296
2688
|
// src/evaluation/loaders/jsonl-parser.ts
|
|
2297
|
-
import { readFile as
|
|
2298
|
-
import
|
|
2689
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
2690
|
+
import path7 from "node:path";
|
|
2299
2691
|
import micromatch from "micromatch";
|
|
2300
2692
|
import { parse as parseYaml } from "yaml";
|
|
2301
2693
|
|
|
2302
2694
|
// src/evaluation/loaders/message-processor.ts
|
|
2303
|
-
import { readFile as
|
|
2304
|
-
import
|
|
2695
|
+
import { readFile as readFile6 } from "node:fs/promises";
|
|
2696
|
+
import path6 from "node:path";
|
|
2305
2697
|
|
|
2306
2698
|
// src/evaluation/formatting/segment-formatter.ts
|
|
2307
2699
|
function formatFileContents(parts) {
|
|
@@ -2367,7 +2759,7 @@ var IMAGE_MEDIA_TYPES = {
|
|
|
2367
2759
|
".bmp": "image/bmp"
|
|
2368
2760
|
};
|
|
2369
2761
|
/**
 * Looks up the image media type for a file based on its extension.
 *
 * @param {string} filePath - Path whose extension is inspected
 *   (case-insensitively).
 * @returns {string | undefined} The matching `IMAGE_MEDIA_TYPES` entry, or
 *   `undefined` when the extension is not listed.
 */
function detectImageMediaType(filePath) {
  const extension = path6.extname(filePath);
  return IMAGE_MEDIA_TYPES[extension.toLowerCase()];
}
|
|
2373
2765
|
var ANSI_YELLOW4 = "\x1B[33m";
|
|
@@ -2417,12 +2809,12 @@ async function processMessages(options) {
|
|
|
2417
2809
|
continue;
|
|
2418
2810
|
}
|
|
2419
2811
|
try {
|
|
2420
|
-
const fileContent = (await
|
|
2812
|
+
const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2421
2813
|
processedContent.push({
|
|
2422
2814
|
...cloneJsonObject(rawSegment),
|
|
2423
2815
|
path: displayPath,
|
|
2424
2816
|
text: fileContent,
|
|
2425
|
-
resolvedPath:
|
|
2817
|
+
resolvedPath: path6.resolve(resolvedPath)
|
|
2426
2818
|
});
|
|
2427
2819
|
if (verbose) {
|
|
2428
2820
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -2458,7 +2850,7 @@ async function processMessages(options) {
|
|
|
2458
2850
|
continue;
|
|
2459
2851
|
}
|
|
2460
2852
|
try {
|
|
2461
|
-
const imageBuffer = await
|
|
2853
|
+
const imageBuffer = await readFile6(resolvedPath);
|
|
2462
2854
|
const base64 = imageBuffer.toString("base64");
|
|
2463
2855
|
processedContent.push({
|
|
2464
2856
|
type: "image",
|
|
@@ -2535,12 +2927,12 @@ async function processExpectedMessages(options) {
|
|
|
2535
2927
|
continue;
|
|
2536
2928
|
}
|
|
2537
2929
|
try {
|
|
2538
|
-
const fileContent = (await
|
|
2930
|
+
const fileContent = (await readFile6(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
2539
2931
|
processedContent.push({
|
|
2540
2932
|
type: "file",
|
|
2541
2933
|
path: displayPath,
|
|
2542
2934
|
text: fileContent,
|
|
2543
|
-
resolvedPath:
|
|
2935
|
+
resolvedPath: path6.resolve(resolvedPath)
|
|
2544
2936
|
});
|
|
2545
2937
|
if (verbose) {
|
|
2546
2938
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -2575,7 +2967,7 @@ async function processExpectedMessages(options) {
|
|
|
2575
2967
|
continue;
|
|
2576
2968
|
}
|
|
2577
2969
|
try {
|
|
2578
|
-
const imageBuffer = await
|
|
2970
|
+
const imageBuffer = await readFile6(resolvedPath);
|
|
2579
2971
|
const base64 = imageBuffer.toString("base64");
|
|
2580
2972
|
processedContent.push({
|
|
2581
2973
|
type: "image",
|
|
@@ -2684,7 +3076,7 @@ function matchesFilter(id, filter) {
|
|
|
2684
3076
|
return typeof filter === "string" ? micromatch.isMatch(id, filter) : filter.some((pattern) => micromatch.isMatch(id, pattern));
|
|
2685
3077
|
}
|
|
2686
3078
|
function detectFormat(filePath) {
|
|
2687
|
-
const ext =
|
|
3079
|
+
const ext = path7.extname(filePath).toLowerCase();
|
|
2688
3080
|
if (ext === ".jsonl") return "jsonl";
|
|
2689
3081
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
2690
3082
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -2693,9 +3085,9 @@ function detectFormat(filePath) {
|
|
|
2693
3085
|
);
|
|
2694
3086
|
}
|
|
2695
3087
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
2696
|
-
const dir =
|
|
2697
|
-
const base =
|
|
2698
|
-
const sidecarPath =
|
|
3088
|
+
const dir = path7.dirname(jsonlPath);
|
|
3089
|
+
const base = path7.basename(jsonlPath, ".jsonl");
|
|
3090
|
+
const sidecarPath = path7.join(dir, `${base}.yaml`);
|
|
2699
3091
|
if (!await fileExists2(sidecarPath)) {
|
|
2700
3092
|
if (verbose) {
|
|
2701
3093
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -2703,7 +3095,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
2703
3095
|
return {};
|
|
2704
3096
|
}
|
|
2705
3097
|
try {
|
|
2706
|
-
const content = await
|
|
3098
|
+
const content = await readFile7(sidecarPath, "utf8");
|
|
2707
3099
|
const parsed = interpolateEnv(parseYaml(content), process.env);
|
|
2708
3100
|
if (!isJsonObject(parsed)) {
|
|
2709
3101
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
@@ -2744,13 +3136,13 @@ function parseJsonlContent(content, filePath) {
|
|
|
2744
3136
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
2745
3137
|
const verbose = options?.verbose ?? false;
|
|
2746
3138
|
const filterPattern = options?.filter;
|
|
2747
|
-
const absoluteTestPath =
|
|
3139
|
+
const absoluteTestPath = path7.resolve(evalFilePath);
|
|
2748
3140
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
2749
3141
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
2750
3142
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
2751
|
-
const rawFile = await
|
|
3143
|
+
const rawFile = await readFile7(absoluteTestPath, "utf8");
|
|
2752
3144
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
2753
|
-
const fallbackSuiteName =
|
|
3145
|
+
const fallbackSuiteName = path7.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
2754
3146
|
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
2755
3147
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
2756
3148
|
const globalExecution = sidecar.execution;
|
|
@@ -2930,11 +3322,13 @@ function parseRepoCheckout(raw) {
|
|
|
2930
3322
|
if (!isJsonObject(raw)) return void 0;
|
|
2931
3323
|
const obj = raw;
|
|
2932
3324
|
const ref = typeof obj.ref === "string" ? obj.ref : void 0;
|
|
3325
|
+
const baseCommit = typeof obj.base_commit === "string" ? obj.base_commit : void 0;
|
|
2933
3326
|
const resolve = obj.resolve === "remote" || obj.resolve === "local" ? obj.resolve : void 0;
|
|
2934
3327
|
const ancestor = typeof obj.ancestor === "number" ? obj.ancestor : void 0;
|
|
2935
|
-
if (!ref && !resolve && ancestor === void 0) return void 0;
|
|
3328
|
+
if (!ref && !baseCommit && !resolve && ancestor === void 0) return void 0;
|
|
2936
3329
|
return {
|
|
2937
3330
|
...ref !== void 0 && { ref },
|
|
3331
|
+
...baseCommit !== void 0 && { base_commit: baseCommit },
|
|
2938
3332
|
...resolve !== void 0 && { resolve },
|
|
2939
3333
|
...ancestor !== void 0 && { ancestor }
|
|
2940
3334
|
};
|
|
@@ -2957,12 +3351,12 @@ function parseRepoConfig(raw) {
|
|
|
2957
3351
|
const obj = raw;
|
|
2958
3352
|
const repoPath = typeof obj.path === "string" ? obj.path : void 0;
|
|
2959
3353
|
const source = parseRepoSource(obj.source);
|
|
2960
|
-
if (!repoPath || !source) return void 0;
|
|
2961
3354
|
const checkout = parseRepoCheckout(obj.checkout);
|
|
2962
3355
|
const clone = parseRepoClone(obj.clone);
|
|
3356
|
+
if (!repoPath && !source && !checkout && !clone) return void 0;
|
|
2963
3357
|
return {
|
|
2964
|
-
path: repoPath,
|
|
2965
|
-
source,
|
|
3358
|
+
...repoPath !== void 0 && { path: repoPath },
|
|
3359
|
+
...source !== void 0 && { source },
|
|
2966
3360
|
...checkout !== void 0 && { checkout },
|
|
2967
3361
|
...clone !== void 0 && { clone }
|
|
2968
3362
|
};
|
|
@@ -3013,7 +3407,8 @@ ${messageContent}`);
|
|
|
3013
3407
|
segmentsByMessage,
|
|
3014
3408
|
mode
|
|
3015
3409
|
}) : void 0;
|
|
3016
|
-
|
|
3410
|
+
const systemMessage = extractSystemMessage(testCase.input, segmentsByMessage, mode);
|
|
3411
|
+
return { question, chatPrompt, systemMessage };
|
|
3017
3412
|
}
|
|
3018
3413
|
function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
3019
3414
|
if (messages.some((msg) => msg.role === "assistant" || msg.role === "tool")) {
|
|
@@ -3027,6 +3422,26 @@ function needsRoleMarkers(messages, processedSegmentsByMessage) {
|
|
|
3027
3422
|
}
|
|
3028
3423
|
return messagesWithContent > 1;
|
|
3029
3424
|
}
|
|
3425
|
+
function extractSystemMessage(messages, segmentsByMessage, mode) {
|
|
3426
|
+
const systemParts = [];
|
|
3427
|
+
for (let i = 0; i < messages.length; i++) {
|
|
3428
|
+
if (messages[i].role !== "system") {
|
|
3429
|
+
break;
|
|
3430
|
+
}
|
|
3431
|
+
const segments = segmentsByMessage[i];
|
|
3432
|
+
const contentParts = [];
|
|
3433
|
+
for (const segment of segments) {
|
|
3434
|
+
const formatted = formatSegment(segment, mode);
|
|
3435
|
+
if (formatted) {
|
|
3436
|
+
contentParts.push(formatted);
|
|
3437
|
+
}
|
|
3438
|
+
}
|
|
3439
|
+
if (contentParts.length > 0) {
|
|
3440
|
+
systemParts.push(contentParts.join("\n"));
|
|
3441
|
+
}
|
|
3442
|
+
}
|
|
3443
|
+
return systemParts.length > 0 ? systemParts.join("\n\n") : void 0;
|
|
3444
|
+
}
|
|
3030
3445
|
function buildChatPromptFromSegments(options) {
|
|
3031
3446
|
const { messages, segmentsByMessage, systemPrompt, mode = "lm" } = options;
|
|
3032
3447
|
if (messages.length === 0) {
|
|
@@ -3109,9 +3524,9 @@ function resolveTests(suite) {
|
|
|
3109
3524
|
}
|
|
3110
3525
|
async function readTestSuiteMetadata(testFilePath) {
|
|
3111
3526
|
try {
|
|
3112
|
-
const absolutePath =
|
|
3113
|
-
const content = await
|
|
3114
|
-
const parsed = interpolateEnv(
|
|
3527
|
+
const absolutePath = path8.resolve(testFilePath);
|
|
3528
|
+
const content = await readFile8(absolutePath, "utf8");
|
|
3529
|
+
const parsed = interpolateEnv(parse3(content), process.env);
|
|
3115
3530
|
if (!isJsonObject(parsed)) {
|
|
3116
3531
|
return {};
|
|
3117
3532
|
}
|
|
@@ -3164,25 +3579,31 @@ var loadEvalCases = loadTests;
|
|
|
3164
3579
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
3165
3580
|
const verbose = options?.verbose ?? false;
|
|
3166
3581
|
const filterPattern = options?.filter;
|
|
3167
|
-
const absoluteTestPath =
|
|
3582
|
+
const absoluteTestPath = path8.resolve(evalFilePath);
|
|
3168
3583
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
3169
3584
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
3170
3585
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
3171
|
-
const rawFile = await
|
|
3172
|
-
const interpolated = interpolateEnv(
|
|
3586
|
+
const rawFile = await readFile8(absoluteTestPath, "utf8");
|
|
3587
|
+
const interpolated = interpolateEnv(parse3(rawFile), process.env);
|
|
3173
3588
|
if (!isJsonObject(interpolated)) {
|
|
3174
3589
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
3175
3590
|
}
|
|
3176
3591
|
const suite = interpolated;
|
|
3177
3592
|
const suiteNameFromFile = asString5(suite.name)?.trim();
|
|
3178
|
-
const fallbackSuiteName =
|
|
3593
|
+
const fallbackSuiteName = path8.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
3179
3594
|
const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
|
|
3180
3595
|
const rawTestCases = resolveTests(suite);
|
|
3181
3596
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
3182
|
-
const
|
|
3597
|
+
const suitePreprocessors = await parsePreprocessors(
|
|
3598
|
+
suite.preprocessors,
|
|
3599
|
+
searchRoots,
|
|
3600
|
+
"<suite>",
|
|
3601
|
+
absoluteTestPath
|
|
3602
|
+
);
|
|
3603
|
+
const evalFileDir = path8.dirname(absoluteTestPath);
|
|
3183
3604
|
let expandedTestCases;
|
|
3184
3605
|
if (typeof rawTestCases === "string") {
|
|
3185
|
-
const externalPath =
|
|
3606
|
+
const externalPath = path8.resolve(evalFileDir, rawTestCases);
|
|
3186
3607
|
expandedTestCases = await loadCasesFromFile(externalPath);
|
|
3187
3608
|
} else if (Array.isArray(rawTestCases)) {
|
|
3188
3609
|
expandedTestCases = await expandFileReferences(rawTestCases, evalFileDir);
|
|
@@ -3280,7 +3701,8 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3280
3701
|
testCaseConfig,
|
|
3281
3702
|
globalExecution,
|
|
3282
3703
|
searchRoots,
|
|
3283
|
-
id ?? "unknown"
|
|
3704
|
+
id ?? "unknown",
|
|
3705
|
+
suitePreprocessors
|
|
3284
3706
|
);
|
|
3285
3707
|
} catch (error) {
|
|
3286
3708
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -3303,7 +3725,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3303
3725
|
const testCase = {
|
|
3304
3726
|
id,
|
|
3305
3727
|
suite: suiteName,
|
|
3306
|
-
category: options?.category,
|
|
3728
|
+
category: suite.category ?? options?.category,
|
|
3307
3729
|
conversation_id: conversationId,
|
|
3308
3730
|
question,
|
|
3309
3731
|
input: inputMessages,
|
|
@@ -3313,6 +3735,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3313
3735
|
criteria: outcome ?? "",
|
|
3314
3736
|
evaluator: testCaseEvaluatorKind,
|
|
3315
3737
|
assertions: evaluators,
|
|
3738
|
+
...suitePreprocessors ? { preprocessors: suitePreprocessors } : {},
|
|
3316
3739
|
workspace: mergedWorkspace,
|
|
3317
3740
|
metadata,
|
|
3318
3741
|
targets: caseTargets,
|
|
@@ -3353,8 +3776,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3353
3776
|
if (!command) return void 0;
|
|
3354
3777
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
3355
3778
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
3356
|
-
if (cwd && !
|
|
3357
|
-
cwd =
|
|
3779
|
+
if (cwd && !path8.isAbsolute(cwd)) {
|
|
3780
|
+
cwd = path8.resolve(evalFileDir, cwd);
|
|
3358
3781
|
}
|
|
3359
3782
|
const config = { command };
|
|
3360
3783
|
if (timeoutMs !== void 0) {
|
|
@@ -3392,20 +3815,20 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
3392
3815
|
}
|
|
3393
3816
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
3394
3817
|
if (typeof raw === "string") {
|
|
3395
|
-
const workspaceFilePath =
|
|
3818
|
+
const workspaceFilePath = path8.resolve(evalFileDir, raw);
|
|
3396
3819
|
let content;
|
|
3397
3820
|
try {
|
|
3398
|
-
content = await
|
|
3821
|
+
content = await readFile8(workspaceFilePath, "utf8");
|
|
3399
3822
|
} catch {
|
|
3400
3823
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
3401
3824
|
}
|
|
3402
|
-
const parsed = interpolateEnv(
|
|
3825
|
+
const parsed = interpolateEnv(parse3(content), process.env);
|
|
3403
3826
|
if (!isJsonObject(parsed)) {
|
|
3404
3827
|
throw new Error(
|
|
3405
3828
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
3406
3829
|
);
|
|
3407
3830
|
}
|
|
3408
|
-
const workspaceFileDir =
|
|
3831
|
+
const workspaceFileDir = path8.dirname(workspaceFilePath);
|
|
3409
3832
|
return parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
3410
3833
|
}
|
|
3411
3834
|
return parseWorkspaceConfig(raw, evalFileDir);
|
|
@@ -3425,8 +3848,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3425
3848
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
3426
3849
|
}
|
|
3427
3850
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
3428
|
-
if (template && !
|
|
3429
|
-
template =
|
|
3851
|
+
if (template && !path8.isAbsolute(template)) {
|
|
3852
|
+
template = path8.resolve(evalFileDir, template);
|
|
3430
3853
|
}
|
|
3431
3854
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
3432
3855
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -3434,14 +3857,28 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
3434
3857
|
const explicitMode = obj.mode === "pooled" || obj.mode === "temp" || obj.mode === "static" ? obj.mode : void 0;
|
|
3435
3858
|
const workspacePath = typeof obj.path === "string" ? obj.path : void 0;
|
|
3436
3859
|
const mode = explicitMode ?? (workspacePath ? "static" : void 0);
|
|
3437
|
-
|
|
3860
|
+
const docker = parseDockerWorkspaceConfig(obj.docker);
|
|
3861
|
+
if (!template && !isolation && !repos && !hooks && !mode && !workspacePath && !docker)
|
|
3862
|
+
return void 0;
|
|
3438
3863
|
return {
|
|
3439
3864
|
...template !== void 0 && { template },
|
|
3440
3865
|
...isolation !== void 0 && { isolation },
|
|
3441
3866
|
...repos !== void 0 && { repos },
|
|
3442
3867
|
...hooks !== void 0 && { hooks },
|
|
3443
3868
|
...mode !== void 0 && { mode },
|
|
3444
|
-
...workspacePath !== void 0 && { path: workspacePath }
|
|
3869
|
+
...workspacePath !== void 0 && { path: workspacePath },
|
|
3870
|
+
...docker !== void 0 && { docker }
|
|
3871
|
+
};
|
|
3872
|
+
}
|
|
3873
|
+
function parseDockerWorkspaceConfig(raw) {
|
|
3874
|
+
if (!isJsonObject(raw)) return void 0;
|
|
3875
|
+
const obj = raw;
|
|
3876
|
+
if (typeof obj.image !== "string") return void 0;
|
|
3877
|
+
return {
|
|
3878
|
+
image: obj.image,
|
|
3879
|
+
...typeof obj.timeout === "number" && { timeout: obj.timeout },
|
|
3880
|
+
...typeof obj.memory === "string" && { memory: obj.memory },
|
|
3881
|
+
...typeof obj.cpus === "number" && { cpus: obj.cpus }
|
|
3445
3882
|
};
|
|
3446
3883
|
}
|
|
3447
3884
|
function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
@@ -3470,7 +3907,8 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
3470
3907
|
repos: caseLevel.repos ?? suiteLevel.repos,
|
|
3471
3908
|
...hasHooks && { hooks: mergedHooks },
|
|
3472
3909
|
mode: caseLevel.mode ?? suiteLevel.mode,
|
|
3473
|
-
path: caseLevel.path ?? suiteLevel.path
|
|
3910
|
+
path: caseLevel.path ?? suiteLevel.path,
|
|
3911
|
+
docker: caseLevel.docker ?? suiteLevel.docker
|
|
3474
3912
|
};
|
|
3475
3913
|
}
|
|
3476
3914
|
function asString5(value) {
|
|
@@ -3497,8 +3935,8 @@ ${detailBlock}${ANSI_RESET7}`);
|
|
|
3497
3935
|
|
|
3498
3936
|
// src/evaluation/loaders/eval-yaml-transpiler.ts
|
|
3499
3937
|
import { readFileSync } from "node:fs";
|
|
3500
|
-
import
|
|
3501
|
-
import { parse as
|
|
3938
|
+
import path9 from "node:path";
|
|
3939
|
+
import { parse as parse4 } from "yaml";
|
|
3502
3940
|
function codeGraderInstruction(graderName, description) {
|
|
3503
3941
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
3504
3942
|
return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
|
|
@@ -3737,8 +4175,8 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
3737
4175
|
}
|
|
3738
4176
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
3739
4177
|
const content = readFileSync(evalYamlPath, "utf8");
|
|
3740
|
-
const parsed =
|
|
3741
|
-
return transpileEvalYaml(parsed,
|
|
4178
|
+
const parsed = parse4(content);
|
|
4179
|
+
return transpileEvalYaml(parsed, path9.basename(evalYamlPath));
|
|
3742
4180
|
}
|
|
3743
4181
|
function getOutputFilenames(result) {
|
|
3744
4182
|
const names = /* @__PURE__ */ new Map();
|
|
@@ -4176,7 +4614,7 @@ import { spawn } from "node:child_process";
|
|
|
4176
4614
|
import { randomUUID } from "node:crypto";
|
|
4177
4615
|
import { createWriteStream } from "node:fs";
|
|
4178
4616
|
import { mkdir } from "node:fs/promises";
|
|
4179
|
-
import
|
|
4617
|
+
import path11 from "node:path";
|
|
4180
4618
|
|
|
4181
4619
|
// src/evaluation/providers/claude-content.ts
|
|
4182
4620
|
function toContentArray(content) {
|
|
@@ -4275,7 +4713,7 @@ function subscribeToClaudeLogEntries(listener) {
|
|
|
4275
4713
|
}
|
|
4276
4714
|
|
|
4277
4715
|
// src/evaluation/providers/preread.ts
|
|
4278
|
-
import
|
|
4716
|
+
import path10 from "node:path";
|
|
4279
4717
|
function buildPromptDocument(request, inputFiles) {
|
|
4280
4718
|
const parts = [];
|
|
4281
4719
|
const inputFilesList = collectInputFiles(inputFiles);
|
|
@@ -4292,7 +4730,7 @@ function normalizeInputFiles(inputFiles) {
|
|
|
4292
4730
|
}
|
|
4293
4731
|
const deduped = /* @__PURE__ */ new Map();
|
|
4294
4732
|
for (const inputFile of inputFiles) {
|
|
4295
|
-
const absolutePath =
|
|
4733
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4296
4734
|
if (!deduped.has(absolutePath)) {
|
|
4297
4735
|
deduped.set(absolutePath, absolutePath);
|
|
4298
4736
|
}
|
|
@@ -4305,7 +4743,7 @@ function collectInputFiles(inputFiles) {
|
|
|
4305
4743
|
}
|
|
4306
4744
|
const unique = /* @__PURE__ */ new Map();
|
|
4307
4745
|
for (const inputFile of inputFiles) {
|
|
4308
|
-
const absolutePath =
|
|
4746
|
+
const absolutePath = path10.resolve(inputFile);
|
|
4309
4747
|
if (!unique.has(absolutePath)) {
|
|
4310
4748
|
unique.set(absolutePath, absolutePath);
|
|
4311
4749
|
}
|
|
@@ -4317,7 +4755,7 @@ function buildMandatoryPrereadBlock(inputFiles) {
|
|
|
4317
4755
|
return "";
|
|
4318
4756
|
}
|
|
4319
4757
|
const buildList = (files) => files.map((absolutePath) => {
|
|
4320
|
-
const fileName =
|
|
4758
|
+
const fileName = path10.basename(absolutePath);
|
|
4321
4759
|
const fileUri = pathToFileUri(absolutePath);
|
|
4322
4760
|
return `* [${fileName}](${fileUri})`;
|
|
4323
4761
|
});
|
|
@@ -4333,7 +4771,7 @@ ${buildList(inputFiles).join("\n")}.`);
|
|
|
4333
4771
|
return sections.join("\n");
|
|
4334
4772
|
}
|
|
4335
4773
|
function pathToFileUri(filePath) {
|
|
4336
|
-
const absolutePath =
|
|
4774
|
+
const absolutePath = path10.isAbsolute(filePath) ? filePath : path10.resolve(filePath);
|
|
4337
4775
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
4338
4776
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
4339
4777
|
return `file:///${normalizedPath}`;
|
|
@@ -4481,10 +4919,10 @@ var ClaudeCliProvider = class {
|
|
|
4481
4919
|
}
|
|
4482
4920
|
resolveCwd(cwdOverride) {
|
|
4483
4921
|
if (cwdOverride) {
|
|
4484
|
-
return
|
|
4922
|
+
return path11.resolve(cwdOverride);
|
|
4485
4923
|
}
|
|
4486
4924
|
if (this.config.cwd) {
|
|
4487
|
-
return
|
|
4925
|
+
return path11.resolve(this.config.cwd);
|
|
4488
4926
|
}
|
|
4489
4927
|
return void 0;
|
|
4490
4928
|
}
|
|
@@ -4494,9 +4932,9 @@ var ClaudeCliProvider = class {
|
|
|
4494
4932
|
return void 0;
|
|
4495
4933
|
}
|
|
4496
4934
|
if (this.config.logDir) {
|
|
4497
|
-
return
|
|
4935
|
+
return path11.resolve(this.config.logDir);
|
|
4498
4936
|
}
|
|
4499
|
-
return
|
|
4937
|
+
return path11.join(process.cwd(), ".agentv", "logs", "claude-cli");
|
|
4500
4938
|
}
|
|
4501
4939
|
async createStreamLogger(request) {
|
|
4502
4940
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4510,7 +4948,7 @@ var ClaudeCliProvider = class {
|
|
|
4510
4948
|
console.warn(`Skipping Claude CLI stream logging (could not create ${logDir}): ${message}`);
|
|
4511
4949
|
return void 0;
|
|
4512
4950
|
}
|
|
4513
|
-
const filePath =
|
|
4951
|
+
const filePath = path11.join(logDir, buildLogFilename(request, this.targetName));
|
|
4514
4952
|
try {
|
|
4515
4953
|
const logger = await ClaudeCliStreamLogger.create({
|
|
4516
4954
|
filePath,
|
|
@@ -4812,7 +5250,7 @@ function tryParseJson(line) {
|
|
|
4812
5250
|
import { randomUUID as randomUUID2 } from "node:crypto";
|
|
4813
5251
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
4814
5252
|
import { mkdir as mkdir2 } from "node:fs/promises";
|
|
4815
|
-
import
|
|
5253
|
+
import path12 from "node:path";
|
|
4816
5254
|
var claudeSdkModule = null;
|
|
4817
5255
|
async function loadClaudeSdk() {
|
|
4818
5256
|
if (!claudeSdkModule) {
|
|
@@ -4973,10 +5411,10 @@ var ClaudeSdkProvider = class {
|
|
|
4973
5411
|
}
|
|
4974
5412
|
resolveCwd(cwdOverride) {
|
|
4975
5413
|
if (cwdOverride) {
|
|
4976
|
-
return
|
|
5414
|
+
return path12.resolve(cwdOverride);
|
|
4977
5415
|
}
|
|
4978
5416
|
if (this.config.cwd) {
|
|
4979
|
-
return
|
|
5417
|
+
return path12.resolve(this.config.cwd);
|
|
4980
5418
|
}
|
|
4981
5419
|
return void 0;
|
|
4982
5420
|
}
|
|
@@ -4986,9 +5424,9 @@ var ClaudeSdkProvider = class {
|
|
|
4986
5424
|
return void 0;
|
|
4987
5425
|
}
|
|
4988
5426
|
if (this.config.logDir) {
|
|
4989
|
-
return
|
|
5427
|
+
return path12.resolve(this.config.logDir);
|
|
4990
5428
|
}
|
|
4991
|
-
return
|
|
5429
|
+
return path12.join(process.cwd(), ".agentv", "logs", "claude");
|
|
4992
5430
|
}
|
|
4993
5431
|
async createStreamLogger(request) {
|
|
4994
5432
|
const logDir = this.resolveLogDirectory();
|
|
@@ -5002,7 +5440,7 @@ var ClaudeSdkProvider = class {
|
|
|
5002
5440
|
console.warn(`Skipping Claude stream logging (could not create ${logDir}): ${message}`);
|
|
5003
5441
|
return void 0;
|
|
5004
5442
|
}
|
|
5005
|
-
const filePath =
|
|
5443
|
+
const filePath = path12.join(logDir, buildLogFilename2(request, this.targetName));
|
|
5006
5444
|
try {
|
|
5007
5445
|
const logger = await ClaudeStreamLogger.create({
|
|
5008
5446
|
filePath,
|
|
@@ -5190,7 +5628,7 @@ function formatElapsed2(startedAt) {
|
|
|
5190
5628
|
import { exec as execWithCallback } from "node:child_process";
|
|
5191
5629
|
import fs from "node:fs/promises";
|
|
5192
5630
|
import os from "node:os";
|
|
5193
|
-
import
|
|
5631
|
+
import path13 from "node:path";
|
|
5194
5632
|
import { promisify } from "node:util";
|
|
5195
5633
|
import { z as z2 } from "zod";
|
|
5196
5634
|
var ToolCallSchema = z2.object({
|
|
@@ -5693,7 +6131,7 @@ function normalizeInputFiles2(inputFiles) {
|
|
|
5693
6131
|
}
|
|
5694
6132
|
const unique = /* @__PURE__ */ new Map();
|
|
5695
6133
|
for (const inputFile of inputFiles) {
|
|
5696
|
-
const absolutePath =
|
|
6134
|
+
const absolutePath = path13.resolve(inputFile);
|
|
5697
6135
|
if (!unique.has(absolutePath)) {
|
|
5698
6136
|
unique.set(absolutePath, absolutePath);
|
|
5699
6137
|
}
|
|
@@ -5707,7 +6145,7 @@ function formatFileList(files, template) {
|
|
|
5707
6145
|
const formatter = template ?? "{path}";
|
|
5708
6146
|
return files.map((filePath) => {
|
|
5709
6147
|
const escapedPath = shellEscape(filePath);
|
|
5710
|
-
const escapedName = shellEscape(
|
|
6148
|
+
const escapedName = shellEscape(path13.basename(filePath));
|
|
5711
6149
|
return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
|
|
5712
6150
|
}).join(" ");
|
|
5713
6151
|
}
|
|
@@ -5731,7 +6169,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
|
|
|
5731
6169
|
const safeEvalId = evalCaseId || "unknown";
|
|
5732
6170
|
const timestamp = Date.now();
|
|
5733
6171
|
const random = Math.random().toString(36).substring(2, 9);
|
|
5734
|
-
return
|
|
6172
|
+
return path13.join(os.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
|
|
5735
6173
|
}
|
|
5736
6174
|
function formatTimeoutSuffix2(timeoutMs) {
|
|
5737
6175
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
@@ -5745,7 +6183,7 @@ function formatTimeoutSuffix2(timeoutMs) {
|
|
|
5745
6183
|
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
5746
6184
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
5747
6185
|
import { mkdir as mkdir3 } from "node:fs/promises";
|
|
5748
|
-
import
|
|
6186
|
+
import path14 from "node:path";
|
|
5749
6187
|
|
|
5750
6188
|
// src/evaluation/providers/codex-log-tracker.ts
|
|
5751
6189
|
var GLOBAL_LOGS_KEY2 = Symbol.for("agentv.codexLogs");
|
|
@@ -5980,10 +6418,10 @@ ${basePrompt}` : basePrompt;
|
|
|
5980
6418
|
}
|
|
5981
6419
|
resolveCwd(cwdOverride) {
|
|
5982
6420
|
if (cwdOverride) {
|
|
5983
|
-
return
|
|
6421
|
+
return path14.resolve(cwdOverride);
|
|
5984
6422
|
}
|
|
5985
6423
|
if (this.config.cwd) {
|
|
5986
|
-
return
|
|
6424
|
+
return path14.resolve(this.config.cwd);
|
|
5987
6425
|
}
|
|
5988
6426
|
return void 0;
|
|
5989
6427
|
}
|
|
@@ -5993,9 +6431,9 @@ ${basePrompt}` : basePrompt;
|
|
|
5993
6431
|
return void 0;
|
|
5994
6432
|
}
|
|
5995
6433
|
if (this.config.logDir) {
|
|
5996
|
-
return
|
|
6434
|
+
return path14.resolve(this.config.logDir);
|
|
5997
6435
|
}
|
|
5998
|
-
return
|
|
6436
|
+
return path14.join(process.cwd(), ".agentv", "logs", "codex");
|
|
5999
6437
|
}
|
|
6000
6438
|
async createStreamLogger(request) {
|
|
6001
6439
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6009,7 +6447,7 @@ ${basePrompt}` : basePrompt;
|
|
|
6009
6447
|
console.warn(`Skipping Codex SDK stream logging (could not create ${logDir}): ${message}`);
|
|
6010
6448
|
return void 0;
|
|
6011
6449
|
}
|
|
6012
|
-
const filePath =
|
|
6450
|
+
const filePath = path14.join(logDir, buildLogFilename3(request, this.targetName));
|
|
6013
6451
|
try {
|
|
6014
6452
|
const logger = await CodexSdkStreamLogger.create({
|
|
6015
6453
|
filePath,
|
|
@@ -6153,7 +6591,7 @@ function formatElapsed3(startedAt) {
|
|
|
6153
6591
|
// src/evaluation/providers/copilot-cli.ts
|
|
6154
6592
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
6155
6593
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
6156
|
-
import
|
|
6594
|
+
import path16 from "node:path";
|
|
6157
6595
|
import { Readable, Writable } from "node:stream";
|
|
6158
6596
|
import { spawn as spawn2 } from "node:child_process";
|
|
6159
6597
|
import * as acp from "@agentclientprotocol/sdk";
|
|
@@ -6215,10 +6653,10 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
6215
6653
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
6216
6654
|
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
6217
6655
|
import { arch, platform } from "node:os";
|
|
6218
|
-
import
|
|
6219
|
-
import { fileURLToPath as
|
|
6656
|
+
import path15 from "node:path";
|
|
6657
|
+
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
6220
6658
|
function resolvePlatformCliPath() {
|
|
6221
|
-
const
|
|
6659
|
+
const os4 = platform();
|
|
6222
6660
|
const cpu = arch();
|
|
6223
6661
|
const platformMap = {
|
|
6224
6662
|
linux: "linux",
|
|
@@ -6229,17 +6667,17 @@ function resolvePlatformCliPath() {
|
|
|
6229
6667
|
x64: "x64",
|
|
6230
6668
|
arm64: "arm64"
|
|
6231
6669
|
};
|
|
6232
|
-
const osPart = platformMap[
|
|
6670
|
+
const osPart = platformMap[os4];
|
|
6233
6671
|
const archPart = archMap[cpu];
|
|
6234
6672
|
if (!osPart || !archPart) {
|
|
6235
6673
|
return void 0;
|
|
6236
6674
|
}
|
|
6237
6675
|
const packageName = `@github/copilot-${osPart}-${archPart}`;
|
|
6238
|
-
const binaryName =
|
|
6676
|
+
const binaryName = os4 === "win32" ? "copilot.exe" : "copilot";
|
|
6239
6677
|
try {
|
|
6240
6678
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6241
|
-
const packageJsonPath = resolved.startsWith("file:") ?
|
|
6242
|
-
const binaryPath =
|
|
6679
|
+
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath3(resolved) : resolved;
|
|
6680
|
+
const binaryPath = path15.join(path15.dirname(packageJsonPath), binaryName);
|
|
6243
6681
|
if (existsSync(binaryPath)) {
|
|
6244
6682
|
return binaryPath;
|
|
6245
6683
|
}
|
|
@@ -6247,7 +6685,7 @@ function resolvePlatformCliPath() {
|
|
|
6247
6685
|
}
|
|
6248
6686
|
let searchDir = process.cwd();
|
|
6249
6687
|
for (let i = 0; i < 10; i++) {
|
|
6250
|
-
const standardPath =
|
|
6688
|
+
const standardPath = path15.join(
|
|
6251
6689
|
searchDir,
|
|
6252
6690
|
"node_modules",
|
|
6253
6691
|
...packageName.split("/"),
|
|
@@ -6256,13 +6694,13 @@ function resolvePlatformCliPath() {
|
|
|
6256
6694
|
if (existsSync(standardPath)) {
|
|
6257
6695
|
return standardPath;
|
|
6258
6696
|
}
|
|
6259
|
-
const bunDir =
|
|
6697
|
+
const bunDir = path15.join(searchDir, "node_modules", ".bun");
|
|
6260
6698
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
6261
6699
|
try {
|
|
6262
6700
|
const entries = readdirSync(bunDir);
|
|
6263
6701
|
for (const entry of entries) {
|
|
6264
6702
|
if (entry.startsWith(prefix)) {
|
|
6265
|
-
const candidate =
|
|
6703
|
+
const candidate = path15.join(
|
|
6266
6704
|
bunDir,
|
|
6267
6705
|
entry,
|
|
6268
6706
|
"node_modules",
|
|
@@ -6277,7 +6715,7 @@ function resolvePlatformCliPath() {
|
|
|
6277
6715
|
}
|
|
6278
6716
|
} catch {
|
|
6279
6717
|
}
|
|
6280
|
-
const parent =
|
|
6718
|
+
const parent = path15.dirname(searchDir);
|
|
6281
6719
|
if (parent === searchDir) break;
|
|
6282
6720
|
searchDir = parent;
|
|
6283
6721
|
}
|
|
@@ -6621,10 +7059,10 @@ var CopilotCliProvider = class {
|
|
|
6621
7059
|
}
|
|
6622
7060
|
resolveCwd(cwdOverride) {
|
|
6623
7061
|
if (cwdOverride) {
|
|
6624
|
-
return
|
|
7062
|
+
return path16.resolve(cwdOverride);
|
|
6625
7063
|
}
|
|
6626
7064
|
if (this.config.cwd) {
|
|
6627
|
-
return
|
|
7065
|
+
return path16.resolve(this.config.cwd);
|
|
6628
7066
|
}
|
|
6629
7067
|
return void 0;
|
|
6630
7068
|
}
|
|
@@ -6643,9 +7081,9 @@ var CopilotCliProvider = class {
|
|
|
6643
7081
|
return void 0;
|
|
6644
7082
|
}
|
|
6645
7083
|
if (this.config.logDir) {
|
|
6646
|
-
return
|
|
7084
|
+
return path16.resolve(this.config.logDir);
|
|
6647
7085
|
}
|
|
6648
|
-
return
|
|
7086
|
+
return path16.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
6649
7087
|
}
|
|
6650
7088
|
async createStreamLogger(request) {
|
|
6651
7089
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6659,7 +7097,7 @@ var CopilotCliProvider = class {
|
|
|
6659
7097
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
6660
7098
|
return void 0;
|
|
6661
7099
|
}
|
|
6662
|
-
const filePath =
|
|
7100
|
+
const filePath = path16.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
6663
7101
|
try {
|
|
6664
7102
|
const logger = await CopilotStreamLogger.create(
|
|
6665
7103
|
{
|
|
@@ -6752,9 +7190,9 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
6752
7190
|
}
|
|
6753
7191
|
|
|
6754
7192
|
// src/evaluation/providers/copilot-log.ts
|
|
6755
|
-
import { readFile as
|
|
7193
|
+
import { readFile as readFile10 } from "node:fs/promises";
|
|
6756
7194
|
import { homedir as homedir2 } from "node:os";
|
|
6757
|
-
import
|
|
7195
|
+
import path18 from "node:path";
|
|
6758
7196
|
|
|
6759
7197
|
// src/evaluation/providers/copilot-log-parser.ts
|
|
6760
7198
|
function parseCopilotEvents(eventsJsonl) {
|
|
@@ -6886,11 +7324,11 @@ function parseCopilotEvents(eventsJsonl) {
|
|
|
6886
7324
|
}
|
|
6887
7325
|
|
|
6888
7326
|
// src/evaluation/providers/copilot-session-discovery.ts
|
|
6889
|
-
import { readFile as
|
|
7327
|
+
import { readFile as readFile9, readdir, stat } from "node:fs/promises";
|
|
6890
7328
|
import { homedir } from "node:os";
|
|
6891
|
-
import
|
|
7329
|
+
import path17 from "node:path";
|
|
6892
7330
|
import { parse as parseYaml2 } from "yaml";
|
|
6893
|
-
var DEFAULT_SESSION_STATE_DIR = () =>
|
|
7331
|
+
var DEFAULT_SESSION_STATE_DIR = () => path17.join(homedir(), ".copilot", "session-state");
|
|
6894
7332
|
async function discoverCopilotSessions(opts) {
|
|
6895
7333
|
const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
|
|
6896
7334
|
const limit = opts?.limit ?? 10;
|
|
@@ -6902,11 +7340,11 @@ async function discoverCopilotSessions(opts) {
|
|
|
6902
7340
|
}
|
|
6903
7341
|
const sessions = [];
|
|
6904
7342
|
for (const entry of entries) {
|
|
6905
|
-
const sessionDir =
|
|
6906
|
-
const workspacePath =
|
|
6907
|
-
const eventsPath =
|
|
7343
|
+
const sessionDir = path17.join(sessionStateDir, entry);
|
|
7344
|
+
const workspacePath = path17.join(sessionDir, "workspace.yaml");
|
|
7345
|
+
const eventsPath = path17.join(sessionDir, "events.jsonl");
|
|
6908
7346
|
try {
|
|
6909
|
-
const workspaceContent = await
|
|
7347
|
+
const workspaceContent = await readFile9(workspacePath, "utf8");
|
|
6910
7348
|
const workspace = parseYaml2(workspaceContent) ?? {};
|
|
6911
7349
|
const cwd = String(workspace.cwd ?? "");
|
|
6912
7350
|
let updatedAt;
|
|
@@ -6965,10 +7403,10 @@ var CopilotLogProvider = class {
|
|
|
6965
7403
|
}
|
|
6966
7404
|
async invoke(_request) {
|
|
6967
7405
|
const sessionDir = await this.resolveSessionDir();
|
|
6968
|
-
const eventsPath =
|
|
7406
|
+
const eventsPath = path18.join(sessionDir, "events.jsonl");
|
|
6969
7407
|
let eventsContent;
|
|
6970
7408
|
try {
|
|
6971
|
-
eventsContent = await
|
|
7409
|
+
eventsContent = await readFile10(eventsPath, "utf8");
|
|
6972
7410
|
} catch (err) {
|
|
6973
7411
|
throw new Error(
|
|
6974
7412
|
`Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
|
|
@@ -6987,8 +7425,8 @@ var CopilotLogProvider = class {
|
|
|
6987
7425
|
return this.config.sessionDir;
|
|
6988
7426
|
}
|
|
6989
7427
|
if (this.config.sessionId) {
|
|
6990
|
-
const stateDir = this.config.sessionStateDir ??
|
|
6991
|
-
return
|
|
7428
|
+
const stateDir = this.config.sessionStateDir ?? path18.join(homedir2(), ".copilot", "session-state");
|
|
7429
|
+
return path18.join(stateDir, this.config.sessionId);
|
|
6992
7430
|
}
|
|
6993
7431
|
if (this.config.discover === "latest") {
|
|
6994
7432
|
const sessions = await discoverCopilotSessions({
|
|
@@ -7013,7 +7451,7 @@ var CopilotLogProvider = class {
|
|
|
7013
7451
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
7014
7452
|
import { existsSync as existsSync2 } from "node:fs";
|
|
7015
7453
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
7016
|
-
import
|
|
7454
|
+
import path19 from "node:path";
|
|
7017
7455
|
|
|
7018
7456
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
7019
7457
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -7323,10 +7761,10 @@ var CopilotSdkProvider = class {
|
|
|
7323
7761
|
}
|
|
7324
7762
|
resolveCwd(cwdOverride) {
|
|
7325
7763
|
if (cwdOverride) {
|
|
7326
|
-
return
|
|
7764
|
+
return path19.resolve(cwdOverride);
|
|
7327
7765
|
}
|
|
7328
7766
|
if (this.config.cwd) {
|
|
7329
|
-
return
|
|
7767
|
+
return path19.resolve(this.config.cwd);
|
|
7330
7768
|
}
|
|
7331
7769
|
return void 0;
|
|
7332
7770
|
}
|
|
@@ -7335,9 +7773,9 @@ var CopilotSdkProvider = class {
|
|
|
7335
7773
|
return void 0;
|
|
7336
7774
|
}
|
|
7337
7775
|
if (this.config.logDir) {
|
|
7338
|
-
return
|
|
7776
|
+
return path19.resolve(this.config.logDir);
|
|
7339
7777
|
}
|
|
7340
|
-
return
|
|
7778
|
+
return path19.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
7341
7779
|
}
|
|
7342
7780
|
async createStreamLogger(request) {
|
|
7343
7781
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7351,7 +7789,7 @@ var CopilotSdkProvider = class {
|
|
|
7351
7789
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7352
7790
|
return void 0;
|
|
7353
7791
|
}
|
|
7354
|
-
const filePath =
|
|
7792
|
+
const filePath = path19.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
7355
7793
|
try {
|
|
7356
7794
|
const logger = await CopilotStreamLogger.create(
|
|
7357
7795
|
{
|
|
@@ -7380,9 +7818,9 @@ var CopilotSdkProvider = class {
|
|
|
7380
7818
|
};
|
|
7381
7819
|
function resolveSkillDirectories(cwd) {
|
|
7382
7820
|
const candidates = [
|
|
7383
|
-
|
|
7384
|
-
|
|
7385
|
-
|
|
7821
|
+
path19.join(cwd, ".claude", "skills"),
|
|
7822
|
+
path19.join(cwd, ".agents", "skills"),
|
|
7823
|
+
path19.join(cwd, ".codex", "skills")
|
|
7386
7824
|
];
|
|
7387
7825
|
return candidates.filter((dir) => existsSync2(dir));
|
|
7388
7826
|
}
|
|
@@ -7466,7 +7904,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
7466
7904
|
import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
|
|
7467
7905
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7468
7906
|
import { tmpdir } from "node:os";
|
|
7469
|
-
import
|
|
7907
|
+
import path20 from "node:path";
|
|
7470
7908
|
|
|
7471
7909
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
7472
7910
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -7672,7 +8110,7 @@ var PiCliProvider = class {
|
|
|
7672
8110
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
7673
8111
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
7674
8112
|
try {
|
|
7675
|
-
const promptFile =
|
|
8113
|
+
const promptFile = path20.join(cwd, PROMPT_FILENAME);
|
|
7676
8114
|
await writeFile(promptFile, request.question, "utf8");
|
|
7677
8115
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
7678
8116
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
@@ -7735,10 +8173,10 @@ var PiCliProvider = class {
|
|
|
7735
8173
|
}
|
|
7736
8174
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
7737
8175
|
if (cwdOverride) {
|
|
7738
|
-
return
|
|
8176
|
+
return path20.resolve(cwdOverride);
|
|
7739
8177
|
}
|
|
7740
8178
|
if (this.config.cwd) {
|
|
7741
|
-
return
|
|
8179
|
+
return path20.resolve(this.config.cwd);
|
|
7742
8180
|
}
|
|
7743
8181
|
if (workspaceRoot) {
|
|
7744
8182
|
return workspaceRoot;
|
|
@@ -7844,7 +8282,7 @@ ${prompt}` : prompt;
|
|
|
7844
8282
|
return env;
|
|
7845
8283
|
}
|
|
7846
8284
|
async createWorkspace() {
|
|
7847
|
-
return await mkdtemp(
|
|
8285
|
+
return await mkdtemp(path20.join(tmpdir(), WORKSPACE_PREFIX));
|
|
7848
8286
|
}
|
|
7849
8287
|
async cleanupWorkspace(workspaceRoot) {
|
|
7850
8288
|
try {
|
|
@@ -7854,9 +8292,9 @@ ${prompt}` : prompt;
|
|
|
7854
8292
|
}
|
|
7855
8293
|
resolveLogDirectory() {
|
|
7856
8294
|
if (this.config.logDir) {
|
|
7857
|
-
return
|
|
8295
|
+
return path20.resolve(this.config.logDir);
|
|
7858
8296
|
}
|
|
7859
|
-
return
|
|
8297
|
+
return path20.join(process.cwd(), ".agentv", "logs", "pi-cli");
|
|
7860
8298
|
}
|
|
7861
8299
|
async createStreamLogger(request) {
|
|
7862
8300
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7870,7 +8308,7 @@ ${prompt}` : prompt;
|
|
|
7870
8308
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7871
8309
|
return void 0;
|
|
7872
8310
|
}
|
|
7873
|
-
const filePath =
|
|
8311
|
+
const filePath = path20.join(logDir, buildLogFilename5(request, this.targetName));
|
|
7874
8312
|
try {
|
|
7875
8313
|
const logger = await PiStreamLogger.create({
|
|
7876
8314
|
filePath,
|
|
@@ -8341,8 +8779,8 @@ function resolveWindowsCmd(executable) {
|
|
|
8341
8779
|
const content = readFileSync2(cmdPath, "utf-8");
|
|
8342
8780
|
const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
|
|
8343
8781
|
if (match) {
|
|
8344
|
-
const dp0 =
|
|
8345
|
-
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${
|
|
8782
|
+
const dp0 = path20.dirname(path20.resolve(cmdPath));
|
|
8783
|
+
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path20.sep}`);
|
|
8346
8784
|
try {
|
|
8347
8785
|
accessSync(scriptPath);
|
|
8348
8786
|
return ["node", [scriptPath]];
|
|
@@ -8421,13 +8859,13 @@ import { execSync as execSync2 } from "node:child_process";
|
|
|
8421
8859
|
import { randomUUID as randomUUID8 } from "node:crypto";
|
|
8422
8860
|
import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
|
|
8423
8861
|
import { mkdir as mkdir7 } from "node:fs/promises";
|
|
8424
|
-
import
|
|
8862
|
+
import path22 from "node:path";
|
|
8425
8863
|
import { createInterface } from "node:readline";
|
|
8426
|
-
import { fileURLToPath as
|
|
8864
|
+
import { fileURLToPath as fileURLToPath4, pathToFileURL } from "node:url";
|
|
8427
8865
|
|
|
8428
8866
|
// src/paths.ts
|
|
8429
8867
|
import os2 from "node:os";
|
|
8430
|
-
import
|
|
8868
|
+
import path21 from "node:path";
|
|
8431
8869
|
var logged = false;
|
|
8432
8870
|
function getAgentvHome() {
|
|
8433
8871
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8438,19 +8876,19 @@ function getAgentvHome() {
|
|
|
8438
8876
|
}
|
|
8439
8877
|
return envHome;
|
|
8440
8878
|
}
|
|
8441
|
-
return
|
|
8879
|
+
return path21.join(os2.homedir(), ".agentv");
|
|
8442
8880
|
}
|
|
8443
8881
|
function getWorkspacesRoot() {
|
|
8444
|
-
return
|
|
8882
|
+
return path21.join(getAgentvHome(), "workspaces");
|
|
8445
8883
|
}
|
|
8446
8884
|
function getSubagentsRoot() {
|
|
8447
|
-
return
|
|
8885
|
+
return path21.join(getAgentvHome(), "subagents");
|
|
8448
8886
|
}
|
|
8449
8887
|
function getTraceStateRoot() {
|
|
8450
|
-
return
|
|
8888
|
+
return path21.join(getAgentvHome(), "trace-state");
|
|
8451
8889
|
}
|
|
8452
8890
|
function getWorkspacePoolRoot() {
|
|
8453
|
-
return
|
|
8891
|
+
return path21.join(getAgentvHome(), "workspace-pool");
|
|
8454
8892
|
}
|
|
8455
8893
|
|
|
8456
8894
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
@@ -8472,7 +8910,7 @@ async function promptInstall() {
|
|
|
8472
8910
|
}
|
|
8473
8911
|
}
|
|
8474
8912
|
function findManagedSdkInstallRoot() {
|
|
8475
|
-
return
|
|
8913
|
+
return path22.join(getAgentvHome(), "deps", "pi-sdk");
|
|
8476
8914
|
}
|
|
8477
8915
|
function resolveGlobalNpmRoot() {
|
|
8478
8916
|
try {
|
|
@@ -8486,7 +8924,7 @@ function resolveGlobalNpmRoot() {
|
|
|
8486
8924
|
}
|
|
8487
8925
|
}
|
|
8488
8926
|
function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
|
|
8489
|
-
return
|
|
8927
|
+
return path22.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
|
|
8490
8928
|
}
|
|
8491
8929
|
function findAccessiblePath(paths) {
|
|
8492
8930
|
for (const candidate of paths) {
|
|
@@ -8512,11 +8950,11 @@ async function tryImportLocalSdkModules() {
|
|
|
8512
8950
|
async function tryImportManagedSdkModules() {
|
|
8513
8951
|
const managedRoot = findManagedSdkInstallRoot();
|
|
8514
8952
|
const piCodingAgentEntry = findAccessiblePath([
|
|
8515
|
-
|
|
8953
|
+
path22.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
|
|
8516
8954
|
]);
|
|
8517
8955
|
const piAiEntry = findAccessiblePath([
|
|
8518
|
-
|
|
8519
|
-
|
|
8956
|
+
path22.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
|
|
8957
|
+
path22.join(
|
|
8520
8958
|
managedRoot,
|
|
8521
8959
|
"node_modules",
|
|
8522
8960
|
"@mariozechner",
|
|
@@ -8547,7 +8985,7 @@ async function tryImportGlobalSdkModules() {
|
|
|
8547
8985
|
]);
|
|
8548
8986
|
const piAiEntry = findAccessiblePath([
|
|
8549
8987
|
buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
|
|
8550
|
-
|
|
8988
|
+
path22.join(
|
|
8551
8989
|
globalNpmRoot,
|
|
8552
8990
|
"@mariozechner",
|
|
8553
8991
|
"pi-coding-agent",
|
|
@@ -8848,10 +9286,10 @@ ${fileList}`;
|
|
|
8848
9286
|
}
|
|
8849
9287
|
resolveCwd(cwdOverride) {
|
|
8850
9288
|
if (cwdOverride) {
|
|
8851
|
-
return
|
|
9289
|
+
return path22.resolve(cwdOverride);
|
|
8852
9290
|
}
|
|
8853
9291
|
if (this.config.cwd) {
|
|
8854
|
-
return
|
|
9292
|
+
return path22.resolve(this.config.cwd);
|
|
8855
9293
|
}
|
|
8856
9294
|
return process.cwd();
|
|
8857
9295
|
}
|
|
@@ -8870,9 +9308,9 @@ ${fileList}`;
|
|
|
8870
9308
|
}
|
|
8871
9309
|
resolveLogDirectory() {
|
|
8872
9310
|
if (this.config.logDir) {
|
|
8873
|
-
return
|
|
9311
|
+
return path22.resolve(this.config.logDir);
|
|
8874
9312
|
}
|
|
8875
|
-
return
|
|
9313
|
+
return path22.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
8876
9314
|
}
|
|
8877
9315
|
async createStreamLogger(request) {
|
|
8878
9316
|
const logDir = this.resolveLogDirectory();
|
|
@@ -8886,7 +9324,7 @@ ${fileList}`;
|
|
|
8886
9324
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
8887
9325
|
return void 0;
|
|
8888
9326
|
}
|
|
8889
|
-
const filePath =
|
|
9327
|
+
const filePath = path22.join(logDir, buildLogFilename6(request, this.targetName));
|
|
8890
9328
|
try {
|
|
8891
9329
|
const logger = await PiStreamLogger2.create({
|
|
8892
9330
|
filePath,
|
|
@@ -9101,17 +9539,17 @@ var ProviderRegistry = class {
|
|
|
9101
9539
|
// src/evaluation/providers/vscode-provider.ts
|
|
9102
9540
|
import { exec as exec2 } from "node:child_process";
|
|
9103
9541
|
import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
|
|
9104
|
-
import
|
|
9542
|
+
import path33 from "node:path";
|
|
9105
9543
|
import { promisify as promisify3 } from "node:util";
|
|
9106
9544
|
|
|
9107
9545
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
9108
9546
|
import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
|
|
9109
|
-
import
|
|
9547
|
+
import path31 from "node:path";
|
|
9110
9548
|
|
|
9111
9549
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
9112
9550
|
import { constants as constants2 } from "node:fs";
|
|
9113
9551
|
import { access as access2, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
|
|
9114
|
-
import
|
|
9552
|
+
import path23 from "node:path";
|
|
9115
9553
|
async function pathExists(target) {
|
|
9116
9554
|
try {
|
|
9117
9555
|
await access2(target, constants2.F_OK);
|
|
@@ -9127,7 +9565,7 @@ async function readDirEntries(target) {
|
|
|
9127
9565
|
const entries = await readdir2(target, { withFileTypes: true });
|
|
9128
9566
|
return entries.map((entry) => ({
|
|
9129
9567
|
name: entry.name,
|
|
9130
|
-
absolutePath:
|
|
9568
|
+
absolutePath: path23.join(target, entry.name),
|
|
9131
9569
|
isDirectory: entry.isDirectory()
|
|
9132
9570
|
}));
|
|
9133
9571
|
}
|
|
@@ -9142,9 +9580,9 @@ async function removeIfExists(target) {
|
|
|
9142
9580
|
}
|
|
9143
9581
|
|
|
9144
9582
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
9145
|
-
import
|
|
9583
|
+
import path24 from "node:path";
|
|
9146
9584
|
function pathToFileUri2(filePath) {
|
|
9147
|
-
const absolutePath =
|
|
9585
|
+
const absolutePath = path24.isAbsolute(filePath) ? filePath : path24.resolve(filePath);
|
|
9148
9586
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9149
9587
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9150
9588
|
return `file:///${normalizedPath}`;
|
|
@@ -9153,7 +9591,7 @@ function pathToFileUri2(filePath) {
|
|
|
9153
9591
|
}
|
|
9154
9592
|
|
|
9155
9593
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
9156
|
-
import
|
|
9594
|
+
import path25 from "node:path";
|
|
9157
9595
|
|
|
9158
9596
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
9159
9597
|
function renderTemplate2(content, variables) {
|
|
@@ -9245,8 +9683,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
9245
9683
|
});
|
|
9246
9684
|
}
|
|
9247
9685
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
9248
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
9249
|
-
const responseList = responseFiles.map((file) => `"${
|
|
9686
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path25.basename(file)}`).join("\n");
|
|
9687
|
+
const responseList = responseFiles.map((file) => `"${path25.basename(file)}"`).join(", ");
|
|
9250
9688
|
return renderTemplate2(templateContent, {
|
|
9251
9689
|
requestFiles: requestLines,
|
|
9252
9690
|
responseList
|
|
@@ -9254,8 +9692,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
9254
9692
|
}
|
|
9255
9693
|
|
|
9256
9694
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
9257
|
-
import { readFile as
|
|
9258
|
-
import
|
|
9695
|
+
import { readFile as readFile11 } from "node:fs/promises";
|
|
9696
|
+
import path26 from "node:path";
|
|
9259
9697
|
|
|
9260
9698
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
9261
9699
|
function sleep2(ms) {
|
|
@@ -9293,7 +9731,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
9293
9731
|
const maxAttempts = 10;
|
|
9294
9732
|
while (attempts < maxAttempts) {
|
|
9295
9733
|
try {
|
|
9296
|
-
const content = await
|
|
9734
|
+
const content = await readFile11(responseFileFinal, { encoding: "utf8" });
|
|
9297
9735
|
if (!silent) {
|
|
9298
9736
|
process.stdout.write(`${content}
|
|
9299
9737
|
`);
|
|
@@ -9314,7 +9752,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
9314
9752
|
}
|
|
9315
9753
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
9316
9754
|
if (!silent) {
|
|
9317
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
9755
|
+
const fileList = responseFilesFinal.map((file) => path26.basename(file)).join(", ");
|
|
9318
9756
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
9319
9757
|
}
|
|
9320
9758
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -9323,7 +9761,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9323
9761
|
while (pending.size > 0) {
|
|
9324
9762
|
if (Date.now() >= deadline) {
|
|
9325
9763
|
if (!silent) {
|
|
9326
|
-
const remaining = [...pending].map((f) =>
|
|
9764
|
+
const remaining = [...pending].map((f) => path26.basename(f)).join(", ");
|
|
9327
9765
|
console.error(
|
|
9328
9766
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
9329
9767
|
);
|
|
@@ -9350,7 +9788,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9350
9788
|
const maxAttempts = 10;
|
|
9351
9789
|
while (attempts < maxAttempts) {
|
|
9352
9790
|
try {
|
|
9353
|
-
const content = await
|
|
9791
|
+
const content = await readFile11(file, { encoding: "utf8" });
|
|
9354
9792
|
if (!silent) {
|
|
9355
9793
|
process.stdout.write(`${content}
|
|
9356
9794
|
`);
|
|
@@ -9374,16 +9812,16 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9374
9812
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
9375
9813
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
9376
9814
|
import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
|
|
9377
|
-
import
|
|
9815
|
+
import path28 from "node:path";
|
|
9378
9816
|
import { promisify as promisify2 } from "node:util";
|
|
9379
9817
|
|
|
9380
9818
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
9381
|
-
import
|
|
9819
|
+
import path27 from "node:path";
|
|
9382
9820
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
9383
9821
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
9384
9822
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
9385
9823
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
9386
|
-
return
|
|
9824
|
+
return path27.join(getSubagentsRoot(), folder);
|
|
9387
9825
|
}
|
|
9388
9826
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
9389
9827
|
|
|
@@ -9450,11 +9888,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
9450
9888
|
await raceSpawnError(child);
|
|
9451
9889
|
return true;
|
|
9452
9890
|
}
|
|
9453
|
-
const aliveFile =
|
|
9891
|
+
const aliveFile = path28.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
9454
9892
|
await removeIfExists(aliveFile);
|
|
9455
|
-
const githubAgentsDir =
|
|
9893
|
+
const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
|
|
9456
9894
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
9457
|
-
const wakeupDst =
|
|
9895
|
+
const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
|
|
9458
9896
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
9459
9897
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
9460
9898
|
label: "open-workspace"
|
|
@@ -9467,7 +9905,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
9467
9905
|
"chat",
|
|
9468
9906
|
"-m",
|
|
9469
9907
|
wakeupChatId,
|
|
9470
|
-
`create a file named .alive in the ${
|
|
9908
|
+
`create a file named .alive in the ${path28.basename(subagentDir)} folder`
|
|
9471
9909
|
];
|
|
9472
9910
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
9473
9911
|
await raceSpawnError(wakeupChild);
|
|
@@ -9482,10 +9920,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
9482
9920
|
return true;
|
|
9483
9921
|
}
|
|
9484
9922
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
9485
|
-
const workspacePath =
|
|
9486
|
-
const messagesDir =
|
|
9923
|
+
const workspacePath = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
9924
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
9487
9925
|
await mkdir9(messagesDir, { recursive: true });
|
|
9488
|
-
const reqFile =
|
|
9926
|
+
const reqFile = path28.join(messagesDir, `${timestamp}_req.md`);
|
|
9489
9927
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
9490
9928
|
const reqUri = pathToFileUri2(reqFile);
|
|
9491
9929
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -9493,16 +9931,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
9493
9931
|
chatArgs.push("-a", attachment);
|
|
9494
9932
|
}
|
|
9495
9933
|
chatArgs.push("-a", reqFile);
|
|
9496
|
-
chatArgs.push(`Follow instructions in [${
|
|
9934
|
+
chatArgs.push(`Follow instructions in [${path28.basename(reqFile)}](${reqUri})`);
|
|
9497
9935
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
9498
9936
|
workspacePath,
|
|
9499
|
-
|
|
9937
|
+
path28.basename(subagentDir),
|
|
9500
9938
|
subagentDir,
|
|
9501
9939
|
vscodeCmd
|
|
9502
9940
|
);
|
|
9503
9941
|
if (!workspaceReady) {
|
|
9504
9942
|
throw new Error(
|
|
9505
|
-
`VS Code workspace '${
|
|
9943
|
+
`VS Code workspace '${path28.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
9506
9944
|
);
|
|
9507
9945
|
}
|
|
9508
9946
|
await sleep2(500);
|
|
@@ -9510,8 +9948,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
9510
9948
|
await raceSpawnError(child);
|
|
9511
9949
|
}
|
|
9512
9950
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
9513
|
-
const workspacePath =
|
|
9514
|
-
const messagesDir =
|
|
9951
|
+
const workspacePath = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
|
|
9952
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
9515
9953
|
await mkdir9(messagesDir, { recursive: true });
|
|
9516
9954
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
9517
9955
|
for (const attachment of attachmentPaths) {
|
|
@@ -9520,13 +9958,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
9520
9958
|
chatArgs.push(chatInstruction);
|
|
9521
9959
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
9522
9960
|
workspacePath,
|
|
9523
|
-
|
|
9961
|
+
path28.basename(subagentDir),
|
|
9524
9962
|
subagentDir,
|
|
9525
9963
|
vscodeCmd
|
|
9526
9964
|
);
|
|
9527
9965
|
if (!workspaceReady) {
|
|
9528
9966
|
throw new Error(
|
|
9529
|
-
`VS Code workspace '${
|
|
9967
|
+
`VS Code workspace '${path28.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
9530
9968
|
);
|
|
9531
9969
|
}
|
|
9532
9970
|
await sleep2(500);
|
|
@@ -9535,11 +9973,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
9535
9973
|
}
|
|
9536
9974
|
|
|
9537
9975
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
9538
|
-
import { copyFile, mkdir as mkdir10, readFile as
|
|
9539
|
-
import
|
|
9976
|
+
import { copyFile, mkdir as mkdir10, readFile as readFile12, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
|
|
9977
|
+
import path30 from "node:path";
|
|
9540
9978
|
|
|
9541
9979
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
9542
|
-
import
|
|
9980
|
+
import path29 from "node:path";
|
|
9543
9981
|
import JSON5 from "json5";
|
|
9544
9982
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
9545
9983
|
let workspace;
|
|
@@ -9556,10 +9994,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
9556
9994
|
}
|
|
9557
9995
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
9558
9996
|
const folderPath = folder.path;
|
|
9559
|
-
if (
|
|
9997
|
+
if (path29.isAbsolute(folderPath)) {
|
|
9560
9998
|
return folder;
|
|
9561
9999
|
}
|
|
9562
|
-
const absolutePath =
|
|
10000
|
+
const absolutePath = path29.resolve(templateDir, folderPath);
|
|
9563
10001
|
return {
|
|
9564
10002
|
...folder,
|
|
9565
10003
|
path: absolutePath
|
|
@@ -9581,19 +10019,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
9581
10019
|
if (locationMap && typeof locationMap === "object") {
|
|
9582
10020
|
const transformedMap = {};
|
|
9583
10021
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
9584
|
-
const isAbsolute =
|
|
10022
|
+
const isAbsolute = path29.isAbsolute(locationPath);
|
|
9585
10023
|
if (isAbsolute) {
|
|
9586
10024
|
transformedMap[locationPath] = value;
|
|
9587
10025
|
} else {
|
|
9588
10026
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
9589
10027
|
if (firstGlobIndex === -1) {
|
|
9590
|
-
const resolvedPath =
|
|
10028
|
+
const resolvedPath = path29.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
9591
10029
|
transformedMap[resolvedPath] = value;
|
|
9592
10030
|
} else {
|
|
9593
10031
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
9594
10032
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
9595
10033
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
9596
|
-
const resolvedPath = (
|
|
10034
|
+
const resolvedPath = (path29.resolve(templateDir, basePath) + patternPath).replace(
|
|
9597
10035
|
/\\/g,
|
|
9598
10036
|
"/"
|
|
9599
10037
|
);
|
|
@@ -9634,7 +10072,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
9634
10072
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
9635
10073
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
9636
10074
|
for (const subagent of subagents) {
|
|
9637
|
-
const lockFile =
|
|
10075
|
+
const lockFile = path30.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
9638
10076
|
if (!await pathExists(lockFile)) {
|
|
9639
10077
|
return subagent.absolutePath;
|
|
9640
10078
|
}
|
|
@@ -9644,7 +10082,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
9644
10082
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
9645
10083
|
let workspaceContent;
|
|
9646
10084
|
if (workspaceTemplate) {
|
|
9647
|
-
const workspaceSrc =
|
|
10085
|
+
const workspaceSrc = path30.resolve(workspaceTemplate);
|
|
9648
10086
|
if (!await pathExists(workspaceSrc)) {
|
|
9649
10087
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
9650
10088
|
}
|
|
@@ -9652,18 +10090,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
9652
10090
|
if (!stats.isFile()) {
|
|
9653
10091
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
9654
10092
|
}
|
|
9655
|
-
const templateText = await
|
|
10093
|
+
const templateText = await readFile12(workspaceSrc, "utf8");
|
|
9656
10094
|
workspaceContent = JSON.parse(templateText);
|
|
9657
10095
|
} else {
|
|
9658
10096
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
9659
10097
|
}
|
|
9660
|
-
const workspaceName = `${
|
|
9661
|
-
const workspaceDst =
|
|
9662
|
-
const templateDir = workspaceTemplate ?
|
|
10098
|
+
const workspaceName = `${path30.basename(subagentDir)}.code-workspace`;
|
|
10099
|
+
const workspaceDst = path30.join(subagentDir, workspaceName);
|
|
10100
|
+
const templateDir = workspaceTemplate ? path30.dirname(path30.resolve(workspaceTemplate)) : subagentDir;
|
|
9663
10101
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
9664
10102
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
9665
10103
|
if (cwd) {
|
|
9666
|
-
const absCwd =
|
|
10104
|
+
const absCwd = path30.resolve(cwd);
|
|
9667
10105
|
const parsed = JSON.parse(transformedContent);
|
|
9668
10106
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
9669
10107
|
if (!alreadyPresent) {
|
|
@@ -9672,35 +10110,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
9672
10110
|
}
|
|
9673
10111
|
}
|
|
9674
10112
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
9675
|
-
const messagesDir =
|
|
10113
|
+
const messagesDir = path30.join(subagentDir, "messages");
|
|
9676
10114
|
await mkdir10(messagesDir, { recursive: true });
|
|
9677
10115
|
return { workspace: workspaceDst, messagesDir };
|
|
9678
10116
|
}
|
|
9679
10117
|
async function createSubagentLock(subagentDir) {
|
|
9680
|
-
const messagesDir =
|
|
10118
|
+
const messagesDir = path30.join(subagentDir, "messages");
|
|
9681
10119
|
if (await pathExists(messagesDir)) {
|
|
9682
10120
|
const files = await readdir3(messagesDir);
|
|
9683
10121
|
await Promise.all(
|
|
9684
10122
|
files.map(async (file) => {
|
|
9685
|
-
const target =
|
|
10123
|
+
const target = path30.join(messagesDir, file);
|
|
9686
10124
|
await removeIfExists(target);
|
|
9687
10125
|
})
|
|
9688
10126
|
);
|
|
9689
10127
|
}
|
|
9690
|
-
const githubAgentsDir =
|
|
10128
|
+
const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
|
|
9691
10129
|
if (await pathExists(githubAgentsDir)) {
|
|
9692
10130
|
const agentFiles = await readdir3(githubAgentsDir);
|
|
9693
10131
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
9694
10132
|
await Promise.all(
|
|
9695
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
10133
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path30.join(githubAgentsDir, file)))
|
|
9696
10134
|
);
|
|
9697
10135
|
}
|
|
9698
|
-
const lockFile =
|
|
10136
|
+
const lockFile = path30.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
9699
10137
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
9700
10138
|
return lockFile;
|
|
9701
10139
|
}
|
|
9702
10140
|
async function removeSubagentLock(subagentDir) {
|
|
9703
|
-
const lockFile =
|
|
10141
|
+
const lockFile = path30.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
9704
10142
|
await removeIfExists(lockFile);
|
|
9705
10143
|
}
|
|
9706
10144
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -9720,9 +10158,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
9720
10158
|
return 1;
|
|
9721
10159
|
}
|
|
9722
10160
|
if (promptFile) {
|
|
9723
|
-
const githubAgentsDir =
|
|
10161
|
+
const githubAgentsDir = path30.join(subagentDir, ".github", "agents");
|
|
9724
10162
|
await mkdir10(githubAgentsDir, { recursive: true });
|
|
9725
|
-
const agentFile =
|
|
10163
|
+
const agentFile = path30.join(githubAgentsDir, `${chatId}.md`);
|
|
9726
10164
|
try {
|
|
9727
10165
|
await copyFile(promptFile, agentFile);
|
|
9728
10166
|
} catch (error) {
|
|
@@ -9741,7 +10179,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
9741
10179
|
if (!promptFile) {
|
|
9742
10180
|
return void 0;
|
|
9743
10181
|
}
|
|
9744
|
-
const resolvedPrompt =
|
|
10182
|
+
const resolvedPrompt = path31.resolve(promptFile);
|
|
9745
10183
|
if (!await pathExists(resolvedPrompt)) {
|
|
9746
10184
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
9747
10185
|
}
|
|
@@ -9757,7 +10195,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
9757
10195
|
}
|
|
9758
10196
|
const resolved = [];
|
|
9759
10197
|
for (const attachment of extraAttachments) {
|
|
9760
|
-
const resolvedPath =
|
|
10198
|
+
const resolvedPath = path31.resolve(attachment);
|
|
9761
10199
|
if (!await pathExists(resolvedPath)) {
|
|
9762
10200
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
9763
10201
|
}
|
|
@@ -9799,7 +10237,7 @@ async function dispatchAgentSession(options) {
|
|
|
9799
10237
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
9800
10238
|
};
|
|
9801
10239
|
}
|
|
9802
|
-
const subagentName =
|
|
10240
|
+
const subagentName = path31.basename(subagentDir);
|
|
9803
10241
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
9804
10242
|
const preparationResult = await prepareSubagentDirectory(
|
|
9805
10243
|
subagentDir,
|
|
@@ -9827,9 +10265,9 @@ async function dispatchAgentSession(options) {
|
|
|
9827
10265
|
};
|
|
9828
10266
|
}
|
|
9829
10267
|
const timestamp = generateTimestamp();
|
|
9830
|
-
const messagesDir =
|
|
9831
|
-
const responseFileTmp =
|
|
9832
|
-
const responseFileFinal =
|
|
10268
|
+
const messagesDir = path31.join(subagentDir, "messages");
|
|
10269
|
+
const responseFileTmp = path31.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
10270
|
+
const responseFileFinal = path31.join(messagesDir, `${timestamp}_res.md`);
|
|
9833
10271
|
const requestInstructions = createRequestPrompt(
|
|
9834
10272
|
userQuery,
|
|
9835
10273
|
responseFileTmp,
|
|
@@ -9934,7 +10372,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9934
10372
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
9935
10373
|
};
|
|
9936
10374
|
}
|
|
9937
|
-
subagentName =
|
|
10375
|
+
subagentName = path31.basename(subagentDir);
|
|
9938
10376
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
9939
10377
|
const preparationResult = await prepareSubagentDirectory(
|
|
9940
10378
|
subagentDir,
|
|
@@ -9965,17 +10403,17 @@ async function dispatchBatchAgent(options) {
|
|
|
9965
10403
|
};
|
|
9966
10404
|
}
|
|
9967
10405
|
const timestamp = generateTimestamp();
|
|
9968
|
-
const messagesDir =
|
|
10406
|
+
const messagesDir = path31.join(subagentDir, "messages");
|
|
9969
10407
|
requestFiles = userQueries.map(
|
|
9970
|
-
(_, index) =>
|
|
10408
|
+
(_, index) => path31.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
9971
10409
|
);
|
|
9972
10410
|
const responseTmpFiles = userQueries.map(
|
|
9973
|
-
(_, index) =>
|
|
10411
|
+
(_, index) => path31.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
9974
10412
|
);
|
|
9975
10413
|
responseFilesFinal = userQueries.map(
|
|
9976
|
-
(_, index) =>
|
|
10414
|
+
(_, index) => path31.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
9977
10415
|
);
|
|
9978
|
-
const orchestratorFile =
|
|
10416
|
+
const orchestratorFile = path31.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
9979
10417
|
if (!dryRun) {
|
|
9980
10418
|
await Promise.all(
|
|
9981
10419
|
userQueries.map((query, index) => {
|
|
@@ -10061,7 +10499,7 @@ async function dispatchBatchAgent(options) {
|
|
|
10061
10499
|
|
|
10062
10500
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
10063
10501
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
10064
|
-
import
|
|
10502
|
+
import path32 from "node:path";
|
|
10065
10503
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
10066
10504
|
folders: [
|
|
10067
10505
|
{
|
|
@@ -10092,7 +10530,7 @@ async function provisionSubagents(options) {
|
|
|
10092
10530
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
10093
10531
|
throw new Error("subagents must be a positive integer");
|
|
10094
10532
|
}
|
|
10095
|
-
const targetPath =
|
|
10533
|
+
const targetPath = path32.resolve(targetRoot);
|
|
10096
10534
|
if (!dryRun) {
|
|
10097
10535
|
await ensureDir(targetPath);
|
|
10098
10536
|
}
|
|
@@ -10112,7 +10550,7 @@ async function provisionSubagents(options) {
|
|
|
10112
10550
|
continue;
|
|
10113
10551
|
}
|
|
10114
10552
|
highestNumber = Math.max(highestNumber, parsed);
|
|
10115
|
-
const lockFile =
|
|
10553
|
+
const lockFile = path32.join(entry.absolutePath, lockName);
|
|
10116
10554
|
const locked = await pathExists(lockFile);
|
|
10117
10555
|
if (locked) {
|
|
10118
10556
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -10129,10 +10567,10 @@ async function provisionSubagents(options) {
|
|
|
10129
10567
|
break;
|
|
10130
10568
|
}
|
|
10131
10569
|
const subagentDir = subagent.absolutePath;
|
|
10132
|
-
const githubAgentsDir =
|
|
10133
|
-
const lockFile =
|
|
10134
|
-
const workspaceDst =
|
|
10135
|
-
const wakeupDst =
|
|
10570
|
+
const githubAgentsDir = path32.join(subagentDir, ".github", "agents");
|
|
10571
|
+
const lockFile = path32.join(subagentDir, lockName);
|
|
10572
|
+
const workspaceDst = path32.join(subagentDir, `${path32.basename(subagentDir)}.code-workspace`);
|
|
10573
|
+
const wakeupDst = path32.join(githubAgentsDir, "wakeup.md");
|
|
10136
10574
|
const isLocked = await pathExists(lockFile);
|
|
10137
10575
|
if (isLocked && !force) {
|
|
10138
10576
|
continue;
|
|
@@ -10170,10 +10608,10 @@ async function provisionSubagents(options) {
|
|
|
10170
10608
|
let nextIndex = highestNumber;
|
|
10171
10609
|
while (subagentsProvisioned < subagents) {
|
|
10172
10610
|
nextIndex += 1;
|
|
10173
|
-
const subagentDir =
|
|
10174
|
-
const githubAgentsDir =
|
|
10175
|
-
const workspaceDst =
|
|
10176
|
-
const wakeupDst =
|
|
10611
|
+
const subagentDir = path32.join(targetPath, `subagent-${nextIndex}`);
|
|
10612
|
+
const githubAgentsDir = path32.join(subagentDir, ".github", "agents");
|
|
10613
|
+
const workspaceDst = path32.join(subagentDir, `${path32.basename(subagentDir)}.code-workspace`);
|
|
10614
|
+
const wakeupDst = path32.join(githubAgentsDir, "wakeup.md");
|
|
10177
10615
|
if (!dryRun) {
|
|
10178
10616
|
await ensureDir(subagentDir);
|
|
10179
10617
|
await ensureDir(githubAgentsDir);
|
|
@@ -10363,7 +10801,7 @@ var VSCodeProvider = class {
|
|
|
10363
10801
|
async function locateVSCodeExecutable(candidate) {
|
|
10364
10802
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
10365
10803
|
if (includesPathSeparator) {
|
|
10366
|
-
const resolved =
|
|
10804
|
+
const resolved = path33.isAbsolute(candidate) ? candidate : path33.resolve(candidate);
|
|
10367
10805
|
try {
|
|
10368
10806
|
await access3(resolved, constants3.F_OK);
|
|
10369
10807
|
return resolved;
|
|
@@ -10392,7 +10830,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
10392
10830
|
return void 0;
|
|
10393
10831
|
}
|
|
10394
10832
|
try {
|
|
10395
|
-
const stats = await stat5(
|
|
10833
|
+
const stats = await stat5(path33.resolve(template));
|
|
10396
10834
|
return stats.isFile() ? template : void 0;
|
|
10397
10835
|
} catch {
|
|
10398
10836
|
return template;
|
|
@@ -10416,7 +10854,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
10416
10854
|
return "";
|
|
10417
10855
|
}
|
|
10418
10856
|
const buildList = (files) => files.map((absolutePath) => {
|
|
10419
|
-
const fileName =
|
|
10857
|
+
const fileName = path33.basename(absolutePath);
|
|
10420
10858
|
const fileUri = pathToFileUri3(absolutePath);
|
|
10421
10859
|
return `* [${fileName}](${fileUri})`;
|
|
10422
10860
|
});
|
|
@@ -10437,7 +10875,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
10437
10875
|
}
|
|
10438
10876
|
const unique = /* @__PURE__ */ new Map();
|
|
10439
10877
|
for (const attachment of attachments) {
|
|
10440
|
-
const absolutePath =
|
|
10878
|
+
const absolutePath = path33.resolve(attachment);
|
|
10441
10879
|
if (!unique.has(absolutePath)) {
|
|
10442
10880
|
unique.set(absolutePath, absolutePath);
|
|
10443
10881
|
}
|
|
@@ -10445,7 +10883,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
10445
10883
|
return Array.from(unique.values());
|
|
10446
10884
|
}
|
|
10447
10885
|
function pathToFileUri3(filePath) {
|
|
10448
|
-
const absolutePath =
|
|
10886
|
+
const absolutePath = path33.isAbsolute(filePath) ? filePath : path33.resolve(filePath);
|
|
10449
10887
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
10450
10888
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
10451
10889
|
return `file:///${normalizedPath}`;
|
|
@@ -10458,7 +10896,7 @@ function normalizeAttachments(attachments) {
|
|
|
10458
10896
|
}
|
|
10459
10897
|
const deduped = /* @__PURE__ */ new Set();
|
|
10460
10898
|
for (const attachment of attachments) {
|
|
10461
|
-
deduped.add(
|
|
10899
|
+
deduped.add(path33.resolve(attachment));
|
|
10462
10900
|
}
|
|
10463
10901
|
return Array.from(deduped);
|
|
10464
10902
|
}
|
|
@@ -10467,7 +10905,7 @@ function mergeAttachments(all) {
|
|
|
10467
10905
|
for (const list of all) {
|
|
10468
10906
|
if (!list) continue;
|
|
10469
10907
|
for (const inputFile of list) {
|
|
10470
|
-
deduped.add(
|
|
10908
|
+
deduped.add(path33.resolve(inputFile));
|
|
10471
10909
|
}
|
|
10472
10910
|
}
|
|
10473
10911
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -10515,9 +10953,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
10515
10953
|
|
|
10516
10954
|
// src/evaluation/providers/targets-file.ts
|
|
10517
10955
|
import { constants as constants4 } from "node:fs";
|
|
10518
|
-
import { access as access4, readFile as
|
|
10519
|
-
import
|
|
10520
|
-
import { parse as
|
|
10956
|
+
import { access as access4, readFile as readFile13 } from "node:fs/promises";
|
|
10957
|
+
import path34 from "node:path";
|
|
10958
|
+
import { parse as parse5 } from "yaml";
|
|
10521
10959
|
function isRecord(value) {
|
|
10522
10960
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
10523
10961
|
}
|
|
@@ -10556,12 +10994,12 @@ async function fileExists3(filePath) {
|
|
|
10556
10994
|
}
|
|
10557
10995
|
}
|
|
10558
10996
|
async function readTargetDefinitions(filePath) {
|
|
10559
|
-
const absolutePath =
|
|
10997
|
+
const absolutePath = path34.resolve(filePath);
|
|
10560
10998
|
if (!await fileExists3(absolutePath)) {
|
|
10561
10999
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
10562
11000
|
}
|
|
10563
|
-
const raw = await
|
|
10564
|
-
const parsed =
|
|
11001
|
+
const raw = await readFile13(absolutePath, "utf8");
|
|
11002
|
+
const parsed = parse5(raw);
|
|
10565
11003
|
if (!isRecord(parsed)) {
|
|
10566
11004
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
10567
11005
|
}
|
|
@@ -10576,16 +11014,16 @@ function listTargetNames(definitions) {
|
|
|
10576
11014
|
}
|
|
10577
11015
|
|
|
10578
11016
|
// src/evaluation/providers/provider-discovery.ts
|
|
10579
|
-
import
|
|
11017
|
+
import path35 from "node:path";
|
|
10580
11018
|
import fg from "fast-glob";
|
|
10581
11019
|
async function discoverProviders(registry, baseDir) {
|
|
10582
11020
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
10583
11021
|
const candidateDirs = [];
|
|
10584
|
-
let dir =
|
|
10585
|
-
const root =
|
|
11022
|
+
let dir = path35.resolve(baseDir);
|
|
11023
|
+
const root = path35.parse(dir).root;
|
|
10586
11024
|
while (dir !== root) {
|
|
10587
|
-
candidateDirs.push(
|
|
10588
|
-
dir =
|
|
11025
|
+
candidateDirs.push(path35.join(dir, ".agentv", "providers"));
|
|
11026
|
+
dir = path35.dirname(dir);
|
|
10589
11027
|
}
|
|
10590
11028
|
let files = [];
|
|
10591
11029
|
for (const providersDir of candidateDirs) {
|
|
@@ -10601,7 +11039,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
10601
11039
|
}
|
|
10602
11040
|
const discoveredKinds = [];
|
|
10603
11041
|
for (const filePath of files) {
|
|
10604
|
-
const basename =
|
|
11042
|
+
const basename = path35.basename(filePath);
|
|
10605
11043
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
10606
11044
|
if (registry.has(kindName)) {
|
|
10607
11045
|
continue;
|
|
@@ -10727,150 +11165,6 @@ import { mkdtemp as mkdtemp2, rm as rm3, writeFile as writeFile6 } from "node:fs
|
|
|
10727
11165
|
import { tmpdir as tmpdir2 } from "node:os";
|
|
10728
11166
|
import { dirname, join } from "node:path";
|
|
10729
11167
|
|
|
10730
|
-
// src/runtime/exec.ts
|
|
10731
|
-
function shellEscapePath(value) {
|
|
10732
|
-
if (process.platform === "win32") {
|
|
10733
|
-
return `"${value.replaceAll('"', '""')}"`;
|
|
10734
|
-
}
|
|
10735
|
-
return `'${value.replaceAll("'", `'"'"'`)}'`;
|
|
10736
|
-
}
|
|
10737
|
-
async function execFileWithStdin(argv, stdinPayload, options = {}) {
|
|
10738
|
-
if (argv.length === 0) {
|
|
10739
|
-
throw new Error("Executable argv must include at least one entry");
|
|
10740
|
-
}
|
|
10741
|
-
if (typeof Bun !== "undefined") {
|
|
10742
|
-
return execFileWithStdinBun(argv, stdinPayload, options);
|
|
10743
|
-
}
|
|
10744
|
-
return execFileWithStdinNode(argv, stdinPayload, options);
|
|
10745
|
-
}
|
|
10746
|
-
async function execFileWithStdinBun(argv, stdinPayload, options) {
|
|
10747
|
-
const command = [...argv];
|
|
10748
|
-
const encoder = new TextEncoder();
|
|
10749
|
-
const proc = Bun.spawn(command, {
|
|
10750
|
-
cwd: options.cwd,
|
|
10751
|
-
stdin: encoder.encode(stdinPayload),
|
|
10752
|
-
stdout: "pipe",
|
|
10753
|
-
stderr: "pipe",
|
|
10754
|
-
// Merge additional env vars with process.env
|
|
10755
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
10756
|
-
});
|
|
10757
|
-
let timedOut = false;
|
|
10758
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
10759
|
-
timedOut = true;
|
|
10760
|
-
proc.kill("SIGKILL");
|
|
10761
|
-
}, options.timeoutMs) : void 0;
|
|
10762
|
-
try {
|
|
10763
|
-
const stdoutPromise = proc.stdout ? new Response(proc.stdout).text() : Promise.resolve("");
|
|
10764
|
-
const stderrPromise = proc.stderr ? new Response(proc.stderr).text() : Promise.resolve("");
|
|
10765
|
-
const [stdout, stderr, exitCode] = await Promise.all([
|
|
10766
|
-
stdoutPromise,
|
|
10767
|
-
stderrPromise,
|
|
10768
|
-
proc.exited
|
|
10769
|
-
]);
|
|
10770
|
-
if (timedOut) {
|
|
10771
|
-
throw new Error(`Process timed out after ${options.timeoutMs}ms`);
|
|
10772
|
-
}
|
|
10773
|
-
return {
|
|
10774
|
-
stdout: stdout.replace(/\r\n/g, "\n"),
|
|
10775
|
-
stderr: stderr.replace(/\r\n/g, "\n"),
|
|
10776
|
-
exitCode
|
|
10777
|
-
};
|
|
10778
|
-
} finally {
|
|
10779
|
-
if (timeout !== void 0) {
|
|
10780
|
-
clearTimeout(timeout);
|
|
10781
|
-
}
|
|
10782
|
-
}
|
|
10783
|
-
}
|
|
10784
|
-
async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
10785
|
-
const { spawn: spawn5 } = await import("node:child_process");
|
|
10786
|
-
return new Promise((resolve, reject) => {
|
|
10787
|
-
const [cmd, ...args] = argv;
|
|
10788
|
-
const child = spawn5(cmd, args, {
|
|
10789
|
-
cwd: options.cwd,
|
|
10790
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
10791
|
-
// Merge additional env vars with process.env
|
|
10792
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
10793
|
-
});
|
|
10794
|
-
const stdoutChunks = [];
|
|
10795
|
-
const stderrChunks = [];
|
|
10796
|
-
child.stdout?.on("data", (chunk) => stdoutChunks.push(chunk));
|
|
10797
|
-
child.stderr?.on("data", (chunk) => stderrChunks.push(chunk));
|
|
10798
|
-
let timedOut = false;
|
|
10799
|
-
const timeout = options.timeoutMs !== void 0 ? setTimeout(() => {
|
|
10800
|
-
timedOut = true;
|
|
10801
|
-
child.kill("SIGKILL");
|
|
10802
|
-
}, options.timeoutMs) : void 0;
|
|
10803
|
-
child.on("error", (error) => {
|
|
10804
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
10805
|
-
reject(error);
|
|
10806
|
-
});
|
|
10807
|
-
child.on("close", (code) => {
|
|
10808
|
-
if (timeout !== void 0) clearTimeout(timeout);
|
|
10809
|
-
if (timedOut) {
|
|
10810
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
10811
|
-
return;
|
|
10812
|
-
}
|
|
10813
|
-
const stdout = Buffer.concat(stdoutChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
10814
|
-
const stderr = Buffer.concat(stderrChunks).toString("utf8").replace(/\r\n/g, "\n");
|
|
10815
|
-
resolve({
|
|
10816
|
-
stdout,
|
|
10817
|
-
stderr,
|
|
10818
|
-
exitCode: code ?? 0
|
|
10819
|
-
});
|
|
10820
|
-
});
|
|
10821
|
-
if (child.stdin) {
|
|
10822
|
-
child.stdin.write(stdinPayload);
|
|
10823
|
-
child.stdin.end();
|
|
10824
|
-
}
|
|
10825
|
-
});
|
|
10826
|
-
}
|
|
10827
|
-
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
10828
|
-
const { mkdir: mkdir16, readFile: readFile16, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
10829
|
-
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
10830
|
-
const path51 = await import("node:path");
|
|
10831
|
-
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
10832
|
-
const dir = path51.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
10833
|
-
await mkdir16(dir, { recursive: true });
|
|
10834
|
-
const stdinPath = path51.join(dir, "stdin.txt");
|
|
10835
|
-
const stdoutPath = path51.join(dir, "stdout.txt");
|
|
10836
|
-
const stderrPath = path51.join(dir, "stderr.txt");
|
|
10837
|
-
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
10838
|
-
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
10839
|
-
const { spawn: spawn5 } = await import("node:child_process");
|
|
10840
|
-
try {
|
|
10841
|
-
const exitCode = await new Promise((resolve, reject) => {
|
|
10842
|
-
const child = spawn5(wrappedCommand, {
|
|
10843
|
-
shell: true,
|
|
10844
|
-
cwd: options.cwd,
|
|
10845
|
-
stdio: ["ignore", "ignore", "ignore"],
|
|
10846
|
-
// Merge additional env vars with process.env
|
|
10847
|
-
env: options.env ? { ...process.env, ...options.env } : process.env
|
|
10848
|
-
});
|
|
10849
|
-
const timeout = options.timeoutMs ? setTimeout(() => {
|
|
10850
|
-
child.kill();
|
|
10851
|
-
reject(new Error(`Process timed out after ${options.timeoutMs}ms`));
|
|
10852
|
-
}, options.timeoutMs) : void 0;
|
|
10853
|
-
child.on("error", (error) => {
|
|
10854
|
-
if (timeout !== void 0) {
|
|
10855
|
-
clearTimeout(timeout);
|
|
10856
|
-
}
|
|
10857
|
-
reject(error);
|
|
10858
|
-
});
|
|
10859
|
-
child.on("exit", (code) => {
|
|
10860
|
-
if (timeout !== void 0) {
|
|
10861
|
-
clearTimeout(timeout);
|
|
10862
|
-
}
|
|
10863
|
-
resolve(code ?? 0);
|
|
10864
|
-
});
|
|
10865
|
-
});
|
|
10866
|
-
const stdout = (await readFile16(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10867
|
-
const stderr = (await readFile16(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10868
|
-
return { stdout, stderr, exitCode };
|
|
10869
|
-
} finally {
|
|
10870
|
-
await rm6(dir, { recursive: true, force: true });
|
|
10871
|
-
}
|
|
10872
|
-
}
|
|
10873
|
-
|
|
10874
11168
|
// src/runtime/target-proxy.ts
|
|
10875
11169
|
import { randomBytes } from "node:crypto";
|
|
10876
11170
|
import { createServer } from "node:http";
|
|
@@ -11147,6 +11441,18 @@ function toCamelCaseDeep(obj) {
|
|
|
11147
11441
|
return obj;
|
|
11148
11442
|
}
|
|
11149
11443
|
|
|
11444
|
+
// src/evaluation/workspace/repo-checkout.ts
|
|
11445
|
+
function getRepoCheckoutRef(checkout) {
|
|
11446
|
+
return checkout?.base_commit ?? checkout?.ref ?? "HEAD";
|
|
11447
|
+
}
|
|
11448
|
+
function getRepoCheckoutTargets(repos) {
|
|
11449
|
+
if (!repos) return [];
|
|
11450
|
+
return repos.filter((repo) => repo.checkout?.base_commit || repo.checkout?.ref).map((repo) => ({
|
|
11451
|
+
path: repo.path,
|
|
11452
|
+
ref: getRepoCheckoutRef(repo.checkout)
|
|
11453
|
+
}));
|
|
11454
|
+
}
|
|
11455
|
+
|
|
11150
11456
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
11151
11457
|
var FILE_BACKED_OUTPUT_THRESHOLD = 5e4;
|
|
11152
11458
|
var DATA_URI_RE = /^data:([^;]+);base64,(.+)$/s;
|
|
@@ -11281,13 +11587,31 @@ var CodeEvaluator = class {
|
|
|
11281
11587
|
const workspaceEnv = context.workspacePath ? { AGENTV_WORKSPACE_PATH: context.workspacePath } : void 0;
|
|
11282
11588
|
const env = proxyEnv || workspaceEnv ? { ...proxyEnv, ...workspaceEnv } : void 0;
|
|
11283
11589
|
try {
|
|
11284
|
-
|
|
11285
|
-
|
|
11286
|
-
|
|
11287
|
-
|
|
11288
|
-
|
|
11289
|
-
|
|
11290
|
-
|
|
11590
|
+
let stdout;
|
|
11591
|
+
if (context.dockerConfig) {
|
|
11592
|
+
const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27.js");
|
|
11593
|
+
const dockerProvider = new DockerWorkspaceProvider2(context.dockerConfig);
|
|
11594
|
+
const result = await dockerProvider.runGraderInContainer({
|
|
11595
|
+
command: [...this.command],
|
|
11596
|
+
stdin: inputPayload,
|
|
11597
|
+
repoCheckouts: getRepoCheckoutTargets(context.evalCase.workspace?.repos)
|
|
11598
|
+
});
|
|
11599
|
+
if (result.exitCode !== 0) {
|
|
11600
|
+
const trimmedErr = result.stderr.trim();
|
|
11601
|
+
throw new Error(
|
|
11602
|
+
trimmedErr.length > 0 ? `Code evaluator exited with code ${result.exitCode}: ${trimmedErr}` : `Code evaluator exited with code ${result.exitCode}`
|
|
11603
|
+
);
|
|
11604
|
+
}
|
|
11605
|
+
stdout = result.stdout.trim();
|
|
11606
|
+
} else {
|
|
11607
|
+
stdout = await executeScript(
|
|
11608
|
+
this.command,
|
|
11609
|
+
inputPayload,
|
|
11610
|
+
this.agentTimeoutMs,
|
|
11611
|
+
this.cwd,
|
|
11612
|
+
env
|
|
11613
|
+
);
|
|
11614
|
+
}
|
|
11291
11615
|
const parsed = parseJsonSafe(stdout);
|
|
11292
11616
|
const score = clampScore(typeof parsed?.score === "number" ? parsed.score : 0);
|
|
11293
11617
|
const assertions = Array.isArray(parsed?.assertions) ? parsed.assertions.filter(
|
|
@@ -11380,7 +11704,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
11380
11704
|
|
|
11381
11705
|
// src/evaluation/evaluators/llm-grader.ts
|
|
11382
11706
|
import fs2 from "node:fs/promises";
|
|
11383
|
-
import
|
|
11707
|
+
import path36 from "node:path";
|
|
11384
11708
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
11385
11709
|
import { z as z3 } from "zod";
|
|
11386
11710
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -11464,6 +11788,15 @@ var scoreRangeEvaluationSchema = z3.object({
|
|
|
11464
11788
|
checks: z3.array(scoreRangeCheckResultSchema).describe("Scores for each rubric criterion"),
|
|
11465
11789
|
overall_reasoning: z3.string().describe("Overall assessment summary (1-2 sentences)").optional()
|
|
11466
11790
|
});
|
|
11791
|
+
function resolveContentBasePath(context) {
|
|
11792
|
+
if (context.workspacePath) {
|
|
11793
|
+
return context.workspacePath;
|
|
11794
|
+
}
|
|
11795
|
+
if ("config" in context.target && context.target.config && typeof context.target.config === "object" && "cwd" in context.target.config && typeof context.target.config.cwd === "string") {
|
|
11796
|
+
return context.target.config.cwd;
|
|
11797
|
+
}
|
|
11798
|
+
return void 0;
|
|
11799
|
+
}
|
|
11467
11800
|
var LlmGraderEvaluator = class {
|
|
11468
11801
|
kind = "llm-grader";
|
|
11469
11802
|
resolveGraderProvider;
|
|
@@ -11481,24 +11814,46 @@ var LlmGraderEvaluator = class {
|
|
|
11481
11814
|
this.graderTargetProvider = options.graderTargetProvider ?? options.judgeTargetProvider;
|
|
11482
11815
|
}
|
|
11483
11816
|
async evaluate(context) {
|
|
11817
|
+
const preparedContext = await this.prepareContext(context);
|
|
11484
11818
|
if (this.graderTargetProvider) {
|
|
11485
|
-
return this.evaluateWithGraderTarget(
|
|
11819
|
+
return this.evaluateWithGraderTarget(preparedContext);
|
|
11486
11820
|
}
|
|
11487
|
-
const graderProvider = await this.resolveGraderProvider(
|
|
11821
|
+
const graderProvider = await this.resolveGraderProvider(preparedContext);
|
|
11488
11822
|
if (!graderProvider) {
|
|
11489
11823
|
throw new Error("No grader provider available for LLM grading");
|
|
11490
11824
|
}
|
|
11491
11825
|
if (graderProvider.kind === "agentv") {
|
|
11492
|
-
return this.evaluateBuiltIn(
|
|
11826
|
+
return this.evaluateBuiltIn(preparedContext, graderProvider);
|
|
11493
11827
|
}
|
|
11494
11828
|
if (isAgentProvider(graderProvider)) {
|
|
11495
|
-
return this.evaluateWithDelegatedAgent(
|
|
11829
|
+
return this.evaluateWithDelegatedAgent(preparedContext, graderProvider);
|
|
11496
11830
|
}
|
|
11497
|
-
const config =
|
|
11831
|
+
const config = preparedContext.evaluator;
|
|
11498
11832
|
if (config?.type === "llm-grader" && config.rubrics && config.rubrics.length > 0) {
|
|
11499
|
-
return this.evaluateWithRubrics(
|
|
11833
|
+
return this.evaluateWithRubrics(preparedContext, graderProvider, config.rubrics);
|
|
11500
11834
|
}
|
|
11501
|
-
return this.evaluateFreeform(
|
|
11835
|
+
return this.evaluateFreeform(preparedContext, graderProvider);
|
|
11836
|
+
}
|
|
11837
|
+
async prepareContext(context) {
|
|
11838
|
+
const config = context.evaluator;
|
|
11839
|
+
if (config?.type !== "llm-grader" || !context.output) {
|
|
11840
|
+
return context;
|
|
11841
|
+
}
|
|
11842
|
+
const lastAssistant = [...context.output].reverse().find((message) => message.role === "assistant" && message.content !== void 0);
|
|
11843
|
+
if (!lastAssistant || typeof lastAssistant.content === "string") {
|
|
11844
|
+
return context;
|
|
11845
|
+
}
|
|
11846
|
+
const extracted = await extractTextWithPreprocessors(
|
|
11847
|
+
lastAssistant.content,
|
|
11848
|
+
config.preprocessors,
|
|
11849
|
+
{
|
|
11850
|
+
basePath: resolveContentBasePath(context)
|
|
11851
|
+
}
|
|
11852
|
+
);
|
|
11853
|
+
return {
|
|
11854
|
+
...context,
|
|
11855
|
+
candidate: appendPreprocessingWarnings(extracted.text, extracted.warnings)
|
|
11856
|
+
};
|
|
11502
11857
|
}
|
|
11503
11858
|
// ---------------------------------------------------------------------------
|
|
11504
11859
|
// LLM mode (existing)
|
|
@@ -12383,8 +12738,8 @@ function toAiSdkImageParts(images) {
|
|
|
12383
12738
|
}));
|
|
12384
12739
|
}
|
|
12385
12740
|
function resolveSandboxed(basePath, relativePath) {
|
|
12386
|
-
const resolved =
|
|
12387
|
-
if (!resolved.startsWith(basePath +
|
|
12741
|
+
const resolved = path36.resolve(basePath, relativePath);
|
|
12742
|
+
if (!resolved.startsWith(basePath + path36.sep) && resolved !== basePath) {
|
|
12388
12743
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
12389
12744
|
}
|
|
12390
12745
|
return resolved;
|
|
@@ -12417,11 +12772,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
12417
12772
|
execute: async (input) => {
|
|
12418
12773
|
try {
|
|
12419
12774
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
12420
|
-
const
|
|
12421
|
-
if (
|
|
12775
|
+
const stat12 = await fs2.stat(resolved);
|
|
12776
|
+
if (stat12.isDirectory()) {
|
|
12422
12777
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
12423
12778
|
}
|
|
12424
|
-
const buffer = Buffer.alloc(Math.min(
|
|
12779
|
+
const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
|
|
12425
12780
|
const fd = await fs2.open(resolved, "r");
|
|
12426
12781
|
try {
|
|
12427
12782
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -12429,8 +12784,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
12429
12784
|
await fd.close();
|
|
12430
12785
|
}
|
|
12431
12786
|
const content = buffer.toString("utf-8");
|
|
12432
|
-
const truncated =
|
|
12433
|
-
return { content, truncated, size:
|
|
12787
|
+
const truncated = stat12.size > MAX_FILE_SIZE;
|
|
12788
|
+
return { content, truncated, size: stat12.size };
|
|
12434
12789
|
} catch (error) {
|
|
12435
12790
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
12436
12791
|
}
|
|
@@ -12474,15 +12829,15 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
12474
12829
|
for (const entry of entries) {
|
|
12475
12830
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
12476
12831
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
12477
|
-
const fullPath =
|
|
12832
|
+
const fullPath = path36.join(dirPath, entry.name);
|
|
12478
12833
|
if (entry.isDirectory()) {
|
|
12479
12834
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
12480
12835
|
} else if (entry.isFile()) {
|
|
12481
|
-
const ext =
|
|
12836
|
+
const ext = path36.extname(entry.name).toLowerCase();
|
|
12482
12837
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
12483
12838
|
try {
|
|
12484
|
-
const
|
|
12485
|
-
if (
|
|
12839
|
+
const stat12 = await fs2.stat(fullPath);
|
|
12840
|
+
if (stat12.size > MAX_FILE_SIZE) continue;
|
|
12486
12841
|
const content = await fs2.readFile(fullPath, "utf-8");
|
|
12487
12842
|
const lines = content.split("\n");
|
|
12488
12843
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -12490,7 +12845,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
12490
12845
|
regex.lastIndex = 0;
|
|
12491
12846
|
if (regex.test(lines[i])) {
|
|
12492
12847
|
matches.push({
|
|
12493
|
-
file:
|
|
12848
|
+
file: path36.relative(workspacePath, fullPath),
|
|
12494
12849
|
line: i + 1,
|
|
12495
12850
|
text: lines[i].substring(0, 200)
|
|
12496
12851
|
});
|
|
@@ -13123,115 +13478,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
13123
13478
|
* Evaluate a single field against the expected value.
|
|
13124
13479
|
*/
|
|
13125
13480
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
13126
|
-
const { path:
|
|
13127
|
-
const candidateValue = resolvePath(candidateData,
|
|
13128
|
-
const expectedValue = resolvePath(expectedData,
|
|
13481
|
+
const { path: path53, match, required = true, weight = 1 } = fieldConfig;
|
|
13482
|
+
const candidateValue = resolvePath(candidateData, path53);
|
|
13483
|
+
const expectedValue = resolvePath(expectedData, path53);
|
|
13129
13484
|
if (expectedValue === void 0) {
|
|
13130
13485
|
return {
|
|
13131
|
-
path:
|
|
13486
|
+
path: path53,
|
|
13132
13487
|
score: 1,
|
|
13133
13488
|
// No expected value means no comparison needed
|
|
13134
13489
|
weight,
|
|
13135
13490
|
hit: true,
|
|
13136
|
-
message: `${
|
|
13491
|
+
message: `${path53}: no expected value`
|
|
13137
13492
|
};
|
|
13138
13493
|
}
|
|
13139
13494
|
if (candidateValue === void 0) {
|
|
13140
13495
|
if (required) {
|
|
13141
13496
|
return {
|
|
13142
|
-
path:
|
|
13497
|
+
path: path53,
|
|
13143
13498
|
score: 0,
|
|
13144
13499
|
weight,
|
|
13145
13500
|
hit: false,
|
|
13146
|
-
message: `${
|
|
13501
|
+
message: `${path53} (required, missing)`
|
|
13147
13502
|
};
|
|
13148
13503
|
}
|
|
13149
13504
|
return {
|
|
13150
|
-
path:
|
|
13505
|
+
path: path53,
|
|
13151
13506
|
score: 1,
|
|
13152
13507
|
// Don't penalize missing optional fields
|
|
13153
13508
|
weight: 0,
|
|
13154
13509
|
// Zero weight means it won't affect the score
|
|
13155
13510
|
hit: true,
|
|
13156
|
-
message: `${
|
|
13511
|
+
message: `${path53}: optional field missing`
|
|
13157
13512
|
};
|
|
13158
13513
|
}
|
|
13159
13514
|
switch (match) {
|
|
13160
13515
|
case "exact":
|
|
13161
|
-
return this.compareExact(
|
|
13516
|
+
return this.compareExact(path53, candidateValue, expectedValue, weight);
|
|
13162
13517
|
case "numeric_tolerance":
|
|
13163
13518
|
return this.compareNumericTolerance(
|
|
13164
|
-
|
|
13519
|
+
path53,
|
|
13165
13520
|
candidateValue,
|
|
13166
13521
|
expectedValue,
|
|
13167
13522
|
fieldConfig,
|
|
13168
13523
|
weight
|
|
13169
13524
|
);
|
|
13170
13525
|
case "date":
|
|
13171
|
-
return this.compareDate(
|
|
13526
|
+
return this.compareDate(path53, candidateValue, expectedValue, fieldConfig, weight);
|
|
13172
13527
|
default:
|
|
13173
13528
|
return {
|
|
13174
|
-
path:
|
|
13529
|
+
path: path53,
|
|
13175
13530
|
score: 0,
|
|
13176
13531
|
weight,
|
|
13177
13532
|
hit: false,
|
|
13178
|
-
message: `${
|
|
13533
|
+
message: `${path53}: unknown match type "${match}"`
|
|
13179
13534
|
};
|
|
13180
13535
|
}
|
|
13181
13536
|
}
|
|
13182
13537
|
/**
|
|
13183
13538
|
* Exact equality comparison.
|
|
13184
13539
|
*/
|
|
13185
|
-
compareExact(
|
|
13540
|
+
compareExact(path53, candidateValue, expectedValue, weight) {
|
|
13186
13541
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
13187
13542
|
return {
|
|
13188
|
-
path:
|
|
13543
|
+
path: path53,
|
|
13189
13544
|
score: 1,
|
|
13190
13545
|
weight,
|
|
13191
13546
|
hit: true,
|
|
13192
|
-
message:
|
|
13547
|
+
message: path53
|
|
13193
13548
|
};
|
|
13194
13549
|
}
|
|
13195
13550
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
13196
13551
|
return {
|
|
13197
|
-
path:
|
|
13552
|
+
path: path53,
|
|
13198
13553
|
score: 0,
|
|
13199
13554
|
weight,
|
|
13200
13555
|
hit: false,
|
|
13201
|
-
message: `${
|
|
13556
|
+
message: `${path53} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
13202
13557
|
};
|
|
13203
13558
|
}
|
|
13204
13559
|
return {
|
|
13205
|
-
path:
|
|
13560
|
+
path: path53,
|
|
13206
13561
|
score: 0,
|
|
13207
13562
|
weight,
|
|
13208
13563
|
hit: false,
|
|
13209
|
-
message: `${
|
|
13564
|
+
message: `${path53} (value mismatch)`
|
|
13210
13565
|
};
|
|
13211
13566
|
}
|
|
13212
13567
|
/**
|
|
13213
13568
|
* Numeric comparison with absolute or relative tolerance.
|
|
13214
13569
|
*/
|
|
13215
|
-
compareNumericTolerance(
|
|
13570
|
+
compareNumericTolerance(path53, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13216
13571
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
13217
13572
|
const candidateNum = toNumber(candidateValue);
|
|
13218
13573
|
const expectedNum = toNumber(expectedValue);
|
|
13219
13574
|
if (candidateNum === null || expectedNum === null) {
|
|
13220
13575
|
return {
|
|
13221
|
-
path:
|
|
13576
|
+
path: path53,
|
|
13222
13577
|
score: 0,
|
|
13223
13578
|
weight,
|
|
13224
13579
|
hit: false,
|
|
13225
|
-
message: `${
|
|
13580
|
+
message: `${path53} (non-numeric value)`
|
|
13226
13581
|
};
|
|
13227
13582
|
}
|
|
13228
13583
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
13229
13584
|
return {
|
|
13230
|
-
path:
|
|
13585
|
+
path: path53,
|
|
13231
13586
|
score: 0,
|
|
13232
13587
|
weight,
|
|
13233
13588
|
hit: false,
|
|
13234
|
-
message: `${
|
|
13589
|
+
message: `${path53} (invalid numeric value)`
|
|
13235
13590
|
};
|
|
13236
13591
|
}
|
|
13237
13592
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -13244,61 +13599,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
13244
13599
|
}
|
|
13245
13600
|
if (withinTolerance) {
|
|
13246
13601
|
return {
|
|
13247
|
-
path:
|
|
13602
|
+
path: path53,
|
|
13248
13603
|
score: 1,
|
|
13249
13604
|
weight,
|
|
13250
13605
|
hit: true,
|
|
13251
|
-
message: `${
|
|
13606
|
+
message: `${path53} (within tolerance: diff=${diff.toFixed(2)})`
|
|
13252
13607
|
};
|
|
13253
13608
|
}
|
|
13254
13609
|
return {
|
|
13255
|
-
path:
|
|
13610
|
+
path: path53,
|
|
13256
13611
|
score: 0,
|
|
13257
13612
|
weight,
|
|
13258
13613
|
hit: false,
|
|
13259
|
-
message: `${
|
|
13614
|
+
message: `${path53} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
13260
13615
|
};
|
|
13261
13616
|
}
|
|
13262
13617
|
/**
|
|
13263
13618
|
* Date comparison with format normalization.
|
|
13264
13619
|
*/
|
|
13265
|
-
compareDate(
|
|
13620
|
+
compareDate(path53, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13266
13621
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
13267
13622
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
13268
13623
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
13269
13624
|
if (candidateDate === null) {
|
|
13270
13625
|
return {
|
|
13271
|
-
path:
|
|
13626
|
+
path: path53,
|
|
13272
13627
|
score: 0,
|
|
13273
13628
|
weight,
|
|
13274
13629
|
hit: false,
|
|
13275
|
-
message: `${
|
|
13630
|
+
message: `${path53} (unparseable candidate date)`
|
|
13276
13631
|
};
|
|
13277
13632
|
}
|
|
13278
13633
|
if (expectedDate === null) {
|
|
13279
13634
|
return {
|
|
13280
|
-
path:
|
|
13635
|
+
path: path53,
|
|
13281
13636
|
score: 0,
|
|
13282
13637
|
weight,
|
|
13283
13638
|
hit: false,
|
|
13284
|
-
message: `${
|
|
13639
|
+
message: `${path53} (unparseable expected date)`
|
|
13285
13640
|
};
|
|
13286
13641
|
}
|
|
13287
13642
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
13288
13643
|
return {
|
|
13289
|
-
path:
|
|
13644
|
+
path: path53,
|
|
13290
13645
|
score: 1,
|
|
13291
13646
|
weight,
|
|
13292
13647
|
hit: true,
|
|
13293
|
-
message:
|
|
13648
|
+
message: path53
|
|
13294
13649
|
};
|
|
13295
13650
|
}
|
|
13296
13651
|
return {
|
|
13297
|
-
path:
|
|
13652
|
+
path: path53,
|
|
13298
13653
|
score: 0,
|
|
13299
13654
|
weight,
|
|
13300
13655
|
hit: false,
|
|
13301
|
-
message: `${
|
|
13656
|
+
message: `${path53} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
13302
13657
|
};
|
|
13303
13658
|
}
|
|
13304
13659
|
/**
|
|
@@ -13331,11 +13686,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
13331
13686
|
};
|
|
13332
13687
|
}
|
|
13333
13688
|
};
|
|
13334
|
-
function resolvePath(obj,
|
|
13335
|
-
if (!
|
|
13689
|
+
function resolvePath(obj, path53) {
|
|
13690
|
+
if (!path53 || !obj) {
|
|
13336
13691
|
return void 0;
|
|
13337
13692
|
}
|
|
13338
|
-
const parts =
|
|
13693
|
+
const parts = path53.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
13339
13694
|
let current = obj;
|
|
13340
13695
|
for (const part of parts) {
|
|
13341
13696
|
if (current === null || current === void 0) {
|
|
@@ -13827,8 +14182,8 @@ var TokenUsageEvaluator = class {
|
|
|
13827
14182
|
};
|
|
13828
14183
|
|
|
13829
14184
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
13830
|
-
function getNestedValue(obj,
|
|
13831
|
-
const parts =
|
|
14185
|
+
function getNestedValue(obj, path53) {
|
|
14186
|
+
const parts = path53.split(".");
|
|
13832
14187
|
let current = obj;
|
|
13833
14188
|
for (const part of parts) {
|
|
13834
14189
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -14451,7 +14806,7 @@ function runEqualsAssertion(output, value) {
|
|
|
14451
14806
|
import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
|
|
14452
14807
|
import { existsSync as existsSync5 } from "node:fs";
|
|
14453
14808
|
import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir7, stat as stat8 } from "node:fs/promises";
|
|
14454
|
-
import
|
|
14809
|
+
import path45 from "node:path";
|
|
14455
14810
|
import micromatch3 from "micromatch";
|
|
14456
14811
|
|
|
14457
14812
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -14665,7 +15020,7 @@ var InlineAssertEvaluator = class {
|
|
|
14665
15020
|
};
|
|
14666
15021
|
|
|
14667
15022
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
14668
|
-
import
|
|
15023
|
+
import path37 from "node:path";
|
|
14669
15024
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
14670
15025
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
14671
15026
|
if (!context) {
|
|
@@ -14694,6 +15049,15 @@ async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
|
14694
15049
|
}
|
|
14695
15050
|
return void 0;
|
|
14696
15051
|
}
|
|
15052
|
+
function containsTemplateVariables(text) {
|
|
15053
|
+
const variablePattern = /\{\{\s*([a-zA-Z0-9_]+)\s*\}\}/g;
|
|
15054
|
+
for (const match of text.matchAll(variablePattern)) {
|
|
15055
|
+
if (VALID_TEMPLATE_VARIABLES.has(match[1])) {
|
|
15056
|
+
return true;
|
|
15057
|
+
}
|
|
15058
|
+
}
|
|
15059
|
+
return false;
|
|
15060
|
+
}
|
|
14697
15061
|
async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
14698
15062
|
const payload = {
|
|
14699
15063
|
criteria: context.evalCase.criteria,
|
|
@@ -14708,7 +15072,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
14708
15072
|
};
|
|
14709
15073
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
14710
15074
|
const scriptPath = script[script.length - 1];
|
|
14711
|
-
const cwd =
|
|
15075
|
+
const cwd = path37.dirname(scriptPath);
|
|
14712
15076
|
try {
|
|
14713
15077
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
14714
15078
|
const prompt = stdout.trim();
|
|
@@ -14766,9 +15130,20 @@ var llmGraderFactory = (config, context) => {
|
|
|
14766
15130
|
},
|
|
14767
15131
|
agentTimeoutMs
|
|
14768
15132
|
);
|
|
15133
|
+
const isFromInlinePrompt = !c.resolvedPromptScript?.length && !c.resolvedPromptPath && !c.promptPath;
|
|
15134
|
+
let evaluatorTemplateOverride;
|
|
15135
|
+
let evalCase = evalContext.evalCase;
|
|
15136
|
+
if (customPrompt) {
|
|
15137
|
+
if (!isFromInlinePrompt || containsTemplateVariables(customPrompt)) {
|
|
15138
|
+
evaluatorTemplateOverride = customPrompt;
|
|
15139
|
+
} else {
|
|
15140
|
+
evalCase = { ...evalCase, criteria: customPrompt };
|
|
15141
|
+
}
|
|
15142
|
+
}
|
|
14769
15143
|
return evaluator.evaluate({
|
|
14770
15144
|
...evalContext,
|
|
14771
|
-
|
|
15145
|
+
evalCase,
|
|
15146
|
+
evaluatorTemplateOverride,
|
|
14772
15147
|
evaluator: c
|
|
14773
15148
|
});
|
|
14774
15149
|
}
|
|
@@ -14980,16 +15355,16 @@ function createBuiltinRegistry() {
|
|
|
14980
15355
|
}
|
|
14981
15356
|
|
|
14982
15357
|
// src/evaluation/registry/assertion-discovery.ts
|
|
14983
|
-
import
|
|
15358
|
+
import path38 from "node:path";
|
|
14984
15359
|
import fg2 from "fast-glob";
|
|
14985
15360
|
async function discoverAssertions(registry, baseDir) {
|
|
14986
15361
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
14987
15362
|
const candidateDirs = [];
|
|
14988
|
-
let dir =
|
|
14989
|
-
const root =
|
|
15363
|
+
let dir = path38.resolve(baseDir);
|
|
15364
|
+
const root = path38.parse(dir).root;
|
|
14990
15365
|
while (dir !== root) {
|
|
14991
|
-
candidateDirs.push(
|
|
14992
|
-
dir =
|
|
15366
|
+
candidateDirs.push(path38.join(dir, ".agentv", "assertions"));
|
|
15367
|
+
dir = path38.dirname(dir);
|
|
14993
15368
|
}
|
|
14994
15369
|
let files = [];
|
|
14995
15370
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -15005,7 +15380,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
15005
15380
|
}
|
|
15006
15381
|
const discoveredTypes = [];
|
|
15007
15382
|
for (const filePath of files) {
|
|
15008
|
-
const basename =
|
|
15383
|
+
const basename = path38.basename(filePath);
|
|
15009
15384
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
15010
15385
|
if (registry.has(typeName)) {
|
|
15011
15386
|
continue;
|
|
@@ -15023,17 +15398,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
15023
15398
|
}
|
|
15024
15399
|
|
|
15025
15400
|
// src/evaluation/registry/grader-discovery.ts
|
|
15026
|
-
import
|
|
15401
|
+
import path39 from "node:path";
|
|
15027
15402
|
import fg3 from "fast-glob";
|
|
15028
15403
|
async function discoverGraders(registry, baseDir) {
|
|
15029
15404
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
15030
15405
|
const candidateDirs = [];
|
|
15031
|
-
let dir =
|
|
15032
|
-
const root =
|
|
15406
|
+
let dir = path39.resolve(baseDir);
|
|
15407
|
+
const root = path39.parse(dir).root;
|
|
15033
15408
|
while (dir !== root) {
|
|
15034
|
-
candidateDirs.push(
|
|
15035
|
-
candidateDirs.push(
|
|
15036
|
-
dir =
|
|
15409
|
+
candidateDirs.push(path39.join(dir, ".agentv", "graders"));
|
|
15410
|
+
candidateDirs.push(path39.join(dir, ".agentv", "judges"));
|
|
15411
|
+
dir = path39.dirname(dir);
|
|
15037
15412
|
}
|
|
15038
15413
|
let files = [];
|
|
15039
15414
|
for (const gradersDir of candidateDirs) {
|
|
@@ -15049,7 +15424,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
15049
15424
|
}
|
|
15050
15425
|
const discoveredTypes = [];
|
|
15051
15426
|
for (const filePath of files) {
|
|
15052
|
-
const basename =
|
|
15427
|
+
const basename = path39.basename(filePath);
|
|
15053
15428
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
15054
15429
|
if (registry.has(typeName)) {
|
|
15055
15430
|
continue;
|
|
@@ -15209,7 +15584,7 @@ function getTCritical(df) {
|
|
|
15209
15584
|
// src/evaluation/workspace/file-changes.ts
|
|
15210
15585
|
import { exec as execCallback } from "node:child_process";
|
|
15211
15586
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
15212
|
-
import
|
|
15587
|
+
import path40 from "node:path";
|
|
15213
15588
|
import { promisify as promisify4 } from "node:util";
|
|
15214
15589
|
var execAsync4 = promisify4(execCallback);
|
|
15215
15590
|
function gitExecOpts(workspacePath) {
|
|
@@ -15243,10 +15618,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
15243
15618
|
}
|
|
15244
15619
|
for (const entry of entries) {
|
|
15245
15620
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
15246
|
-
const childPath =
|
|
15621
|
+
const childPath = path40.join(workspacePath, entry);
|
|
15247
15622
|
try {
|
|
15248
15623
|
if (!statSync(childPath).isDirectory()) continue;
|
|
15249
|
-
if (!statSync(
|
|
15624
|
+
if (!statSync(path40.join(childPath, ".git")).isDirectory()) continue;
|
|
15250
15625
|
} catch {
|
|
15251
15626
|
continue;
|
|
15252
15627
|
}
|
|
@@ -15257,7 +15632,7 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
15257
15632
|
|
|
15258
15633
|
// src/evaluation/workspace/manager.ts
|
|
15259
15634
|
import { cp, mkdir as mkdir12, readdir as readdir4, rm as rm4, stat as stat6 } from "node:fs/promises";
|
|
15260
|
-
import
|
|
15635
|
+
import path41 from "node:path";
|
|
15261
15636
|
var TemplateNotFoundError = class extends Error {
|
|
15262
15637
|
constructor(templatePath) {
|
|
15263
15638
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -15287,14 +15662,14 @@ async function isDirectory(filePath) {
|
|
|
15287
15662
|
}
|
|
15288
15663
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
15289
15664
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
15290
|
-
return
|
|
15665
|
+
return path41.join(root, evalRunId, caseId);
|
|
15291
15666
|
}
|
|
15292
15667
|
async function copyDirectoryRecursive(src, dest) {
|
|
15293
15668
|
await mkdir12(dest, { recursive: true });
|
|
15294
15669
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
15295
15670
|
for (const entry of entries) {
|
|
15296
|
-
const srcPath =
|
|
15297
|
-
const destPath =
|
|
15671
|
+
const srcPath = path41.join(src, entry.name);
|
|
15672
|
+
const destPath = path41.join(dest, entry.name);
|
|
15298
15673
|
if (entry.name === ".git") {
|
|
15299
15674
|
continue;
|
|
15300
15675
|
}
|
|
@@ -15306,7 +15681,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
15306
15681
|
}
|
|
15307
15682
|
}
|
|
15308
15683
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
15309
|
-
const resolvedTemplatePath =
|
|
15684
|
+
const resolvedTemplatePath = path41.resolve(templatePath);
|
|
15310
15685
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
15311
15686
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
15312
15687
|
}
|
|
@@ -15355,7 +15730,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
15355
15730
|
}
|
|
15356
15731
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
15357
15732
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
15358
|
-
const evalDir =
|
|
15733
|
+
const evalDir = path41.join(root, evalRunId);
|
|
15359
15734
|
if (await fileExists(evalDir)) {
|
|
15360
15735
|
await rm4(evalDir, { recursive: true, force: true });
|
|
15361
15736
|
}
|
|
@@ -15365,8 +15740,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
15365
15740
|
import { execFile } from "node:child_process";
|
|
15366
15741
|
import { createHash } from "node:crypto";
|
|
15367
15742
|
import { existsSync as existsSync3 } from "node:fs";
|
|
15368
|
-
import { cp as cp2, mkdir as mkdir13, readFile as
|
|
15369
|
-
import
|
|
15743
|
+
import { cp as cp2, mkdir as mkdir13, readFile as readFile14, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
15744
|
+
import path42 from "node:path";
|
|
15370
15745
|
import { promisify as promisify5 } from "node:util";
|
|
15371
15746
|
var execFileAsync = promisify5(execFile);
|
|
15372
15747
|
function gitEnv() {
|
|
@@ -15393,12 +15768,14 @@ async function git(args, opts) {
|
|
|
15393
15768
|
return stdout.trim();
|
|
15394
15769
|
}
|
|
15395
15770
|
function normalizeRepoForFingerprint(repo) {
|
|
15396
|
-
const
|
|
15397
|
-
|
|
15398
|
-
path
|
|
15399
|
-
|
|
15400
|
-
|
|
15401
|
-
|
|
15771
|
+
const result = {};
|
|
15772
|
+
if (repo.path) {
|
|
15773
|
+
result.path = repo.path;
|
|
15774
|
+
}
|
|
15775
|
+
if (repo.source) {
|
|
15776
|
+
result.source = repo.source.type === "git" ? { type: "git", url: repo.source.url.toLowerCase().replace(/\.git$/, "") } : { type: "local", path: repo.source.path };
|
|
15777
|
+
}
|
|
15778
|
+
result.ref = getRepoCheckoutRef(repo.checkout);
|
|
15402
15779
|
if (repo.clone?.depth !== void 0) {
|
|
15403
15780
|
result.depth = repo.clone.depth;
|
|
15404
15781
|
}
|
|
@@ -15412,7 +15789,7 @@ function normalizeRepoForFingerprint(repo) {
|
|
|
15412
15789
|
}
|
|
15413
15790
|
function computeWorkspaceFingerprint(repos) {
|
|
15414
15791
|
const canonical = {
|
|
15415
|
-
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
15792
|
+
repos: [...repos].sort((a, b) => (a.path ?? "").localeCompare(b.path ?? "")).map(normalizeRepoForFingerprint)
|
|
15416
15793
|
};
|
|
15417
15794
|
return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
15418
15795
|
}
|
|
@@ -15420,8 +15797,8 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
15420
15797
|
await mkdir13(dest, { recursive: true });
|
|
15421
15798
|
const entries = await readdir5(src, { withFileTypes: true });
|
|
15422
15799
|
for (const entry of entries) {
|
|
15423
|
-
const srcPath =
|
|
15424
|
-
const destPath =
|
|
15800
|
+
const srcPath = path42.join(src, entry.name);
|
|
15801
|
+
const destPath = path42.join(dest, entry.name);
|
|
15425
15802
|
if (entry.name === ".git") {
|
|
15426
15803
|
continue;
|
|
15427
15804
|
}
|
|
@@ -15454,7 +15831,7 @@ var WorkspacePoolManager = class {
|
|
|
15454
15831
|
async acquireWorkspace(options) {
|
|
15455
15832
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
15456
15833
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
15457
|
-
const poolDir =
|
|
15834
|
+
const poolDir = path42.join(this.poolRoot, fingerprint);
|
|
15458
15835
|
await mkdir13(poolDir, { recursive: true });
|
|
15459
15836
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
15460
15837
|
if (drifted) {
|
|
@@ -15464,7 +15841,7 @@ var WorkspacePoolManager = class {
|
|
|
15464
15841
|
await this.removeAllSlots(poolDir);
|
|
15465
15842
|
}
|
|
15466
15843
|
for (let i = 0; i < maxSlots; i++) {
|
|
15467
|
-
const slotPath =
|
|
15844
|
+
const slotPath = path42.join(poolDir, `slot-${i}`);
|
|
15468
15845
|
const lockPath = `${slotPath}.lock`;
|
|
15469
15846
|
const locked = await this.tryLock(lockPath);
|
|
15470
15847
|
if (!locked) {
|
|
@@ -15526,7 +15903,7 @@ var WorkspacePoolManager = class {
|
|
|
15526
15903
|
throw err;
|
|
15527
15904
|
}
|
|
15528
15905
|
try {
|
|
15529
|
-
const pidStr = await
|
|
15906
|
+
const pidStr = await readFile14(lockPath, "utf-8");
|
|
15530
15907
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
15531
15908
|
if (!Number.isNaN(pid)) {
|
|
15532
15909
|
try {
|
|
@@ -15551,9 +15928,9 @@ var WorkspacePoolManager = class {
|
|
|
15551
15928
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
15552
15929
|
*/
|
|
15553
15930
|
async checkDrift(poolDir, fingerprint) {
|
|
15554
|
-
const metadataPath =
|
|
15931
|
+
const metadataPath = path42.join(poolDir, "metadata.json");
|
|
15555
15932
|
try {
|
|
15556
|
-
const raw = await
|
|
15933
|
+
const raw = await readFile14(metadataPath, "utf-8");
|
|
15557
15934
|
const metadata = JSON.parse(raw);
|
|
15558
15935
|
return metadata.fingerprint !== fingerprint;
|
|
15559
15936
|
} catch {
|
|
@@ -15568,17 +15945,17 @@ var WorkspacePoolManager = class {
|
|
|
15568
15945
|
repos,
|
|
15569
15946
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
15570
15947
|
};
|
|
15571
|
-
await writeFile7(
|
|
15948
|
+
await writeFile7(path42.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
15572
15949
|
}
|
|
15573
15950
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
15574
15951
|
async removeAllSlots(poolDir) {
|
|
15575
15952
|
const entries = await readdir5(poolDir);
|
|
15576
15953
|
for (const entry of entries) {
|
|
15577
15954
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
15578
|
-
const lockPath =
|
|
15955
|
+
const lockPath = path42.join(poolDir, `${entry}.lock`);
|
|
15579
15956
|
if (existsSync3(lockPath)) {
|
|
15580
15957
|
try {
|
|
15581
|
-
const pidStr = await
|
|
15958
|
+
const pidStr = await readFile14(lockPath, "utf-8");
|
|
15582
15959
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
15583
15960
|
if (!Number.isNaN(pid)) {
|
|
15584
15961
|
try {
|
|
@@ -15591,12 +15968,12 @@ var WorkspacePoolManager = class {
|
|
|
15591
15968
|
} catch {
|
|
15592
15969
|
}
|
|
15593
15970
|
}
|
|
15594
|
-
await rm5(
|
|
15971
|
+
await rm5(path42.join(poolDir, entry), { recursive: true, force: true });
|
|
15595
15972
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
15596
15973
|
});
|
|
15597
15974
|
}
|
|
15598
15975
|
}
|
|
15599
|
-
await rm5(
|
|
15976
|
+
await rm5(path42.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
15600
15977
|
});
|
|
15601
15978
|
}
|
|
15602
15979
|
/**
|
|
@@ -15606,14 +15983,15 @@ var WorkspacePoolManager = class {
|
|
|
15606
15983
|
*/
|
|
15607
15984
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
15608
15985
|
for (const repo of repos) {
|
|
15609
|
-
|
|
15986
|
+
if (!repo.path || !repo.source) continue;
|
|
15987
|
+
const repoDir = path42.join(slotPath, repo.path);
|
|
15610
15988
|
if (!existsSync3(repoDir)) {
|
|
15611
15989
|
continue;
|
|
15612
15990
|
}
|
|
15613
15991
|
if (poolReset === "none") {
|
|
15614
15992
|
continue;
|
|
15615
15993
|
}
|
|
15616
|
-
const ref = repo.checkout
|
|
15994
|
+
const ref = getRepoCheckoutRef(repo.checkout);
|
|
15617
15995
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
15618
15996
|
if (resolve === "remote") {
|
|
15619
15997
|
const fetchArgs = ["fetch", "origin", ref];
|
|
@@ -15630,8 +16008,8 @@ var WorkspacePoolManager = class {
|
|
|
15630
16008
|
}
|
|
15631
16009
|
if (templatePath) {
|
|
15632
16010
|
const repoDirNames = new Set(
|
|
15633
|
-
repos.map((r) => {
|
|
15634
|
-
const normalized = r.path.replace(/^\.\//, "");
|
|
16011
|
+
repos.filter((r) => r.path).map((r) => {
|
|
16012
|
+
const normalized = (r.path ?? "").replace(/^\.\//, "");
|
|
15635
16013
|
return normalized.split("/")[0];
|
|
15636
16014
|
})
|
|
15637
16015
|
);
|
|
@@ -15643,7 +16021,7 @@ var WorkspacePoolManager = class {
|
|
|
15643
16021
|
// src/evaluation/workspace/repo-manager.ts
|
|
15644
16022
|
import { execFile as execFile2 } from "node:child_process";
|
|
15645
16023
|
import { existsSync as existsSync4 } from "node:fs";
|
|
15646
|
-
import
|
|
16024
|
+
import path43 from "node:path";
|
|
15647
16025
|
import { promisify as promisify6 } from "node:util";
|
|
15648
16026
|
var execFileAsync2 = promisify6(execFile2);
|
|
15649
16027
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -15686,17 +16064,17 @@ var RepoManager = class {
|
|
|
15686
16064
|
static validateLocalPaths(repos) {
|
|
15687
16065
|
const errors = [];
|
|
15688
16066
|
for (const repo of repos) {
|
|
15689
|
-
if (repo.source.type !== "local") continue;
|
|
16067
|
+
if (!repo.source || repo.source.type !== "local") continue;
|
|
15690
16068
|
const sourcePath = repo.source.path;
|
|
15691
16069
|
if (!sourcePath || sourcePath.trim() === "") {
|
|
15692
16070
|
errors.push({
|
|
15693
|
-
repoPath: repo.path,
|
|
16071
|
+
repoPath: repo.path ?? "(none)",
|
|
15694
16072
|
resolvedSourcePath: sourcePath ?? "",
|
|
15695
16073
|
reason: "empty_path"
|
|
15696
16074
|
});
|
|
15697
16075
|
} else if (!existsSync4(sourcePath)) {
|
|
15698
16076
|
errors.push({
|
|
15699
|
-
repoPath: repo.path,
|
|
16077
|
+
repoPath: repo.path ?? "(none)",
|
|
15700
16078
|
resolvedSourcePath: sourcePath,
|
|
15701
16079
|
reason: "not_found"
|
|
15702
16080
|
});
|
|
@@ -15743,7 +16121,13 @@ ${lines.join("\n")}`;
|
|
|
15743
16121
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
15744
16122
|
*/
|
|
15745
16123
|
async materialize(repo, workspacePath) {
|
|
15746
|
-
|
|
16124
|
+
if (!repo.source || !repo.path) {
|
|
16125
|
+
if (this.verbose) {
|
|
16126
|
+
console.log(`[repo] materialize skip path=${repo.path ?? "(none)"} (no source or path)`);
|
|
16127
|
+
}
|
|
16128
|
+
return;
|
|
16129
|
+
}
|
|
16130
|
+
const targetDir = path43.join(workspacePath, repo.path);
|
|
15747
16131
|
const sourceUrl = getSourceUrl(repo.source);
|
|
15748
16132
|
const startedAt = Date.now();
|
|
15749
16133
|
if (this.verbose) {
|
|
@@ -15766,7 +16150,7 @@ ${lines.join("\n")}`;
|
|
|
15766
16150
|
await this.runGit(["sparse-checkout", "init", "--cone"], { cwd: targetDir });
|
|
15767
16151
|
await this.runGit(["sparse-checkout", "set", ...repo.clone.sparse], { cwd: targetDir });
|
|
15768
16152
|
}
|
|
15769
|
-
const ref = repo.checkout
|
|
16153
|
+
const ref = getRepoCheckoutRef(repo.checkout);
|
|
15770
16154
|
const resolve = repo.checkout?.resolve ?? "remote";
|
|
15771
16155
|
let resolvedSha;
|
|
15772
16156
|
if (resolve === "remote" && repo.source.type === "git") {
|
|
@@ -15818,23 +16202,27 @@ ${lines.join("\n")}`;
|
|
|
15818
16202
|
);
|
|
15819
16203
|
}
|
|
15820
16204
|
}
|
|
15821
|
-
/** Materialize all repos into the workspace. */
|
|
16205
|
+
/** Materialize all repos into the workspace. Skips repos without source (Docker-only repos). */
|
|
15822
16206
|
async materializeAll(repos, workspacePath) {
|
|
16207
|
+
const materializableRepos = repos.filter((r) => r.source);
|
|
15823
16208
|
if (this.verbose) {
|
|
15824
|
-
console.log(
|
|
16209
|
+
console.log(
|
|
16210
|
+
`[repo] materializeAll count=${materializableRepos.length} (${repos.length - materializableRepos.length} skipped, no source) workspace=${workspacePath}`
|
|
16211
|
+
);
|
|
15825
16212
|
}
|
|
15826
|
-
for (const repo of
|
|
16213
|
+
for (const repo of materializableRepos) {
|
|
15827
16214
|
await this.materialize(repo, workspacePath);
|
|
15828
16215
|
}
|
|
15829
16216
|
if (this.verbose) {
|
|
15830
16217
|
console.log("[repo] materializeAll complete");
|
|
15831
16218
|
}
|
|
15832
16219
|
}
|
|
15833
|
-
/** Reset repos in workspace to their checkout state. */
|
|
16220
|
+
/** Reset repos in workspace to their checkout state. Skips repos without path or source. */
|
|
15834
16221
|
async reset(repos, workspacePath, reset) {
|
|
15835
16222
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
15836
16223
|
for (const repo of repos) {
|
|
15837
|
-
|
|
16224
|
+
if (!repo.path || !repo.source) continue;
|
|
16225
|
+
const targetDir = path43.join(workspacePath, repo.path);
|
|
15838
16226
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
15839
16227
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
15840
16228
|
}
|
|
@@ -15843,16 +16231,16 @@ ${lines.join("\n")}`;
|
|
|
15843
16231
|
|
|
15844
16232
|
// src/evaluation/workspace/resolve.ts
|
|
15845
16233
|
import { readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
15846
|
-
import
|
|
16234
|
+
import path44 from "node:path";
|
|
15847
16235
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
15848
16236
|
if (!templatePath) {
|
|
15849
16237
|
return void 0;
|
|
15850
16238
|
}
|
|
15851
|
-
const resolved =
|
|
16239
|
+
const resolved = path44.resolve(templatePath);
|
|
15852
16240
|
const stats = await stat7(resolved);
|
|
15853
16241
|
if (stats.isFile()) {
|
|
15854
16242
|
return {
|
|
15855
|
-
dir:
|
|
16243
|
+
dir: path44.dirname(resolved),
|
|
15856
16244
|
workspaceFile: resolved
|
|
15857
16245
|
};
|
|
15858
16246
|
}
|
|
@@ -15864,14 +16252,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
15864
16252
|
if (workspaceFiles.length === 1) {
|
|
15865
16253
|
return {
|
|
15866
16254
|
dir: resolved,
|
|
15867
|
-
workspaceFile:
|
|
16255
|
+
workspaceFile: path44.join(resolved, workspaceFiles[0])
|
|
15868
16256
|
};
|
|
15869
16257
|
}
|
|
15870
16258
|
if (workspaceFiles.length > 1) {
|
|
15871
16259
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
15872
16260
|
return {
|
|
15873
16261
|
dir: resolved,
|
|
15874
|
-
workspaceFile: conventionFile ?
|
|
16262
|
+
workspaceFile: conventionFile ? path44.join(resolved, conventionFile) : void 0
|
|
15875
16263
|
};
|
|
15876
16264
|
}
|
|
15877
16265
|
return { dir: resolved };
|
|
@@ -16090,7 +16478,7 @@ async function runEvaluation(options) {
|
|
|
16090
16478
|
];
|
|
16091
16479
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
16092
16480
|
const typeRegistry = createBuiltinRegistry();
|
|
16093
|
-
const discoveryBaseDir = evalFilePath ?
|
|
16481
|
+
const discoveryBaseDir = evalFilePath ? path45.dirname(path45.resolve(evalFilePath)) : process.cwd();
|
|
16094
16482
|
const evalDir = discoveryBaseDir;
|
|
16095
16483
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
16096
16484
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -16157,7 +16545,8 @@ async function runEvaluation(options) {
|
|
|
16157
16545
|
for (const ec of filteredEvalCases) {
|
|
16158
16546
|
if (ec.workspace?.repos) {
|
|
16159
16547
|
for (const repo of ec.workspace.repos) {
|
|
16160
|
-
|
|
16548
|
+
if (!repo.source) continue;
|
|
16549
|
+
const key = `${repo.path ?? ""}::${repo.source.type === "local" ? repo.source.path : ""}`;
|
|
16161
16550
|
if (!allRepos.has(key)) {
|
|
16162
16551
|
allRepos.set(key, repo);
|
|
16163
16552
|
}
|
|
@@ -16170,7 +16559,7 @@ async function runEvaluation(options) {
|
|
|
16170
16559
|
const message = RepoManager.formatValidationErrors(localPathErrors);
|
|
16171
16560
|
console.warn(`Warning: ${message}`);
|
|
16172
16561
|
const invalidLocalRepoPaths = new Set(localPathErrors.map((e) => e.repoPath));
|
|
16173
|
-
if (suiteWorkspace?.repos?.some((r) => invalidLocalRepoPaths.has(r.path))) {
|
|
16562
|
+
if (suiteWorkspace?.repos?.some((r) => r.path && invalidLocalRepoPaths.has(r.path))) {
|
|
16174
16563
|
throw new Error(message);
|
|
16175
16564
|
}
|
|
16176
16565
|
}
|
|
@@ -16288,7 +16677,7 @@ async function runEvaluation(options) {
|
|
|
16288
16677
|
}
|
|
16289
16678
|
try {
|
|
16290
16679
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
16291
|
-
const copiedWorkspaceFile =
|
|
16680
|
+
const copiedWorkspaceFile = path45.join(sharedWorkspacePath, path45.basename(suiteWorkspaceFile));
|
|
16292
16681
|
try {
|
|
16293
16682
|
await stat8(copiedWorkspaceFile);
|
|
16294
16683
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -16303,7 +16692,8 @@ async function runEvaluation(options) {
|
|
|
16303
16692
|
try {
|
|
16304
16693
|
if (needsPerRepoCheck) {
|
|
16305
16694
|
for (const repo of suiteWorkspace.repos) {
|
|
16306
|
-
|
|
16695
|
+
if (!repo.path || !repo.source) continue;
|
|
16696
|
+
const targetDir = path45.join(sharedWorkspacePath, repo.path);
|
|
16307
16697
|
if (existsSync5(targetDir)) {
|
|
16308
16698
|
setupLog(`reusing existing repo at: ${targetDir}`);
|
|
16309
16699
|
continue;
|
|
@@ -16327,6 +16717,19 @@ async function runEvaluation(options) {
|
|
|
16327
16717
|
throw new Error(`Failed to materialize repos: ${message}`);
|
|
16328
16718
|
}
|
|
16329
16719
|
}
|
|
16720
|
+
const suiteDockerConfig = suiteWorkspace?.docker;
|
|
16721
|
+
if (suiteDockerConfig) {
|
|
16722
|
+
setupLog(`pulling Docker image: ${suiteDockerConfig.image}`);
|
|
16723
|
+
const { DockerWorkspaceProvider: DockerWorkspaceProvider2 } = await import("./docker-workspace-RPPXBT27.js");
|
|
16724
|
+
const dockerSetup = new DockerWorkspaceProvider2(suiteDockerConfig);
|
|
16725
|
+
if (!await dockerSetup.isDockerAvailable()) {
|
|
16726
|
+
throw new Error(
|
|
16727
|
+
"Docker workspace configured but Docker CLI is not available. Install Docker and ensure it is running."
|
|
16728
|
+
);
|
|
16729
|
+
}
|
|
16730
|
+
await dockerSetup.pullImage();
|
|
16731
|
+
setupLog("Docker image pull complete");
|
|
16732
|
+
}
|
|
16330
16733
|
const suiteHooksEnabled = hooksEnabled(suiteWorkspace);
|
|
16331
16734
|
const suiteBeforeAllHook = suiteWorkspace?.hooks?.before_all;
|
|
16332
16735
|
if (sharedWorkspacePath && suiteHooksEnabled && hasHookCommand(suiteBeforeAllHook)) {
|
|
@@ -16687,11 +17090,9 @@ async function runBatchEvaluation(options) {
|
|
|
16687
17090
|
const promptInputs = promptInputsList[index];
|
|
16688
17091
|
return {
|
|
16689
17092
|
question: promptInputs.question,
|
|
17093
|
+
systemPrompt: promptInputs.systemMessage,
|
|
16690
17094
|
inputFiles: evalCase.file_paths,
|
|
16691
|
-
evalCaseId: evalCase.id
|
|
16692
|
-
metadata: {
|
|
16693
|
-
systemPrompt: promptInputs.systemMessage ?? ""
|
|
16694
|
-
}
|
|
17095
|
+
evalCaseId: evalCase.id
|
|
16695
17096
|
};
|
|
16696
17097
|
});
|
|
16697
17098
|
const batchResponse = await provider.invokeBatch?.(batchRequests);
|
|
@@ -16890,7 +17291,7 @@ async function runEvalCase(options) {
|
|
|
16890
17291
|
);
|
|
16891
17292
|
}
|
|
16892
17293
|
if (caseWorkspaceFile && workspacePath) {
|
|
16893
|
-
const copiedFile =
|
|
17294
|
+
const copiedFile = path45.join(workspacePath, path45.basename(caseWorkspaceFile));
|
|
16894
17295
|
try {
|
|
16895
17296
|
await stat8(copiedFile);
|
|
16896
17297
|
caseWorkspaceFile = copiedFile;
|
|
@@ -16952,10 +17353,10 @@ async function runEvalCase(options) {
|
|
|
16952
17353
|
const files = evalCase.metadata.agent_skills_files;
|
|
16953
17354
|
if (baseDir && files.length > 0) {
|
|
16954
17355
|
for (const relPath of files) {
|
|
16955
|
-
const srcPath =
|
|
16956
|
-
const destPath =
|
|
17356
|
+
const srcPath = path45.resolve(baseDir, relPath);
|
|
17357
|
+
const destPath = path45.resolve(workspacePath, relPath);
|
|
16957
17358
|
try {
|
|
16958
|
-
await mkdir14(
|
|
17359
|
+
await mkdir14(path45.dirname(destPath), { recursive: true });
|
|
16959
17360
|
await copyFile2(srcPath, destPath);
|
|
16960
17361
|
} catch (error) {
|
|
16961
17362
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -17222,6 +17623,7 @@ async function runEvalCase(options) {
|
|
|
17222
17623
|
availableTargets,
|
|
17223
17624
|
fileChanges,
|
|
17224
17625
|
workspacePath,
|
|
17626
|
+
dockerConfig: evalCase.workspace?.docker,
|
|
17225
17627
|
verbose,
|
|
17226
17628
|
threshold: evalCase.threshold ?? caseThreshold
|
|
17227
17629
|
});
|
|
@@ -17415,6 +17817,7 @@ async function evaluateCandidate(options) {
|
|
|
17415
17817
|
availableTargets,
|
|
17416
17818
|
fileChanges,
|
|
17417
17819
|
workspacePath,
|
|
17820
|
+
dockerConfig,
|
|
17418
17821
|
threshold: evalThreshold
|
|
17419
17822
|
} = options;
|
|
17420
17823
|
const gradeTimestamp = nowFn();
|
|
@@ -17441,6 +17844,7 @@ async function evaluateCandidate(options) {
|
|
|
17441
17844
|
availableTargets,
|
|
17442
17845
|
fileChanges,
|
|
17443
17846
|
workspacePath,
|
|
17847
|
+
dockerConfig,
|
|
17444
17848
|
threshold: evalThreshold
|
|
17445
17849
|
});
|
|
17446
17850
|
const completedAt = nowFn();
|
|
@@ -17516,6 +17920,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
17516
17920
|
availableTargets,
|
|
17517
17921
|
fileChanges,
|
|
17518
17922
|
workspacePath,
|
|
17923
|
+
dockerConfig,
|
|
17519
17924
|
threshold
|
|
17520
17925
|
} = options;
|
|
17521
17926
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
@@ -17543,6 +17948,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
17543
17948
|
availableTargets,
|
|
17544
17949
|
fileChanges,
|
|
17545
17950
|
workspacePath,
|
|
17951
|
+
dockerConfig,
|
|
17546
17952
|
threshold
|
|
17547
17953
|
});
|
|
17548
17954
|
}
|
|
@@ -17551,6 +17957,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
17551
17957
|
if (!activeEvaluator) {
|
|
17552
17958
|
throw new Error(`No evaluator registered for kind '${evaluatorKind}'`);
|
|
17553
17959
|
}
|
|
17960
|
+
const implicitEvaluator = evaluatorKind === "llm-grader" && !evalCase.assertions ? buildImplicitLlmGraderConfig(evalCase) : void 0;
|
|
17554
17961
|
const score = await activeEvaluator.evaluate({
|
|
17555
17962
|
evalCase,
|
|
17556
17963
|
candidate,
|
|
@@ -17570,10 +17977,22 @@ async function runEvaluatorsForCase(options) {
|
|
|
17570
17977
|
targetResolver,
|
|
17571
17978
|
availableTargets,
|
|
17572
17979
|
fileChanges,
|
|
17573
|
-
workspacePath
|
|
17980
|
+
workspacePath,
|
|
17981
|
+
dockerConfig,
|
|
17982
|
+
...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
|
|
17574
17983
|
});
|
|
17575
17984
|
return { score };
|
|
17576
17985
|
}
|
|
17986
|
+
function buildImplicitLlmGraderConfig(evalCase) {
|
|
17987
|
+
if (!evalCase.preprocessors || evalCase.preprocessors.length === 0) {
|
|
17988
|
+
return void 0;
|
|
17989
|
+
}
|
|
17990
|
+
return {
|
|
17991
|
+
name: "llm-grader",
|
|
17992
|
+
type: "llm-grader",
|
|
17993
|
+
preprocessors: evalCase.preprocessors
|
|
17994
|
+
};
|
|
17995
|
+
}
|
|
17577
17996
|
async function runEvaluatorList(options) {
|
|
17578
17997
|
const {
|
|
17579
17998
|
evalCase,
|
|
@@ -17598,7 +18017,8 @@ async function runEvaluatorList(options) {
|
|
|
17598
18017
|
targetResolver,
|
|
17599
18018
|
availableTargets,
|
|
17600
18019
|
fileChanges,
|
|
17601
|
-
workspacePath
|
|
18020
|
+
workspacePath,
|
|
18021
|
+
dockerConfig
|
|
17602
18022
|
} = options;
|
|
17603
18023
|
const scored = [];
|
|
17604
18024
|
const scores = [];
|
|
@@ -17621,9 +18041,10 @@ async function runEvaluatorList(options) {
|
|
|
17621
18041
|
targetResolver,
|
|
17622
18042
|
availableTargets,
|
|
17623
18043
|
fileChanges,
|
|
17624
|
-
workspacePath
|
|
18044
|
+
workspacePath,
|
|
18045
|
+
dockerConfig
|
|
17625
18046
|
};
|
|
17626
|
-
const evalFileDir = evalCase.file_paths[0] ?
|
|
18047
|
+
const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
17627
18048
|
const dispatchContext = {
|
|
17628
18049
|
graderProvider,
|
|
17629
18050
|
targetResolver,
|
|
@@ -17783,13 +18204,11 @@ async function invokeProvider(provider, options) {
|
|
|
17783
18204
|
const braintrustSpanIds = streamCallbacks?.getActiveSpanIds?.() ?? void 0;
|
|
17784
18205
|
return await provider.invoke({
|
|
17785
18206
|
question: promptInputs.question,
|
|
18207
|
+
systemPrompt: promptInputs.systemMessage,
|
|
17786
18208
|
chatPrompt: promptInputs.chatPrompt,
|
|
17787
18209
|
inputFiles: evalCase.file_paths,
|
|
17788
18210
|
evalCaseId: evalCase.id,
|
|
17789
18211
|
attempt,
|
|
17790
|
-
metadata: {
|
|
17791
|
-
systemPrompt: promptInputs.systemMessage ?? ""
|
|
17792
|
-
},
|
|
17793
18212
|
signal: controller.signal,
|
|
17794
18213
|
cwd,
|
|
17795
18214
|
workspaceFile,
|
|
@@ -17991,7 +18410,7 @@ function computeWeightedMean(entries) {
|
|
|
17991
18410
|
|
|
17992
18411
|
// src/evaluation/evaluate.ts
|
|
17993
18412
|
import { existsSync as existsSync6 } from "node:fs";
|
|
17994
|
-
import
|
|
18413
|
+
import path46 from "node:path";
|
|
17995
18414
|
|
|
17996
18415
|
// src/evaluation/providers/function-provider.ts
|
|
17997
18416
|
function createFunctionProvider(taskFn) {
|
|
@@ -18028,7 +18447,7 @@ async function evaluate(config) {
|
|
|
18028
18447
|
}
|
|
18029
18448
|
const gitRoot = await findGitRoot(process.cwd());
|
|
18030
18449
|
const repoRoot = gitRoot ?? process.cwd();
|
|
18031
|
-
const testFilePath = config.specFile ?
|
|
18450
|
+
const testFilePath = config.specFile ? path46.resolve(config.specFile) : path46.join(process.cwd(), "__programmatic__.yaml");
|
|
18032
18451
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
18033
18452
|
let resolvedTarget;
|
|
18034
18453
|
let taskProvider;
|
|
@@ -18143,10 +18562,10 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
|
|
|
18143
18562
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
18144
18563
|
async function discoverDefaultTarget(repoRoot) {
|
|
18145
18564
|
const cwd = process.cwd();
|
|
18146
|
-
const chain = buildDirectoryChain(
|
|
18565
|
+
const chain = buildDirectoryChain(path46.join(cwd, "_placeholder"), repoRoot);
|
|
18147
18566
|
for (const dir of chain) {
|
|
18148
18567
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
18149
|
-
const targetsPath =
|
|
18568
|
+
const targetsPath = path46.join(dir, candidate);
|
|
18150
18569
|
if (!existsSync6(targetsPath)) continue;
|
|
18151
18570
|
try {
|
|
18152
18571
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -18159,16 +18578,16 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
18159
18578
|
return null;
|
|
18160
18579
|
}
|
|
18161
18580
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
18162
|
-
const { readFileSync:
|
|
18581
|
+
const { readFileSync: readFileSync5 } = await import("node:fs");
|
|
18163
18582
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
18164
18583
|
const envFiles = [];
|
|
18165
18584
|
for (const dir of chain) {
|
|
18166
|
-
const envPath =
|
|
18585
|
+
const envPath = path46.join(dir, ".env");
|
|
18167
18586
|
if (existsSync6(envPath)) envFiles.push(envPath);
|
|
18168
18587
|
}
|
|
18169
18588
|
for (let i = 0; i < envFiles.length; i++) {
|
|
18170
18589
|
try {
|
|
18171
|
-
const content =
|
|
18590
|
+
const content = readFileSync5(envFiles[i], "utf8");
|
|
18172
18591
|
for (const line of content.split("\n")) {
|
|
18173
18592
|
const trimmed = line.trim();
|
|
18174
18593
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -18240,12 +18659,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
18240
18659
|
".agentv/config.js"
|
|
18241
18660
|
];
|
|
18242
18661
|
async function loadTsConfig(projectRoot) {
|
|
18243
|
-
const { existsSync:
|
|
18662
|
+
const { existsSync: existsSync9 } = await import("node:fs");
|
|
18244
18663
|
const { pathToFileURL: pathToFileURL2 } = await import("node:url");
|
|
18245
18664
|
const { join: join2 } = await import("node:path");
|
|
18246
18665
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
18247
18666
|
const filePath = join2(projectRoot, fileName);
|
|
18248
|
-
if (!
|
|
18667
|
+
if (!existsSync9(filePath)) {
|
|
18249
18668
|
continue;
|
|
18250
18669
|
}
|
|
18251
18670
|
try {
|
|
@@ -18342,9 +18761,9 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
18342
18761
|
}
|
|
18343
18762
|
|
|
18344
18763
|
// src/evaluation/workspace/deps-scanner.ts
|
|
18345
|
-
import { readFile as
|
|
18346
|
-
import
|
|
18347
|
-
import { parse as
|
|
18764
|
+
import { readFile as readFile15 } from "node:fs/promises";
|
|
18765
|
+
import path47 from "node:path";
|
|
18766
|
+
import { parse as parse6 } from "yaml";
|
|
18348
18767
|
function normalizeGitUrl(url) {
|
|
18349
18768
|
let normalized = url.replace(/\.git$/, "");
|
|
18350
18769
|
try {
|
|
@@ -18362,7 +18781,7 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
18362
18781
|
try {
|
|
18363
18782
|
const repos = await extractReposFromEvalFile(filePath);
|
|
18364
18783
|
for (const repo of repos) {
|
|
18365
|
-
if (repo.source.type !== "git") continue;
|
|
18784
|
+
if (!repo.source || repo.source.type !== "git") continue;
|
|
18366
18785
|
const ref = repo.checkout?.ref;
|
|
18367
18786
|
const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ""}`;
|
|
18368
18787
|
const existing = seen.get(key);
|
|
@@ -18390,11 +18809,11 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
18390
18809
|
return { repos: [...seen.values()], errors };
|
|
18391
18810
|
}
|
|
18392
18811
|
async function extractReposFromEvalFile(filePath) {
|
|
18393
|
-
const content = await
|
|
18394
|
-
const parsed = interpolateEnv(
|
|
18812
|
+
const content = await readFile15(filePath, "utf8");
|
|
18813
|
+
const parsed = interpolateEnv(parse6(content), process.env);
|
|
18395
18814
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
18396
18815
|
const obj = parsed;
|
|
18397
|
-
const evalFileDir =
|
|
18816
|
+
const evalFileDir = path47.dirname(path47.resolve(filePath));
|
|
18398
18817
|
const repos = [];
|
|
18399
18818
|
const suiteRepos = await extractReposFromWorkspaceRaw(obj.workspace, evalFileDir);
|
|
18400
18819
|
repos.push(...suiteRepos);
|
|
@@ -18410,9 +18829,9 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
18410
18829
|
}
|
|
18411
18830
|
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
18412
18831
|
if (typeof raw === "string") {
|
|
18413
|
-
const workspaceFilePath =
|
|
18414
|
-
const content = await
|
|
18415
|
-
const parsed = interpolateEnv(
|
|
18832
|
+
const workspaceFilePath = path47.resolve(evalFileDir, raw);
|
|
18833
|
+
const content = await readFile15(workspaceFilePath, "utf8");
|
|
18834
|
+
const parsed = interpolateEnv(parse6(content), process.env);
|
|
18416
18835
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
18417
18836
|
return extractReposFromObject(parsed);
|
|
18418
18837
|
}
|
|
@@ -18439,8 +18858,8 @@ function extractReposFromObject(obj) {
|
|
|
18439
18858
|
}
|
|
18440
18859
|
|
|
18441
18860
|
// src/evaluation/cache/response-cache.ts
|
|
18442
|
-
import { mkdir as mkdir15, readFile as
|
|
18443
|
-
import
|
|
18861
|
+
import { mkdir as mkdir15, readFile as readFile16, writeFile as writeFile8 } from "node:fs/promises";
|
|
18862
|
+
import path48 from "node:path";
|
|
18444
18863
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18445
18864
|
var ResponseCache = class {
|
|
18446
18865
|
cachePath;
|
|
@@ -18450,7 +18869,7 @@ var ResponseCache = class {
|
|
|
18450
18869
|
async get(key) {
|
|
18451
18870
|
const filePath = this.keyToPath(key);
|
|
18452
18871
|
try {
|
|
18453
|
-
const data = await
|
|
18872
|
+
const data = await readFile16(filePath, "utf8");
|
|
18454
18873
|
return JSON.parse(data);
|
|
18455
18874
|
} catch {
|
|
18456
18875
|
return void 0;
|
|
@@ -18458,13 +18877,13 @@ var ResponseCache = class {
|
|
|
18458
18877
|
}
|
|
18459
18878
|
async set(key, value) {
|
|
18460
18879
|
const filePath = this.keyToPath(key);
|
|
18461
|
-
const dir =
|
|
18880
|
+
const dir = path48.dirname(filePath);
|
|
18462
18881
|
await mkdir15(dir, { recursive: true });
|
|
18463
18882
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
18464
18883
|
}
|
|
18465
18884
|
keyToPath(key) {
|
|
18466
18885
|
const prefix = key.slice(0, 2);
|
|
18467
|
-
return
|
|
18886
|
+
return path48.join(this.cachePath, prefix, `${key}.json`);
|
|
18468
18887
|
}
|
|
18469
18888
|
};
|
|
18470
18889
|
function shouldEnableCache(params) {
|
|
@@ -18479,20 +18898,301 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
18479
18898
|
return false;
|
|
18480
18899
|
}
|
|
18481
18900
|
|
|
18901
|
+
// src/evaluation/results-repo.ts
|
|
18902
|
+
import { execFile as execFile3 } from "node:child_process";
|
|
18903
|
+
import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync } from "node:fs";
|
|
18904
|
+
import { cp as cp3, mkdtemp as mkdtemp3, readdir as readdir8, rm as rm6, stat as stat9 } from "node:fs/promises";
|
|
18905
|
+
import os3 from "node:os";
|
|
18906
|
+
import path49 from "node:path";
|
|
18907
|
+
import { promisify as promisify7 } from "node:util";
|
|
18908
|
+
var execFileAsync3 = promisify7(execFile3);
|
|
18909
|
+
function sanitizeRepoSlug(repo) {
|
|
18910
|
+
return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
|
|
18911
|
+
}
|
|
18912
|
+
function withFriendlyGitHubAuthError(error) {
|
|
18913
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
18914
|
+
const lower = message.toLowerCase();
|
|
18915
|
+
if (lower.includes("authentication failed") || lower.includes("could not read username") || lower.includes("permission denied") || lower.includes("not logged into any github hosts")) {
|
|
18916
|
+
return new Error(`${message}. Run 'gh auth login' to authenticate.`);
|
|
18917
|
+
}
|
|
18918
|
+
return new Error(message);
|
|
18919
|
+
}
|
|
18920
|
+
function normalizeResultsExportConfig(config) {
|
|
18921
|
+
return {
|
|
18922
|
+
repo: config.repo.trim(),
|
|
18923
|
+
path: config.path.trim().replace(/^\/+|\/+$/g, ""),
|
|
18924
|
+
auto_push: config.auto_push === true,
|
|
18925
|
+
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
18926
|
+
};
|
|
18927
|
+
}
|
|
18928
|
+
function resolveResultsRepoUrl(repo) {
|
|
18929
|
+
if (repo.includes("://") || repo.startsWith("git@")) {
|
|
18930
|
+
return repo;
|
|
18931
|
+
}
|
|
18932
|
+
return `https://github.com/${repo}.git`;
|
|
18933
|
+
}
|
|
18934
|
+
function getResultsRepoCachePaths(repo) {
|
|
18935
|
+
const rootDir = path49.join(getAgentvHome(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
18936
|
+
return {
|
|
18937
|
+
rootDir,
|
|
18938
|
+
repoDir: path49.join(rootDir, "repo"),
|
|
18939
|
+
statusFile: path49.join(rootDir, "status.json")
|
|
18940
|
+
};
|
|
18941
|
+
}
|
|
18942
|
+
function readPersistedStatus(statusFile) {
|
|
18943
|
+
if (!existsSync7(statusFile)) {
|
|
18944
|
+
return {};
|
|
18945
|
+
}
|
|
18946
|
+
try {
|
|
18947
|
+
return JSON.parse(readFileSync3(statusFile, "utf8"));
|
|
18948
|
+
} catch {
|
|
18949
|
+
return {};
|
|
18950
|
+
}
|
|
18951
|
+
}
|
|
18952
|
+
function writePersistedStatus(statusFile, status) {
|
|
18953
|
+
mkdirSync2(path49.dirname(statusFile), { recursive: true });
|
|
18954
|
+
writeFileSync(statusFile, `${JSON.stringify(status, null, 2)}
|
|
18955
|
+
`, "utf8");
|
|
18956
|
+
}
|
|
18957
|
+
async function runCommand(executable, args, options) {
|
|
18958
|
+
try {
|
|
18959
|
+
const { stdout, stderr } = await execFileAsync3(executable, [...args], {
|
|
18960
|
+
cwd: options?.cwd,
|
|
18961
|
+
env: process.env
|
|
18962
|
+
});
|
|
18963
|
+
return { stdout, stderr };
|
|
18964
|
+
} catch (error) {
|
|
18965
|
+
if (options?.check === false && error && typeof error === "object") {
|
|
18966
|
+
const execError = error;
|
|
18967
|
+
return {
|
|
18968
|
+
stdout: execError.stdout ?? "",
|
|
18969
|
+
stderr: execError.stderr ?? ""
|
|
18970
|
+
};
|
|
18971
|
+
}
|
|
18972
|
+
throw withFriendlyGitHubAuthError(error);
|
|
18973
|
+
}
|
|
18974
|
+
}
|
|
18975
|
+
async function runGit(args, options) {
|
|
18976
|
+
return runCommand("git", args, options);
|
|
18977
|
+
}
|
|
18978
|
+
async function runGh(args, options) {
|
|
18979
|
+
return runCommand("gh", args, options);
|
|
18980
|
+
}
|
|
18981
|
+
async function resolveDefaultBranch(repoDir) {
|
|
18982
|
+
try {
|
|
18983
|
+
const { stdout } = await runGit(["symbolic-ref", "refs/remotes/origin/HEAD"], { cwd: repoDir });
|
|
18984
|
+
const ref = stdout.trim();
|
|
18985
|
+
const prefix = "refs/remotes/origin/";
|
|
18986
|
+
if (ref.startsWith(prefix)) {
|
|
18987
|
+
return ref.slice(prefix.length);
|
|
18988
|
+
}
|
|
18989
|
+
} catch {
|
|
18990
|
+
}
|
|
18991
|
+
for (const candidate of ["main", "master"]) {
|
|
18992
|
+
try {
|
|
18993
|
+
await runGit(["rev-parse", "--verify", `origin/${candidate}`], { cwd: repoDir });
|
|
18994
|
+
return candidate;
|
|
18995
|
+
} catch {
|
|
18996
|
+
}
|
|
18997
|
+
}
|
|
18998
|
+
return "main";
|
|
18999
|
+
}
|
|
19000
|
+
async function updateCacheRepo(repoDir, baseBranch) {
|
|
19001
|
+
await runGit(["fetch", "origin", "--prune"], { cwd: repoDir });
|
|
19002
|
+
await runGit(["checkout", baseBranch], { cwd: repoDir });
|
|
19003
|
+
await runGit(["pull", "--ff-only", "origin", baseBranch], { cwd: repoDir });
|
|
19004
|
+
}
|
|
19005
|
+
function updateStatusFile(config, patch) {
|
|
19006
|
+
const cachePaths = getResultsRepoCachePaths(config.repo);
|
|
19007
|
+
const current = readPersistedStatus(cachePaths.statusFile);
|
|
19008
|
+
writePersistedStatus(cachePaths.statusFile, {
|
|
19009
|
+
...current,
|
|
19010
|
+
...patch
|
|
19011
|
+
});
|
|
19012
|
+
}
|
|
19013
|
+
async function ensureResultsRepoClone(config) {
|
|
19014
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19015
|
+
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
19016
|
+
mkdirSync2(cachePaths.rootDir, { recursive: true });
|
|
19017
|
+
if (!existsSync7(cachePaths.repoDir)) {
|
|
19018
|
+
try {
|
|
19019
|
+
await runGit([
|
|
19020
|
+
"clone",
|
|
19021
|
+
"--filter=blob:none",
|
|
19022
|
+
resolveResultsRepoUrl(normalized.repo),
|
|
19023
|
+
cachePaths.repoDir
|
|
19024
|
+
]);
|
|
19025
|
+
return cachePaths.repoDir;
|
|
19026
|
+
} catch (error) {
|
|
19027
|
+
updateStatusFile(normalized, { last_error: withFriendlyGitHubAuthError(error).message });
|
|
19028
|
+
throw withFriendlyGitHubAuthError(error);
|
|
19029
|
+
}
|
|
19030
|
+
}
|
|
19031
|
+
if (!existsSync7(path49.join(cachePaths.repoDir, ".git"))) {
|
|
19032
|
+
throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
|
|
19033
|
+
}
|
|
19034
|
+
return cachePaths.repoDir;
|
|
19035
|
+
}
|
|
19036
|
+
function getResultsRepoStatus(config) {
|
|
19037
|
+
if (!config) {
|
|
19038
|
+
return {
|
|
19039
|
+
configured: false,
|
|
19040
|
+
available: false,
|
|
19041
|
+
repo: "",
|
|
19042
|
+
cache_dir: ""
|
|
19043
|
+
};
|
|
19044
|
+
}
|
|
19045
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19046
|
+
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
19047
|
+
const persisted = readPersistedStatus(cachePaths.statusFile);
|
|
19048
|
+
return {
|
|
19049
|
+
configured: true,
|
|
19050
|
+
available: existsSync7(cachePaths.repoDir),
|
|
19051
|
+
repo: normalized.repo,
|
|
19052
|
+
path: normalized.path,
|
|
19053
|
+
auto_push: normalized.auto_push,
|
|
19054
|
+
branch_prefix: normalized.branch_prefix,
|
|
19055
|
+
cache_dir: cachePaths.repoDir,
|
|
19056
|
+
last_synced_at: persisted.last_synced_at,
|
|
19057
|
+
last_error: persisted.last_error
|
|
19058
|
+
};
|
|
19059
|
+
}
|
|
19060
|
+
async function syncResultsRepo(config) {
|
|
19061
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19062
|
+
try {
|
|
19063
|
+
const repoDir = await ensureResultsRepoClone(normalized);
|
|
19064
|
+
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
19065
|
+
await updateCacheRepo(repoDir, baseBranch);
|
|
19066
|
+
updateStatusFile(normalized, {
|
|
19067
|
+
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
19068
|
+
last_error: void 0
|
|
19069
|
+
});
|
|
19070
|
+
} catch (error) {
|
|
19071
|
+
updateStatusFile(normalized, {
|
|
19072
|
+
last_error: withFriendlyGitHubAuthError(error).message
|
|
19073
|
+
});
|
|
19074
|
+
throw withFriendlyGitHubAuthError(error);
|
|
19075
|
+
}
|
|
19076
|
+
return getResultsRepoStatus(normalized);
|
|
19077
|
+
}
|
|
19078
|
+
async function checkoutResultsRepoBranch(config, branchName) {
|
|
19079
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19080
|
+
const repoDir = await ensureResultsRepoClone(normalized);
|
|
19081
|
+
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
19082
|
+
await updateCacheRepo(repoDir, baseBranch);
|
|
19083
|
+
await runGit(["checkout", "-B", branchName, `origin/${baseBranch}`], { cwd: repoDir });
|
|
19084
|
+
updateStatusFile(normalized, { last_error: void 0 });
|
|
19085
|
+
return {
|
|
19086
|
+
branchName,
|
|
19087
|
+
baseBranch,
|
|
19088
|
+
repoDir
|
|
19089
|
+
};
|
|
19090
|
+
}
|
|
19091
|
+
async function prepareResultsRepoBranch(config, branchName) {
|
|
19092
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19093
|
+
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
19094
|
+
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
19095
|
+
await updateCacheRepo(cloneDir, baseBranch);
|
|
19096
|
+
const worktreeRoot = await mkdtemp3(path49.join(os3.tmpdir(), "agentv-results-repo-"));
|
|
19097
|
+
const worktreeDir = path49.join(worktreeRoot, "repo");
|
|
19098
|
+
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
19099
|
+
cwd: cloneDir
|
|
19100
|
+
});
|
|
19101
|
+
return {
|
|
19102
|
+
branchName,
|
|
19103
|
+
baseBranch,
|
|
19104
|
+
repoDir: worktreeDir,
|
|
19105
|
+
cleanup: async () => {
|
|
19106
|
+
try {
|
|
19107
|
+
await runGit(["worktree", "remove", "--force", worktreeDir], { cwd: cloneDir });
|
|
19108
|
+
} finally {
|
|
19109
|
+
await rm6(worktreeRoot, { recursive: true, force: true }).catch(() => void 0);
|
|
19110
|
+
}
|
|
19111
|
+
}
|
|
19112
|
+
};
|
|
19113
|
+
}
|
|
19114
|
+
async function stageResultsArtifacts(params) {
|
|
19115
|
+
rmSync(params.destinationDir, { recursive: true, force: true });
|
|
19116
|
+
mkdirSync2(path49.dirname(params.destinationDir), { recursive: true });
|
|
19117
|
+
await cp3(params.sourceDir, params.destinationDir, { recursive: true });
|
|
19118
|
+
}
|
|
19119
|
+
function resolveResultsRepoRunsDir(config) {
|
|
19120
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19121
|
+
return path49.join(
|
|
19122
|
+
getResultsRepoCachePaths(normalized.repo).repoDir,
|
|
19123
|
+
...normalized.path.split("/")
|
|
19124
|
+
);
|
|
19125
|
+
}
|
|
19126
|
+
async function directorySizeBytes(targetPath) {
|
|
19127
|
+
const entry = await stat9(targetPath);
|
|
19128
|
+
if (entry.isFile()) {
|
|
19129
|
+
return entry.size;
|
|
19130
|
+
}
|
|
19131
|
+
let total = 0;
|
|
19132
|
+
for (const child of await readdir8(targetPath, { withFileTypes: true })) {
|
|
19133
|
+
total += await directorySizeBytes(path49.join(targetPath, child.name));
|
|
19134
|
+
}
|
|
19135
|
+
return total;
|
|
19136
|
+
}
|
|
19137
|
+
async function commitAndPushResultsBranch(params) {
|
|
19138
|
+
await runGit(["add", "--all"], { cwd: params.repoDir });
|
|
19139
|
+
const { stdout: diffStdout } = await runGit(["status", "--porcelain"], {
|
|
19140
|
+
cwd: params.repoDir,
|
|
19141
|
+
check: false
|
|
19142
|
+
});
|
|
19143
|
+
if (diffStdout.trim().length === 0) {
|
|
19144
|
+
return false;
|
|
19145
|
+
}
|
|
19146
|
+
await runGit(["commit", "-m", params.commitMessage], { cwd: params.repoDir });
|
|
19147
|
+
await runGit(["push", "-u", "origin", params.branchName], { cwd: params.repoDir });
|
|
19148
|
+
return true;
|
|
19149
|
+
}
|
|
19150
|
+
async function pushResultsRepoBranch(config, branchName, cwd) {
|
|
19151
|
+
const normalized = normalizeResultsExportConfig(config);
|
|
19152
|
+
await runGit(["push", "-u", "origin", branchName], {
|
|
19153
|
+
cwd: cwd ?? getResultsRepoCachePaths(normalized.repo).repoDir
|
|
19154
|
+
});
|
|
19155
|
+
updateStatusFile(normalized, {
|
|
19156
|
+
last_synced_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
19157
|
+
last_error: void 0
|
|
19158
|
+
});
|
|
19159
|
+
}
|
|
19160
|
+
/**
 * Opens a draft pull request for the results branch by running
 * `gh pr create --draft` in the results repo directory.
 *
 * @param {object} params
 * @param {string} params.repo - Value passed to `--repo`.
 * @param {string} params.baseBranch - Value passed to `--base`.
 * @param {string} params.branchName - Value passed to `--head`.
 * @param {string} params.title - Pull request title.
 * @param {string} params.body - Pull request body.
 * @param {string} params.repoDir - Working directory for the `gh` invocation.
 * @returns {Promise<string>} The command's trimmed stdout (for `gh pr create`
 *   this is normally the new pull request's URL).
 */
async function createDraftResultsPr(params) {
  const ghArgs = ["pr", "create", "--draft"];
  ghArgs.push("--repo", params.repo);
  ghArgs.push("--base", params.baseBranch);
  ghArgs.push("--head", params.branchName);
  ghArgs.push("--title", params.title);
  ghArgs.push("--body", params.body);

  const result = await runGh(ghArgs, { cwd: params.repoDir });
  return result.stdout.trim();
}
|
|
19181
|
+
|
|
18482
19182
|
// src/projects.ts
|
|
18483
|
-
import { existsSync as
|
|
18484
|
-
import
|
|
19183
|
+
import { existsSync as existsSync8, mkdirSync as mkdirSync3, readFileSync as readFileSync4, readdirSync as readdirSync3, statSync as statSync2, writeFileSync as writeFileSync2 } from "node:fs";
|
|
19184
|
+
import path50 from "node:path";
|
|
18485
19185
|
import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
|
|
18486
19186
|
/**
 * Returns the location of the projects registry file: `projects.yaml`
 * directly inside the directory reported by `getAgentvHome()`.
 *
 * @returns {string} Path to the registry YAML file.
 */
function getProjectsRegistryPath() {
  const agentvHome = getAgentvHome();
  return path50.join(agentvHome, "projects.yaml");
}
|
|
18489
19189
|
function loadProjectRegistry() {
|
|
18490
19190
|
const registryPath = getProjectsRegistryPath();
|
|
18491
|
-
if (!
|
|
19191
|
+
if (!existsSync8(registryPath)) {
|
|
18492
19192
|
return { projects: [] };
|
|
18493
19193
|
}
|
|
18494
19194
|
try {
|
|
18495
|
-
const raw =
|
|
19195
|
+
const raw = readFileSync4(registryPath, "utf-8");
|
|
18496
19196
|
const parsed = parseYaml3(raw);
|
|
18497
19197
|
if (!parsed || !Array.isArray(parsed.projects)) {
|
|
18498
19198
|
return { projects: [] };
|
|
@@ -18504,14 +19204,14 @@ function loadProjectRegistry() {
|
|
|
18504
19204
|
}
|
|
18505
19205
|
/**
 * Serializes the registry to YAML and writes it to the projects registry
 * file, creating the parent directory first when it does not exist.
 *
 * @param {object} registry - Registry object to persist.
 * @returns {void}
 */
function saveProjectRegistry(registry) {
  const registryPath = getProjectsRegistryPath();
  const parentDir = path50.dirname(registryPath);

  const parentExists = existsSync8(parentDir);
  if (!parentExists) {
    mkdirSync3(parentDir, { recursive: true });
  }

  const yamlText = stringifyYaml(registry);
  writeFileSync2(registryPath, yamlText, "utf-8");
}
|
|
18513
19213
|
function deriveProjectId(dirPath, existingIds) {
|
|
18514
|
-
const base =
|
|
19214
|
+
const base = path50.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
18515
19215
|
let candidate = base || "project";
|
|
18516
19216
|
let suffix = 2;
|
|
18517
19217
|
while (existingIds.includes(candidate)) {
|
|
@@ -18521,11 +19221,11 @@ function deriveProjectId(dirPath, existingIds) {
|
|
|
18521
19221
|
return candidate;
|
|
18522
19222
|
}
|
|
18523
19223
|
function addProject(projectPath) {
|
|
18524
|
-
const absPath =
|
|
18525
|
-
if (!
|
|
19224
|
+
const absPath = path50.resolve(projectPath);
|
|
19225
|
+
if (!existsSync8(absPath)) {
|
|
18526
19226
|
throw new Error(`Directory not found: ${absPath}`);
|
|
18527
19227
|
}
|
|
18528
|
-
if (!
|
|
19228
|
+
if (!existsSync8(path50.join(absPath, ".agentv"))) {
|
|
18529
19229
|
throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
|
|
18530
19230
|
}
|
|
18531
19231
|
const registry = loadProjectRegistry();
|
|
@@ -18539,7 +19239,7 @@ function addProject(projectPath) {
|
|
|
18539
19239
|
absPath,
|
|
18540
19240
|
registry.projects.map((p) => p.id)
|
|
18541
19241
|
),
|
|
18542
|
-
name:
|
|
19242
|
+
name: path50.basename(absPath),
|
|
18543
19243
|
path: absPath,
|
|
18544
19244
|
addedAt: now,
|
|
18545
19245
|
lastOpenedAt: now
|
|
@@ -18568,14 +19268,14 @@ function touchProject(projectId) {
|
|
|
18568
19268
|
}
|
|
18569
19269
|
}
|
|
18570
19270
|
function discoverProjects(rootDir, maxDepth = 2) {
|
|
18571
|
-
const absRoot =
|
|
18572
|
-
if (!
|
|
19271
|
+
const absRoot = path50.resolve(rootDir);
|
|
19272
|
+
if (!existsSync8(absRoot) || !statSync2(absRoot).isDirectory()) {
|
|
18573
19273
|
return [];
|
|
18574
19274
|
}
|
|
18575
19275
|
const results = [];
|
|
18576
19276
|
function scan(dir, depth) {
|
|
18577
19277
|
if (depth > maxDepth) return;
|
|
18578
|
-
if (
|
|
19278
|
+
if (existsSync8(path50.join(dir, ".agentv"))) {
|
|
18579
19279
|
results.push(dir);
|
|
18580
19280
|
return;
|
|
18581
19281
|
}
|
|
@@ -18585,7 +19285,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
|
|
|
18585
19285
|
for (const entry of entries) {
|
|
18586
19286
|
if (!entry.isDirectory()) continue;
|
|
18587
19287
|
if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
|
|
18588
|
-
scan(
|
|
19288
|
+
scan(path50.join(dir, entry.name), depth + 1);
|
|
18589
19289
|
}
|
|
18590
19290
|
} catch {
|
|
18591
19291
|
}
|
|
@@ -19496,33 +20196,33 @@ function extractResponseItemContent(content) {
|
|
|
19496
20196
|
}
|
|
19497
20197
|
|
|
19498
20198
|
// src/import/codex-session-discovery.ts
|
|
19499
|
-
import { readdir as
|
|
20199
|
+
import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
|
|
19500
20200
|
import { homedir as homedir3 } from "node:os";
|
|
19501
|
-
import
|
|
19502
|
-
var DEFAULT_SESSIONS_DIR = () =>
|
|
20201
|
+
import path51 from "node:path";
|
|
20202
|
+
// Computes the default sessions directory: <home>/.codex/sessions.
// Kept as a function so the home directory is looked up on each call rather
// than once at module load; discoverCodexSessions falls back to it when
// opts.sessionsDir is null/undefined.
var DEFAULT_SESSIONS_DIR = () => path51.join(homedir3(), ".codex", "sessions");
|
|
19503
20203
|
async function discoverCodexSessions(opts) {
|
|
19504
20204
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
19505
20205
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
19506
20206
|
const sessions = [];
|
|
19507
20207
|
let yearDirs;
|
|
19508
20208
|
try {
|
|
19509
|
-
yearDirs = await
|
|
20209
|
+
yearDirs = await readdir9(sessionsDir);
|
|
19510
20210
|
} catch {
|
|
19511
20211
|
return [];
|
|
19512
20212
|
}
|
|
19513
20213
|
for (const year of yearDirs) {
|
|
19514
|
-
const yearPath =
|
|
20214
|
+
const yearPath = path51.join(sessionsDir, year);
|
|
19515
20215
|
let monthDirs;
|
|
19516
20216
|
try {
|
|
19517
|
-
monthDirs = await
|
|
20217
|
+
monthDirs = await readdir9(yearPath);
|
|
19518
20218
|
} catch {
|
|
19519
20219
|
continue;
|
|
19520
20220
|
}
|
|
19521
20221
|
for (const month of monthDirs) {
|
|
19522
|
-
const monthPath =
|
|
20222
|
+
const monthPath = path51.join(yearPath, month);
|
|
19523
20223
|
let dayDirs;
|
|
19524
20224
|
try {
|
|
19525
|
-
dayDirs = await
|
|
20225
|
+
dayDirs = await readdir9(monthPath);
|
|
19526
20226
|
} catch {
|
|
19527
20227
|
continue;
|
|
19528
20228
|
}
|
|
@@ -19531,22 +20231,22 @@ async function discoverCodexSessions(opts) {
|
|
|
19531
20231
|
const dirDate = `${year}-${month}-${day}`;
|
|
19532
20232
|
if (dirDate !== opts.date) continue;
|
|
19533
20233
|
}
|
|
19534
|
-
const dayPath =
|
|
20234
|
+
const dayPath = path51.join(monthPath, day);
|
|
19535
20235
|
let files;
|
|
19536
20236
|
try {
|
|
19537
|
-
files = await
|
|
20237
|
+
files = await readdir9(dayPath);
|
|
19538
20238
|
} catch {
|
|
19539
20239
|
continue;
|
|
19540
20240
|
}
|
|
19541
20241
|
for (const file of files) {
|
|
19542
20242
|
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
19543
|
-
const filePath =
|
|
20243
|
+
const filePath = path51.join(dayPath, file);
|
|
19544
20244
|
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
19545
20245
|
const parts = nameWithoutExt.split("-");
|
|
19546
20246
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
19547
20247
|
let updatedAt;
|
|
19548
20248
|
try {
|
|
19549
|
-
const fileStat = await
|
|
20249
|
+
const fileStat = await stat10(filePath);
|
|
19550
20250
|
updatedAt = fileStat.mtime;
|
|
19551
20251
|
} catch {
|
|
19552
20252
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -19561,10 +20261,10 @@ async function discoverCodexSessions(opts) {
|
|
|
19561
20261
|
}
|
|
19562
20262
|
|
|
19563
20263
|
// src/import/session-discovery.ts
|
|
19564
|
-
import { readdir as
|
|
20264
|
+
import { readdir as readdir10, stat as stat11 } from "node:fs/promises";
|
|
19565
20265
|
import { homedir as homedir4 } from "node:os";
|
|
19566
|
-
import
|
|
19567
|
-
var DEFAULT_PROJECTS_DIR = () =>
|
|
20266
|
+
import path52 from "node:path";
|
|
20267
|
+
// Computes the default projects directory: <home>/.claude/projects.
// Kept as a function so the home directory is looked up on each call rather
// than once at module load.
var DEFAULT_PROJECTS_DIR = () => path52.join(homedir4(), ".claude", "projects");
|
|
19568
20268
|
/**
 * Encodes a project path by replacing every forward slash with a hyphen.
 * All other characters (including backslashes) are left untouched.
 *
 * @param {string} projectPath - Path to encode.
 * @returns {string} The path with each "/" replaced by "-".
 */
function encodeProjectPath(projectPath) {
  const segments = projectPath.split("/");
  return segments.join("-");
}
|
|
@@ -19573,7 +20273,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19573
20273
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
19574
20274
|
let projectDirs;
|
|
19575
20275
|
try {
|
|
19576
|
-
projectDirs = await
|
|
20276
|
+
projectDirs = await readdir10(projectsDir);
|
|
19577
20277
|
} catch {
|
|
19578
20278
|
return [];
|
|
19579
20279
|
}
|
|
@@ -19583,10 +20283,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
19583
20283
|
}
|
|
19584
20284
|
const sessions = [];
|
|
19585
20285
|
for (const projectDir of projectDirs) {
|
|
19586
|
-
const dirPath =
|
|
20286
|
+
const dirPath = path52.join(projectsDir, projectDir);
|
|
19587
20287
|
let entries;
|
|
19588
20288
|
try {
|
|
19589
|
-
entries = await
|
|
20289
|
+
entries = await readdir10(dirPath);
|
|
19590
20290
|
} catch {
|
|
19591
20291
|
continue;
|
|
19592
20292
|
}
|
|
@@ -19594,10 +20294,10 @@ async function discoverClaudeSessions(opts) {
|
|
|
19594
20294
|
if (!entry.endsWith(".jsonl")) continue;
|
|
19595
20295
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
19596
20296
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
19597
|
-
const filePath =
|
|
20297
|
+
const filePath = path52.join(dirPath, entry);
|
|
19598
20298
|
let updatedAt;
|
|
19599
20299
|
try {
|
|
19600
|
-
const fileStat = await
|
|
20300
|
+
const fileStat = await stat11(filePath);
|
|
19601
20301
|
updatedAt = fileStat.mtime;
|
|
19602
20302
|
} catch {
|
|
19603
20303
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -19615,7 +20315,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19615
20315
|
}
|
|
19616
20316
|
|
|
19617
20317
|
// src/import/types.ts
|
|
19618
|
-
import { readFile as
|
|
20318
|
+
import { readFile as readFile17 } from "node:fs/promises";
|
|
19619
20319
|
function toTranscriptJsonLine(entry) {
|
|
19620
20320
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
19621
20321
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -19641,11 +20341,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
19641
20341
|
};
|
|
19642
20342
|
}
|
|
19643
20343
|
/**
 * Reads a JSON Lines file and parses each non-blank line as JSON.
 *
 * The file is read as UTF-8 and split on "\n"; lines that are empty or
 * whitespace-only are ignored.
 *
 * @param {string} filePath - Path of the `.jsonl` file.
 * @returns {Promise<Array<*>>} Parsed values, in file order.
 * @throws Rejects when the file cannot be read or a line is not valid JSON.
 */
async function readTranscriptJsonl(filePath) {
  const contents = await readFile17(filePath, "utf8");
  const parsedLines = [];
  for (const rawLine of contents.split("\n")) {
    if (rawLine.trim().length === 0) {
      continue;
    }
    parsedLines.push(JSON.parse(rawLine));
  }
  return parsedLines;
}
|
|
19647
20347
|
/**
 * Reads a transcript file and returns its raw contents decoded as UTF-8.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The file's text.
 * @throws Rejects with the underlying filesystem error when the read fails.
 */
async function readTranscriptFile(filePath) {
  const contents = await readFile17(filePath, "utf8");
  return contents;
}
|
|
19650
20350
|
|
|
19651
20351
|
// src/import/transcript-provider.ts
|
|
@@ -19710,6 +20410,7 @@ export {
|
|
|
19710
20410
|
DEFAULT_EXPLORATION_TOOLS,
|
|
19711
20411
|
DEFAULT_THRESHOLD,
|
|
19712
20412
|
DeterministicAssertionEvaluator,
|
|
20413
|
+
DockerWorkspaceProvider,
|
|
19713
20414
|
EvaluatorRegistry,
|
|
19714
20415
|
ExecutionMetricsEvaluator,
|
|
19715
20416
|
FieldAccuracyEvaluator,
|
|
@@ -19745,9 +20446,11 @@ export {
|
|
|
19745
20446
|
buildSearchRoots,
|
|
19746
20447
|
calculateRubricScore,
|
|
19747
20448
|
captureFileChanges,
|
|
20449
|
+
checkoutResultsRepoBranch,
|
|
19748
20450
|
clampScore,
|
|
19749
20451
|
cleanupEvalWorkspaces,
|
|
19750
20452
|
cleanupWorkspace,
|
|
20453
|
+
commitAndPushResultsBranch,
|
|
19751
20454
|
computeTraceSummary,
|
|
19752
20455
|
computeWorkspaceFingerprint,
|
|
19753
20456
|
consumeClaudeLogEntries,
|
|
@@ -19758,6 +20461,7 @@ export {
|
|
|
19758
20461
|
createAgentKernel,
|
|
19759
20462
|
createBuiltinProviderRegistry,
|
|
19760
20463
|
createBuiltinRegistry,
|
|
20464
|
+
createDraftResultsPr,
|
|
19761
20465
|
createProvider,
|
|
19762
20466
|
createTempWorkspace,
|
|
19763
20467
|
deepEqual,
|
|
@@ -19765,6 +20469,7 @@ export {
|
|
|
19765
20469
|
deriveCategory,
|
|
19766
20470
|
deriveProjectId,
|
|
19767
20471
|
detectFormat,
|
|
20472
|
+
directorySizeBytes,
|
|
19768
20473
|
discoverAssertions,
|
|
19769
20474
|
discoverClaudeSessions,
|
|
19770
20475
|
discoverCodexSessions,
|
|
@@ -19773,6 +20478,7 @@ export {
|
|
|
19773
20478
|
discoverGraders as discoverJudges,
|
|
19774
20479
|
discoverProjects,
|
|
19775
20480
|
discoverProviders,
|
|
20481
|
+
ensureResultsRepoClone,
|
|
19776
20482
|
ensureVSCodeSubagents,
|
|
19777
20483
|
evaluate,
|
|
19778
20484
|
executeScript,
|
|
@@ -19797,6 +20503,8 @@ export {
|
|
|
19797
20503
|
getOutputFilenames,
|
|
19798
20504
|
getProject,
|
|
19799
20505
|
getProjectsRegistryPath,
|
|
20506
|
+
getResultsRepoCachePaths,
|
|
20507
|
+
getResultsRepoStatus,
|
|
19800
20508
|
getSubagentsRoot,
|
|
19801
20509
|
getTextContent,
|
|
19802
20510
|
getTraceStateRoot,
|
|
@@ -19826,12 +20534,15 @@ export {
|
|
|
19826
20534
|
mergeExecutionMetrics,
|
|
19827
20535
|
negateScore,
|
|
19828
20536
|
normalizeLineEndings,
|
|
20537
|
+
normalizeResultsExportConfig,
|
|
19829
20538
|
parseAgentSkillsEvals,
|
|
19830
20539
|
parseClaudeSession,
|
|
19831
20540
|
parseCodexSession,
|
|
19832
20541
|
parseCopilotEvents,
|
|
19833
20542
|
parseJsonFromText,
|
|
19834
20543
|
parseJsonSafe,
|
|
20544
|
+
prepareResultsRepoBranch,
|
|
20545
|
+
pushResultsRepoBranch,
|
|
19835
20546
|
readJsonFile,
|
|
19836
20547
|
readTargetDefinitions,
|
|
19837
20548
|
readTestSuiteMetadata,
|
|
@@ -19842,6 +20553,8 @@ export {
|
|
|
19842
20553
|
resolveAndCreateProvider,
|
|
19843
20554
|
resolveDelegatedTargetDefinition,
|
|
19844
20555
|
resolveFileReference,
|
|
20556
|
+
resolveResultsRepoRunsDir,
|
|
20557
|
+
resolveResultsRepoUrl,
|
|
19845
20558
|
resolveTargetDefinition,
|
|
19846
20559
|
resolveWorkspaceTemplate,
|
|
19847
20560
|
rubricEvaluationSchema,
|
|
@@ -19863,12 +20576,14 @@ export {
|
|
|
19863
20576
|
scoreToVerdict,
|
|
19864
20577
|
shouldEnableCache,
|
|
19865
20578
|
shouldSkipCacheForTemperature,
|
|
20579
|
+
stageResultsArtifacts,
|
|
19866
20580
|
subscribeToClaudeLogEntries,
|
|
19867
20581
|
subscribeToCodexLogEntries,
|
|
19868
20582
|
subscribeToCopilotCliLogEntries,
|
|
19869
20583
|
subscribeToCopilotSdkLogEntries,
|
|
19870
20584
|
subscribeToPiLogEntries,
|
|
19871
20585
|
substituteVariables,
|
|
20586
|
+
syncResultsRepo,
|
|
19872
20587
|
toCamelCaseDeep,
|
|
19873
20588
|
toSnakeCaseDeep,
|
|
19874
20589
|
toTranscriptJsonLine,
|