@agentv/core 4.31.3 → 4.32.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-A27NE3R7.js → chunk-N5EU446L.js} +70 -66
- package/dist/chunk-N5EU446L.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -0
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +163 -147
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +28 -16
- package/dist/index.d.ts +28 -16
- package/dist/index.js +22 -10
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-XR6DNOZ3.js → ts-eval-loader-Z6IUSDNA.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-A27NE3R7.js.map +0 -1
- /package/dist/{ts-eval-loader-XR6DNOZ3.js.map → ts-eval-loader-Z6IUSDNA.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -727,6 +727,43 @@ var init_case_file_loader = __esm({
|
|
|
727
727
|
}
|
|
728
728
|
});
|
|
729
729
|
|
|
730
|
+
// src/paths.ts
|
|
731
|
+
function readEnvPath(name) {
|
|
732
|
+
const value = process.env[name];
|
|
733
|
+
if (!value || value === "undefined") return void 0;
|
|
734
|
+
return value;
|
|
735
|
+
}
|
|
736
|
+
function getAgentvConfigDir() {
|
|
737
|
+
return readEnvPath("AGENTV_HOME") ?? import_node_path3.default.join(import_node_os.default.homedir(), ".agentv");
|
|
738
|
+
}
|
|
739
|
+
function getAgentvHome() {
|
|
740
|
+
return getAgentvConfigDir();
|
|
741
|
+
}
|
|
742
|
+
function getAgentvDataDir() {
|
|
743
|
+
return readEnvPath("AGENTV_DATA_DIR") ?? getAgentvConfigDir();
|
|
744
|
+
}
|
|
745
|
+
function getWorkspacesRoot() {
|
|
746
|
+
return import_node_path3.default.join(getAgentvDataDir(), "workspaces");
|
|
747
|
+
}
|
|
748
|
+
function getSubagentsRoot() {
|
|
749
|
+
return import_node_path3.default.join(getAgentvDataDir(), "subagents");
|
|
750
|
+
}
|
|
751
|
+
function getTraceStateRoot() {
|
|
752
|
+
return import_node_path3.default.join(getAgentvDataDir(), "trace-state");
|
|
753
|
+
}
|
|
754
|
+
function getWorkspacePoolRoot() {
|
|
755
|
+
return import_node_path3.default.join(getAgentvDataDir(), "workspace-pool");
|
|
756
|
+
}
|
|
757
|
+
var import_node_os, import_node_path3;
|
|
758
|
+
var init_paths = __esm({
|
|
759
|
+
"src/paths.ts"() {
|
|
760
|
+
"use strict";
|
|
761
|
+
init_cjs_shims();
|
|
762
|
+
import_node_os = __toESM(require("os"), 1);
|
|
763
|
+
import_node_path3 = __toESM(require("path"), 1);
|
|
764
|
+
}
|
|
765
|
+
});
|
|
766
|
+
|
|
730
767
|
// src/evaluation/loaders/file-resolver.ts
|
|
731
768
|
async function fileExists(absolutePath) {
|
|
732
769
|
try {
|
|
@@ -744,15 +781,15 @@ function resolveToAbsolutePath(candidate) {
|
|
|
744
781
|
if (candidate.startsWith("file:")) {
|
|
745
782
|
return (0, import_node_url.fileURLToPath)(candidate);
|
|
746
783
|
}
|
|
747
|
-
return
|
|
784
|
+
return import_node_path4.default.resolve(candidate);
|
|
748
785
|
}
|
|
749
786
|
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
750
787
|
}
|
|
751
788
|
function buildDirectoryChain(filePath, repoRoot) {
|
|
752
789
|
const directories = [];
|
|
753
790
|
const seen = /* @__PURE__ */ new Set();
|
|
754
|
-
const boundary =
|
|
755
|
-
let current =
|
|
791
|
+
const boundary = import_node_path4.default.resolve(repoRoot);
|
|
792
|
+
let current = import_node_path4.default.resolve(import_node_path4.default.dirname(filePath));
|
|
756
793
|
while (current !== void 0) {
|
|
757
794
|
if (!seen.has(current)) {
|
|
758
795
|
directories.push(current);
|
|
@@ -761,7 +798,7 @@ function buildDirectoryChain(filePath, repoRoot) {
|
|
|
761
798
|
if (current === boundary) {
|
|
762
799
|
break;
|
|
763
800
|
}
|
|
764
|
-
const parent =
|
|
801
|
+
const parent = import_node_path4.default.dirname(current);
|
|
765
802
|
if (parent === current) {
|
|
766
803
|
break;
|
|
767
804
|
}
|
|
@@ -775,16 +812,16 @@ function buildDirectoryChain(filePath, repoRoot) {
|
|
|
775
812
|
function buildSearchRoots(evalPath, repoRoot) {
|
|
776
813
|
const uniqueRoots = [];
|
|
777
814
|
const addRoot = (root) => {
|
|
778
|
-
const normalized =
|
|
815
|
+
const normalized = import_node_path4.default.resolve(root);
|
|
779
816
|
if (!uniqueRoots.includes(normalized)) {
|
|
780
817
|
uniqueRoots.push(normalized);
|
|
781
818
|
}
|
|
782
819
|
};
|
|
783
|
-
let currentDir =
|
|
820
|
+
let currentDir = import_node_path4.default.dirname(evalPath);
|
|
784
821
|
let reachedBoundary = false;
|
|
785
822
|
while (!reachedBoundary) {
|
|
786
823
|
addRoot(currentDir);
|
|
787
|
-
const parentDir =
|
|
824
|
+
const parentDir = import_node_path4.default.dirname(currentDir);
|
|
788
825
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
789
826
|
reachedBoundary = true;
|
|
790
827
|
} else {
|
|
@@ -802,16 +839,16 @@ function trimLeadingSeparators(value) {
|
|
|
802
839
|
async function resolveFileReference2(rawValue, searchRoots) {
|
|
803
840
|
const displayPath = trimLeadingSeparators(rawValue);
|
|
804
841
|
const potentialPaths = [];
|
|
805
|
-
if (
|
|
806
|
-
potentialPaths.push(
|
|
842
|
+
if (import_node_path4.default.isAbsolute(rawValue)) {
|
|
843
|
+
potentialPaths.push(import_node_path4.default.normalize(rawValue));
|
|
807
844
|
}
|
|
808
845
|
for (const base of searchRoots) {
|
|
809
|
-
potentialPaths.push(
|
|
846
|
+
potentialPaths.push(import_node_path4.default.resolve(base, displayPath));
|
|
810
847
|
}
|
|
811
848
|
const attempted = [];
|
|
812
849
|
const seen = /* @__PURE__ */ new Set();
|
|
813
850
|
for (const candidate of potentialPaths) {
|
|
814
|
-
const absoluteCandidate =
|
|
851
|
+
const absoluteCandidate = import_node_path4.default.resolve(candidate);
|
|
815
852
|
if (seen.has(absoluteCandidate)) {
|
|
816
853
|
continue;
|
|
817
854
|
}
|
|
@@ -823,14 +860,14 @@ async function resolveFileReference2(rawValue, searchRoots) {
|
|
|
823
860
|
}
|
|
824
861
|
return { displayPath, attempted };
|
|
825
862
|
}
|
|
826
|
-
var import_node_fs, import_promises3,
|
|
863
|
+
var import_node_fs, import_promises3, import_node_path4, import_node_url;
|
|
827
864
|
var init_file_resolver = __esm({
|
|
828
865
|
"src/evaluation/loaders/file-resolver.ts"() {
|
|
829
866
|
"use strict";
|
|
830
867
|
init_cjs_shims();
|
|
831
868
|
import_node_fs = require("fs");
|
|
832
869
|
import_promises3 = require("fs/promises");
|
|
833
|
-
|
|
870
|
+
import_node_path4 = __toESM(require("path"), 1);
|
|
834
871
|
import_node_url = require("url");
|
|
835
872
|
}
|
|
836
873
|
});
|
|
@@ -839,52 +876,56 @@ var init_file_resolver = __esm({
|
|
|
839
876
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
840
877
|
const directories = buildDirectoryChain(evalFilePath, repoRoot);
|
|
841
878
|
for (const directory of directories) {
|
|
842
|
-
const configPath =
|
|
879
|
+
const configPath = import_node_path5.default.join(directory, ".agentv", "config.yaml");
|
|
843
880
|
if (!await fileExists(configPath)) {
|
|
844
881
|
continue;
|
|
845
882
|
}
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
}
|
|
864
|
-
|
|
865
|
-
logWarning(`Invalid eval_patterns in ${configPath}, all entries must be strings`);
|
|
866
|
-
continue;
|
|
867
|
-
}
|
|
868
|
-
const executionDefaults = parseExecutionDefaults(
|
|
869
|
-
parsed.execution,
|
|
870
|
-
configPath
|
|
871
|
-
);
|
|
872
|
-
const results = parseResultsConfig(parsed.results, configPath);
|
|
873
|
-
const hooks = parseHooksConfig(parsed.hooks, configPath);
|
|
874
|
-
return {
|
|
875
|
-
required_version: requiredVersion,
|
|
876
|
-
eval_patterns: evalPatterns,
|
|
877
|
-
execution: executionDefaults,
|
|
878
|
-
results,
|
|
879
|
-
...hooks && { hooks }
|
|
880
|
-
};
|
|
881
|
-
} catch (error) {
|
|
882
|
-
logWarning(
|
|
883
|
-
`Could not read .agentv/config.yaml at ${configPath}: ${error.message}`
|
|
884
|
-
);
|
|
883
|
+
const config = await readConfigFile(configPath);
|
|
884
|
+
if (config) return config;
|
|
885
|
+
}
|
|
886
|
+
const globalConfigPath = import_node_path5.default.join(getAgentvConfigDir(), "config.yaml");
|
|
887
|
+
return await fileExists(globalConfigPath) ? readConfigFile(globalConfigPath) : null;
|
|
888
|
+
}
|
|
889
|
+
async function readConfigFile(configPath) {
|
|
890
|
+
try {
|
|
891
|
+
const rawConfig = await (0, import_promises4.readFile)(configPath, "utf8");
|
|
892
|
+
const parsed = interpolateEnv(parseYamlValue(rawConfig), process.env);
|
|
893
|
+
if (!isJsonObject(parsed)) {
|
|
894
|
+
logWarning(`Invalid config.yaml format at ${configPath}`);
|
|
895
|
+
return null;
|
|
896
|
+
}
|
|
897
|
+
const config = parsed;
|
|
898
|
+
const requiredVersion = parsed.required_version;
|
|
899
|
+
if (requiredVersion !== void 0 && typeof requiredVersion !== "string") {
|
|
900
|
+
logWarning(`Invalid required_version in ${configPath}, expected string`);
|
|
901
|
+
return null;
|
|
885
902
|
}
|
|
903
|
+
const evalPatterns = config.eval_patterns;
|
|
904
|
+
if (evalPatterns !== void 0 && !Array.isArray(evalPatterns)) {
|
|
905
|
+
logWarning(`Invalid eval_patterns in ${configPath}, expected array`);
|
|
906
|
+
return null;
|
|
907
|
+
}
|
|
908
|
+
if (Array.isArray(evalPatterns) && !evalPatterns.every((p) => typeof p === "string")) {
|
|
909
|
+
logWarning(`Invalid eval_patterns in ${configPath}, all entries must be strings`);
|
|
910
|
+
return null;
|
|
911
|
+
}
|
|
912
|
+
const executionDefaults = parseExecutionDefaults(
|
|
913
|
+
parsed.execution,
|
|
914
|
+
configPath
|
|
915
|
+
);
|
|
916
|
+
const results = parseResultsConfig(parsed.results, configPath);
|
|
917
|
+
const hooks = parseHooksConfig(parsed.hooks, configPath);
|
|
918
|
+
return {
|
|
919
|
+
required_version: requiredVersion,
|
|
920
|
+
eval_patterns: evalPatterns,
|
|
921
|
+
execution: executionDefaults,
|
|
922
|
+
results,
|
|
923
|
+
...hooks && { hooks }
|
|
924
|
+
};
|
|
925
|
+
} catch (error) {
|
|
926
|
+
logWarning(`Could not read config.yaml at ${configPath}: ${error.message}`);
|
|
927
|
+
return null;
|
|
886
928
|
}
|
|
887
|
-
return null;
|
|
888
929
|
}
|
|
889
930
|
function extractTargetFromSuite(suite) {
|
|
890
931
|
const execution = suite.execution;
|
|
@@ -1246,13 +1287,14 @@ function parseHooksConfig(raw, configPath) {
|
|
|
1246
1287
|
function logWarning(message) {
|
|
1247
1288
|
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
|
|
1248
1289
|
}
|
|
1249
|
-
var import_promises4,
|
|
1290
|
+
var import_promises4, import_node_path5, ANSI_YELLOW2, ANSI_RESET3, DEFAULT_EVAL_PATTERNS, VALID_TRIAL_STRATEGIES;
|
|
1250
1291
|
var init_config_loader = __esm({
|
|
1251
1292
|
"src/evaluation/loaders/config-loader.ts"() {
|
|
1252
1293
|
"use strict";
|
|
1253
1294
|
init_cjs_shims();
|
|
1254
1295
|
import_promises4 = require("fs/promises");
|
|
1255
|
-
|
|
1296
|
+
import_node_path5 = __toESM(require("path"), 1);
|
|
1297
|
+
init_paths();
|
|
1256
1298
|
init_interpolation();
|
|
1257
1299
|
init_types();
|
|
1258
1300
|
init_yaml_loader();
|
|
@@ -1560,19 +1602,19 @@ function resolveLocalFilePath(value, basePath) {
|
|
|
1560
1602
|
if (/^[a-z]+:\/\//i.test(value)) {
|
|
1561
1603
|
return void 0;
|
|
1562
1604
|
}
|
|
1563
|
-
return basePath ?
|
|
1605
|
+
return basePath ? import_node_path6.default.resolve(basePath, value) : import_node_path6.default.resolve(value);
|
|
1564
1606
|
}
|
|
1565
1607
|
function formatFileText(filePath, text) {
|
|
1566
1608
|
return `[[ file: ${filePath} ]]
|
|
1567
1609
|
${text}`;
|
|
1568
1610
|
}
|
|
1569
|
-
var import_promises5,
|
|
1611
|
+
var import_promises5, import_node_path6, import_node_url2, MIME_TYPE_ALIASES, REPLACEMENT_CHAR;
|
|
1570
1612
|
var init_content_preprocessor = __esm({
|
|
1571
1613
|
"src/evaluation/content-preprocessor.ts"() {
|
|
1572
1614
|
"use strict";
|
|
1573
1615
|
init_cjs_shims();
|
|
1574
1616
|
import_promises5 = require("fs/promises");
|
|
1575
|
-
|
|
1617
|
+
import_node_path6 = __toESM(require("path"), 1);
|
|
1576
1618
|
import_node_url2 = require("url");
|
|
1577
1619
|
init_exec();
|
|
1578
1620
|
MIME_TYPE_ALIASES = {
|
|
@@ -1730,8 +1772,8 @@ function isTemplateReference(value) {
|
|
|
1730
1772
|
}
|
|
1731
1773
|
async function resolveAssertionTemplateReference(include, searchRoots) {
|
|
1732
1774
|
const templateCandidates = isTemplateReference(include) ? [
|
|
1733
|
-
|
|
1734
|
-
|
|
1775
|
+
import_node_path7.default.join(".agentv", "templates", `${include}.yaml`),
|
|
1776
|
+
import_node_path7.default.join(".agentv", "templates", `${include}.yml`)
|
|
1735
1777
|
] : [include];
|
|
1736
1778
|
const attempted = [];
|
|
1737
1779
|
for (const candidate of templateCandidates) {
|
|
@@ -1784,10 +1826,10 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
|
|
|
1784
1826
|
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
|
|
1785
1827
|
);
|
|
1786
1828
|
}
|
|
1787
|
-
const templateDir =
|
|
1829
|
+
const templateDir = import_node_path7.default.dirname(resolved.resolvedPath);
|
|
1788
1830
|
const nestedSearchRoots = [
|
|
1789
1831
|
templateDir,
|
|
1790
|
-
...searchRoots.filter((root) =>
|
|
1832
|
+
...searchRoots.filter((root) => import_node_path7.default.resolve(root) !== templateDir)
|
|
1791
1833
|
];
|
|
1792
1834
|
return await expandGraderEntries(assertions, nestedSearchRoots, evalId, {
|
|
1793
1835
|
depth: nextDepth,
|
|
@@ -1949,7 +1991,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
1949
1991
|
if (cwd) {
|
|
1950
1992
|
const resolved = await resolveFileReference2(cwd, searchRoots);
|
|
1951
1993
|
if (resolved.resolvedPath) {
|
|
1952
|
-
resolvedCwd =
|
|
1994
|
+
resolvedCwd = import_node_path7.default.resolve(resolved.resolvedPath);
|
|
1953
1995
|
} else {
|
|
1954
1996
|
logWarning2(
|
|
1955
1997
|
`Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -2134,7 +2176,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
2134
2176
|
aggregatorPrompt = fileRef;
|
|
2135
2177
|
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
2136
2178
|
if (resolved.resolvedPath) {
|
|
2137
|
-
promptPath2 =
|
|
2179
|
+
promptPath2 = import_node_path7.default.resolve(resolved.resolvedPath);
|
|
2138
2180
|
} else {
|
|
2139
2181
|
throw new Error(
|
|
2140
2182
|
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
@@ -2814,7 +2856,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
2814
2856
|
const commandPath = commandArray[commandArray.length - 1];
|
|
2815
2857
|
const resolved = await resolveFileReference2(commandPath, searchRoots);
|
|
2816
2858
|
if (resolved.resolvedPath) {
|
|
2817
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
2859
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), import_node_path7.default.resolve(resolved.resolvedPath)];
|
|
2818
2860
|
} else {
|
|
2819
2861
|
throw new Error(
|
|
2820
2862
|
`Grader '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -2829,7 +2871,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
2829
2871
|
prompt = fileRef;
|
|
2830
2872
|
const resolved = await resolveFileReference2(fileRef, searchRoots);
|
|
2831
2873
|
if (resolved.resolvedPath) {
|
|
2832
|
-
promptPath =
|
|
2874
|
+
promptPath = import_node_path7.default.resolve(resolved.resolvedPath);
|
|
2833
2875
|
try {
|
|
2834
2876
|
await validateCustomPromptContent(promptPath);
|
|
2835
2877
|
} catch (error) {
|
|
@@ -2987,7 +3029,7 @@ async function parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId)
|
|
|
2987
3029
|
preprocessors.push({
|
|
2988
3030
|
type,
|
|
2989
3031
|
command,
|
|
2990
|
-
resolvedCommand: [...command.slice(0, -1),
|
|
3032
|
+
resolvedCommand: [...command.slice(0, -1), import_node_path7.default.resolve(resolved.resolvedPath)]
|
|
2991
3033
|
});
|
|
2992
3034
|
}
|
|
2993
3035
|
return preprocessors;
|
|
@@ -3379,13 +3421,13 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
3379
3421
|
rubrics: rubricItems
|
|
3380
3422
|
};
|
|
3381
3423
|
}
|
|
3382
|
-
var import_promises7,
|
|
3424
|
+
var import_promises7, import_node_path7, ANSI_YELLOW4, ANSI_RESET5, MAX_ASSERTION_INCLUDE_DEPTH, PROMPT_FILE_PREFIX, VALID_FIELD_MATCH_TYPES, VALID_FIELD_AGGREGATION_TYPES;
|
|
3383
3425
|
var init_grader_parser = __esm({
|
|
3384
3426
|
"src/evaluation/loaders/grader-parser.ts"() {
|
|
3385
3427
|
"use strict";
|
|
3386
3428
|
init_cjs_shims();
|
|
3387
3429
|
import_promises7 = require("fs/promises");
|
|
3388
|
-
|
|
3430
|
+
import_node_path7 = __toESM(require("path"), 1);
|
|
3389
3431
|
init_content_preprocessor();
|
|
3390
3432
|
init_interpolation();
|
|
3391
3433
|
init_types();
|
|
@@ -3463,7 +3505,7 @@ var init_segment_formatter = __esm({
|
|
|
3463
3505
|
|
|
3464
3506
|
// src/evaluation/loaders/message-processor.ts
|
|
3465
3507
|
function detectImageMediaType(filePath) {
|
|
3466
|
-
const ext =
|
|
3508
|
+
const ext = import_node_path8.default.extname(filePath).toLowerCase();
|
|
3467
3509
|
return IMAGE_MEDIA_TYPES[ext];
|
|
3468
3510
|
}
|
|
3469
3511
|
async function processMessages(options) {
|
|
@@ -3525,7 +3567,7 @@ async function processMessages(options) {
|
|
|
3525
3567
|
...cloneJsonObject(rawSegment),
|
|
3526
3568
|
path: displayPath,
|
|
3527
3569
|
text: fileContent,
|
|
3528
|
-
resolvedPath:
|
|
3570
|
+
resolvedPath: import_node_path8.default.resolve(resolvedPath)
|
|
3529
3571
|
});
|
|
3530
3572
|
if (verbose) {
|
|
3531
3573
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -3649,7 +3691,7 @@ async function processExpectedMessages(options) {
|
|
|
3649
3691
|
type: "file",
|
|
3650
3692
|
path: displayPath,
|
|
3651
3693
|
text: fileContent,
|
|
3652
|
-
resolvedPath:
|
|
3694
|
+
resolvedPath: import_node_path8.default.resolve(resolvedPath)
|
|
3653
3695
|
});
|
|
3654
3696
|
if (verbose) {
|
|
3655
3697
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -3717,13 +3759,13 @@ async function processExpectedMessages(options) {
|
|
|
3717
3759
|
}
|
|
3718
3760
|
return segments;
|
|
3719
3761
|
}
|
|
3720
|
-
var import_promises8,
|
|
3762
|
+
var import_promises8, import_node_path8, IMAGE_MEDIA_TYPES, ANSI_YELLOW5, ANSI_RESET6;
|
|
3721
3763
|
var init_message_processor = __esm({
|
|
3722
3764
|
"src/evaluation/loaders/message-processor.ts"() {
|
|
3723
3765
|
"use strict";
|
|
3724
3766
|
init_cjs_shims();
|
|
3725
3767
|
import_promises8 = require("fs/promises");
|
|
3726
|
-
|
|
3768
|
+
import_node_path8 = __toESM(require("path"), 1);
|
|
3727
3769
|
init_segment_formatter();
|
|
3728
3770
|
init_input_message_utils();
|
|
3729
3771
|
init_types();
|
|
@@ -3821,7 +3863,7 @@ function matchesFilter(id, filter) {
|
|
|
3821
3863
|
return typeof filter === "string" ? import_micromatch.default.isMatch(id, filter) : filter.some((pattern) => import_micromatch.default.isMatch(id, pattern));
|
|
3822
3864
|
}
|
|
3823
3865
|
function detectFormat(filePath) {
|
|
3824
|
-
const ext =
|
|
3866
|
+
const ext = import_node_path9.default.extname(filePath).toLowerCase();
|
|
3825
3867
|
if (ext === ".jsonl") return "jsonl";
|
|
3826
3868
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
3827
3869
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -3831,9 +3873,9 @@ function detectFormat(filePath) {
|
|
|
3831
3873
|
);
|
|
3832
3874
|
}
|
|
3833
3875
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
3834
|
-
const dir =
|
|
3835
|
-
const base =
|
|
3836
|
-
const sidecarPath =
|
|
3876
|
+
const dir = import_node_path9.default.dirname(jsonlPath);
|
|
3877
|
+
const base = import_node_path9.default.basename(jsonlPath, ".jsonl");
|
|
3878
|
+
const sidecarPath = import_node_path9.default.join(dir, `${base}.yaml`);
|
|
3837
3879
|
if (!await fileExists(sidecarPath)) {
|
|
3838
3880
|
if (verbose) {
|
|
3839
3881
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -3882,13 +3924,13 @@ function parseJsonlContent(content, filePath) {
|
|
|
3882
3924
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
3883
3925
|
const verbose = options?.verbose ?? false;
|
|
3884
3926
|
const filterPattern = options?.filter;
|
|
3885
|
-
const absoluteTestPath =
|
|
3927
|
+
const absoluteTestPath = import_node_path9.default.resolve(evalFilePath);
|
|
3886
3928
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
3887
3929
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
3888
3930
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
3889
3931
|
const rawFile = await (0, import_promises9.readFile)(absoluteTestPath, "utf8");
|
|
3890
3932
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
3891
|
-
const fallbackSuiteName =
|
|
3933
|
+
const fallbackSuiteName = import_node_path9.default.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
3892
3934
|
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
3893
3935
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
3894
3936
|
const globalExecution = sidecar.execution;
|
|
@@ -4021,13 +4063,13 @@ ${detailBlock}${ANSI_RESET7}`);
|
|
|
4021
4063
|
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
|
|
4022
4064
|
}
|
|
4023
4065
|
}
|
|
4024
|
-
var import_promises9,
|
|
4066
|
+
var import_promises9, import_node_path9, import_micromatch, ANSI_YELLOW6, ANSI_RED2, ANSI_RESET7;
|
|
4025
4067
|
var init_jsonl_parser = __esm({
|
|
4026
4068
|
"src/evaluation/loaders/jsonl-parser.ts"() {
|
|
4027
4069
|
"use strict";
|
|
4028
4070
|
init_cjs_shims();
|
|
4029
4071
|
import_promises9 = require("fs/promises");
|
|
4030
|
-
|
|
4072
|
+
import_node_path9 = __toESM(require("path"), 1);
|
|
4031
4073
|
import_micromatch = __toESM(require("micromatch"), 1);
|
|
4032
4074
|
init_input_message_utils();
|
|
4033
4075
|
init_interpolation();
|
|
@@ -4311,14 +4353,14 @@ async function readJsonFile(filePath) {
|
|
|
4311
4353
|
return JSON.parse(content);
|
|
4312
4354
|
}
|
|
4313
4355
|
async function findGitRoot(startPath) {
|
|
4314
|
-
let currentDir =
|
|
4315
|
-
const root =
|
|
4356
|
+
let currentDir = import_node_path10.default.dirname(import_node_path10.default.resolve(startPath));
|
|
4357
|
+
const root = import_node_path10.default.parse(currentDir).root;
|
|
4316
4358
|
while (currentDir !== root) {
|
|
4317
|
-
const gitPath =
|
|
4359
|
+
const gitPath = import_node_path10.default.join(currentDir, ".git");
|
|
4318
4360
|
if (await fileExists2(gitPath)) {
|
|
4319
4361
|
return currentDir;
|
|
4320
4362
|
}
|
|
4321
|
-
const parentDir =
|
|
4363
|
+
const parentDir = import_node_path10.default.dirname(currentDir);
|
|
4322
4364
|
if (parentDir === currentDir) {
|
|
4323
4365
|
break;
|
|
4324
4366
|
}
|
|
@@ -4329,8 +4371,8 @@ async function findGitRoot(startPath) {
|
|
|
4329
4371
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
4330
4372
|
const directories = [];
|
|
4331
4373
|
const seen = /* @__PURE__ */ new Set();
|
|
4332
|
-
const boundary =
|
|
4333
|
-
let current =
|
|
4374
|
+
const boundary = import_node_path10.default.resolve(repoRoot);
|
|
4375
|
+
let current = import_node_path10.default.resolve(import_node_path10.default.dirname(filePath));
|
|
4334
4376
|
while (current !== void 0) {
|
|
4335
4377
|
if (!seen.has(current)) {
|
|
4336
4378
|
directories.push(current);
|
|
@@ -4339,7 +4381,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
4339
4381
|
if (current === boundary) {
|
|
4340
4382
|
break;
|
|
4341
4383
|
}
|
|
4342
|
-
const parent =
|
|
4384
|
+
const parent = import_node_path10.default.dirname(current);
|
|
4343
4385
|
if (parent === current) {
|
|
4344
4386
|
break;
|
|
4345
4387
|
}
|
|
@@ -4353,16 +4395,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
4353
4395
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
4354
4396
|
const uniqueRoots = [];
|
|
4355
4397
|
const addRoot = (root) => {
|
|
4356
|
-
const normalized =
|
|
4398
|
+
const normalized = import_node_path10.default.resolve(root);
|
|
4357
4399
|
if (!uniqueRoots.includes(normalized)) {
|
|
4358
4400
|
uniqueRoots.push(normalized);
|
|
4359
4401
|
}
|
|
4360
4402
|
};
|
|
4361
|
-
let currentDir =
|
|
4403
|
+
let currentDir = import_node_path10.default.dirname(evalPath);
|
|
4362
4404
|
let reachedBoundary = false;
|
|
4363
4405
|
while (!reachedBoundary) {
|
|
4364
4406
|
addRoot(currentDir);
|
|
4365
|
-
const parentDir =
|
|
4407
|
+
const parentDir = import_node_path10.default.dirname(currentDir);
|
|
4366
4408
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
4367
4409
|
reachedBoundary = true;
|
|
4368
4410
|
} else {
|
|
@@ -4380,16 +4422,16 @@ function trimLeadingSeparators2(value) {
|
|
|
4380
4422
|
async function resolveFileReference3(rawValue, searchRoots) {
|
|
4381
4423
|
const displayPath = trimLeadingSeparators2(rawValue);
|
|
4382
4424
|
const potentialPaths = [];
|
|
4383
|
-
if (
|
|
4384
|
-
potentialPaths.push(
|
|
4425
|
+
if (import_node_path10.default.isAbsolute(rawValue)) {
|
|
4426
|
+
potentialPaths.push(import_node_path10.default.normalize(rawValue));
|
|
4385
4427
|
}
|
|
4386
4428
|
for (const base of searchRoots) {
|
|
4387
|
-
potentialPaths.push(
|
|
4429
|
+
potentialPaths.push(import_node_path10.default.resolve(base, displayPath));
|
|
4388
4430
|
}
|
|
4389
4431
|
const attempted = [];
|
|
4390
4432
|
const seen = /* @__PURE__ */ new Set();
|
|
4391
4433
|
for (const candidate of potentialPaths) {
|
|
4392
|
-
const absoluteCandidate =
|
|
4434
|
+
const absoluteCandidate = import_node_path10.default.resolve(candidate);
|
|
4393
4435
|
if (seen.has(absoluteCandidate)) {
|
|
4394
4436
|
continue;
|
|
4395
4437
|
}
|
|
@@ -4401,14 +4443,14 @@ async function resolveFileReference3(rawValue, searchRoots) {
|
|
|
4401
4443
|
}
|
|
4402
4444
|
return { displayPath, attempted };
|
|
4403
4445
|
}
|
|
4404
|
-
var import_node_fs2, import_promises10,
|
|
4446
|
+
var import_node_fs2, import_promises10, import_node_path10;
|
|
4405
4447
|
var init_file_utils = __esm({
|
|
4406
4448
|
"src/evaluation/file-utils.ts"() {
|
|
4407
4449
|
"use strict";
|
|
4408
4450
|
init_cjs_shims();
|
|
4409
4451
|
import_node_fs2 = require("fs");
|
|
4410
4452
|
import_promises10 = require("fs/promises");
|
|
4411
|
-
|
|
4453
|
+
import_node_path10 = __toESM(require("path"), 1);
|
|
4412
4454
|
}
|
|
4413
4455
|
});
|
|
4414
4456
|
|
|
@@ -4660,44 +4702,6 @@ var init_p_limit = __esm({
|
|
|
4660
4702
|
}
|
|
4661
4703
|
});
|
|
4662
4704
|
|
|
4663
|
-
// src/paths.ts
|
|
4664
|
-
function getAgentvConfigDir() {
|
|
4665
|
-
return import_node_path10.default.join(import_node_os.default.homedir(), ".agentv");
|
|
4666
|
-
}
|
|
4667
|
-
function getAgentvHome() {
|
|
4668
|
-
const envHome = process.env.AGENTV_HOME;
|
|
4669
|
-
if (envHome && envHome !== "undefined") {
|
|
4670
|
-
if (!logged) {
|
|
4671
|
-
logged = true;
|
|
4672
|
-
console.log(`Using AGENTV_HOME: ${envHome}`);
|
|
4673
|
-
}
|
|
4674
|
-
return envHome;
|
|
4675
|
-
}
|
|
4676
|
-
return import_node_path10.default.join(import_node_os.default.homedir(), ".agentv");
|
|
4677
|
-
}
|
|
4678
|
-
function getWorkspacesRoot() {
|
|
4679
|
-
return import_node_path10.default.join(getAgentvHome(), "workspaces");
|
|
4680
|
-
}
|
|
4681
|
-
function getSubagentsRoot() {
|
|
4682
|
-
return import_node_path10.default.join(getAgentvHome(), "subagents");
|
|
4683
|
-
}
|
|
4684
|
-
function getTraceStateRoot() {
|
|
4685
|
-
return import_node_path10.default.join(getAgentvHome(), "trace-state");
|
|
4686
|
-
}
|
|
4687
|
-
function getWorkspacePoolRoot() {
|
|
4688
|
-
return import_node_path10.default.join(getAgentvHome(), "workspace-pool");
|
|
4689
|
-
}
|
|
4690
|
-
var import_node_os, import_node_path10, logged;
|
|
4691
|
-
var init_paths = __esm({
|
|
4692
|
-
"src/paths.ts"() {
|
|
4693
|
-
"use strict";
|
|
4694
|
-
init_cjs_shims();
|
|
4695
|
-
import_node_os = __toESM(require("os"), 1);
|
|
4696
|
-
import_node_path10 = __toESM(require("path"), 1);
|
|
4697
|
-
logged = false;
|
|
4698
|
-
}
|
|
4699
|
-
});
|
|
4700
|
-
|
|
4701
4705
|
// src/runtime/target-proxy.ts
|
|
4702
4706
|
async function createTargetProxy(options) {
|
|
4703
4707
|
const { defaultProvider, targetResolver, availableTargets, maxCalls } = options;
|
|
@@ -14234,7 +14238,7 @@ async function promptInstall() {
|
|
|
14234
14238
|
}
|
|
14235
14239
|
}
|
|
14236
14240
|
function findManagedSdkInstallRoot() {
|
|
14237
|
-
return import_node_path25.default.join(
|
|
14241
|
+
return import_node_path25.default.join(getAgentvDataDir(), "deps", "pi-sdk");
|
|
14238
14242
|
}
|
|
14239
14243
|
function resolveGlobalNpmRoot() {
|
|
14240
14244
|
try {
|
|
@@ -24828,6 +24832,7 @@ __export(index_exports, {
|
|
|
24828
24832
|
freeformEvaluationSchema: () => freeformEvaluationSchema,
|
|
24829
24833
|
generateRubrics: () => generateRubrics,
|
|
24830
24834
|
getAgentvConfigDir: () => getAgentvConfigDir,
|
|
24835
|
+
getAgentvDataDir: () => getAgentvDataDir,
|
|
24831
24836
|
getAgentvHome: () => getAgentvHome,
|
|
24832
24837
|
getOutputFilenames: () => getOutputFilenames,
|
|
24833
24838
|
getProject: () => getProject,
|
|
@@ -25546,6 +25551,8 @@ var import_node_path54 = __toESM(require("path"), 1);
|
|
|
25546
25551
|
var import_node_util8 = require("util");
|
|
25547
25552
|
init_paths();
|
|
25548
25553
|
var execFileAsync4 = (0, import_node_util8.promisify)(import_node_child_process12.execFile);
|
|
25554
|
+
var RESULTS_REPO_RESULTS_DIR = ".agentv/results";
|
|
25555
|
+
var RESULTS_REPO_RUNS_DIR = `${RESULTS_REPO_RESULTS_DIR}/runs`;
|
|
25549
25556
|
function sanitizeRepoSlug(repo) {
|
|
25550
25557
|
return repo.trim().replace(/[^A-Za-z0-9._-]+/g, "-");
|
|
25551
25558
|
}
|
|
@@ -25565,7 +25572,7 @@ function expandHome(p) {
|
|
|
25565
25572
|
}
|
|
25566
25573
|
function normalizeResultsConfig(config) {
|
|
25567
25574
|
const repo = config.repo.trim();
|
|
25568
|
-
const resolvedPath = config.path ? expandHome(config.path.trim()) : import_node_path54.default.join(
|
|
25575
|
+
const resolvedPath = config.path ? expandHome(config.path.trim()) : import_node_path54.default.join(getAgentvDataDir(), "results", sanitizeRepoSlug(repo));
|
|
25569
25576
|
return {
|
|
25570
25577
|
mode: "github",
|
|
25571
25578
|
repo,
|
|
@@ -25581,7 +25588,7 @@ function resolveResultsRepoUrl(repo) {
|
|
|
25581
25588
|
return `https://github.com/${repo}.git`;
|
|
25582
25589
|
}
|
|
25583
25590
|
function getResultsRepoLocalPaths(repo) {
|
|
25584
|
-
const rootDir = import_node_path54.default.join(
|
|
25591
|
+
const rootDir = import_node_path54.default.join(getAgentvDataDir(), "cache", "results-repo", sanitizeRepoSlug(repo));
|
|
25585
25592
|
return {
|
|
25586
25593
|
rootDir,
|
|
25587
25594
|
repoDir: import_node_path54.default.join(rootDir, "repo"),
|
|
@@ -25778,7 +25785,7 @@ async function stageResultsArtifacts(params) {
|
|
|
25778
25785
|
}
|
|
25779
25786
|
function resolveResultsRepoRunsDir(config) {
|
|
25780
25787
|
const normalized = normalizeResultsConfig(config);
|
|
25781
|
-
return import_node_path54.default.join(normalized.path, "runs");
|
|
25788
|
+
return import_node_path54.default.join(normalized.path, RESULTS_REPO_RESULTS_DIR, "runs");
|
|
25782
25789
|
}
|
|
25783
25790
|
async function directorySizeBytes(targetPath) {
|
|
25784
25791
|
const entry = await (0, import_promises39.stat)(targetPath);
|
|
@@ -25841,7 +25848,12 @@ async function directPushResults(params) {
|
|
|
25841
25848
|
const repoDir = await ensureResultsRepoClone(normalized);
|
|
25842
25849
|
const baseBranch = await resolveDefaultBranch(repoDir);
|
|
25843
25850
|
await fetchResultsRepo(repoDir);
|
|
25844
|
-
const destinationDir = import_node_path54.default.join(
|
|
25851
|
+
const destinationDir = import_node_path54.default.join(
|
|
25852
|
+
repoDir,
|
|
25853
|
+
RESULTS_REPO_RESULTS_DIR,
|
|
25854
|
+
"runs",
|
|
25855
|
+
params.destinationPath
|
|
25856
|
+
);
|
|
25845
25857
|
await stageResultsArtifacts({
|
|
25846
25858
|
repoDir,
|
|
25847
25859
|
sourceDir: params.sourceDir,
|
|
@@ -25989,9 +26001,12 @@ function parseGitBatchBlobs(output) {
|
|
|
25989
26001
|
return blobs;
|
|
25990
26002
|
}
|
|
25991
26003
|
async function listGitRuns(repoDir, ref = "origin/main") {
|
|
25992
|
-
const { stdout: treeOut } = await runGit(
|
|
25993
|
-
|
|
25994
|
-
|
|
26004
|
+
const { stdout: treeOut } = await runGit(
|
|
26005
|
+
["ls-tree", "-r", "--name-only", ref, RESULTS_REPO_RUNS_DIR],
|
|
26006
|
+
{
|
|
26007
|
+
cwd: repoDir
|
|
26008
|
+
}
|
|
26009
|
+
);
|
|
25995
26010
|
const benchmarkPaths = treeOut.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.endsWith("/benchmark.json"));
|
|
25996
26011
|
if (benchmarkPaths.length === 0) {
|
|
25997
26012
|
return [];
|
|
@@ -26008,7 +26023,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
|
|
|
26008
26023
|
const benchmarkPath = benchmarkPaths[index];
|
|
26009
26024
|
const benchmark = JSON.parse(blob.content.toString("utf8"));
|
|
26010
26025
|
const runDir = import_node_path54.default.posix.dirname(benchmarkPath);
|
|
26011
|
-
const relativeRunPath = import_node_path54.default.posix.relative(
|
|
26026
|
+
const relativeRunPath = import_node_path54.default.posix.relative(RESULTS_REPO_RUNS_DIR, runDir);
|
|
26012
26027
|
const runId = buildGitRunId(relativeRunPath);
|
|
26013
26028
|
const timestamp = benchmark.metadata?.timestamp?.trim() || import_node_path54.default.posix.basename(runDir);
|
|
26014
26029
|
const targets = benchmark.metadata?.targets ?? [];
|
|
@@ -26034,7 +26049,7 @@ async function listGitRuns(repoDir, ref = "origin/main") {
|
|
|
26034
26049
|
}
|
|
26035
26050
|
async function materializeGitRun(repoDir, relativeRunPath, ref = "origin/main") {
|
|
26036
26051
|
const normalizedRunPath = relativeRunPath.split(import_node_path54.default.sep).join("/");
|
|
26037
|
-
const runTreePath = import_node_path54.default.posix.join(
|
|
26052
|
+
const runTreePath = import_node_path54.default.posix.join(RESULTS_REPO_RUNS_DIR, normalizedRunPath);
|
|
26038
26053
|
const targetRunDir = import_node_path54.default.join(repoDir, ...runTreePath.split("/"));
|
|
26039
26054
|
const { stdout: treeOut } = await runGit(["ls-tree", "-r", "--name-only", ref, runTreePath], {
|
|
26040
26055
|
cwd: repoDir
|
|
@@ -27709,6 +27724,7 @@ function createAgentKernel() {
|
|
|
27709
27724
|
freeformEvaluationSchema,
|
|
27710
27725
|
generateRubrics,
|
|
27711
27726
|
getAgentvConfigDir,
|
|
27727
|
+
getAgentvDataDir,
|
|
27712
27728
|
getAgentvHome,
|
|
27713
27729
|
getOutputFilenames,
|
|
27714
27730
|
getProject,
|