@agentv/core 4.22.0-next.1 → 4.23.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-B3BLJRYI.js → chunk-3SBKNIB6.js} +63 -23
- package/dist/chunk-3SBKNIB6.js.map +1 -0
- package/dist/{chunk-LKX4QW3G.js → chunk-4VLOUBFL.js} +11 -4
- package/dist/chunk-4VLOUBFL.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +22 -19
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +14 -18
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +100 -37
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -4
- package/dist/index.d.ts +18 -4
- package/dist/index.js +9 -9
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-PA4YFM5D.js → ts-eval-loader-HT7NCO4M.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-B3BLJRYI.js.map +0 -1
- package/dist/chunk-LKX4QW3G.js.map +0 -1
- /package/dist/{ts-eval-loader-PA4YFM5D.js.map → ts-eval-loader-HT7NCO4M.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -277,6 +277,20 @@ var init_trace = __esm({
|
|
|
277
277
|
}
|
|
278
278
|
});
|
|
279
279
|
|
|
280
|
+
// src/evaluation/yaml-loader.ts
|
|
281
|
+
function parseYamlValue(content) {
|
|
282
|
+
return (0, import_yaml.parse)(content, PARSE_OPTIONS);
|
|
283
|
+
}
|
|
284
|
+
var import_yaml, PARSE_OPTIONS;
|
|
285
|
+
var init_yaml_loader = __esm({
|
|
286
|
+
"src/evaluation/yaml-loader.ts"() {
|
|
287
|
+
"use strict";
|
|
288
|
+
init_cjs_shims();
|
|
289
|
+
import_yaml = require("yaml");
|
|
290
|
+
PARSE_OPTIONS = { merge: true };
|
|
291
|
+
}
|
|
292
|
+
});
|
|
293
|
+
|
|
280
294
|
// src/evaluation/input-message-utils.ts
|
|
281
295
|
function flattenInputMessages(messages) {
|
|
282
296
|
return messages.flatMap((message) => extractContentSegments(message.content));
|
|
@@ -472,7 +486,7 @@ function isGlobPattern(filePath) {
|
|
|
472
486
|
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
473
487
|
}
|
|
474
488
|
function parseYamlCases(content, filePath) {
|
|
475
|
-
const raw = (
|
|
489
|
+
const raw = parseYamlValue(content);
|
|
476
490
|
const parsed = interpolateEnv(raw, process.env);
|
|
477
491
|
if (!Array.isArray(parsed)) {
|
|
478
492
|
throw new Error(
|
|
@@ -591,7 +605,7 @@ async function loadCasesFromDirectory(dirPath) {
|
|
|
591
605
|
throw new Error(`Cannot read case file: ${caseFilePath}
|
|
592
606
|
${message}`);
|
|
593
607
|
}
|
|
594
|
-
const raw = (
|
|
608
|
+
const raw = parseYamlValue(content);
|
|
595
609
|
const parsed = interpolateEnv(raw, process.env);
|
|
596
610
|
if (!isJsonObject(parsed)) {
|
|
597
611
|
throw new Error(
|
|
@@ -628,7 +642,7 @@ async function expandFileReferences(tests, evalFileDir) {
|
|
|
628
642
|
}
|
|
629
643
|
return expanded;
|
|
630
644
|
}
|
|
631
|
-
var import_promises2, import_node_path2, import_fast_glob,
|
|
645
|
+
var import_promises2, import_node_path2, import_fast_glob, ANSI_YELLOW, ANSI_RESET2, FILE_PROTOCOL;
|
|
632
646
|
var init_case_file_loader = __esm({
|
|
633
647
|
"src/evaluation/loaders/case-file-loader.ts"() {
|
|
634
648
|
"use strict";
|
|
@@ -636,9 +650,9 @@ var init_case_file_loader = __esm({
|
|
|
636
650
|
import_promises2 = require("fs/promises");
|
|
637
651
|
import_node_path2 = __toESM(require("path"), 1);
|
|
638
652
|
import_fast_glob = __toESM(require("fast-glob"), 1);
|
|
639
|
-
import_yaml = require("yaml");
|
|
640
653
|
init_interpolation();
|
|
641
654
|
init_types();
|
|
655
|
+
init_yaml_loader();
|
|
642
656
|
ANSI_YELLOW = "\x1B[33m";
|
|
643
657
|
ANSI_RESET2 = "\x1B[0m";
|
|
644
658
|
FILE_PROTOCOL = "file://";
|
|
@@ -763,7 +777,7 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
763
777
|
}
|
|
764
778
|
try {
|
|
765
779
|
const rawConfig = await (0, import_promises4.readFile)(configPath, "utf8");
|
|
766
|
-
const parsed = interpolateEnv((
|
|
780
|
+
const parsed = interpolateEnv(parseYamlValue(rawConfig), process.env);
|
|
767
781
|
if (!isJsonObject(parsed)) {
|
|
768
782
|
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
769
783
|
continue;
|
|
@@ -1163,16 +1177,16 @@ function parseHooksConfig(raw, configPath) {
|
|
|
1163
1177
|
function logWarning(message) {
|
|
1164
1178
|
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
|
|
1165
1179
|
}
|
|
1166
|
-
var import_promises4, import_node_path4,
|
|
1180
|
+
var import_promises4, import_node_path4, ANSI_YELLOW2, ANSI_RESET3, DEFAULT_EVAL_PATTERNS, VALID_TRIAL_STRATEGIES;
|
|
1167
1181
|
var init_config_loader = __esm({
|
|
1168
1182
|
"src/evaluation/loaders/config-loader.ts"() {
|
|
1169
1183
|
"use strict";
|
|
1170
1184
|
init_cjs_shims();
|
|
1171
1185
|
import_promises4 = require("fs/promises");
|
|
1172
1186
|
import_node_path4 = __toESM(require("path"), 1);
|
|
1173
|
-
import_yaml2 = require("yaml");
|
|
1174
1187
|
init_interpolation();
|
|
1175
1188
|
init_types();
|
|
1189
|
+
init_yaml_loader();
|
|
1176
1190
|
init_file_resolver();
|
|
1177
1191
|
ANSI_YELLOW2 = "\x1B[33m";
|
|
1178
1192
|
ANSI_RESET3 = "\x1B[0m";
|
|
@@ -1689,7 +1703,7 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
|
|
|
1689
1703
|
throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
|
|
1690
1704
|
}
|
|
1691
1705
|
const content = await (0, import_promises7.readFile)(resolved.resolvedPath, "utf8");
|
|
1692
|
-
const parsed = interpolateEnv((
|
|
1706
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
1693
1707
|
if (!isJsonObject2(parsed)) {
|
|
1694
1708
|
throw new Error(
|
|
1695
1709
|
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} (expected a YAML object with an assertions array)`
|
|
@@ -3296,18 +3310,18 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
3296
3310
|
rubrics: rubricItems
|
|
3297
3311
|
};
|
|
3298
3312
|
}
|
|
3299
|
-
var import_promises7, import_node_path6,
|
|
3313
|
+
var import_promises7, import_node_path6, ANSI_YELLOW4, ANSI_RESET5, MAX_ASSERTION_INCLUDE_DEPTH, PROMPT_FILE_PREFIX, VALID_FIELD_MATCH_TYPES, VALID_FIELD_AGGREGATION_TYPES;
|
|
3300
3314
|
var init_grader_parser = __esm({
|
|
3301
3315
|
"src/evaluation/loaders/grader-parser.ts"() {
|
|
3302
3316
|
"use strict";
|
|
3303
3317
|
init_cjs_shims();
|
|
3304
3318
|
import_promises7 = require("fs/promises");
|
|
3305
3319
|
import_node_path6 = __toESM(require("path"), 1);
|
|
3306
|
-
import_yaml3 = require("yaml");
|
|
3307
3320
|
init_content_preprocessor();
|
|
3308
3321
|
init_interpolation();
|
|
3309
3322
|
init_types();
|
|
3310
3323
|
init_prompt_validator();
|
|
3324
|
+
init_yaml_loader();
|
|
3311
3325
|
init_file_resolver();
|
|
3312
3326
|
ANSI_YELLOW4 = "\x1B[33m";
|
|
3313
3327
|
ANSI_RESET5 = "\x1B[0m";
|
|
@@ -3759,7 +3773,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
3759
3773
|
}
|
|
3760
3774
|
try {
|
|
3761
3775
|
const content = await (0, import_promises9.readFile)(sidecarPath, "utf8");
|
|
3762
|
-
const parsed = interpolateEnv((
|
|
3776
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
3763
3777
|
if (!isJsonObject(parsed)) {
|
|
3764
3778
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
3765
3779
|
return {};
|
|
@@ -3938,7 +3952,7 @@ ${detailBlock}${ANSI_RESET7}`);
|
|
|
3938
3952
|
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
|
|
3939
3953
|
}
|
|
3940
3954
|
}
|
|
3941
|
-
var import_promises9, import_node_path8, import_micromatch,
|
|
3955
|
+
var import_promises9, import_node_path8, import_micromatch, ANSI_YELLOW6, ANSI_RED2, ANSI_RESET7;
|
|
3942
3956
|
var init_jsonl_parser = __esm({
|
|
3943
3957
|
"src/evaluation/loaders/jsonl-parser.ts"() {
|
|
3944
3958
|
"use strict";
|
|
@@ -3946,10 +3960,10 @@ var init_jsonl_parser = __esm({
|
|
|
3946
3960
|
import_promises9 = require("fs/promises");
|
|
3947
3961
|
import_node_path8 = __toESM(require("path"), 1);
|
|
3948
3962
|
import_micromatch = __toESM(require("micromatch"), 1);
|
|
3949
|
-
import_yaml4 = require("yaml");
|
|
3950
3963
|
init_input_message_utils();
|
|
3951
3964
|
init_interpolation();
|
|
3952
3965
|
init_types();
|
|
3966
|
+
init_yaml_loader();
|
|
3953
3967
|
init_file_resolver();
|
|
3954
3968
|
init_grader_parser();
|
|
3955
3969
|
init_message_processor();
|
|
@@ -3962,9 +3976,7 @@ var init_jsonl_parser = __esm({
|
|
|
3962
3976
|
|
|
3963
3977
|
// src/evaluation/metadata.ts
|
|
3964
3978
|
function parseMetadata(suite) {
|
|
3965
|
-
|
|
3966
|
-
const hasDescription = typeof suite.description === "string";
|
|
3967
|
-
if (!hasName) {
|
|
3979
|
+
if (typeof suite.name !== "string") {
|
|
3968
3980
|
return void 0;
|
|
3969
3981
|
}
|
|
3970
3982
|
return MetadataSchema.parse({
|
|
@@ -3984,7 +3996,7 @@ var init_metadata = __esm({
|
|
|
3984
3996
|
init_cjs_shims();
|
|
3985
3997
|
import_zod = require("zod");
|
|
3986
3998
|
MetadataSchema = import_zod.z.object({
|
|
3987
|
-
name: import_zod.z.string().min(1).max(64).regex(/^[a-z0-9-]+$/),
|
|
3999
|
+
name: import_zod.z.string().min(1).max(64).regex(/^[a-z0-9-]+$/).optional(),
|
|
3988
4000
|
description: import_zod.z.string().min(1).max(1024).optional(),
|
|
3989
4001
|
version: import_zod.z.string().optional(),
|
|
3990
4002
|
author: import_zod.z.string().optional(),
|
|
@@ -12363,7 +12375,7 @@ async function discoverCopilotSessions(opts) {
|
|
|
12363
12375
|
const eventsPath = import_node_path21.default.join(sessionDir, "events.jsonl");
|
|
12364
12376
|
try {
|
|
12365
12377
|
const workspaceContent = await (0, import_promises19.readFile)(workspacePath, "utf8");
|
|
12366
|
-
const workspace = (
|
|
12378
|
+
const workspace = parseYamlValue(workspaceContent) ?? {};
|
|
12367
12379
|
const cwd = String(workspace.cwd ?? "");
|
|
12368
12380
|
let updatedAt;
|
|
12369
12381
|
try {
|
|
@@ -12407,7 +12419,7 @@ async function discoverCopilotSessions(opts) {
|
|
|
12407
12419
|
filtered.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
|
|
12408
12420
|
return filtered.slice(0, limit);
|
|
12409
12421
|
}
|
|
12410
|
-
var import_promises19, import_node_os6, import_node_path21,
|
|
12422
|
+
var import_promises19, import_node_os6, import_node_path21, DEFAULT_SESSION_STATE_DIR;
|
|
12411
12423
|
var init_copilot_session_discovery = __esm({
|
|
12412
12424
|
"src/evaluation/providers/copilot-session-discovery.ts"() {
|
|
12413
12425
|
"use strict";
|
|
@@ -12415,7 +12427,7 @@ var init_copilot_session_discovery = __esm({
|
|
|
12415
12427
|
import_promises19 = require("fs/promises");
|
|
12416
12428
|
import_node_os6 = require("os");
|
|
12417
12429
|
import_node_path21 = __toESM(require("path"), 1);
|
|
12418
|
-
|
|
12430
|
+
init_yaml_loader();
|
|
12419
12431
|
DEFAULT_SESSION_STATE_DIR = () => import_node_path21.default.join((0, import_node_os6.homedir)(), ".copilot", "session-state");
|
|
12420
12432
|
}
|
|
12421
12433
|
});
|
|
@@ -17644,7 +17656,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
17644
17656
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
17645
17657
|
}
|
|
17646
17658
|
const raw = await (0, import_promises31.readFile)(absolutePath, "utf8");
|
|
17647
|
-
const parsed = (
|
|
17659
|
+
const parsed = parseYamlValue(raw);
|
|
17648
17660
|
if (!isRecord(parsed)) {
|
|
17649
17661
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
17650
17662
|
}
|
|
@@ -17657,7 +17669,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
17657
17669
|
function listTargetNames(definitions) {
|
|
17658
17670
|
return definitions.map((definition) => definition.name);
|
|
17659
17671
|
}
|
|
17660
|
-
var import_node_fs13, import_promises31, import_node_path38
|
|
17672
|
+
var import_node_fs13, import_promises31, import_node_path38;
|
|
17661
17673
|
var init_targets_file = __esm({
|
|
17662
17674
|
"src/evaluation/providers/targets-file.ts"() {
|
|
17663
17675
|
"use strict";
|
|
@@ -17665,7 +17677,7 @@ var init_targets_file = __esm({
|
|
|
17665
17677
|
import_node_fs13 = require("fs");
|
|
17666
17678
|
import_promises31 = require("fs/promises");
|
|
17667
17679
|
import_node_path38 = __toESM(require("path"), 1);
|
|
17668
|
-
|
|
17680
|
+
init_yaml_loader();
|
|
17669
17681
|
}
|
|
17670
17682
|
});
|
|
17671
17683
|
|
|
@@ -20080,6 +20092,9 @@ async function runEvaluation(options) {
|
|
|
20080
20092
|
result = { ...result, beforeAllOutput };
|
|
20081
20093
|
beforeAllOutputAttached = true;
|
|
20082
20094
|
}
|
|
20095
|
+
if (evalCase.metadata && !result.metadata) {
|
|
20096
|
+
result = { ...result, metadata: evalCase.metadata };
|
|
20097
|
+
}
|
|
20083
20098
|
if (onProgress) {
|
|
20084
20099
|
await onProgress({
|
|
20085
20100
|
workerId,
|
|
@@ -22487,7 +22502,7 @@ async function readTestSuiteMetadata(testFilePath) {
|
|
|
22487
22502
|
try {
|
|
22488
22503
|
const absolutePath = import_node_path50.default.resolve(testFilePath);
|
|
22489
22504
|
const content = await (0, import_promises36.readFile)(absolutePath, "utf8");
|
|
22490
|
-
const parsed = interpolateEnv((
|
|
22505
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
22491
22506
|
if (!isJsonObject(parsed)) {
|
|
22492
22507
|
return {};
|
|
22493
22508
|
}
|
|
@@ -22559,7 +22574,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22559
22574
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
22560
22575
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
22561
22576
|
const rawFile = await (0, import_promises36.readFile)(absoluteTestPath, "utf8");
|
|
22562
|
-
const interpolated = interpolateEnv((
|
|
22577
|
+
const interpolated = interpolateEnv(parseYamlValue(rawFile), process.env);
|
|
22563
22578
|
if (!isJsonObject(interpolated)) {
|
|
22564
22579
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
22565
22580
|
}
|
|
@@ -22596,6 +22611,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22596
22611
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
22597
22612
|
}
|
|
22598
22613
|
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
22614
|
+
const suiteGovernance = extractSuiteGovernance(suite);
|
|
22599
22615
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
22600
22616
|
const suiteInputFiles = suite.input_files;
|
|
22601
22617
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
@@ -22705,7 +22721,9 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22705
22721
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
22706
22722
|
const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
|
|
22707
22723
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
22708
|
-
const
|
|
22724
|
+
const rawCaseMetadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
|
|
22725
|
+
const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
|
|
22726
|
+
const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
|
|
22709
22727
|
const caseTargets = extractTargetsFromTestCase(testCaseConfig);
|
|
22710
22728
|
const dependsOn = Array.isArray(testCaseConfig.depends_on) ? testCaseConfig.depends_on.filter(
|
|
22711
22729
|
(v) => typeof v === "string"
|
|
@@ -22845,7 +22863,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
22845
22863
|
} catch {
|
|
22846
22864
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
22847
22865
|
}
|
|
22848
|
-
const parsed = interpolateEnv((
|
|
22866
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
22849
22867
|
if (!isJsonObject(parsed)) {
|
|
22850
22868
|
throw new Error(
|
|
22851
22869
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
@@ -22952,6 +22970,47 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
22952
22970
|
function asString5(value) {
|
|
22953
22971
|
return typeof value === "string" ? value : void 0;
|
|
22954
22972
|
}
|
|
22973
|
+
function extractSuiteGovernance(suite) {
|
|
22974
|
+
const top = suite.governance;
|
|
22975
|
+
if (isJsonObject(top)) {
|
|
22976
|
+
return top;
|
|
22977
|
+
}
|
|
22978
|
+
const wrapper = suite.metadata;
|
|
22979
|
+
if (isJsonObject(wrapper)) {
|
|
22980
|
+
const nested = wrapper.governance;
|
|
22981
|
+
if (isJsonObject(nested)) {
|
|
22982
|
+
return nested;
|
|
22983
|
+
}
|
|
22984
|
+
}
|
|
22985
|
+
return void 0;
|
|
22986
|
+
}
|
|
22987
|
+
function mergeSuiteMetadataPayload(caseMetadata, suitePayload) {
|
|
22988
|
+
if (!suitePayload) return caseMetadata;
|
|
22989
|
+
const result = { ...caseMetadata ?? {} };
|
|
22990
|
+
for (const [key, suiteVal] of Object.entries(suitePayload)) {
|
|
22991
|
+
const caseVal = result[key];
|
|
22992
|
+
if (Array.isArray(suiteVal) && Array.isArray(caseVal)) {
|
|
22993
|
+
const seen = /* @__PURE__ */ new Set();
|
|
22994
|
+
const out = [];
|
|
22995
|
+
for (const v of [...suiteVal, ...caseVal]) {
|
|
22996
|
+
const k = typeof v === "string" ? v : JSON.stringify(v);
|
|
22997
|
+
if (!seen.has(k)) {
|
|
22998
|
+
seen.add(k);
|
|
22999
|
+
out.push(v);
|
|
23000
|
+
}
|
|
23001
|
+
}
|
|
23002
|
+
result[key] = out;
|
|
23003
|
+
} else if (isJsonObject(suiteVal) && isJsonObject(caseVal)) {
|
|
23004
|
+
result[key] = mergeSuiteMetadataPayload(
|
|
23005
|
+
caseVal,
|
|
23006
|
+
suiteVal
|
|
23007
|
+
);
|
|
23008
|
+
} else if (caseVal === void 0) {
|
|
23009
|
+
result[key] = suiteVal;
|
|
23010
|
+
}
|
|
23011
|
+
}
|
|
23012
|
+
return result;
|
|
23013
|
+
}
|
|
22955
23014
|
function logWarning5(message, details) {
|
|
22956
23015
|
if (details && details.length > 0) {
|
|
22957
23016
|
const detailBlock = details.join("\n");
|
|
@@ -22970,7 +23029,7 @@ ${detailBlock}${ANSI_RESET9}`);
|
|
|
22970
23029
|
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET9}`);
|
|
22971
23030
|
}
|
|
22972
23031
|
}
|
|
22973
|
-
var import_promises36, import_node_path50, import_micromatch4,
|
|
23032
|
+
var import_promises36, import_node_path50, import_micromatch4, ANSI_YELLOW8, ANSI_RED3, ANSI_RESET9, loadEvalSuite, loadEvalCases, loadEvalCaseById;
|
|
22974
23033
|
var init_yaml_parser = __esm({
|
|
22975
23034
|
"src/evaluation/yaml-parser.ts"() {
|
|
22976
23035
|
"use strict";
|
|
@@ -22978,7 +23037,6 @@ var init_yaml_parser = __esm({
|
|
|
22978
23037
|
import_promises36 = require("fs/promises");
|
|
22979
23038
|
import_node_path50 = __toESM(require("path"), 1);
|
|
22980
23039
|
import_micromatch4 = __toESM(require("micromatch"), 1);
|
|
22981
|
-
import_yaml7 = require("yaml");
|
|
22982
23040
|
init_input_message_utils();
|
|
22983
23041
|
init_interpolation();
|
|
22984
23042
|
init_agent_skills_parser();
|
|
@@ -22992,6 +23050,7 @@ var init_yaml_parser = __esm({
|
|
|
22992
23050
|
init_metadata();
|
|
22993
23051
|
init_types();
|
|
22994
23052
|
init_repo_config_parser();
|
|
23053
|
+
init_yaml_loader();
|
|
22995
23054
|
init_prompt_builder();
|
|
22996
23055
|
init_config_loader();
|
|
22997
23056
|
init_jsonl_parser();
|
|
@@ -24464,6 +24523,7 @@ __export(index_exports, {
|
|
|
24464
24523
|
parseEnvOutput: () => parseEnvOutput,
|
|
24465
24524
|
parseJsonFromText: () => parseJsonFromText,
|
|
24466
24525
|
parseJsonSafe: () => parseJsonSafe,
|
|
24526
|
+
parseYamlValue: () => parseYamlValue,
|
|
24467
24527
|
prepareResultsRepoBranch: () => prepareResultsRepoBranch,
|
|
24468
24528
|
pushResultsRepoBranch: () => pushResultsRepoBranch,
|
|
24469
24529
|
readJsonFile: () => readJsonFile,
|
|
@@ -24523,6 +24583,7 @@ init_cjs_shims();
|
|
|
24523
24583
|
init_content();
|
|
24524
24584
|
init_types();
|
|
24525
24585
|
init_trace();
|
|
24586
|
+
init_yaml_loader();
|
|
24526
24587
|
init_yaml_parser();
|
|
24527
24588
|
init_agent_skills_parser();
|
|
24528
24589
|
init_config_loader();
|
|
@@ -24532,7 +24593,7 @@ init_ts_eval_loader();
|
|
|
24532
24593
|
init_cjs_shims();
|
|
24533
24594
|
var import_node_fs18 = require("fs");
|
|
24534
24595
|
var import_node_path51 = __toESM(require("path"), 1);
|
|
24535
|
-
|
|
24596
|
+
init_yaml_loader();
|
|
24536
24597
|
function codeGraderInstruction(graderName, description) {
|
|
24537
24598
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
24538
24599
|
return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
|
|
@@ -24771,7 +24832,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
24771
24832
|
}
|
|
24772
24833
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
24773
24834
|
const content = (0, import_node_fs18.readFileSync)(evalYamlPath, "utf8");
|
|
24774
|
-
const parsed = (
|
|
24835
|
+
const parsed = parseYamlValue(content);
|
|
24775
24836
|
return transpileEvalYaml(parsed, import_node_path51.default.basename(evalYamlPath));
|
|
24776
24837
|
}
|
|
24777
24838
|
function getOutputFilenames(result) {
|
|
@@ -24978,8 +25039,8 @@ init_pool_manager();
|
|
|
24978
25039
|
init_cjs_shims();
|
|
24979
25040
|
var import_promises37 = require("fs/promises");
|
|
24980
25041
|
var import_node_path52 = __toESM(require("path"), 1);
|
|
24981
|
-
var import_yaml9 = require("yaml");
|
|
24982
25042
|
init_interpolation();
|
|
25043
|
+
init_yaml_loader();
|
|
24983
25044
|
init_repo_config_parser();
|
|
24984
25045
|
function normalizeGitUrl(url) {
|
|
24985
25046
|
let normalized = url.replace(/\.git$/, "");
|
|
@@ -25027,7 +25088,7 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
25027
25088
|
}
|
|
25028
25089
|
async function extractReposFromEvalFile(filePath) {
|
|
25029
25090
|
const content = await (0, import_promises37.readFile)(filePath, "utf8");
|
|
25030
|
-
const parsed = interpolateEnv((
|
|
25091
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
25031
25092
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
25032
25093
|
const obj = parsed;
|
|
25033
25094
|
const evalFileDir = import_node_path52.default.dirname(import_node_path52.default.resolve(filePath));
|
|
@@ -25048,7 +25109,7 @@ async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
|
25048
25109
|
if (typeof raw === "string") {
|
|
25049
25110
|
const workspaceFilePath = import_node_path52.default.resolve(evalFileDir, raw);
|
|
25050
25111
|
const content = await (0, import_promises37.readFile)(workspaceFilePath, "utf8");
|
|
25051
|
-
const parsed = interpolateEnv((
|
|
25112
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
25052
25113
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
25053
25114
|
return extractReposFromObject(parsed);
|
|
25054
25115
|
}
|
|
@@ -25412,7 +25473,8 @@ init_paths();
|
|
|
25412
25473
|
init_cjs_shims();
|
|
25413
25474
|
var import_node_fs20 = require("fs");
|
|
25414
25475
|
var import_node_path55 = __toESM(require("path"), 1);
|
|
25415
|
-
var
|
|
25476
|
+
var import_yaml2 = require("yaml");
|
|
25477
|
+
init_yaml_loader();
|
|
25416
25478
|
init_paths();
|
|
25417
25479
|
function getBenchmarksRegistryPath() {
|
|
25418
25480
|
return import_node_path55.default.join(getAgentvConfigDir(), "benchmarks.yaml");
|
|
@@ -25447,7 +25509,7 @@ function loadBenchmarkRegistry() {
|
|
|
25447
25509
|
}
|
|
25448
25510
|
try {
|
|
25449
25511
|
const raw = (0, import_node_fs20.readFileSync)(registryPath, "utf-8");
|
|
25450
|
-
const parsed = (
|
|
25512
|
+
const parsed = parseYamlValue(raw);
|
|
25451
25513
|
if (!parsed || typeof parsed !== "object") {
|
|
25452
25514
|
return { benchmarks: [] };
|
|
25453
25515
|
}
|
|
@@ -25464,7 +25526,7 @@ function saveBenchmarkRegistry(registry) {
|
|
|
25464
25526
|
(0, import_node_fs20.mkdirSync)(dir, { recursive: true });
|
|
25465
25527
|
}
|
|
25466
25528
|
const payload = { benchmarks: registry.benchmarks.map(toYaml) };
|
|
25467
|
-
(0, import_node_fs20.writeFileSync)(registryPath, (0,
|
|
25529
|
+
(0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml2.stringify)(payload), "utf-8");
|
|
25468
25530
|
}
|
|
25469
25531
|
function deriveBenchmarkId(dirPath, existingIds) {
|
|
25470
25532
|
const base = import_node_path55.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
@@ -26991,6 +27053,7 @@ function createAgentKernel() {
|
|
|
26991
27053
|
parseEnvOutput,
|
|
26992
27054
|
parseJsonFromText,
|
|
26993
27055
|
parseJsonSafe,
|
|
27056
|
+
parseYamlValue,
|
|
26994
27057
|
prepareResultsRepoBranch,
|
|
26995
27058
|
pushResultsRepoBranch,
|
|
26996
27059
|
readJsonFile,
|