@agentv/core 4.22.0 → 4.24.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-LKX4QW3G.js → chunk-4VLOUBFL.js} +11 -4
- package/dist/chunk-4VLOUBFL.js.map +1 -0
- package/dist/{chunk-B3BLJRYI.js → chunk-CUVG5O5P.js} +71 -26
- package/dist/chunk-CUVG5O5P.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +22 -19
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +14 -18
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +108 -40
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +22 -4
- package/dist/index.d.ts +22 -4
- package/dist/index.js +9 -9
- package/dist/index.js.map +1 -1
- package/dist/{ts-eval-loader-PA4YFM5D.js → ts-eval-loader-SYQYQPMC.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-B3BLJRYI.js.map +0 -1
- package/dist/chunk-LKX4QW3G.js.map +0 -1
- /package/dist/{ts-eval-loader-PA4YFM5D.js.map → ts-eval-loader-SYQYQPMC.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -277,6 +277,20 @@ var init_trace = __esm({
|
|
|
277
277
|
}
|
|
278
278
|
});
|
|
279
279
|
|
|
280
|
+
// src/evaluation/yaml-loader.ts
|
|
281
|
+
function parseYamlValue(content) {
|
|
282
|
+
return (0, import_yaml.parse)(content, PARSE_OPTIONS);
|
|
283
|
+
}
|
|
284
|
+
var import_yaml, PARSE_OPTIONS;
|
|
285
|
+
var init_yaml_loader = __esm({
|
|
286
|
+
"src/evaluation/yaml-loader.ts"() {
|
|
287
|
+
"use strict";
|
|
288
|
+
init_cjs_shims();
|
|
289
|
+
import_yaml = require("yaml");
|
|
290
|
+
PARSE_OPTIONS = { merge: true };
|
|
291
|
+
}
|
|
292
|
+
});
|
|
293
|
+
|
|
280
294
|
// src/evaluation/input-message-utils.ts
|
|
281
295
|
function flattenInputMessages(messages) {
|
|
282
296
|
return messages.flatMap((message) => extractContentSegments(message.content));
|
|
@@ -472,7 +486,7 @@ function isGlobPattern(filePath) {
|
|
|
472
486
|
return filePath.includes("*") || filePath.includes("?") || filePath.includes("{");
|
|
473
487
|
}
|
|
474
488
|
function parseYamlCases(content, filePath) {
|
|
475
|
-
const raw = (
|
|
489
|
+
const raw = parseYamlValue(content);
|
|
476
490
|
const parsed = interpolateEnv(raw, process.env);
|
|
477
491
|
if (!Array.isArray(parsed)) {
|
|
478
492
|
throw new Error(
|
|
@@ -591,7 +605,7 @@ async function loadCasesFromDirectory(dirPath) {
|
|
|
591
605
|
throw new Error(`Cannot read case file: ${caseFilePath}
|
|
592
606
|
${message}`);
|
|
593
607
|
}
|
|
594
|
-
const raw = (
|
|
608
|
+
const raw = parseYamlValue(content);
|
|
595
609
|
const parsed = interpolateEnv(raw, process.env);
|
|
596
610
|
if (!isJsonObject(parsed)) {
|
|
597
611
|
throw new Error(
|
|
@@ -628,7 +642,7 @@ async function expandFileReferences(tests, evalFileDir) {
|
|
|
628
642
|
}
|
|
629
643
|
return expanded;
|
|
630
644
|
}
|
|
631
|
-
var import_promises2, import_node_path2, import_fast_glob,
|
|
645
|
+
var import_promises2, import_node_path2, import_fast_glob, ANSI_YELLOW, ANSI_RESET2, FILE_PROTOCOL;
|
|
632
646
|
var init_case_file_loader = __esm({
|
|
633
647
|
"src/evaluation/loaders/case-file-loader.ts"() {
|
|
634
648
|
"use strict";
|
|
@@ -636,9 +650,9 @@ var init_case_file_loader = __esm({
|
|
|
636
650
|
import_promises2 = require("fs/promises");
|
|
637
651
|
import_node_path2 = __toESM(require("path"), 1);
|
|
638
652
|
import_fast_glob = __toESM(require("fast-glob"), 1);
|
|
639
|
-
import_yaml = require("yaml");
|
|
640
653
|
init_interpolation();
|
|
641
654
|
init_types();
|
|
655
|
+
init_yaml_loader();
|
|
642
656
|
ANSI_YELLOW = "\x1B[33m";
|
|
643
657
|
ANSI_RESET2 = "\x1B[0m";
|
|
644
658
|
FILE_PROTOCOL = "file://";
|
|
@@ -763,7 +777,7 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
763
777
|
}
|
|
764
778
|
try {
|
|
765
779
|
const rawConfig = await (0, import_promises4.readFile)(configPath, "utf8");
|
|
766
|
-
const parsed = interpolateEnv((
|
|
780
|
+
const parsed = interpolateEnv(parseYamlValue(rawConfig), process.env);
|
|
767
781
|
if (!isJsonObject(parsed)) {
|
|
768
782
|
logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
|
|
769
783
|
continue;
|
|
@@ -1163,16 +1177,16 @@ function parseHooksConfig(raw, configPath) {
|
|
|
1163
1177
|
function logWarning(message) {
|
|
1164
1178
|
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
|
|
1165
1179
|
}
|
|
1166
|
-
var import_promises4, import_node_path4,
|
|
1180
|
+
var import_promises4, import_node_path4, ANSI_YELLOW2, ANSI_RESET3, DEFAULT_EVAL_PATTERNS, VALID_TRIAL_STRATEGIES;
|
|
1167
1181
|
var init_config_loader = __esm({
|
|
1168
1182
|
"src/evaluation/loaders/config-loader.ts"() {
|
|
1169
1183
|
"use strict";
|
|
1170
1184
|
init_cjs_shims();
|
|
1171
1185
|
import_promises4 = require("fs/promises");
|
|
1172
1186
|
import_node_path4 = __toESM(require("path"), 1);
|
|
1173
|
-
import_yaml2 = require("yaml");
|
|
1174
1187
|
init_interpolation();
|
|
1175
1188
|
init_types();
|
|
1189
|
+
init_yaml_loader();
|
|
1176
1190
|
init_file_resolver();
|
|
1177
1191
|
ANSI_YELLOW2 = "\x1B[33m";
|
|
1178
1192
|
ANSI_RESET3 = "\x1B[0m";
|
|
@@ -1689,7 +1703,7 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
|
|
|
1689
1703
|
throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
|
|
1690
1704
|
}
|
|
1691
1705
|
const content = await (0, import_promises7.readFile)(resolved.resolvedPath, "utf8");
|
|
1692
|
-
const parsed = interpolateEnv((
|
|
1706
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
1693
1707
|
if (!isJsonObject2(parsed)) {
|
|
1694
1708
|
throw new Error(
|
|
1695
1709
|
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} (expected a YAML object with an assertions array)`
|
|
@@ -3296,18 +3310,18 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
3296
3310
|
rubrics: rubricItems
|
|
3297
3311
|
};
|
|
3298
3312
|
}
|
|
3299
|
-
var import_promises7, import_node_path6,
|
|
3313
|
+
var import_promises7, import_node_path6, ANSI_YELLOW4, ANSI_RESET5, MAX_ASSERTION_INCLUDE_DEPTH, PROMPT_FILE_PREFIX, VALID_FIELD_MATCH_TYPES, VALID_FIELD_AGGREGATION_TYPES;
|
|
3300
3314
|
var init_grader_parser = __esm({
|
|
3301
3315
|
"src/evaluation/loaders/grader-parser.ts"() {
|
|
3302
3316
|
"use strict";
|
|
3303
3317
|
init_cjs_shims();
|
|
3304
3318
|
import_promises7 = require("fs/promises");
|
|
3305
3319
|
import_node_path6 = __toESM(require("path"), 1);
|
|
3306
|
-
import_yaml3 = require("yaml");
|
|
3307
3320
|
init_content_preprocessor();
|
|
3308
3321
|
init_interpolation();
|
|
3309
3322
|
init_types();
|
|
3310
3323
|
init_prompt_validator();
|
|
3324
|
+
init_yaml_loader();
|
|
3311
3325
|
init_file_resolver();
|
|
3312
3326
|
ANSI_YELLOW4 = "\x1B[33m";
|
|
3313
3327
|
ANSI_RESET5 = "\x1B[0m";
|
|
@@ -3759,7 +3773,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
3759
3773
|
}
|
|
3760
3774
|
try {
|
|
3761
3775
|
const content = await (0, import_promises9.readFile)(sidecarPath, "utf8");
|
|
3762
|
-
const parsed = interpolateEnv((
|
|
3776
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
3763
3777
|
if (!isJsonObject(parsed)) {
|
|
3764
3778
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
3765
3779
|
return {};
|
|
@@ -3938,7 +3952,7 @@ ${detailBlock}${ANSI_RESET7}`);
|
|
|
3938
3952
|
console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
|
|
3939
3953
|
}
|
|
3940
3954
|
}
|
|
3941
|
-
var import_promises9, import_node_path8, import_micromatch,
|
|
3955
|
+
var import_promises9, import_node_path8, import_micromatch, ANSI_YELLOW6, ANSI_RED2, ANSI_RESET7;
|
|
3942
3956
|
var init_jsonl_parser = __esm({
|
|
3943
3957
|
"src/evaluation/loaders/jsonl-parser.ts"() {
|
|
3944
3958
|
"use strict";
|
|
@@ -3946,10 +3960,10 @@ var init_jsonl_parser = __esm({
|
|
|
3946
3960
|
import_promises9 = require("fs/promises");
|
|
3947
3961
|
import_node_path8 = __toESM(require("path"), 1);
|
|
3948
3962
|
import_micromatch = __toESM(require("micromatch"), 1);
|
|
3949
|
-
import_yaml4 = require("yaml");
|
|
3950
3963
|
init_input_message_utils();
|
|
3951
3964
|
init_interpolation();
|
|
3952
3965
|
init_types();
|
|
3966
|
+
init_yaml_loader();
|
|
3953
3967
|
init_file_resolver();
|
|
3954
3968
|
init_grader_parser();
|
|
3955
3969
|
init_message_processor();
|
|
@@ -3962,9 +3976,7 @@ var init_jsonl_parser = __esm({
|
|
|
3962
3976
|
|
|
3963
3977
|
// src/evaluation/metadata.ts
|
|
3964
3978
|
function parseMetadata(suite) {
|
|
3965
|
-
|
|
3966
|
-
const hasDescription = typeof suite.description === "string";
|
|
3967
|
-
if (!hasName) {
|
|
3979
|
+
if (typeof suite.name !== "string") {
|
|
3968
3980
|
return void 0;
|
|
3969
3981
|
}
|
|
3970
3982
|
return MetadataSchema.parse({
|
|
@@ -3984,7 +3996,7 @@ var init_metadata = __esm({
|
|
|
3984
3996
|
init_cjs_shims();
|
|
3985
3997
|
import_zod = require("zod");
|
|
3986
3998
|
MetadataSchema = import_zod.z.object({
|
|
3987
|
-
name: import_zod.z.string().min(1).max(64).regex(/^[a-z0-9-]+$/),
|
|
3999
|
+
name: import_zod.z.string().min(1).max(64).regex(/^[a-z0-9-]+$/).optional(),
|
|
3988
4000
|
description: import_zod.z.string().min(1).max(1024).optional(),
|
|
3989
4001
|
version: import_zod.z.string().optional(),
|
|
3990
4002
|
author: import_zod.z.string().optional(),
|
|
@@ -12363,7 +12375,7 @@ async function discoverCopilotSessions(opts) {
|
|
|
12363
12375
|
const eventsPath = import_node_path21.default.join(sessionDir, "events.jsonl");
|
|
12364
12376
|
try {
|
|
12365
12377
|
const workspaceContent = await (0, import_promises19.readFile)(workspacePath, "utf8");
|
|
12366
|
-
const workspace = (
|
|
12378
|
+
const workspace = parseYamlValue(workspaceContent) ?? {};
|
|
12367
12379
|
const cwd = String(workspace.cwd ?? "");
|
|
12368
12380
|
let updatedAt;
|
|
12369
12381
|
try {
|
|
@@ -12407,7 +12419,7 @@ async function discoverCopilotSessions(opts) {
|
|
|
12407
12419
|
filtered.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
|
|
12408
12420
|
return filtered.slice(0, limit);
|
|
12409
12421
|
}
|
|
12410
|
-
var import_promises19, import_node_os6, import_node_path21,
|
|
12422
|
+
var import_promises19, import_node_os6, import_node_path21, DEFAULT_SESSION_STATE_DIR;
|
|
12411
12423
|
var init_copilot_session_discovery = __esm({
|
|
12412
12424
|
"src/evaluation/providers/copilot-session-discovery.ts"() {
|
|
12413
12425
|
"use strict";
|
|
@@ -12415,7 +12427,7 @@ var init_copilot_session_discovery = __esm({
|
|
|
12415
12427
|
import_promises19 = require("fs/promises");
|
|
12416
12428
|
import_node_os6 = require("os");
|
|
12417
12429
|
import_node_path21 = __toESM(require("path"), 1);
|
|
12418
|
-
|
|
12430
|
+
init_yaml_loader();
|
|
12419
12431
|
DEFAULT_SESSION_STATE_DIR = () => import_node_path21.default.join((0, import_node_os6.homedir)(), ".copilot", "session-state");
|
|
12420
12432
|
}
|
|
12421
12433
|
});
|
|
@@ -17644,7 +17656,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
17644
17656
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
17645
17657
|
}
|
|
17646
17658
|
const raw = await (0, import_promises31.readFile)(absolutePath, "utf8");
|
|
17647
|
-
const parsed = (
|
|
17659
|
+
const parsed = parseYamlValue(raw);
|
|
17648
17660
|
if (!isRecord(parsed)) {
|
|
17649
17661
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
17650
17662
|
}
|
|
@@ -17657,7 +17669,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
17657
17669
|
function listTargetNames(definitions) {
|
|
17658
17670
|
return definitions.map((definition) => definition.name);
|
|
17659
17671
|
}
|
|
17660
|
-
var import_node_fs13, import_promises31, import_node_path38
|
|
17672
|
+
var import_node_fs13, import_promises31, import_node_path38;
|
|
17661
17673
|
var init_targets_file = __esm({
|
|
17662
17674
|
"src/evaluation/providers/targets-file.ts"() {
|
|
17663
17675
|
"use strict";
|
|
@@ -17665,7 +17677,7 @@ var init_targets_file = __esm({
|
|
|
17665
17677
|
import_node_fs13 = require("fs");
|
|
17666
17678
|
import_promises31 = require("fs/promises");
|
|
17667
17679
|
import_node_path38 = __toESM(require("path"), 1);
|
|
17668
|
-
|
|
17680
|
+
init_yaml_loader();
|
|
17669
17681
|
}
|
|
17670
17682
|
});
|
|
17671
17683
|
|
|
@@ -20080,6 +20092,9 @@ async function runEvaluation(options) {
|
|
|
20080
20092
|
result = { ...result, beforeAllOutput };
|
|
20081
20093
|
beforeAllOutputAttached = true;
|
|
20082
20094
|
}
|
|
20095
|
+
if (evalCase.metadata && !result.metadata) {
|
|
20096
|
+
result = { ...result, metadata: evalCase.metadata };
|
|
20097
|
+
}
|
|
20083
20098
|
if (onProgress) {
|
|
20084
20099
|
await onProgress({
|
|
20085
20100
|
workerId,
|
|
@@ -20090,7 +20105,9 @@ async function runEvaluation(options) {
|
|
|
20090
20105
|
completedAt: Date.now(),
|
|
20091
20106
|
error: result.error,
|
|
20092
20107
|
score: result.score,
|
|
20093
|
-
executionStatus: result.executionStatus
|
|
20108
|
+
executionStatus: result.executionStatus,
|
|
20109
|
+
durationMs: result.durationMs,
|
|
20110
|
+
evalRunDurationMs: result.evalRun?.durationMs
|
|
20094
20111
|
});
|
|
20095
20112
|
}
|
|
20096
20113
|
if (onResult) {
|
|
@@ -20404,7 +20421,8 @@ async function runBatchEvaluation(options) {
|
|
|
20404
20421
|
completedAt: Date.now(),
|
|
20405
20422
|
error: error instanceof Error ? error.message : String(error),
|
|
20406
20423
|
score: errorResult.score,
|
|
20407
|
-
executionStatus: errorResult.executionStatus
|
|
20424
|
+
executionStatus: errorResult.executionStatus,
|
|
20425
|
+
evalRunDurationMs: errorResult.evalRun?.durationMs
|
|
20408
20426
|
});
|
|
20409
20427
|
}
|
|
20410
20428
|
continue;
|
|
@@ -20422,7 +20440,9 @@ async function runBatchEvaluation(options) {
|
|
|
20422
20440
|
completedAt: Date.now(),
|
|
20423
20441
|
error: result.error,
|
|
20424
20442
|
score: result.score,
|
|
20425
|
-
executionStatus: result.executionStatus
|
|
20443
|
+
executionStatus: result.executionStatus,
|
|
20444
|
+
durationMs: result.durationMs,
|
|
20445
|
+
evalRunDurationMs: result.evalRun?.durationMs
|
|
20426
20446
|
});
|
|
20427
20447
|
}
|
|
20428
20448
|
}
|
|
@@ -22487,7 +22507,7 @@ async function readTestSuiteMetadata(testFilePath) {
|
|
|
22487
22507
|
try {
|
|
22488
22508
|
const absolutePath = import_node_path50.default.resolve(testFilePath);
|
|
22489
22509
|
const content = await (0, import_promises36.readFile)(absolutePath, "utf8");
|
|
22490
|
-
const parsed = interpolateEnv((
|
|
22510
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
22491
22511
|
if (!isJsonObject(parsed)) {
|
|
22492
22512
|
return {};
|
|
22493
22513
|
}
|
|
@@ -22559,7 +22579,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22559
22579
|
const searchRoots = buildSearchRoots(absoluteTestPath, repoRootPath);
|
|
22560
22580
|
const config = await loadConfig(absoluteTestPath, repoRootPath);
|
|
22561
22581
|
const rawFile = await (0, import_promises36.readFile)(absoluteTestPath, "utf8");
|
|
22562
|
-
const interpolated = interpolateEnv((
|
|
22582
|
+
const interpolated = interpolateEnv(parseYamlValue(rawFile), process.env);
|
|
22563
22583
|
if (!isJsonObject(interpolated)) {
|
|
22564
22584
|
throw new Error(`Invalid test file format: ${evalFilePath}`);
|
|
22565
22585
|
}
|
|
@@ -22596,6 +22616,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22596
22616
|
throw new Error(`Invalid test file format: ${evalFilePath} - missing 'tests' field`);
|
|
22597
22617
|
}
|
|
22598
22618
|
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);
|
|
22619
|
+
const suiteGovernance = extractSuiteGovernance(suite);
|
|
22599
22620
|
const suiteInputMessages = expandInputShorthand(suite.input);
|
|
22600
22621
|
const suiteInputFiles = suite.input_files;
|
|
22601
22622
|
const rawGlobalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
|
|
@@ -22705,7 +22726,9 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
22705
22726
|
const userFilePaths = collectResolvedInputFilePaths(inputMessages);
|
|
22706
22727
|
const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
|
|
22707
22728
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
22708
|
-
const
|
|
22729
|
+
const rawCaseMetadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
|
|
22730
|
+
const suitePayload = suiteGovernance !== void 0 ? { governance: suiteGovernance } : void 0;
|
|
22731
|
+
const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);
|
|
22709
22732
|
const caseTargets = extractTargetsFromTestCase(testCaseConfig);
|
|
22710
22733
|
const dependsOn = Array.isArray(testCaseConfig.depends_on) ? testCaseConfig.depends_on.filter(
|
|
22711
22734
|
(v) => typeof v === "string"
|
|
@@ -22845,7 +22868,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
22845
22868
|
} catch {
|
|
22846
22869
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
22847
22870
|
}
|
|
22848
|
-
const parsed = interpolateEnv((
|
|
22871
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
22849
22872
|
if (!isJsonObject(parsed)) {
|
|
22850
22873
|
throw new Error(
|
|
22851
22874
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
@@ -22952,6 +22975,47 @@ function mergeWorkspaceConfigs(suiteLevel, caseLevel) {
|
|
|
22952
22975
|
function asString5(value) {
|
|
22953
22976
|
return typeof value === "string" ? value : void 0;
|
|
22954
22977
|
}
|
|
22978
|
+
function extractSuiteGovernance(suite) {
|
|
22979
|
+
const top = suite.governance;
|
|
22980
|
+
if (isJsonObject(top)) {
|
|
22981
|
+
return top;
|
|
22982
|
+
}
|
|
22983
|
+
const wrapper = suite.metadata;
|
|
22984
|
+
if (isJsonObject(wrapper)) {
|
|
22985
|
+
const nested = wrapper.governance;
|
|
22986
|
+
if (isJsonObject(nested)) {
|
|
22987
|
+
return nested;
|
|
22988
|
+
}
|
|
22989
|
+
}
|
|
22990
|
+
return void 0;
|
|
22991
|
+
}
|
|
22992
|
+
function mergeSuiteMetadataPayload(caseMetadata, suitePayload) {
|
|
22993
|
+
if (!suitePayload) return caseMetadata;
|
|
22994
|
+
const result = { ...caseMetadata ?? {} };
|
|
22995
|
+
for (const [key, suiteVal] of Object.entries(suitePayload)) {
|
|
22996
|
+
const caseVal = result[key];
|
|
22997
|
+
if (Array.isArray(suiteVal) && Array.isArray(caseVal)) {
|
|
22998
|
+
const seen = /* @__PURE__ */ new Set();
|
|
22999
|
+
const out = [];
|
|
23000
|
+
for (const v of [...suiteVal, ...caseVal]) {
|
|
23001
|
+
const k = typeof v === "string" ? v : JSON.stringify(v);
|
|
23002
|
+
if (!seen.has(k)) {
|
|
23003
|
+
seen.add(k);
|
|
23004
|
+
out.push(v);
|
|
23005
|
+
}
|
|
23006
|
+
}
|
|
23007
|
+
result[key] = out;
|
|
23008
|
+
} else if (isJsonObject(suiteVal) && isJsonObject(caseVal)) {
|
|
23009
|
+
result[key] = mergeSuiteMetadataPayload(
|
|
23010
|
+
caseVal,
|
|
23011
|
+
suiteVal
|
|
23012
|
+
);
|
|
23013
|
+
} else if (caseVal === void 0) {
|
|
23014
|
+
result[key] = suiteVal;
|
|
23015
|
+
}
|
|
23016
|
+
}
|
|
23017
|
+
return result;
|
|
23018
|
+
}
|
|
22955
23019
|
function logWarning5(message, details) {
|
|
22956
23020
|
if (details && details.length > 0) {
|
|
22957
23021
|
const detailBlock = details.join("\n");
|
|
@@ -22970,7 +23034,7 @@ ${detailBlock}${ANSI_RESET9}`);
|
|
|
22970
23034
|
console.error(`${ANSI_RED3}Error: ${message}${ANSI_RESET9}`);
|
|
22971
23035
|
}
|
|
22972
23036
|
}
|
|
22973
|
-
var import_promises36, import_node_path50, import_micromatch4,
|
|
23037
|
+
var import_promises36, import_node_path50, import_micromatch4, ANSI_YELLOW8, ANSI_RED3, ANSI_RESET9, loadEvalSuite, loadEvalCases, loadEvalCaseById;
|
|
22974
23038
|
var init_yaml_parser = __esm({
|
|
22975
23039
|
"src/evaluation/yaml-parser.ts"() {
|
|
22976
23040
|
"use strict";
|
|
@@ -22978,7 +23042,6 @@ var init_yaml_parser = __esm({
|
|
|
22978
23042
|
import_promises36 = require("fs/promises");
|
|
22979
23043
|
import_node_path50 = __toESM(require("path"), 1);
|
|
22980
23044
|
import_micromatch4 = __toESM(require("micromatch"), 1);
|
|
22981
|
-
import_yaml7 = require("yaml");
|
|
22982
23045
|
init_input_message_utils();
|
|
22983
23046
|
init_interpolation();
|
|
22984
23047
|
init_agent_skills_parser();
|
|
@@ -22992,6 +23055,7 @@ var init_yaml_parser = __esm({
|
|
|
22992
23055
|
init_metadata();
|
|
22993
23056
|
init_types();
|
|
22994
23057
|
init_repo_config_parser();
|
|
23058
|
+
init_yaml_loader();
|
|
22995
23059
|
init_prompt_builder();
|
|
22996
23060
|
init_config_loader();
|
|
22997
23061
|
init_jsonl_parser();
|
|
@@ -24464,6 +24528,7 @@ __export(index_exports, {
|
|
|
24464
24528
|
parseEnvOutput: () => parseEnvOutput,
|
|
24465
24529
|
parseJsonFromText: () => parseJsonFromText,
|
|
24466
24530
|
parseJsonSafe: () => parseJsonSafe,
|
|
24531
|
+
parseYamlValue: () => parseYamlValue,
|
|
24467
24532
|
prepareResultsRepoBranch: () => prepareResultsRepoBranch,
|
|
24468
24533
|
pushResultsRepoBranch: () => pushResultsRepoBranch,
|
|
24469
24534
|
readJsonFile: () => readJsonFile,
|
|
@@ -24523,6 +24588,7 @@ init_cjs_shims();
|
|
|
24523
24588
|
init_content();
|
|
24524
24589
|
init_types();
|
|
24525
24590
|
init_trace();
|
|
24591
|
+
init_yaml_loader();
|
|
24526
24592
|
init_yaml_parser();
|
|
24527
24593
|
init_agent_skills_parser();
|
|
24528
24594
|
init_config_loader();
|
|
@@ -24532,7 +24598,7 @@ init_ts_eval_loader();
|
|
|
24532
24598
|
init_cjs_shims();
|
|
24533
24599
|
var import_node_fs18 = require("fs");
|
|
24534
24600
|
var import_node_path51 = __toESM(require("path"), 1);
|
|
24535
|
-
|
|
24601
|
+
init_yaml_loader();
|
|
24536
24602
|
function codeGraderInstruction(graderName, description) {
|
|
24537
24603
|
const desc = description ? ` This grader: ${description}.` : "";
|
|
24538
24604
|
return `Run \`agentv eval assert ${graderName} --agent-output <agent_output> --agent-input <original_prompt>\` and check the result.${desc} The command accepts --agent-output (the agent's full response text) and --agent-input (the original user prompt). It returns JSON on stdout: {"score": 0-1, "reasoning": "..."}. A score >= 0.5 means pass (exit 0); below 0.5 means fail (exit 1).`;
|
|
@@ -24771,7 +24837,7 @@ function transpileEvalYaml(suite, source = "EVAL.yaml") {
|
|
|
24771
24837
|
}
|
|
24772
24838
|
function transpileEvalYamlFile(evalYamlPath) {
|
|
24773
24839
|
const content = (0, import_node_fs18.readFileSync)(evalYamlPath, "utf8");
|
|
24774
|
-
const parsed = (
|
|
24840
|
+
const parsed = parseYamlValue(content);
|
|
24775
24841
|
return transpileEvalYaml(parsed, import_node_path51.default.basename(evalYamlPath));
|
|
24776
24842
|
}
|
|
24777
24843
|
function getOutputFilenames(result) {
|
|
@@ -24978,8 +25044,8 @@ init_pool_manager();
|
|
|
24978
25044
|
init_cjs_shims();
|
|
24979
25045
|
var import_promises37 = require("fs/promises");
|
|
24980
25046
|
var import_node_path52 = __toESM(require("path"), 1);
|
|
24981
|
-
var import_yaml9 = require("yaml");
|
|
24982
25047
|
init_interpolation();
|
|
25048
|
+
init_yaml_loader();
|
|
24983
25049
|
init_repo_config_parser();
|
|
24984
25050
|
function normalizeGitUrl(url) {
|
|
24985
25051
|
let normalized = url.replace(/\.git$/, "");
|
|
@@ -25027,7 +25093,7 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
25027
25093
|
}
|
|
25028
25094
|
async function extractReposFromEvalFile(filePath) {
|
|
25029
25095
|
const content = await (0, import_promises37.readFile)(filePath, "utf8");
|
|
25030
|
-
const parsed = interpolateEnv((
|
|
25096
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
25031
25097
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
25032
25098
|
const obj = parsed;
|
|
25033
25099
|
const evalFileDir = import_node_path52.default.dirname(import_node_path52.default.resolve(filePath));
|
|
@@ -25048,7 +25114,7 @@ async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
|
25048
25114
|
if (typeof raw === "string") {
|
|
25049
25115
|
const workspaceFilePath = import_node_path52.default.resolve(evalFileDir, raw);
|
|
25050
25116
|
const content = await (0, import_promises37.readFile)(workspaceFilePath, "utf8");
|
|
25051
|
-
const parsed = interpolateEnv((
|
|
25117
|
+
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
25052
25118
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
25053
25119
|
return extractReposFromObject(parsed);
|
|
25054
25120
|
}
|
|
@@ -25412,7 +25478,8 @@ init_paths();
|
|
|
25412
25478
|
init_cjs_shims();
|
|
25413
25479
|
var import_node_fs20 = require("fs");
|
|
25414
25480
|
var import_node_path55 = __toESM(require("path"), 1);
|
|
25415
|
-
var
|
|
25481
|
+
var import_yaml2 = require("yaml");
|
|
25482
|
+
init_yaml_loader();
|
|
25416
25483
|
init_paths();
|
|
25417
25484
|
function getBenchmarksRegistryPath() {
|
|
25418
25485
|
return import_node_path55.default.join(getAgentvConfigDir(), "benchmarks.yaml");
|
|
@@ -25447,7 +25514,7 @@ function loadBenchmarkRegistry() {
|
|
|
25447
25514
|
}
|
|
25448
25515
|
try {
|
|
25449
25516
|
const raw = (0, import_node_fs20.readFileSync)(registryPath, "utf-8");
|
|
25450
|
-
const parsed = (
|
|
25517
|
+
const parsed = parseYamlValue(raw);
|
|
25451
25518
|
if (!parsed || typeof parsed !== "object") {
|
|
25452
25519
|
return { benchmarks: [] };
|
|
25453
25520
|
}
|
|
@@ -25464,7 +25531,7 @@ function saveBenchmarkRegistry(registry) {
|
|
|
25464
25531
|
(0, import_node_fs20.mkdirSync)(dir, { recursive: true });
|
|
25465
25532
|
}
|
|
25466
25533
|
const payload = { benchmarks: registry.benchmarks.map(toYaml) };
|
|
25467
|
-
(0, import_node_fs20.writeFileSync)(registryPath, (0,
|
|
25534
|
+
(0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml2.stringify)(payload), "utf-8");
|
|
25468
25535
|
}
|
|
25469
25536
|
function deriveBenchmarkId(dirPath, existingIds) {
|
|
25470
25537
|
const base = import_node_path55.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
@@ -26991,6 +27058,7 @@ function createAgentKernel() {
|
|
|
26991
27058
|
parseEnvOutput,
|
|
26992
27059
|
parseJsonFromText,
|
|
26993
27060
|
parseJsonSafe,
|
|
27061
|
+
parseYamlValue,
|
|
26994
27062
|
prepareResultsRepoBranch,
|
|
26995
27063
|
pushResultsRepoBranch,
|
|
26996
27064
|
readJsonFile,
|