agentv 4.14.0-next.1 → 4.15.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-YFQMID6V.js → chunk-CJFA423D.js} +125 -16
- package/dist/chunk-CJFA423D.js.map +1 -0
- package/dist/{chunk-CRCGNYS7.js → chunk-GLWYD3YB.js} +4 -4
- package/dist/{chunk-HP3ZUKYB.js → chunk-VSWJTL3T.js} +774 -192
- package/dist/chunk-VSWJTL3T.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-Y3NRIBUT.js → dist-G6XAYD4R.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-SH4UJMIB.js → interactive-3FSYHUSL.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-HP3ZUKYB.js.map +0 -1
- package/dist/chunk-YFQMID6V.js.map +0 -1
- /package/dist/{chunk-CRCGNYS7.js.map → chunk-GLWYD3YB.js.map} +0 -0
- /package/dist/{dist-Y3NRIBUT.js.map → dist-G6XAYD4R.js.map} +0 -0
- /package/dist/{interactive-SH4UJMIB.js.map → interactive-3FSYHUSL.js.map} +0 -0
|
@@ -41,12 +41,12 @@ import {
|
|
|
41
41
|
subscribeToPiLogEntries,
|
|
42
42
|
syncResultsRepo,
|
|
43
43
|
toCamelCaseDeep
|
|
44
|
-
} from "./chunk-HP3ZUKYB.js";
|
|
44
|
+
} from "./chunk-VSWJTL3T.js";
|
|
45
45
|
|
|
46
46
|
// package.json
|
|
47
47
|
var package_default = {
|
|
48
48
|
name: "agentv",
|
|
49
|
-
version: "4.14.0-next.1",
|
|
49
|
+
version: "4.15.0-next.1",
|
|
50
50
|
description: "CLI entry point for AgentV",
|
|
51
51
|
type: "module",
|
|
52
52
|
repository: {
|
|
@@ -1491,6 +1491,37 @@ async function writeArtifactsFromResults(results, outputDir, options) {
|
|
|
1491
1491
|
await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
|
|
1492
1492
|
`, "utf8");
|
|
1493
1493
|
await writeJsonlFile(indexPath, indexRecords);
|
|
1494
|
+
const transcriptPath = path7.join(outputDir, "transcript.jsonl");
|
|
1495
|
+
const transcriptLines = results.map((result) => {
|
|
1496
|
+
let inputText = "";
|
|
1497
|
+
if (typeof result.input === "string") {
|
|
1498
|
+
inputText = result.input;
|
|
1499
|
+
} else if (Array.isArray(result.input)) {
|
|
1500
|
+
const firstUserMsg = result.input.find((m) => m.role === "user");
|
|
1501
|
+
inputText = typeof firstUserMsg?.content === "string" ? firstUserMsg.content : "";
|
|
1502
|
+
}
|
|
1503
|
+
return {
|
|
1504
|
+
input: inputText,
|
|
1505
|
+
output: result.output,
|
|
1506
|
+
token_usage: result.tokenUsage ? {
|
|
1507
|
+
input: result.tokenUsage.input,
|
|
1508
|
+
output: result.tokenUsage.output,
|
|
1509
|
+
cached: result.tokenUsage.cached
|
|
1510
|
+
} : void 0,
|
|
1511
|
+
duration_ms: result.durationMs,
|
|
1512
|
+
cost_usd: result.costUsd,
|
|
1513
|
+
source: {
|
|
1514
|
+
provider: result.target,
|
|
1515
|
+
session_id: result.conversationId ?? result.testId,
|
|
1516
|
+
timestamp: result.timestamp
|
|
1517
|
+
}
|
|
1518
|
+
};
|
|
1519
|
+
});
|
|
1520
|
+
await writeFile(
|
|
1521
|
+
transcriptPath,
|
|
1522
|
+
transcriptLines.map((line) => JSON.stringify(line)).join("\n") + (transcriptLines.length ? "\n" : ""),
|
|
1523
|
+
"utf8"
|
|
1524
|
+
);
|
|
1494
1525
|
return { testArtifactDir, timingPath, benchmarkPath, indexPath };
|
|
1495
1526
|
}
|
|
1496
1527
|
|
|
@@ -3147,7 +3178,14 @@ var KNOWN_TEST_FIELDS = /* @__PURE__ */ new Set([
|
|
|
3147
3178
|
"metadata",
|
|
3148
3179
|
"conversation_id",
|
|
3149
3180
|
"suite",
|
|
3150
|
-
"note"
|
|
3181
|
+
"note",
|
|
3182
|
+
"depends_on",
|
|
3183
|
+
"on_dependency_failure",
|
|
3184
|
+
"mode",
|
|
3185
|
+
"turns",
|
|
3186
|
+
"aggregation",
|
|
3187
|
+
"on_turn_failure",
|
|
3188
|
+
"window_size"
|
|
3151
3189
|
]);
|
|
3152
3190
|
var NAME_PATTERN = /^[a-z0-9-]+$/;
|
|
3153
3191
|
function isObject(value) {
|
|
@@ -3360,6 +3398,7 @@ async function validateEvalFile(filePath) {
|
|
|
3360
3398
|
if (assertField !== void 0) {
|
|
3361
3399
|
validateAssertArray(assertField, location, absolutePath, errors);
|
|
3362
3400
|
}
|
|
3401
|
+
validateConversationMode(evalCase, location, absolutePath, errors);
|
|
3363
3402
|
await validateWorkspaceConfig(
|
|
3364
3403
|
evalCase.workspace,
|
|
3365
3404
|
absolutePath,
|
|
@@ -3456,16 +3495,6 @@ function validateWorkspaceRepoConfig(workspace, filePath, errors) {
|
|
|
3456
3495
|
}
|
|
3457
3496
|
}
|
|
3458
3497
|
}
|
|
3459
|
-
if (isObject(afterEachHook) && afterEachHook.reset && afterEachHook.reset !== "none") {
|
|
3460
|
-
if (!Array.isArray(repos) || repos.length === 0) {
|
|
3461
|
-
errors.push({
|
|
3462
|
-
severity: "warning",
|
|
3463
|
-
filePath,
|
|
3464
|
-
location: "workspace.hooks.after_each",
|
|
3465
|
-
message: `hooks.after_each.reset '${afterEachHook.reset}' has no effect without repos.`
|
|
3466
|
-
});
|
|
3467
|
-
}
|
|
3468
|
-
}
|
|
3469
3498
|
if (isObject(afterEachHook) && afterEachHook.reset && isolation === "per_test") {
|
|
3470
3499
|
errors.push({
|
|
3471
3500
|
severity: "warning",
|
|
@@ -3712,6 +3741,86 @@ function validateContentForRoleMarkers(content, location, filePath, errors) {
|
|
|
3712
3741
|
}
|
|
3713
3742
|
}
|
|
3714
3743
|
}
|
|
3744
|
+
function validateConversationMode(evalCase, location, filePath, errors) {
|
|
3745
|
+
const mode = evalCase.mode;
|
|
3746
|
+
const turns = evalCase.turns;
|
|
3747
|
+
const aggregation = evalCase.aggregation;
|
|
3748
|
+
const onTurnFailure = evalCase.on_turn_failure;
|
|
3749
|
+
const windowSize = evalCase.window_size;
|
|
3750
|
+
const isConversationMode = mode === "conversation";
|
|
3751
|
+
if (turns !== void 0 && !isConversationMode) {
|
|
3752
|
+
errors.push({
|
|
3753
|
+
severity: "error",
|
|
3754
|
+
filePath,
|
|
3755
|
+
location: `${location}.turns`,
|
|
3756
|
+
message: "'turns' requires mode: conversation"
|
|
3757
|
+
});
|
|
3758
|
+
}
|
|
3759
|
+
if (isConversationMode && (!Array.isArray(turns) || turns.length === 0)) {
|
|
3760
|
+
errors.push({
|
|
3761
|
+
severity: "error",
|
|
3762
|
+
filePath,
|
|
3763
|
+
location: `${location}.mode`,
|
|
3764
|
+
message: "mode: conversation requires a non-empty 'turns' array"
|
|
3765
|
+
});
|
|
3766
|
+
}
|
|
3767
|
+
if (isConversationMode && Array.isArray(turns) && evalCase.expected_output !== void 0) {
|
|
3768
|
+
errors.push({
|
|
3769
|
+
severity: "error",
|
|
3770
|
+
filePath,
|
|
3771
|
+
location: `${location}.expected_output`,
|
|
3772
|
+
message: "Top-level 'expected_output' is not allowed with mode: conversation (use per-turn expected_output instead)"
|
|
3773
|
+
});
|
|
3774
|
+
}
|
|
3775
|
+
if (aggregation !== void 0 && !isConversationMode) {
|
|
3776
|
+
errors.push({
|
|
3777
|
+
severity: "error",
|
|
3778
|
+
filePath,
|
|
3779
|
+
location: `${location}.aggregation`,
|
|
3780
|
+
message: "'aggregation' requires mode: conversation"
|
|
3781
|
+
});
|
|
3782
|
+
}
|
|
3783
|
+
if (onTurnFailure !== void 0 && !isConversationMode) {
|
|
3784
|
+
errors.push({
|
|
3785
|
+
severity: "error",
|
|
3786
|
+
filePath,
|
|
3787
|
+
location: `${location}.on_turn_failure`,
|
|
3788
|
+
message: "'on_turn_failure' requires mode: conversation"
|
|
3789
|
+
});
|
|
3790
|
+
}
|
|
3791
|
+
if (windowSize !== void 0 && !isConversationMode) {
|
|
3792
|
+
errors.push({
|
|
3793
|
+
severity: "error",
|
|
3794
|
+
filePath,
|
|
3795
|
+
location: `${location}.window_size`,
|
|
3796
|
+
message: "'window_size' requires mode: conversation"
|
|
3797
|
+
});
|
|
3798
|
+
}
|
|
3799
|
+
if (isConversationMode && Array.isArray(turns)) {
|
|
3800
|
+
for (let i = 0; i < turns.length; i++) {
|
|
3801
|
+
const turn = turns[i];
|
|
3802
|
+
if (!isObject(turn)) {
|
|
3803
|
+
errors.push({
|
|
3804
|
+
severity: "error",
|
|
3805
|
+
filePath,
|
|
3806
|
+
location: `${location}.turns[${i}]`,
|
|
3807
|
+
message: "Turn must be an object"
|
|
3808
|
+
});
|
|
3809
|
+
continue;
|
|
3810
|
+
}
|
|
3811
|
+
const turnInput = turn.input;
|
|
3812
|
+
const isEmpty = turnInput === void 0 || turnInput === "" || typeof turnInput === "string" && turnInput.trim() === "" || Array.isArray(turnInput) && turnInput.length === 0;
|
|
3813
|
+
if (isEmpty) {
|
|
3814
|
+
errors.push({
|
|
3815
|
+
severity: "error",
|
|
3816
|
+
filePath,
|
|
3817
|
+
location: `${location}.turns[${i}].input`,
|
|
3818
|
+
message: "Each turn must have a non-empty input"
|
|
3819
|
+
});
|
|
3820
|
+
}
|
|
3821
|
+
}
|
|
3822
|
+
}
|
|
3823
|
+
}
|
|
3715
3824
|
function isObject2(value) {
|
|
3716
3825
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
3717
3826
|
}
|
|
@@ -5242,7 +5351,7 @@ async function runEvalCommand(input) {
|
|
|
5242
5351
|
const useFileExport = !!options.otelFile;
|
|
5243
5352
|
if (options.exportOtel || useFileExport) {
|
|
5244
5353
|
try {
|
|
5245
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-Y3NRIBUT.js");
|
|
5354
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-G6XAYD4R.js");
|
|
5246
5355
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
5247
5356
|
let headers = {};
|
|
5248
5357
|
if (options.otelBackend) {
|
|
@@ -5438,7 +5547,7 @@ async function runEvalCommand(input) {
|
|
|
5438
5547
|
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
|
|
5439
5548
|
let transcriptProviderFactory;
|
|
5440
5549
|
if (options.transcript) {
|
|
5441
|
-
const { TranscriptProvider } = await import("./dist-Y3NRIBUT.js");
|
|
5550
|
+
const { TranscriptProvider } = await import("./dist-G6XAYD4R.js");
|
|
5442
5551
|
const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
|
|
5443
5552
|
const totalTests = [...fileMetadata.values()].reduce(
|
|
5444
5553
|
(sum, meta) => sum + meta.testCases.length,
|
|
@@ -5760,4 +5869,4 @@ export {
|
|
|
5760
5869
|
getCategories,
|
|
5761
5870
|
filterByCategory
|
|
5762
5871
|
};
|
|
5763
|
-
//# sourceMappingURL=chunk-YFQMID6V.js.map
|
|
5872
|
+
//# sourceMappingURL=chunk-CJFA423D.js.map
|