agentv 4.24.1-next.1 → 4.25.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-SHHWQAXG.js → artifact-writer-RFZXLCR7.js} +3 -3
- package/dist/{chunk-OQKETJT6.js → chunk-FWDOW3TN.js} +2 -2
- package/dist/{chunk-BTSHFGH3.js → chunk-ITIGAJ74.js} +20 -75
- package/dist/chunk-ITIGAJ74.js.map +1 -0
- package/dist/{chunk-QCNJEI4K.js → chunk-PPNAZTU6.js} +20 -9
- package/dist/{chunk-QCNJEI4K.js.map → chunk-PPNAZTU6.js.map} +1 -1
- package/dist/{chunk-5CC472UM.js → chunk-QHSB6LBN.js} +42 -1
- package/dist/chunk-QHSB6LBN.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/{dist-AWHCD22R.js → dist-6SR4U7SX.js} +4 -2
- package/dist/index.js +4 -4
- package/dist/{interactive-AAYWH73N.js → interactive-MLSSYY5F.js} +4 -4
- package/dist/studio/assets/{index-8Vc4sq0b.js → index-BRKyoUlX.js} +1 -1
- package/dist/studio/assets/{index-C25w7gi1.js → index-DWr4P2nR.js} +24 -24
- package/dist/studio/index.html +1 -1
- package/package.json +1 -1
- package/dist/chunk-5CC472UM.js.map +0 -1
- package/dist/chunk-BTSHFGH3.js.map +0 -1
- /package/dist/{artifact-writer-SHHWQAXG.js.map → artifact-writer-RFZXLCR7.js.map} +0 -0
- /package/dist/{chunk-OQKETJT6.js.map → chunk-FWDOW3TN.js.map} +0 -0
- /package/dist/{dist-AWHCD22R.js.map → dist-6SR4U7SX.js.map} +0 -0
- /package/dist/{interactive-AAYWH73N.js.map → interactive-MLSSYY5F.js.map} +0 -0
|
@@ -13,8 +13,8 @@ import {
|
|
|
13
13
|
writeArtifacts,
|
|
14
14
|
writeArtifactsFromResults,
|
|
15
15
|
writePerTestArtifacts
|
|
16
|
-
} from "./chunk-OQKETJT6.js";
|
|
17
|
-
import "./chunk-5CC472UM.js";
|
|
16
|
+
} from "./chunk-FWDOW3TN.js";
|
|
17
|
+
import "./chunk-QHSB6LBN.js";
|
|
18
18
|
import "./chunk-QOBQ5XYF.js";
|
|
19
19
|
import "./chunk-BPGJ4HBU.js";
|
|
20
20
|
import "./chunk-XGWXNNH6.js";
|
|
@@ -38,4 +38,4 @@ export {
|
|
|
38
38
|
writeArtifactsFromResults,
|
|
39
39
|
writePerTestArtifacts
|
|
40
40
|
};
|
|
41
|
-
//# sourceMappingURL=artifact-writer-SHHWQAXG.js.map
|
|
41
|
+
//# sourceMappingURL=artifact-writer-RFZXLCR7.js.map
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
|
|
2
2
|
import {
|
|
3
3
|
toTranscriptJsonLines
|
|
4
|
-
} from "./chunk-5CC472UM.js";
|
|
4
|
+
} from "./chunk-QHSB6LBN.js";
|
|
5
5
|
import {
|
|
6
6
|
DEFAULT_THRESHOLD
|
|
7
7
|
} from "./chunk-XGWXNNH6.js";
|
|
@@ -690,4 +690,4 @@ export {
|
|
|
690
690
|
writePerTestArtifacts,
|
|
691
691
|
writeArtifactsFromResults
|
|
692
692
|
};
|
|
693
|
-
//# sourceMappingURL=chunk-OQKETJT6.js.map
|
|
693
|
+
//# sourceMappingURL=chunk-FWDOW3TN.js.map
|
|
@@ -15,23 +15,20 @@ import {
|
|
|
15
15
|
resolveWorkspaceOrFilePath,
|
|
16
16
|
toSnakeCaseDeep,
|
|
17
17
|
writeArtifactsFromResults
|
|
18
|
-
} from "./chunk-OQKETJT6.js";
|
|
18
|
+
} from "./chunk-FWDOW3TN.js";
|
|
19
19
|
import {
|
|
20
20
|
ResponseCache,
|
|
21
21
|
RunBudgetTracker,
|
|
22
|
-
commitAndPushResultsBranch,
|
|
23
|
-
createDraftResultsPr,
|
|
24
22
|
deriveCategory,
|
|
23
|
+
directPushResults,
|
|
25
24
|
directorySizeBytes,
|
|
26
25
|
getResultsRepoStatus,
|
|
27
26
|
loadTsConfig,
|
|
28
|
-
prepareResultsRepoBranch,
|
|
29
27
|
resolveResultsRepoRunsDir,
|
|
30
28
|
shouldEnableCache,
|
|
31
29
|
shouldSkipCacheForTemperature,
|
|
32
|
-
stageResultsArtifacts,
|
|
33
30
|
syncResultsRepo
|
|
34
|
-
} from "./chunk-5CC472UM.js";
|
|
31
|
+
} from "./chunk-QHSB6LBN.js";
|
|
35
32
|
import {
|
|
36
33
|
CLI_PLACEHOLDERS,
|
|
37
34
|
COMMON_TARGET_SETTINGS,
|
|
@@ -179,7 +176,7 @@ async function findRepoRoot(start) {
|
|
|
179
176
|
// package.json
|
|
180
177
|
var package_default = {
|
|
181
178
|
name: "agentv",
|
|
182
|
-
version: "4.24.1-next.1",
|
|
179
|
+
version: "4.25.0-next.1",
|
|
183
180
|
description: "CLI entry point for AgentV",
|
|
184
181
|
type: "module",
|
|
185
182
|
repository: {
|
|
@@ -1071,9 +1068,6 @@ function normalizeResultsExportConfig(config) {
|
|
|
1071
1068
|
branch_prefix: config.branch_prefix?.trim() || "eval-results"
|
|
1072
1069
|
};
|
|
1073
1070
|
}
|
|
1074
|
-
function slugify(value) {
|
|
1075
|
-
return value.trim().replace(/[^A-Za-z0-9._/-]+/g, "-").replace(/\/+/g, "/").replace(/^-+|-+$/g, "").slice(0, 120);
|
|
1076
|
-
}
|
|
1077
1071
|
function getRelativeRunPath(cwd, runDir) {
|
|
1078
1072
|
const relative = path5.relative(path5.join(cwd, ".agentv", "results", "runs"), runDir);
|
|
1079
1073
|
if (!relative.startsWith("..") && !path5.isAbsolute(relative)) {
|
|
@@ -1083,42 +1077,12 @@ function getRelativeRunPath(cwd, runDir) {
|
|
|
1083
1077
|
const runName = path5.basename(runDir);
|
|
1084
1078
|
return experiment && experiment !== runName ? path5.join(experiment, runName) : runName;
|
|
1085
1079
|
}
|
|
1086
|
-
function buildBranchName(config, payload) {
|
|
1087
|
-
const timestamp = path5.basename(payload.run_dir);
|
|
1088
|
-
const evalStem = payload.test_files.length === 1 ? path5.basename(payload.test_files[0]).replace(/\.eval\.ya?ml$/i, "").replace(/\.[^.]+$/i, "") : `${payload.test_files.length}-evals`;
|
|
1089
|
-
const experiment = slugify(payload.experiment ?? "default");
|
|
1090
|
-
const branchLeaf = slugify(`${experiment}-${evalStem}-${timestamp}`) || timestamp;
|
|
1091
|
-
return `${config.branch_prefix}/${branchLeaf}`;
|
|
1092
|
-
}
|
|
1093
1080
|
function buildCommitTitle(payload) {
|
|
1094
1081
|
const passed = payload.results.filter((result) => result.score >= DEFAULT_THRESHOLD).length;
|
|
1095
1082
|
const avgScore = payload.results.length > 0 ? payload.results.reduce((sum, result) => sum + result.score, 0) / payload.results.length : 0;
|
|
1096
1083
|
const experiment = payload.experiment ?? "default";
|
|
1097
1084
|
return `feat(results): ${experiment} - ${passed}/${payload.results.length} PASS (${avgScore.toFixed(3)})`;
|
|
1098
1085
|
}
|
|
1099
|
-
function buildPrBody(payload) {
|
|
1100
|
-
const sections = payload.eval_summaries.map((summary) => {
|
|
1101
|
-
const table = summary.results.map((result) => `| ${result.test_id} | ${result.score.toFixed(3)} | ${result.status} |`).join("\n");
|
|
1102
|
-
return [
|
|
1103
|
-
`### ${summary.eval_file}`,
|
|
1104
|
-
"",
|
|
1105
|
-
`Summary: ${summary.passed}/${summary.total} PASS (${summary.avg_score.toFixed(3)})`,
|
|
1106
|
-
"",
|
|
1107
|
-
"| Test | Score | Status |",
|
|
1108
|
-
"|---|---|---|",
|
|
1109
|
-
table || "| (no results) | 0.000 | ERROR |"
|
|
1110
|
-
].join("\n");
|
|
1111
|
-
}).join("\n\n");
|
|
1112
|
-
return [
|
|
1113
|
-
"## Results",
|
|
1114
|
-
"",
|
|
1115
|
-
sections,
|
|
1116
|
-
"",
|
|
1117
|
-
`Run: ${path5.basename(payload.run_dir)}`,
|
|
1118
|
-
`Experiment: ${payload.experiment ?? "default"}`,
|
|
1119
|
-
`Eval Files: ${payload.test_files.join(", ")}`
|
|
1120
|
-
].join("\n");
|
|
1121
|
-
}
|
|
1122
1086
|
async function maybeWarnLargeArtifact(runDir) {
|
|
1123
1087
|
const sizeBytes = await directorySizeBytes(runDir);
|
|
1124
1088
|
if (sizeBytes > SIZE_WARNING_BYTES) {
|
|
@@ -1209,38 +1173,19 @@ async function maybeAutoExportRunArtifacts(payload) {
|
|
|
1209
1173
|
}
|
|
1210
1174
|
try {
|
|
1211
1175
|
await maybeWarnLargeArtifact(payload.run_dir);
|
|
1212
|
-
const
|
|
1213
|
-
const
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
const changed = await commitAndPushResultsBranch({
|
|
1224
|
-
repoDir: prepared.repoDir,
|
|
1225
|
-
branchName,
|
|
1226
|
-
commitMessage: commitTitle
|
|
1227
|
-
});
|
|
1228
|
-
if (!changed) {
|
|
1229
|
-
console.warn("Warning: results export produced no git changes. Skipping PR creation.");
|
|
1230
|
-
return;
|
|
1231
|
-
}
|
|
1232
|
-
const prUrl = await createDraftResultsPr({
|
|
1233
|
-
repo: config.repo,
|
|
1234
|
-
repoDir: prepared.repoDir,
|
|
1235
|
-
baseBranch: prepared.baseBranch,
|
|
1236
|
-
branchName,
|
|
1237
|
-
title: commitTitle,
|
|
1238
|
-
body: buildPrBody(payload)
|
|
1239
|
-
});
|
|
1240
|
-
console.log(`Remote results draft PR created: ${prUrl}`);
|
|
1241
|
-
} finally {
|
|
1242
|
-
await prepared.cleanup();
|
|
1176
|
+
const relativeRunPath = getRelativeRunPath(payload.cwd, payload.run_dir);
|
|
1177
|
+
const commitTitle = buildCommitTitle(payload);
|
|
1178
|
+
const pushed = await directPushResults({
|
|
1179
|
+
config,
|
|
1180
|
+
sourceDir: payload.run_dir,
|
|
1181
|
+
destinationPath: relativeRunPath,
|
|
1182
|
+
commitMessage: commitTitle
|
|
1183
|
+
});
|
|
1184
|
+
if (!pushed) {
|
|
1185
|
+
console.warn("Warning: results export produced no git changes. Skipping push.");
|
|
1186
|
+
return;
|
|
1243
1187
|
}
|
|
1188
|
+
console.log(`Results pushed to ${config.repo} (${config.path}/${relativeRunPath})`);
|
|
1244
1189
|
} catch (error) {
|
|
1245
1190
|
console.warn(`Warning: skipping results export: ${getStatusMessage(error)}`);
|
|
1246
1191
|
console.warn("Warning: Run 'gh auth login' if GitHub authentication is missing.");
|
|
@@ -5474,7 +5419,7 @@ async function runEvalCommand(input) {
|
|
|
5474
5419
|
const useFileExport = !!options.otelFile;
|
|
5475
5420
|
if (options.exportOtel || useFileExport) {
|
|
5476
5421
|
try {
|
|
5477
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-AWHCD22R.js");
|
|
5422
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-6SR4U7SX.js");
|
|
5478
5423
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
5479
5424
|
let headers = {};
|
|
5480
5425
|
if (options.otelBackend) {
|
|
@@ -5673,7 +5618,7 @@ async function runEvalCommand(input) {
|
|
|
5673
5618
|
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
|
|
5674
5619
|
let transcriptProviderFactory;
|
|
5675
5620
|
if (options.transcript) {
|
|
5676
|
-
const { TranscriptProvider } = await import("./dist-AWHCD22R.js");
|
|
5621
|
+
const { TranscriptProvider } = await import("./dist-6SR4U7SX.js");
|
|
5677
5622
|
const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
|
|
5678
5623
|
const totalTests = [...fileMetadata.values()].reduce(
|
|
5679
5624
|
(sum, meta) => sum + meta.testCases.length,
|
|
@@ -5842,7 +5787,7 @@ async function runEvalCommand(input) {
|
|
|
5842
5787
|
if (usesDefaultArtifactWorkspace && allResults.length > 0) {
|
|
5843
5788
|
const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
|
|
5844
5789
|
if (isResumeAppend) {
|
|
5845
|
-
const { writePerTestArtifacts } = await import("./artifact-writer-SHHWQAXG.js");
|
|
5790
|
+
const { writePerTestArtifacts } = await import("./artifact-writer-RFZXLCR7.js");
|
|
5846
5791
|
await writePerTestArtifacts(allResults, runDir, {
|
|
5847
5792
|
experiment: normalizeExperimentName(options.experiment)
|
|
5848
5793
|
});
|
|
@@ -6065,4 +6010,4 @@ export {
|
|
|
6065
6010
|
getCategories,
|
|
6066
6011
|
filterByCategory
|
|
6067
6012
|
};
|
|
6068
|
-
//# sourceMappingURL=chunk-BTSHFGH3.js.map
|
|
6013
|
+
//# sourceMappingURL=chunk-ITIGAJ74.js.map
|