agentv 4.24.1-next.1 → 4.25.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,8 +13,8 @@ import {
13
13
  writeArtifacts,
14
14
  writeArtifactsFromResults,
15
15
  writePerTestArtifacts
16
- } from "./chunk-OQKETJT6.js";
17
- import "./chunk-5CC472UM.js";
16
+ } from "./chunk-FWDOW3TN.js";
17
+ import "./chunk-QHSB6LBN.js";
18
18
  import "./chunk-QOBQ5XYF.js";
19
19
  import "./chunk-BPGJ4HBU.js";
20
20
  import "./chunk-XGWXNNH6.js";
@@ -38,4 +38,4 @@ export {
38
38
  writeArtifactsFromResults,
39
39
  writePerTestArtifacts
40
40
  };
41
- //# sourceMappingURL=artifact-writer-SHHWQAXG.js.map
41
+ //# sourceMappingURL=artifact-writer-RFZXLCR7.js.map
@@ -1,7 +1,7 @@
1
1
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
2
  import {
3
3
  toTranscriptJsonLines
4
- } from "./chunk-5CC472UM.js";
4
+ } from "./chunk-QHSB6LBN.js";
5
5
  import {
6
6
  DEFAULT_THRESHOLD
7
7
  } from "./chunk-XGWXNNH6.js";
@@ -690,4 +690,4 @@ export {
690
690
  writePerTestArtifacts,
691
691
  writeArtifactsFromResults
692
692
  };
693
- //# sourceMappingURL=chunk-OQKETJT6.js.map
693
+ //# sourceMappingURL=chunk-FWDOW3TN.js.map
@@ -15,23 +15,20 @@ import {
15
15
  resolveWorkspaceOrFilePath,
16
16
  toSnakeCaseDeep,
17
17
  writeArtifactsFromResults
18
- } from "./chunk-OQKETJT6.js";
18
+ } from "./chunk-FWDOW3TN.js";
19
19
  import {
20
20
  ResponseCache,
21
21
  RunBudgetTracker,
22
- commitAndPushResultsBranch,
23
- createDraftResultsPr,
24
22
  deriveCategory,
23
+ directPushResults,
25
24
  directorySizeBytes,
26
25
  getResultsRepoStatus,
27
26
  loadTsConfig,
28
- prepareResultsRepoBranch,
29
27
  resolveResultsRepoRunsDir,
30
28
  shouldEnableCache,
31
29
  shouldSkipCacheForTemperature,
32
- stageResultsArtifacts,
33
30
  syncResultsRepo
34
- } from "./chunk-5CC472UM.js";
31
+ } from "./chunk-QHSB6LBN.js";
35
32
  import {
36
33
  CLI_PLACEHOLDERS,
37
34
  COMMON_TARGET_SETTINGS,
@@ -179,7 +176,7 @@ async function findRepoRoot(start) {
179
176
  // package.json
180
177
  var package_default = {
181
178
  name: "agentv",
182
- version: "4.24.1-next.1",
179
+ version: "4.25.0-next.1",
183
180
  description: "CLI entry point for AgentV",
184
181
  type: "module",
185
182
  repository: {
@@ -1071,9 +1068,6 @@ function normalizeResultsExportConfig(config) {
1071
1068
  branch_prefix: config.branch_prefix?.trim() || "eval-results"
1072
1069
  };
1073
1070
  }
1074
- function slugify(value) {
1075
- return value.trim().replace(/[^A-Za-z0-9._/-]+/g, "-").replace(/\/+/g, "/").replace(/^-+|-+$/g, "").slice(0, 120);
1076
- }
1077
1071
  function getRelativeRunPath(cwd, runDir) {
1078
1072
  const relative = path5.relative(path5.join(cwd, ".agentv", "results", "runs"), runDir);
1079
1073
  if (!relative.startsWith("..") && !path5.isAbsolute(relative)) {
@@ -1083,42 +1077,12 @@ function getRelativeRunPath(cwd, runDir) {
1083
1077
  const runName = path5.basename(runDir);
1084
1078
  return experiment && experiment !== runName ? path5.join(experiment, runName) : runName;
1085
1079
  }
1086
- function buildBranchName(config, payload) {
1087
- const timestamp = path5.basename(payload.run_dir);
1088
- const evalStem = payload.test_files.length === 1 ? path5.basename(payload.test_files[0]).replace(/\.eval\.ya?ml$/i, "").replace(/\.[^.]+$/i, "") : `${payload.test_files.length}-evals`;
1089
- const experiment = slugify(payload.experiment ?? "default");
1090
- const branchLeaf = slugify(`${experiment}-${evalStem}-${timestamp}`) || timestamp;
1091
- return `${config.branch_prefix}/${branchLeaf}`;
1092
- }
1093
1080
  function buildCommitTitle(payload) {
1094
1081
  const passed = payload.results.filter((result) => result.score >= DEFAULT_THRESHOLD).length;
1095
1082
  const avgScore = payload.results.length > 0 ? payload.results.reduce((sum, result) => sum + result.score, 0) / payload.results.length : 0;
1096
1083
  const experiment = payload.experiment ?? "default";
1097
1084
  return `feat(results): ${experiment} - ${passed}/${payload.results.length} PASS (${avgScore.toFixed(3)})`;
1098
1085
  }
1099
- function buildPrBody(payload) {
1100
- const sections = payload.eval_summaries.map((summary) => {
1101
- const table = summary.results.map((result) => `| ${result.test_id} | ${result.score.toFixed(3)} | ${result.status} |`).join("\n");
1102
- return [
1103
- `### ${summary.eval_file}`,
1104
- "",
1105
- `Summary: ${summary.passed}/${summary.total} PASS (${summary.avg_score.toFixed(3)})`,
1106
- "",
1107
- "| Test | Score | Status |",
1108
- "|---|---|---|",
1109
- table || "| (no results) | 0.000 | ERROR |"
1110
- ].join("\n");
1111
- }).join("\n\n");
1112
- return [
1113
- "## Results",
1114
- "",
1115
- sections,
1116
- "",
1117
- `Run: ${path5.basename(payload.run_dir)}`,
1118
- `Experiment: ${payload.experiment ?? "default"}`,
1119
- `Eval Files: ${payload.test_files.join(", ")}`
1120
- ].join("\n");
1121
- }
1122
1086
  async function maybeWarnLargeArtifact(runDir) {
1123
1087
  const sizeBytes = await directorySizeBytes(runDir);
1124
1088
  if (sizeBytes > SIZE_WARNING_BYTES) {
@@ -1209,38 +1173,19 @@ async function maybeAutoExportRunArtifacts(payload) {
1209
1173
  }
1210
1174
  try {
1211
1175
  await maybeWarnLargeArtifact(payload.run_dir);
1212
- const branchName = buildBranchName(config, payload);
1213
- const prepared = await prepareResultsRepoBranch(config, branchName);
1214
- try {
1215
- const relativeRunPath = getRelativeRunPath(payload.cwd, payload.run_dir);
1216
- const destinationDir = path5.join(prepared.repoDir, config.path, relativeRunPath);
1217
- await stageResultsArtifacts({
1218
- repoDir: prepared.repoDir,
1219
- sourceDir: payload.run_dir,
1220
- destinationDir
1221
- });
1222
- const commitTitle = buildCommitTitle(payload);
1223
- const changed = await commitAndPushResultsBranch({
1224
- repoDir: prepared.repoDir,
1225
- branchName,
1226
- commitMessage: commitTitle
1227
- });
1228
- if (!changed) {
1229
- console.warn("Warning: results export produced no git changes. Skipping PR creation.");
1230
- return;
1231
- }
1232
- const prUrl = await createDraftResultsPr({
1233
- repo: config.repo,
1234
- repoDir: prepared.repoDir,
1235
- baseBranch: prepared.baseBranch,
1236
- branchName,
1237
- title: commitTitle,
1238
- body: buildPrBody(payload)
1239
- });
1240
- console.log(`Remote results draft PR created: ${prUrl}`);
1241
- } finally {
1242
- await prepared.cleanup();
1176
+ const relativeRunPath = getRelativeRunPath(payload.cwd, payload.run_dir);
1177
+ const commitTitle = buildCommitTitle(payload);
1178
+ const pushed = await directPushResults({
1179
+ config,
1180
+ sourceDir: payload.run_dir,
1181
+ destinationPath: relativeRunPath,
1182
+ commitMessage: commitTitle
1183
+ });
1184
+ if (!pushed) {
1185
+ console.warn("Warning: results export produced no git changes. Skipping push.");
1186
+ return;
1243
1187
  }
1188
+ console.log(`Results pushed to ${config.repo} (${config.path}/${relativeRunPath})`);
1244
1189
  } catch (error) {
1245
1190
  console.warn(`Warning: skipping results export: ${getStatusMessage(error)}`);
1246
1191
  console.warn("Warning: Run 'gh auth login' if GitHub authentication is missing.");
@@ -5474,7 +5419,7 @@ async function runEvalCommand(input) {
5474
5419
  const useFileExport = !!options.otelFile;
5475
5420
  if (options.exportOtel || useFileExport) {
5476
5421
  try {
5477
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-AWHCD22R.js");
5422
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-6SR4U7SX.js");
5478
5423
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
5479
5424
  let headers = {};
5480
5425
  if (options.otelBackend) {
@@ -5673,7 +5618,7 @@ async function runEvalCommand(input) {
5673
5618
  const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
5674
5619
  let transcriptProviderFactory;
5675
5620
  if (options.transcript) {
5676
- const { TranscriptProvider } = await import("./dist-AWHCD22R.js");
5621
+ const { TranscriptProvider } = await import("./dist-6SR4U7SX.js");
5677
5622
  const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
5678
5623
  const totalTests = [...fileMetadata.values()].reduce(
5679
5624
  (sum, meta) => sum + meta.testCases.length,
@@ -5842,7 +5787,7 @@ async function runEvalCommand(input) {
5842
5787
  if (usesDefaultArtifactWorkspace && allResults.length > 0) {
5843
5788
  const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
5844
5789
  if (isResumeAppend) {
5845
- const { writePerTestArtifacts } = await import("./artifact-writer-SHHWQAXG.js");
5790
+ const { writePerTestArtifacts } = await import("./artifact-writer-RFZXLCR7.js");
5846
5791
  await writePerTestArtifacts(allResults, runDir, {
5847
5792
  experiment: normalizeExperimentName(options.experiment)
5848
5793
  });
@@ -6065,4 +6010,4 @@ export {
6065
6010
  getCategories,
6066
6011
  filterByCategory
6067
6012
  };
6068
- //# sourceMappingURL=chunk-BTSHFGH3.js.map
6013
+ //# sourceMappingURL=chunk-ITIGAJ74.js.map