agentv 4.7.0 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-VEAOMKNS.js → chunk-A6W3KOCS.js} +17 -11
- package/dist/chunk-A6W3KOCS.js.map +1 -0
- package/dist/{chunk-I6UE4LHZ.js → chunk-H4GQXK5M.js} +85 -4
- package/dist/chunk-H4GQXK5M.js.map +1 -0
- package/dist/{chunk-AX4CQS45.js → chunk-QBZJSQXV.js} +72 -73
- package/dist/chunk-QBZJSQXV.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-XRVHRBJF.js → dist-QXVR2ZRH.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-UBEMNJZG.js → interactive-IRYNIFCY.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-AX4CQS45.js.map +0 -1
- package/dist/chunk-I6UE4LHZ.js.map +0 -1
- package/dist/chunk-VEAOMKNS.js.map +0 -1
- /package/dist/{dist-XRVHRBJF.js.map → dist-QXVR2ZRH.js.map} +0 -0
- /package/dist/{interactive-UBEMNJZG.js.map → interactive-IRYNIFCY.js.map} +0 -0
|
@@ -32,12 +32,12 @@ import {
|
|
|
32
32
|
subscribeToCopilotCliLogEntries,
|
|
33
33
|
subscribeToCopilotSdkLogEntries,
|
|
34
34
|
subscribeToPiLogEntries
|
|
35
|
-
} from "./chunk-I6UE4LHZ.js";
|
|
35
|
+
} from "./chunk-H4GQXK5M.js";
|
|
36
36
|
|
|
37
37
|
// package.json
|
|
38
38
|
var package_default = {
|
|
39
39
|
name: "agentv",
|
|
40
|
-
version: "4.7.0",
|
|
40
|
+
version: "4.8.0",
|
|
41
41
|
description: "CLI entry point for AgentV",
|
|
42
42
|
type: "module",
|
|
43
43
|
repository: {
|
|
@@ -1712,8 +1712,9 @@ var JunitWriter = class _JunitWriter {
|
|
|
1712
1712
|
}
|
|
1713
1713
|
return ` <testcase name="${escapeXml(r.testId)}" classname="${escapeXml(suiteName)}" time="${time}">${inner}</testcase>`;
|
|
1714
1714
|
});
|
|
1715
|
+
const suiteTime = results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1e3;
|
|
1715
1716
|
suiteXmls.push(
|
|
1716
|
-
` <testsuite name="${escapeXml(suiteName)}" tests="${results.length}" failures="${failures}" errors="${errors}">
|
|
1717
|
+
` <testsuite name="${escapeXml(suiteName)}" tests="${results.length}" failures="${failures}" errors="${errors}" time="${suiteTime.toFixed(3)}">
|
|
1717
1718
|
${testCases.join("\n")}
|
|
1718
1719
|
</testsuite>`
|
|
1719
1720
|
);
|
|
@@ -1723,8 +1724,9 @@ ${testCases.join("\n")}
|
|
|
1723
1724
|
const totalFailures = this.results.filter(
|
|
1724
1725
|
(r) => r.executionStatus !== "execution_error" && r.score < this.threshold
|
|
1725
1726
|
).length;
|
|
1727
|
+
const totalTime = this.results.reduce((sum, r) => sum + (r.durationMs ?? 0), 0) / 1e3;
|
|
1726
1728
|
const xml = `<?xml version="1.0" encoding="UTF-8"?>
|
|
1727
|
-
<testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}">
|
|
1729
|
+
<testsuites tests="${totalTests}" failures="${totalFailures}" errors="${totalErrors}" time="${totalTime.toFixed(3)}">
|
|
1728
1730
|
${suiteXmls.join("\n")}
|
|
1729
1731
|
</testsuites>
|
|
1730
1732
|
`;
|
|
@@ -1823,17 +1825,6 @@ function createWriterFromPath(filePath, options) {
|
|
|
1823
1825
|
);
|
|
1824
1826
|
}
|
|
1825
1827
|
}
|
|
1826
|
-
async function createMultiWriter(filePaths, options) {
|
|
1827
|
-
const writers = await Promise.all(filePaths.map((fp) => createWriterFromPath(fp, options)));
|
|
1828
|
-
return {
|
|
1829
|
-
async append(result) {
|
|
1830
|
-
await Promise.all(writers.map((w) => w.append(result)));
|
|
1831
|
-
},
|
|
1832
|
-
async close() {
|
|
1833
|
-
await Promise.all(writers.map((w) => w.close()));
|
|
1834
|
-
}
|
|
1835
|
-
};
|
|
1836
|
-
}
|
|
1837
1828
|
|
|
1838
1829
|
// src/commands/eval/progress-display.ts
|
|
1839
1830
|
var ANSI_BOLD = "\x1B[1m";
|
|
@@ -3141,7 +3132,8 @@ var COPILOT_SDK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
3141
3132
|
"log_dir",
|
|
3142
3133
|
"log_format",
|
|
3143
3134
|
"system_prompt",
|
|
3144
|
-
"workspace_template"
|
|
3135
|
+
"workspace_template",
|
|
3136
|
+
"byok"
|
|
3145
3137
|
]);
|
|
3146
3138
|
var COPILOT_CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
3147
3139
|
...COMMON_SETTINGS,
|
|
@@ -4015,15 +4007,12 @@ function trimOutputMessages(output, outputMessages) {
|
|
|
4015
4007
|
return sliced.map((m) => ({ role: m.role, content: m.content }));
|
|
4016
4008
|
}
|
|
4017
4009
|
function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
4018
|
-
const cliFormat = normalizeString(rawOptions.outputFormat);
|
|
4019
|
-
const configFormat = config?.output?.format;
|
|
4020
|
-
const formatStr = cliFormat ?? configFormat ?? "jsonl";
|
|
4021
|
-
const format = formatStr === "yaml" ? "yaml" : "jsonl";
|
|
4022
4010
|
const cliWorkers = normalizeOptionalNumber(rawOptions.workers);
|
|
4023
4011
|
const configWorkers = config?.execution?.workers;
|
|
4024
4012
|
const workers = cliWorkers ?? configWorkers ?? 0;
|
|
4025
|
-
const
|
|
4026
|
-
const
|
|
4013
|
+
const cliOutputDir = normalizeString(rawOptions.output);
|
|
4014
|
+
const rawExportPaths = rawOptions.export;
|
|
4015
|
+
const exportPaths = Array.isArray(rawExportPaths) ? rawExportPaths.filter((v) => typeof v === "string" && v.trim().length > 0) : [];
|
|
4027
4016
|
const rawTarget = rawOptions.target;
|
|
4028
4017
|
let cliTargets = [];
|
|
4029
4018
|
let singleTarget;
|
|
@@ -4065,9 +4054,9 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
4065
4054
|
targetsPath: normalizeString(rawOptions.targets),
|
|
4066
4055
|
filter: normalizeFilter(rawOptions.filter),
|
|
4067
4056
|
workers: workers > 0 ? workers : void 0,
|
|
4057
|
+
outputDir: cliOutputDir,
|
|
4068
4058
|
outPath: cliOut ?? configOut,
|
|
4069
|
-
|
|
4070
|
-
format,
|
|
4059
|
+
exportPaths,
|
|
4071
4060
|
dryRun: normalizeBoolean(rawOptions.dryRun),
|
|
4072
4061
|
dryRunDelay: normalizeNumber(rawOptions.dryRunDelay, 0),
|
|
4073
4062
|
dryRunDelayMin: normalizeNumber(rawOptions.dryRunDelayMin, 0),
|
|
@@ -4484,13 +4473,48 @@ async function runEvalCommand(input) {
|
|
|
4484
4473
|
if (options.verbose) {
|
|
4485
4474
|
console.log(`Repository root: ${repoRoot}`);
|
|
4486
4475
|
}
|
|
4487
|
-
|
|
4488
|
-
|
|
4476
|
+
if (options.outPath) {
|
|
4477
|
+
console.warn("Warning: --out is deprecated. Use --output <dir> to set the artifact directory.");
|
|
4478
|
+
}
|
|
4479
|
+
if (options.artifacts) {
|
|
4480
|
+
console.warn(
|
|
4481
|
+
"Warning: --artifacts is deprecated. Use --output <dir> to set the artifact directory."
|
|
4482
|
+
);
|
|
4483
|
+
}
|
|
4484
|
+
if (options.benchmarkJson) {
|
|
4485
|
+
console.warn(
|
|
4486
|
+
"Warning: --benchmark-json is deprecated. benchmark.json is always written to the artifact directory."
|
|
4487
|
+
);
|
|
4488
|
+
}
|
|
4489
|
+
if (normalizeString(input.rawOptions.outputFormat)) {
|
|
4490
|
+
console.warn(
|
|
4491
|
+
"Warning: --output-format is deprecated. The artifact directory always uses JSONL."
|
|
4492
|
+
);
|
|
4493
|
+
}
|
|
4494
|
+
const explicitDir = options.outputDir ?? options.artifacts;
|
|
4495
|
+
let runDir;
|
|
4496
|
+
let outputPath;
|
|
4497
|
+
let usesDefaultArtifactWorkspace;
|
|
4498
|
+
if (explicitDir) {
|
|
4499
|
+
runDir = path15.resolve(explicitDir);
|
|
4500
|
+
mkdirSync(runDir, { recursive: true });
|
|
4501
|
+
outputPath = path15.join(runDir, "index.jsonl");
|
|
4502
|
+
usesDefaultArtifactWorkspace = true;
|
|
4503
|
+
} else if (options.outPath) {
|
|
4504
|
+
outputPath = path15.resolve(options.outPath);
|
|
4505
|
+
runDir = path15.dirname(outputPath);
|
|
4506
|
+
mkdirSync(runDir, { recursive: true });
|
|
4507
|
+
usesDefaultArtifactWorkspace = false;
|
|
4508
|
+
} else {
|
|
4509
|
+
outputPath = buildDefaultOutputPath(cwd);
|
|
4510
|
+
runDir = path15.dirname(outputPath);
|
|
4511
|
+
usesDefaultArtifactWorkspace = true;
|
|
4512
|
+
}
|
|
4489
4513
|
let otelExporter = null;
|
|
4490
4514
|
const useFileExport = !!options.otelFile;
|
|
4491
4515
|
if (options.exportOtel || useFileExport) {
|
|
4492
4516
|
try {
|
|
4493
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-XRVHRBJF.js");
|
|
4517
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-QXVR2ZRH.js");
|
|
4494
4518
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4495
4519
|
let headers = {};
|
|
4496
4520
|
if (options.otelBackend) {
|
|
@@ -4531,16 +4555,11 @@ async function runEvalCommand(input) {
|
|
|
4531
4555
|
}
|
|
4532
4556
|
}
|
|
4533
4557
|
const primaryWritePath = outputPath;
|
|
4534
|
-
const
|
|
4535
|
-
|
|
4536
|
-
|
|
4537
|
-
|
|
4538
|
-
|
|
4539
|
-
if (uniqueOutputPaths.length === 1) {
|
|
4540
|
-
console.log(`Output path: ${outputPath}`);
|
|
4541
|
-
} else {
|
|
4542
|
-
console.log("Output paths:");
|
|
4543
|
-
for (const p of uniqueReportedOutputPaths) {
|
|
4558
|
+
const resolvedExportPaths = options.exportPaths.map((p) => path15.resolve(p));
|
|
4559
|
+
console.log(`Artifact directory: ${runDir}`);
|
|
4560
|
+
if (resolvedExportPaths.length > 0) {
|
|
4561
|
+
console.log("Export files:");
|
|
4562
|
+
for (const p of resolvedExportPaths) {
|
|
4544
4563
|
console.log(` ${p}`);
|
|
4545
4564
|
}
|
|
4546
4565
|
}
|
|
@@ -4621,12 +4640,7 @@ async function runEvalCommand(input) {
|
|
|
4621
4640
|
throw new Error("--threshold must be between 0 and 1");
|
|
4622
4641
|
}
|
|
4623
4642
|
const writerOptions = resolvedThreshold !== void 0 ? { threshold: resolvedThreshold } : void 0;
|
|
4624
|
-
|
|
4625
|
-
if (uniqueOutputPaths.length === 1) {
|
|
4626
|
-
outputWriter = await createOutputWriter(primaryWritePath, options.format);
|
|
4627
|
-
} else {
|
|
4628
|
-
outputWriter = await createMultiWriter(uniqueOutputPaths, writerOptions);
|
|
4629
|
-
}
|
|
4643
|
+
const outputWriter = await createOutputWriter(primaryWritePath, "jsonl");
|
|
4630
4644
|
const isMatrixMode = Array.from(fileMetadata.values()).some((meta) => meta.selections.length > 1);
|
|
4631
4645
|
let totalEvalCount = 0;
|
|
4632
4646
|
for (const meta of fileMetadata.values()) {
|
|
@@ -4694,7 +4708,7 @@ async function runEvalCommand(input) {
|
|
|
4694
4708
|
const activeTestFiles = resolvedTestFiles.filter((f) => fileMetadata.has(f));
|
|
4695
4709
|
let transcriptProviderFactory;
|
|
4696
4710
|
if (options.transcript) {
|
|
4697
|
-
const { TranscriptProvider } = await import("./dist-XRVHRBJF.js");
|
|
4711
|
+
const { TranscriptProvider } = await import("./dist-QXVR2ZRH.js");
|
|
4698
4712
|
const transcriptProvider = await TranscriptProvider.fromFile(options.transcript);
|
|
4699
4713
|
const totalTests = [...fileMetadata.values()].reduce(
|
|
4700
4714
|
(sum, meta) => sum + meta.testCases.length,
|
|
@@ -4808,18 +4822,17 @@ async function runEvalCommand(input) {
|
|
|
4808
4822
|
await writeBenchmarkJson(benchmarkPath, allResults);
|
|
4809
4823
|
console.log(`Benchmark written to: ${benchmarkPath}`);
|
|
4810
4824
|
}
|
|
4811
|
-
if (usesDefaultArtifactWorkspace) {
|
|
4825
|
+
if (usesDefaultArtifactWorkspace && allResults.length > 0) {
|
|
4812
4826
|
const evalFile = activeTestFiles.length === 1 ? activeTestFiles[0] : "";
|
|
4813
|
-
const workspaceDir = path15.dirname(outputPath);
|
|
4814
4827
|
const {
|
|
4815
4828
|
testArtifactDir,
|
|
4816
4829
|
timingPath,
|
|
4817
4830
|
benchmarkPath: workspaceBenchmarkPath,
|
|
4818
4831
|
indexPath
|
|
4819
|
-
} = await writeArtifactsFromResults(allResults, workspaceDir, {
|
|
4832
|
+
} = await writeArtifactsFromResults(allResults, runDir, {
|
|
4820
4833
|
evalFile
|
|
4821
4834
|
});
|
|
4822
|
-
console.log(`Artifact workspace written to: ${workspaceDir}`);
|
|
4835
|
+
console.log(`Artifact workspace written to: ${runDir}`);
|
|
4823
4836
|
console.log(` Index: ${indexPath}`);
|
|
4824
4837
|
console.log(
|
|
4825
4838
|
` Per-test artifacts: ${testArtifactDir} (${allResults.length} test directories)`
|
|
@@ -4827,24 +4840,17 @@ async function runEvalCommand(input) {
|
|
|
4827
4840
|
console.log(` Timing: ${timingPath}`);
|
|
4828
4841
|
console.log(` Benchmark: ${workspaceBenchmarkPath}`);
|
|
4829
4842
|
}
|
|
4830
|
-
if (
|
|
4831
|
-
const
|
|
4832
|
-
|
|
4833
|
-
|
|
4834
|
-
|
|
4835
|
-
|
|
4836
|
-
|
|
4837
|
-
|
|
4838
|
-
} = await writeArtifactsFromResults(allResults, artifactsDir, {
|
|
4839
|
-
evalFile
|
|
4840
|
-
});
|
|
4841
|
-
console.log(`Artifacts written to: ${artifactsDir}`);
|
|
4842
|
-
console.log(` Index: ${indexPath}`);
|
|
4843
|
+
if (resolvedExportPaths.length > 0 && allResults.length > 0) {
|
|
4844
|
+
for (const exportPath of resolvedExportPaths) {
|
|
4845
|
+
const writer = await createWriterFromPath(exportPath, writerOptions);
|
|
4846
|
+
for (const result of allResults) {
|
|
4847
|
+
await writer.append(result);
|
|
4848
|
+
}
|
|
4849
|
+
await writer.close();
|
|
4850
|
+
}
|
|
4843
4851
|
console.log(
|
|
4844
|
-
`
|
|
4852
|
+
`Export file(s) written: ${resolvedExportPaths.map((p) => path15.relative(cwd, p)).join(", ")}`
|
|
4845
4853
|
);
|
|
4846
|
-
console.log(` Timing: ${timingPath}`);
|
|
4847
|
-
console.log(` Benchmark: ${abp}`);
|
|
4848
4854
|
}
|
|
4849
4855
|
const failedWithWorkspaces = allResults.filter(
|
|
4850
4856
|
(r) => r.workspacePath && (r.error || r.score < 0.5)
|
|
@@ -4856,15 +4862,8 @@ async function runEvalCommand(input) {
|
|
|
4856
4862
|
}
|
|
4857
4863
|
}
|
|
4858
4864
|
if (allResults.length > 0) {
|
|
4859
|
-
|
|
4860
|
-
console.log(`
|
|
4865
|
+
console.log(`
|
|
4861
4866
|
Results written to: ${outputPath}`);
|
|
4862
|
-
} else {
|
|
4863
|
-
console.log("\nResults written to:");
|
|
4864
|
-
for (const p of uniqueReportedOutputPaths) {
|
|
4865
|
-
console.log(` ${p}`);
|
|
4866
|
-
}
|
|
4867
|
-
}
|
|
4868
4867
|
await saveRunCache(cwd, outputPath).catch(() => void 0);
|
|
4869
4868
|
}
|
|
4870
4869
|
if (summary.executionErrorCount > 0 && !options.retryErrors) {
|
|
@@ -4985,4 +4984,4 @@ export {
|
|
|
4985
4984
|
getCategories,
|
|
4986
4985
|
filterByCategory
|
|
4987
4986
|
};
|
|
4988
|
-
//# sourceMappingURL=chunk-AX4CQS45.js.map
|
|
4987
|
+
//# sourceMappingURL=chunk-QBZJSQXV.js.map
|