agentv 3.12.0 → 3.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -10
- package/dist/{chunk-UYBLUYHN.js → chunk-K747KGDP.js} +64 -49
- package/dist/chunk-K747KGDP.js.map +1 -0
- package/dist/{chunk-VLOFRXH4.js → chunk-LSXO22CF.js} +17 -43
- package/dist/chunk-LSXO22CF.js.map +1 -0
- package/dist/{chunk-2ELQ6F3C.js → chunk-UK7UMQOX.js} +29 -34
- package/dist/chunk-UK7UMQOX.js.map +1 -0
- package/dist/cli.js +3 -4
- package/dist/cli.js.map +1 -1
- package/dist/{dist-L6R5HJ72.js → dist-LCZDS36N.js} +2 -6
- package/dist/index.js +3 -4
- package/dist/{interactive-5X62YEEX.js → interactive-76ZJVPI7.js} +3 -4
- package/dist/{interactive-5X62YEEX.js.map → interactive-76ZJVPI7.js.map} +1 -1
- package/package.json +1 -1
- package/dist/chunk-2ELQ6F3C.js.map +0 -1
- package/dist/chunk-NR7QVL75.js +0 -122
- package/dist/chunk-NR7QVL75.js.map +0 -1
- package/dist/chunk-UYBLUYHN.js.map +0 -1
- package/dist/chunk-VLOFRXH4.js.map +0 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js +0 -9
- package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map +0 -1
- /package/dist/{dist-L6R5HJ72.js.map → dist-LCZDS36N.js.map} +0 -0
|
@@ -27,12 +27,12 @@ import {
|
|
|
27
27
|
subscribeToCopilotCliLogEntries,
|
|
28
28
|
subscribeToCopilotSdkLogEntries,
|
|
29
29
|
subscribeToPiLogEntries
|
|
30
|
-
} from "./chunk-UYBLUYHN.js";
|
|
30
|
+
} from "./chunk-K747KGDP.js";
|
|
31
31
|
|
|
32
32
|
// package.json
|
|
33
33
|
var package_default = {
|
|
34
34
|
name: "agentv",
|
|
35
|
-
version: "3.12.0",
|
|
35
|
+
version: "3.13.1",
|
|
36
36
|
description: "CLI entry point for AgentV",
|
|
37
37
|
type: "module",
|
|
38
38
|
repository: {
|
|
@@ -302,28 +302,21 @@ function toSnakeCaseDeep(obj) {
|
|
|
302
302
|
import { existsSync, statSync } from "node:fs";
|
|
303
303
|
import path3 from "node:path";
|
|
304
304
|
var RESULT_INDEX_FILENAME = "index.jsonl";
|
|
305
|
-
var
|
|
305
|
+
var RESULT_RUNS_DIRNAME = "runs";
|
|
306
306
|
function createRunDirName(timestamp = /* @__PURE__ */ new Date()) {
|
|
307
307
|
return `eval_${timestamp.toISOString().replace(/[:.]/g, "-")}`;
|
|
308
308
|
}
|
|
309
309
|
function buildDefaultRunDir(cwd) {
|
|
310
|
-
return path3.join(cwd, ".agentv", "results",
|
|
310
|
+
return path3.join(cwd, ".agentv", "results", RESULT_RUNS_DIRNAME, createRunDirName());
|
|
311
311
|
}
|
|
312
312
|
function resolveRunIndexPath(runDir) {
|
|
313
313
|
return path3.join(runDir, RESULT_INDEX_FILENAME);
|
|
314
314
|
}
|
|
315
|
-
function resolveRunLegacyResultsPath(runDir) {
|
|
316
|
-
return path3.join(runDir, LEGACY_RESULTS_FILENAME);
|
|
317
|
-
}
|
|
318
315
|
function resolveExistingRunPrimaryPath(runDir) {
|
|
319
316
|
const indexPath = resolveRunIndexPath(runDir);
|
|
320
317
|
if (existsSync(indexPath)) {
|
|
321
318
|
return indexPath;
|
|
322
319
|
}
|
|
323
|
-
const legacyPath = resolveRunLegacyResultsPath(runDir);
|
|
324
|
-
if (existsSync(legacyPath)) {
|
|
325
|
-
return legacyPath;
|
|
326
|
-
}
|
|
327
320
|
return void 0;
|
|
328
321
|
}
|
|
329
322
|
function isDirectoryPath(filePath) {
|
|
@@ -339,9 +332,7 @@ function resolveWorkspaceOrFilePath(filePath) {
|
|
|
339
332
|
}
|
|
340
333
|
const existing = resolveExistingRunPrimaryPath(filePath);
|
|
341
334
|
if (!existing) {
|
|
342
|
-
throw new Error(
|
|
343
|
-
`Result workspace is missing ${RESULT_INDEX_FILENAME} and ${LEGACY_RESULTS_FILENAME}: ${filePath}`
|
|
344
|
-
);
|
|
335
|
+
throw new Error(`Result workspace is missing ${RESULT_INDEX_FILENAME}: ${filePath}`);
|
|
345
336
|
}
|
|
346
337
|
return existing;
|
|
347
338
|
}
|
|
@@ -557,7 +548,7 @@ function buildBenchmarkArtifact(results, evalFile = "") {
|
|
|
557
548
|
tests_run: testIds
|
|
558
549
|
},
|
|
559
550
|
run_summary: runSummary,
|
|
560
|
-
|
|
551
|
+
per_grader_summary: perEvaluatorSummary,
|
|
561
552
|
notes
|
|
562
553
|
};
|
|
563
554
|
}
|
|
@@ -670,7 +661,6 @@ async function writeArtifactsFromResults(results, outputDir, options) {
|
|
|
670
661
|
const timingPath = path4.join(outputDir, "timing.json");
|
|
671
662
|
const benchmarkPath = path4.join(outputDir, "benchmark.json");
|
|
672
663
|
const indexPath = path4.join(outputDir, RESULT_INDEX_FILENAME);
|
|
673
|
-
const legacyResultsPath = options?.writeLegacyResults ? path4.join(outputDir, LEGACY_RESULTS_FILENAME) : void 0;
|
|
674
664
|
await mkdir(outputDir, { recursive: true });
|
|
675
665
|
const indexRecords = [];
|
|
676
666
|
for (const result of results) {
|
|
@@ -707,10 +697,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
|
|
|
707
697
|
await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
|
|
708
698
|
`, "utf8");
|
|
709
699
|
await writeJsonlFile(indexPath, indexRecords);
|
|
710
|
-
|
|
711
|
-
await writeJsonlFile(legacyResultsPath, results);
|
|
712
|
-
}
|
|
713
|
-
return { testArtifactDir, timingPath, benchmarkPath, indexPath, legacyResultsPath };
|
|
700
|
+
return { testArtifactDir, timingPath, benchmarkPath, indexPath };
|
|
714
701
|
}
|
|
715
702
|
|
|
716
703
|
// src/commands/eval/benchmark-writer.ts
|
|
@@ -2141,7 +2128,7 @@ async function saveRunCache(cwd, resultPath) {
|
|
|
2141
2128
|
const dir = path13.join(cwd, ".agentv");
|
|
2142
2129
|
await mkdir7(dir, { recursive: true });
|
|
2143
2130
|
const basename = path13.basename(resultPath);
|
|
2144
|
-
const cache = basename === RESULT_INDEX_FILENAME
|
|
2131
|
+
const cache = basename === RESULT_INDEX_FILENAME ? {
|
|
2145
2132
|
lastRunDir: path13.dirname(resultPath),
|
|
2146
2133
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
2147
2134
|
} : {
|
|
@@ -4050,7 +4037,6 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
4050
4037
|
verbose: normalizeBoolean(rawOptions.verbose) || yamlExecution?.verbose === true || config?.execution?.verbose === true,
|
|
4051
4038
|
// Precedence: CLI > YAML config > TS config
|
|
4052
4039
|
otelFile: normalizeString(rawOptions.otelFile) ?? (yamlExecution?.otel_file ? resolveTimestampPlaceholder(yamlExecution.otel_file) : void 0) ?? (config?.execution?.otelFile ? resolveTimestampPlaceholder(config.execution.otelFile) : void 0),
|
|
4053
|
-
traceFile: normalizeString(rawOptions.traceFile) ?? (yamlExecution?.trace_file ? resolveTimestampPlaceholder(yamlExecution.trace_file) : void 0) ?? (config?.execution?.traceFile ? resolveTimestampPlaceholder(config.execution.traceFile) : void 0),
|
|
4054
4040
|
exportOtel: normalizeBoolean(rawOptions.exportOtel) || yamlExecution?.export_otel === true,
|
|
4055
4041
|
otelBackend: normalizeString(rawOptions.otelBackend) ?? yamlExecution?.otel_backend,
|
|
4056
4042
|
otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent) || yamlExecution?.otel_capture_content === true,
|
|
@@ -4418,13 +4404,11 @@ async function runEvalCommand(input) {
|
|
|
4418
4404
|
}
|
|
4419
4405
|
const usesDefaultArtifactWorkspace = !options.outPath;
|
|
4420
4406
|
const outputPath = options.outPath ? path15.resolve(options.outPath) : buildDefaultOutputPath(cwd);
|
|
4421
|
-
const defaultTraceFile = usesDefaultArtifactWorkspace && !options.traceFile ? path15.join(path15.dirname(outputPath), "trace.jsonl") : void 0;
|
|
4422
|
-
const traceFilePath = options.traceFile ? path15.resolve(options.traceFile) : defaultTraceFile;
|
|
4423
4407
|
let otelExporter = null;
|
|
4424
|
-
const useFileExport = !!
|
|
4408
|
+
const useFileExport = !!options.otelFile;
|
|
4425
4409
|
if (options.exportOtel || useFileExport) {
|
|
4426
4410
|
try {
|
|
4427
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-L6R5HJ72.js");
|
|
4411
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-LCZDS36N.js");
|
|
4428
4412
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4429
4413
|
let headers = {};
|
|
4430
4414
|
if (options.otelBackend) {
|
|
@@ -4448,8 +4432,7 @@ async function runEvalCommand(input) {
|
|
|
4448
4432
|
headers,
|
|
4449
4433
|
captureContent,
|
|
4450
4434
|
groupTurns: options.otelGroupTurns,
|
|
4451
|
-
otlpFilePath: options.otelFile ? path15.resolve(options.otelFile) : void 0
|
|
4452
|
-
traceFilePath
|
|
4435
|
+
otlpFilePath: options.otelFile ? path15.resolve(options.otelFile) : void 0
|
|
4453
4436
|
});
|
|
4454
4437
|
const initialized = await otelExporter.init();
|
|
4455
4438
|
if (!initialized) {
|
|
@@ -4465,7 +4448,7 @@ async function runEvalCommand(input) {
|
|
|
4465
4448
|
otelExporter = null;
|
|
4466
4449
|
}
|
|
4467
4450
|
}
|
|
4468
|
-
const primaryWritePath =
|
|
4451
|
+
const primaryWritePath = outputPath;
|
|
4469
4452
|
const extraOutputPaths = options.outputPaths.map((p) => path15.resolve(p));
|
|
4470
4453
|
const allOutputPaths = extraOutputPaths.length > 0 ? [primaryWritePath, ...extraOutputPaths] : [primaryWritePath];
|
|
4471
4454
|
const uniqueOutputPaths = [...new Set(allOutputPaths)];
|
|
@@ -4486,9 +4469,6 @@ async function runEvalCommand(input) {
|
|
|
4486
4469
|
if (options.otelFile) {
|
|
4487
4470
|
console.log(`OTLP JSON file: ${path15.resolve(options.otelFile)}`);
|
|
4488
4471
|
}
|
|
4489
|
-
if (traceFilePath) {
|
|
4490
|
-
console.log(`Trace file: ${traceFilePath}`);
|
|
4491
|
-
}
|
|
4492
4472
|
const evaluationRunner = await resolveEvaluationRunner();
|
|
4493
4473
|
const allResults = [];
|
|
4494
4474
|
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
@@ -4676,11 +4656,9 @@ async function runEvalCommand(input) {
|
|
|
4676
4656
|
testArtifactDir,
|
|
4677
4657
|
timingPath,
|
|
4678
4658
|
benchmarkPath: workspaceBenchmarkPath,
|
|
4679
|
-
indexPath
|
|
4680
|
-
legacyResultsPath
|
|
4659
|
+
indexPath
|
|
4681
4660
|
} = await writeArtifactsFromResults(allResults, workspaceDir, {
|
|
4682
|
-
evalFile
|
|
4683
|
-
writeLegacyResults: true
|
|
4661
|
+
evalFile
|
|
4684
4662
|
});
|
|
4685
4663
|
console.log(`Artifact workspace written to: ${workspaceDir}`);
|
|
4686
4664
|
console.log(` Index: ${indexPath}`);
|
|
@@ -4689,9 +4667,6 @@ async function runEvalCommand(input) {
|
|
|
4689
4667
|
);
|
|
4690
4668
|
console.log(` Timing: ${timingPath}`);
|
|
4691
4669
|
console.log(` Benchmark: ${workspaceBenchmarkPath}`);
|
|
4692
|
-
if (legacyResultsPath) {
|
|
4693
|
-
console.log(` Compatibility output: ${legacyResultsPath} (deprecated)`);
|
|
4694
|
-
}
|
|
4695
4670
|
}
|
|
4696
4671
|
if (options.artifacts) {
|
|
4697
4672
|
const artifactsDir = path15.resolve(options.artifacts);
|
|
@@ -4702,8 +4677,7 @@ async function runEvalCommand(input) {
|
|
|
4702
4677
|
timingPath,
|
|
4703
4678
|
benchmarkPath: abp
|
|
4704
4679
|
} = await writeArtifactsFromResults(allResults, artifactsDir, {
|
|
4705
|
-
evalFile
|
|
4706
|
-
writeLegacyResults: false
|
|
4680
|
+
evalFile
|
|
4707
4681
|
});
|
|
4708
4682
|
console.log(`Artifacts written to: ${artifactsDir}`);
|
|
4709
4683
|
console.log(` Index: ${indexPath}`);
|
|
@@ -4785,7 +4759,7 @@ export {
|
|
|
4785
4759
|
package_default,
|
|
4786
4760
|
toSnakeCaseDeep,
|
|
4787
4761
|
RESULT_INDEX_FILENAME,
|
|
4788
|
-
|
|
4762
|
+
RESULT_RUNS_DIRNAME,
|
|
4789
4763
|
resolveExistingRunPrimaryPath,
|
|
4790
4764
|
resolveWorkspaceOrFilePath,
|
|
4791
4765
|
writeArtifactsFromResults,
|
|
@@ -4807,4 +4781,4 @@ export {
|
|
|
4807
4781
|
selectTarget,
|
|
4808
4782
|
runEvalCommand
|
|
4809
4783
|
};
|
|
4810
|
-
//# sourceMappingURL=chunk-VLOFRXH4.js.map
|
|
4784
|
+
//# sourceMappingURL=chunk-LSXO22CF.js.map
|