agentv 3.12.0 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -6
- package/dist/{chunk-VLOFRXH4.js → chunk-6H4IAXQH.js} +13 -41
- package/dist/chunk-6H4IAXQH.js.map +1 -0
- package/dist/{chunk-UYBLUYHN.js → chunk-7OHZAFND.js} +18 -16
- package/dist/chunk-7OHZAFND.js.map +1 -0
- package/dist/{chunk-2ELQ6F3C.js → chunk-DJU4C6NS.js} +13 -21
- package/dist/chunk-DJU4C6NS.js.map +1 -0
- package/dist/cli.js +3 -4
- package/dist/cli.js.map +1 -1
- package/dist/{dist-L6R5HJ72.js → dist-SMKOBBFB.js} +2 -6
- package/dist/index.js +3 -4
- package/dist/{interactive-5X62YEEX.js → interactive-RV664PCR.js} +3 -4
- package/dist/{interactive-5X62YEEX.js.map → interactive-RV664PCR.js.map} +1 -1
- package/dist/templates/.agentv/config.yaml +4 -13
- package/dist/templates/.agentv/targets.yaml +0 -16
- package/package.json +1 -1
- package/dist/chunk-2ELQ6F3C.js.map +0 -1
- package/dist/chunk-NR7QVL75.js +0 -122
- package/dist/chunk-NR7QVL75.js.map +0 -1
- package/dist/chunk-UYBLUYHN.js.map +0 -1
- package/dist/chunk-VLOFRXH4.js.map +0 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js +0 -9
- package/dist/simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map +0 -1
- package/dist/templates/.agentv/.env.example +0 -23
- package/dist/{dist-L6R5HJ72.js.map → dist-SMKOBBFB.js.map} +0 -0
package/README.md
CHANGED
|
@@ -209,8 +209,8 @@ agentv eval evals/my-eval.yaml
|
|
|
209
209
|
# Self-contained HTML dashboard (opens in any browser, no server needed)
|
|
210
210
|
agentv eval evals/my-eval.yaml -o report.html
|
|
211
211
|
|
|
212
|
-
# Explicit JSONL
|
|
213
|
-
agentv eval evals/my-eval.yaml -o
|
|
212
|
+
# Explicit JSONL output
|
|
213
|
+
agentv eval evals/my-eval.yaml -o output.jsonl
|
|
214
214
|
|
|
215
215
|
# Multiple formats simultaneously
|
|
216
216
|
agentv eval evals/my-eval.yaml -o report.html
|
|
@@ -221,11 +221,10 @@ agentv eval evals/my-eval.yaml -o results.xml
|
|
|
221
221
|
|
|
222
222
|
The HTML report auto-refreshes every 2 seconds during a live run, then locks once the run completes.
|
|
223
223
|
|
|
224
|
-
By default, `agentv eval`
|
|
225
|
-
with `index.jsonl` as the
|
|
226
|
-
is still written alongside it for legacy tooling during the deprecation window.
|
|
224
|
+
By default, `agentv eval` creates a run workspace under `.agentv/results/raw/<run>/`
|
|
225
|
+
with `index.jsonl` as the machine-facing manifest.
|
|
227
226
|
|
|
228
|
-
You can also convert an existing manifest
|
|
227
|
+
You can also convert an existing manifest to HTML after the fact:
|
|
229
228
|
|
|
230
229
|
```bash
|
|
231
230
|
agentv convert .agentv/results/raw/eval_<timestamp>/index.jsonl -o report.html
|
|
@@ -27,12 +27,12 @@ import {
|
|
|
27
27
|
subscribeToCopilotCliLogEntries,
|
|
28
28
|
subscribeToCopilotSdkLogEntries,
|
|
29
29
|
subscribeToPiLogEntries
|
|
30
|
-
} from "./chunk-UYBLUYHN.js";
|
|
30
|
+
} from "./chunk-7OHZAFND.js";
|
|
31
31
|
|
|
32
32
|
// package.json
|
|
33
33
|
var package_default = {
|
|
34
34
|
name: "agentv",
|
|
35
|
-
version: "3.12.0",
|
|
35
|
+
version: "3.13.0",
|
|
36
36
|
description: "CLI entry point for AgentV",
|
|
37
37
|
type: "module",
|
|
38
38
|
repository: {
|
|
@@ -302,7 +302,6 @@ function toSnakeCaseDeep(obj) {
|
|
|
302
302
|
import { existsSync, statSync } from "node:fs";
|
|
303
303
|
import path3 from "node:path";
|
|
304
304
|
var RESULT_INDEX_FILENAME = "index.jsonl";
|
|
305
|
-
var LEGACY_RESULTS_FILENAME = "results.jsonl";
|
|
306
305
|
function createRunDirName(timestamp = /* @__PURE__ */ new Date()) {
|
|
307
306
|
return `eval_${timestamp.toISOString().replace(/[:.]/g, "-")}`;
|
|
308
307
|
}
|
|
@@ -312,18 +311,11 @@ function buildDefaultRunDir(cwd) {
|
|
|
312
311
|
function resolveRunIndexPath(runDir) {
|
|
313
312
|
return path3.join(runDir, RESULT_INDEX_FILENAME);
|
|
314
313
|
}
|
|
315
|
-
function resolveRunLegacyResultsPath(runDir) {
|
|
316
|
-
return path3.join(runDir, LEGACY_RESULTS_FILENAME);
|
|
317
|
-
}
|
|
318
314
|
function resolveExistingRunPrimaryPath(runDir) {
|
|
319
315
|
const indexPath = resolveRunIndexPath(runDir);
|
|
320
316
|
if (existsSync(indexPath)) {
|
|
321
317
|
return indexPath;
|
|
322
318
|
}
|
|
323
|
-
const legacyPath = resolveRunLegacyResultsPath(runDir);
|
|
324
|
-
if (existsSync(legacyPath)) {
|
|
325
|
-
return legacyPath;
|
|
326
|
-
}
|
|
327
319
|
return void 0;
|
|
328
320
|
}
|
|
329
321
|
function isDirectoryPath(filePath) {
|
|
@@ -339,9 +331,7 @@ function resolveWorkspaceOrFilePath(filePath) {
|
|
|
339
331
|
}
|
|
340
332
|
const existing = resolveExistingRunPrimaryPath(filePath);
|
|
341
333
|
if (!existing) {
|
|
342
|
-
throw new Error(
|
|
343
|
-
`Result workspace is missing ${RESULT_INDEX_FILENAME} and ${LEGACY_RESULTS_FILENAME}: ${filePath}`
|
|
344
|
-
);
|
|
334
|
+
throw new Error(`Result workspace is missing ${RESULT_INDEX_FILENAME}: ${filePath}`);
|
|
345
335
|
}
|
|
346
336
|
return existing;
|
|
347
337
|
}
|
|
@@ -670,7 +660,6 @@ async function writeArtifactsFromResults(results, outputDir, options) {
|
|
|
670
660
|
const timingPath = path4.join(outputDir, "timing.json");
|
|
671
661
|
const benchmarkPath = path4.join(outputDir, "benchmark.json");
|
|
672
662
|
const indexPath = path4.join(outputDir, RESULT_INDEX_FILENAME);
|
|
673
|
-
const legacyResultsPath = options?.writeLegacyResults ? path4.join(outputDir, LEGACY_RESULTS_FILENAME) : void 0;
|
|
674
663
|
await mkdir(outputDir, { recursive: true });
|
|
675
664
|
const indexRecords = [];
|
|
676
665
|
for (const result of results) {
|
|
@@ -707,10 +696,7 @@ async function writeArtifactsFromResults(results, outputDir, options) {
|
|
|
707
696
|
await writeFile(benchmarkPath, `${JSON.stringify(benchmark, null, 2)}
|
|
708
697
|
`, "utf8");
|
|
709
698
|
await writeJsonlFile(indexPath, indexRecords);
|
|
710
|
-
|
|
711
|
-
await writeJsonlFile(legacyResultsPath, results);
|
|
712
|
-
}
|
|
713
|
-
return { testArtifactDir, timingPath, benchmarkPath, indexPath, legacyResultsPath };
|
|
699
|
+
return { testArtifactDir, timingPath, benchmarkPath, indexPath };
|
|
714
700
|
}
|
|
715
701
|
|
|
716
702
|
// src/commands/eval/benchmark-writer.ts
|
|
@@ -2141,7 +2127,7 @@ async function saveRunCache(cwd, resultPath) {
|
|
|
2141
2127
|
const dir = path13.join(cwd, ".agentv");
|
|
2142
2128
|
await mkdir7(dir, { recursive: true });
|
|
2143
2129
|
const basename = path13.basename(resultPath);
|
|
2144
|
-
const cache = basename === RESULT_INDEX_FILENAME
|
|
2130
|
+
const cache = basename === RESULT_INDEX_FILENAME ? {
|
|
2145
2131
|
lastRunDir: path13.dirname(resultPath),
|
|
2146
2132
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
2147
2133
|
} : {
|
|
@@ -4050,7 +4036,6 @@ function normalizeOptions(rawOptions, config, yamlExecution) {
|
|
|
4050
4036
|
verbose: normalizeBoolean(rawOptions.verbose) || yamlExecution?.verbose === true || config?.execution?.verbose === true,
|
|
4051
4037
|
// Precedence: CLI > YAML config > TS config
|
|
4052
4038
|
otelFile: normalizeString(rawOptions.otelFile) ?? (yamlExecution?.otel_file ? resolveTimestampPlaceholder(yamlExecution.otel_file) : void 0) ?? (config?.execution?.otelFile ? resolveTimestampPlaceholder(config.execution.otelFile) : void 0),
|
|
4053
|
-
traceFile: normalizeString(rawOptions.traceFile) ?? (yamlExecution?.trace_file ? resolveTimestampPlaceholder(yamlExecution.trace_file) : void 0) ?? (config?.execution?.traceFile ? resolveTimestampPlaceholder(config.execution.traceFile) : void 0),
|
|
4054
4039
|
exportOtel: normalizeBoolean(rawOptions.exportOtel) || yamlExecution?.export_otel === true,
|
|
4055
4040
|
otelBackend: normalizeString(rawOptions.otelBackend) ?? yamlExecution?.otel_backend,
|
|
4056
4041
|
otelCaptureContent: normalizeBoolean(rawOptions.otelCaptureContent) || yamlExecution?.otel_capture_content === true,
|
|
@@ -4418,13 +4403,11 @@ async function runEvalCommand(input) {
|
|
|
4418
4403
|
}
|
|
4419
4404
|
const usesDefaultArtifactWorkspace = !options.outPath;
|
|
4420
4405
|
const outputPath = options.outPath ? path15.resolve(options.outPath) : buildDefaultOutputPath(cwd);
|
|
4421
|
-
const defaultTraceFile = usesDefaultArtifactWorkspace && !options.traceFile ? path15.join(path15.dirname(outputPath), "trace.jsonl") : void 0;
|
|
4422
|
-
const traceFilePath = options.traceFile ? path15.resolve(options.traceFile) : defaultTraceFile;
|
|
4423
4406
|
let otelExporter = null;
|
|
4424
|
-
const useFileExport = !!
|
|
4407
|
+
const useFileExport = !!options.otelFile;
|
|
4425
4408
|
if (options.exportOtel || useFileExport) {
|
|
4426
4409
|
try {
|
|
4427
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-L6R5HJ72.js");
|
|
4410
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-SMKOBBFB.js");
|
|
4428
4411
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4429
4412
|
let headers = {};
|
|
4430
4413
|
if (options.otelBackend) {
|
|
@@ -4448,8 +4431,7 @@ async function runEvalCommand(input) {
|
|
|
4448
4431
|
headers,
|
|
4449
4432
|
captureContent,
|
|
4450
4433
|
groupTurns: options.otelGroupTurns,
|
|
4451
|
-
otlpFilePath: options.otelFile ? path15.resolve(options.otelFile) : void 0,
|
|
4452
|
-
traceFilePath
|
|
4434
|
+
otlpFilePath: options.otelFile ? path15.resolve(options.otelFile) : void 0
|
|
4453
4435
|
});
|
|
4454
4436
|
const initialized = await otelExporter.init();
|
|
4455
4437
|
if (!initialized) {
|
|
@@ -4465,7 +4447,7 @@ async function runEvalCommand(input) {
|
|
|
4465
4447
|
otelExporter = null;
|
|
4466
4448
|
}
|
|
4467
4449
|
}
|
|
4468
|
-
const primaryWritePath =
|
|
4450
|
+
const primaryWritePath = outputPath;
|
|
4469
4451
|
const extraOutputPaths = options.outputPaths.map((p) => path15.resolve(p));
|
|
4470
4452
|
const allOutputPaths = extraOutputPaths.length > 0 ? [primaryWritePath, ...extraOutputPaths] : [primaryWritePath];
|
|
4471
4453
|
const uniqueOutputPaths = [...new Set(allOutputPaths)];
|
|
@@ -4486,9 +4468,6 @@ async function runEvalCommand(input) {
|
|
|
4486
4468
|
if (options.otelFile) {
|
|
4487
4469
|
console.log(`OTLP JSON file: ${path15.resolve(options.otelFile)}`);
|
|
4488
4470
|
}
|
|
4489
|
-
if (traceFilePath) {
|
|
4490
|
-
console.log(`Trace file: ${traceFilePath}`);
|
|
4491
|
-
}
|
|
4492
4471
|
const evaluationRunner = await resolveEvaluationRunner();
|
|
4493
4472
|
const allResults = [];
|
|
4494
4473
|
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
@@ -4676,11 +4655,9 @@ async function runEvalCommand(input) {
|
|
|
4676
4655
|
testArtifactDir,
|
|
4677
4656
|
timingPath,
|
|
4678
4657
|
benchmarkPath: workspaceBenchmarkPath,
|
|
4679
|
-
indexPath,
|
|
4680
|
-
legacyResultsPath
|
|
4658
|
+
indexPath
|
|
4681
4659
|
} = await writeArtifactsFromResults(allResults, workspaceDir, {
|
|
4682
|
-
evalFile,
|
|
4683
|
-
writeLegacyResults: true
|
|
4660
|
+
evalFile
|
|
4684
4661
|
});
|
|
4685
4662
|
console.log(`Artifact workspace written to: ${workspaceDir}`);
|
|
4686
4663
|
console.log(` Index: ${indexPath}`);
|
|
@@ -4689,9 +4666,6 @@ async function runEvalCommand(input) {
|
|
|
4689
4666
|
);
|
|
4690
4667
|
console.log(` Timing: ${timingPath}`);
|
|
4691
4668
|
console.log(` Benchmark: ${workspaceBenchmarkPath}`);
|
|
4692
|
-
if (legacyResultsPath) {
|
|
4693
|
-
console.log(` Compatibility output: ${legacyResultsPath} (deprecated)`);
|
|
4694
|
-
}
|
|
4695
4669
|
}
|
|
4696
4670
|
if (options.artifacts) {
|
|
4697
4671
|
const artifactsDir = path15.resolve(options.artifacts);
|
|
@@ -4702,8 +4676,7 @@ async function runEvalCommand(input) {
|
|
|
4702
4676
|
timingPath,
|
|
4703
4677
|
benchmarkPath: abp
|
|
4704
4678
|
} = await writeArtifactsFromResults(allResults, artifactsDir, {
|
|
4705
|
-
evalFile,
|
|
4706
|
-
writeLegacyResults: false
|
|
4679
|
+
evalFile
|
|
4707
4680
|
});
|
|
4708
4681
|
console.log(`Artifacts written to: ${artifactsDir}`);
|
|
4709
4682
|
console.log(` Index: ${indexPath}`);
|
|
@@ -4785,7 +4758,6 @@ export {
|
|
|
4785
4758
|
package_default,
|
|
4786
4759
|
toSnakeCaseDeep,
|
|
4787
4760
|
RESULT_INDEX_FILENAME,
|
|
4788
|
-
LEGACY_RESULTS_FILENAME,
|
|
4789
4761
|
resolveExistingRunPrimaryPath,
|
|
4790
4762
|
resolveWorkspaceOrFilePath,
|
|
4791
4763
|
writeArtifactsFromResults,
|
|
@@ -4807,4 +4779,4 @@ export {
|
|
|
4807
4779
|
selectTarget,
|
|
4808
4780
|
runEvalCommand
|
|
4809
4781
|
};
|
|
4810
|
-
//# sourceMappingURL=chunk-VLOFRXH4.js.map
|
|
4782
|
+
//# sourceMappingURL=chunk-6H4IAXQH.js.map
|