agentv 3.11.1 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -12
- package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
- package/dist/{chunk-V2S5CZU3.js → chunk-2ELQ6F3C.js} +916 -523
- package/dist/chunk-2ELQ6F3C.js.map +1 -0
- package/dist/{chunk-JK6V4KVD.js → chunk-NR7QVL75.js} +32 -24
- package/dist/chunk-NR7QVL75.js.map +1 -0
- package/dist/{chunk-OYD2NB55.js → chunk-UYBLUYHN.js} +104 -15
- package/dist/chunk-UYBLUYHN.js.map +1 -0
- package/dist/{chunk-CKMAM2GD.js → chunk-VLOFRXH4.js} +461 -196
- package/dist/chunk-VLOFRXH4.js.map +1 -0
- package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
- package/dist/cli.js +5 -6
- package/dist/cli.js.map +1 -1
- package/dist/{dist-VUPMLHIV.js → dist-L6R5HJ72.js} +4 -5
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
- package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
- package/dist/index.js +5 -6
- package/dist/{interactive-FZJANO4A.js → interactive-5X62YEEX.js} +5 -6
- package/dist/{interactive-FZJANO4A.js.map → interactive-5X62YEEX.js.map} +1 -1
- package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
- package/dist/{simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js} +2 -3
- package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
- package/dist/templates/.agentv/.env.example +23 -0
- package/dist/templates/.agentv/config.yaml +13 -4
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-2QFWRIYL.js +0 -186
- package/dist/chunk-2QFWRIYL.js.map +0 -1
- package/dist/chunk-2RMPO6LY.js +0 -747
- package/dist/chunk-2RMPO6LY.js.map +0 -1
- package/dist/chunk-3Q7WIXT4.js +0 -4846
- package/dist/chunk-3Q7WIXT4.js.map +0 -1
- package/dist/chunk-73O2DCJP.js +0 -1274
- package/dist/chunk-73O2DCJP.js.map +0 -1
- package/dist/chunk-AUKF3Y3W.js +0 -212
- package/dist/chunk-AUKF3Y3W.js.map +0 -1
- package/dist/chunk-BRH7SIDP.js +0 -133
- package/dist/chunk-BRH7SIDP.js.map +0 -1
- package/dist/chunk-BXM4I3BM.js +0 -526
- package/dist/chunk-BXM4I3BM.js.map +0 -1
- package/dist/chunk-CKMAM2GD.js.map +0 -1
- package/dist/chunk-FHTURHTY.js +0 -546
- package/dist/chunk-FHTURHTY.js.map +0 -1
- package/dist/chunk-GJFXQQWG.js +0 -21
- package/dist/chunk-GJFXQQWG.js.map +0 -1
- package/dist/chunk-HKMLG4KF.js +0 -38
- package/dist/chunk-HKMLG4KF.js.map +0 -1
- package/dist/chunk-JGU3PVA4.js +0 -133
- package/dist/chunk-JGU3PVA4.js.map +0 -1
- package/dist/chunk-JK6V4KVD.js.map +0 -1
- package/dist/chunk-LHU5FGVZ.js +0 -4804
- package/dist/chunk-LHU5FGVZ.js.map +0 -1
- package/dist/chunk-OL2WGI6E.js +0 -149
- package/dist/chunk-OL2WGI6E.js.map +0 -1
- package/dist/chunk-ONETZL6N.js +0 -15
- package/dist/chunk-ONETZL6N.js.map +0 -1
- package/dist/chunk-OYD2NB55.js.map +0 -1
- package/dist/chunk-QV4UGEN6.js +0 -320
- package/dist/chunk-QV4UGEN6.js.map +0 -1
- package/dist/chunk-QXLDKGF3.js +0 -46
- package/dist/chunk-QXLDKGF3.js.map +0 -1
- package/dist/chunk-U6VEM66A.js +0 -63
- package/dist/chunk-U6VEM66A.js.map +0 -1
- package/dist/chunk-UALXHIMX.js +0 -48
- package/dist/chunk-UALXHIMX.js.map +0 -1
- package/dist/chunk-UGXG73VF.js +0 -55
- package/dist/chunk-UGXG73VF.js.map +0 -1
- package/dist/chunk-UHP5KEDL.js +0 -38
- package/dist/chunk-UHP5KEDL.js.map +0 -1
- package/dist/chunk-V2S5CZU3.js.map +0 -1
- package/dist/chunk-WVSXFZWP.js +0 -204
- package/dist/chunk-WVSXFZWP.js.map +0 -1
- package/dist/chunk-XSUMCWKO.js +0 -30
- package/dist/chunk-XSUMCWKO.js.map +0 -1
- package/dist/chunk-XUO7ZEHU.js +0 -181
- package/dist/chunk-XUO7ZEHU.js.map +0 -1
- package/dist/chunk-YSGUX5JT.js +0 -1002
- package/dist/chunk-YSGUX5JT.js.map +0 -1
- package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
- package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
- package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
- package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
- package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
- package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
- package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
- package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
- package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
- package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
- package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
- package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
- package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
- package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
- package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
- package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
- package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
- package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
- /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
- /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
- /package/dist/{dist-VUPMLHIV.js.map → dist-L6R5HJ72.js.map} +0 -0
- /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
- /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
- /package/dist/{esm-UYZ3HJBU.js.map → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map} +0 -0
- /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
package/README.md
CHANGED
|
@@ -203,14 +203,17 @@ See `agentv eval --help` for all options: workers, timeouts, output formats, tra
|
|
|
203
203
|
Write results to different formats using the `-o` flag (format auto-detected from extension):
|
|
204
204
|
|
|
205
205
|
```bash
|
|
206
|
-
#
|
|
207
|
-
agentv eval evals/my-eval.yaml
|
|
206
|
+
# Default run workspace (index.jsonl + benchmark/timing/per-test artifacts)
|
|
207
|
+
agentv eval evals/my-eval.yaml
|
|
208
208
|
|
|
209
209
|
# Self-contained HTML dashboard (opens in any browser, no server needed)
|
|
210
210
|
agentv eval evals/my-eval.yaml -o report.html
|
|
211
211
|
|
|
212
|
+
# Explicit JSONL compatibility output
|
|
213
|
+
agentv eval evals/my-eval.yaml -o results.jsonl
|
|
214
|
+
|
|
212
215
|
# Multiple formats simultaneously
|
|
213
|
-
agentv eval evals/my-eval.yaml -o
|
|
216
|
+
agentv eval evals/my-eval.yaml -o report.html
|
|
214
217
|
|
|
215
218
|
# JUnit XML for CI/CD integration
|
|
216
219
|
agentv eval evals/my-eval.yaml -o results.xml
|
|
@@ -218,10 +221,14 @@ agentv eval evals/my-eval.yaml -o results.xml
|
|
|
218
221
|
|
|
219
222
|
The HTML report auto-refreshes every 2 seconds during a live run, then locks once the run completes.
|
|
220
223
|
|
|
221
|
-
|
|
224
|
+
By default, `agentv eval` now creates a run workspace under `.agentv/results/raw/<run>/`
|
|
225
|
+
with `index.jsonl` as the primary machine-facing manifest. A compatibility `results.jsonl`
|
|
226
|
+
is still written alongside it for legacy tooling during the deprecation window.
|
|
227
|
+
|
|
228
|
+
You can also convert an existing manifest or compatibility JSONL file to HTML after the fact:
|
|
222
229
|
|
|
223
230
|
```bash
|
|
224
|
-
agentv convert results.jsonl -o report.html
|
|
231
|
+
agentv convert .agentv/results/raw/eval_<timestamp>/index.jsonl -o report.html
|
|
225
232
|
```
|
|
226
233
|
|
|
227
234
|
#### Timeouts
|
|
@@ -352,7 +359,7 @@ agentv create eval my-eval # → evals/my-eval.eval.yaml + .cases.jsonl
|
|
|
352
359
|
Compare a combined results file across all targets (N-way matrix):
|
|
353
360
|
|
|
354
361
|
```bash
|
|
355
|
-
agentv compare results.jsonl
|
|
362
|
+
agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl
|
|
356
363
|
```
|
|
357
364
|
|
|
358
365
|
```
|
|
@@ -373,8 +380,8 @@ Pairwise Summary:
|
|
|
373
380
|
Designate a baseline for CI regression gating, or compare two specific targets:
|
|
374
381
|
|
|
375
382
|
```bash
|
|
376
|
-
agentv compare results.jsonl --baseline gpt-4.1
|
|
377
|
-
agentv compare results.jsonl --baseline gpt-4.1 --candidate gpt-5-mini
|
|
383
|
+
agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1
|
|
384
|
+
agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1 --candidate gpt-5-mini
|
|
378
385
|
agentv compare before.jsonl after.jsonl # two-file pairwise
|
|
379
386
|
```
|
|
380
387
|
|
|
@@ -518,10 +525,7 @@ tests:
|
|
|
518
525
|
|
|
519
526
|
Scoring: `(satisfied weights) / (total weights)` → verdicts: `pass` (≥0.8), `borderline` (≥0.6), `fail`
|
|
520
527
|
|
|
521
|
-
|
|
522
|
-
```bash
|
|
523
|
-
agentv generate rubrics evals/my-eval.yaml
|
|
524
|
-
```
|
|
528
|
+
Author assertions directly in your eval file. When you want help choosing between simple assertions, deterministic graders, and LLM-based graders, use the `agentv-eval-writer` skill.
|
|
525
529
|
|
|
526
530
|
See [rubric evaluator](https://agentv.dev/evaluation/rubrics/) for detailed patterns.
|
|
527
531
|
|
|
@@ -2,9 +2,8 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
|
|
|
2
2
|
import {
|
|
3
3
|
AgentvProvider
|
|
4
4
|
} from "./chunk-BJV6MDBE.js";
|
|
5
|
-
import "./chunk-UGXG73VF.js";
|
|
6
5
|
import "./chunk-5H446C7X.js";
|
|
7
6
|
export {
|
|
8
7
|
AgentvProvider
|
|
9
8
|
};
|
|
10
|
-
//# sourceMappingURL=agentv-provider-MIDKLYIH-6LIYKQRP.js.map
|
|
9
|
+
//# sourceMappingURL=agentv-provider-NFFLXG5M-TJAWCWCX.js.map
|