agentv 3.11.1 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -12
- package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
- package/dist/{chunk-CKMAM2GD.js → chunk-6H4IAXQH.js} +435 -198
- package/dist/chunk-6H4IAXQH.js.map +1 -0
- package/dist/{chunk-OYD2NB55.js → chunk-7OHZAFND.js} +120 -29
- package/dist/chunk-7OHZAFND.js.map +1 -0
- package/dist/{chunk-V2S5CZU3.js → chunk-DJU4C6NS.js} +914 -529
- package/dist/chunk-DJU4C6NS.js.map +1 -0
- package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
- package/dist/cli.js +4 -6
- package/dist/cli.js.map +1 -1
- package/dist/{dist-VUPMLHIV.js → dist-SMKOBBFB.js} +3 -8
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
- package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
- package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
- package/dist/index.js +4 -6
- package/dist/{interactive-FZJANO4A.js → interactive-RV664PCR.js} +4 -6
- package/dist/{interactive-FZJANO4A.js.map → interactive-RV664PCR.js.map} +1 -1
- package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
- package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-2QFWRIYL.js +0 -186
- package/dist/chunk-2QFWRIYL.js.map +0 -1
- package/dist/chunk-2RMPO6LY.js +0 -747
- package/dist/chunk-2RMPO6LY.js.map +0 -1
- package/dist/chunk-3Q7WIXT4.js +0 -4846
- package/dist/chunk-3Q7WIXT4.js.map +0 -1
- package/dist/chunk-73O2DCJP.js +0 -1274
- package/dist/chunk-73O2DCJP.js.map +0 -1
- package/dist/chunk-AUKF3Y3W.js +0 -212
- package/dist/chunk-AUKF3Y3W.js.map +0 -1
- package/dist/chunk-BRH7SIDP.js +0 -133
- package/dist/chunk-BRH7SIDP.js.map +0 -1
- package/dist/chunk-BXM4I3BM.js +0 -526
- package/dist/chunk-BXM4I3BM.js.map +0 -1
- package/dist/chunk-CKMAM2GD.js.map +0 -1
- package/dist/chunk-FHTURHTY.js +0 -546
- package/dist/chunk-FHTURHTY.js.map +0 -1
- package/dist/chunk-GJFXQQWG.js +0 -21
- package/dist/chunk-GJFXQQWG.js.map +0 -1
- package/dist/chunk-HKMLG4KF.js +0 -38
- package/dist/chunk-HKMLG4KF.js.map +0 -1
- package/dist/chunk-JGU3PVA4.js +0 -133
- package/dist/chunk-JGU3PVA4.js.map +0 -1
- package/dist/chunk-JK6V4KVD.js +0 -114
- package/dist/chunk-JK6V4KVD.js.map +0 -1
- package/dist/chunk-LHU5FGVZ.js +0 -4804
- package/dist/chunk-LHU5FGVZ.js.map +0 -1
- package/dist/chunk-OL2WGI6E.js +0 -149
- package/dist/chunk-OL2WGI6E.js.map +0 -1
- package/dist/chunk-ONETZL6N.js +0 -15
- package/dist/chunk-ONETZL6N.js.map +0 -1
- package/dist/chunk-OYD2NB55.js.map +0 -1
- package/dist/chunk-QV4UGEN6.js +0 -320
- package/dist/chunk-QV4UGEN6.js.map +0 -1
- package/dist/chunk-QXLDKGF3.js +0 -46
- package/dist/chunk-QXLDKGF3.js.map +0 -1
- package/dist/chunk-U6VEM66A.js +0 -63
- package/dist/chunk-U6VEM66A.js.map +0 -1
- package/dist/chunk-UALXHIMX.js +0 -48
- package/dist/chunk-UALXHIMX.js.map +0 -1
- package/dist/chunk-UGXG73VF.js +0 -55
- package/dist/chunk-UGXG73VF.js.map +0 -1
- package/dist/chunk-UHP5KEDL.js +0 -38
- package/dist/chunk-UHP5KEDL.js.map +0 -1
- package/dist/chunk-V2S5CZU3.js.map +0 -1
- package/dist/chunk-WVSXFZWP.js +0 -204
- package/dist/chunk-WVSXFZWP.js.map +0 -1
- package/dist/chunk-XSUMCWKO.js +0 -30
- package/dist/chunk-XSUMCWKO.js.map +0 -1
- package/dist/chunk-XUO7ZEHU.js +0 -181
- package/dist/chunk-XUO7ZEHU.js.map +0 -1
- package/dist/chunk-YSGUX5JT.js +0 -1002
- package/dist/chunk-YSGUX5JT.js.map +0 -1
- package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
- package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
- package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
- package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
- package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
- package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
- package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
- package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
- package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
- package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
- package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
- package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
- package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
- package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
- package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
- package/dist/esm-UYZ3HJBU.js.map +0 -1
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
- package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
- package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
- package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
- package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
- package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
- package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
- package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js +0 -10
- package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
- package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
- package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
- package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
- package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
- package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
- package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
- package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
- /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
- /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
- /package/dist/{dist-VUPMLHIV.js.map → dist-SMKOBBFB.js.map} +0 -0
- /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
- /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
- /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
package/README.md
CHANGED
|
@@ -203,14 +203,17 @@ See `agentv eval --help` for all options: workers, timeouts, output formats, tra
|
|
|
203
203
|
Write results to different formats using the `-o` flag (format auto-detected from extension):
|
|
204
204
|
|
|
205
205
|
```bash
|
|
206
|
-
#
|
|
207
|
-
agentv eval evals/my-eval.yaml
|
|
206
|
+
# Default run workspace (index.jsonl + benchmark/timing/per-test artifacts)
|
|
207
|
+
agentv eval evals/my-eval.yaml
|
|
208
208
|
|
|
209
209
|
# Self-contained HTML dashboard (opens in any browser, no server needed)
|
|
210
210
|
agentv eval evals/my-eval.yaml -o report.html
|
|
211
211
|
|
|
212
|
+
# Explicit JSONL output
|
|
213
|
+
agentv eval evals/my-eval.yaml -o output.jsonl
|
|
214
|
+
|
|
212
215
|
# Multiple formats simultaneously
|
|
213
|
-
agentv eval evals/my-eval.yaml -o
|
|
216
|
+
agentv eval evals/my-eval.yaml -o report.html
|
|
214
217
|
|
|
215
218
|
# JUnit XML for CI/CD integration
|
|
216
219
|
agentv eval evals/my-eval.yaml -o results.xml
|
|
@@ -218,10 +221,13 @@ agentv eval evals/my-eval.yaml -o results.xml
|
|
|
218
221
|
|
|
219
222
|
The HTML report auto-refreshes every 2 seconds during a live run, then locks once the run completes.
|
|
220
223
|
|
|
221
|
-
|
|
224
|
+
By default, `agentv eval` creates a run workspace under `.agentv/results/raw/<run>/`
|
|
225
|
+
with `index.jsonl` as the machine-facing manifest.
|
|
226
|
+
|
|
227
|
+
You can also convert an existing manifest to HTML after the fact:
|
|
222
228
|
|
|
223
229
|
```bash
|
|
224
|
-
agentv convert results.jsonl -o report.html
|
|
230
|
+
agentv convert .agentv/results/raw/eval_<timestamp>/index.jsonl -o report.html
|
|
225
231
|
```
|
|
226
232
|
|
|
227
233
|
#### Timeouts
|
|
@@ -352,7 +358,7 @@ agentv create eval my-eval # → evals/my-eval.eval.yaml + .cases.jsonl
|
|
|
352
358
|
Compare a combined results file across all targets (N-way matrix):
|
|
353
359
|
|
|
354
360
|
```bash
|
|
355
|
-
agentv compare results.jsonl
|
|
361
|
+
agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl
|
|
356
362
|
```
|
|
357
363
|
|
|
358
364
|
```
|
|
@@ -373,8 +379,8 @@ Pairwise Summary:
|
|
|
373
379
|
Designate a baseline for CI regression gating, or compare two specific targets:
|
|
374
380
|
|
|
375
381
|
```bash
|
|
376
|
-
agentv compare results.jsonl --baseline gpt-4.1
|
|
377
|
-
agentv compare results.jsonl --baseline gpt-4.1 --candidate gpt-5-mini
|
|
382
|
+
agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1
|
|
383
|
+
agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1 --candidate gpt-5-mini
|
|
378
384
|
agentv compare before.jsonl after.jsonl # two-file pairwise
|
|
379
385
|
```
|
|
380
386
|
|
|
@@ -518,10 +524,7 @@ tests:
|
|
|
518
524
|
|
|
519
525
|
Scoring: `(satisfied weights) / (total weights)` → verdicts: `pass` (≥0.8), `borderline` (≥0.6), `fail`
|
|
520
526
|
|
|
521
|
-
|
|
522
|
-
```bash
|
|
523
|
-
agentv generate rubrics evals/my-eval.yaml
|
|
524
|
-
```
|
|
527
|
+
Author assertions directly in your eval file. When you want help choosing between simple assertions, deterministic graders, and LLM-based graders, use the `agentv-eval-writer` skill.
|
|
525
528
|
|
|
526
529
|
See [rubric evaluator](https://agentv.dev/evaluation/rubrics/) for detailed patterns.
|
|
527
530
|
|
|
@@ -2,9 +2,8 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
|
|
|
2
2
|
import {
|
|
3
3
|
AgentvProvider
|
|
4
4
|
} from "./chunk-BJV6MDBE.js";
|
|
5
|
-
import "./chunk-UGXG73VF.js";
|
|
6
5
|
import "./chunk-5H446C7X.js";
|
|
7
6
|
export {
|
|
8
7
|
AgentvProvider
|
|
9
8
|
};
|
|
10
|
-
//# sourceMappingURL=agentv-provider-MIDKLYIH-6LIYKQRP.js.map
|
|
9
|
+
//# sourceMappingURL=agentv-provider-NFFLXG5M-TJAWCWCX.js.map
|