agentv 3.11.1 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/README.md +15 -12
  2. package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
  3. package/dist/{chunk-CKMAM2GD.js → chunk-6H4IAXQH.js} +435 -198
  4. package/dist/chunk-6H4IAXQH.js.map +1 -0
  5. package/dist/{chunk-OYD2NB55.js → chunk-7OHZAFND.js} +120 -29
  6. package/dist/chunk-7OHZAFND.js.map +1 -0
  7. package/dist/{chunk-V2S5CZU3.js → chunk-DJU4C6NS.js} +914 -529
  8. package/dist/chunk-DJU4C6NS.js.map +1 -0
  9. package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
  10. package/dist/cli.js +4 -6
  11. package/dist/cli.js.map +1 -1
  12. package/dist/{dist-VUPMLHIV.js → dist-SMKOBBFB.js} +3 -8
  13. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
  14. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
  15. package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
  16. package/dist/index.js +4 -6
  17. package/dist/{interactive-FZJANO4A.js → interactive-RV664PCR.js} +4 -6
  18. package/dist/{interactive-FZJANO4A.js.map → interactive-RV664PCR.js.map} +1 -1
  19. package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
  20. package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
  21. package/package.json +1 -1
  22. package/dist/chunk-2QFWRIYL.js +0 -186
  23. package/dist/chunk-2QFWRIYL.js.map +0 -1
  24. package/dist/chunk-2RMPO6LY.js +0 -747
  25. package/dist/chunk-2RMPO6LY.js.map +0 -1
  26. package/dist/chunk-3Q7WIXT4.js +0 -4846
  27. package/dist/chunk-3Q7WIXT4.js.map +0 -1
  28. package/dist/chunk-73O2DCJP.js +0 -1274
  29. package/dist/chunk-73O2DCJP.js.map +0 -1
  30. package/dist/chunk-AUKF3Y3W.js +0 -212
  31. package/dist/chunk-AUKF3Y3W.js.map +0 -1
  32. package/dist/chunk-BRH7SIDP.js +0 -133
  33. package/dist/chunk-BRH7SIDP.js.map +0 -1
  34. package/dist/chunk-BXM4I3BM.js +0 -526
  35. package/dist/chunk-BXM4I3BM.js.map +0 -1
  36. package/dist/chunk-CKMAM2GD.js.map +0 -1
  37. package/dist/chunk-FHTURHTY.js +0 -546
  38. package/dist/chunk-FHTURHTY.js.map +0 -1
  39. package/dist/chunk-GJFXQQWG.js +0 -21
  40. package/dist/chunk-GJFXQQWG.js.map +0 -1
  41. package/dist/chunk-HKMLG4KF.js +0 -38
  42. package/dist/chunk-HKMLG4KF.js.map +0 -1
  43. package/dist/chunk-JGU3PVA4.js +0 -133
  44. package/dist/chunk-JGU3PVA4.js.map +0 -1
  45. package/dist/chunk-JK6V4KVD.js +0 -114
  46. package/dist/chunk-JK6V4KVD.js.map +0 -1
  47. package/dist/chunk-LHU5FGVZ.js +0 -4804
  48. package/dist/chunk-LHU5FGVZ.js.map +0 -1
  49. package/dist/chunk-OL2WGI6E.js +0 -149
  50. package/dist/chunk-OL2WGI6E.js.map +0 -1
  51. package/dist/chunk-ONETZL6N.js +0 -15
  52. package/dist/chunk-ONETZL6N.js.map +0 -1
  53. package/dist/chunk-OYD2NB55.js.map +0 -1
  54. package/dist/chunk-QV4UGEN6.js +0 -320
  55. package/dist/chunk-QV4UGEN6.js.map +0 -1
  56. package/dist/chunk-QXLDKGF3.js +0 -46
  57. package/dist/chunk-QXLDKGF3.js.map +0 -1
  58. package/dist/chunk-U6VEM66A.js +0 -63
  59. package/dist/chunk-U6VEM66A.js.map +0 -1
  60. package/dist/chunk-UALXHIMX.js +0 -48
  61. package/dist/chunk-UALXHIMX.js.map +0 -1
  62. package/dist/chunk-UGXG73VF.js +0 -55
  63. package/dist/chunk-UGXG73VF.js.map +0 -1
  64. package/dist/chunk-UHP5KEDL.js +0 -38
  65. package/dist/chunk-UHP5KEDL.js.map +0 -1
  66. package/dist/chunk-V2S5CZU3.js.map +0 -1
  67. package/dist/chunk-WVSXFZWP.js +0 -204
  68. package/dist/chunk-WVSXFZWP.js.map +0 -1
  69. package/dist/chunk-XSUMCWKO.js +0 -30
  70. package/dist/chunk-XSUMCWKO.js.map +0 -1
  71. package/dist/chunk-XUO7ZEHU.js +0 -181
  72. package/dist/chunk-XUO7ZEHU.js.map +0 -1
  73. package/dist/chunk-YSGUX5JT.js +0 -1002
  74. package/dist/chunk-YSGUX5JT.js.map +0 -1
  75. package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
  76. package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
  77. package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
  78. package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
  79. package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
  80. package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
  81. package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
  82. package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
  83. package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
  84. package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
  85. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
  86. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
  87. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
  88. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
  89. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
  90. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
  91. package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
  92. package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
  93. package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
  94. package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
  95. package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
  96. package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
  97. package/dist/esm-UYZ3HJBU.js.map +0 -1
  98. package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
  99. package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
  100. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
  101. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
  102. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
  103. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
  104. package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
  105. package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
  106. package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
  107. package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js +0 -10
  108. package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
  109. package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
  110. package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
  111. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
  112. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
  113. package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
  114. package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
  115. package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
  116. package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
  117. /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
  118. /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
  119. /package/dist/{dist-VUPMLHIV.js.map → dist-SMKOBBFB.js.map} +0 -0
  120. /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
  121. /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
  122. /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
package/README.md CHANGED
@@ -203,14 +203,17 @@ See `agentv eval --help` for all options: workers, timeouts, output formats, tra
203
203
  Write results to different formats using the `-o` flag (format auto-detected from extension):
204
204
 
205
205
  ```bash
206
- # JSONL (default streaming format)
207
- agentv eval evals/my-eval.yaml -o results.jsonl
206
+ # Default run workspace (index.jsonl + benchmark/timing/per-test artifacts)
207
+ agentv eval evals/my-eval.yaml
208
208
 
209
209
  # Self-contained HTML dashboard (opens in any browser, no server needed)
210
210
  agentv eval evals/my-eval.yaml -o report.html
211
211
 
212
+ # Explicit JSONL output
213
+ agentv eval evals/my-eval.yaml -o output.jsonl
214
+
212
215
  # Multiple formats simultaneously
213
- agentv eval evals/my-eval.yaml -o results.jsonl -o report.html
216
+ agentv eval evals/my-eval.yaml -o report.html
214
217
 
215
218
  # JUnit XML for CI/CD integration
216
219
  agentv eval evals/my-eval.yaml -o results.xml
@@ -218,10 +221,13 @@ agentv eval evals/my-eval.yaml -o results.xml
218
221
 
219
222
  The HTML report auto-refreshes every 2 seconds during a live run, then locks once the run completes.
220
223
 
221
- You can also convert an existing JSONL results file to HTML after the fact:
224
+ By default, `agentv eval` creates a run workspace under `.agentv/results/raw/<run>/`
225
+ with `index.jsonl` as the machine-facing manifest.
226
+
227
+ You can also convert an existing manifest to HTML after the fact:
222
228
 
223
229
  ```bash
224
- agentv convert results.jsonl -o report.html
230
+ agentv convert .agentv/results/raw/eval_<timestamp>/index.jsonl -o report.html
225
231
  ```
226
232
 
227
233
  #### Timeouts
@@ -352,7 +358,7 @@ agentv create eval my-eval # → evals/my-eval.eval.yaml + .cases.jsonl
352
358
  Compare a combined results file across all targets (N-way matrix):
353
359
 
354
360
  ```bash
355
- agentv compare results.jsonl
361
+ agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl
356
362
  ```
357
363
 
358
364
  ```
@@ -373,8 +379,8 @@ Pairwise Summary:
373
379
  Designate a baseline for CI regression gating, or compare two specific targets:
374
380
 
375
381
  ```bash
376
- agentv compare results.jsonl --baseline gpt-4.1 # exit 1 on regression
377
- agentv compare results.jsonl --baseline gpt-4.1 --candidate gpt-5-mini # pairwise
382
+ agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1
383
+ agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1 --candidate gpt-5-mini
378
384
  agentv compare before.jsonl after.jsonl # two-file pairwise
379
385
  ```
380
386
 
@@ -518,10 +524,7 @@ tests:
518
524
 
519
525
  Scoring: `(satisfied weights) / (total weights)` → verdicts: `pass` (≥0.8), `borderline` (≥0.6), `fail`
520
526
 
521
- Auto-generate rubrics from expected outcomes:
522
- ```bash
523
- agentv generate rubrics evals/my-eval.yaml
524
- ```
527
+ Author assertions directly in your eval file. When you want help choosing between simple assertions, deterministic graders, and LLM-based graders, use the `agentv-eval-writer` skill.
525
528
 
526
529
  See [rubric evaluator](https://agentv.dev/evaluation/rubrics/) for detailed patterns.
527
530
 
package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} CHANGED
@@ -2,9 +2,8 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
2
2
  import {
3
3
  AgentvProvider
4
4
  } from "./chunk-BJV6MDBE.js";
5
- import "./chunk-UGXG73VF.js";
6
5
  import "./chunk-5H446C7X.js";
7
6
  export {
8
7
  AgentvProvider
9
8
  };
10
- //# sourceMappingURL=agentv-provider-MIDKLYIH-6LIYKQRP.js.map
9
+ //# sourceMappingURL=agentv-provider-NFFLXG5M-TJAWCWCX.js.map