agentv 3.11.1 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +16 -12
  2. package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +1 -2
  3. package/dist/{chunk-V2S5CZU3.js → chunk-2ELQ6F3C.js} +916 -523
  4. package/dist/chunk-2ELQ6F3C.js.map +1 -0
  5. package/dist/{chunk-JK6V4KVD.js → chunk-NR7QVL75.js} +32 -24
  6. package/dist/chunk-NR7QVL75.js.map +1 -0
  7. package/dist/{chunk-OYD2NB55.js → chunk-UYBLUYHN.js} +104 -15
  8. package/dist/chunk-UYBLUYHN.js.map +1 -0
  9. package/dist/{chunk-CKMAM2GD.js → chunk-VLOFRXH4.js} +461 -196
  10. package/dist/chunk-VLOFRXH4.js.map +1 -0
  11. package/dist/{chunk-BAUNAXHT.js → chunk-XOSNETAV.js} +1 -1
  12. package/dist/cli.js +5 -6
  13. package/dist/cli.js.map +1 -1
  14. package/dist/{dist-VUPMLHIV.js → dist-L6R5HJ72.js} +4 -5
  15. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js → esm-5Q4BZALM-5REQWAUV.js} +2 -3
  16. package/dist/{esm-OJ2BXJK4-YKEI3Z7E.js.map → esm-5Q4BZALM-5REQWAUV.js.map} +1 -1
  17. package/dist/{esm-UYZ3HJBU.js → esm-CZAWIY6F.js} +2 -2
  18. package/dist/index.js +5 -6
  19. package/dist/{interactive-FZJANO4A.js → interactive-5X62YEEX.js} +5 -6
  20. package/dist/{interactive-FZJANO4A.js.map → interactive-5X62YEEX.js.map} +1 -1
  21. package/dist/{otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js} +1 -2
  22. package/dist/{simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js} +2 -3
  23. package/dist/{src-PXDA7QIS.js → src-ML4D2MC2.js} +2 -2
  24. package/dist/templates/.agentv/.env.example +23 -0
  25. package/dist/templates/.agentv/config.yaml +13 -4
  26. package/dist/templates/.agentv/targets.yaml +16 -0
  27. package/package.json +1 -1
  28. package/dist/chunk-2QFWRIYL.js +0 -186
  29. package/dist/chunk-2QFWRIYL.js.map +0 -1
  30. package/dist/chunk-2RMPO6LY.js +0 -747
  31. package/dist/chunk-2RMPO6LY.js.map +0 -1
  32. package/dist/chunk-3Q7WIXT4.js +0 -4846
  33. package/dist/chunk-3Q7WIXT4.js.map +0 -1
  34. package/dist/chunk-73O2DCJP.js +0 -1274
  35. package/dist/chunk-73O2DCJP.js.map +0 -1
  36. package/dist/chunk-AUKF3Y3W.js +0 -212
  37. package/dist/chunk-AUKF3Y3W.js.map +0 -1
  38. package/dist/chunk-BRH7SIDP.js +0 -133
  39. package/dist/chunk-BRH7SIDP.js.map +0 -1
  40. package/dist/chunk-BXM4I3BM.js +0 -526
  41. package/dist/chunk-BXM4I3BM.js.map +0 -1
  42. package/dist/chunk-CKMAM2GD.js.map +0 -1
  43. package/dist/chunk-FHTURHTY.js +0 -546
  44. package/dist/chunk-FHTURHTY.js.map +0 -1
  45. package/dist/chunk-GJFXQQWG.js +0 -21
  46. package/dist/chunk-GJFXQQWG.js.map +0 -1
  47. package/dist/chunk-HKMLG4KF.js +0 -38
  48. package/dist/chunk-HKMLG4KF.js.map +0 -1
  49. package/dist/chunk-JGU3PVA4.js +0 -133
  50. package/dist/chunk-JGU3PVA4.js.map +0 -1
  51. package/dist/chunk-JK6V4KVD.js.map +0 -1
  52. package/dist/chunk-LHU5FGVZ.js +0 -4804
  53. package/dist/chunk-LHU5FGVZ.js.map +0 -1
  54. package/dist/chunk-OL2WGI6E.js +0 -149
  55. package/dist/chunk-OL2WGI6E.js.map +0 -1
  56. package/dist/chunk-ONETZL6N.js +0 -15
  57. package/dist/chunk-ONETZL6N.js.map +0 -1
  58. package/dist/chunk-OYD2NB55.js.map +0 -1
  59. package/dist/chunk-QV4UGEN6.js +0 -320
  60. package/dist/chunk-QV4UGEN6.js.map +0 -1
  61. package/dist/chunk-QXLDKGF3.js +0 -46
  62. package/dist/chunk-QXLDKGF3.js.map +0 -1
  63. package/dist/chunk-U6VEM66A.js +0 -63
  64. package/dist/chunk-U6VEM66A.js.map +0 -1
  65. package/dist/chunk-UALXHIMX.js +0 -48
  66. package/dist/chunk-UALXHIMX.js.map +0 -1
  67. package/dist/chunk-UGXG73VF.js +0 -55
  68. package/dist/chunk-UGXG73VF.js.map +0 -1
  69. package/dist/chunk-UHP5KEDL.js +0 -38
  70. package/dist/chunk-UHP5KEDL.js.map +0 -1
  71. package/dist/chunk-V2S5CZU3.js.map +0 -1
  72. package/dist/chunk-WVSXFZWP.js +0 -204
  73. package/dist/chunk-WVSXFZWP.js.map +0 -1
  74. package/dist/chunk-XSUMCWKO.js +0 -30
  75. package/dist/chunk-XSUMCWKO.js.map +0 -1
  76. package/dist/chunk-XUO7ZEHU.js +0 -181
  77. package/dist/chunk-XUO7ZEHU.js.map +0 -1
  78. package/dist/chunk-YSGUX5JT.js +0 -1002
  79. package/dist/chunk-YSGUX5JT.js.map +0 -1
  80. package/dist/dist-3PCP5TNF-RYMVLILE.js +0 -25785
  81. package/dist/dist-3PCP5TNF-RYMVLILE.js.map +0 -1
  82. package/dist/dist-BOIN5LC5-T5UWUK43.js +0 -76113
  83. package/dist/dist-BOIN5LC5-T5UWUK43.js.map +0 -1
  84. package/dist/dist-LXPDQOBI-4V5J2WDS.js +0 -13
  85. package/dist/dist-LXPDQOBI-4V5J2WDS.js.map +0 -1
  86. package/dist/dist-es-4WSJUIYR-XKIX65IH.js +0 -69
  87. package/dist/dist-es-4WSJUIYR-XKIX65IH.js.map +0 -1
  88. package/dist/dist-es-7K7MKRME-CCMAZOQC.js +0 -355
  89. package/dist/dist-es-7K7MKRME-CCMAZOQC.js.map +0 -1
  90. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js +0 -191
  91. package/dist/dist-es-B2RTOKRI-VWZHK5RE.js.map +0 -1
  92. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js +0 -164
  93. package/dist/dist-es-HHZ4FAXA-CRERHWKB.js.map +0 -1
  94. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js +0 -355
  95. package/dist/dist-es-HVS3RPMX-AYJ3DW4L.js.map +0 -1
  96. package/dist/dist-es-L6R4FPI5-IKIRYN45.js +0 -472
  97. package/dist/dist-es-L6R4FPI5-IKIRYN45.js.map +0 -1
  98. package/dist/dist-es-SRVEB5QV-Q4CTC2HX.js +0 -24
  99. package/dist/dist-es-TRIVUKV4-2J47CDXR.js +0 -85
  100. package/dist/dist-es-TRIVUKV4-2J47CDXR.js.map +0 -1
  101. package/dist/dist-es-UEEUAV34-IZQDTAMW.js +0 -16
  102. package/dist/event-streams-NZADSH5J-6MOSNEV3.js +0 -247
  103. package/dist/event-streams-NZADSH5J-6MOSNEV3.js.map +0 -1
  104. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js +0 -738
  105. package/dist/loadSso-IQZ5NB6C-DZJTORO3.js.map +0 -1
  106. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js +0 -387
  107. package/dist/multipart-parser-IPYBIGNL-LFMNMM6D.js.map +0 -1
  108. package/dist/otlp-json-file-exporter-VN67MK3S-RQIM6EHY.js.map +0 -1
  109. package/dist/signin-2ANR4DVS-K5VGBEJF.js +0 -556
  110. package/dist/signin-2ANR4DVS-K5VGBEJF.js.map +0 -1
  111. package/dist/simple-trace-file-exporter-XWZTIZR2-4JKATE5G.js.map +0 -1
  112. package/dist/src-SLOMUG7K-CV5JG263.js +0 -1408
  113. package/dist/src-SLOMUG7K-CV5JG263.js.map +0 -1
  114. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js +0 -708
  115. package/dist/sso-oidc-HVCDATR2-CYP3BM5O.js.map +0 -1
  116. package/dist/sts-X7JGSP4H-PDAAYDDH.js +0 -2917
  117. package/dist/sts-X7JGSP4H-PDAAYDDH.js.map +0 -1
  118. package/dist/undici-VAR2VUJI-6PAOUXZC.js +0 -23388
  119. package/dist/undici-VAR2VUJI-6PAOUXZC.js.map +0 -1
  120. /package/dist/{agentv-provider-MIDKLYIH-6LIYKQRP.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
  121. /package/dist/{chunk-BAUNAXHT.js.map → chunk-XOSNETAV.js.map} +0 -0
  122. /package/dist/{dist-VUPMLHIV.js.map → dist-L6R5HJ72.js.map} +0 -0
  123. /package/dist/{dist-es-SRVEB5QV-Q4CTC2HX.js.map → esm-CZAWIY6F.js.map} +0 -0
  124. /package/dist/{dist-es-UEEUAV34-IZQDTAMW.js.map → otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map} +0 -0
  125. /package/dist/{esm-UYZ3HJBU.js.map → simple-trace-file-exporter-CRIO5HDZ-QYYT2QQT.js.map} +0 -0
  126. /package/dist/{src-PXDA7QIS.js.map → src-ML4D2MC2.js.map} +0 -0
package/README.md CHANGED
@@ -203,14 +203,17 @@ See `agentv eval --help` for all options: workers, timeouts, output formats, tra
203
203
  Write results to different formats using the `-o` flag (format auto-detected from extension):
204
204
 
205
205
  ```bash
206
- # JSONL (default streaming format)
207
- agentv eval evals/my-eval.yaml -o results.jsonl
206
+ # Default run workspace (index.jsonl + benchmark/timing/per-test artifacts)
207
+ agentv eval evals/my-eval.yaml
208
208
 
209
209
  # Self-contained HTML dashboard (opens in any browser, no server needed)
210
210
  agentv eval evals/my-eval.yaml -o report.html
211
211
 
212
+ # Explicit JSONL compatibility output
213
+ agentv eval evals/my-eval.yaml -o results.jsonl
214
+
212
215
  # Multiple formats simultaneously
213
- agentv eval evals/my-eval.yaml -o results.jsonl -o report.html
216
+ agentv eval evals/my-eval.yaml -o report.html -o results.xml
214
217
 
215
218
  # JUnit XML for CI/CD integration
216
219
  agentv eval evals/my-eval.yaml -o results.xml
@@ -218,10 +221,14 @@ agentv eval evals/my-eval.yaml -o results.xml
218
221
 
219
222
  The HTML report auto-refreshes every 2 seconds during a live run, then locks once the run completes.
220
223
 
221
- You can also convert an existing JSONL results file to HTML after the fact:
224
+ By default, `agentv eval` now creates a run workspace under `.agentv/results/raw/<run>/`
225
+ with `index.jsonl` as the primary machine-facing manifest. A compatibility `results.jsonl`
226
+ is still written alongside it for legacy tooling during the deprecation window.
227
+
228
+ You can also convert an existing manifest or compatibility JSONL file to HTML after the fact:
222
229
 
223
230
  ```bash
224
- agentv convert results.jsonl -o report.html
231
+ agentv convert .agentv/results/raw/eval_<timestamp>/index.jsonl -o report.html
225
232
  ```
226
233
 
227
234
  #### Timeouts
@@ -352,7 +359,7 @@ agentv create eval my-eval # → evals/my-eval.eval.yaml + .cases.jsonl
352
359
  Compare a combined results file across all targets (N-way matrix):
353
360
 
354
361
  ```bash
355
- agentv compare results.jsonl
362
+ agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl
356
363
  ```
357
364
 
358
365
  ```
@@ -373,8 +380,8 @@ Pairwise Summary:
373
380
  Designate a baseline for CI regression gating, or compare two specific targets:
374
381
 
375
382
  ```bash
376
- agentv compare results.jsonl --baseline gpt-4.1 # exit 1 on regression
377
- agentv compare results.jsonl --baseline gpt-4.1 --candidate gpt-5-mini # pairwise
383
+ agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1
384
+ agentv compare .agentv/results/raw/eval_<timestamp>/index.jsonl --baseline gpt-4.1 --candidate gpt-5-mini
378
385
  agentv compare before.jsonl after.jsonl # two-file pairwise
379
386
  ```
380
387
 
@@ -518,10 +525,7 @@ tests:
518
525
 
519
526
  Scoring: `(satisfied weights) / (total weights)` → verdicts: `pass` (≥0.8), `borderline` (≥0.6), `fail`
520
527
 
521
- Auto-generate rubrics from expected outcomes:
522
- ```bash
523
- agentv generate rubrics evals/my-eval.yaml
524
- ```
528
+ Author assertions directly in your eval file. When you want help choosing between simple assertions, deterministic graders, and LLM-based graders, use the `agentv-eval-writer` skill.
525
529
 
526
530
  See [rubric evaluator](https://agentv.dev/evaluation/rubrics/) for detailed patterns.
527
531
 
@@ -2,9 +2,8 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
2
2
  import {
3
3
  AgentvProvider
4
4
  } from "./chunk-BJV6MDBE.js";
5
- import "./chunk-UGXG73VF.js";
6
5
  import "./chunk-5H446C7X.js";
7
6
  export {
8
7
  AgentvProvider
9
8
  };
10
- //# sourceMappingURL=agentv-provider-MIDKLYIH-6LIYKQRP.js.map
9
+ //# sourceMappingURL=agentv-provider-NFFLXG5M-TJAWCWCX.js.map