vitest-evals 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +39 -35
  2. package/bin/vitest-evals.js +8 -0
  3. package/dist/cli.d.mts +13 -0
  4. package/dist/cli.d.ts +13 -0
  5. package/dist/cli.js +83 -0
  6. package/dist/cli.js.map +1 -0
  7. package/dist/cli.mjs +55 -0
  8. package/dist/cli.mjs.map +1 -0
  9. package/dist/harness.d.mts +19 -433
  10. package/dist/harness.d.ts +19 -433
  11. package/dist/harness.js +19 -51
  12. package/dist/harness.js.map +1 -1
  13. package/dist/harness.mjs +31 -49
  14. package/dist/harness.mjs.map +1 -1
  15. package/dist/index.d.mts +47 -68
  16. package/dist/index.d.ts +47 -68
  17. package/dist/index.js +46 -96
  18. package/dist/index.js.map +1 -1
  19. package/dist/index.mjs +58 -94
  20. package/dist/index.mjs.map +1 -1
  21. package/dist/internal/scoring.d.mts +1 -1
  22. package/dist/internal/scoring.d.ts +1 -1
  23. package/dist/internal/structuredOutputScorer.d.mts +1 -1
  24. package/dist/internal/structuredOutputScorer.d.ts +1 -1
  25. package/dist/internal/toolCallScorer.d.mts +1 -1
  26. package/dist/internal/toolCallScorer.d.ts +1 -1
  27. package/dist/internal/toolCallScorer.js +2 -0
  28. package/dist/internal/toolCallScorer.js.map +1 -1
  29. package/dist/internal/toolCallScorer.mjs +16 -0
  30. package/dist/internal/toolCallScorer.mjs.map +1 -1
  31. package/dist/judges/factualityJudge.d.mts +15 -13
  32. package/dist/judges/factualityJudge.d.ts +15 -13
  33. package/dist/judges/factualityJudge.js +13 -23
  34. package/dist/judges/factualityJudge.js.map +1 -1
  35. package/dist/judges/factualityJudge.mjs +27 -23
  36. package/dist/judges/factualityJudge.mjs.map +1 -1
  37. package/dist/judges/index.d.mts +1 -0
  38. package/dist/judges/index.d.ts +1 -0
  39. package/dist/judges/index.js +28 -47
  40. package/dist/judges/index.js.map +1 -1
  41. package/dist/judges/index.mjs +40 -45
  42. package/dist/judges/index.mjs.map +1 -1
  43. package/dist/judges/judgeHarness.d.mts +7 -10
  44. package/dist/judges/judgeHarness.d.ts +7 -10
  45. package/dist/judges/judgeHarness.js +13 -34
  46. package/dist/judges/judgeHarness.js.map +1 -1
  47. package/dist/judges/judgeHarness.mjs +25 -32
  48. package/dist/judges/judgeHarness.mjs.map +1 -1
  49. package/dist/judges/structuredOutputJudge.d.mts +7 -8
  50. package/dist/judges/structuredOutputJudge.d.ts +7 -8
  51. package/dist/judges/structuredOutputJudge.js +3 -3
  52. package/dist/judges/structuredOutputJudge.js.map +1 -1
  53. package/dist/judges/structuredOutputJudge.mjs +3 -3
  54. package/dist/judges/structuredOutputJudge.mjs.map +1 -1
  55. package/dist/judges/toolCallJudge.d.mts +12 -8
  56. package/dist/judges/toolCallJudge.d.ts +12 -8
  57. package/dist/judges/toolCallJudge.js +5 -3
  58. package/dist/judges/toolCallJudge.js.map +1 -1
  59. package/dist/judges/toolCallJudge.mjs +19 -3
  60. package/dist/judges/toolCallJudge.mjs.map +1 -1
  61. package/dist/judges/types.d.mts +14 -24
  62. package/dist/judges/types.d.ts +14 -24
  63. package/dist/judges/types.js.map +1 -1
  64. package/dist/legacy/scorers/index.js +2 -0
  65. package/dist/legacy/scorers/index.js.map +1 -1
  66. package/dist/legacy/scorers/index.mjs +16 -0
  67. package/dist/legacy/scorers/index.mjs.map +1 -1
  68. package/dist/legacy/scorers/toolCallScorer.js +2 -0
  69. package/dist/legacy/scorers/toolCallScorer.js.map +1 -1
  70. package/dist/legacy/scorers/toolCallScorer.mjs +16 -0
  71. package/dist/legacy/scorers/toolCallScorer.mjs.map +1 -1
  72. package/dist/legacy.js +7 -5
  73. package/dist/legacy.js.map +1 -1
  74. package/dist/legacy.mjs +21 -5
  75. package/dist/legacy.mjs.map +1 -1
  76. package/dist/replay.d.mts +1 -1
  77. package/dist/replay.d.ts +1 -1
  78. package/dist/reporter.js +4 -5
  79. package/dist/reporter.js.map +1 -1
  80. package/dist/reporter.mjs +18 -5
  81. package/dist/reporter.mjs.map +1 -1
  82. package/package.json +9 -1
package/README.md CHANGED
@@ -30,8 +30,8 @@ workflow.
30
30
 
31
31
  - `describeEval(...)` binds exactly one harness to a suite
32
32
  - the suite callback receives a fixture-backed Vitest `it`
33
- - `run(input, { metadata? })` executes the harness explicitly and returns a
34
- normalized `HarnessRun`
33
+ - `run(input)` executes the harness explicitly and returns a normalized
34
+ `HarnessRun`
35
35
  - the returned `result.output` is the app-facing value you assert on directly
36
36
  - the returned `result.session` is the canonical JSON-serializable transcript for
37
37
  reporting, replay, tool assertions, and judges
@@ -41,19 +41,18 @@ workflow.
41
41
  that do not return traces themselves. Span attributes include typed
42
42
  OpenTelemetry GenAI semantic keys while still allowing provider-specific
43
43
  metadata
44
- - scenario-specific judge criteria can live in `input`; use `metadata` for
45
- per-run expectations or harness configuration that are not part of the
46
- scenario payload
44
+ - scenario-specific judge criteria should live in `input` or explicit matcher
45
+ options, depending on whether the app or only the judge needs them
47
46
  - suite-level `judges` are optional and run automatically after each `run(...)`
48
47
  - suite-level `judgeThreshold` controls fail-on-score for those automatic judges
49
48
  - every judge is a named object with `assess(ctx)`
50
49
  - every judge receives `JudgeContext` with typed `input`, typed `output`, the
51
- normalized run/session, tool calls, and metadata; `output` is only optional
50
+ normalized run/session, and tool calls; `output` is only optional
52
51
  when the harness output type includes `undefined`
53
52
  - judges own their prompt, rubric, and parsing; LLM-backed judges use
54
53
  `ctx.runJudge(...)` from a configured `judgeHarness`
55
54
  - explicit judge assertions use
56
- `await expect(result).toSatisfyJudge(judge, context)`
55
+ `await expect(result).toSatisfyJudge(judge, options)`
57
56
 
58
57
  ## Explicit Run Example
59
58
 
@@ -80,18 +79,16 @@ describeEval(
80
79
  agent: () => createRefundAgent(),
81
80
  }),
82
81
  judgeHarness,
83
- judges: [FactualityJudge()],
82
+ judges: [
83
+ FactualityJudge({
84
+ expected: "The refund request is approved.",
85
+ }),
86
+ ],
84
87
  judgeThreshold: 0.6,
85
88
  },
86
89
  (it) => {
87
90
  it("approves a refundable invoice", async ({ run }) => {
88
- const result = await run("Refund invoice inv_123", {
89
- metadata: {
90
- expected: "The refund request is approved.",
91
- expectedStatus: "approved",
92
- expectedTools: ["lookupInvoice", "createRefund"],
93
- },
94
- });
91
+ const result = await run("Refund invoice inv_123");
95
92
 
96
93
  expect(result.output).toMatchObject({ status: "approved" });
97
94
  expect(toolCalls(result.session).map((call) => call.name)).toEqual([
@@ -121,13 +118,11 @@ describeEval("refund agent", { harness }, (it) => {
121
118
  input: "Refund invoice inv_404",
122
119
  expectedStatus: "denied",
123
120
  },
124
- ])("$name", async ({ input, ...metadata }, { run }) => {
125
- const result = await run(input, {
126
- metadata,
127
- });
121
+ ])("$name", async ({ input, expectedStatus }, { run }) => {
122
+ const result = await run(input);
128
123
 
129
124
  expect(result.output).toMatchObject({
130
- status: metadata.expectedStatus,
125
+ status: expectedStatus,
131
126
  });
132
127
  });
133
128
  });
@@ -144,6 +139,18 @@ compatibility.
144
139
 
145
140
  Full transcripts and spans are preserved in the Vitest JSON report metadata.
146
141
 
142
+ ## Local Report UI
143
+
144
+ The local report UI reads the same Vitest JSON artifacts and serves a React SPA
145
+ for drilling into runs, eval cases, harness output, sessions, tool calls,
146
+ scores, and trace spans.
147
+
148
+ ```sh
149
+ pnpm exec vitest-evals serve vitest-results.json
150
+ pnpm exec vitest-evals serve "eval-results/*.json"
151
+ pnpm exec vitest-evals serve eval-results/
152
+ ```
153
+
147
154
  ## GitHub Actions Reporting
148
155
 
149
156
  Use Vitest JSON as the eval report artifact. It preserves the `meta` field that
@@ -201,7 +208,7 @@ First-party harness packages are conveniences, not the only supported path. If
201
208
  you need to test a full application flow, use `createHarness(...)` to run your
202
209
  app through its normal entrypoint and return the app-facing output. Judges own
203
210
  their prompt/rubric text separately from the system under test.
204
- When generics are needed, use `createHarness<Input, Output, Metadata>(...)`.
211
+ When generics are needed, use `createHarness<Input, Output>(...)`.
205
212
 
206
213
  ```ts
207
214
  import {
@@ -209,7 +216,6 @@ import {
209
216
  createJudge,
210
217
  createJudgeHarness,
211
218
  describeEval,
212
- type JudgeContext,
213
219
  } from "vitest-evals";
214
220
 
215
221
  type AppEvent = {
@@ -226,14 +232,12 @@ type AppEvalInput = {
226
232
  };
227
233
  };
228
234
 
229
- type AppEvalMetadata = Record<string, never>;
230
-
231
235
  type AppOutput = {
232
236
  replies: Array<{ text: string }>;
233
237
  sideEffects: string[];
234
238
  };
235
239
 
236
- const appHarness = createHarness<AppEvalInput, AppOutput, AppEvalMetadata>({
240
+ const appHarness = createHarness<AppEvalInput, AppOutput>({
237
241
  name: "custom-app",
238
242
  run: async ({ input, signal }) => {
239
243
  const result = await replayAppEvents(input.events, {
@@ -259,9 +263,9 @@ const judgeHarness = createJudgeHarness({
259
263
  promptJudgeModel({ prompt, signal }),
260
264
  });
261
265
 
262
- const AppRubricJudge = createJudge(
266
+ const AppRubricJudge = createJudge<AppEvalInput, AppOutput>(
263
267
  "AppRubricJudge",
264
- async (ctx: JudgeContext<AppEvalInput, AppOutput, AppEvalMetadata>) => {
268
+ async (ctx) => {
265
269
  if (!ctx.runJudge) {
266
270
  throw new Error("AppRubricJudge requires a configured judgeHarness.");
267
271
  }
@@ -311,11 +315,11 @@ describeEval(
311
315
  Use `Harness.run(...)` for the application under test. Calling
312
316
  `ctx.harness.run(...)` from inside a judge runs the application a second time,
313
317
  so reserve that for judges that intentionally need a second execution. Put
314
- criteria on `input` when they are part of the scenario itself; use per-run
315
- `metadata` for harness configuration or expectations that are not part of the
316
- scenario payload. `createHarness(...)` builds a default user/assistant session
317
- from `input` and typed `output`; return a full `HarnessRun` only when you need
318
- exact session control.
318
+ criteria on `input` when they are part of the scenario itself; pass
319
+ case-specific judge criteria through matcher options, or configure suite-wide
320
+ criteria on the judge instance. `createHarness(...)` builds a default
321
+ user/assistant session from `input` and typed `output`; return a full
322
+ `HarnessRun` only when you need exact session control.
319
323
 
320
324
  Provider setup and rubric parsing stay in your judge. The core
321
325
  package only requires the judge to return a `JudgeResult` with a score and
@@ -437,7 +441,7 @@ so use that only when a second run is intentional.
437
441
 
438
442
  For an `EvalHarnessRun` returned by fixture `run(...)`,
439
443
  `toSatisfyJudge(...)` uses the run's typed `output` and reuses the registered
440
- input and metadata. It requires any custom judge params and rejects judges whose
444
+ input. It requires any custom judge params and rejects judges whose
441
445
  output type cannot assess the received value. Inside an eval test,
442
446
  matcher calls on registered output objects or session objects reuse that exact
443
447
  run context when the value can be registered by reference, so
@@ -445,10 +449,10 @@ run context when the value can be registered by reference, so
445
449
  outputs. Other raw values fall back to the current test's most recent
446
450
  `run(...)` context. For
447
451
  manually-created runs or values outside an eval context, pass any required
448
- `input`, `metadata`, or `harness` in matcher options. Structured or
452
+ `input` or `harness` in matcher options. Structured or
449
453
  programmatic result checks should usually assert on `result.output` directly.
450
454
  When a judge needs richer normalized context or the configured suite harness,
451
- type it with `JudgeContext`.
455
+ type it with `createJudge<Input, Output>(...)` or `JudgeContext<Input, Output>`.
452
456
 
453
457
  When you only need deterministic contract checks, built-ins such as
454
458
  `StructuredOutputJudge()` and `ToolCallJudge()` are still available.
package/bin/vitest-evals.js ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env node
2
+
3
+ import("../dist/cli.mjs")
4
+ .then(({ runVitestEvalsCli }) => runVitestEvalsCli(process.argv.slice(2)))
5
+ .catch((error) => {
6
+ console.error(error instanceof Error ? error.message : String(error));
7
+ process.exitCode = 1;
8
+ });
package/dist/cli.d.mts ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env node
2
+ /** Output streams used by the `vitest-evals` CLI runner. */
3
+ type VitestEvalsCliIo = {
4
+ stdout?: Pick<NodeJS.WriteStream, "write">;
5
+ };
6
+ /** Options for running the `vitest-evals` CLI. */
7
+ type RunVitestEvalsCliOptions = VitestEvalsCliIo & {
8
+ cwd?: string;
9
+ };
10
+ /** Runs the product-facing `vitest-evals` CLI. */
11
+ declare function runVitestEvalsCli(args?: string[], options?: RunVitestEvalsCliOptions): Promise<void>;
12
+
13
+ export { type RunVitestEvalsCliOptions, type VitestEvalsCliIo, runVitestEvalsCli };
package/dist/cli.d.ts ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env node
2
+ /** Output streams used by the `vitest-evals` CLI runner. */
3
+ type VitestEvalsCliIo = {
4
+ stdout?: Pick<NodeJS.WriteStream, "write">;
5
+ };
6
+ /** Options for running the `vitest-evals` CLI. */
7
+ type RunVitestEvalsCliOptions = VitestEvalsCliIo & {
8
+ cwd?: string;
9
+ };
10
+ /** Runs the product-facing `vitest-evals` CLI. */
11
+ declare function runVitestEvalsCli(args?: string[], options?: RunVitestEvalsCliOptions): Promise<void>;
12
+
13
+ export { type RunVitestEvalsCliOptions, type VitestEvalsCliIo, runVitestEvalsCli };
package/dist/cli.js ADDED
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ var __create = Object.create;
4
+ var __defProp = Object.defineProperty;
5
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
6
+ var __getOwnPropNames = Object.getOwnPropertyNames;
7
+ var __getProtoOf = Object.getPrototypeOf;
8
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
9
+ var __export = (target, all) => {
10
+ for (var name in all)
11
+ __defProp(target, name, { get: all[name], enumerable: true });
12
+ };
13
+ var __copyProps = (to, from, except, desc) => {
14
+ if (from && typeof from === "object" || typeof from === "function") {
15
+ for (let key of __getOwnPropNames(from))
16
+ if (!__hasOwnProp.call(to, key) && key !== except)
17
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
18
+ }
19
+ return to;
20
+ };
21
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
22
+ // If the importer is in node compatibility mode or this is not an ESM
23
+ // file that has been converted to a CommonJS file using a Babel-
24
+ // compatible transform (i.e. "__esModule" has not been set), then set
25
+ // "default" to the CommonJS "module.exports" for node compatibility.
26
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
27
+ mod
28
+ ));
29
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
30
+
31
+ // src/cli.ts
32
+ var cli_exports = {};
33
+ __export(cli_exports, {
34
+ runVitestEvalsCli: () => runVitestEvalsCli
35
+ });
36
+ module.exports = __toCommonJS(cli_exports);
37
+ async function runVitestEvalsCli(args = process.argv.slice(2), options = {}) {
38
+ const [command, ...commandArgs] = args;
39
+ if (!command || command === "help" || command === "--help" || command === "-h") {
40
+ writeLine(options.stdout, usage());
41
+ return;
42
+ }
43
+ switch (command) {
44
+ case "serve": {
45
+ const { runReportUiCli } = await import("@vitest-evals/report-ui");
46
+ await runReportUiCli(commandArgs, {
47
+ commandName: "vitest-evals serve",
48
+ cwd: options.cwd,
49
+ stdout: options.stdout
50
+ });
51
+ return;
52
+ }
53
+ default:
54
+ throw new Error(`Unknown command: ${command}
55
+
56
+ ${usage()}`);
57
+ }
58
+ }
59
+ function usage() {
60
+ return [
61
+ "Usage: vitest-evals <command>",
62
+ "",
63
+ "Commands:",
64
+ " serve [json | dir | glob] Serve the local report UI",
65
+ "",
66
+ "Run `vitest-evals serve --help` for report UI options."
67
+ ].join("\n");
68
+ }
69
+ function writeLine(stdout, message) {
70
+ (stdout ?? process.stdout).write(`${message}
71
+ `);
72
+ }
73
+ if (typeof require !== "undefined" && typeof module !== "undefined" && require.main === module) {
74
+ runVitestEvalsCli().catch((error) => {
75
+ console.error(error instanceof Error ? error.message : String(error));
76
+ process.exitCode = 1;
77
+ });
78
+ }
79
+ // Annotate the CommonJS export names for ESM import in node:
80
+ 0 && (module.exports = {
81
+ runVitestEvalsCli
82
+ });
83
+ //# sourceMappingURL=cli.js.map
package/dist/cli.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/** Output streams used by the `vitest-evals` CLI runner. */\nexport type VitestEvalsCliIo = {\n stdout?: Pick<NodeJS.WriteStream, \"write\">;\n};\n\n/** Options for running the `vitest-evals` CLI. */\nexport type RunVitestEvalsCliOptions = VitestEvalsCliIo & {\n cwd?: string;\n};\n\n/** Runs the product-facing `vitest-evals` CLI. */\nexport async function runVitestEvalsCli(\n args = process.argv.slice(2),\n options: RunVitestEvalsCliOptions = {},\n) {\n const [command, ...commandArgs] = args;\n\n if (\n !command ||\n command === \"help\" ||\n command === \"--help\" ||\n command === \"-h\"\n ) {\n writeLine(options.stdout, usage());\n return;\n }\n\n switch (command) {\n case \"serve\": {\n const { runReportUiCli } = await import(\"@vitest-evals/report-ui\");\n await runReportUiCli(commandArgs, {\n commandName: \"vitest-evals serve\",\n cwd: options.cwd,\n stdout: options.stdout,\n });\n return;\n }\n default:\n throw new Error(`Unknown command: ${command}\\n\\n${usage()}`);\n }\n}\n\nfunction usage() {\n return [\n \"Usage: vitest-evals <command>\",\n \"\",\n \"Commands:\",\n \" serve [json | dir | glob] Serve the local report UI\",\n \"\",\n \"Run `vitest-evals serve --help` for report UI options.\",\n ].join(\"\\n\");\n}\n\nfunction writeLine(\n stdout: Pick<NodeJS.WriteStream, \"write\"> | undefined,\n message: string,\n) {\n (stdout ?? process.stdout).write(`${message}\\n`);\n}\n\ndeclare const require: NodeJS.Require | undefined;\ndeclare const module: NodeJS.Module | undefined;\n\nif (\n typeof require !== \"undefined\" &&\n typeof module !== \"undefined\" &&\n require.main === module\n) {\n runVitestEvalsCli().catch((error) => {\n console.error(error instanceof Error ? 
error.message : String(error));\n process.exitCode = 1;\n });\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAaA,eAAsB,kBACpB,OAAO,QAAQ,KAAK,MAAM,CAAC,GAC3B,UAAoC,CAAC,GACrC;AACA,QAAM,CAAC,SAAS,GAAG,WAAW,IAAI;AAElC,MACE,CAAC,WACD,YAAY,UACZ,YAAY,YACZ,YAAY,MACZ;AACA,cAAU,QAAQ,QAAQ,MAAM,CAAC;AACjC;AAAA,EACF;AAEA,UAAQ,SAAS;AAAA,IACf,KAAK,SAAS;AACZ,YAAM,EAAE,eAAe,IAAI,MAAM,OAAO,yBAAyB;AACjE,YAAM,eAAe,aAAa;AAAA,QAChC,aAAa;AAAA,QACb,KAAK,QAAQ;AAAA,QACb,QAAQ,QAAQ;AAAA,MAClB,CAAC;AACD;AAAA,IACF;AAAA,IACA;AACE,YAAM,IAAI,MAAM,oBAAoB,OAAO;AAAA;AAAA,EAAO,MAAM,CAAC,EAAE;AAAA,EAC/D;AACF;AAEA,SAAS,QAAQ;AACf,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAEA,SAAS,UACP,QACA,SACA;AACA,GAAC,UAAU,QAAQ,QAAQ,MAAM,GAAG,OAAO;AAAA,CAAI;AACjD;AAKA,IACE,OAAO,YAAY,eACnB,OAAO,WAAW,eAClB,QAAQ,SAAS,QACjB;AACA,oBAAkB,EAAE,MAAM,CAAC,UAAU;AACnC,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,WAAW;AAAA,EACrB,CAAC;AACH;","names":[]}
package/dist/cli.mjs ADDED
@@ -0,0 +1,55 @@
1
+ #!/usr/bin/env node
2
+ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
3
+ get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
4
+ }) : x)(function(x) {
5
+ if (typeof require !== "undefined") return require.apply(this, arguments);
6
+ throw Error('Dynamic require of "' + x + '" is not supported');
7
+ });
8
+
9
+ // src/cli.ts
10
+ async function runVitestEvalsCli(args = process.argv.slice(2), options = {}) {
11
+ const [command, ...commandArgs] = args;
12
+ if (!command || command === "help" || command === "--help" || command === "-h") {
13
+ writeLine(options.stdout, usage());
14
+ return;
15
+ }
16
+ switch (command) {
17
+ case "serve": {
18
+ const { runReportUiCli } = await import("@vitest-evals/report-ui");
19
+ await runReportUiCli(commandArgs, {
20
+ commandName: "vitest-evals serve",
21
+ cwd: options.cwd,
22
+ stdout: options.stdout
23
+ });
24
+ return;
25
+ }
26
+ default:
27
+ throw new Error(`Unknown command: ${command}
28
+
29
+ ${usage()}`);
30
+ }
31
+ }
32
+ function usage() {
33
+ return [
34
+ "Usage: vitest-evals <command>",
35
+ "",
36
+ "Commands:",
37
+ " serve [json | dir | glob] Serve the local report UI",
38
+ "",
39
+ "Run `vitest-evals serve --help` for report UI options."
40
+ ].join("\n");
41
+ }
42
+ function writeLine(stdout, message) {
43
+ (stdout ?? process.stdout).write(`${message}
44
+ `);
45
+ }
46
+ if (typeof __require !== "undefined" && typeof module !== "undefined" && __require.main === module) {
47
+ runVitestEvalsCli().catch((error) => {
48
+ console.error(error instanceof Error ? error.message : String(error));
49
+ process.exitCode = 1;
50
+ });
51
+ }
52
+ export {
53
+ runVitestEvalsCli
54
+ };
55
+ //# sourceMappingURL=cli.mjs.map
package/dist/cli.mjs.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/** Output streams used by the `vitest-evals` CLI runner. */\nexport type VitestEvalsCliIo = {\n stdout?: Pick<NodeJS.WriteStream, \"write\">;\n};\n\n/** Options for running the `vitest-evals` CLI. */\nexport type RunVitestEvalsCliOptions = VitestEvalsCliIo & {\n cwd?: string;\n};\n\n/** Runs the product-facing `vitest-evals` CLI. */\nexport async function runVitestEvalsCli(\n args = process.argv.slice(2),\n options: RunVitestEvalsCliOptions = {},\n) {\n const [command, ...commandArgs] = args;\n\n if (\n !command ||\n command === \"help\" ||\n command === \"--help\" ||\n command === \"-h\"\n ) {\n writeLine(options.stdout, usage());\n return;\n }\n\n switch (command) {\n case \"serve\": {\n const { runReportUiCli } = await import(\"@vitest-evals/report-ui\");\n await runReportUiCli(commandArgs, {\n commandName: \"vitest-evals serve\",\n cwd: options.cwd,\n stdout: options.stdout,\n });\n return;\n }\n default:\n throw new Error(`Unknown command: ${command}\\n\\n${usage()}`);\n }\n}\n\nfunction usage() {\n return [\n \"Usage: vitest-evals <command>\",\n \"\",\n \"Commands:\",\n \" serve [json | dir | glob] Serve the local report UI\",\n \"\",\n \"Run `vitest-evals serve --help` for report UI options.\",\n ].join(\"\\n\");\n}\n\nfunction writeLine(\n stdout: Pick<NodeJS.WriteStream, \"write\"> | undefined,\n message: string,\n) {\n (stdout ?? process.stdout).write(`${message}\\n`);\n}\n\ndeclare const require: NodeJS.Require | undefined;\ndeclare const module: NodeJS.Module | undefined;\n\nif (\n typeof require !== \"undefined\" &&\n typeof module !== \"undefined\" &&\n require.main === module\n) {\n runVitestEvalsCli().catch((error) => {\n console.error(error instanceof Error ? 
error.message : String(error));\n process.exitCode = 1;\n });\n}\n"],"mappings":";;;;;;;;;AAaA,eAAsB,kBACpB,OAAO,QAAQ,KAAK,MAAM,CAAC,GAC3B,UAAoC,CAAC,GACrC;AACA,QAAM,CAAC,SAAS,GAAG,WAAW,IAAI;AAElC,MACE,CAAC,WACD,YAAY,UACZ,YAAY,YACZ,YAAY,MACZ;AACA,cAAU,QAAQ,QAAQ,MAAM,CAAC;AACjC;AAAA,EACF;AAEA,UAAQ,SAAS;AAAA,IACf,KAAK,SAAS;AACZ,YAAM,EAAE,eAAe,IAAI,MAAM,OAAO,yBAAyB;AACjE,YAAM,eAAe,aAAa;AAAA,QAChC,aAAa;AAAA,QACb,KAAK,QAAQ;AAAA,QACb,QAAQ,QAAQ;AAAA,MAClB,CAAC;AACD;AAAA,IACF;AAAA,IACA;AACE,YAAM,IAAI,MAAM,oBAAoB,OAAO;AAAA;AAAA,EAAO,MAAM,CAAC,EAAE;AAAA,EAC/D;AACF;AAEA,SAAS,QAAQ;AACf,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAEA,SAAS,UACP,QACA,SACA;AACA,GAAC,UAAU,QAAQ,QAAQ,MAAM,GAAG,OAAO;AAAA,CAAI;AACjD;AAKA,IACE,OAAO,cAAY,eACnB,OAAO,WAAW,eAClB,UAAQ,SAAS,QACjB;AACA,oBAAkB,EAAE,MAAM,CAAC,UAAU;AACnC,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,WAAW;AAAA,EACrB,CAAC;AACH;","names":[]}