vitest-evals 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -35
- package/bin/vitest-evals.js +8 -0
- package/dist/cli.d.mts +13 -0
- package/dist/cli.d.ts +13 -0
- package/dist/cli.js +83 -0
- package/dist/cli.js.map +1 -0
- package/dist/cli.mjs +55 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/harness.d.mts +19 -433
- package/dist/harness.d.ts +19 -433
- package/dist/harness.js +19 -51
- package/dist/harness.js.map +1 -1
- package/dist/harness.mjs +31 -49
- package/dist/harness.mjs.map +1 -1
- package/dist/index.d.mts +47 -68
- package/dist/index.d.ts +47 -68
- package/dist/index.js +46 -96
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +58 -94
- package/dist/index.mjs.map +1 -1
- package/dist/internal/scoring.d.mts +1 -1
- package/dist/internal/scoring.d.ts +1 -1
- package/dist/internal/structuredOutputScorer.d.mts +1 -1
- package/dist/internal/structuredOutputScorer.d.ts +1 -1
- package/dist/internal/toolCallScorer.d.mts +1 -1
- package/dist/internal/toolCallScorer.d.ts +1 -1
- package/dist/internal/toolCallScorer.js +2 -0
- package/dist/internal/toolCallScorer.js.map +1 -1
- package/dist/internal/toolCallScorer.mjs +16 -0
- package/dist/internal/toolCallScorer.mjs.map +1 -1
- package/dist/judges/factualityJudge.d.mts +15 -13
- package/dist/judges/factualityJudge.d.ts +15 -13
- package/dist/judges/factualityJudge.js +13 -23
- package/dist/judges/factualityJudge.js.map +1 -1
- package/dist/judges/factualityJudge.mjs +27 -23
- package/dist/judges/factualityJudge.mjs.map +1 -1
- package/dist/judges/index.d.mts +1 -0
- package/dist/judges/index.d.ts +1 -0
- package/dist/judges/index.js +28 -47
- package/dist/judges/index.js.map +1 -1
- package/dist/judges/index.mjs +40 -45
- package/dist/judges/index.mjs.map +1 -1
- package/dist/judges/judgeHarness.d.mts +7 -10
- package/dist/judges/judgeHarness.d.ts +7 -10
- package/dist/judges/judgeHarness.js +13 -34
- package/dist/judges/judgeHarness.js.map +1 -1
- package/dist/judges/judgeHarness.mjs +25 -32
- package/dist/judges/judgeHarness.mjs.map +1 -1
- package/dist/judges/structuredOutputJudge.d.mts +7 -8
- package/dist/judges/structuredOutputJudge.d.ts +7 -8
- package/dist/judges/structuredOutputJudge.js +3 -3
- package/dist/judges/structuredOutputJudge.js.map +1 -1
- package/dist/judges/structuredOutputJudge.mjs +3 -3
- package/dist/judges/structuredOutputJudge.mjs.map +1 -1
- package/dist/judges/toolCallJudge.d.mts +12 -8
- package/dist/judges/toolCallJudge.d.ts +12 -8
- package/dist/judges/toolCallJudge.js +5 -3
- package/dist/judges/toolCallJudge.js.map +1 -1
- package/dist/judges/toolCallJudge.mjs +19 -3
- package/dist/judges/toolCallJudge.mjs.map +1 -1
- package/dist/judges/types.d.mts +14 -24
- package/dist/judges/types.d.ts +14 -24
- package/dist/judges/types.js.map +1 -1
- package/dist/legacy/scorers/index.js +2 -0
- package/dist/legacy/scorers/index.js.map +1 -1
- package/dist/legacy/scorers/index.mjs +16 -0
- package/dist/legacy/scorers/index.mjs.map +1 -1
- package/dist/legacy/scorers/toolCallScorer.js +2 -0
- package/dist/legacy/scorers/toolCallScorer.js.map +1 -1
- package/dist/legacy/scorers/toolCallScorer.mjs +16 -0
- package/dist/legacy/scorers/toolCallScorer.mjs.map +1 -1
- package/dist/legacy.js +7 -5
- package/dist/legacy.js.map +1 -1
- package/dist/legacy.mjs +21 -5
- package/dist/legacy.mjs.map +1 -1
- package/dist/replay.d.mts +1 -1
- package/dist/replay.d.ts +1 -1
- package/dist/reporter.js +4 -5
- package/dist/reporter.js.map +1 -1
- package/dist/reporter.mjs +18 -5
- package/dist/reporter.mjs.map +1 -1
- package/package.json +9 -1
package/README.md
CHANGED
|
@@ -30,8 +30,8 @@ workflow.
|
|
|
30
30
|
|
|
31
31
|
- `describeEval(...)` binds exactly one harness to a suite
|
|
32
32
|
- the suite callback receives a fixture-backed Vitest `it`
|
|
33
|
-
- `run(input
|
|
34
|
-
|
|
33
|
+
- `run(input)` executes the harness explicitly and returns a normalized
|
|
34
|
+
`HarnessRun`
|
|
35
35
|
- the returned `result.output` is the app-facing value you assert on directly
|
|
36
36
|
- the returned `result.session` is the canonical JSON-serializable transcript for
|
|
37
37
|
reporting, replay, tool assertions, and judges
|
|
@@ -41,19 +41,18 @@ workflow.
|
|
|
41
41
|
that do not return traces themselves. Span attributes include typed
|
|
42
42
|
OpenTelemetry GenAI semantic keys while still allowing provider-specific
|
|
43
43
|
metadata
|
|
44
|
-
- scenario-specific judge criteria
|
|
45
|
-
|
|
46
|
-
scenario payload
|
|
44
|
+
- scenario-specific judge criteria should live in `input` or explicit matcher
|
|
45
|
+
options, depending on whether the app or only the judge needs them
|
|
47
46
|
- suite-level `judges` are optional and run automatically after each `run(...)`
|
|
48
47
|
- suite-level `judgeThreshold` controls fail-on-score for those automatic judges
|
|
49
48
|
- every judge is a named object with `assess(ctx)`
|
|
50
49
|
- every judge receives `JudgeContext` with typed `input`, typed `output`, the
|
|
51
|
-
normalized run/session, tool calls
|
|
50
|
+
normalized run/session, and tool calls; `output` is only optional
|
|
52
51
|
when the harness output type includes `undefined`
|
|
53
52
|
- judges own their prompt, rubric, and parsing; LLM-backed judges use
|
|
54
53
|
`ctx.runJudge(...)` from a configured `judgeHarness`
|
|
55
54
|
- explicit judge assertions use
|
|
56
|
-
`await expect(result).toSatisfyJudge(judge,
|
|
55
|
+
`await expect(result).toSatisfyJudge(judge, options)`
|
|
57
56
|
|
|
58
57
|
## Explicit Run Example
|
|
59
58
|
|
|
@@ -80,18 +79,16 @@ describeEval(
|
|
|
80
79
|
agent: () => createRefundAgent(),
|
|
81
80
|
}),
|
|
82
81
|
judgeHarness,
|
|
83
|
-
judges: [
|
|
82
|
+
judges: [
|
|
83
|
+
FactualityJudge({
|
|
84
|
+
expected: "The refund request is approved.",
|
|
85
|
+
}),
|
|
86
|
+
],
|
|
84
87
|
judgeThreshold: 0.6,
|
|
85
88
|
},
|
|
86
89
|
(it) => {
|
|
87
90
|
it("approves a refundable invoice", async ({ run }) => {
|
|
88
|
-
const result = await run("Refund invoice inv_123"
|
|
89
|
-
metadata: {
|
|
90
|
-
expected: "The refund request is approved.",
|
|
91
|
-
expectedStatus: "approved",
|
|
92
|
-
expectedTools: ["lookupInvoice", "createRefund"],
|
|
93
|
-
},
|
|
94
|
-
});
|
|
91
|
+
const result = await run("Refund invoice inv_123");
|
|
95
92
|
|
|
96
93
|
expect(result.output).toMatchObject({ status: "approved" });
|
|
97
94
|
expect(toolCalls(result.session).map((call) => call.name)).toEqual([
|
|
@@ -121,13 +118,11 @@ describeEval("refund agent", { harness }, (it) => {
|
|
|
121
118
|
input: "Refund invoice inv_404",
|
|
122
119
|
expectedStatus: "denied",
|
|
123
120
|
},
|
|
124
|
-
])("$name", async ({ input,
|
|
125
|
-
const result = await run(input
|
|
126
|
-
metadata,
|
|
127
|
-
});
|
|
121
|
+
])("$name", async ({ input, expectedStatus }, { run }) => {
|
|
122
|
+
const result = await run(input);
|
|
128
123
|
|
|
129
124
|
expect(result.output).toMatchObject({
|
|
130
|
-
status:
|
|
125
|
+
status: expectedStatus,
|
|
131
126
|
});
|
|
132
127
|
});
|
|
133
128
|
});
|
|
@@ -144,6 +139,18 @@ compatibility.
|
|
|
144
139
|
|
|
145
140
|
Full transcripts and spans are preserved in the Vitest JSON report metadata.
|
|
146
141
|
|
|
142
|
+
## Local Report UI
|
|
143
|
+
|
|
144
|
+
The local report UI reads the same Vitest JSON artifacts and serves a React SPA
|
|
145
|
+
for drilling into runs, eval cases, harness output, sessions, tool calls,
|
|
146
|
+
scores, and trace spans.
|
|
147
|
+
|
|
148
|
+
```sh
|
|
149
|
+
pnpm exec vitest-evals serve vitest-results.json
|
|
150
|
+
pnpm exec vitest-evals serve "eval-results/*.json"
|
|
151
|
+
pnpm exec vitest-evals serve eval-results/
|
|
152
|
+
```
|
|
153
|
+
|
|
147
154
|
## GitHub Actions Reporting
|
|
148
155
|
|
|
149
156
|
Use Vitest JSON as the eval report artifact. It preserves the `meta` field that
|
|
@@ -201,7 +208,7 @@ First-party harness packages are conveniences, not the only supported path. If
|
|
|
201
208
|
you need to test a full application flow, use `createHarness(...)` to run your
|
|
202
209
|
app through its normal entrypoint and return the app-facing output. Judges own
|
|
203
210
|
their prompt/rubric text separately from the system under test.
|
|
204
|
-
When generics are needed, use `createHarness<Input, Output
|
|
211
|
+
When generics are needed, use `createHarness<Input, Output>(...)`.
|
|
205
212
|
|
|
206
213
|
```ts
|
|
207
214
|
import {
|
|
@@ -209,7 +216,6 @@ import {
|
|
|
209
216
|
createJudge,
|
|
210
217
|
createJudgeHarness,
|
|
211
218
|
describeEval,
|
|
212
|
-
type JudgeContext,
|
|
213
219
|
} from "vitest-evals";
|
|
214
220
|
|
|
215
221
|
type AppEvent = {
|
|
@@ -226,14 +232,12 @@ type AppEvalInput = {
|
|
|
226
232
|
};
|
|
227
233
|
};
|
|
228
234
|
|
|
229
|
-
type AppEvalMetadata = Record<string, never>;
|
|
230
|
-
|
|
231
235
|
type AppOutput = {
|
|
232
236
|
replies: Array<{ text: string }>;
|
|
233
237
|
sideEffects: string[];
|
|
234
238
|
};
|
|
235
239
|
|
|
236
|
-
const appHarness = createHarness<AppEvalInput, AppOutput
|
|
240
|
+
const appHarness = createHarness<AppEvalInput, AppOutput>({
|
|
237
241
|
name: "custom-app",
|
|
238
242
|
run: async ({ input, signal }) => {
|
|
239
243
|
const result = await replayAppEvents(input.events, {
|
|
@@ -259,9 +263,9 @@ const judgeHarness = createJudgeHarness({
|
|
|
259
263
|
promptJudgeModel({ prompt, signal }),
|
|
260
264
|
});
|
|
261
265
|
|
|
262
|
-
const AppRubricJudge = createJudge(
|
|
266
|
+
const AppRubricJudge = createJudge<AppEvalInput, AppOutput>(
|
|
263
267
|
"AppRubricJudge",
|
|
264
|
-
async (ctx
|
|
268
|
+
async (ctx) => {
|
|
265
269
|
if (!ctx.runJudge) {
|
|
266
270
|
throw new Error("AppRubricJudge requires a configured judgeHarness.");
|
|
267
271
|
}
|
|
@@ -311,11 +315,11 @@ describeEval(
|
|
|
311
315
|
Use `Harness.run(...)` for the application under test. Calling
|
|
312
316
|
`ctx.harness.run(...)` from inside a judge runs the application a second time,
|
|
313
317
|
so reserve that for judges that intentionally need a second execution. Put
|
|
314
|
-
criteria on `input` when they are part of the scenario itself;
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
from `input` and typed `output`; return a full
|
|
318
|
-
exact session control.
|
|
318
|
+
criteria on `input` when they are part of the scenario itself; pass
|
|
319
|
+
case-specific judge criteria through matcher options, or configure suite-wide
|
|
320
|
+
criteria on the judge instance. `createHarness(...)` builds a default
|
|
321
|
+
user/assistant session from `input` and typed `output`; return a full
|
|
322
|
+
`HarnessRun` only when you need exact session control.
|
|
319
323
|
|
|
320
324
|
Provider setup and rubric parsing stay in your judge. The core
|
|
321
325
|
package only requires the judge to return a `JudgeResult` with a score and
|
|
@@ -437,7 +441,7 @@ so use that only when a second run is intentional.
|
|
|
437
441
|
|
|
438
442
|
For an `EvalHarnessRun` returned by fixture `run(...)`,
|
|
439
443
|
`toSatisfyJudge(...)` uses the run's typed `output` and reuses the registered
|
|
440
|
-
input
|
|
444
|
+
input. It requires any custom judge params and rejects judges whose
|
|
441
445
|
output type cannot assess the received value. Inside an eval test,
|
|
442
446
|
matcher calls on registered output objects or session objects reuse that exact
|
|
443
447
|
run context when the value can be registered by reference, so
|
|
@@ -445,10 +449,10 @@ run context when the value can be registered by reference, so
|
|
|
445
449
|
outputs. Other raw values fall back to the current test's most recent
|
|
446
450
|
`run(...)` context. For
|
|
447
451
|
manually-created runs or values outside an eval context, pass any required
|
|
448
|
-
`input
|
|
452
|
+
`input` or `harness` in matcher options. Structured or
|
|
449
453
|
programmatic result checks should usually assert on `result.output` directly.
|
|
450
454
|
When a judge needs richer normalized context or the configured suite harness,
|
|
451
|
-
type it with `JudgeContext
|
|
455
|
+
type it with `createJudge<Input, Output>(...)` or `JudgeContext<Input, Output>`.
|
|
452
456
|
|
|
453
457
|
When you only need deterministic contract checks, built-ins such as
|
|
454
458
|
`StructuredOutputJudge()` and `ToolCallJudge()` are still available.
|
package/dist/cli.d.mts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/** Output streams used by the `vitest-evals` CLI runner. */
|
|
3
|
+
type VitestEvalsCliIo = {
|
|
4
|
+
stdout?: Pick<NodeJS.WriteStream, "write">;
|
|
5
|
+
};
|
|
6
|
+
/** Options for running the `vitest-evals` CLI. */
|
|
7
|
+
type RunVitestEvalsCliOptions = VitestEvalsCliIo & {
|
|
8
|
+
cwd?: string;
|
|
9
|
+
};
|
|
10
|
+
/** Runs the product-facing `vitest-evals` CLI. */
|
|
11
|
+
declare function runVitestEvalsCli(args?: string[], options?: RunVitestEvalsCliOptions): Promise<void>;
|
|
12
|
+
|
|
13
|
+
export { type RunVitestEvalsCliOptions, type VitestEvalsCliIo, runVitestEvalsCli };
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/** Output streams used by the `vitest-evals` CLI runner. */
|
|
3
|
+
type VitestEvalsCliIo = {
|
|
4
|
+
stdout?: Pick<NodeJS.WriteStream, "write">;
|
|
5
|
+
};
|
|
6
|
+
/** Options for running the `vitest-evals` CLI. */
|
|
7
|
+
type RunVitestEvalsCliOptions = VitestEvalsCliIo & {
|
|
8
|
+
cwd?: string;
|
|
9
|
+
};
|
|
10
|
+
/** Runs the product-facing `vitest-evals` CLI. */
|
|
11
|
+
declare function runVitestEvalsCli(args?: string[], options?: RunVitestEvalsCliOptions): Promise<void>;
|
|
12
|
+
|
|
13
|
+
export { type RunVitestEvalsCliOptions, type VitestEvalsCliIo, runVitestEvalsCli };
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
var __create = Object.create;
|
|
4
|
+
var __defProp = Object.defineProperty;
|
|
5
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
6
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
8
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
9
|
+
var __export = (target, all) => {
|
|
10
|
+
for (var name in all)
|
|
11
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
12
|
+
};
|
|
13
|
+
var __copyProps = (to, from, except, desc) => {
|
|
14
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
15
|
+
for (let key of __getOwnPropNames(from))
|
|
16
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
17
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
18
|
+
}
|
|
19
|
+
return to;
|
|
20
|
+
};
|
|
21
|
+
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
22
|
+
// If the importer is in node compatibility mode or this is not an ESM
|
|
23
|
+
// file that has been converted to a CommonJS file using a Babel-
|
|
24
|
+
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
25
|
+
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
26
|
+
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
27
|
+
mod
|
|
28
|
+
));
|
|
29
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
30
|
+
|
|
31
|
+
// src/cli.ts
|
|
32
|
+
var cli_exports = {};
|
|
33
|
+
__export(cli_exports, {
|
|
34
|
+
runVitestEvalsCli: () => runVitestEvalsCli
|
|
35
|
+
});
|
|
36
|
+
module.exports = __toCommonJS(cli_exports);
|
|
37
|
+
async function runVitestEvalsCli(args = process.argv.slice(2), options = {}) {
|
|
38
|
+
const [command, ...commandArgs] = args;
|
|
39
|
+
if (!command || command === "help" || command === "--help" || command === "-h") {
|
|
40
|
+
writeLine(options.stdout, usage());
|
|
41
|
+
return;
|
|
42
|
+
}
|
|
43
|
+
switch (command) {
|
|
44
|
+
case "serve": {
|
|
45
|
+
const { runReportUiCli } = await import("@vitest-evals/report-ui");
|
|
46
|
+
await runReportUiCli(commandArgs, {
|
|
47
|
+
commandName: "vitest-evals serve",
|
|
48
|
+
cwd: options.cwd,
|
|
49
|
+
stdout: options.stdout
|
|
50
|
+
});
|
|
51
|
+
return;
|
|
52
|
+
}
|
|
53
|
+
default:
|
|
54
|
+
throw new Error(`Unknown command: ${command}
|
|
55
|
+
|
|
56
|
+
${usage()}`);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
function usage() {
|
|
60
|
+
return [
|
|
61
|
+
"Usage: vitest-evals <command>",
|
|
62
|
+
"",
|
|
63
|
+
"Commands:",
|
|
64
|
+
" serve [json | dir | glob] Serve the local report UI",
|
|
65
|
+
"",
|
|
66
|
+
"Run `vitest-evals serve --help` for report UI options."
|
|
67
|
+
].join("\n");
|
|
68
|
+
}
|
|
69
|
+
function writeLine(stdout, message) {
|
|
70
|
+
(stdout ?? process.stdout).write(`${message}
|
|
71
|
+
`);
|
|
72
|
+
}
|
|
73
|
+
if (typeof require !== "undefined" && typeof module !== "undefined" && require.main === module) {
|
|
74
|
+
runVitestEvalsCli().catch((error) => {
|
|
75
|
+
console.error(error instanceof Error ? error.message : String(error));
|
|
76
|
+
process.exitCode = 1;
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
80
|
+
0 && (module.exports = {
|
|
81
|
+
runVitestEvalsCli
|
|
82
|
+
});
|
|
83
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/** Output streams used by the `vitest-evals` CLI runner. */\nexport type VitestEvalsCliIo = {\n stdout?: Pick<NodeJS.WriteStream, \"write\">;\n};\n\n/** Options for running the `vitest-evals` CLI. */\nexport type RunVitestEvalsCliOptions = VitestEvalsCliIo & {\n cwd?: string;\n};\n\n/** Runs the product-facing `vitest-evals` CLI. */\nexport async function runVitestEvalsCli(\n args = process.argv.slice(2),\n options: RunVitestEvalsCliOptions = {},\n) {\n const [command, ...commandArgs] = args;\n\n if (\n !command ||\n command === \"help\" ||\n command === \"--help\" ||\n command === \"-h\"\n ) {\n writeLine(options.stdout, usage());\n return;\n }\n\n switch (command) {\n case \"serve\": {\n const { runReportUiCli } = await import(\"@vitest-evals/report-ui\");\n await runReportUiCli(commandArgs, {\n commandName: \"vitest-evals serve\",\n cwd: options.cwd,\n stdout: options.stdout,\n });\n return;\n }\n default:\n throw new Error(`Unknown command: ${command}\\n\\n${usage()}`);\n }\n}\n\nfunction usage() {\n return [\n \"Usage: vitest-evals <command>\",\n \"\",\n \"Commands:\",\n \" serve [json | dir | glob] Serve the local report UI\",\n \"\",\n \"Run `vitest-evals serve --help` for report UI options.\",\n ].join(\"\\n\");\n}\n\nfunction writeLine(\n stdout: Pick<NodeJS.WriteStream, \"write\"> | undefined,\n message: string,\n) {\n (stdout ?? process.stdout).write(`${message}\\n`);\n}\n\ndeclare const require: NodeJS.Require | undefined;\ndeclare const module: NodeJS.Module | undefined;\n\nif (\n typeof require !== \"undefined\" &&\n typeof module !== \"undefined\" &&\n require.main === module\n) {\n runVitestEvalsCli().catch((error) => {\n console.error(error instanceof Error ? error.message : String(error));\n process.exitCode = 1;\n });\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAaA,eAAsB,kBACpB,OAAO,QAAQ,KAAK,MAAM,CAAC,GAC3B,UAAoC,CAAC,GACrC;AACA,QAAM,CAAC,SAAS,GAAG,WAAW,IAAI;AAElC,MACE,CAAC,WACD,YAAY,UACZ,YAAY,YACZ,YAAY,MACZ;AACA,cAAU,QAAQ,QAAQ,MAAM,CAAC;AACjC;AAAA,EACF;AAEA,UAAQ,SAAS;AAAA,IACf,KAAK,SAAS;AACZ,YAAM,EAAE,eAAe,IAAI,MAAM,OAAO,yBAAyB;AACjE,YAAM,eAAe,aAAa;AAAA,QAChC,aAAa;AAAA,QACb,KAAK,QAAQ;AAAA,QACb,QAAQ,QAAQ;AAAA,MAClB,CAAC;AACD;AAAA,IACF;AAAA,IACA;AACE,YAAM,IAAI,MAAM,oBAAoB,OAAO;AAAA;AAAA,EAAO,MAAM,CAAC,EAAE;AAAA,EAC/D;AACF;AAEA,SAAS,QAAQ;AACf,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAEA,SAAS,UACP,QACA,SACA;AACA,GAAC,UAAU,QAAQ,QAAQ,MAAM,GAAG,OAAO;AAAA,CAAI;AACjD;AAKA,IACE,OAAO,YAAY,eACnB,OAAO,WAAW,eAClB,QAAQ,SAAS,QACjB;AACA,oBAAkB,EAAE,MAAM,CAAC,UAAU;AACnC,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,WAAW;AAAA,EACrB,CAAC;AACH;","names":[]}
|
package/dist/cli.mjs
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
3
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
4
|
+
}) : x)(function(x) {
|
|
5
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
6
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
// src/cli.ts
|
|
10
|
+
async function runVitestEvalsCli(args = process.argv.slice(2), options = {}) {
|
|
11
|
+
const [command, ...commandArgs] = args;
|
|
12
|
+
if (!command || command === "help" || command === "--help" || command === "-h") {
|
|
13
|
+
writeLine(options.stdout, usage());
|
|
14
|
+
return;
|
|
15
|
+
}
|
|
16
|
+
switch (command) {
|
|
17
|
+
case "serve": {
|
|
18
|
+
const { runReportUiCli } = await import("@vitest-evals/report-ui");
|
|
19
|
+
await runReportUiCli(commandArgs, {
|
|
20
|
+
commandName: "vitest-evals serve",
|
|
21
|
+
cwd: options.cwd,
|
|
22
|
+
stdout: options.stdout
|
|
23
|
+
});
|
|
24
|
+
return;
|
|
25
|
+
}
|
|
26
|
+
default:
|
|
27
|
+
throw new Error(`Unknown command: ${command}
|
|
28
|
+
|
|
29
|
+
${usage()}`);
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
function usage() {
|
|
33
|
+
return [
|
|
34
|
+
"Usage: vitest-evals <command>",
|
|
35
|
+
"",
|
|
36
|
+
"Commands:",
|
|
37
|
+
" serve [json | dir | glob] Serve the local report UI",
|
|
38
|
+
"",
|
|
39
|
+
"Run `vitest-evals serve --help` for report UI options."
|
|
40
|
+
].join("\n");
|
|
41
|
+
}
|
|
42
|
+
function writeLine(stdout, message) {
|
|
43
|
+
(stdout ?? process.stdout).write(`${message}
|
|
44
|
+
`);
|
|
45
|
+
}
|
|
46
|
+
if (typeof __require !== "undefined" && typeof module !== "undefined" && __require.main === module) {
|
|
47
|
+
runVitestEvalsCli().catch((error) => {
|
|
48
|
+
console.error(error instanceof Error ? error.message : String(error));
|
|
49
|
+
process.exitCode = 1;
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
export {
|
|
53
|
+
runVitestEvalsCli
|
|
54
|
+
};
|
|
55
|
+
//# sourceMappingURL=cli.mjs.map
|
package/dist/cli.mjs.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\n\n/** Output streams used by the `vitest-evals` CLI runner. */\nexport type VitestEvalsCliIo = {\n stdout?: Pick<NodeJS.WriteStream, \"write\">;\n};\n\n/** Options for running the `vitest-evals` CLI. */\nexport type RunVitestEvalsCliOptions = VitestEvalsCliIo & {\n cwd?: string;\n};\n\n/** Runs the product-facing `vitest-evals` CLI. */\nexport async function runVitestEvalsCli(\n args = process.argv.slice(2),\n options: RunVitestEvalsCliOptions = {},\n) {\n const [command, ...commandArgs] = args;\n\n if (\n !command ||\n command === \"help\" ||\n command === \"--help\" ||\n command === \"-h\"\n ) {\n writeLine(options.stdout, usage());\n return;\n }\n\n switch (command) {\n case \"serve\": {\n const { runReportUiCli } = await import(\"@vitest-evals/report-ui\");\n await runReportUiCli(commandArgs, {\n commandName: \"vitest-evals serve\",\n cwd: options.cwd,\n stdout: options.stdout,\n });\n return;\n }\n default:\n throw new Error(`Unknown command: ${command}\\n\\n${usage()}`);\n }\n}\n\nfunction usage() {\n return [\n \"Usage: vitest-evals <command>\",\n \"\",\n \"Commands:\",\n \" serve [json | dir | glob] Serve the local report UI\",\n \"\",\n \"Run `vitest-evals serve --help` for report UI options.\",\n ].join(\"\\n\");\n}\n\nfunction writeLine(\n stdout: Pick<NodeJS.WriteStream, \"write\"> | undefined,\n message: string,\n) {\n (stdout ?? process.stdout).write(`${message}\\n`);\n}\n\ndeclare const require: NodeJS.Require | undefined;\ndeclare const module: NodeJS.Module | undefined;\n\nif (\n typeof require !== \"undefined\" &&\n typeof module !== \"undefined\" &&\n require.main === module\n) {\n runVitestEvalsCli().catch((error) => {\n console.error(error instanceof Error ? error.message : String(error));\n process.exitCode = 1;\n });\n}\n"],"mappings":";;;;;;;;;AAaA,eAAsB,kBACpB,OAAO,QAAQ,KAAK,MAAM,CAAC,GAC3B,UAAoC,CAAC,GACrC;AACA,QAAM,CAAC,SAAS,GAAG,WAAW,IAAI;AAElC,MACE,CAAC,WACD,YAAY,UACZ,YAAY,YACZ,YAAY,MACZ;AACA,cAAU,QAAQ,QAAQ,MAAM,CAAC;AACjC;AAAA,EACF;AAEA,UAAQ,SAAS;AAAA,IACf,KAAK,SAAS;AACZ,YAAM,EAAE,eAAe,IAAI,MAAM,OAAO,yBAAyB;AACjE,YAAM,eAAe,aAAa;AAAA,QAChC,aAAa;AAAA,QACb,KAAK,QAAQ;AAAA,QACb,QAAQ,QAAQ;AAAA,MAClB,CAAC;AACD;AAAA,IACF;AAAA,IACA;AACE,YAAM,IAAI,MAAM,oBAAoB,OAAO;AAAA;AAAA,EAAO,MAAM,CAAC,EAAE;AAAA,EAC/D;AACF;AAEA,SAAS,QAAQ;AACf,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE,KAAK,IAAI;AACb;AAEA,SAAS,UACP,QACA,SACA;AACA,GAAC,UAAU,QAAQ,QAAQ,MAAM,GAAG,OAAO;AAAA,CAAI;AACjD;AAKA,IACE,OAAO,cAAY,eACnB,OAAO,WAAW,eAClB,UAAQ,SAAS,QACjB;AACA,oBAAkB,EAAE,MAAM,CAAC,UAAU;AACnC,YAAQ,MAAM,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK,CAAC;AACpE,YAAQ,WAAW;AAAA,EACrB,CAAC;AACH;","names":[]}
|