@prompt-diff/cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app.d.ts +12 -0
- package/dist/app.d.ts.map +1 -0
- package/dist/app.js +57 -0
- package/dist/app.js.map +1 -0
- package/dist/components/DiffView.d.ts +6 -0
- package/dist/components/DiffView.d.ts.map +1 -0
- package/dist/components/DiffView.js +36 -0
- package/dist/components/DiffView.js.map +1 -0
- package/dist/components/EvalView.d.ts +7 -0
- package/dist/components/EvalView.d.ts.map +1 -0
- package/dist/components/EvalView.js +181 -0
- package/dist/components/EvalView.js.map +1 -0
- package/dist/components/Spinner.d.ts +5 -0
- package/dist/components/Spinner.d.ts.map +1 -0
- package/dist/components/Spinner.js +18 -0
- package/dist/components/Spinner.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +79 -0
- package/dist/index.js.map +1 -0
- package/dist/run-command.d.ts +12 -0
- package/dist/run-command.d.ts.map +1 -0
- package/dist/run-command.js +121 -0
- package/dist/run-command.js.map +1 -0
- package/package.json +33 -0
package/dist/app.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import React from "react";
|
|
2
|
+
import type { ProviderName, ProviderConfig } from "@prompt-diff/core";
|
|
3
|
+
/** Props accepted by the `App` component declared in this file. */
interface AppProps {
    /** Prompt text forwarded to the diff runner. */
    prompt: string;
    /** Providers to query; also listed in the loading spinner label. */
    providers: ProviderName[];
    /** Provider configuration forwarded unchanged to the diff runner. */
    config: ProviderConfig;
    /** Run count; values above 1 select the multi-run code path. */
    runs: number;
    /** `"json"` prints the results as JSON to stdout; `"pretty"` renders the Ink UI. */
    outputFormat: "pretty" | "json";
}
|
|
10
|
+
/**
 * Ink component that runs the prompt against the given providers and shows the outcome.
 *
 * Returns `null` when `outputFormat` is `"json"` (the results are written to stdout
 * instead of being rendered); otherwise returns the spinner, error, or diff elements.
 */
export declare function App({ prompt, providers, config, runs, outputFormat }: AppProps): React.JSX.Element | null;
|
|
11
|
+
export {};
|
|
12
|
+
//# sourceMappingURL=app.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"app.d.ts","sourceRoot":"","sources":["../src/app.tsx"],"names":[],"mappings":"AAAA,OAAO,KAA8B,MAAM,OAAO,CAAC;AAGnD,OAAO,KAAK,EAAc,YAAY,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAIlF,UAAU,QAAQ;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,YAAY,EAAE,CAAC;IAC1B,MAAM,EAAE,cAAc,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,QAAQ,GAAG,MAAM,CAAC;CACjC;AAED,wBAAgB,GAAG,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,EAAE,EAAE,QAAQ,4BA2D9E"}
|
package/dist/app.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import React, { useState, useEffect } from "react";
|
|
2
|
+
import { Box, Text, useApp } from "ink";
|
|
3
|
+
import { runDiff, runDiffMany } from "@prompt-diff/core";
|
|
4
|
+
import { DiffView } from "./components/DiffView.js";
|
|
5
|
+
import { Spinner } from "./components/Spinner.js";
|
|
6
|
+
/**
 * Root Ink component for a prompt-diff run.
 *
 * On mount it calls `runDiffMany` (when `runs > 1`) or `runDiff` once, stores the
 * results, and asks Ink to exit roughly 50 ms later so the final frame can be painted.
 *
 * - `outputFormat === "json"`: nothing is rendered. The results are written to stdout
 *   as pretty-printed JSON exactly once, from the effect rather than from the render
 *   body, so a re-render can never duplicate the output. If the run failed, `[]` is
 *   still written to stdout (as before) and the error message is additionally written
 *   to stderr instead of being silently dropped.
 * - otherwise: renders a spinner while loading, then either the error or one
 *   `DiffView` per run.
 *
 * @param props.prompt       Prompt text forwarded to the diff runner.
 * @param props.providers    Providers to query; also shown in the spinner label.
 * @param props.config       Provider configuration forwarded to the diff runner.
 * @param props.runs         Number of runs; values above 1 use `runDiffMany`.
 * @param props.outputFormat `"pretty"` or `"json"`.
 * @returns A React element, or `null` in JSON mode.
 */
export function App({ prompt, providers, config, runs, outputFormat }) {
    const { exit } = useApp();
    const [loading, setLoading] = useState(true);
    const [diffs, setDiffs] = useState([]);
    const [error, setError] = useState(null);
    useEffect(() => {
        // Guards against touching state or scheduling an exit after unmount.
        let active = true;
        let exitTimer;
        const run = async () => {
            let results = [];
            let failure = null;
            try {
                results = runs > 1
                    ? await runDiffMany({ prompt, providers, config, runs })
                    : [await runDiff({ prompt, providers, config })];
            }
            catch (err) {
                failure = err instanceof Error ? err.message : String(err);
            }
            if (!active) {
                return;
            }
            if (outputFormat === "json") {
                // Side effects belong here, not in render: this runs exactly once.
                process.stdout.write(JSON.stringify(results, null, 2) + "\n");
                if (failure !== null) {
                    process.stderr.write(`Error: ${failure}\n`);
                }
            }
            setDiffs(results);
            setError(failure);
            setLoading(false);
            // Small delay so Ink can paint the final frame before exiting.
            exitTimer = setTimeout(() => exit(), 50);
        };
        run();
        return () => {
            active = false;
            clearTimeout(exitTimer);
        };
        // Intentionally run once on mount; the props are treated as fixed for the run.
    }, []);
    if (outputFormat === "json") {
        return null;
    }
    if (loading) {
        return (React.createElement(Box, null,
            React.createElement(Spinner, { label: `Querying ${providers.join(", ")}...` })));
    }
    if (error) {
        return React.createElement(Text, { color: "red" },
            "Error: ",
            error);
    }
    return (React.createElement(Box, { flexDirection: "column" }, diffs.map((diff, i) => (React.createElement(Box, { key: i, flexDirection: "column" },
        diffs.length > 1 && (React.createElement(Text, { bold: true, color: "yellow" },
            "\u2014 Run ",
            i + 1,
            " of ",
            diffs.length,
            " \u2014")),
        React.createElement(DiffView, { diff: diff }))))));
}
|
|
57
|
+
//# sourceMappingURL=app.js.map
|
package/dist/app.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"app.js","sourceRoot":"","sources":["../src/app.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,KAAK,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAEzD,OAAO,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAUlD,MAAM,UAAU,GAAG,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,EAAY;IAC7E,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,EAAE,CAAC;IAC1B,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,GAAG,QAAQ,CAAe,EAAE,CAAC,CAAC;IACrD,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,GAAG,QAAQ,CAAgB,IAAI,CAAC,CAAC;IAExD,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,GAAG,GAAG,KAAK,IAAI,EAAE;YACrB,IAAI,CAAC;gBACH,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;oBACb,MAAM,OAAO,GAAG,MAAM,WAAW,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;oBACvE,QAAQ,CAAC,OAAO,CAAC,CAAC;gBACpB,CAAC;qBAAM,CAAC;oBACN,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC;oBAC5D,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBACrB,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,QAAQ,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;YAC7D,CAAC;oBAAS,CAAC;gBACT,UAAU,CAAC,KAAK,CAAC,CAAC;gBAClB,UAAU,CAAC,GAAG,EAAE,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC,CAAC;QACF,GAAG,EAAE,CAAC;IACR,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,IAAI,YAAY,KAAK,MAAM,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;QAC9D,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,OAAO,EAAE,CAAC;QACZ,OAAO,CACL,oBAAC,GAAG;YACF,oBAAC,OAAO,IAAC,KAAK,EAAE,YAAY,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAI,CACrD,CACP,CAAC;IACJ,CAAC;IAED,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,oBAAC,IAAI,IAAC,KAAK,EAAC,KAAK;;YAAS,KAAK,CAAQ,CAAC;IACjD,CAAC;IAED,OAAO,CACL,oBAAC,GAAG,IAAC,aAAa,EAAC,QAAQ,IACxB,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,CACtB,oBAAC,GAAG,IAAC
,GAAG,EAAE,CAAC,EAAE,aAAa,EAAC,QAAQ;QAChC,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,CACnB,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAC,QAAQ;;YAChB,CAAC,GAAG,CAAC;;YAAM,KAAK,CAAC,MAAM;sBACzB,CACR;QACD,oBAAC,QAAQ,IAAC,IAAI,EAAE,IAAI,GAAI,CACpB,CACP,CAAC,CACE,CACP,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DiffView.d.ts","sourceRoot":"","sources":["../../src/components/DiffView.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,KAAK,EAAE,UAAU,EAAkB,MAAM,mBAAmB,CAAC;AAoCpE,wBAAgB,QAAQ,CAAC,EAAE,IAAI,EAAE,EAAE;IAAE,IAAI,EAAE,UAAU,CAAA;CAAE,qBAetD"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import React from "react";
|
|
2
|
+
import { Box, Text } from "ink";
|
|
3
|
+
import { formatCost } from "@prompt-diff/core";
|
|
4
|
+
/**
 * Bordered card for a single provider result.
 *
 * The header shows the upper-cased provider name with the model, followed by
 * latency, token counts and formatted cost. The body shows either the error
 * (red) or the wrapped output, depending on whether `result.error` is truthy.
 */
function ProviderCard({ result }) {
    const h = React.createElement;
    const failed = Boolean(result.error);
    const accent = failed ? "red" : "green";
    const stats = [
        `${result.latencyMs}ms`,
        `${result.inputTokens}→${result.outputTokens} tok`,
        formatCost(result.costUsd),
    ];
    const title = h(Text, { bold: true, color: accent }, result.provider.toUpperCase(), " (", result.model, ")");
    const metrics = h(Text, { dimColor: true }, stats[0], " ", stats[1], " ", stats[2]);
    const content = failed
        ? h(Text, { color: "red" }, "Error: ", result.error)
        : h(Text, { wrap: "wrap" }, result.output);
    return h(Box, { flexDirection: "column", borderStyle: "round", borderColor: accent, paddingX: 1, marginBottom: 1 },
        h(Box, { justifyContent: "space-between" }, title, metrics),
        h(Box, { marginTop: 1 }, content));
}
|
|
26
|
+
/**
 * Renders one diff: the prompt, a `ProviderCard` per result, and the run timestamp.
 *
 * Cards are keyed by `provider/model` plus their position. The provider name alone
 * is not guaranteed to be unique among siblings (each result also carries a model),
 * and duplicate React keys cause warnings and mis-reconciled children.
 *
 * @param props.diff Object with `prompt`, `results` (array of provider results) and `ranAt`.
 */
export function DiffView({ diff }) {
    return (React.createElement(Box, { flexDirection: "column" },
        React.createElement(Box, { marginBottom: 1 },
            React.createElement(Text, { bold: true }, "Prompt: "),
            React.createElement(Text, { color: "cyan", wrap: "wrap" }, diff.prompt)),
        diff.results.map((r, i) => (React.createElement(ProviderCard, { key: `${r.provider}/${r.model}#${i}`, result: r }))),
        React.createElement(Text, { dimColor: true },
            "Ran at ",
            diff.ranAt)));
}
|
|
36
|
+
//# sourceMappingURL=DiffView.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DiffView.js","sourceRoot":"","sources":["../../src/components/DiffView.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,KAAK,CAAC;AAEhC,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,SAAS,YAAY,CAAC,EAAE,MAAM,EAA8B;IAC1D,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC;IAC7C,MAAM,OAAO,GAAG,GAAG,MAAM,CAAC,SAAS,IAAI,CAAC;IACxC,MAAM,MAAM,GAAG,GAAG,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,YAAY,MAAM,CAAC;IAClE,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAExC,OAAO,CACL,oBAAC,GAAG,IACF,aAAa,EAAC,QAAQ,EACtB,WAAW,EAAC,OAAO,EACnB,WAAW,EAAE,KAAK,EAClB,QAAQ,EAAE,CAAC,EACX,YAAY,EAAE,CAAC;QAEf,oBAAC,GAAG,IAAC,cAAc,EAAC,eAAe;YACjC,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAE,KAAK;gBACpB,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE;;gBAAI,MAAM,CAAC,KAAK;oBACzC;YACP,oBAAC,IAAI,IAAC,QAAQ;gBACX,OAAO;;gBAAI,MAAM;;gBAAI,IAAI,CACrB,CACH;QACN,oBAAC,GAAG,IAAC,SAAS,EAAE,CAAC,IACd,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CACd,oBAAC,IAAI,IAAC,KAAK,EAAC,KAAK;;YAAS,MAAM,CAAC,KAAK,CAAQ,CAC/C,CAAC,CAAC,CAAC,CACF,oBAAC,IAAI,IAAC,IAAI,EAAC,MAAM,IAAE,MAAM,CAAC,MAAM,CAAQ,CACzC,CACG,CACF,CACP,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,EAAE,IAAI,EAAwB;IACrD,OAAO,CACL,oBAAC,GAAG,IAAC,aAAa,EAAC,QAAQ;QACzB,oBAAC,GAAG,IAAC,YAAY,EAAE,CAAC;YAClB,oBAAC,IAAI,IAAC,IAAI,qBAAgB;YAC1B,oBAAC,IAAI,IAAC,KAAK,EAAC,MAAM,EAAC,IAAI,EAAC,MAAM,IAC3B,IAAI,CAAC,MAAM,CACP,CACH;QACL,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CACvB,oBAAC,YAAY,IAAC,GAAG,EAAE,CAAC,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC,GAAI,CAC7C,CAAC;QACF,oBAAC,IAAI,IAAC,QAAQ;;YAAS,IAAI,CAAC,KAAK,CAAQ,CACrC,CACP,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EvalView.d.ts","sourceRoot":"","sources":["../../src/components/EvalView.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,OAAO,KAAK,EAAE,WAAW,EAAuD,MAAM,mBAAmB,CAAC;AAmO1G,wBAAgB,QAAQ,CAAC,EAAE,MAAM,EAAE,OAAe,EAAE,EAAE;IAAE,MAAM,EAAE,WAAW,CAAC;IAAC,OAAO,CAAC,EAAE,OAAO,CAAA;CAAE,qBA8B/F"}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import React from "react";
|
|
2
|
+
import { Box, Text } from "ink";
|
|
3
|
+
import { formatCost } from "@prompt-diff/core";
|
|
4
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
5
|
+
/** Formats a fraction (e.g. 0.5) as a whole-number percentage string (e.g. "50%"). */
function pct(n) {
    const wholePercent = Math.round(n * 100);
    return String(wholePercent) + "%";
}
|
|
8
|
+
/**
 * Draws a fixed-width text progress bar: filled cells ("█") followed by empty cells ("░").
 *
 * The score is clamped to [0, 1] before drawing. Without the clamp, a score above 1
 * or below 0 makes one of the `repeat` counts negative and throws `RangeError`, and
 * a NaN score produces an empty string that breaks column alignment. NaN is drawn
 * as an empty (all "░") bar.
 *
 * @param score Fraction of the bar to fill; values outside [0, 1] are clamped.
 * @param width Total number of cells (default 12).
 * @returns A string of exactly `width` characters.
 */
function bar(score, width = 12) {
    const clamped = Math.min(1, Math.max(0, Number(score)));
    const ratio = Number.isNaN(clamped) ? 0 : clamped;
    const filled = Math.round(ratio * width);
    return "█".repeat(filled) + "░".repeat(width - filled);
}
|
|
12
|
+
/** Maps a score to a display color: "green" from 0.8 up, "yellow" from 0.5 up, otherwise "red". */
function scoreColor(score) {
    return score >= 0.8 ? "green" : score >= 0.5 ? "yellow" : "red";
}
|
|
19
|
+
// ─── Summary table ────────────────────────────────────────────────────────────
|
|
20
|
+
/**
 * Per-provider summary table: provider, model, score bar with percentage,
 * pass/total counts, average latency and total cost.
 *
 * Columns are laid out by padding each cell's text to a fixed width; the model
 * name is truncated to fit its column, and a zero cost is shown as "$0.00".
 */
function SummaryTable({ summary }) {
    const h = React.createElement;
    const widths = { provider: 20, model: 24, score: 18, pass: 10, latency: 12, cost: 10 };
    // The rule under the header spans the sum of all column widths.
    const ruleLength = Object.values(widths).reduce((total, w) => total + w, 0);
    const headings = [
        "Provider".padEnd(widths.provider),
        "Model".padEnd(widths.model),
        "Score".padEnd(widths.score),
        "Pass/Total".padEnd(widths.pass),
        "Avg Latency".padEnd(widths.latency),
        "Total Cost",
    ];
    const headerRow = h(Box, null, ...headings.map((text) => h(Text, { bold: true, dimColor: true }, text)));
    const toRow = (s) => h(Box, { key: `${s.provider}/${s.model}` },
        h(Text, null, s.provider.padEnd(widths.provider)),
        h(Text, { dimColor: true }, s.model.slice(0, widths.model - 2).padEnd(widths.model)),
        h(Text, { color: scoreColor(s.score) }, `${bar(s.score)} ${pct(s.score)}`.padEnd(widths.score)),
        h(Text, null, `${s.passed}/${s.total}`.padEnd(widths.pass)),
        h(Text, { dimColor: true }, `${s.avgLatencyMs}ms`.padEnd(widths.latency)),
        h(Text, { dimColor: true }, s.totalCostUsd === 0 ? "$0.00" : formatCost(s.totalCostUsd)));
    return h(Box, { flexDirection: "column", marginBottom: 1 },
        h(Text, { bold: true, color: "cyan" }, " Summary"),
        headerRow,
        h(Text, { dimColor: true }, "─".repeat(ruleLength)),
        summary.map((s) => toRow(s)));
}
|
|
40
|
+
// ─── Case × provider matrix (default view) ────────────────────────────────────
|
|
41
|
+
// Width given to the "Case" column boxes in CaseMatrix (presumably terminal columns — confirm against Ink's Box `width`).
const COL_CASE = 42;
|
|
42
|
+
// Width given to each per-provider cell box in the matrix (header cells and MatrixCell).
const COL_CELL = 11;
|
|
43
|
+
/** Returns the assertions of a provider result, leaving out entries whose type is "provider-error". */
function userAssertions(pr) {
    const isUserDefined = ({ type }) => type !== "provider-error";
    return pr.assertions.filter(isUserDefined);
}
|
|
46
|
+
/**
 * One cell of the case × provider matrix.
 *
 * - no provider result: a dim, right-aligned "—"
 * - provider error: a bold red "ERR"
 * - otherwise: "✓" or "✗" (by `pr.pass`), followed by "passed/total" over the
 *   assertions that are not of type "provider-error" when there are any
 *
 * The text is padded (and the pass/fail label truncated) to COL_CELL - 1 characters.
 */
function MatrixCell({ pr }) {
    const h = React.createElement;
    const inner = COL_CELL - 1;
    let textProps;
    let content;
    if (!pr) {
        textProps = { dimColor: true };
        content = "—".padStart(inner);
    }
    else if (pr.error) {
        textProps = { color: "red", bold: true };
        content = "ERR".padEnd(inner);
    }
    else {
        const checks = userAssertions(pr);
        const passedCount = checks.reduce((count, a) => (a.pass ? count + 1 : count), 0);
        const tally = checks.length === 0 ? "" : ` ${passedCount}/${checks.length}`;
        const mark = pr.pass ? "✓" : "✗";
        textProps = { color: pr.pass ? "green" : "red" };
        content = `${mark}${tally}`.slice(0, inner).padEnd(inner);
    }
    return h(Box, { width: COL_CELL, flexShrink: 0 }, h(Text, textProps, content));
}
|
|
62
|
+
/**
 * Default eval view: a table with one row per test case and one column per
 * `provider/model` key in `providerOrder`.
 *
 * Column headings show only the part of the key before the first "/", truncated
 * to fit the cell. Long prompts are cut and suffixed with an ellipsis so the
 * "Case" column keeps its fixed width.
 */
function CaseMatrix({ cases, providerOrder, }) {
    const h = React.createElement;
    const headingCell = (key) => {
        // split() returns the whole key when it contains no "/".
        const [short] = key.split("/");
        const head = short.slice(0, COL_CELL - 2).padEnd(COL_CELL - 1);
        return h(Box, { key: key, width: COL_CELL, flexShrink: 0 },
            h(Text, { bold: true, dimColor: true }, head));
    };
    const caseLine = (c, i) => {
        let prompt;
        if (c.prompt.length > COL_CASE - 2) {
            prompt = c.prompt.slice(0, COL_CASE - 4) + "… ";
        }
        else {
            prompt = c.prompt.padEnd(COL_CASE - 1);
        }
        return h(Box, { key: i },
            h(Box, { width: 4, flexShrink: 0 },
                h(Text, { dimColor: true }, `${i + 1}`.padEnd(3))),
            h(Box, { width: COL_CASE, flexShrink: 0 },
                h(Text, null, prompt)),
            providerOrder.map((key) => {
                const pr = c.providerResults.find((r) => `${r.provider}/${r.model}` === key);
                return h(MatrixCell, { key: key, pr: pr });
            }));
    };
    const ruleLength = 4 + COL_CASE + providerOrder.length * COL_CELL;
    return h(Box, { flexDirection: "column", marginBottom: 1 },
        h(Text, { bold: true, color: "cyan" }, " Test matrix"),
        h(Text, { dimColor: true }, " Rows = test cases \u00B7 Columns = providers (\u2713/\u2717, assertion pass/total)"),
        h(Box, { marginTop: 1 },
            h(Box, { width: 4, flexShrink: 0 },
                h(Text, { bold: true, dimColor: true }, "#".padEnd(3))),
            h(Box, { width: COL_CASE, flexShrink: 0 },
                h(Text, { bold: true, dimColor: true }, "Case".padEnd(COL_CASE - 1))),
            providerOrder.map((key) => headingCell(key))),
        h(Text, { dimColor: true }, "─".repeat(ruleLength)),
        cases.map((c, i) => caseLine(c, i)));
}
|
|
93
|
+
// ─── Per-case rows ────────────────────────────────────────────────────────────
|
|
94
|
+
/** Detailed multi-line rendering of one "llm-rubric" assertion: verdict, optional rubric text, and the reason. */
function renderRubricAssertion(a, i) {
    const h = React.createElement;
    const rubric = a.rubricCriterion
        ? h(Box, { marginLeft: 2, flexDirection: "column" },
            h(Text, { dimColor: true }, "Rubric:"),
            h(Text, { wrap: "wrap" }, a.rubricCriterion))
        : null;
    return h(Box, { key: i, marginLeft: 4, flexDirection: "column", marginBottom: 1 },
        h(Box, { flexDirection: "row", gap: 1 },
            h(Text, { bold: true, color: "magenta" }, "[", a.type, "]"),
            h(Text, { bold: true, color: a.pass ? "green" : "red" }, a.pass ? "Pass" : "Fail")),
        rubric,
        h(Box, { marginLeft: 2, flexDirection: "column" },
            h(Text, { dimColor: true }, a.pass ? "Why it passed" : "Why it failed", ":"),
            h(Text, { wrap: "wrap" }, a.reason ?? "—")));
}
/** Compact one-line rendering of any other assertion: mark, type, and the reason when present. */
function renderPlainAssertion(a, i) {
    const h = React.createElement;
    return h(Box, { key: i, marginLeft: 4 },
        h(Text, { color: a.pass ? "green" : "red" }, a.pass ? "✓" : "✗", " "),
        h(Text, { dimColor: true }, "[", a.type, "]"),
        a.reason && h(Text, { dimColor: true }, " \u2014 ", a.reason));
}
/**
 * Verbose per-case section: the (truncated) prompt and its variables, then one
 * entry per provider in `providerOrder` that has a result for this case.
 *
 * Each provider entry shows a pass/fail mark, the `provider/model` label, the
 * latency (only when there was no provider error), its assertions, and the
 * provider error if any. When the provider errored, assertions of type
 * "provider-error" are left out of the list because the error is printed
 * separately below it.
 */
function CaseRow({ c, idx, providerOrder }) {
    const h = React.createElement;
    const prompt = c.prompt.length > 60 ? c.prompt.slice(0, 57) + "…" : c.prompt;
    const varStr = Object.entries(c.vars)
        .map(([k, v]) => `${k}=${v}`)
        .join(", ");
    const providerEntry = (key) => {
        const pr = c.providerResults.find((r) => `${r.provider}/${r.model}` === key);
        if (!pr) {
            return null;
        }
        // An errored result is always shown as a failure, whatever `pass` says.
        const ok = !pr.error && pr.pass;
        const icon = ok ? "✓" : "✗";
        const color = ok ? "green" : "red";
        const label = `${pr.provider}/${pr.model}`;
        return h(Box, { key: key, marginLeft: 2, flexDirection: "column" },
            h(Box, { gap: 1 },
                h(Text, { color: color }, icon),
                h(Text, { dimColor: true }, label.slice(0, 36).padEnd(36)),
                !pr.error && h(Text, { dimColor: true }, pr.latencyMs, "ms")),
            pr.assertions
                .filter((a) => !pr.error || a.type !== "provider-error")
                .map((a, i) => a.type === "llm-rubric" ? renderRubricAssertion(a, i) : renderPlainAssertion(a, i)),
            pr.error && h(Box, { marginLeft: 4 },
                h(Text, { color: "red" }, "Error: ", pr.error)));
    };
    return h(Box, { flexDirection: "column", marginBottom: 1 },
        h(Box, null,
            h(Text, { bold: true, color: "cyan" }, "#", idx + 1, " "),
            h(Text, { wrap: "truncate-end" }, prompt),
            varStr && h(Text, { dimColor: true }, " (", varStr, ")")),
        providerOrder.map((key) => providerEntry(key)));
}
|
|
155
|
+
// ─── Main component ───────────────────────────────────────────────────────────
|
|
156
|
+
/**
 * Top-level view for an eval result.
 *
 * Shows a headline (all passed, or the total number of failures summed over the
 * summary rows), the counts of test cases and providers, the summary table and
 * the case × provider matrix. With `verbose`, a "Details" section with one
 * `CaseRow` per case is appended.
 *
 * @param props.result  Eval result with `summary` (per-provider rows) and `cases`.
 * @param props.verbose When true, also render the per-case details (default false).
 */
export function EvalView({ result, verbose = false }) {
    const h = React.createElement;
    const { summary, cases } = result;
    const providerOrder = summary.map((s) => `${s.provider}/${s.model}`);
    const allPassed = !summary.some((s) => s.failed !== 0);
    const plural = (count) => (count !== 1 ? "s" : "");
    const headline = allPassed
        ? "✓ All test results passed"
        : `✗ ${summary.reduce((acc, s) => acc + s.failed, 0)} failed (provider × case)`;
    const details = verbose && h(Box, { flexDirection: "column" },
        h(Text, { bold: true, color: "cyan" }, " Details"),
        h(Text, { dimColor: true }, "─".repeat(80)),
        cases.map((c, i) => h(CaseRow, { key: i, c: c, idx: i, providerOrder: providerOrder })));
    return h(Box, { flexDirection: "column" },
        h(Box, { marginBottom: 1 },
            h(Text, { bold: true, color: allPassed ? "green" : "red" }, headline),
            h(Text, { dimColor: true }, " (", cases.length, " test case", plural(cases.length), ", ", summary.length, " provider", plural(summary.length), ")")),
        h(SummaryTable, { summary: summary }),
        h(CaseMatrix, { cases: cases, providerOrder: providerOrder }),
        details);
}
|
|
181
|
+
//# sourceMappingURL=EvalView.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EvalView.js","sourceRoot":"","sources":["../../src/components/EvalView.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,KAAK,CAAC;AAEhC,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,iFAAiF;AAEjF,SAAS,GAAG,CAAC,CAAS;IACpB,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC;AACnC,CAAC;AAED,SAAS,GAAG,CAAC,KAAa,EAAE,KAAK,GAAG,EAAE;IACpC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC;IACzC,OAAO,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,UAAU,CAAC,KAAa;IAC/B,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,OAAO,CAAC;IACjC,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,QAAQ,CAAC;IAClC,OAAO,KAAK,CAAC;AACf,CAAC;AAED,iFAAiF;AAEjF,SAAS,YAAY,CAAC,EAAE,OAAO,EAAkC;IAC/D,MAAM,IAAI,GAAG,EAAE,QAAQ,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;IAErF,OAAO,CACL,oBAAC,GAAG,IAAC,aAAa,EAAC,QAAQ,EAAC,YAAY,EAAE,CAAC;QACzC,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAC,MAAM,eAAgB;QACvC,oBAAC,GAAG;YACF,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAAE,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAQ;YAC7D,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAAE,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAQ;YACvD,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAAE,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAQ;YACvD,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAAE,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAQ;YAC3D,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAAE,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAQ;YAC/D,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAAE,YAAY,CAAQ,CACrC;QACN,oBAAC,IAAI,IAAC,QAAQ,UAAE,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,CAAQ;QACjH,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAClB,oBAAC,GAAG,IAAC,GAAG,EAAE,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE;YAClC,oBAAC,IAAI,QAAE,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAQ;YAC/C,oBAAC,IAAI,IAAC,QAAQ,UAAE,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,E
AAE,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAQ;YAC3E,oBAAC,IAAI,IAAC,KAAK,EAAE,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,IAC7B,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAChD;YACP,oBAAC,IAAI,QAAE,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAQ;YACzD,oBAAC,IAAI,IAAC,QAAQ,UAAE,GAAG,CAAC,CAAC,YAAY,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAQ;YAClE,oBAAC,IAAI,IAAC,QAAQ,UAAE,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,YAAY,CAAC,CAAQ,CAC/E,CACP,CAAC,CACE,CACP,CAAC;AACJ,CAAC;AAED,iFAAiF;AAEjF,MAAM,QAAQ,GAAG,EAAE,CAAC;AACpB,MAAM,QAAQ,GAAG,EAAE,CAAC;AAEpB,SAAS,cAAc,CAAC,EAAsB;IAC5C,OAAO,EAAE,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,gBAAgB,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,UAAU,CAAC,EAAE,EAAE,EAA0C;IAChE,MAAM,CAAC,GAAG,QAAQ,CAAC;IACnB,IAAI,CAAC,EAAE,EAAE,CAAC;QACR,OAAO,CACL,oBAAC,GAAG,IAAC,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;YAC1B,oBAAC,IAAI,IAAC,QAAQ,UAAE,GAAG,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,CAAQ,CACvC,CACP,CAAC;IACJ,CAAC;IACD,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC;QACb,OAAO,CACL,oBAAC,GAAG,IAAC,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;YAC1B,oBAAC,IAAI,IAAC,KAAK,EAAC,KAAK,EAAC,IAAI,UACnB,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CACf,CACH,CACP,CAAC;IACJ,CAAC;IACD,MAAM,EAAE,GAAG,cAAc,CAAC,EAAE,CAAC,CAAC;IAC9B,MAAM,MAAM,GACV,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,IAAI,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1E,MAAM,KAAK,GAAG,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,MAAM,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9E,OAAO,CACL,oBAAC,GAAG,IAAC,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;QAC1B,oBAAC,IAAI,IAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,IAAG,KAAK,CAAQ,CAClD,CACP,CAAC;AACJ,CAAC;AAED,SAAS,UAAU,CAAC,EAClB,KAAK,EACL,aAAa,GAId;IACC,OA
AO,CACL,oBAAC,GAAG,IAAC,aAAa,EAAC,QAAQ,EAAC,YAAY,EAAE,CAAC;QACzC,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAC,MAAM,mBAAoB;QAC3C,oBAAC,IAAI,IAAC,QAAQ,gGAA4E;QAC1F,oBAAC,GAAG,IAAC,SAAS,EAAE,CAAC;YACf,oBAAC,GAAG,IAAC,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;gBAC1B,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAChB,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CACT,CACH;YACN,oBAAC,GAAG,IAAC,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;gBACjC,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAChB,MAAM,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,CACvB,CACH;YACL,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;gBACzB,MAAM,KAAK,GAAG,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,GAAG,CAAC;gBAC3D,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;gBAC/D,OAAO,CACL,oBAAC,GAAG,IAAC,GAAG,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;oBAC3C,oBAAC,IAAI,IAAC,IAAI,QAAC,QAAQ,UAChB,IAAI,CACA,CACH,CACP,CAAC;YACJ,CAAC,CAAC,CACE;QACN,oBAAC,IAAI,IAAC,QAAQ,UACX,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,QAAQ,GAAG,aAAa,CAAC,MAAM,GAAG,QAAQ,CAAC,CACtD;QACN,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClB,MAAM,MAAM,GACV,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,QAAQ,GAAG,CAAC;gBAC5B,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,GAAG,CAAC,CAAC,GAAG,IAAI;gBACxC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC;YACpC,OAAO,CACL,oBAAC,GAAG,IAAC,GAAG,EAAE,CAAC;gBACT,oBAAC,GAAG,IAAC,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;oBAC1B,oBAAC,IAAI,IAAC,QAAQ,UAAE,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAQ,CACxC;gBACN,oBAAC,GAAG,IAAC,KAAK,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;oBACjC,oBAAC,IAAI,QAAE,MAAM,CAAQ,CACjB;gBACL,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;oBACzB,MAAM,EAAE,GAAG,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC;oBAC7E,OAAO,oBAAC,UAAU,IAAC,GAAG,EAAE,GAAG,EAAE,EAAE,EAAE,EAAE,GAAI,CAAC;gBAC1C,CAAC,CAAC,CACE,CACP,CAAC;QACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC;AAED,iFAAiF;AAEjF,SAAS,OAAO,CAAC,EAAE,CA
AC,EAAE,GAAG,EAAE,aAAa,EAA+D;IACrG,MAAM,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAC7E,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE9E,OAAO,CACL,oBAAC,GAAG,IAAC,aAAa,EAAC,QAAQ,EAAC,YAAY,EAAE,CAAC;QACzC,oBAAC,GAAG;YACF,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAC,MAAM;;gBAAG,GAAG,GAAG,CAAC;oBAAS;YAC1C,oBAAC,IAAI,IAAC,IAAI,EAAC,cAAc,IAAE,MAAM,CAAQ;YACxC,MAAM,IAAI,oBAAC,IAAI,IAAC,QAAQ;;gBAAI,MAAM;oBAAS,CACxC;QACL,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YACzB,MAAM,EAAE,GAAG,CAAC,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE,KAAK,GAAG,CAAC,CAAC;YAC7E,IAAI,CAAC,EAAE;gBAAE,OAAO,IAAI,CAAC;YACrB,MAAM,IAAI,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;YAClD,MAAM,KAAK,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;YAC3D,MAAM,KAAK,GAAG,GAAG,EAAE,CAAC,QAAQ,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC;YAE3C,OAAO,CACL,oBAAC,GAAG,IAAC,GAAG,EAAE,GAAG,EAAE,UAAU,EAAE,CAAC,EAAE,aAAa,EAAC,QAAQ;gBAClD,oBAAC,GAAG,IAAC,GAAG,EAAE,CAAC;oBACT,oBAAC,IAAI,IAAC,KAAK,EAAE,KAAK,IAAG,IAAI,CAAQ;oBACjC,oBAAC,IAAI,IAAC,QAAQ,UAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAQ;oBACpD,CAAC,EAAE,CAAC,KAAK,IAAI,CACZ,oBAAC,IAAI,IAAC,QAAQ;wBAAE,EAAE,CAAC,SAAS;6BAAU,CACvC,CACG;gBACL,EAAE,CAAC,UAAU;qBACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,IAAI,CAAC,CAAC,IAAI,KAAK,gBAAgB,CAAC,CAAC;qBACzD,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACZ,CAAC,CAAC,IAAI,KAAK,YAAY,CAAC,CAAC,CAAC,CACxB,oBAAC,GAAG,IAAC,GAAG,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,aAAa,EAAC,QAAQ,EAAC,YAAY,EAAE,CAAC;oBAChE,oBAAC,GAAG,IAAC,aAAa,EAAC,KAAK,EAAC,GAAG,EAAE,CAAC;wBAC7B,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EA
AC,SAAS;;4BACtB,CAAC,CAAC,IAAI;gCACH;wBACP,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,IACvC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CACpB,CACH;oBACL,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CACnB,oBAAC,GAAG,IAAC,UAAU,EAAE,CAAC,EAAE,aAAa,EAAC,QAAQ;wBACxC,oBAAC,IAAI,IAAC,QAAQ,oBAAe;wBAC7B,oBAAC,IAAI,IAAC,IAAI,EAAC,MAAM,IAAE,CAAC,CAAC,eAAe,CAAQ,CACxC,CACP,CAAC,CAAC,CAAC,IAAI;oBACR,oBAAC,GAAG,IAAC,UAAU,EAAE,CAAC,EAAE,aAAa,EAAC,QAAQ;wBACxC,oBAAC,IAAI,IAAC,QAAQ;4BAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,eAAe;gCAAS;wBACnE,oBAAC,IAAI,IAAC,IAAI,EAAC,MAAM,IAAE,CAAC,CAAC,MAAM,IAAI,GAAG,CAAQ,CACtC,CACF,CACP,CAAC,CAAC,CAAC,CACF,oBAAC,GAAG,IAAC,GAAG,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;oBACxB,oBAAC,IAAI,IAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK;wBAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;4BAAS;oBACnE,oBAAC,IAAI,IAAC,QAAQ;;wBAAG,CAAC,CAAC,IAAI;4BAAS;oBAC/B,CAAC,CAAC,MAAM,IAAI,oBAAC,IAAI,IAAC,QAAQ;;wBAAK,CAAC,CAAC,MAAM,CAAQ,CAC5C,CACP,CACJ;gBACA,EAAE,CAAC,KAAK,IAAI,CACX,oBAAC,GAAG,IAAC,UAAU,EAAE,CAAC;oBAChB,oBAAC,IAAI,IAAC,KAAK,EAAC,KAAK;;wBAAS,EAAE,CAAC,KAAK,CAAQ,CACtC,CACP,CACG,CACP,CAAC;QACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC;AAED,iFAAiF;AAEjF,MAAM,UAAU,QAAQ,CAAC,EAAE,MAAM,EAAE,OAAO,GAAG,KAAK,EAA8C;IAC9F,MAAM,aAAa,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;IAC5E,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC;IAE9D,OAAO,CACL,oBAAC,GAAG,IAAC,aAAa,EAAC,QAAQ;QACzB,oBAAC,GAAG,IAAC,YAAY,EAAE,CAAC;YAClB,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,IAC1C,SAAS;gBACR,CAAC,CAAC,2BAA2B;gBAC7B,CAAC,CAAC,KAAK,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,2BAA2B,CACnF;YACP,oBAAC,IAAI,IAAC,QAAQ;;gBAAK,MAAM,CAAC,KAAK,CAAC,MAAM;;gBAAY,MAAM,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,
CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;;gBAAI,MAAM,CAAC,OAAO,CAAC,MAAM;;gBAAW,MAAM,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE;oBAAS,CACrK;QAEN,oBAAC,YAAY,IAAC,OAAO,EAAE,MAAM,CAAC,OAAO,GAAI;QAEzC,oBAAC,UAAU,IAAC,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,aAAa,EAAE,aAAa,GAAI;QAEhE,OAAO,IAAI,CACV,oBAAC,GAAG,IAAC,aAAa,EAAC,QAAQ;YACzB,oBAAC,IAAI,IAAC,IAAI,QAAC,KAAK,EAAC,MAAM,eAAgB;YACvC,oBAAC,IAAI,IAAC,QAAQ,UAAE,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAQ;YACrC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAC1B,oBAAC,OAAO,IAAC,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,aAAa,EAAE,aAAa,GAAI,CAChE,CAAC,CACE,CACP,CACG,CACP,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Spinner.d.ts","sourceRoot":"","sources":["../../src/components/Spinner.tsx"],"names":[],"mappings":"AAAA,OAAO,KAA8B,MAAM,OAAO,CAAC;AAKnD,wBAAgB,OAAO,CAAC,EAAE,KAAK,EAAE,EAAE;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,qBAgBnD"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import React, { useState, useEffect } from "react";
|
|
2
|
+
import { Text } from "ink";
|
|
3
|
+
// Braille glyphs cycled in order to draw the spinner animation.
const FRAMES = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];
/**
 * Animated terminal spinner followed by a text label.
 *
 * @param {{ label: string }} props - `label` is rendered right after the glyph.
 * @returns A `Text` element containing the cyan spinner glyph, a space and the label.
 */
export function Spinner({ label }) {
    const [frame, setFrame] = useState(0);
    useEffect(() => {
        // Step to the next glyph every 80 ms, wrapping at the end of FRAMES.
        const advance = () => setFrame((current) => (current + 1) % FRAMES.length);
        const timer = setInterval(advance, 80);
        // Stop the timer when the component unmounts.
        return () => {
            clearInterval(timer);
        };
    }, []);
    const glyph = React.createElement(Text, { color: "cyan" }, FRAMES[frame], " ");
    return React.createElement(Text, null, glyph, label);
}
|
|
18
|
+
//# sourceMappingURL=Spinner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Spinner.js","sourceRoot":"","sources":["../../src/components/Spinner.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AACnD,OAAO,EAAE,IAAI,EAAE,MAAM,KAAK,CAAC;AAE3B,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;AAElE,MAAM,UAAU,OAAO,CAAC,EAAE,KAAK,EAAqB;IAClD,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;IAEtC,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,EAAE;YAC1B,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC;QAC3C,CAAC,EAAE,EAAE,CAAC,CAAC;QACP,OAAO,GAAG,EAAE,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;IACjC,CAAC,EAAE,EAAE,CAAC,CAAC;IAEP,OAAO,CACL,oBAAC,IAAI;QACH,oBAAC,IAAI,IAAC,KAAK,EAAC,MAAM;YAAE,MAAM,CAAC,KAAK,CAAC;gBAAS;QACzC,KAAK,CACD,CACR,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { render } from "ink";
|
|
4
|
+
import React from "react";
|
|
5
|
+
import { readFileSync } from "fs";
|
|
6
|
+
import { App } from "./app.js";
|
|
7
|
+
import { RunCommand } from "./run-command.js";
|
|
8
|
+
// Root command: carries the name, description and version reported by the CLI.
const program = new Command()
    .name("prompt-diff")
    .description("Prompt-Diff — compare and evaluate LLM outputs across providers")
    .version("0.1.0");
|
|
13
|
+
// ── diff (default) ────────────────────────────────────────────────────────────
// The [prompt] positional is declared exactly once, via .argument() below.
// Naming it in the .command() string as well registers a second positional;
// Commander passes one action parameter per registered argument before the
// options object, so `opts` would end up bound to that extra (undefined)
// positional instead of the parsed options.
program
    .command("diff", { isDefault: true })
    .description("Run a single prompt across providers and diff outputs")
    .argument("[prompt]", "Prompt to send to all providers")
    .option("--file <path>", "Append file contents to the prompt")
    .option("--models <list>", "Comma-separated providers (claude,ollama,minimax)", "claude,ollama")
    .option("--runs <n>", "Number of runs for averaging", "1")
    .option("--output <format>", "Output format: pretty or json", "pretty")
    .action((promptArg, opts) => {
    if (!promptArg && !opts.file) {
        process.stderr.write("error: provide a prompt argument or --file\n");
        process.exit(1);
    }
    // parseInt returns NaN for non-numeric input and Math.max(1, NaN) is NaN,
    // so reject a bad --runs value here instead of handing NaN to <App>.
    const runs = Number.parseInt(opts.runs, 10);
    if (Number.isNaN(runs)) {
        process.stderr.write("error: --runs must be an integer\n");
        process.exit(1);
    }
    let prompt = promptArg ?? "";
    if (opts.file) {
        let fileContent;
        try {
            fileContent = readFileSync(opts.file, "utf-8");
        }
        catch (err) {
            // Report an unreadable file as a one-line CLI error, not a stack trace.
            const reason = err instanceof Error ? err.message : String(err);
            process.stderr.write(`error: cannot read --file: ${reason}\n`);
            process.exit(1);
        }
        // With a prompt, the file is appended as a fenced block; alone, it is the prompt.
        prompt = prompt
            ? `${prompt}\n\n\`\`\`\n${fileContent}\n\`\`\``
            : fileContent;
    }
    const providers = opts.models.split(",").map((m) => m.trim());
    // Provider settings come from the environment; claude and minimax stay
    // undefined when their credentials are not set.
    const config = {
        claude: process.env.ANTHROPIC_API_KEY
            ? { apiKey: process.env.ANTHROPIC_API_KEY }
            : undefined,
        ollama: { baseUrl: process.env.OLLAMA_BASE_URL ?? "http://localhost:11434" },
        minimax: process.env.MINIMAX_API_KEY && process.env.MINIMAX_GROUP_ID
            ? { apiKey: process.env.MINIMAX_API_KEY, groupId: process.env.MINIMAX_GROUP_ID }
            : undefined,
    };
    render(React.createElement(App, {
        prompt,
        providers,
        config,
        // Values below 1 are clamped up to a single run.
        runs: Math.max(1, runs),
        // Anything other than "json" falls back to the pretty renderer.
        outputFormat: opts.output === "json" ? "json" : "pretty",
    }));
});
|
|
52
|
+
// ── run (test suite) ──────────────────────────────────────────────────────────
// Registers the `run` subcommand, then parses the process arguments.
const runCommand = program.command("run");
runCommand.description("Run a YAML test suite and evaluate outputs with assertions (see examples/*.yaml)");
runCommand.requiredOption("--config <path>", "Path to suite YAML file (e.g. examples/prompt-diff.yaml)");
runCommand.option("--models <list>", "Comma-separated providers (claude, ollama, minimax — env vars as for diff)", "claude,ollama");
runCommand.option("--output <format>", "Output format: pretty or json", "pretty");
runCommand.option("--verbose", "Show per-case assertion details and full prompts", false);
runCommand.option("--fail-on-error", "Exit with code 1 if any provider result fails", false);
runCommand.option("--judge <name>", "Provider for llm-rubric: auto (Claude if ANTHROPIC_API_KEY), claude, ollama, none", "auto");
runCommand.action((opts) => {
    // The judge name is matched case-insensitively against the known choices.
    const judge = opts.judge.toLowerCase();
    const isKnownJudge = ["auto", "claude", "ollama", "none"].includes(judge);
    if (!isKnownJudge) {
        process.stderr.write("error: --judge must be one of: auto, claude, ollama, none\n");
        process.exit(1);
    }
    const props = {
        configPath: opts.config,
        models: opts.models,
        // Anything other than "json" falls back to the pretty renderer.
        output: opts.output === "json" ? "json" : "pretty",
        verbose: opts.verbose,
        failOnError: opts.failOnError,
        judge,
    };
    render(React.createElement(RunCommand, props));
});
program.parse();
|
|
79
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,GAAG,EAAE,MAAM,UAAU,CAAC;AAC/B,OAAO,EAAE,UAAU,EAAoB,MAAM,kBAAkB,CAAC;AAGhE,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,aAAa,CAAC;KACnB,WAAW,CAAC,iEAAiE,CAAC;KAC9E,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,iFAAiF;AAEjF,OAAO;KACJ,OAAO,CAAC,eAAe,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC;KAC7C,WAAW,CAAC,uDAAuD,CAAC;KACpE,QAAQ,CAAC,UAAU,EAAE,iCAAiC,CAAC;KACvD,MAAM,CAAC,eAAe,EAAE,oCAAoC,CAAC;KAC7D,MAAM,CACL,iBAAiB,EACjB,mDAAmD,EACnD,eAAe,CAChB;KACA,MAAM,CAAC,YAAY,EAAE,8BAA8B,EAAE,GAAG,CAAC;KACzD,MAAM,CAAC,mBAAmB,EAAE,+BAA+B,EAAE,QAAQ,CAAC;KACtE,MAAM,CACL,CACE,SAA6B,EAC7B,IAAqE,EACrE,EAAE;IACF,IAAI,CAAC,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAC7B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;QACrE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,IAAI,MAAM,GAAG,SAAS,IAAI,EAAE,CAAC;IAC7B,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACd,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QACrD,MAAM,GAAG,MAAM;YACb,CAAC,CAAC,GAAG,MAAM,eAAe,WAAW,UAAU;YAC/C,CAAC,CAAC,WAAW,CAAC;IAClB,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAmB,CAAC;IAChF,MAAM,MAAM,GAAmB;QAC7B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;YACnC,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,EAAE;YAC3C,CAAC,CAAC,SAAS;QACb,MAAM,EAAE,EAAE,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,wBAAwB,EAAE;QAC5E,OAAO,EACL,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB;YACzD,CAAC,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,OAAO,EAAE,OAAO,CAAC,GAAG,CAAC,gBAAgB,EAAE;YAChF,CAAC,CAAC,SAAS;KAChB,CAAC;IAEF,MAAM,CACJ,KAAK,CAAC,aAAa,CAAC,GAAG,EAAE;QACvB,MAAM;QACN,SAAS;QACT,MAAM;QACN,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC1C,YAAY,EAAE,I
AAI,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ;KACzD,CAAC,CACH,CAAC;AACJ,CAAC,CACF,CAAC;AAEJ,iFAAiF;AAEjF,OAAO;KACJ,OAAO,CAAC,KAAK,CAAC;KACd,WAAW,CACV,kFAAkF,CACnF;KACA,cAAc,CAAC,iBAAiB,EAAE,0DAA0D,CAAC;KAC7F,MAAM,CACL,iBAAiB,EACjB,4EAA4E,EAC5E,eAAe,CAChB;KACA,MAAM,CAAC,mBAAmB,EAAE,+BAA+B,EAAE,QAAQ,CAAC;KACtE,MAAM,CAAC,WAAW,EAAE,kDAAkD,EAAE,KAAK,CAAC;KAC9E,MAAM,CAAC,iBAAiB,EAAE,+CAA+C,EAAE,KAAK,CAAC;KACjF,MAAM,CACL,gBAAgB,EAChB,mFAAmF,EACnF,MAAM,CACP;KACA,MAAM,CACL,CAAC,IAOA,EAAE,EAAE;IACH,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC;IAC9D,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,6DAA6D,CAAC,CAAC;QACpF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,CACJ,KAAK,CAAC,aAAa,CAAC,UAAU,EAAE;QAC9B,UAAU,EAAE,IAAI,CAAC,MAAM;QACvB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,MAAM,EAAE,IAAI,CAAC,MAAM,KAAK,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ;QAClD,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,WAAW,EAAE,IAAI,CAAC,WAAW;QAC7B,KAAK,EAAE,CAAgB;KACxB,CAAC,CACH,CAAC;AACJ,CAAC,CACF,CAAC;AAEJ,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import React from "react";
|
|
2
|
+
/** String literals accepted for the `judge` prop of `RunCommandProps`. */
export type JudgeChoice = "auto" | "claude" | "ollama" | "none";
|
|
3
|
+
/** Props accepted by the `RunCommand` component. */
export interface RunCommandProps {
|
|
4
|
+
/** Path of the suite file; the CLI fills it from `--config`. */
configPath: string;
|
|
5
|
+
/** Comma-separated provider names; the CLI fills it from `--models`. */
models: string;
|
|
6
|
+
/** Output mode: `"pretty"` or `"json"`. */
output: "pretty" | "json";
|
|
7
|
+
/** The CLI fills it from `--verbose`. */
verbose: boolean;
|
|
8
|
+
/** The CLI fills it from `--fail-on-error`. */
failOnError: boolean;
|
|
9
|
+
/** Judge selection; one of the `JudgeChoice` literals. */
judge: JudgeChoice;
|
|
10
|
+
}
|
|
11
|
+
/** Component taking `RunCommandProps`; returns a JSX element or `null`. */
export declare function RunCommand({ configPath, models, output, verbose, failOnError, judge, }: RunCommandProps): React.JSX.Element | null;
|
|
12
|
+
//# sourceMappingURL=run-command.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-command.d.ts","sourceRoot":"","sources":["../src/run-command.tsx"],"names":[],"mappings":"AAAA,OAAO,KAA8B,MAAM,OAAO,CAAC;AAcnD,MAAM,MAAM,WAAW,GAAG,MAAM,GAAG,QAAQ,GAAG,QAAQ,GAAG,MAAM,CAAC;AAEhE,MAAM,WAAW,eAAe;IAC9B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,QAAQ,GAAG,MAAM,CAAC;IAC1B,OAAO,EAAE,OAAO,CAAC;IACjB,WAAW,EAAE,OAAO,CAAC;IACrB,KAAK,EAAE,WAAW,CAAC;CACpB;AAuBD,wBAAgB,UAAU,CAAC,EACzB,UAAU,EACV,MAAM,EACN,MAAM,EACN,OAAO,EACP,WAAW,EACX,KAAK,GACN,EAAE,eAAe,4BAiGjB"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import React, { useState, useEffect } from "react";
|
|
2
|
+
import { Box, Text, useApp } from "ink";
|
|
3
|
+
import { readFileSync } from "fs";
|
|
4
|
+
import { parseSuiteConfig, runSuite, ClaudeProvider, OllamaProvider, MinimaxProvider, } from "@prompt-diff/core";
|
|
5
|
+
import { EvalView } from "./components/EvalView.js";
|
|
6
|
+
import { Spinner } from "./components/Spinner.js";
|
|
7
|
+
function buildJudgeProvider(judge) {
|
|
8
|
+
switch (judge) {
|
|
9
|
+
case "none":
|
|
10
|
+
return undefined;
|
|
11
|
+
case "claude": {
|
|
12
|
+
const key = process.env.ANTHROPIC_API_KEY;
|
|
13
|
+
if (!key) {
|
|
14
|
+
process.stderr.write("warn: --judge claude requires ANTHROPIC_API_KEY\n");
|
|
15
|
+
return undefined;
|
|
16
|
+
}
|
|
17
|
+
return new ClaudeProvider(key);
|
|
18
|
+
}
|
|
19
|
+
case "ollama":
|
|
20
|
+
return new OllamaProvider(process.env.OLLAMA_BASE_URL ?? "http://localhost:11434");
|
|
21
|
+
case "auto":
|
|
22
|
+
return process.env.ANTHROPIC_API_KEY
|
|
23
|
+
? new ClaudeProvider(process.env.ANTHROPIC_API_KEY)
|
|
24
|
+
: undefined;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
/**
 * Builds one provider instance per usable name in a comma-separated list.
 * Names with missing credentials, and unknown names, are skipped with a
 * warning on stderr.
 *
 * @param models - Comma-separated provider names (claude, ollama, minimax).
 * @returns The provider instances that could be constructed; may be empty.
 */
function buildSuiteProviders(models) {
    return models
        .split(",")
        .map((m) => m.trim())
        .flatMap((name) => {
        switch (name) {
            case "claude": {
                const key = process.env.ANTHROPIC_API_KEY;
                if (!key) {
                    process.stderr.write(`warn: ANTHROPIC_API_KEY not set, skipping claude\n`);
                    return [];
                }
                return [new ClaudeProvider(key)];
            }
            case "ollama":
                return [new OllamaProvider(process.env.OLLAMA_BASE_URL ?? "http://localhost:11434")];
            case "minimax": {
                const apiKey = process.env.MINIMAX_API_KEY;
                const groupId = process.env.MINIMAX_GROUP_ID;
                if (!apiKey || !groupId) {
                    process.stderr.write("warn: MINIMAX_API_KEY and MINIMAX_GROUP_ID required for minimax — skipping\n");
                    return [];
                }
                return [new MinimaxProvider(apiKey, groupId)];
            }
            default:
                process.stderr.write(`warn: unknown provider "${name}" — skipping\n`);
                return [];
        }
    });
}
/**
 * Ink component behind `prompt-diff run`: loads the suite file, runs it against
 * the selected providers and renders either a spinner, an error line or the
 * EvalView result.
 *
 * @param props.configPath - Path of the suite file, read as UTF-8.
 * @param props.models - Comma-separated provider names.
 * @param props.output - "json" writes the result as JSON to stdout and renders
 *   nothing; "pretty" renders the Ink views.
 * @param props.verbose - Forwarded to EvalView.
 * @param props.failOnError - When true, sets `process.exitCode = 1` if any
 *   summary entry reports failures or if the run throws.
 * @param props.judge - Judge selection passed to buildJudgeProvider.
 * @returns An element to render, or `null` in JSON mode or when there is no result.
 */
export function RunCommand({ configPath, models, output, verbose, failOnError, judge, }) {
    const { exit } = useApp();
    const [loading, setLoading] = useState(true);
    const [progress, setProgress] = useState({ done: 0, total: 0 });
    const [result, setResult] = useState(null);
    const [error, setError] = useState(null);
    useEffect(() => {
        const run = async () => {
            try {
                // Load config
                const yaml = readFileSync(configPath, "utf-8");
                const config = parseSuiteConfig(yaml);
                // Build providers from --models flag using env vars
                const providers = buildSuiteProviders(models);
                if (providers.length === 0) {
                    throw new Error("No providers available. Check your --models flag and env vars.");
                }
                const judgeProvider = buildJudgeProvider(judge);
                // Initial estimate; onCaseComplete reports the total used from then on.
                const totalCases = config.prompts.length * config.tests.length;
                setProgress({ done: 0, total: totalCases });
                const suiteResult = await runSuite({
                    config,
                    providers,
                    judgeProvider,
                    onCaseComplete: (_, index, total) => {
                        setProgress({ done: index + 1, total });
                    },
                });
                setResult(suiteResult);
                if (output === "json") {
                    process.stdout.write(JSON.stringify(suiteResult, null, 2) + "\n");
                }
                if (failOnError) {
                    const anyFailed = suiteResult.summary.some((s) => s.failed > 0);
                    if (anyFailed)
                        process.exitCode = 1;
                }
            }
            catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                setError(message);
                // JSON mode renders nothing, so without this the failure would be
                // silent. stderr keeps stdout clean for JSON consumers.
                if (output === "json") {
                    process.stderr.write(`Error: ${message}\n`);
                }
                if (failOnError)
                    process.exitCode = 1;
            }
            finally {
                setLoading(false);
                // Delay the exit briefly; presumably so Ink can paint the final frame first.
                setTimeout(() => exit(), 50);
            }
        };
        run();
    }, [configPath, models, output, failOnError, judge]);
    if (output === "json")
        return null;
    if (loading) {
        // `done` counts finished cases, so the case in flight is done + 1.
        // Clamp so the label never reads past the total after the last case.
        const current = Math.min(progress.done + 1, progress.total);
        const label = progress.total > 0
            ? `Running test ${current}/${progress.total}…`
            : "Loading suite…";
        return React.createElement(Box, null,
            React.createElement(Spinner, { label: label }));
    }
    if (error) {
        return React.createElement(Text, { color: "red" },
            "Error: ",
            error);
    }
    if (!result)
        return null;
    return React.createElement(EvalView, { result: result, verbose: verbose });
}
|
|
121
|
+
//# sourceMappingURL=run-command.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"run-command.js","sourceRoot":"","sources":["../src/run-command.tsx"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,OAAO,CAAC;AACnD,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,KAAK,CAAC;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EACL,gBAAgB,EAChB,QAAQ,EACR,cAAc,EACd,cAAc,EACd,eAAe,GAChB,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EAAE,QAAQ,EAAE,MAAM,0BAA0B,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,yBAAyB,CAAC;AAalD,SAAS,kBAAkB,CAAC,KAAkB;IAC5C,QAAQ,KAAK,EAAE,CAAC;QACd,KAAK,MAAM;YACT,OAAO,SAAS,CAAC;QACnB,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;YAC1C,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;gBAC1E,OAAO,SAAS,CAAC;YACnB,CAAC;YACD,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,CAAC;QACjC,CAAC;QACD,KAAK,QAAQ;YACX,OAAO,IAAI,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,wBAAwB,CAAC,CAAC;QACrF,KAAK,MAAM;YACT,OAAO,OAAO,CAAC,GAAG,CAAC,iBAAiB;gBAClC,CAAC,CAAC,IAAI,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;gBACnD,CAAC,CAAC,SAAS,CAAC;IAClB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,EACzB,UAAU,EACV,MAAM,EACN,MAAM,EACN,OAAO,EACP,WAAW,EACX,KAAK,GACW;IAChB,MAAM,EAAE,IAAI,EAAE,GAAG,MAAM,EAAE,CAAC;IAC1B,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,CAAC,QAAQ,EAAE,WAAW,CAAC,GAAG,QAAQ,CAAkC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC,CAAC;IACjG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,GAAG,QAAQ,CAAqB,IAAI,CAAC,CAAC;IAC/D,MAAM,CAAC,KAAK,EAAE,QAAQ,CAAC,GAAG,QAAQ,CAAgB,IAAI,CAAC,CAAC;IAExD,SAAS,CAAC,GAAG,EAAE;QACb,MAAM,GAAG,GAAG,KAAK,IAAI,EAAE;YACrB,IAAI,CAAC;gBACH,cAAc;gBACd,MAAM,IAAI,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;gBAC/C,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAEtC,oDAAoD;gBACpD,MAAM,aAAa,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBAC7D,MAAM,SAAS,GAAe,aAAa,CAAC,OAAO,CAAC,CAAC,IAAI,EAAc,EAAE;oBACvE,QAAQ,IAAI,EAAE,CAAC;wBACb,KAAK,QAAQ,CAAC,CAAC,CAAC;4BACd,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC;4BAC1C,IAAI,C
AAC,GAAG,EAAE,CAAC;gCAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,oDAAoD,CAAC,CAAC;gCAAC,OAAO,EAAE,CAAC;4BAAC,CAAC;4BACpG,OAAO,CAAC,IAAI,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC;wBACnC,CAAC;wBACD,KAAK,QAAQ;4BACX,OAAO,CAAC,IAAI,cAAc,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,wBAAwB,CAAC,CAAC,CAAC;wBACvF,KAAK,SAAS,CAAC,CAAC,CAAC;4BACf,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;4BAC3C,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC;4BAC7C,IAAI,CAAC,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;gCACxB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,8EAA8E,CAC/E,CAAC;gCACF,OAAO,EAAE,CAAC;4BACZ,CAAC;4BACD,OAAO,CAAC,IAAI,eAAe,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC;wBAChD,CAAC;wBACD;4BACE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,IAAI,gBAAgB,CAAC,CAAC;4BACtE,OAAO,EAAE,CAAC;oBACd,CAAC;gBACH,CAAC,CAAC,CAAC;gBAEH,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBAC3B,MAAM,IAAI,KAAK,CAAC,gEAAgE,CAAC,CAAC;gBACpF,CAAC;gBAED,MAAM,aAAa,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;gBAEhD,MAAM,UAAU,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;gBAC/D,WAAW,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;gBAE5C,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC;oBACjC,MAAM;oBACN,SAAS;oBACT,aAAa;oBACb,cAAc,EAAE,CAAC,CAAiB,EAAE,KAAa,EAAE,KAAa,EAAE,EAAE;wBAClE,WAAW,CAAC,EAAE,IAAI,EAAE,KAAK,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;oBAC1C,CAAC;iBACF,CAAC,CAAC;gBAEH,SAAS,CAAC,WAAW,CAAC,CAAC;gBAEvB,IAAI,MAAM,KAAK,MAAM,EAAE,CAAC;oBACtB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;gBACpE,CAAC;gBAED,IAAI,WAAW,EAAE,CAAC;oBAChB,MAAM,SAAS,GAAG,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAChE,IAAI,SAAS;wBAAE,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;gBACtC,CAAC;YACH,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,QAAQ,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC3D,IAAI,WAAW;oBAAE,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;YACxC,CAAC;oBAAS,CAAC;gBACT,UAAU,CAAC,KAAK,CAAC,CAAC;gBAClB,UAAU,CAAC,GAAG,EAAE,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC,CAAC;QACF,G
AAG,EAAE,CAAC;IACR,CAAC,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,KAAK,CAAC,CAAC,CAAC;IAErD,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,IAAI,CAAC;IAEnC,IAAI,OAAO,EAAE,CAAC;QACZ,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,GAAG,CAAC;YAC9B,CAAC,CAAC,gBAAgB,QAAQ,CAAC,IAAI,GAAG,CAAC,IAAI,QAAQ,CAAC,KAAK,GAAG;YACxD,CAAC,CAAC,gBAAgB,CAAC;QACrB,OAAO,oBAAC,GAAG;YAAC,oBAAC,OAAO,IAAC,KAAK,EAAE,KAAK,GAAI,CAAM,CAAC;IAC9C,CAAC;IAED,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,oBAAC,IAAI,IAAC,KAAK,EAAC,KAAK;;YAAS,KAAK,CAAQ,CAAC;IACjD,CAAC;IAED,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,OAAO,oBAAC,QAAQ,IAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,GAAI,CAAC;AACxD,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@prompt-diff/cli",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"repository": {
|
|
5
|
+
"type": "git",
|
|
6
|
+
"url": "https://github.com/darkrishabh/prompt-diff.git",
|
|
7
|
+
"directory": "packages/cli"
|
|
8
|
+
},
|
|
9
|
+
"publishConfig": {
|
|
10
|
+
"access": "public"
|
|
11
|
+
},
|
|
12
|
+
"type": "module",
|
|
13
|
+
"files": ["dist"],
|
|
14
|
+
"bin": {
|
|
15
|
+
"prompt-diff": "./dist/index.js"
|
|
16
|
+
},
|
|
17
|
+
"scripts": {
|
|
18
|
+
"build": "tsc -p tsconfig.json",
|
|
19
|
+
"dev": "tsc -p tsconfig.json --watch",
|
|
20
|
+
"type-check": "tsc --noEmit",
|
|
21
|
+
"clean": "rm -rf dist"
|
|
22
|
+
},
|
|
23
|
+
"dependencies": {
|
|
24
|
+
"@prompt-diff/core": "^0.1.0",
|
|
25
|
+
"commander": "^12.1.0",
|
|
26
|
+
"ink": "^5.0.1",
|
|
27
|
+
"react": "^18.3.1"
|
|
28
|
+
},
|
|
29
|
+
"devDependencies": {
|
|
30
|
+
"@types/react": "^18.3.12",
|
|
31
|
+
"typescript": "^5.7.3"
|
|
32
|
+
}
|
|
33
|
+
}
|