@darkrishabh/bench-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +333 -0
  2. package/dist/cli/app.d.ts +11 -0
  3. package/dist/cli/app.d.ts.map +1 -0
  4. package/dist/cli/app.js +48 -0
  5. package/dist/cli/app.js.map +1 -0
  6. package/dist/cli/components/DiffView.d.ts +5 -0
  7. package/dist/cli/components/DiffView.d.ts.map +1 -0
  8. package/dist/cli/components/DiffView.js +14 -0
  9. package/dist/cli/components/DiffView.js.map +1 -0
  10. package/dist/cli/components/EvalView.d.ts +6 -0
  11. package/dist/cli/components/EvalView.d.ts.map +1 -0
  12. package/dist/cli/components/EvalView.js +82 -0
  13. package/dist/cli/components/EvalView.js.map +1 -0
  14. package/dist/cli/components/Spinner.d.ts +4 -0
  15. package/dist/cli/components/Spinner.d.ts.map +1 -0
  16. package/dist/cli/components/Spinner.js +15 -0
  17. package/dist/cli/components/Spinner.js.map +1 -0
  18. package/dist/cli/index.d.ts +3 -0
  19. package/dist/cli/index.d.ts.map +1 -0
  20. package/dist/cli/index.js +117 -0
  21. package/dist/cli/index.js.map +1 -0
  22. package/dist/cli/run-command.d.ts +11 -0
  23. package/dist/cli/run-command.d.ts.map +1 -0
  24. package/dist/cli/run-command.js +119 -0
  25. package/dist/cli/run-command.js.map +1 -0
  26. package/dist/engine/cost.d.ts +3 -0
  27. package/dist/engine/cost.d.ts.map +1 -0
  28. package/dist/engine/cost.js +52 -0
  29. package/dist/engine/cost.js.map +1 -0
  30. package/dist/engine/diff.d.ts +6 -0
  31. package/dist/engine/diff.d.ts.map +1 -0
  32. package/dist/engine/diff.js +43 -0
  33. package/dist/engine/diff.js.map +1 -0
  34. package/dist/engine/eval.d.ts +14 -0
  35. package/dist/engine/eval.d.ts.map +1 -0
  36. package/dist/engine/eval.js +194 -0
  37. package/dist/engine/eval.js.map +1 -0
  38. package/dist/engine/index.d.ts +15 -0
  39. package/dist/engine/index.d.ts.map +1 -0
  40. package/dist/engine/index.js +10 -0
  41. package/dist/engine/index.js.map +1 -0
  42. package/dist/engine/providers/base.d.ts +7 -0
  43. package/dist/engine/providers/base.d.ts.map +1 -0
  44. package/dist/engine/providers/base.js +2 -0
  45. package/dist/engine/providers/base.js.map +1 -0
  46. package/dist/engine/providers/claude.d.ts +15 -0
  47. package/dist/engine/providers/claude.d.ts.map +1 -0
  48. package/dist/engine/providers/claude.js +53 -0
  49. package/dist/engine/providers/claude.js.map +1 -0
  50. package/dist/engine/providers/minimax.d.ts +16 -0
  51. package/dist/engine/providers/minimax.d.ts.map +1 -0
  52. package/dist/engine/providers/minimax.js +67 -0
  53. package/dist/engine/providers/minimax.js.map +1 -0
  54. package/dist/engine/providers/ollama.d.ts +14 -0
  55. package/dist/engine/providers/ollama.d.ts.map +1 -0
  56. package/dist/engine/providers/ollama.js +60 -0
  57. package/dist/engine/providers/ollama.js.map +1 -0
  58. package/dist/engine/providers/openai-compatible.d.ts +19 -0
  59. package/dist/engine/providers/openai-compatible.d.ts.map +1 -0
  60. package/dist/engine/providers/openai-compatible.js +109 -0
  61. package/dist/engine/providers/openai-compatible.js.map +1 -0
  62. package/dist/engine/providers/subprocess.d.ts +55 -0
  63. package/dist/engine/providers/subprocess.d.ts.map +1 -0
  64. package/dist/engine/providers/subprocess.js +111 -0
  65. package/dist/engine/providers/subprocess.js.map +1 -0
  66. package/dist/engine/suite-loader.d.ts +11 -0
  67. package/dist/engine/suite-loader.d.ts.map +1 -0
  68. package/dist/engine/suite-loader.js +75 -0
  69. package/dist/engine/suite-loader.js.map +1 -0
  70. package/dist/engine/types.d.ts +104 -0
  71. package/dist/engine/types.d.ts.map +1 -0
  72. package/dist/engine/types.js +2 -0
  73. package/dist/engine/types.js.map +1 -0
  74. package/next-env.d.ts +6 -0
  75. package/next.config.ts +26 -0
  76. package/package.json +72 -0
  77. package/public/icon.svg +14 -0
  78. package/src/app/api/diff/route.ts +135 -0
  79. package/src/app/api/models/route.ts +96 -0
  80. package/src/app/api/suite/route.ts +314 -0
  81. package/src/app/globals.css +215 -0
  82. package/src/app/icon.svg +14 -0
  83. package/src/app/layout.tsx +44 -0
  84. package/src/app/opengraph-image.tsx +73 -0
  85. package/src/app/page.tsx +952 -0
  86. package/src/app/suite/layout.tsx +12 -0
  87. package/src/app/suite/page.tsx +206 -0
  88. package/src/app/twitter-image.tsx +1 -0
  89. package/src/components/BenchAiLogo.tsx +38 -0
  90. package/src/components/ComparePanel.tsx +643 -0
  91. package/src/components/ConfigPanel.tsx +809 -0
  92. package/src/components/MarkdownOutput.tsx +16 -0
  93. package/src/components/ModelResponseCard.tsx +313 -0
  94. package/src/components/QuickComparisonBar.tsx +184 -0
  95. package/src/components/ResponsesLineDiff.tsx +149 -0
  96. package/src/components/SettingsPanel.tsx +591 -0
  97. package/src/components/SuitePanel.tsx +875 -0
  98. package/src/lib/brand.ts +4 -0
  99. package/src/lib/config-yaml.ts +70 -0
  100. package/src/lib/consume-suite-sse.ts +70 -0
  101. package/src/lib/describe-judge.ts +23 -0
  102. package/src/lib/model-chip-palette.ts +9 -0
  103. package/src/lib/openai-model-list.ts +33 -0
  104. package/src/lib/provider-ui.ts +30 -0
  105. package/src/lib/resolve-credentials.ts +80 -0
  106. package/src/lib/run-history.ts +66 -0
  107. package/src/lib/simple-line-diff.ts +50 -0
  108. package/src/lib/storage.ts +100 -0
  109. package/src/lib/suite-judge-meta.ts +13 -0
  110. package/src/lib/suite-run-history.ts +81 -0
  111. package/src/types.ts +170 -0
  112. package/vercel.json +5 -0
@@ -0,0 +1,12 @@
1
+ import type { Metadata } from "next";
2
+ import type { ReactNode } from "react";
3
+ import { BRAND_NAME, BRAND_SUITE_SUBTITLE } from "../../lib/brand";
4
+
5
/**
 * Metadata exported by the suite layout.
 *
 * The title is the fixed "Test suites" label joined to the brand name; the
 * description is the brand's suite subtitle followed by a fixed tagline.
 */
export const metadata: Metadata = {
  title: "Test suites · " + BRAND_NAME,
  description: BRAND_SUITE_SUBTITLE + " — judge-backed rubrics and assertions.",
};
9
+
10
/**
 * Pass-through layout for the suite route: it adds no markup of its own and
 * returns the nested content exactly as received.
 *
 * @param props.children - The content to render.
 * @returns The same `children` value, unchanged.
 */
export default function SuiteLayout(props: { children: ReactNode }) {
  const { children } = props;
  return children;
}
@@ -0,0 +1,206 @@
1
+ "use client";
2
+
3
+ import Link from "next/link";
4
+ import { useState, useEffect } from "react";
5
+ import type { JudgeSettings, LLMInstance, SecretsMap } from "../../types";
6
+ import { DEFAULT_JUDGE_SETTINGS } from "../../types";
7
+ import {
8
+ loadInstances,
9
+ saveInstances,
10
+ loadSecrets,
11
+ saveSecrets,
12
+ loadJudgeSettings,
13
+ saveJudgeSettings,
14
+ } from "../../lib/storage";
15
+ import { SettingsPanel } from "../../components/SettingsPanel";
16
+ import { BRAND_NAME, BRAND_SUITE_SUBTITLE } from "../../lib/brand";
17
+ import { BenchAiLogo } from "../../components/BenchAiLogo";
18
+ import { SuitePanel } from "../../components/SuitePanel";
19
+
20
+ export default function SuitePage() {
21
+ const [instances, setInstances] = useState<LLMInstance[]>([]);
22
+ const [secrets, setSecrets] = useState<SecretsMap>({});
23
+ const [judge, setJudge] = useState<JudgeSettings>(DEFAULT_JUDGE_SETTINGS);
24
+ const [configOpen, setConfigOpen] = useState(false);
25
+
26
+ useEffect(() => {
27
+ setInstances(loadInstances());
28
+ setSecrets(loadSecrets());
29
+ setJudge(loadJudgeSettings());
30
+ }, []);
31
+
32
+ const updateInstances = (next: LLMInstance[]) => {
33
+ setInstances(next);
34
+ saveInstances(next);
35
+ };
36
+
37
+ const updateSecrets = (next: SecretsMap) => {
38
+ setSecrets(next);
39
+ saveSecrets(next);
40
+ };
41
+
42
+ const updateJudge = (next: JudgeSettings) => {
43
+ setJudge(next);
44
+ saveJudgeSettings(next);
45
+ };
46
+
47
+ const enabled = instances.filter((i) => i.enabled);
48
+
49
+ return (
50
+ <div style={{ minHeight: "100vh", background: "var(--bg-gradient)" }}>
51
+ <header
52
+ style={{
53
+ background: "var(--surface)",
54
+ borderBottom: "1px solid var(--border)",
55
+ position: "sticky",
56
+ top: 0,
57
+ zIndex: 30,
58
+ boxShadow: "var(--shadow-xs)",
59
+ }}
60
+ >
61
+ <div
62
+ style={{
63
+ maxWidth: 1120,
64
+ margin: "0 auto",
65
+ padding: "0 1.5rem",
66
+ minHeight: 58,
67
+ display: "flex",
68
+ alignItems: "center",
69
+ justifyContent: "space-between",
70
+ gap: "1rem",
71
+ }}
72
+ >
73
+ <div style={{ display: "flex", flexDirection: "column", gap: "0.1rem", minWidth: 0 }}>
74
+ <div style={{ display: "flex", alignItems: "center", gap: "0.65rem", flexWrap: "wrap" }}>
75
+ <Link
76
+ href="/"
77
+ style={{
78
+ display: "flex",
79
+ alignItems: "center",
80
+ gap: "0.65rem",
81
+ textDecoration: "none",
82
+ color: "var(--text-1)",
83
+ }}
84
+ >
85
+ <BenchAiLogo size={30} />
86
+ <span
87
+ style={{
88
+ fontWeight: 700,
89
+ fontSize: "1.05rem",
90
+ letterSpacing: "-0.03em",
91
+ }}
92
+ >
93
+ {BRAND_NAME}
94
+ </span>
95
+ </Link>
96
+ <span
97
+ style={{
98
+ fontSize: "0.65rem",
99
+ fontWeight: 600,
100
+ color: "var(--text-3)",
101
+ letterSpacing: "0.04em",
102
+ }}
103
+ >
104
+ Test suites
105
+ </span>
106
+ </div>
107
+ <span style={{ fontSize: "0.75rem", color: "var(--text-3)", fontWeight: 500 }}>
108
+ {BRAND_SUITE_SUBTITLE}
109
+ </span>
110
+ </div>
111
+
112
+ <div style={{ display: "flex", alignItems: "center", gap: "0.65rem", flexShrink: 0 }}>
113
+ <Link
114
+ href="/"
115
+ style={{
116
+ fontSize: "0.8125rem",
117
+ fontWeight: 600,
118
+ color: "var(--text-2)",
119
+ textDecoration: "none",
120
+ padding: "0.45rem 0.85rem",
121
+ borderRadius: "var(--r-md)",
122
+ border: "1px solid var(--border)",
123
+ background: "var(--surface)",
124
+ boxShadow: "var(--shadow-xs)",
125
+ transition: "background 0.15s, border-color 0.15s",
126
+ }}
127
+ >
128
+ ← Compare
129
+ </Link>
130
+ <button
131
+ type="button"
132
+ onClick={() => setConfigOpen(true)}
133
+ style={{
134
+ padding: "0.45rem 1rem",
135
+ borderRadius: "var(--r-md)",
136
+ border: "1px solid var(--border)",
137
+ background: "var(--surface)",
138
+ color: "var(--text-1)",
139
+ cursor: "pointer",
140
+ fontSize: "0.8125rem",
141
+ fontWeight: 600,
142
+ display: "flex",
143
+ alignItems: "center",
144
+ gap: "0.5rem",
145
+ fontFamily: "inherit",
146
+ transition: "background 0.15s, border-color 0.15s, box-shadow 0.15s",
147
+ boxShadow: "var(--shadow-xs)",
148
+ flexShrink: 0,
149
+ }}
150
+ >
151
+ Settings
152
+ {enabled.length > 0 && (
153
+ <span
154
+ style={{
155
+ background: "var(--accent)",
156
+ color: "#fff",
157
+ borderRadius: 999,
158
+ minWidth: 22,
159
+ height: 22,
160
+ padding: "0 6px",
161
+ fontSize: "0.7rem",
162
+ fontWeight: 700,
163
+ display: "inline-flex",
164
+ alignItems: "center",
165
+ justifyContent: "center",
166
+ }}
167
+ >
168
+ {enabled.length}
169
+ </span>
170
+ )}
171
+ </button>
172
+ </div>
173
+ </div>
174
+ </header>
175
+
176
+ <main style={{ maxWidth: 1120, margin: "0 auto", padding: "1.75rem 1.5rem 3rem" }}>
177
+ <SuitePanel
178
+ instances={instances}
179
+ secrets={secrets}
180
+ judge={judge}
181
+ onOpenSettings={() => setConfigOpen(true)}
182
+ />
183
+ </main>
184
+
185
+ {configOpen && (
186
+ <SettingsPanel
187
+ open={configOpen}
188
+ onClose={() => setConfigOpen(false)}
189
+ instances={instances}
190
+ onUpdateInstances={updateInstances}
191
+ secrets={secrets}
192
+ onUpdateSecrets={updateSecrets}
193
+ judge={judge}
194
+ onUpdateJudge={updateJudge}
195
+ />
196
+ )}
197
+
198
+ <style>{`
199
+ @keyframes spin {
200
+ from { transform: rotate(0deg); }
201
+ to { transform: rotate(360deg); }
202
+ }
203
+ `}</style>
204
+ </div>
205
+ );
206
+ }
@@ -0,0 +1 @@
1
+ export { default, alt, size, contentType } from "./opengraph-image";
@@ -0,0 +1,38 @@
1
+ "use client";
2
+
3
+ import { useId } from "react";
4
+
5
+ /** App mark: side-by-side panels (compare / diff). Matches `src/app/icon.svg`. */
6
+
7
+ export function BenchAiLogo({ size = 32 }: { size?: number }) {
8
+ const gid = useId().replace(/:/g, "");
9
+ const gradId = `bench-ai-logo-gradient-${gid}`;
10
+ return (
11
+ <svg
12
+ xmlns="http://www.w3.org/2000/svg"
13
+ viewBox="0 0 32 32"
14
+ width={size}
15
+ height={size}
16
+ aria-hidden
17
+ focusable="false"
18
+ >
19
+ <defs>
20
+ <linearGradient
21
+ id={gradId}
22
+ x1="4"
23
+ y1="2"
24
+ x2="28"
25
+ y2="30"
26
+ gradientUnits="userSpaceOnUse"
27
+ >
28
+ <stop stopColor="#1e40af" />
29
+ <stop offset="1" stopColor="#172554" />
30
+ </linearGradient>
31
+ </defs>
32
+ <rect width="32" height="32" rx="8" fill={`url(#${gradId})`} />
33
+ <rect x="7" y="9" width="7" height="14" rx="2" fill="#ffffff" fillOpacity="0.95" />
34
+ <rect x="18" y="9" width="7" height="14" rx="2" fill="#ffffff" fillOpacity="0.78" />
35
+ <rect x="15" y="8" width="2" height="16" rx="1" fill="#ffffff" fillOpacity="0.35" />
36
+ </svg>
37
+ );
38
+ }