@darkrishabh/bench-ai 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +333 -0
  2. package/dist/cli/app.d.ts +11 -0
  3. package/dist/cli/app.d.ts.map +1 -0
  4. package/dist/cli/app.js +48 -0
  5. package/dist/cli/app.js.map +1 -0
  6. package/dist/cli/components/DiffView.d.ts +5 -0
  7. package/dist/cli/components/DiffView.d.ts.map +1 -0
  8. package/dist/cli/components/DiffView.js +14 -0
  9. package/dist/cli/components/DiffView.js.map +1 -0
  10. package/dist/cli/components/EvalView.d.ts +6 -0
  11. package/dist/cli/components/EvalView.d.ts.map +1 -0
  12. package/dist/cli/components/EvalView.js +82 -0
  13. package/dist/cli/components/EvalView.js.map +1 -0
  14. package/dist/cli/components/Spinner.d.ts +4 -0
  15. package/dist/cli/components/Spinner.d.ts.map +1 -0
  16. package/dist/cli/components/Spinner.js +15 -0
  17. package/dist/cli/components/Spinner.js.map +1 -0
  18. package/dist/cli/index.d.ts +3 -0
  19. package/dist/cli/index.d.ts.map +1 -0
  20. package/dist/cli/index.js +117 -0
  21. package/dist/cli/index.js.map +1 -0
  22. package/dist/cli/run-command.d.ts +11 -0
  23. package/dist/cli/run-command.d.ts.map +1 -0
  24. package/dist/cli/run-command.js +119 -0
  25. package/dist/cli/run-command.js.map +1 -0
  26. package/dist/engine/cost.d.ts +3 -0
  27. package/dist/engine/cost.d.ts.map +1 -0
  28. package/dist/engine/cost.js +52 -0
  29. package/dist/engine/cost.js.map +1 -0
  30. package/dist/engine/diff.d.ts +6 -0
  31. package/dist/engine/diff.d.ts.map +1 -0
  32. package/dist/engine/diff.js +43 -0
  33. package/dist/engine/diff.js.map +1 -0
  34. package/dist/engine/eval.d.ts +14 -0
  35. package/dist/engine/eval.d.ts.map +1 -0
  36. package/dist/engine/eval.js +194 -0
  37. package/dist/engine/eval.js.map +1 -0
  38. package/dist/engine/index.d.ts +15 -0
  39. package/dist/engine/index.d.ts.map +1 -0
  40. package/dist/engine/index.js +10 -0
  41. package/dist/engine/index.js.map +1 -0
  42. package/dist/engine/providers/base.d.ts +7 -0
  43. package/dist/engine/providers/base.d.ts.map +1 -0
  44. package/dist/engine/providers/base.js +2 -0
  45. package/dist/engine/providers/base.js.map +1 -0
  46. package/dist/engine/providers/claude.d.ts +15 -0
  47. package/dist/engine/providers/claude.d.ts.map +1 -0
  48. package/dist/engine/providers/claude.js +53 -0
  49. package/dist/engine/providers/claude.js.map +1 -0
  50. package/dist/engine/providers/minimax.d.ts +16 -0
  51. package/dist/engine/providers/minimax.d.ts.map +1 -0
  52. package/dist/engine/providers/minimax.js +67 -0
  53. package/dist/engine/providers/minimax.js.map +1 -0
  54. package/dist/engine/providers/ollama.d.ts +14 -0
  55. package/dist/engine/providers/ollama.d.ts.map +1 -0
  56. package/dist/engine/providers/ollama.js +60 -0
  57. package/dist/engine/providers/ollama.js.map +1 -0
  58. package/dist/engine/providers/openai-compatible.d.ts +19 -0
  59. package/dist/engine/providers/openai-compatible.d.ts.map +1 -0
  60. package/dist/engine/providers/openai-compatible.js +109 -0
  61. package/dist/engine/providers/openai-compatible.js.map +1 -0
  62. package/dist/engine/providers/subprocess.d.ts +55 -0
  63. package/dist/engine/providers/subprocess.d.ts.map +1 -0
  64. package/dist/engine/providers/subprocess.js +111 -0
  65. package/dist/engine/providers/subprocess.js.map +1 -0
  66. package/dist/engine/suite-loader.d.ts +11 -0
  67. package/dist/engine/suite-loader.d.ts.map +1 -0
  68. package/dist/engine/suite-loader.js +75 -0
  69. package/dist/engine/suite-loader.js.map +1 -0
  70. package/dist/engine/types.d.ts +104 -0
  71. package/dist/engine/types.d.ts.map +1 -0
  72. package/dist/engine/types.js +2 -0
  73. package/dist/engine/types.js.map +1 -0
  74. package/next-env.d.ts +6 -0
  75. package/next.config.ts +26 -0
  76. package/package.json +72 -0
  77. package/public/icon.svg +14 -0
  78. package/src/app/api/diff/route.ts +135 -0
  79. package/src/app/api/models/route.ts +96 -0
  80. package/src/app/api/suite/route.ts +314 -0
  81. package/src/app/globals.css +215 -0
  82. package/src/app/icon.svg +14 -0
  83. package/src/app/layout.tsx +44 -0
  84. package/src/app/opengraph-image.tsx +73 -0
  85. package/src/app/page.tsx +952 -0
  86. package/src/app/suite/layout.tsx +12 -0
  87. package/src/app/suite/page.tsx +206 -0
  88. package/src/app/twitter-image.tsx +1 -0
  89. package/src/components/BenchAiLogo.tsx +38 -0
  90. package/src/components/ComparePanel.tsx +643 -0
  91. package/src/components/ConfigPanel.tsx +809 -0
  92. package/src/components/MarkdownOutput.tsx +16 -0
  93. package/src/components/ModelResponseCard.tsx +313 -0
  94. package/src/components/QuickComparisonBar.tsx +184 -0
  95. package/src/components/ResponsesLineDiff.tsx +149 -0
  96. package/src/components/SettingsPanel.tsx +591 -0
  97. package/src/components/SuitePanel.tsx +875 -0
  98. package/src/lib/brand.ts +4 -0
  99. package/src/lib/config-yaml.ts +70 -0
  100. package/src/lib/consume-suite-sse.ts +70 -0
  101. package/src/lib/describe-judge.ts +23 -0
  102. package/src/lib/model-chip-palette.ts +9 -0
  103. package/src/lib/openai-model-list.ts +33 -0
  104. package/src/lib/provider-ui.ts +30 -0
  105. package/src/lib/resolve-credentials.ts +80 -0
  106. package/src/lib/run-history.ts +66 -0
  107. package/src/lib/simple-line-diff.ts +50 -0
  108. package/src/lib/storage.ts +100 -0
  109. package/src/lib/suite-judge-meta.ts +13 -0
  110. package/src/lib/suite-run-history.ts +81 -0
  111. package/src/types.ts +170 -0
  112. package/vercel.json +5 -0
package/src/types.ts ADDED
@@ -0,0 +1,170 @@
1
+ import type { ProviderResult } from "@darkrishabh/bench-ai";
2
+
3
+ // ─── Provider names ───────────────────────────────────────────────────────────
4
+
5
+ /** Identifiers of the native (hand-rolled) providers, i.e. the ones that are not OpenAI-compatible presets (see OAIPreset). */
6
+ export type NativeProvider = "claude" | "ollama" | "minimax" | "claude-cli" | "codex";
7
+
8
+ /** OpenAI-compatible presets plus the catch-all "custom". Every member is also a key of PRESET_BASE_URLS. */
9
+ export type OAIPreset =
10
+ | "openai"
11
+ | "groq"
12
+ | "openrouter"
13
+ | "nvidia-nim"
14
+ | "together"
15
+ | "perplexity"
16
+ | "custom";
17
+
18
+ export type AnyProvider = NativeProvider | OAIPreset; // Any provider id: a native provider or an OpenAI-compatible preset.
19
+
20
+ // ─── Preset base URLs ─────────────────────────────────────────────────────────
21
+ /** Base URL for each OpenAI-compatible preset. "custom" has no preset URL and maps to the empty string. */
22
+ export const PRESET_BASE_URLS: Record<OAIPreset, string> = {
23
+ openai: "https://api.openai.com/v1",
24
+ groq: "https://api.groq.com/openai/v1",
25
+ openrouter: "https://openrouter.ai/api/v1",
26
+ "nvidia-nim":"https://integrate.api.nvidia.com/v1",
27
+ together: "https://api.together.xyz/v1",
28
+ perplexity: "https://api.perplexity.ai",
29
+ custom: "", // NOTE(review): presumably filled in per instance via LLMInstance.baseUrl — confirm
30
+ };
31
+
32
+ // ─── Per-preset model suggestions ────────────────────────────────────────────
33
+ /** Suggested model names for every provider (native providers and OpenAI-compatible presets). "custom" has no suggestions. */
34
+ export const PRESET_MODELS: Record<AnyProvider, string[]> = {
35
+ claude: ["claude-3-5-haiku-20241022", "claude-3-5-sonnet-20241022", "claude-opus-4-5"],
36
+ ollama: ["llama3.2", "llama3.1", "mistral", "codellama", "phi3", "gemma2", "qwen2.5"],
37
+ minimax: ["abab6.5s-chat", "abab6.5-chat"],
38
+ "claude-cli": [
39
+ "claude-opus-4-6",
40
+ "claude-sonnet-4-6",
41
+ "claude-haiku-4-5-20251001",
42
+ "claude-3-5-sonnet-20241022",
43
+ "claude-3-5-haiku-20241022",
44
+ ],
45
+ codex: ["gpt-4o", "gpt-4o-mini", "o3-mini", "o1-mini", "o1"],
46
+ /** Fallback list used when /api/models cannot list models (no API key); with a key, the live list is loaded from OpenAI instead. */
47
+ openai: [
48
+ "gpt-5",
49
+ "gpt-5-mini",
50
+ "gpt-5-nano",
51
+ "gpt-5-chat-latest",
52
+ "gpt-5.4",
53
+ "gpt-4o",
54
+ "gpt-4o-mini",
55
+ "gpt-4-turbo",
56
+ "gpt-4",
57
+ "gpt-3.5-turbo",
58
+ "o1",
59
+ "o1-mini",
60
+ "o3-mini",
61
+ "o3",
62
+ "o4-mini",
63
+ ],
64
+ groq: ["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "mixtral-8x7b-32768", "gemma2-9b-it"],
65
+ openrouter: [
66
+ "openai/gpt-4o",
67
+ "anthropic/claude-3.5-sonnet",
68
+ "google/gemini-flash-1.5",
69
+ "mistralai/mistral-7b-instruct",
70
+ "meta-llama/llama-3.1-70b-instruct",
71
+ "deepseek/deepseek-chat",
72
+ ],
73
+ "nvidia-nim": [
74
+ "meta/llama-3.1-405b-instruct",
75
+ "nvidia/llama-3.1-nemotron-70b-instruct",
76
+ "meta/llama-3.1-70b-instruct",
77
+ "mistralai/mistral-large-2-instruct",
78
+ ],
79
+ together: [
80
+ "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
81
+ "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
82
+ "mistralai/Mixtral-8x22B-Instruct-v0.1",
83
+ "google/gemma-2-27b-it",
84
+ ],
85
+ perplexity: [
86
+ "llama-3.1-sonar-large-128k-online",
87
+ "llama-3.1-sonar-small-128k-online",
88
+ ],
89
+ custom: [], // no suggestions for the catch-all preset
90
+ };
91
+
92
+ // ─── LLM instance ─────────────────────────────────────────────────────────────
93
+ /** One configured model entry: the provider and model to use, an enabled flag, and optional credentials, endpoint and generation parameters. */
94
+ export interface LLMInstance {
95
+ id: string; // NOTE(review): presumably what WebProviderResult.instanceId refers to — confirm
96
+ provider: AnyProvider;
97
+ model: string;
98
+ enabled: boolean;
99
+ // Credentials & endpoints
100
+ apiKey?: string;
101
+ /** When set, the API key is read from Settings → Secrets under this variable name (falls back to the inline apiKey if empty). */
102
+ apiKeySecretRef?: string;
103
+ baseUrl?: string; // NOTE(review): presumably overrides the preset URL from PRESET_BASE_URLS — confirm
104
+ groupId?: string; // Minimax only
105
+ /** When set, the Minimax Group ID is read from Secrets under this variable name. */
106
+ groupIdSecretRef?: string;
107
+ // Generation params
108
+ maxTokens?: number;
109
+ temperature?: number;
110
+ }
111
+
112
+ // ─── Settings (secrets, judge, YAML export) ─────────────────────────────────
113
+
114
+ /** Variable name → secret value (stored in localStorage). Presumably the *SecretRef fields (e.g. LLMInstance.apiKeySecretRef) name keys of this map — confirm. */
115
+ export type SecretsMap = Record<string, string>;
116
+
117
+ export type JudgeMode = "auto" | "claude" | "ollama" | "none"; // Judge mode; how "auto" chooses a judge is not defined in this file.
118
+
119
+ export interface JudgeSettings { // Judge configuration: the mode plus the Claude- and Ollama-specific parameters.
120
+ mode: JudgeMode;
121
+ /** Name of the Secrets variable that holds the Anthropic key, used when the judge is Claude. */
122
+ anthropicSecretRef: string;
123
+ claudeModel: string;
124
+ ollamaBaseUrl: string;
125
+ ollamaModel: string;
126
+ }
127
+
128
+ export const DEFAULT_JUDGE_SETTINGS: JudgeSettings = { // Default values for every JudgeSettings field.
129
+ mode: "auto",
130
+ anthropicSecretRef: "anthropic", // same name as the "anthropic" entry in SUGGESTED_SECRET_KEYS
131
+ claudeModel: "claude-3-5-haiku-20241022", // also listed in PRESET_MODELS.claude
132
+ ollamaBaseUrl: "http://localhost:11434",
133
+ ollamaModel: "llama3.2", // also listed in PRESET_MODELS.ollama
134
+ };
135
+
136
+ /** Suggested secret variable names for quick-add in Settings. `key` is the variable name; `label` is the human-readable name that goes with it. */
137
+ export const SUGGESTED_SECRET_KEYS: { key: string; label: string }[] = [
138
+ { key: "anthropic", label: "Anthropic" },
139
+ { key: "openai", label: "OpenAI" },
140
+ { key: "groq", label: "Groq" },
141
+ { key: "openrouter", label: "OpenRouter" },
142
+ { key: "together", label: "Together" },
143
+ { key: "perplexity", label: "Perplexity" },
144
+ { key: "nvidia_nim", label: "NVIDIA NIM" }, // note: underscore here, whereas the provider id is "nvidia-nim"
145
+ { key: "minimax_api", label: "Minimax API" },
146
+ { key: "minimax_group", label: "Minimax Group ID" },
147
+ ];
148
+
149
+ export const APP_CONFIG_VERSION = 1; // NOTE(review): presumably the value written to AppConfigYaml.version — confirm
150
+
151
+ /** Shape for YAML import/export of app settings (includes secrets — handle carefully). Only `version` is required; `secrets`, `judge` and `instances` are optional, and `judge` may be partial. */
152
+ export interface AppConfigYaml {
153
+ version: number;
154
+ secrets?: SecretsMap;
155
+ judge?: Partial<JudgeSettings>;
156
+ instances?: LLMInstance[];
157
+ }
158
+
159
+ // ─── Web result types ─────────────────────────────────────────────────────────
160
+ /** A ProviderResult (imported from the package entry point) extended with an `instanceId` and a `label`. */
161
+ export interface WebProviderResult extends ProviderResult {
162
+ instanceId: string; // NOTE(review): presumably an LLMInstance.id — confirm
163
+ label: string;
164
+ }
165
+
166
+ export interface WebDiffResult { // One prompt together with the per-provider results collected for it.
167
+ prompt: string;
168
+ ranAt: string; // NOTE(review): timestamp as a string; format is not specified here (presumably ISO 8601) — confirm
169
+ results: WebProviderResult[];
170
+ }
package/vercel.json ADDED
@@ -0,0 +1,5 @@
1
+ {
2
+ "$schema": "https://openapi.vercel.sh/vercel.json",
3
+ "framework": "nextjs",
4
+ "buildCommand": "npm run build"
5
+ }