llm-diff 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/diff.js ADDED
@@ -0,0 +1,148 @@
1
+ /**
2
+ * Core diff engine.
3
+ *
4
+ * Takes two prompts (or files), fires them at the same model,
5
+ * and returns a structured result with token/cost/latency deltas
6
+ * plus a textual diff of the responses.
7
+ */
8
+
9
+ import { readFile } from 'node:fs/promises';
10
+ import { resolve } from 'node:path';
11
+ import { diffWords } from 'diff';
12
+ import { resolveModel, complete, calculateCost } from './providers.js';
13
+
14
+ /**
15
+ * @typedef {object} DiffResult
16
+ * @property {object} a – result from prompt A
17
+ * @property {object} b – result from prompt B
18
+ * @property {object} delta – computed deltas
19
+ * @property {Array} wordDiff – diff-lib change objects
20
+ * @property {object} model – resolved model info
21
+ */
22
+
23
/**
 * Run the diff.
 *
 * Sends prompt A and prompt B to the same model (optionally over several
 * runs, averaged) and returns token/cost/latency deltas plus a word-level
 * diff of the two responses.
 *
 * @param {object} opts
 * @param {string} opts.promptA – text or path to prompt A
 * @param {string} opts.promptB – text or path to prompt B
 * @param {string} opts.model – model name/alias
 * @param {string} [opts.system] – optional system prompt (text or path)
 * @param {string} [opts.baseUrl] – gateway URL override
 * @param {number} [opts.maxTokens]
 * @param {number} [opts.temperature]
 * @param {number} [opts.timeout]
 * @param {number} [opts.runs] – number of runs to average (default 1)
 * @param {boolean} [opts.parallel] – run A and B in parallel (default true)
 * @returns {Promise<DiffResult>}
 */
export async function runDiff(opts) {
  const model = resolveModel(opts.model);

  // Inputs may be inline text or file paths; load all three concurrently.
  const [promptA, promptB, system] = await Promise.all([
    loadInput(opts.promptA),
    loadInput(opts.promptB),
    opts.system ? loadInput(opts.system) : Promise.resolve(undefined),
  ]);

  const callOpts = {
    system,
    baseUrl: opts.baseUrl,
    maxTokens: opts.maxTokens,
    temperature: opts.temperature,
    timeout: opts.timeout,
  };

  const runs = Math.max(1, opts.runs || 1);
  const inParallel = opts.parallel !== false;

  // One sample per run for each side.
  const samplesA = [];
  const samplesB = [];
  for (let run = 0; run < runs; run++) {
    if (inParallel) {
      const [resA, resB] = await Promise.all([
        complete(model, promptA, callOpts),
        complete(model, promptB, callOpts),
      ]);
      samplesA.push(resA);
      samplesB.push(resB);
    } else {
      // Serial mode: A completes fully before B starts.
      samplesA.push(await complete(model, promptA, callOpts));
      samplesB.push(await complete(model, promptB, callOpts));
    }
  }

  // Numeric fields averaged across runs; text is from the last run.
  const a = average(samplesA);
  const b = average(samplesB);

  a.cost = calculateCost(model, a.inputTokens, a.outputTokens);
  b.cost = calculateCost(model, b.inputTokens, b.outputTokens);

  // Percentage helper: 0 when the baseline is zero (avoids division by 0).
  const pct = (base, diff) => (base ? (diff / base) * 100 : 0);

  const totalA = a.inputTokens + a.outputTokens;
  const totalB = b.inputTokens + b.outputTokens;

  const delta = {
    inputTokens: b.inputTokens - a.inputTokens,
    outputTokens: b.outputTokens - a.outputTokens,
    totalTokens: totalB - totalA,
    totalTokensPct: pct(totalA, totalB - totalA),
    cost: b.cost - a.cost,
    costPct: pct(a.cost, b.cost - a.cost),
    latencyMs: b.latencyMs - a.latencyMs,
    latencyPct: pct(a.latencyMs, b.latencyMs - a.latencyMs),
  };

  return { a, b, delta, wordDiff: diffWords(a.text, b.text), model, runs };
}
105
+
106
+ // ---------------------------------------------------------------------------
107
+ // Helpers
108
+ // ---------------------------------------------------------------------------
109
+
110
/**
 * Resolve an input argument to prompt text.
 *
 * If the string looks like a file path (single line, short, ends in a
 * dot-extension) try to read it; on any read failure — or when it does not
 * look like a path — the string itself is returned as the prompt text.
 */
async function loadInput(input) {
  if (!input) return input;

  // Path heuristic: short, no newlines, and a trailing extension like ".txt".
  const pathLike =
    input.length < 500 &&
    !input.includes('\n') &&
    /\.[a-z0-9]{1,6}$/i.test(input);
  if (!pathLike) return input;

  try {
    return await readFile(resolve(input), 'utf-8');
  } catch {
    // Unreadable — fall back to treating the string as literal prompt text.
    return input;
  }
}
135
+
136
/**
 * Collapse multiple run results into one: token counts are averaged and
 * rounded, latency is averaged unrounded, and the text of the final run
 * is kept.
 */
function average(results) {
  const n = results.length;
  if (n === 1) return { ...results[0] };

  const mean = (pick) => results.reduce((sum, r) => sum + pick(r), 0) / n;

  return {
    text: results.at(-1).text,
    inputTokens: Math.round(mean((r) => r.inputTokens)),
    outputTokens: Math.round(mean((r) => r.outputTokens)),
    latencyMs: mean((r) => r.latencyMs),
  };
}
package/src/index.js ADDED
@@ -0,0 +1,10 @@
1
+ /**
2
+ * llm-diff — public API
3
+ *
4
+ * Usage:
5
+ * import { runDiff, resolveModel, listModels } from 'llm-diff';
6
+ */
7
+
8
+ export { runDiff } from './diff.js';
9
+ export { resolveModel, listModels, calculateCost, complete } from './providers.js';
10
+ export { render, renderModelList } from './render.js';
package/src/providers.js ADDED
@@ -0,0 +1,356 @@
1
+ /**
2
+ * Provider registry — pricing, API call adapters, token counting.
3
+ *
4
+ * Each provider exports:
5
+ * models – map of model aliases → { id, inputCostPer1k, outputCostPer1k }
6
+ * complete() – (model, prompt, opts) → { text, inputTokens, outputTokens, latencyMs }
7
+ */
8
+
9
+ // ---------------------------------------------------------------------------
10
+ // Pricing tables (USD per 1 000 tokens, as of early 2026)
11
+ // ---------------------------------------------------------------------------
12
+
13
// Alias → { id, inputCostPer1k, outputCostPer1k } per provider.
// The alias is what users type on the CLI; `id` is the exact model name
// sent to the provider's API (they differ where providers use dated ids).

// OpenAI
const OPENAI_MODELS = {
  'gpt-4o': { id: 'gpt-4o', inputCostPer1k: 0.0025, outputCostPer1k: 0.01 },
  'gpt-4o-mini': { id: 'gpt-4o-mini', inputCostPer1k: 0.00015, outputCostPer1k: 0.0006 },
  'gpt-4-turbo': { id: 'gpt-4-turbo', inputCostPer1k: 0.01, outputCostPer1k: 0.03 },
  'gpt-4': { id: 'gpt-4', inputCostPer1k: 0.03, outputCostPer1k: 0.06 },
  'gpt-3.5-turbo': { id: 'gpt-3.5-turbo', inputCostPer1k: 0.0005, outputCostPer1k: 0.0015 },
  'o1': { id: 'o1', inputCostPer1k: 0.015, outputCostPer1k: 0.06 },
  'o1-mini': { id: 'o1-mini', inputCostPer1k: 0.003, outputCostPer1k: 0.012 },
  'o3-mini': { id: 'o3-mini', inputCostPer1k: 0.0011, outputCostPer1k: 0.0044 },
};

// Anthropic — NOTE(review): the sonnet alias is the full dated id, unlike
// the short aliases used for the other entries; confirm this is intentional.
const ANTHROPIC_MODELS = {
  'claude-sonnet-4-20250514': { id: 'claude-sonnet-4-20250514', inputCostPer1k: 0.003, outputCostPer1k: 0.015 },
  'claude-3.5-haiku': { id: 'claude-3-5-haiku-20241022', inputCostPer1k: 0.0008, outputCostPer1k: 0.004 },
  'claude-3-opus': { id: 'claude-3-opus-20240229', inputCostPer1k: 0.015, outputCostPer1k: 0.075 },
};

// Google Gemini
const GEMINI_MODELS = {
  'gemini-2.0-flash': { id: 'gemini-2.0-flash', inputCostPer1k: 0.0001, outputCostPer1k: 0.0004 },
  'gemini-2.0-pro': { id: 'gemini-2.0-pro', inputCostPer1k: 0.00125, outputCostPer1k: 0.005 },
  'gemini-1.5-pro': { id: 'gemini-1.5-pro', inputCostPer1k: 0.00125, outputCostPer1k: 0.005 },
  'gemini-1.5-flash': { id: 'gemini-1.5-flash', inputCostPer1k: 0.000075, outputCostPer1k: 0.0003 },
};

// Groq-hosted open-weight models
const GROQ_MODELS = {
  'llama-3.3-70b': { id: 'llama-3.3-70b-versatile', inputCostPer1k: 0.00059, outputCostPer1k: 0.00079 },
  'llama-3.1-8b': { id: 'llama-3.1-8b-instant', inputCostPer1k: 0.00005, outputCostPer1k: 0.00008 },
  'mixtral-8x7b': { id: 'mixtral-8x7b-32768', inputCostPer1k: 0.00024, outputCostPer1k: 0.00024 },
  'gemma2-9b': { id: 'gemma2-9b-it', inputCostPer1k: 0.0002, outputCostPer1k: 0.0002 },
};
43
+
44
// ---------------------------------------------------------------------------
// Provider detection
// ---------------------------------------------------------------------------

// Flat alias → model-info map across all providers; each entry is tagged
// with its provider name. `prefix` is a hoisted function declaration
// defined just below, so calling it here is safe.
const ALL_MODELS = {
  ...prefix(OPENAI_MODELS, 'openai'),
  ...prefix(ANTHROPIC_MODELS, 'anthropic'),
  ...prefix(GEMINI_MODELS, 'gemini'),
  ...prefix(GROQ_MODELS, 'groq'),
};
54
+
55
/**
 * Return a copy of `map` in which every model entry additionally carries a
 * `provider` tag. The input map is not mutated.
 */
function prefix(map, provider) {
  return Object.fromEntries(
    Object.entries(map).map(([alias, info]) => [alias, { ...info, provider }])
  );
}
62
+
63
/**
 * Resolve a model string → { provider, id, inputCostPer1k, outputCostPer1k }.
 *
 * Accepts either a bare alias ("gpt-4o") or provider-qualified syntax
 * ("openai/gpt-4o"). Matching is case-insensitive.
 *
 * @throws {Error} when the name matches no registered model
 */
export function resolveModel(name) {
  const key = name.toLowerCase();

  const direct = ALL_MODELS[key];
  if (direct) return direct;

  // provider/model syntax: everything before the first slash is the provider.
  const slash = key.indexOf('/');
  if (slash !== -1) {
    const prov = key.slice(0, slash);
    const candidate = ALL_MODELS[key.slice(slash + 1)];
    if (candidate && candidate.provider === prov) return candidate;
  }

  throw new Error(
    `Unknown model "${name}". Run \`llm-diff --models\` to see supported models.`
  );
}
82
+
83
/**
 * List every supported model grouped by provider.
 *
 * @returns {Record<string, Array<object>>} provider → [{ alias, ...info }]
 */
export function listModels() {
  const grouped = {};
  for (const [alias, info] of Object.entries(ALL_MODELS)) {
    (grouped[info.provider] ??= []).push({ alias, ...info });
  }
  return grouped;
}
94
+
95
+ // ---------------------------------------------------------------------------
96
+ // API adapters
97
+ // ---------------------------------------------------------------------------
98
+
99
/**
 * Fire a chat completion and return a normalised result.
 *
 * Dispatches to the provider-specific adapter based on the resolved model.
 *
 * @param {object} model – resolved model from resolveModel()
 * @param {string} prompt – the user message (or full messages JSON)
 * @param {object} opts
 * @param {string} [opts.system] – system message
 * @param {string} [opts.baseUrl] – gateway override
 * @param {number} [opts.maxTokens] – max output tokens (default 2048)
 * @param {number} [opts.temperature] – temperature (default 0)
 * @param {number} [opts.timeout] – request timeout ms (default 60000)
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number, latencyMs: number}>}
 */
export async function complete(model, prompt, opts = {}) {
  const adapters = {
    openai: openaiComplete,
    anthropic: anthropicComplete,
    gemini: geminiComplete,
    groq: groqComplete,
  };
  const adapter = adapters[model.provider];
  if (!adapter) {
    throw new Error(`No adapter for provider "${model.provider}"`);
  }
  return adapter(model, prompt, opts);
}
122
+
123
+ // ---------------------------------------------------------------------------
124
+ // OpenAI-compatible (also covers Groq, llmhut gateway, any OpenAI-compat API)
125
+ // ---------------------------------------------------------------------------
126
+
127
/**
 * OpenAI adapter — delegates to the shared OpenAI-compatible path.
 *
 * Fix: only hard-require OPENAI_API_KEY when talking to the default
 * endpoint. env()'s error message promises that passing --base-url lets a
 * gateway handle auth, but the original called env() unconditionally and
 * threw before the override was ever used.
 */
async function openaiComplete(model, prompt, opts) {
  const apiKey = opts.baseUrl
    ? (process.env.OPENAI_API_KEY ?? '')
    : env('OPENAI_API_KEY');
  const baseUrl = opts.baseUrl || 'https://api.openai.com/v1';
  return openaiCompatComplete(model, prompt, { ...opts, apiKey, baseUrl });
}
132
+
133
/**
 * Groq adapter — Groq exposes an OpenAI-compatible endpoint.
 *
 * Fix: only hard-require GROQ_API_KEY when talking to the default endpoint;
 * a gateway supplied via --base-url may handle auth itself (this is the
 * behaviour env()'s error message already advertises).
 */
async function groqComplete(model, prompt, opts) {
  const apiKey = opts.baseUrl
    ? (process.env.GROQ_API_KEY ?? '')
    : env('GROQ_API_KEY');
  const baseUrl = opts.baseUrl || 'https://api.groq.com/openai/v1';
  return openaiCompatComplete(model, prompt, { ...opts, apiKey, baseUrl });
}
138
+
139
/**
 * Shared OpenAI-compatible chat-completions adapter (OpenAI, Groq, and any
 * gateway speaking the same protocol).
 *
 * @param {object} model – resolved model ({ id, provider, ... })
 * @param {string} prompt – user message or raw messages JSON
 * @param {object} opts – { apiKey, baseUrl, system, maxTokens, temperature, timeout }
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number, latencyMs: number}>}
 * @throws {Error} on non-2xx responses and on timeout
 */
async function openaiCompatComplete(model, prompt, opts) {
  const {
    apiKey,
    baseUrl,
    system,
    maxTokens = 2048,
    temperature = 0,
    timeout = 60_000,
  } = opts;

  const messages = buildMessages(prompt, system);

  // Abort the request if the provider takes longer than `timeout` ms.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  const t0 = performance.now();

  try {
    const res = await fetch(`${baseUrl}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: model.id,
        messages,
        max_tokens: maxTokens,
        temperature,
      }),
      signal: controller.signal,
    });

    if (!res.ok) {
      const body = await res.text().catch(() => '');
      throw new Error(`${model.provider} API ${res.status}: ${body.slice(0, 300)}`);
    }

    const data = await res.json();
    // Latency covers the full round-trip including body download.
    const latencyMs = performance.now() - t0;

    return {
      text: data.choices?.[0]?.message?.content ?? '',
      inputTokens: data.usage?.prompt_tokens ?? 0,
      outputTokens: data.usage?.completion_tokens ?? 0,
      latencyMs,
    };
  } catch (err) {
    // Fix: surface aborts as an actionable timeout error instead of letting
    // a bare AbortError propagate to the user.
    if (err.name === 'AbortError') {
      throw new Error(`${model.provider} request timed out after ${timeout}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
189
+
190
+ // ---------------------------------------------------------------------------
191
+ // Anthropic
192
+ // ---------------------------------------------------------------------------
193
+
194
/**
 * Anthropic Messages API adapter.
 *
 * Unlike the OpenAI-compatible path, Anthropic takes the system prompt as a
 * dedicated top-level `system` field rather than a message in the array.
 *
 * Fix: only hard-require ANTHROPIC_API_KEY when talking to the default
 * endpoint — a gateway supplied via --base-url may handle auth itself, as
 * env()'s error message advertises. Also surfaces timeouts as a clear error
 * instead of a bare AbortError.
 *
 * @param {object} model – resolved model ({ id, provider, ... })
 * @param {string} prompt – user message or raw messages JSON
 * @param {object} opts – see complete()
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number, latencyMs: number}>}
 */
async function anthropicComplete(model, prompt, opts) {
  const apiKey = opts.baseUrl
    ? (process.env.ANTHROPIC_API_KEY ?? '')
    : env('ANTHROPIC_API_KEY');
  const baseUrl = opts.baseUrl || 'https://api.anthropic.com';
  const {
    system,
    maxTokens = 2048,
    temperature = 0,
    timeout = 60_000,
  } = opts;

  // System prompt goes in the request body, not the messages array.
  const messages = buildMessages(prompt);

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  const t0 = performance.now();

  try {
    const body = {
      model: model.id,
      messages,
      max_tokens: maxTokens,
      temperature,
    };
    if (system) body.system = system;

    const res = await fetch(`${baseUrl}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      throw new Error(`Anthropic API ${res.status}: ${text.slice(0, 300)}`);
    }

    const data = await res.json();
    const latencyMs = performance.now() - t0;

    // Concatenate all text blocks; non-text block types are ignored.
    const text = data.content
      ?.filter(b => b.type === 'text')
      .map(b => b.text)
      .join('') ?? '';

    return {
      text,
      inputTokens: data.usage?.input_tokens ?? 0,
      outputTokens: data.usage?.output_tokens ?? 0,
      latencyMs,
    };
  } catch (err) {
    // Fix: actionable timeout message instead of a bare AbortError.
    if (err.name === 'AbortError') {
      throw new Error(`Anthropic request timed out after ${timeout}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
253
+
254
+ // ---------------------------------------------------------------------------
255
+ // Google Gemini
256
+ // ---------------------------------------------------------------------------
257
+
258
/**
 * Google Gemini generateContent adapter.
 *
 * Fixes:
 * - Only hard-require GEMINI_API_KEY when talking to the default endpoint —
 *   a gateway supplied via --base-url may handle auth itself, as env()'s
 *   error message advertises.
 * - Build the request URL with URL/searchParams so the API key is properly
 *   percent-encoded (the original interpolated it raw into the string).
 * - Surface timeouts as a clear error instead of a bare AbortError.
 *
 * @param {object} model – resolved model ({ id, provider, ... })
 * @param {string} prompt – user message (sent verbatim as one user turn)
 * @param {object} opts – see complete()
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number, latencyMs: number}>}
 */
async function geminiComplete(model, prompt, opts) {
  const apiKey = opts.baseUrl
    ? (process.env.GEMINI_API_KEY ?? '')
    : env('GEMINI_API_KEY');
  const baseUrl = opts.baseUrl || 'https://generativelanguage.googleapis.com/v1beta';
  const {
    system,
    maxTokens = 2048,
    temperature = 0,
    timeout = 60_000,
  } = opts;

  const body = {
    contents: [{ role: 'user', parts: [{ text: prompt }] }],
    generationConfig: {
      maxOutputTokens: maxTokens,
      temperature,
    },
  };
  if (system) {
    body.systemInstruction = { parts: [{ text: system }] };
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  const t0 = performance.now();

  try {
    // Gemini authenticates via a ?key= query parameter.
    const url = new URL(`${baseUrl}/models/${model.id}:generateContent`);
    if (apiKey) url.searchParams.set('key', apiKey);

    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
      signal: controller.signal,
    });

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      throw new Error(`Gemini API ${res.status}: ${text.slice(0, 300)}`);
    }

    const data = await res.json();
    const latencyMs = performance.now() - t0;

    const text = data.candidates?.[0]?.content?.parts
      ?.map(p => p.text)
      .join('') ?? '';

    const usage = data.usageMetadata ?? {};

    return {
      text,
      inputTokens: usage.promptTokenCount ?? 0,
      outputTokens: usage.candidatesTokenCount ?? 0,
      latencyMs,
    };
  } catch (err) {
    // Fix: actionable timeout message instead of a bare AbortError.
    if (err.name === 'AbortError') {
      throw new Error(`Gemini request timed out after ${timeout}ms`);
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
317
+
318
+ // ---------------------------------------------------------------------------
319
+ // Helpers
320
+ // ---------------------------------------------------------------------------
321
+
322
/**
 * Build an OpenAI-style messages array from a prompt string.
 *
 * If `prompt` parses as a JSON array it is treated as a ready-made messages
 * array. Fix: in that case a provided `system` message used to be silently
 * dropped; it is now prepended unless the array already contains a
 * system-role message.
 *
 * @param {string} prompt – user text or a JSON messages array
 * @param {string} [system] – optional system message
 * @returns {Array<{role: string, content: string}>}
 */
function buildMessages(prompt, system) {
  if (prompt.trimStart().startsWith('[')) {
    try {
      const parsed = JSON.parse(prompt);
      if (Array.isArray(parsed)) {
        if (system && !parsed.some((m) => m?.role === 'system')) {
          return [{ role: 'system', content: system }, ...parsed];
        }
        return parsed;
      }
    } catch { /* not valid JSON — fall through to plain-text handling */ }
  }
  const msgs = [];
  if (system) msgs.push({ role: 'system', content: system });
  msgs.push({ role: 'user', content: prompt });
  return msgs;
}
335
+
336
/**
 * Read a required environment variable.
 *
 * @param {string} key – variable name
 * @returns {string} the variable's value
 * @throws {Error} when the variable is unset or empty, with a hint about
 *   using --base-url with an auth-handling gateway instead
 */
function env(key) {
  const value = process.env[key];
  if (value) return value;
  throw new Error(
    `Missing environment variable ${key}. ` +
    `Set it or pass --base-url to use a gateway that handles auth.`
  );
}
346
+
347
+ // ---------------------------------------------------------------------------
348
+ // Cost calculation
349
+ // ---------------------------------------------------------------------------
350
+
351
/**
 * Dollar cost of a single call: the per-1k-token input and output rates
 * applied to the respective token counts.
 *
 * @param {object} model – { inputCostPer1k, outputCostPer1k }
 * @param {number} inputTokens
 * @param {number} outputTokens
 * @returns {number} cost in USD
 */
export function calculateCost(model, inputTokens, outputTokens) {
  const inputCost = (inputTokens / 1000) * model.inputCostPer1k;
  const outputCost = (outputTokens / 1000) * model.outputCostPer1k;
  return inputCost + outputCost;
}