llm-diff 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +189 -0
- package/README.md +186 -0
- package/bin/llm-diff.js +8 -0
- package/package.json +56 -0
- package/src/cli.js +192 -0
- package/src/diff.js +148 -0
- package/src/index.js +10 -0
- package/src/providers.js +356 -0
- package/src/render.js +350 -0
package/src/diff.js
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core diff engine.
|
|
3
|
+
*
|
|
4
|
+
* Takes two prompts (or files), fires them at the same model,
|
|
5
|
+
* and returns a structured result with token/cost/latency deltas
|
|
6
|
+
* plus a textual diff of the responses.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { readFile } from 'node:fs/promises';
|
|
10
|
+
import { resolve } from 'node:path';
|
|
11
|
+
import { diffWords } from 'diff';
|
|
12
|
+
import { resolveModel, complete, calculateCost } from './providers.js';
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* @typedef {object} DiffResult
|
|
16
|
+
* @property {object} a – result from prompt A
|
|
17
|
+
* @property {object} b – result from prompt B
|
|
18
|
+
* @property {object} delta – computed deltas
|
|
19
|
+
* @property {Array} wordDiff – diff-lib change objects
|
|
20
|
+
* @property {object} model – resolved model info
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
/**
|
|
24
|
+
* Run the diff.
|
|
25
|
+
*
|
|
26
|
+
* @param {object} opts
|
|
27
|
+
* @param {string} opts.promptA – text or path to prompt A
|
|
28
|
+
* @param {string} opts.promptB – text or path to prompt B
|
|
29
|
+
* @param {string} opts.model – model name/alias
|
|
30
|
+
* @param {string} [opts.system] – optional system prompt (text or path)
|
|
31
|
+
* @param {string} [opts.baseUrl] – gateway URL override
|
|
32
|
+
* @param {number} [opts.maxTokens]
|
|
33
|
+
* @param {number} [opts.temperature]
|
|
34
|
+
* @param {number} [opts.timeout]
|
|
35
|
+
* @param {number} [opts.runs] – number of runs to average (default 1)
|
|
36
|
+
* @param {boolean}[opts.parallel] – run A and B in parallel (default true)
|
|
37
|
+
* @returns {Promise<DiffResult>}
|
|
38
|
+
*/
|
|
39
|
+
export async function runDiff(opts) {
  const model = resolveModel(opts.model);

  // Prompts (and the optional system prompt) may be inline text or file paths.
  const [promptA, promptB, system] = await Promise.all([
    loadInput(opts.promptA),
    loadInput(opts.promptB),
    opts.system ? loadInput(opts.system) : undefined,
  ]);

  const callOpts = {
    system,
    baseUrl: opts.baseUrl,
    maxTokens: opts.maxTokens,
    temperature: opts.temperature,
    timeout: opts.timeout,
  };

  const runs = Math.max(1, opts.runs || 1);
  const sequential = opts.parallel === false;

  // Collect per-run results for both prompts.
  const aResults = [];
  const bResults = [];

  for (let run = 0; run < runs; run++) {
    let resA;
    let resB;
    if (sequential) {
      resA = await complete(model, promptA, callOpts);
      resB = await complete(model, promptB, callOpts);
    } else {
      [resA, resB] = await Promise.all([
        complete(model, promptA, callOpts),
        complete(model, promptB, callOpts),
      ]);
    }
    aResults.push(resA);
    bResults.push(resB);
  }

  // Average numeric fields across runs; keep the last response text.
  const a = average(aResults);
  const b = average(bResults);

  // Per-side cost from the pricing table.
  a.cost = calculateCost(model, a.inputTokens, a.outputTokens);
  b.cost = calculateCost(model, b.inputTokens, b.outputTokens);

  // Percentage delta relative to A; 0 when the base is 0 to avoid division by zero.
  const pct = (diff, base) => (base ? (diff / base) * 100 : 0);

  const totalTokensA = a.inputTokens + a.outputTokens;
  const totalTokensB = b.inputTokens + b.outputTokens;

  const delta = {
    inputTokens: b.inputTokens - a.inputTokens,
    outputTokens: b.outputTokens - a.outputTokens,
    totalTokens: totalTokensB - totalTokensA,
    totalTokensPct: pct(totalTokensB - totalTokensA, totalTokensA),
    cost: b.cost - a.cost,
    costPct: pct(b.cost - a.cost, a.cost),
    latencyMs: b.latencyMs - a.latencyMs,
    latencyPct: pct(b.latencyMs - a.latencyMs, a.latencyMs),
  };

  // Word-level diff of the two response texts.
  const wordDiff = diffWords(a.text, b.text);

  return { a, b, delta, wordDiff, model, runs };
}
|
|
105
|
+
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
// Helpers
|
|
108
|
+
// ---------------------------------------------------------------------------
|
|
109
|
+
|
|
110
|
+
/**
|
|
111
|
+
* Load input — if it looks like a file path, read the file; otherwise treat as
|
|
112
|
+
* inline text.
|
|
113
|
+
*/
|
|
114
|
+
/**
 * Load an input: if it looks like a file path, read the file; otherwise return
 * the string unchanged as inline text.
 */
async function loadInput(input) {
  if (!input) return input;

  // Heuristic: a single-line, reasonably short string that ends in a
  // dot-extension looks like a file path rather than an inline prompt.
  const looksLikePath =
    input.length < 500 &&
    !input.includes('\n') &&
    /\.[a-z0-9]{1,6}$/i.test(input);

  if (!looksLikePath) return input;

  try {
    return await readFile(resolve(input), 'utf-8');
  } catch {
    // Not a readable file — fall back to treating it as literal text.
    return input;
  }
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Average numeric fields across multiple run results. Keep last text.
|
|
138
|
+
*/
|
|
139
|
+
/**
 * Average the numeric fields across multiple run results; keep the last text.
 */
function average(results) {
  const n = results.length;
  // Single run: return a shallow copy so callers can attach fields safely.
  if (n === 1) return { ...results[0] };

  const mean = (pick) => results.reduce((sum, r) => sum + pick(r), 0) / n;

  return {
    text: results.at(-1).text,
    inputTokens: Math.round(mean((r) => r.inputTokens)),
    outputTokens: Math.round(mean((r) => r.outputTokens)),
    latencyMs: mean((r) => r.latencyMs),
  };
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llm-diff — public API
|
|
3
|
+
*
|
|
4
|
+
* Usage:
|
|
5
|
+
* import { runDiff, resolveModel, listModels } from 'llm-diff';
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
export { runDiff } from './diff.js';
|
|
9
|
+
export { resolveModel, listModels, calculateCost, complete } from './providers.js';
|
|
10
|
+
export { render, renderModelList } from './render.js';
|
package/src/providers.js
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider registry — pricing, API call adapters, token counting.
|
|
3
|
+
*
|
|
4
|
+
* Each provider exports:
|
|
5
|
+
* models – map of model aliases → { id, inputCostPer1k, outputCostPer1k }
|
|
6
|
+
* complete() – (model, prompt, opts) → { text, inputTokens, outputTokens, latencyMs }
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// ---------------------------------------------------------------------------
|
|
10
|
+
// Pricing tables (USD per 1 000 tokens, as of early 2026)
|
|
11
|
+
// ---------------------------------------------------------------------------
|
|
12
|
+
|
|
13
|
+
// OpenAI chat/completions models.
const OPENAI_MODELS = {
  'gpt-4o': { id: 'gpt-4o', inputCostPer1k: 0.0025, outputCostPer1k: 0.01 },
  'gpt-4o-mini': { id: 'gpt-4o-mini', inputCostPer1k: 0.00015, outputCostPer1k: 0.0006 },
  'gpt-4-turbo': { id: 'gpt-4-turbo', inputCostPer1k: 0.01, outputCostPer1k: 0.03 },
  'gpt-4': { id: 'gpt-4', inputCostPer1k: 0.03, outputCostPer1k: 0.06 },
  'gpt-3.5-turbo': { id: 'gpt-3.5-turbo', inputCostPer1k: 0.0005, outputCostPer1k: 0.0015 },
  'o1': { id: 'o1', inputCostPer1k: 0.015, outputCostPer1k: 0.06 },
  'o1-mini': { id: 'o1-mini', inputCostPer1k: 0.003, outputCostPer1k: 0.012 },
  'o3-mini': { id: 'o3-mini', inputCostPer1k: 0.0011, outputCostPer1k: 0.0044 },
};

// Anthropic Messages API models. Aliases may differ from the wire-level id.
const ANTHROPIC_MODELS = {
  'claude-sonnet-4-20250514': { id: 'claude-sonnet-4-20250514', inputCostPer1k: 0.003, outputCostPer1k: 0.015 },
  'claude-3.5-haiku': { id: 'claude-3-5-haiku-20241022', inputCostPer1k: 0.0008, outputCostPer1k: 0.004 },
  'claude-3-opus': { id: 'claude-3-opus-20240229', inputCostPer1k: 0.015, outputCostPer1k: 0.075 },
};

// Google Gemini models.
const GEMINI_MODELS = {
  'gemini-2.0-flash': { id: 'gemini-2.0-flash', inputCostPer1k: 0.0001, outputCostPer1k: 0.0004 },
  'gemini-2.0-pro': { id: 'gemini-2.0-pro', inputCostPer1k: 0.00125, outputCostPer1k: 0.005 },
  'gemini-1.5-pro': { id: 'gemini-1.5-pro', inputCostPer1k: 0.00125, outputCostPer1k: 0.005 },
  'gemini-1.5-flash': { id: 'gemini-1.5-flash', inputCostPer1k: 0.000075, outputCostPer1k: 0.0003 },
};

// Groq-hosted open models (OpenAI-compatible endpoint).
const GROQ_MODELS = {
  'llama-3.3-70b': { id: 'llama-3.3-70b-versatile', inputCostPer1k: 0.00059, outputCostPer1k: 0.00079 },
  'llama-3.1-8b': { id: 'llama-3.1-8b-instant', inputCostPer1k: 0.00005, outputCostPer1k: 0.00008 },
  'mixtral-8x7b': { id: 'mixtral-8x7b-32768', inputCostPer1k: 0.00024, outputCostPer1k: 0.00024 },
  'gemma2-9b': { id: 'gemma2-9b-it', inputCostPer1k: 0.0002, outputCostPer1k: 0.0002 },
};
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// Provider detection
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
// Flattened union of every provider table, each entry tagged with its provider.
const ALL_MODELS = Object.fromEntries(
  [
    [OPENAI_MODELS, 'openai'],
    [ANTHROPIC_MODELS, 'anthropic'],
    [GEMINI_MODELS, 'gemini'],
    [GROQ_MODELS, 'groq'],
  ].flatMap(([table, provider]) => Object.entries(prefix(table, provider)))
);
|
|
54
|
+
|
|
55
|
+
/**
 * Return a copy of a model map with every entry tagged with its provider name.
 */
function prefix(map, provider) {
  return Object.fromEntries(
    Object.entries(map).map(([alias, info]) => [alias, { ...info, provider }])
  );
}
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Resolve a model string → { provider, id, inputCostPer1k, outputCostPer1k }.
|
|
65
|
+
* Throws if unknown.
|
|
66
|
+
*/
|
|
67
|
+
export function resolveModel(name) {
  const key = name.toLowerCase();

  // Bare alias lookup.
  const direct = ALL_MODELS[key];
  if (direct) return direct;

  // Allow full provider/model syntax: openai/gpt-4o
  const slash = key.indexOf('/');
  if (slash !== -1) {
    const prov = key.slice(0, slash);
    const candidate = ALL_MODELS[key.slice(slash + 1)];
    if (candidate?.provider === prov) return candidate;
  }

  throw new Error(
    `Unknown model "${name}". Run \`llm-diff --models\` to see supported models.`
  );
}
|
|
82
|
+
|
|
83
|
+
/**
|
|
84
|
+
* List every supported model grouped by provider.
|
|
85
|
+
*/
|
|
86
|
+
export function listModels() {
  // Group every registered model under its provider name.
  const grouped = {};
  for (const [alias, info] of Object.entries(ALL_MODELS)) {
    (grouped[info.provider] ??= []).push({ alias, ...info });
  }
  return grouped;
}
|
|
94
|
+
|
|
95
|
+
// ---------------------------------------------------------------------------
|
|
96
|
+
// API adapters
|
|
97
|
+
// ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Fire a chat completion and return normalised result.
|
|
101
|
+
*
|
|
102
|
+
* @param {object} model – resolved model from resolveModel()
|
|
103
|
+
* @param {string} prompt – the user message (or full messages JSON)
|
|
104
|
+
* @param {object} opts
|
|
105
|
+
* @param {string} [opts.system] – system message
|
|
106
|
+
* @param {string} [opts.baseUrl] – gateway override
|
|
107
|
+
* @param {number} [opts.maxTokens] – max output tokens (default 2048)
|
|
108
|
+
* @param {number} [opts.temperature] – temperature (default 0)
|
|
109
|
+
* @param {number} [opts.timeout] – request timeout ms (default 60000)
|
|
110
|
+
* @returns {Promise<{text: string, inputTokens: number, outputTokens: number, latencyMs: number}>}
|
|
111
|
+
*/
|
|
112
|
+
export async function complete(model, prompt, opts = {}) {
  // Dispatch to the provider-specific adapter.
  const adapters = {
    openai: openaiComplete,
    anthropic: anthropicComplete,
    gemini: geminiComplete,
    groq: groqComplete,
  };

  const adapter = adapters[model.provider];
  if (adapter === undefined) {
    throw new Error(`No adapter for provider "${model.provider}"`);
  }
  return adapter(model, prompt, opts);
}
|
|
122
|
+
|
|
123
|
+
// ---------------------------------------------------------------------------
|
|
124
|
+
// OpenAI-compatible (also covers Groq, llmhut gateway, any OpenAI-compat API)
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
// OpenAI adapter: standard endpoint unless a gateway override was given.
async function openaiComplete(model, prompt, opts) {
  return openaiCompatComplete(model, prompt, {
    ...opts,
    apiKey: env('OPENAI_API_KEY'),
    baseUrl: opts.baseUrl || 'https://api.openai.com/v1',
  });
}
|
|
132
|
+
|
|
133
|
+
// Groq adapter: Groq exposes an OpenAI-compatible API surface.
async function groqComplete(model, prompt, opts) {
  return openaiCompatComplete(model, prompt, {
    ...opts,
    apiKey: env('GROQ_API_KEY'),
    baseUrl: opts.baseUrl || 'https://api.groq.com/openai/v1',
  });
}
|
|
138
|
+
|
|
139
|
+
/**
 * Call an OpenAI-compatible /chat/completions endpoint.
 *
 * @param {object} model – resolved model (uses model.id / model.provider)
 * @param {string} prompt – user message or raw messages JSON
 * @param {object} opts – { apiKey, baseUrl, system, maxTokens, temperature, timeout }
 * @returns {Promise<{text: string, inputTokens: number, outputTokens: number, latencyMs: number}>}
 * @throws {Error} on non-2xx responses, and a descriptive timeout error when
 *   the request is aborted by the timeout timer.
 */
async function openaiCompatComplete(model, prompt, opts) {
  const {
    apiKey,
    baseUrl,
    system,
    maxTokens = 2048,
    temperature = 0,
    timeout = 60_000,
  } = opts;

  const messages = buildMessages(prompt, system);

  // Abort the request if it exceeds the timeout.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  const t0 = performance.now();

  try {
    // NOTE(review): OpenAI o-series models expect `max_completion_tokens`
    // instead of `max_tokens` — confirm before targeting o1/o3 through this
    // adapter directly.
    const res = await fetch(`${baseUrl}/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: model.id,
        messages,
        max_tokens: maxTokens,
        temperature,
      }),
      signal: controller.signal,
    });

    if (!res.ok) {
      const body = await res.text().catch(() => '');
      throw new Error(`${model.provider} API ${res.status}: ${body.slice(0, 300)}`);
    }

    const data = await res.json();
    const latencyMs = performance.now() - t0;

    return {
      text: data.choices?.[0]?.message?.content ?? '',
      inputTokens: data.usage?.prompt_tokens ?? 0,
      outputTokens: data.usage?.completion_tokens ?? 0,
      latencyMs,
    };
  } catch (err) {
    // Fix: a timeout previously surfaced as an opaque AbortError with no hint
    // of the configured limit; translate it and keep the original as `cause`.
    if (err.name === 'AbortError') {
      throw new Error(`${model.provider} request timed out after ${timeout} ms`, { cause: err });
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
|
|
189
|
+
|
|
190
|
+
// ---------------------------------------------------------------------------
|
|
191
|
+
// Anthropic
|
|
192
|
+
// ---------------------------------------------------------------------------
|
|
193
|
+
|
|
194
|
+
/**
 * Anthropic Messages API adapter.
 *
 * Same contract as the other adapters:
 * { text, inputTokens, outputTokens, latencyMs }.
 * The system prompt goes in the top-level `system` field, not in `messages`.
 *
 * @throws {Error} on non-2xx responses, and a descriptive timeout error when
 *   the request is aborted by the timeout timer.
 */
async function anthropicComplete(model, prompt, opts) {
  const apiKey = env('ANTHROPIC_API_KEY');
  const baseUrl = opts.baseUrl || 'https://api.anthropic.com';
  const {
    system,
    maxTokens = 2048,
    temperature = 0,
    timeout = 60_000,
  } = opts;

  const messages = buildMessages(prompt); // Anthropic takes system separately

  const body = {
    model: model.id,
    messages,
    max_tokens: maxTokens,
    temperature,
  };
  if (system) body.system = system;

  // Abort the request if it exceeds the timeout.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  const t0 = performance.now();

  try {
    const res = await fetch(`${baseUrl}/v1/messages`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      throw new Error(`Anthropic API ${res.status}: ${text.slice(0, 300)}`);
    }

    const data = await res.json();
    const latencyMs = performance.now() - t0;

    // Concatenate all text blocks; non-text block types are ignored.
    const text = data.content
      ?.filter(b => b.type === 'text')
      .map(b => b.text)
      .join('') ?? '';

    return {
      text,
      inputTokens: data.usage?.input_tokens ?? 0,
      outputTokens: data.usage?.output_tokens ?? 0,
      latencyMs,
    };
  } catch (err) {
    // Fix: a timeout previously surfaced as an opaque AbortError; translate
    // it into an actionable message and keep the original as `cause`.
    if (err.name === 'AbortError') {
      throw new Error(`Anthropic request timed out after ${timeout} ms`, { cause: err });
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
|
|
253
|
+
|
|
254
|
+
// ---------------------------------------------------------------------------
|
|
255
|
+
// Google Gemini
|
|
256
|
+
// ---------------------------------------------------------------------------
|
|
257
|
+
|
|
258
|
+
/**
 * Google Gemini generateContent adapter.
 *
 * Same contract as the other adapters:
 * { text, inputTokens, outputTokens, latencyMs }.
 *
 * @throws {Error} on non-2xx responses, and a descriptive timeout error when
 *   the request is aborted by the timeout timer.
 */
async function geminiComplete(model, prompt, opts) {
  const apiKey = env('GEMINI_API_KEY');
  const baseUrl = opts.baseUrl || 'https://generativelanguage.googleapis.com/v1beta';
  const {
    system,
    maxTokens = 2048,
    temperature = 0,
    timeout = 60_000,
  } = opts;

  const body = {
    contents: [{ role: 'user', parts: [{ text: prompt }] }],
    generationConfig: {
      maxOutputTokens: maxTokens,
      temperature,
    },
  };
  if (system) {
    body.systemInstruction = { parts: [{ text: system }] };
  }

  // Abort the request if it exceeds the timeout.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeout);
  const t0 = performance.now();

  try {
    // Fix: send the API key via the documented `x-goog-api-key` header rather
    // than a `?key=` query parameter, so the secret does not end up in proxy
    // or server access logs alongside the request URL.
    const res = await fetch(`${baseUrl}/models/${model.id}:generateContent`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': apiKey,
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });

    if (!res.ok) {
      const text = await res.text().catch(() => '');
      throw new Error(`Gemini API ${res.status}: ${text.slice(0, 300)}`);
    }

    const data = await res.json();
    const latencyMs = performance.now() - t0;

    const text = data.candidates?.[0]?.content?.parts
      ?.map(p => p.text)
      .join('') ?? '';

    const usage = data.usageMetadata ?? {};

    return {
      text,
      inputTokens: usage.promptTokenCount ?? 0,
      outputTokens: usage.candidatesTokenCount ?? 0,
      latencyMs,
    };
  } catch (err) {
    // Fix: a timeout previously surfaced as an opaque AbortError; translate
    // it into an actionable message and keep the original as `cause`.
    if (err.name === 'AbortError') {
      throw new Error(`Gemini request timed out after ${timeout} ms`, { cause: err });
    }
    throw err;
  } finally {
    clearTimeout(timer);
  }
}
|
|
317
|
+
|
|
318
|
+
// ---------------------------------------------------------------------------
|
|
319
|
+
// Helpers
|
|
320
|
+
// ---------------------------------------------------------------------------
|
|
321
|
+
|
|
322
|
+
/**
 * Build a chat messages array from a prompt and optional system message.
 * A prompt that parses as a JSON array is treated as a ready-made messages
 * list and returned as-is.
 */
function buildMessages(prompt, system) {
  if (prompt.trimStart().startsWith('[')) {
    try {
      const parsed = JSON.parse(prompt);
      if (Array.isArray(parsed)) return parsed;
    } catch { /* not JSON — treat as plain text */ }
  }
  return system
    ? [{ role: 'system', content: system }, { role: 'user', content: prompt }]
    : [{ role: 'user', content: prompt }];
}
|
|
335
|
+
|
|
336
|
+
/**
 * Read a required environment variable; throw a helpful error when it is
 * missing or empty.
 */
function env(key) {
  const value = process.env[key];
  if (value) return value;
  throw new Error(
    `Missing environment variable ${key}. ` +
    `Set it or pass --base-url to use a gateway that handles auth.`
  );
}
|
|
346
|
+
|
|
347
|
+
// ---------------------------------------------------------------------------
|
|
348
|
+
// Cost calculation
|
|
349
|
+
// ---------------------------------------------------------------------------
|
|
350
|
+
|
|
351
|
+
/**
 * USD cost of a call: per-1k-token rates applied to input and output counts.
 */
export function calculateCost(model, inputTokens, outputTokens) {
  const inputCost = (inputTokens / 1000) * model.inputCostPer1k;
  const outputCost = (outputTokens / 1000) * model.outputCostPer1k;
  return inputCost + outputCost;
}
|