mod8-cli 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -0
- package/LICENSE +21 -0
- package/README.md +239 -0
- package/bin/mod8.js +2 -0
- package/dist/cli.js +302 -0
- package/dist/commands/addProvider.js +105 -0
- package/dist/commands/all.js +158 -0
- package/dist/commands/chat.js +855 -0
- package/dist/commands/config.js +29 -0
- package/dist/commands/devAuthStatus.js +34 -0
- package/dist/commands/devHostAsk.js +51 -0
- package/dist/commands/devHostSystem.js +15 -0
- package/dist/commands/devResolve.js +54 -0
- package/dist/commands/devSimulate.js +235 -0
- package/dist/commands/devWorkAsk.js +55 -0
- package/dist/commands/intentRouting.js +280 -0
- package/dist/commands/keys.js +55 -0
- package/dist/commands/list.js +27 -0
- package/dist/commands/login.js +147 -0
- package/dist/commands/logout.js +17 -0
- package/dist/commands/prompt.js +63 -0
- package/dist/commands/providers.js +30 -0
- package/dist/commands/verify.js +5 -0
- package/dist/input/compose.js +37 -0
- package/dist/input/files.js +49 -0
- package/dist/input/stdin.js +14 -0
- package/dist/providers/anthropic.js +115 -0
- package/dist/providers/displayName.js +25 -0
- package/dist/providers/errorHints.js +175 -0
- package/dist/providers/generic.js +331 -0
- package/dist/providers/genericChat.js +265 -0
- package/dist/providers/google.js +63 -0
- package/dist/providers/hostSystem.js +173 -0
- package/dist/providers/index.js +38 -0
- package/dist/providers/mock.js +87 -0
- package/dist/providers/modelResolution.js +42 -0
- package/dist/providers/openai.js +75 -0
- package/dist/providers/pricing.js +47 -0
- package/dist/providers/proxy.js +148 -0
- package/dist/providers/registry.js +196 -0
- package/dist/providers/types.js +1 -0
- package/dist/providers/workSystem.js +33 -0
- package/dist/storage/auth.js +65 -0
- package/dist/storage/config.js +35 -0
- package/dist/storage/keys.js +59 -0
- package/dist/storage/providers.js +337 -0
- package/dist/storage/sessions.js +150 -0
- package/dist/types.js +9 -0
- package/dist/util/debug.js +79 -0
- package/dist/util/errors.js +157 -0
- package/dist/util/prompt.js +111 -0
- package/dist/util/secrets.js +110 -0
- package/dist/util/text.js +53 -0
- package/dist/util/time.js +25 -0
- package/dist/verify/runner.js +437 -0
- package/package.json +69 -0
- package/specs/all-mode.yaml +44 -0
- package/specs/behavior/auto-fallback.yaml +49 -0
- package/specs/behavior/bare-name-routing.yaml +223 -0
- package/specs/behavior/bare-paste-confirm.yaml +125 -0
- package/specs/behavior/env-var-respected.yaml +108 -0
- package/specs/behavior/error-fidelity.yaml +92 -0
- package/specs/behavior/error-hints.yaml +160 -0
- package/specs/behavior/fresh-vs-resume.yaml +94 -0
- package/specs/behavior/fuzzy-match.yaml +208 -0
- package/specs/behavior/host-self-knowledge-fresh.yaml +66 -0
- package/specs/behavior/intent-no-mismatch.yaml +115 -0
- package/specs/behavior/login-logout.yaml +97 -0
- package/specs/behavior/no-model-allowlist.yaml +80 -0
- package/specs/behavior/paste-key.yaml +342 -0
- package/specs/behavior/provider-switching.yaml +186 -0
- package/specs/behavior/providers-json-respected.yaml +106 -0
- package/specs/behavior/self-knowledge.yaml +119 -0
- package/specs/behavior/stress-session.yaml +226 -0
- package/specs/behavior/switch-back-when-failing.yaml +90 -0
- package/specs/behavior/work-character.yaml +109 -0
- package/specs/chat-meta.yaml +349 -0
- package/specs/chat-startup.yaml +148 -0
- package/specs/chat.yaml +91 -0
- package/specs/config.yaml +42 -0
- package/specs/install.yaml +112 -0
- package/specs/keys.yaml +81 -0
- package/specs/one-shot.yaml +65 -0
- package/specs/pipe-and-files.yaml +40 -0
- package/specs/providers.yaml +172 -0
- package/specs/sessions.yaml +115 -0

package/dist/providers/generic.js
@@ -0,0 +1,331 @@
/**
 * Generic provider client factory.
 *
 * Given a ProviderEntry from the store, returns a ProviderClient that calls
 * the right SDK based on apiType. Three dispatch paths:
 *
 * - anthropic: @anthropic-ai/sdk (native Messages API)
 * - gemini: @google/generative-ai (native)
 * - openai-compat: openai SDK with a custom baseURL — covers OpenAI,
 *   DeepSeek, Mistral, Groq, OpenRouter, xAI, Together, …
 *
 * The mock provider short-circuits everything when MOD8_MOCK=1.
 */
import Anthropic from '@anthropic-ai/sdk';
import OpenAI from 'openai';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { priceFor } from './pricing.js';
import { resolveConfigured } from '../storage/providers.js';
import { templateById } from './registry.js';
import { mockProvider } from './mock.js';
import { resolveModel } from './modelResolution.js';
import { debugProviderCall, debugProviderError, debugProviderResponse, } from '../util/debug.js';
/**
 * Build a ProviderClient bound to a specific configured provider id.
 * Throws a friendly error if the provider isn't configured.
 */
export async function buildProviderClient(id) {
    if (process.env.MOD8_MOCK === '1')
        return mockProvider(id);
    const entry = await resolveConfigured(id);
    if (!entry) {
        const tpl = templateById(id);
        const label = tpl?.name ?? id;
        throw new Error(`No ${label} key configured. Run: mod8 keys set ${id}` +
            (tpl ? '' : `, or mod8 add-provider for custom ones`) +
            '.');
    }
    return clientForEntry(id, entry);
}
function clientForEntry(id, entry) {
    switch (entry.apiType) {
        case 'anthropic':
            return anthropicClient(id, entry);
        case 'openai-compat':
            return openaiCompatClient(id, entry);
        case 'gemini':
            return geminiClient(id, entry);
    }
}
// ---------- Anthropic ----------
function anthropicClient(id, entry) {
    const client = new Anthropic({ apiKey: entry.apiKey });
    return {
        id,
        defaultModel: entry.defaultModel,
        async call(prompt, opts = {}) {
            const resolved = resolveModel(id, opts.model, entry.defaultModel);
            const model = resolved.model;
            debugProviderCall({
                providerId: id,
                apiType: 'anthropic',
                model,
                modelSource: resolved.source,
                apiKey: entry.apiKey,
                promptPreview: prompt,
            });
            const start = Date.now();
            try {
                const res = await client.messages.create({
                    model,
                    max_tokens: opts.maxTokens ?? 1024,
                    messages: [{ role: 'user', content: prompt }],
                });
                const latencyMs = Date.now() - start;
                const text = res.content
                    .filter((b) => b.type === 'text')
                    .map((b) => b.text)
                    .join('');
                const inputTokens = res.usage.input_tokens;
                const outputTokens = res.usage.output_tokens;
                const actualModel = res.model ?? model;
                debugProviderResponse(id, actualModel, { input: inputTokens, output: outputTokens, latencyMs });
                return {
                    text,
                    inputTokens,
                    outputTokens,
                    costUsd: priceFor(actualModel, inputTokens, outputTokens),
                    latencyMs,
                    model: actualModel,
                };
            }
            catch (err) {
                debugProviderError(id, err);
                throw err;
            }
        },
        async *stream(prompt, opts = {}) {
            const resolved = resolveModel(id, opts.model, entry.defaultModel);
            const model = resolved.model;
            debugProviderCall({
                providerId: id,
                apiType: 'anthropic',
                model,
                modelSource: resolved.source,
                apiKey: entry.apiKey,
                promptPreview: prompt,
            });
            const start = Date.now();
            try {
                const ms = client.messages.stream({
                    model,
                    max_tokens: opts.maxTokens ?? 1024,
                    messages: [{ role: 'user', content: prompt }],
                });
                for await (const ev of ms) {
                    if (ev.type === 'content_block_delta' && ev.delta.type === 'text_delta') {
                        yield { type: 'text', delta: ev.delta.text };
                    }
                }
                const final = await ms.finalMessage();
                const latencyMs = Date.now() - start;
                const inputTokens = final.usage.input_tokens;
                const outputTokens = final.usage.output_tokens;
                const actualModel = final.model ?? model;
                debugProviderResponse(id, actualModel, { input: inputTokens, output: outputTokens, latencyMs });
                yield {
                    type: 'done',
                    usage: {
                        inputTokens,
                        outputTokens,
                        latencyMs,
                        model: actualModel,
                        costUsd: priceFor(actualModel, inputTokens, outputTokens),
                    },
                };
            }
            catch (err) {
                debugProviderError(id, err);
                throw err;
            }
        },
    };
}
// ---------- OpenAI-compatible (OpenAI, DeepSeek, Mistral, Groq, ...) ----------
function openaiCompatClient(id, entry) {
    const client = new OpenAI({
        apiKey: entry.apiKey,
        baseURL: entry.baseUrl,
    });
    return {
        id,
        defaultModel: entry.defaultModel,
        async call(prompt, opts = {}) {
            const resolved = resolveModel(id, opts.model, entry.defaultModel);
            const model = resolved.model;
            debugProviderCall({
                providerId: id,
                apiType: 'openai-compat',
                model,
                modelSource: resolved.source,
                baseUrl: entry.baseUrl,
                apiKey: entry.apiKey,
                promptPreview: prompt,
            });
            const start = Date.now();
            try {
                const res = await client.chat.completions.create({
                    model,
                    messages: [{ role: 'user', content: prompt }],
                    max_tokens: opts.maxTokens ?? 1024,
                });
                const latencyMs = Date.now() - start;
                const text = res.choices[0]?.message?.content ?? '';
                const inputTokens = res.usage?.prompt_tokens ?? 0;
                const outputTokens = res.usage?.completion_tokens ?? 0;
                const actualModel = res.model ?? model;
                debugProviderResponse(id, actualModel, { input: inputTokens, output: outputTokens, latencyMs });
                return {
                    text,
                    inputTokens,
                    outputTokens,
                    costUsd: priceFor(actualModel, inputTokens, outputTokens),
                    latencyMs,
                    model: actualModel,
                };
            }
            catch (err) {
                debugProviderError(id, err);
                throw err;
            }
        },
        async *stream(prompt, opts = {}) {
            const resolved = resolveModel(id, opts.model, entry.defaultModel);
            const model = resolved.model;
            debugProviderCall({
                providerId: id,
                apiType: 'openai-compat',
                model,
                modelSource: resolved.source,
                baseUrl: entry.baseUrl,
                apiKey: entry.apiKey,
                promptPreview: prompt,
            });
            const start = Date.now();
            try {
                const stream = await client.chat.completions.create({
                    model,
                    messages: [{ role: 'user', content: prompt }],
                    max_tokens: opts.maxTokens ?? 1024,
                    stream: true,
                    stream_options: { include_usage: true },
                });
                let inputTokens = 0;
                let outputTokens = 0;
                let actualModel = model;
                for await (const chunk of stream) {
                    const delta = chunk.choices[0]?.delta?.content;
                    if (delta)
                        yield { type: 'text', delta };
                    if (chunk.usage) {
                        inputTokens = chunk.usage.prompt_tokens ?? 0;
                        outputTokens = chunk.usage.completion_tokens ?? 0;
                    }
                    if (chunk.model)
                        actualModel = chunk.model;
                }
                const latencyMs = Date.now() - start;
                debugProviderResponse(id, actualModel, { input: inputTokens, output: outputTokens, latencyMs });
                yield {
                    type: 'done',
                    usage: {
                        inputTokens,
                        outputTokens,
                        latencyMs,
                        model: actualModel,
                        costUsd: priceFor(actualModel, inputTokens, outputTokens),
                    },
                };
            }
            catch (err) {
                debugProviderError(id, err);
                throw err;
            }
        },
    };
}
// ---------- Gemini ----------
function geminiClient(id, entry) {
    const genAI = new GoogleGenerativeAI(entry.apiKey);
    return {
        id,
        defaultModel: entry.defaultModel,
        async call(prompt, opts = {}) {
            const resolved = resolveModel(id, opts.model, entry.defaultModel);
            const modelName = resolved.model;
            debugProviderCall({
                providerId: id,
                apiType: 'gemini',
                model: modelName,
                modelSource: resolved.source,
                apiKey: entry.apiKey,
                promptPreview: prompt,
            });
            const model = genAI.getGenerativeModel({ model: modelName });
            const start = Date.now();
            try {
                const result = await model.generateContent(prompt);
                const latencyMs = Date.now() - start;
                const text = result.response.text();
                const usage = result.response.usageMetadata;
                const inputTokens = usage?.promptTokenCount ?? 0;
                const outputTokens = usage?.candidatesTokenCount ?? 0;
                debugProviderResponse(id, modelName, { input: inputTokens, output: outputTokens, latencyMs });
                return {
                    text,
                    inputTokens,
                    outputTokens,
                    costUsd: priceFor(modelName, inputTokens, outputTokens),
                    latencyMs,
                    model: modelName,
                };
            }
            catch (err) {
                debugProviderError(id, err);
                throw err;
            }
        },
        async *stream(prompt, opts = {}) {
            const resolved = resolveModel(id, opts.model, entry.defaultModel);
            const modelName = resolved.model;
            debugProviderCall({
                providerId: id,
                apiType: 'gemini',
                model: modelName,
                modelSource: resolved.source,
                apiKey: entry.apiKey,
                promptPreview: prompt,
            });
            const model = genAI.getGenerativeModel({ model: modelName });
            const start = Date.now();
            try {
                const result = await model.generateContentStream(prompt);
                for await (const chunk of result.stream) {
                    const text = chunk.text();
                    if (text)
                        yield { type: 'text', delta: text };
                }
                const final = await result.response;
                const usage = final.usageMetadata;
                const inputTokens = usage?.promptTokenCount ?? 0;
                const outputTokens = usage?.candidatesTokenCount ?? 0;
                const latencyMs = Date.now() - start;
                debugProviderResponse(id, modelName, { input: inputTokens, output: outputTokens, latencyMs });
                yield {
                    type: 'done',
                    usage: {
                        inputTokens,
                        outputTokens,
                        latencyMs,
                        model: modelName,
                        costUsd: priceFor(modelName, inputTokens, outputTokens),
                    },
                };
            }
            catch (err) {
                debugProviderError(id, err);
                throw err;
            }
        },
    };
}
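
A minimal sketch of driving this factory from application code, assuming an Anthropic key has already been configured with mod8 keys set anthropic. The import path, provider id, and prompt text are illustrative assumptions; the return and event shapes follow the call() and stream() implementations above.

// Illustrative only: the import path and prompts are assumptions, not part of the package.
import { buildProviderClient } from 'mod8-cli/dist/providers/generic.js';

const client = await buildProviderClient('anthropic');

// One-shot call: resolves to { text, inputTokens, outputTokens, costUsd, latencyMs, model }.
const res = await client.call('Say hello in five words.', { maxTokens: 256 });
console.log(res.text, `~$${res.costUsd.toFixed(4)} in ${res.latencyMs}ms`);

// Streaming: text deltas first, then a single 'done' event carrying usage.
for await (const ev of client.stream('Explain the factory pattern in one sentence.')) {
    if (ev.type === 'text') process.stdout.write(ev.delta);
    else if (ev.type === 'done') console.log(`\n[${ev.usage.model}] ${ev.usage.outputTokens} output tokens`);
}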
package/dist/providers/genericChat.js
@@ -0,0 +1,265 @@
/**
 * Generic multi-turn streaming chat — used by the chat REPL.
 *
 * Same dispatch as generic.ts but for the multi-message API surface
 * (system prompt + messages array, with cancellation via AbortSignal).
 */
import Anthropic from '@anthropic-ai/sdk';
import OpenAI from 'openai';
import { GoogleGenerativeAI } from '@google/generative-ai';
import { priceFor } from './pricing.js';
import { resolveConfigured } from '../storage/providers.js';
import { templateById } from './registry.js';
import { resolveModel } from './modelResolution.js';
import { debugProviderCall, debugProviderError, } from '../util/debug.js';
import { readAuth, effectiveProxyUrl } from '../storage/auth.js';
import { toProxyProviderId } from './proxy.js';
export async function* streamProviderChat(opts) {
    // Proxy short-circuit: when logged in via mod8 login, all four built-in
    // providers route through the hosted proxy. Custom OpenAI-compat ids
    // still hit local providers.json below.
    if (process.env.MOD8_MOCK !== '1') {
        const auth = await readAuth();
        if (auth) {
            const proxyId = toProxyProviderId(opts.providerId);
            if (proxyId) {
                yield* streamProxyChat(auth, proxyId, opts);
                return;
            }
        }
    }
    const entry = await resolveConfigured(opts.providerId);
    if (!entry) {
        const tpl = templateById(opts.providerId);
        const label = tpl?.name ?? opts.providerId;
        throw new Error(`No ${label} key configured. Run: mod8 keys set ${opts.providerId}` +
            (tpl ? '' : `, or mod8 add-provider for custom ones`) +
            '.');
    }
    const resolved = resolveModel(opts.providerId, opts.model, entry.defaultModel);
    const model = resolved.model;
    // Preview the most recent user turn (chat REPL prompts are conversation
    // history, not a single user message — first ~200 chars is enough).
    const lastUser = [...opts.messages].reverse().find((m) => m.role === 'user');
    debugProviderCall({
        providerId: opts.providerId,
        apiType: entry.apiType,
        model,
        modelSource: resolved.source,
        baseUrl: entry.baseUrl,
        apiKey: entry.apiKey,
        promptPreview: lastUser?.content ?? '',
    });
    try {
        switch (entry.apiType) {
            case 'anthropic':
                yield* streamAnthropic(entry, model, opts);
                return;
            case 'openai-compat':
                yield* streamOpenAICompat(entry, model, opts);
                return;
            case 'gemini':
                yield* streamGemini(entry, model, opts);
                return;
        }
    }
    catch (err) {
        debugProviderError(opts.providerId, err);
        throw err;
    }
}
async function* streamAnthropic(entry, model, opts) {
    const client = new Anthropic({ apiKey: entry.apiKey });
    const start = Date.now();
    const ms = client.messages.stream({
        model,
        max_tokens: opts.maxTokens ?? 4096,
        system: opts.system,
        messages: opts.messages,
    }, { signal: opts.signal });
    for await (const event of ms) {
        if (event.type === 'content_block_delta' &&
            event.delta.type === 'text_delta') {
            yield { type: 'text', delta: event.delta.text };
        }
    }
    const final = await ms.finalMessage();
    const latencyMs = Date.now() - start;
    const inputTokens = final.usage.input_tokens;
    const outputTokens = final.usage.output_tokens;
    const actualModel = final.model ?? model;
    yield {
        type: 'done',
        usage: {
            inputTokens,
            outputTokens,
            latencyMs,
            model: actualModel,
            costUsd: priceFor(actualModel, inputTokens, outputTokens),
        },
    };
}
async function* streamOpenAICompat(entry, model, opts) {
    const client = new OpenAI({
        apiKey: entry.apiKey,
        baseURL: entry.baseUrl,
    });
    const start = Date.now();
    const messages = [
        { role: 'system', content: opts.system },
        ...opts.messages,
    ];
    const stream = await client.chat.completions.create({
        model,
        messages,
        max_tokens: opts.maxTokens ?? 4096,
        stream: true,
        stream_options: { include_usage: true },
    }, { signal: opts.signal });
    let inputTokens = 0;
    let outputTokens = 0;
    let actualModel = model;
    for await (const chunk of stream) {
        const delta = chunk.choices[0]?.delta?.content;
        if (delta)
            yield { type: 'text', delta };
        if (chunk.usage) {
            inputTokens = chunk.usage.prompt_tokens ?? 0;
            outputTokens = chunk.usage.completion_tokens ?? 0;
        }
        if (chunk.model)
            actualModel = chunk.model;
    }
    const latencyMs = Date.now() - start;
    yield {
        type: 'done',
        usage: {
            inputTokens,
            outputTokens,
            latencyMs,
            model: actualModel,
            costUsd: priceFor(actualModel, inputTokens, outputTokens),
        },
    };
}
async function* streamProxyChat(auth, providerId, opts) {
    // Default models match makeProxyClient for parity.
    const defaultByProvider = {
        anthropic: 'claude-sonnet-4-6',
        openai: 'gpt-4o',
        google: 'gemini-2.5-flash',
        deepseek: 'deepseek-chat',
    };
    const resolved = resolveModel(opts.providerId, opts.model, defaultByProvider[providerId]);
    const model = resolved.model;
    const proxyUrl = effectiveProxyUrl(auth);
    const start = Date.now();
    const resp = await fetch(`${proxyUrl}/v1/chat`, {
        method: 'POST',
        headers: {
            Authorization: `Bearer ${auth.mod8Key}`,
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({
            provider: providerId,
            model,
            system: opts.system,
            messages: opts.messages,
            ...(opts.maxTokens !== undefined ? { maxTokens: opts.maxTokens } : {}),
        }),
        signal: opts.signal,
    });
    if (!resp.ok) {
        const detail = await resp.text().catch(() => '');
        throw new Error(`mod8 proxy: ${resp.status} ${resp.statusText}${detail ? ` — ${detail.slice(0, 200)}` : ''}`);
    }
    if (!resp.body)
        throw new Error('mod8 proxy: empty response body');
    const reader = resp.body.getReader();
    const decoder = new TextDecoder();
    let buf = '';
    let inputTokens = 0;
    let outputTokens = 0;
    let chargedMicros = 0;
    let sawDone = false;
    while (true) {
        const { done, value } = await reader.read();
        if (done)
            break;
        buf += decoder.decode(value, { stream: true });
        let idx;
        while ((idx = buf.indexOf('\n\n')) >= 0) {
            const chunk = buf.slice(0, idx);
            buf = buf.slice(idx + 2);
            for (const line of chunk.split('\n')) {
                if (!line.startsWith('data: '))
                    continue;
                try {
                    const ev = JSON.parse(line.slice(6));
                    if (ev.type === 'text')
                        yield { type: 'text', delta: ev.delta };
                    else if (ev.type === 'done') {
                        inputTokens = ev.tokensIn;
                        outputTokens = ev.tokensOut;
                        chargedMicros = ev.chargedMicros;
                        sawDone = true;
                    }
                    else if (ev.type === 'error') {
                        throw new Error(`mod8 proxy: ${ev.error}`);
                    }
                }
                catch {
                    // ignore parse errors on non-JSON lines
                }
            }
        }
    }
    if (!sawDone)
        throw new Error('mod8 proxy: stream ended without a done event');
    yield {
        type: 'done',
        usage: {
            inputTokens,
            outputTokens,
            latencyMs: Date.now() - start,
            model,
            costUsd: chargedMicros / 1_000_000,
        },
    };
}
async function* streamGemini(entry, model, opts) {
    const genAI = new GoogleGenerativeAI(entry.apiKey);
    const m = genAI.getGenerativeModel({ model, systemInstruction: opts.system });
    // Gemini takes a history array + a final user message; reshape.
    const history = opts.messages.slice(0, -1).map((msg) => ({
        role: msg.role === 'assistant' ? 'model' : 'user',
        parts: [{ text: msg.content }],
    }));
    const last = opts.messages[opts.messages.length - 1];
    const userText = last && last.role === 'user' ? last.content : '';
    const chat = m.startChat({ history });
    const start = Date.now();
    const result = await chat.sendMessageStream(userText);
    for await (const chunk of result.stream) {
        const text = chunk.text();
        if (text)
            yield { type: 'text', delta: text };
        if (opts.signal?.aborted)
            throw new Error('aborted');
    }
    const final = await result.response;
    const usage = final.usageMetadata;
    const inputTokens = usage?.promptTokenCount ?? 0;
    const outputTokens = usage?.candidatesTokenCount ?? 0;
    const latencyMs = Date.now() - start;
    yield {
        type: 'done',
        usage: {
            inputTokens,
            outputTokens,
            latencyMs,
            model,
            costUsd: priceFor(model, inputTokens, outputTokens),
        },
    };
}
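
A minimal sketch of consuming streamProviderChat for one chat turn, assuming a locally configured OpenAI key (no mod8 login, so the proxy short-circuit above is skipped). The import path, provider id, model, and messages are illustrative assumptions; cancellation flows through the AbortSignal passed in opts, and the yielded events match the generators above.

// Illustrative only: import path, provider id, and model are assumptions.
import { streamProviderChat } from 'mod8-cli/dist/providers/genericChat.js';

const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), 30_000); // bail out if the turn hangs

for await (const ev of streamProviderChat({
    providerId: 'openai',
    model: 'gpt-4o-mini',
    system: 'You are a terse assistant.',
    messages: [
        { role: 'user', content: 'Hi!' },
        { role: 'assistant', content: 'Hello. What do you need?' },
        { role: 'user', content: 'Summarise your last reply in three words.' },
    ],
    maxTokens: 512,
    signal: controller.signal,
})) {
    if (ev.type === 'text') process.stdout.write(ev.delta);
    else if (ev.type === 'done') console.log(`\n~$${ev.usage.costUsd.toFixed(4)}, ${ev.usage.latencyMs}ms`);
}
clearTimeout(timer);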
package/dist/providers/google.js
@@ -0,0 +1,63 @@
import { GoogleGenerativeAI } from '@google/generative-ai';
import { getKey } from '../storage/keys.js';
import { priceFor } from './pricing.js';
const DEFAULT_MODEL = 'gemini-2.0-flash';
async function buildClient() {
    // Google ecosystem uses both names; honour either.
    const apiKey = process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY ?? (await getKey('google'));
    if (!apiKey) {
        throw new Error('No Google key configured. Run: mod8 keys set google, or set GOOGLE_API_KEY (or GEMINI_API_KEY).');
    }
    return new GoogleGenerativeAI(apiKey);
}
export const googleProvider = {
    id: 'google',
    defaultModel: DEFAULT_MODEL,
    async call(prompt, opts = {}) {
        const genAI = await buildClient();
        const modelName = opts.model ?? process.env.MOD8_GOOGLE_MODEL ?? DEFAULT_MODEL;
        const model = genAI.getGenerativeModel({ model: modelName });
        const start = Date.now();
        const result = await model.generateContent(prompt);
        const latencyMs = Date.now() - start;
        const text = result.response.text();
        const usage = result.response.usageMetadata;
        const inputTokens = usage?.promptTokenCount ?? 0;
        const outputTokens = usage?.candidatesTokenCount ?? 0;
        return {
            text,
            inputTokens,
            outputTokens,
            costUsd: priceFor(modelName, inputTokens, outputTokens),
            latencyMs,
            model: modelName,
        };
    },
    async *stream(prompt, opts = {}) {
        const genAI = await buildClient();
        const modelName = opts.model ?? process.env.MOD8_GOOGLE_MODEL ?? DEFAULT_MODEL;
        const model = genAI.getGenerativeModel({ model: modelName });
        const start = Date.now();
        const result = await model.generateContentStream(prompt);
        for await (const chunk of result.stream) {
            const text = chunk.text();
            if (text)
                yield { type: 'text', delta: text };
        }
        const final = await result.response;
        const usage = final.usageMetadata;
        const inputTokens = usage?.promptTokenCount ?? 0;
        const outputTokens = usage?.candidatesTokenCount ?? 0;
        const latencyMs = Date.now() - start;
        yield {
            type: 'done',
            usage: {
                inputTokens,
                outputTokens,
                latencyMs,
                model: modelName,
                costUsd: priceFor(modelName, inputTokens, outputTokens),
            },
        };
    },
};
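
A minimal sketch of calling this standalone Google provider directly, assuming GEMINI_API_KEY (or GOOGLE_API_KEY) is exported in the environment, or a key was stored via mod8 keys set google. The import path is an assumption; with no explicit model option the code above falls back to MOD8_GOOGLE_MODEL and then gemini-2.0-flash.

// Illustrative only: the import path is an assumption. Key lookup order in the
// module above: GOOGLE_API_KEY, then GEMINI_API_KEY, then the stored 'google' key.
import { googleProvider } from 'mod8-cli/dist/providers/google.js';

const res = await googleProvider.call('Write a one-line haiku about token counting.');
console.log(res.text);
console.log(`${res.inputTokens} in / ${res.outputTokens} out, ~$${res.costUsd.toFixed(5)}, ${res.latencyMs}ms`);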