@24klynx/llm 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +352 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +1357 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +29 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,1357 @@
|
|
|
1
|
+
import { LlmAuthError, LlmContextOverflowError, LlmError, LlmRateLimitError } from "@lynx/core";
|
|
2
|
+
//#region src/stream.ts
|
|
3
|
+
/**
|
|
4
|
+
* Stream\<T\> — pull‑based AsyncIterator backed by an internal queue.
|
|
5
|
+
*
|
|
6
|
+
* Producers call `enqueue()` / `done()` / `error()`.
|
|
7
|
+
* Consumers use `for await … of`.
|
|
8
|
+
*
|
|
9
|
+
* This is deliberately a class (not EventEmitter) so the consumer
|
|
10
|
+
* controls back‑pressure — when nobody is iterating, values stay in
|
|
11
|
+
* the queue until `maxQueueSize` is reached.
|
|
12
|
+
*/
|
|
13
|
+
/** Drop the oldest item when the queue exceeds this size. */
|
|
14
|
+
const DEFAULT_MAX_QUEUE = 256;
|
|
15
|
+
/**
 * A cold, single‑consumer async iterator fed by an internal queue.
 *
 * Producers call `enqueue()` / `done()` / `error()`; the consumer pulls with
 * `for await … of` (or `next()` directly). Values enqueued while nobody is
 * waiting are buffered; once more than `maxQueueSize` values are buffered
 * the oldest one is dropped.
 *
 * ```ts
 * const stream = new Stream<string>();
 * producer(stream);
 * for await (const chunk of stream) {
 *   console.log(chunk);
 * }
 * ```
 */
var Stream = class {
	/** Values produced while no consumer was waiting, oldest first. */
	_queue = [];
	/** `{ resolve, reject }` pairs of `next()` calls that are still waiting. */
	_resolvers = [];
	_done = false;
	_error = null;
	_maxQueue;
	/** @param maxQueueSize Number of values that may be buffered before the oldest is dropped. */
	constructor(maxQueueSize = DEFAULT_MAX_QUEUE) {
		this._maxQueue = maxQueueSize;
	}
	/** Push a value into the stream. Ignored once the stream has finished or failed. */
	enqueue(value) {
		if (this._done || this._error) return;
		const waiter = this._resolvers.shift();
		if (waiter) {
			// A consumer is already waiting: hand the value over directly.
			waiter.resolve({
				value,
				done: false
			});
			return;
		}
		this._queue.push(value);
		if (this._queue.length > this._maxQueue) this._queue.shift();
	}
	/** Signal that the stream has finished normally. */
	done() {
		if (this._done) return;
		this._done = true;
		const waiters = this._resolvers;
		this._resolvers = [];
		for (const { resolve } of waiters) resolve({
			value: void 0,
			done: true
		});
	}
	/**
	 * Propagate an error to the consumer.
	 *
	 * Waiting `next()` calls are rejected with `err` (not resolved as "done"),
	 * so a consumer parked inside `for await` observes the failure.
	 */
	error(err) {
		if (this._done || this._error) return;
		this._error = err;
		this._done = true;
		const waiters = this._resolvers;
		this._resolvers = [];
		for (const { reject } of waiters) reject(err);
	}
	[Symbol.asyncIterator]() {
		return this;
	}
	/**
	 * Pull the next value.
	 *
	 * Rejects if the stream has failed, yields buffered values first, reports
	 * `done` once the stream is finished and drained, and otherwise waits for
	 * the producer.
	 */
	next() {
		if (this._error) return Promise.reject(this._error);
		if (this._queue.length > 0) return Promise.resolve({
			value: this._queue.shift(),
			done: false
		});
		if (this._done) return Promise.resolve({
			value: void 0,
			done: true
		});
		return new Promise((resolve, reject) => {
			this._resolvers.push({
				resolve,
				reject
			});
		});
	}
};
|
|
89
|
+
//#endregion
|
|
90
|
+
//#region src/retry.ts
|
|
91
|
+
/**
|
|
92
|
+
* withRetry — exponential backoff with jitter and Retry‑After respect.
|
|
93
|
+
*
|
|
94
|
+
* Used by every LLM provider for transient errors (429, 529, ECONNRESET, etc.).
|
|
95
|
+
*
|
|
96
|
+
* Algorithm:
|
|
97
|
+
* delay = min(baseMs * 2^(attempt-1), maxMs)
|
|
98
|
+
* jitter = delay * uniform(0.75, 1.25)
|
|
99
|
+
* final = min(jitter, maxMs)
|
|
100
|
+
*/
|
|
101
|
+
/** Default delay before the first retry, in milliseconds. */
const BASE_DELAY_MS = 500;
/** Default upper bound for any single retry delay, in milliseconds. */
const MAX_DELAY_MS = 32_000;
/** Default number of retries after the initial attempt. */
const MAX_RETRIES = 10;
|
|
104
|
+
/** Pick a uniformly distributed random number, starting at `lo` and spanning `hi - lo`. */
function jitter(lo, hi) {
	const span = hi - lo;
	return lo + Math.random() * span;
}
|
|
108
|
+
/**
 * Extract the Retry‑After delay, in whole seconds, from an error's `headers`.
 *
 * `headers` may be a plain object (the header name is matched
 * case‑insensitively) or anything with a `get(name)` method, such as a
 * fetch `Headers` instance. The value may be a non‑negative number of
 * seconds or a date; a date is converted to the seconds remaining from now
 * and never goes below 0.
 *
 * @returns The delay in seconds, or `null` when the header is absent or unparseable.
 */
function parseRetryAfter(err) {
	const headers = err?.headers;
	if (!headers) return null;
	let raw;
	if (typeof headers.get === "function") raw = headers.get("retry-after");
	else {
		const key = Object.keys(headers).find((name) => name.toLowerCase() === "retry-after");
		raw = key === void 0 ? void 0 : headers[key];
	}
	if (raw == null) return null;
	const text = String(raw).trim();
	if (text === "") return null;
	// Only a purely numeric value counts as seconds; parseInt would also accept
	// the leading digits of a date such as "2015-10-21T07:28:00Z".
	if (/^\d+(?:\.\d+)?$/.test(text)) return Math.ceil(Number(text));
	const when = Date.parse(text);
	if (Number.isNaN(when)) return null;
	return Math.max(0, Math.ceil((when - Date.now()) / 1e3));
}
|
|
120
|
+
/**
 * Execute `fn` with exponential backoff.
 *
 * Only retries on errors accepted by `opts.isRetryable` (by default: rate
 * limits, `LlmError`s flagged `retryable`, and ECONNRESET / EPIPE /
 * ETIMEDOUT). Any other error is re‑thrown immediately, as is the last
 * error once `maxRetries` retries have been used. A 529 error is not
 * retried when `opts.isForeground === false`.
 *
 * Delay before retry number `attempt` (0‑based):
 * - with a Retry‑After value on the error: that many seconds, jittered only
 *   upwards (×1.0–1.25) so the retry never fires earlier than requested;
 * - otherwise `min(baseMs * 2^attempt, maxMs)` jittered by ×0.75–1.25.
 * Either way the final delay is capped at `maxMs`.
 *
 * ```ts
 * const result = await withRetry(() => fetchFromApi(), {
 *   baseMs: 500,
 *   maxMs: 32_000,
 * });
 * ```
 *
 * @param fn Async operation to run.
 * @param opts Optional `baseMs`, `maxMs`, `maxRetries`, `isRetryable`, `isForeground`.
 * @returns Whatever `fn` resolves to.
 */
async function withRetry(fn, opts = {}) {
	const baseMs = opts.baseMs ?? BASE_DELAY_MS;
	const maxMs = opts.maxMs ?? MAX_DELAY_MS;
	const maxRetries = opts.maxRetries ?? MAX_RETRIES;
	const isRetryable = opts.isRetryable ?? ((err) => {
		if (err instanceof LlmRateLimitError) return true;
		if (err instanceof LlmError && err.retryable) return true;
		const code = err.code;
		return code === "ECONNRESET" || code === "EPIPE" || code === "ETIMEDOUT";
	});
	for (let attempt = 0; attempt <= maxRetries; attempt++) {
		try {
			return await fn();
		} catch (err) {
			const error = err instanceof Error ? err : new Error(String(err));
			if (attempt === maxRetries || !isRetryable(error)) throw error;
			if (error.status === 529 && opts.isForeground === false) throw error;
			const retryAfter = parseRetryAfter(error);
			let waitMs;
			if (retryAfter) {
				// The server named a delay: only ever wait longer, never shorter.
				waitMs = retryAfter * 1e3 * jitter(1, 1.25);
			} else {
				const backoff = Math.min(baseMs * 2 ** attempt, maxMs);
				waitMs = backoff * jitter(.75, 1.25);
			}
			waitMs = Math.min(waitMs, maxMs);
			await new Promise((resolve) => setTimeout(resolve, waitMs));
		}
	}
	// Only reachable when the loop body never ran (negative `maxRetries`).
	throw new LlmError("withRetry exhausted all attempts", {
		category: "llm",
		recoverable: false,
		retryable: false,
		diagnosticHint: "llm_retry_exhausted"
	});
}
|
|
162
|
+
//#endregion
|
|
163
|
+
//#region src/capabilities.ts
|
|
164
|
+
/**
 * Capability table, one entry per model, keyed by "provider/modelId".
 *
 * Entries are plain data: supporting another model means adding a record
 * here, nothing else.
 */
const CAPABILITIES = {
	// DeepSeek
	"deepseek/deepseek-chat": {
		contextWindow: 128_000,
		maxOutput: 8_192,
		supportsReasoning: false,
		supportsStreaming: true,
		supportsToolUse: true,
		supportsVision: false,
		promptCacheEnabled: true
	},
	"deepseek/deepseek-reasoner": {
		contextWindow: 128_000,
		maxOutput: 8_192,
		supportsReasoning: true,
		supportsStreaming: true,
		supportsToolUse: false,
		supportsVision: false,
		promptCacheEnabled: true
	},
	// OpenAI
	"openai/gpt-4o": {
		contextWindow: 128_000,
		maxOutput: 16_384,
		supportsReasoning: false,
		supportsStreaming: true,
		supportsToolUse: true,
		supportsVision: true,
		promptCacheEnabled: false
	},
	"openai/gpt-4o-mini": {
		contextWindow: 128_000,
		maxOutput: 16_384,
		supportsReasoning: false,
		supportsStreaming: true,
		supportsToolUse: true,
		supportsVision: true,
		promptCacheEnabled: false
	},
	"openai/o3-mini": {
		contextWindow: 200_000,
		maxOutput: 100_000,
		supportsReasoning: true,
		supportsStreaming: true,
		supportsToolUse: false,
		supportsVision: false,
		promptCacheEnabled: false
	},
	// Anthropic
	"anthropic/claude-opus-4-8": {
		contextWindow: 200_000,
		maxOutput: 32_000,
		supportsReasoning: true,
		supportsStreaming: true,
		supportsToolUse: true,
		supportsVision: true,
		promptCacheEnabled: true
	},
	"anthropic/claude-sonnet-4-6": {
		contextWindow: 200_000,
		maxOutput: 16_384,
		supportsReasoning: true,
		supportsStreaming: true,
		supportsToolUse: true,
		supportsVision: true,
		promptCacheEnabled: true
	},
	"anthropic/claude-haiku-4-5": {
		contextWindow: 200_000,
		maxOutput: 8_192,
		supportsReasoning: false,
		supportsStreaming: true,
		supportsToolUse: true,
		supportsVision: true,
		promptCacheEnabled: true
	}
};
|
|
244
|
+
/**
 * Look up the capability record stored under `provider/modelId`.
 *
 * Unknown models yield `undefined`; the caller decides whether to fall
 * back or to raise a {@link import('@lynx/core').ConfigError}.
 */
function getCapability(providerId, modelId) {
	const key = `${providerId}/${modelId}`;
	return CAPABILITIES[key];
}
|
|
252
|
+
/**
 * List every known capability record, each extended with its table `key`.
 * Consumed by the model picker UI in the TUI layer.
 */
function listCapabilities() {
	const listed = [];
	for (const key of Object.keys(CAPABILITIES)) listed.push({
		key,
		...CAPABILITIES[key]
	});
	return listed;
}
|
|
262
|
+
//#endregion
|
|
263
|
+
//#region src/fallback.ts
|
|
264
|
+
/**
|
|
265
|
+
* Model/Provider fallback logic.
|
|
266
|
+
*
|
|
267
|
+
* When a provider returns a transient error (rate‑limit, overload) the
|
|
268
|
+
* fallback engine tries the next candidate in the configured chain.
|
|
269
|
+
* Permanent errors (auth) skip the provider entirely and move to the next.
|
|
270
|
+
*
|
|
271
|
+
* The primary use‑case is keeping the agent loop running when the
|
|
272
|
+
* preferred model is temporarily unavailable.
|
|
273
|
+
*/
|
|
274
|
+
/**
 * Raised once every candidate in a fallback chain has failed.
 * `attempts` keeps the per‑candidate failure records; the message lists one
 * line per attempt.
 */
var FallbackExhaustedError = class extends Error {
	attempts;
	constructor(attempts) {
		const lines = [];
		for (const { provider, model, error } of attempts) lines.push(` ${provider}/${model}: ${error}`);
		super("All fallback candidates exhausted:\n" + lines.join("\n"));
		this.name = "FallbackExhaustedError";
		this.attempts = attempts;
	}
};
|
|
284
|
+
/**
 * Classify an error as transient (worth trying another candidate) or
 * permanent (give up on this provider).
 *
 * Only `LlmAuthError` is treated as permanent. Every other error — rate
 * limits, network failures such as ECONNRESET / ETIMEDOUT, and anything
 * unrecognised — counts as transient.
 *
 * @returns `true` for transient errors, `false` for permanent ones.
 */
function isTransient(err) {
	// Checked first so a rate‑limit error is transient regardless of the auth check.
	if (err instanceof LlmRateLimitError) return true;
	return !(err instanceof LlmAuthError);
}
|
|
297
|
+
/**
 * Run `fn` with automatic fallback across a chain of model candidates.
 *
 * `fn` is called with each candidate (`{ provider, model }`) in order until
 * one call succeeds. When `fn` throws:
 * - an abort (`name === "AbortError"`) is re‑thrown immediately;
 * - a transient error moves on to the next candidate;
 * - a permanent error (per `isTransient`) also rules out every later
 *   candidate of the same provider; those are skipped without calling `fn`
 *   and are not recorded in `attempts`.
 *
 * @param candidates Ordered `{ provider, model }` candidates.
 * @param fn Async operation to run against one candidate.
 * @param opts Optional `{ signal }`; an aborted signal stops the chain with an AbortError.
 * @returns `{ result, provider, model, attempts }` for the first success.
 * @throws FallbackExhaustedError when no candidate succeeds.
 */
async function runWithFallback(candidates, fn, opts) {
	if (candidates.length === 0) throw new FallbackExhaustedError([]);
	const attempts = [];
	const failedProviders = new Set();
	for (const candidate of candidates) {
		if (opts?.signal?.aborted) throw new DOMException("Aborted", "AbortError");
		if (failedProviders.has(candidate.provider)) continue;
		try {
			const result = await fn(candidate);
			return {
				result,
				provider: candidate.provider,
				model: candidate.model,
				attempts
			};
		} catch (err) {
			if (err?.name === "AbortError") throw err;
			attempts.push({
				provider: candidate.provider,
				model: candidate.model,
				error: err instanceof Error ? err.message : String(err),
				// Optional access: `fn` may throw null or a primitive.
				code: err?.code,
				status: err?.status
			});
			if (!isTransient(err)) failedProviders.add(candidate.provider);
		}
	}
	throw new FallbackExhaustedError(attempts);
}
|
|
335
|
+
/**
 * Assemble the candidate chain: the primary model first, then one entry
 * per fallback ref in the given order.
 *
 * A ref looks like `"provider/model"` (e.g. `"openai/gpt-4o-mini"`) and is
 * split at its first slash. A ref without a provider prefix inherits the
 * primary's provider.
 */
function buildCandidateChain(primaryProvider, primaryModel, fallbackRefs) {
	const primary = {
		provider: primaryProvider,
		model: primaryModel
	};
	const fallbacks = Array.from(fallbackRefs, (ref) => {
		const cut = ref.indexOf("/");
		if (cut > 0) return {
			provider: ref.slice(0, cut),
			model: ref.slice(cut + 1)
		};
		return {
			provider: primaryProvider,
			model: ref
		};
	});
	return [primary, ...fallbacks];
}
|
|
359
|
+
//#endregion
|
|
360
|
+
//#region src/tool-format.ts
|
|
361
|
+
/**
 * Translate Lynx tool descriptors into the OpenAI-compatible shape.
 *
 * A descriptor whose `type` is already `"function"` is returned untouched.
 * Anything else is wrapped as `{ type: "function", function: { … } }` with
 * `inputSchema` mapped to `parameters`; a missing name becomes `"unknown"`,
 * a missing description `""`, and a missing schema `{}`.
 */
function normalizeOpenAiTools(tools) {
	const isOpenAiTool = (candidate) => typeof candidate === "object" && candidate !== null && candidate.type === "function";
	return tools.map((tool) => {
		if (isOpenAiTool(tool)) return tool;
		const { name, description, inputSchema } = tool;
		return {
			type: "function",
			function: {
				name: String(name ?? "unknown"),
				description: String(description ?? ""),
				parameters: inputSchema ?? {}
			}
		};
	});
}
|
|
381
|
+
/** Turn a tool_result payload into the string required for `role: "tool"` content. */
function stringifyToolResultContent(content) {
	if (typeof content === "string") return content;
	if (content == null) return "";
	// Structured results (objects, block arrays) are sent as JSON text.
	return JSON.stringify(content) ?? String(content);
}
/**
 * Convert Lynx ChatMessage[] to OpenAI wire-format messages.
 *
 * Key transformations:
 * - string content is passed through as‑is
 * - `tool_result` content blocks → `role: "tool"` messages with string
 *   content (non‑string results are JSON‑encoded), emitted before the rest
 *   of the same message
 * - `tool_use` blocks in assistant messages → `tool_calls` array
 * - `reasoning` blocks are stripped (not sent back to the API)
 * - an assistant message left with neither text nor tool calls is dropped
 *   instead of being sent as `{ content: null }`
 * - a message whose content is `null`/`undefined` is treated as having no blocks
 */
function normalizeOpenAiMessages(messages) {
	const out = [];
	for (const msg of messages) {
		if (typeof msg.content === "string") {
			out.push({
				role: msg.role,
				content: msg.content
			});
			continue;
		}
		const blocks = msg.content ?? [];
		const toolResults = blocks.filter((b) => b.type === "tool_result");
		const nonToolBlocks = blocks.filter((b) => b.type !== "tool_result");
		for (const tr of toolResults) out.push({
			role: "tool",
			tool_call_id: tr.toolUseId ?? "unknown",
			content: stringifyToolResultContent(tr.content)
		});
		if (nonToolBlocks.length === 0) continue;
		const textBlocks = normalizeContentBlocks(nonToolBlocks, msg.role);
		if (msg.role === "assistant") {
			const toolUses = nonToolBlocks.filter((b) => b.type === "tool_use");
			// Nothing but stripped blocks (e.g. reasoning only): emit no message.
			if (textBlocks.length === 0 && toolUses.length === 0) continue;
			const assistantMsg = {
				role: "assistant",
				content: textBlocks.length > 0 ? textBlocks : null
			};
			if (toolUses.length > 0) assistantMsg.tool_calls = toolUses.map((tu) => ({
				id: tu.id ?? "unknown",
				type: "function",
				function: {
					name: tu.name ?? "unknown",
					arguments: JSON.stringify(tu.input ?? {})
				}
			}));
			out.push(assistantMsg);
		} else if (textBlocks.length > 0) out.push({
			role: msg.role,
			// A lone text block collapses to a plain string, except for system messages.
			content: textBlocks.length === 1 && msg.role !== "system" ? textBlocks[0].text ?? "" : textBlocks
		});
	}
	return out;
}
|
|
436
|
+
/**
 * Reduce a block list to its text blocks (those with a string `text`),
 * each rebuilt as `{ type: "text", text }`. Everything else — reasoning,
 * tool_use, tool_result — is discarded. `_role` is accepted but not used.
 */
function normalizeContentBlocks(blocks, _role) {
	return blocks.flatMap((block) => {
		const isText = block.type === "text" && typeof block.text === "string";
		return isText ? [{
			type: "text",
			text: block.text
		}] : [];
	});
}
|
|
443
|
+
//#endregion
|
|
444
|
+
//#region src/providers/deepseek.ts
|
|
445
|
+
/**
|
|
446
|
+
* DeepSeek provider — SSE streaming via the /v1/chat/completions endpoint.
|
|
447
|
+
*
|
|
448
|
+
* DeepSeek is API‑compatible with OpenAI so the wire format is the same.
|
|
449
|
+
* The key difference: DeepSeek has implicit prefix caching on every request
|
|
450
|
+
* (no cache_control blocks needed).
|
|
451
|
+
*/
|
|
452
|
+
/** Base URL used when the provider config does not supply one. */
const DEFAULT_BASE_URL$2 = "https://api.deepseek.com";
/** Models reported by the DeepSeek provider's `listModels()`. */
const MODELS$2 = [
	{
		id: "deepseek-chat",
		label: "DeepSeek Chat (V3)",
		contextWindow: 128_000,
		maxOutput: 8_192
	},
	{
		id: "deepseek-reasoner",
		label: "DeepSeek Reasoner (R1)",
		contextWindow: 128_000,
		maxOutput: 8_192
	}
];
|
|
464
|
+
/**
 * Close the active tool call by parsing its accumulated argument buffer.
 *
 * An empty (or whitespace‑only) buffer is treated as `{}` so a tool call
 * streamed without any argument text still completes.
 *
 * @param call Active call state: `{ id, name, buffer }`.
 * @returns A one‑element array: `tool_use_end` with the parsed input, or an
 *   `error` event with code `TOOL_PARSE_ERROR` when the buffer is not valid JSON.
 */
function flushActiveToolCall$1(call) {
	try {
		const input = call.buffer.trim() === "" ? {} : JSON.parse(call.buffer);
		return [{
			type: "tool_use_end",
			callId: call.id,
			input
		}];
	} catch {
		// The buffer was not valid JSON; report it rather than throwing.
		return [{
			type: "error",
			code: "TOOL_PARSE_ERROR",
			message: `Failed to parse tool input for ${call.name}`
		}];
	}
}
|
|
481
|
+
/**
 * Process a `tool_calls` delta array.
 *
 * A delta carrying both an id and a function name starts a new call; the
 * call that was active until then is closed through `flushActiveToolCall$1`,
 * so a buffer that fails to parse surfaces as an error event instead of
 * disappearing. A delta repeating the active call's id continues that call.
 * Argument fragments are appended to the active call's buffer (the passed
 * `activeCall` object is mutated) and echoed as `tool_use_delta` events.
 *
 * @param toolCalls Delta entries from one streamed chunk.
 * @param activeCall Call being accumulated (`{ id, name, buffer }`) or `null`.
 * @returns `{ events, activeCall }` — events to emit and the updated active call.
 */
function processToolCallDeltas$1(toolCalls, activeCall) {
	const events = [];
	let currentCall = activeCall;
	for (const tc of toolCalls) {
		const tcId = tc.id;
		const fn = tc.function;
		const startsNewCall = Boolean(tcId && fn?.name) && tcId !== currentCall?.id;
		if (startsNewCall) {
			if (currentCall) events.push(...flushActiveToolCall$1(currentCall));
			currentCall = {
				id: tcId,
				name: fn.name,
				buffer: ""
			};
			events.push({
				type: "tool_use_start",
				callId: tcId,
				name: fn.name
			});
		}
		if (fn?.arguments && currentCall) {
			currentCall.buffer += fn.arguments;
			events.push({
				type: "tool_use_delta",
				callId: currentCall.id,
				text: fn.arguments
			});
		}
	}
	return {
		events,
		activeCall: currentCall
	};
}
|
|
522
|
+
/**
 * Interpret one line of the SSE stream.
 *
 * Lines that are not `data:` lines, payloads that are not valid JSON, and
 * chunks without a `choices[0].delta` produce no events and leave the
 * state untouched. `data: [DONE]` closes any active tool call, emits
 * `done` and reports `done: true`. Otherwise reasoning text, content text
 * and tool‑call deltas from the chunk are turned into events, in that order.
 *
 * @param line Raw line from the response body.
 * @param state `{ textIndex, reasoningIndex, activeToolCall }`.
 * @returns `{ events, state, done }`.
 */
function processSseLine$1(line, state) {
	const nothing = () => ({
		events: [],
		state,
		done: false
	});
	const trimmed = line.trim();
	if (!trimmed.startsWith("data:")) return nothing();
	const payload = trimmed.slice(5).trim();
	if (payload === "[DONE]") {
		const closing = state.activeToolCall ? flushActiveToolCall$1(state.activeToolCall) : [];
		return {
			events: [...closing, { type: "done" }],
			state: {
				...state,
				activeToolCall: null
			},
			done: true
		};
	}
	let chunk;
	try {
		chunk = JSON.parse(payload);
	} catch {
		return nothing();
	}
	const delta = chunk.choices?.[0]?.delta;
	if (!delta) return nothing();
	const events = [];
	let { textIndex, reasoningIndex, activeToolCall } = state;
	const reasoning = delta.reasoning_content;
	if (typeof reasoning === "string" && reasoning !== "") {
		events.push({
			type: "reasoning_delta",
			index: reasoningIndex,
			text: reasoning
		});
		reasoningIndex += 1;
	}
	const text = delta.content;
	if (typeof text === "string" && text !== "") {
		events.push({
			type: "text_delta",
			index: textIndex,
			text
		});
		textIndex += 1;
	}
	if (delta.tool_calls) {
		const { events: toolEvents, activeCall } = processToolCallDeltas$1(delta.tool_calls, activeToolCall);
		events.push(...toolEvents);
		activeToolCall = activeCall;
	}
	return {
		events,
		state: {
			textIndex,
			reasoningIndex,
			activeToolCall
		},
		done: false
	};
}
|
|
588
|
+
/**
 * Create a DeepSeek provider instance.
 *
 * Each call to `stream()` issues its own `fetch` to
 * `${baseUrl}/v1/chat/completions` (through `withRetry`) and yields the
 * events parsed from the SSE response.
 *
 * Configuration read from `config`:
 * - `apiKey` — sent as the Bearer token;
 * - `baseUrl` — optional override of the default DeepSeek URL;
 * - `timeoutMs` — optional; when positive, a request that has not produced
 *   response headers within this time is aborted with a retryable
 *   `LlmError`. Reading the response body is not covered by the timeout.
 *
 * Cancellation through the `signal` argument of `stream()`:
 * - already aborted before a request starts → `stream()` throws an AbortError;
 * - aborted while the request is in flight → the fetch is aborted;
 * - aborted while streaming → the body is cancelled and the stream ends
 *   with the usual closing events.
 */
function createDeepSeekProvider(config) {
	const baseUrl = config.baseUrl ?? DEFAULT_BASE_URL$2;
	const timeoutMs = config.timeoutMs;
	return {
		id: "deepseek",
		listModels() {
			return MODELS$2;
		},
		getCapability(modelId) {
			const cap = getCapability("deepseek", modelId);
			if (!cap) throw new LlmError(`Unknown DeepSeek model: ${modelId}`, {
				category: "llm",
				recoverable: false,
				retryable: false,
				diagnosticHint: "llm_unknown_model"
			});
			return cap;
		},
		async *stream(...[modelId, messages, systemPrompt, tools, signal]) {
			const body = {
				model: modelId,
				messages: [...systemPrompt ? [{
					role: "system",
					content: systemPrompt
				}] : [], ...normalizeOpenAiMessages(messages)],
				stream: true,
				...tools?.length > 0 ? { tools: normalizeOpenAiTools(tools) } : {}
			};
			async function makeRequest() {
				// A listener added after the signal fired would never run, so check first.
				if (signal?.aborted) throw new DOMException("Aborted", "AbortError");
				const controller = new AbortController();
				const onAbort = () => controller.abort();
				signal?.addEventListener("abort", onAbort, { once: true });
				// Abort with an LlmError so a timeout is not mistaken for a user abort.
				const timer = timeoutMs > 0 ? setTimeout(() => controller.abort(new LlmError(`DeepSeek request timed out after ${timeoutMs}ms`, {
					category: "llm",
					recoverable: true,
					retryable: true
				})), timeoutMs) : null;
				try {
					const response = await fetch(`${baseUrl}/v1/chat/completions`, {
						method: "POST",
						headers: {
							"Content-Type": "application/json",
							Authorization: `Bearer ${config.apiKey}`
						},
						body: JSON.stringify(body),
						signal: controller.signal
					});
					if (!response.ok) await handleHttpError$2(response);
					return response;
				} finally {
					if (timer !== null) clearTimeout(timer);
					signal?.removeEventListener("abort", onAbort);
				}
			}
			const response = await withRetry(makeRequest, {
				baseMs: 500,
				maxMs: 32e3
			});
			if (!response.body) throw new LlmError("DeepSeek returned empty response body", {
				category: "llm",
				recoverable: true,
				retryable: true
			});
			const reader = response.body.getReader();
			// The request-time listener is gone by now; this one unblocks a pending read().
			const cancelOnAbort = () => {
				reader.cancel().catch(() => {});
			};
			signal?.addEventListener("abort", cancelOnAbort, { once: true });
			const decoder = new TextDecoder();
			let buffer = "";
			let state = {
				activeToolCall: null,
				textIndex: 0,
				reasoningIndex: 0
			};
			try {
				while (!signal?.aborted) {
					const { done, value } = await reader.read();
					if (done) break;
					buffer += decoder.decode(value, { stream: true });
					const lines = buffer.split("\n");
					// The last piece may be an incomplete line; keep it for the next chunk.
					buffer = lines.pop() ?? "";
					for (const line of lines) {
						const result = processSseLine$1(line, state);
						for (const event of result.events) yield event;
						if (result.done) return;
						state = result.state;
					}
				}
				// Handle a final line that was not terminated by a newline.
				buffer += decoder.decode();
				if (buffer.trim() !== "") {
					const result = processSseLine$1(buffer, state);
					for (const event of result.events) yield event;
					if (result.done) return;
					state = result.state;
				}
				if (state.activeToolCall) for (const event of flushActiveToolCall$1(state.activeToolCall)) yield event;
				yield { type: "done" };
			} finally {
				signal?.removeEventListener("abort", cancelOnAbort);
				try {
					// Best-effort: release the connection when the consumer stops early.
					await reader.cancel();
				} catch {}
				reader.releaseLock();
			}
		}
	};
}
|
|
686
|
+
/**
 * Convert a non‑OK DeepSeek HTTP response into a thrown error. Always throws.
 *
 * Mapping: 401/403 → `LlmAuthError`; 429 and 529 → `LlmRateLimitError`;
 * 400 with "context" in the body → `LlmContextOverflowError`; anything else
 * → `LlmError`, flagged recoverable/retryable for 5xx.
 *
 * Every thrown error carries `status`, and — when the response has a
 * Retry‑After header — `headers: { "retry-after": … }`, so that
 * `parseRetryAfter` can honour it.
 *
 * @param response The fetch response whose `ok` is false.
 */
async function handleHttpError$2(response) {
	let body;
	try {
		body = await response.text();
	} catch {
		// The error text is only used in the message; continue without it.
		body = "";
	}
	const status = response.status;
	const retryAfter = response.headers?.get?.("retry-after") ?? null;
	let error;
	if (status === 401 || status === 403) error = new LlmAuthError(`DeepSeek auth failed (${status}): ${body}`);
	else if (status === 429) error = new LlmRateLimitError(`DeepSeek rate limited (429): ${body}`);
	else if (status === 529) error = new LlmRateLimitError(`DeepSeek overloaded (529): ${body}`);
	else if (status === 400 && body.includes("context")) error = new LlmContextOverflowError(`DeepSeek context overflow: ${body}`);
	else error = new LlmError(`DeepSeek HTTP ${status}: ${body}`, {
		category: "llm",
		recoverable: status >= 500,
		retryable: status >= 500
	});
	error.status = status;
	if (retryAfter !== null) error.headers = { "retry-after": retryAfter };
	throw error;
}
|
|
708
|
+
//#endregion
|
|
709
|
+
//#region src/providers/openai.ts
|
|
710
|
+
/**
|
|
711
|
+
* OpenAI provider — SSE streaming via the /v1/chat/completions endpoint.
|
|
712
|
+
*
|
|
713
|
+
* The API shape is identical to DeepSeek (both follow the OpenAI spec),
|
|
714
|
+
* but the provider metadata and defaults differ.
|
|
715
|
+
*/
|
|
716
|
+
/** Base URL used when the provider config does not supply one. */
const DEFAULT_BASE_URL$1 = "https://api.openai.com";
/** Models reported by the OpenAI provider's `listModels()`. */
const MODELS$1 = [
	{
		id: "gpt-4o",
		label: "GPT-4o",
		contextWindow: 128_000,
		maxOutput: 16_384
	},
	{
		id: "gpt-4o-mini",
		label: "GPT-4o Mini",
		contextWindow: 128_000,
		maxOutput: 16_384
	},
	{
		id: "o3-mini",
		label: "o3-mini",
		contextWindow: 200_000,
		maxOutput: 100_000
	}
];
|
|
737
|
+
/**
 * Close the active tool call by parsing its accumulated argument buffer.
 *
 * An empty (or whitespace‑only) buffer is treated as `{}` so a tool call
 * streamed without any argument text still completes.
 *
 * @param call Active call state: `{ id, name, buffer }`.
 * @returns A one‑element array: `tool_use_end` with the parsed input, or an
 *   `error` event with code `TOOL_PARSE_ERROR` when the buffer is not valid JSON.
 */
function flushActiveToolCall(call) {
	try {
		const input = call.buffer.trim() === "" ? {} : JSON.parse(call.buffer);
		return [{
			type: "tool_use_end",
			callId: call.id,
			input
		}];
	} catch {
		// The buffer was not valid JSON; report it rather than throwing.
		return [{
			type: "error",
			code: "TOOL_PARSE_ERROR",
			message: `Failed to parse tool input for ${call.name}`
		}];
	}
}
|
|
754
|
+
/**
 * Process a `tool_calls` delta array.
 *
 * A delta carrying both an id and a function name starts a new call; the
 * call that was active until then is closed through `flushActiveToolCall`,
 * so a buffer that fails to parse surfaces as an error event instead of
 * disappearing. A delta repeating the active call's id continues that call.
 * Argument fragments are appended to the active call's buffer (the passed
 * `activeCall` object is mutated) and echoed as `tool_use_delta` events.
 *
 * @param toolCalls Delta entries from one streamed chunk.
 * @param activeCall Call being accumulated (`{ id, name, buffer }`) or `null`.
 * @returns `{ events, activeCall }` — events to emit and the updated active call.
 */
function processToolCallDeltas(toolCalls, activeCall) {
	const events = [];
	let currentCall = activeCall;
	for (const tc of toolCalls) {
		const tcId = tc.id;
		const fn = tc.function;
		const startsNewCall = Boolean(tcId && fn?.name) && tcId !== currentCall?.id;
		if (startsNewCall) {
			if (currentCall) events.push(...flushActiveToolCall(currentCall));
			currentCall = {
				id: tcId,
				name: fn.name,
				buffer: ""
			};
			events.push({
				type: "tool_use_start",
				callId: tcId,
				name: fn.name
			});
		}
		if (fn?.arguments && currentCall) {
			currentCall.buffer += fn.arguments;
			events.push({
				type: "tool_use_delta",
				callId: currentCall.id,
				text: fn.arguments
			});
		}
	}
	return {
		events,
		activeCall: currentCall
	};
}
|
|
795
|
+
/**
 * Interpret one line of the SSE stream.
 *
 * Lines that are not `data:` lines, payloads that are not valid JSON, and
 * chunks without a `choices[0].delta` produce no events and leave the
 * state untouched. `data: [DONE]` closes any active tool call, emits
 * `done` and reports `done: true`. Otherwise reasoning text, content text
 * and tool‑call deltas from the chunk are turned into events, in that order.
 *
 * @param line Raw line from the response body.
 * @param state `{ textIndex, reasoningIndex, activeToolCall }`.
 * @returns `{ events, state, done }`.
 */
function processSseLine(line, state) {
	const nothing = () => ({
		events: [],
		state,
		done: false
	});
	const trimmed = line.trim();
	if (!trimmed.startsWith("data:")) return nothing();
	const payload = trimmed.slice(5).trim();
	if (payload === "[DONE]") {
		const closing = state.activeToolCall ? flushActiveToolCall(state.activeToolCall) : [];
		return {
			events: [...closing, { type: "done" }],
			state: {
				...state,
				activeToolCall: null
			},
			done: true
		};
	}
	let chunk;
	try {
		chunk = JSON.parse(payload);
	} catch {
		return nothing();
	}
	const delta = chunk.choices?.[0]?.delta;
	if (!delta) return nothing();
	const events = [];
	let { textIndex, reasoningIndex, activeToolCall } = state;
	const reasoning = delta.reasoning_content;
	if (typeof reasoning === "string" && reasoning !== "") {
		events.push({
			type: "reasoning_delta",
			index: reasoningIndex,
			text: reasoning
		});
		reasoningIndex += 1;
	}
	const text = delta.content;
	if (typeof text === "string" && text !== "") {
		events.push({
			type: "text_delta",
			index: textIndex,
			text
		});
		textIndex += 1;
	}
	if (delta.tool_calls) {
		const { events: toolEvents, activeCall } = processToolCallDeltas(delta.tool_calls, activeToolCall);
		events.push(...toolEvents);
		activeToolCall = activeCall;
	}
	return {
		events,
		state: {
			textIndex,
			reasoningIndex,
			activeToolCall
		},
		done: false
	};
}
|
|
861
|
+
/**
 * Create an OpenAI provider instance.
 *
 * The returned object exposes `id`, `listModels()`, `getCapability(modelId)`
 * and an async-generator `stream(modelId, messages, systemPrompt, tools, signal)`
 * that POSTs to `${baseUrl}/v1/chat/completions` and yields the events
 * produced by `processSseLine` for each line of the response body.
 *
 * @param {{apiKey: string, baseUrl?: string}} config - `baseUrl` falls back
 *   to the module default when nullish.
 * @returns {object} The provider.
 */
function createOpenAiProvider(config) {
	const baseUrl = config.baseUrl ?? DEFAULT_BASE_URL$1;
	return {
		id: "openai",
		listModels() {
			return MODELS$1;
		},
		/** Look up capability metadata; throws a non-retryable LlmError for unknown models. */
		getCapability(modelId) {
			const cap = getCapability("openai", modelId);
			if (!cap) throw new LlmError(`Unknown OpenAI model: ${modelId}`, {
				category: "llm",
				recoverable: false,
				retryable: false,
				diagnosticHint: "llm_unknown_model"
			});
			return cap;
		},
		async *stream(...[modelId, messages, systemPrompt, tools, signal]) {
			const body = {
				model: modelId,
				messages: [...systemPrompt ? [{
					role: "system",
					content: systemPrompt
				}] : [], ...normalizeOpenAiMessages(messages)],
				stream: true,
				// `tools` may be omitted by the caller; treat that like an empty list.
				...tools?.length > 0 ? { tools: normalizeOpenAiTools(tools) } : {}
			};
			// An abort event that fired before we were called can never be observed
			// through a listener, so handle it up front. The result (a lone "done")
			// is what the read loop below produces for an aborted signal.
			if (signal?.aborted) {
				yield { type: "done" };
				return;
			}
			async function makeRequest() {
				// The caller's signal is handed to fetch directly so that it also
				// governs the response body, not only the wait for headers.
				const response = await fetch(`${baseUrl}/v1/chat/completions`, {
					method: "POST",
					headers: {
						"Content-Type": "application/json",
						Authorization: `Bearer ${config.apiKey}`
					},
					body: JSON.stringify(body),
					signal
				});
				if (!response.ok) await handleHttpError$1(response);
				return response;
			}
			const response = await withRetry(makeRequest, {
				baseMs: 500,
				maxMs: 32e3
			});
			if (!response.body) throw new LlmError("OpenAI returned empty response body", {
				category: "llm",
				recoverable: true,
				retryable: true
			});
			const reader = response.body.getReader();
			const decoder = new TextDecoder();
			let buffer = "";
			let state = {
				activeToolCall: null,
				textIndex: 0,
				reasoningIndex: 0
			};
			try {
				let streamEnded = false;
				while (!streamEnded) {
					if (signal?.aborted) break;
					let chunk;
					try {
						chunk = await reader.read();
					} catch (err) {
						// A read interrupted by the caller's abort ends the stream
						// the same way as an abort noticed between reads.
						if (signal?.aborted) break;
						throw err;
					}
					let lines;
					if (chunk.done) {
						streamEnded = true;
						// Flush the decoder and treat an unterminated final line as
						// a complete one instead of dropping it.
						buffer += decoder.decode();
						lines = buffer ? [buffer] : [];
						buffer = "";
					} else {
						buffer += decoder.decode(chunk.value, { stream: true });
						lines = buffer.split("\n");
						buffer = lines.pop() ?? "";
					}
					for (const line of lines) {
						const result = processSseLine(line, state);
						for (const event of result.events) yield event;
						if (result.done) return;
						state = result.state;
					}
				}
				if (state.activeToolCall) for (const event of flushActiveToolCall(state.activeToolCall)) yield event;
				yield { type: "done" };
			} finally {
				try {
					// Cancel so the underlying connection is closed when we stop
					// before the body is fully consumed (early return, abort, error).
					await reader.cancel();
				} catch {
					// Cancelling an already errored/aborted stream rejects; there is
					// nothing further to clean up in that case.
				}
				reader.releaseLock();
			}
		}
	};
}
|
|
958
|
+
/**
 * Translate a non-OK OpenAI HTTP response into a thrown error.
 *
 * Always throws; the error class depends on the status code:
 * 401/403 -> LlmAuthError, 429 and 529 -> LlmRateLimitError,
 * 400 with "context" in the body -> LlmContextOverflowError,
 * anything else -> LlmError (recoverable/retryable for 5xx).
 *
 * @param {Response} response - The failed response; its body is read as text.
 * @returns {Promise<never>}
 */
async function handleHttpError$1(response) {
	let body = "";
	try {
		body = await response.text();
	} catch {
		// Body unreadable: keep the empty string so the message is still built.
	}
	const { status } = response;
	switch (status) {
		case 401:
		case 403:
			throw new LlmAuthError(`OpenAI auth failed (${status}): ${body}`);
		case 429:
			throw new LlmRateLimitError(`OpenAI rate limited (429): ${body}`);
		case 529:
			throw new LlmRateLimitError(`OpenAI overloaded (529): ${body}`, 529);
		case 400:
			if (body.includes("context")) throw new LlmContextOverflowError(`OpenAI context overflow: ${body}`);
			break;
	}
	// 429 was already thrown above, so at this point "retryable" reduces to
	// the same 5xx check as "recoverable".
	const isServerError = status >= 500;
	throw new LlmError(`OpenAI HTTP ${status}: ${body}`, {
		category: "llm",
		recoverable: isServerError,
		retryable: isServerError
	});
}
|
|
976
|
+
//#endregion
|
|
977
|
+
//#region src/providers/anthropic.ts
|
|
978
|
+
/**
|
|
979
|
+
* Anthropic provider — SSE streaming via the /v1/messages endpoint.
|
|
980
|
+
*
|
|
981
|
+
* Translates Lynx's OpenAI‑compatible internal format to Anthropic's
|
|
982
|
+
* native message/tool format, then maps Anthropic SSE events back to
|
|
983
|
+
* Lynx StreamEvent types. This is the reverse of what Claude Code's
|
|
984
|
+
* Codex adapter does (Anthropic → OpenAI translation).
|
|
985
|
+
*/
|
|
986
|
+
/** API origin used by the Anthropic provider when `config.baseUrl` is not set. */
const DEFAULT_BASE_URL = "https://api.anthropic.com";
/** Value sent in the `anthropic-version` request header. */
const ANTHROPIC_VERSION = "2023-06-01";
/**
 * Model descriptors returned by the Anthropic provider's `listModels()`.
 * `contextWindow` and `maxOutput` are presumably token counts; confirm
 * against the capability table.
 */
const MODELS = [
	{ id: "claude-opus-4-8", label: "Claude Opus 4.8", contextWindow: 200000, maxOutput: 32000 },
	{ id: "claude-sonnet-4-6", label: "Claude Sonnet 4.6", contextWindow: 200000, maxOutput: 16384 },
	{ id: "claude-haiku-4-5", label: "Claude Haiku 4.5", contextWindow: 200000, maxOutput: 8192 }
];
|
|
1008
|
+
/**
 * Convert Lynx OpenAI-compatible ChatMessage[] to Anthropic Message[].
 *
 * - `system` messages are not emitted as messages; their text is collected
 *   into `systemLines` (Anthropic takes the system prompt as a top-level
 *   request parameter).
 * - `user` / `assistant` messages are converted by `translateUserContent` /
 *   `translateAssistantContent`.
 * - `tool` messages become `tool_result` blocks inside a `user` message.
 *   Consecutive tool messages share ONE user message, because Anthropic
 *   expects the results of parallel tool calls to arrive together rather
 *   than as separate user turns.
 * - Messages with any other role are ignored.
 *
 * @param {object[]} chatMessages - Messages in OpenAI-style chat format.
 * @returns {{messages: object[], systemLines: string[]}}
 */
function translateMessages(chatMessages) {
	const messages = [];
	const systemLines = [];
	// Content array of the user message currently collecting tool results,
	// or null when the last emitted message was not a tool-result message.
	let openToolResults = null;
	for (const msg of chatMessages) {
		if (msg.role === "system") {
			if (typeof msg.content === "string") systemLines.push(msg.content);
			else if (Array.isArray(msg.content)) {
				for (const block of msg.content) if (block.type === "text" && block.text) systemLines.push(block.text);
			}
			// System text is hoisted out of the message list, so it does not
			// separate tool results that are adjacent in the output.
			continue;
		}
		if (msg.role === "tool") {
			const toolResult = {
				type: "tool_result",
				tool_use_id: msg.tool_call_id ?? "unknown",
				content: typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content)
			};
			if (openToolResults) openToolResults.push(toolResult);
			else {
				openToolResults = [toolResult];
				messages.push({
					role: "user",
					content: openToolResults
				});
			}
			continue;
		}
		if (msg.role === "user") {
			openToolResults = null;
			messages.push({
				role: "user",
				content: translateUserContent(msg)
			});
		} else if (msg.role === "assistant") {
			openToolResults = null;
			messages.push({
				role: "assistant",
				content: translateAssistantContent(msg)
			});
		}
	}
	return {
		messages,
		systemLines
	};
}
|
|
1046
|
+
/**
 * Convert the content of a Lynx user message to Anthropic content.
 *
 * String content is returned as is. Array content is mapped block by block:
 * non-empty `text` blocks and `tool_result` blocks are kept, every other
 * block is dropped. Missing/empty content, or an array that yields no
 * blocks, produces the empty string.
 *
 * @param {{content?: string|object[]}} msg
 * @returns {string|object[]}
 */
function translateUserContent(msg) {
	const { content } = msg;
	if (typeof content === "string") return content;
	if (!content || content.length === 0) return "";
	const parts = [];
	for (const item of content) {
		if (item.type === "text" && item.text) {
			parts.push({
				type: "text",
				text: item.text
			});
			continue;
		}
		if (item.type !== "tool_result") continue;
		const payload = typeof item.content === "string" ? item.content : JSON.stringify(item.content);
		parts.push({
			type: "tool_result",
			tool_use_id: item.toolUseId ?? "unknown",
			content: payload,
			is_error: item.isError
		});
	}
	return parts.length === 0 ? "" : parts;
}
|
|
1063
|
+
/**
 * Convert a Lynx assistant message to Anthropic content.
 *
 * Array content contributes its non-empty `text` blocks and its `tool_use`
 * blocks (those with both `id` and `name`); non-empty string content becomes
 * a single text block. Every entry of `msg.tool_calls` is appended as a
 * `tool_use` block whose `input` is derived from `function.arguments`.
 *
 * @param {{content?: string|object[], tool_calls?: object[]}} msg
 * @returns {string|object[]} The content blocks, or "" when there are none.
 */
function translateAssistantContent(msg) {
	// `tool_use.input` must be a JSON object. Anything that is not one
	// (unparseable text, null, arrays, primitives) degrades to {}.
	const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
	const toToolInput = (rawArguments) => {
		// Arguments that were already decoded upstream are used directly
		// instead of being discarded by a failing JSON.parse.
		if (isPlainObject(rawArguments)) return rawArguments;
		try {
			const parsed = JSON.parse(rawArguments);
			return isPlainObject(parsed) ? parsed : {};
		} catch {
			// Best effort: malformed arguments still yield a well-formed block.
			return {};
		}
	};
	const blocks = [];
	if (Array.isArray(msg.content)) {
		for (const block of msg.content) if (block.type === "text" && block.text) blocks.push({
			type: "text",
			text: block.text
		});
		else if (block.type === "tool_use" && block.id && block.name) blocks.push({
			type: "tool_use",
			id: block.id,
			name: block.name,
			input: block.input ?? {}
		});
	} else if (typeof msg.content === "string" && msg.content) blocks.push({
		type: "text",
		text: msg.content
	});
	if (msg.tool_calls) for (const tc of msg.tool_calls) blocks.push({
		type: "tool_use",
		id: tc.id,
		name: tc.function.name,
		input: toToolInput(tc.function.arguments)
	});
	return blocks.length > 0 ? blocks : "";
}
|
|
1097
|
+
/**
 * Convert Lynx/OpenAI-format tool descriptors to Anthropic format.
 *
 * Two input shapes are accepted:
 * - flat:   `{ name, description, inputSchema }`
 * - OpenAI: `{ type: "function", function: { name, description, parameters } }`
 *
 * Flat fields win when both are present. Missing values fall back to
 * `"unknown"`, `""` and an empty object schema respectively.
 *
 * @param {object[]} tools
 * @returns {{name: string, description: string, input_schema: object}[]}
 */
function translateTools(tools) {
	return tools.map((tool) => {
		const fn = tool.function;
		return {
			// The schema already fell back to the nested OpenAI `function`
			// object; name and description must do the same, otherwise every
			// OpenAI-format tool is sent as "unknown" with no description.
			name: tool.name ?? fn?.name ?? "unknown",
			description: tool.description ?? fn?.description ?? "",
			// A bare {} is not a valid tool schema for Anthropic, which
			// requires `type: "object"`; use the empty object schema instead.
			input_schema: tool.inputSchema ?? fn?.parameters ?? {
				type: "object",
				properties: {}
			}
		};
	});
}
|
|
1108
|
+
/**
 * Map one Anthropic SSE event (`event: <type>` + `data: <json>`) to Lynx
 * stream events.
 *
 * Handled event types:
 * - `content_block_start`: a `tool_use` block opens a tool call and emits
 *   `tool_use_start`.
 * - `content_block_delta`: `text_delta` -> `text_delta`, `input_json_delta`
 *   -> `tool_use_delta` (also appended to the open call's buffer),
 *   `thinking_delta` -> `reasoning_delta`.
 * - `content_block_stop`: closes the open tool call when the index matches,
 *   emitting `tool_use_end` with the parsed input (an empty buffer means the
 *   tool took no arguments and yields `{}`), or a `TOOL_PARSE_ERROR` error
 *   event when the buffer is not valid JSON. The call is cleared either way.
 * - `message_stop`: emits `done`.
 * - `error`: emits an `error` event carrying the API's error type/message.
 * Every other type (`message_start`, `ping`, `message_delta`, ...) produces
 * no events.
 *
 * The input `state` is never mutated; use the returned state for the next
 * event.
 *
 * @param {string} eventType - Value of the preceding `event:` line.
 * @param {string} data - JSON text of the `data:` line.
 * @param {object} state - `{ activeToolCall, activeToolCallIndex, textIndex, toolUseCount, toolUseIndices }`.
 * @returns {{events: object[], state: object}}
 * @throws {SyntaxError} When `data` of a handled event type is not valid JSON.
 */
function processAnthropicEvent(eventType, data, state) {
	switch (eventType) {
		case "content_block_start": {
			const parsed = JSON.parse(data);
			const block = parsed.content_block;
			if (block?.type !== "tool_use" || !block.id || !block.name) break;
			return {
				events: [{
					type: "tool_use_start",
					callId: block.id,
					name: block.name
				}],
				state: {
					...state,
					activeToolCall: {
						id: block.id,
						name: block.name,
						buffer: ""
					},
					activeToolCallIndex: parsed.index,
					toolUseIndices: new Set(state.toolUseIndices).add(parsed.index)
				}
			};
		}
		case "content_block_delta": {
			const delta = JSON.parse(data).delta;
			if (delta?.type === "text_delta" && delta.text) return {
				events: [{
					type: "text_delta",
					index: state.textIndex,
					text: delta.text
				}],
				// Advance the counter on a copy rather than on the caller's object.
				state: {
					...state,
					textIndex: state.textIndex + 1
				}
			};
			if (delta?.type === "input_json_delta" && delta.partial_json && state.activeToolCall) return {
				events: [{
					type: "tool_use_delta",
					callId: state.activeToolCall.id,
					text: delta.partial_json
				}],
				state: {
					...state,
					activeToolCall: {
						...state.activeToolCall,
						buffer: state.activeToolCall.buffer + delta.partial_json
					}
				}
			};
			if (delta?.type === "thinking_delta" && delta.thinking) return {
				events: [{
					type: "reasoning_delta",
					index: 0,
					text: delta.thinking
				}],
				state
			};
			break;
		}
		case "content_block_stop": {
			const parsed = JSON.parse(data);
			const call = state.activeToolCall;
			if (!call || parsed.index !== state.activeToolCallIndex) break;
			// The block is finished whether or not its input parses, so the
			// call must not stay active and leak into later blocks.
			const closed = {
				...state,
				activeToolCall: null
			};
			let input;
			try {
				// A tool invoked without arguments streams no JSON at all;
				// that is a valid call with empty input, not a missing one.
				input = call.buffer ? JSON.parse(call.buffer) : {};
			} catch {
				return {
					events: [{
						type: "error",
						code: "TOOL_PARSE_ERROR",
						message: `解析 ${call.name} 的工具输入失败`
					}],
					state: closed
				};
			}
			return {
				events: [{
					type: "tool_use_end",
					callId: call.id,
					input
				}],
				state: {
					...closed,
					toolUseCount: state.toolUseCount + 1
				}
			};
		}
		case "message_stop": return {
			events: [{ type: "done" }],
			state
		};
		case "error": {
			// Mid-stream failures (e.g. overload) arrive as an SSE event after
			// the HTTP 200; surface them instead of ending as if successful.
			const detail = JSON.parse(data)?.error;
			return {
				events: [{
					type: "error",
					code: detail?.type ?? "STREAM_ERROR",
					message: detail?.message ?? "Anthropic stream error"
				}],
				state
			};
		}
	}
	return {
		events: [],
		state
	};
}
|
|
1220
|
+
/**
 * Create an Anthropic provider instance.
 *
 * The returned object exposes `id`, `listModels()`, `getCapability(modelId)`
 * and an async-generator `stream(modelId, messages, systemPrompt, tools, signal)`.
 * Each call to `stream()` issues its own POST to `${baseUrl}/v1/messages` and
 * yields the events produced by `processAnthropicEvent`, ending with exactly
 * one `done` event.
 *
 * @param {{apiKey: string, baseUrl?: string}} config - `baseUrl` falls back
 *   to the module default when nullish.
 * @returns {object} The provider.
 */
function createAnthropicProvider(config) {
	const baseUrl = config.baseUrl ?? DEFAULT_BASE_URL;
	const apiKey = config.apiKey;
	return {
		id: "anthropic",
		listModels() {
			return MODELS;
		},
		/** Look up capability metadata; throws a non-retryable LlmError for unknown models. */
		getCapability(modelId) {
			const cap = getCapability("anthropic", modelId);
			if (!cap) throw new LlmError(`未知 Anthropic 模型: ${modelId}`, {
				category: "llm",
				recoverable: false,
				retryable: false,
				diagnosticHint: "llm_unknown_model"
			});
			return cap;
		},
		async *stream(...[modelId, messages, systemPrompt, tools, signal]) {
			const translated = translateMessages(messages);
			// The explicit system prompt comes first, followed by any system
			// messages found in the conversation.
			const allSystemLines = systemPrompt ? [systemPrompt, ...translated.systemLines] : translated.systemLines;
			const systemParam = allSystemLines.length > 0 ? allSystemLines.join("\n\n") : void 0;
			const body = {
				model: modelId,
				max_tokens: 8192,
				messages: translated.messages,
				...systemParam ? { system: systemParam } : {},
				// `tools` may be omitted by the caller; treat that like an empty list.
				...tools?.length > 0 ? { tools: translateTools(tools) } : {},
				stream: true
			};
			async function makeRequest() {
				// The caller's signal is passed straight to fetch so it covers the
				// response body on every runtime, including those without
				// AbortSignal.any.
				const response = await fetch(`${baseUrl}/v1/messages`, {
					method: "POST",
					headers: {
						"Content-Type": "application/json",
						"x-api-key": apiKey,
						"anthropic-version": ANTHROPIC_VERSION
					},
					body: JSON.stringify(body),
					signal
				});
				if (!response.ok) await handleHttpError(response);
				return response;
			}
			const response = await withRetry(makeRequest, {
				baseMs: 500,
				maxMs: 32e3
			});
			if (!response.body) throw new LlmError("Anthropic 返回了空响应体", {
				category: "llm",
				recoverable: true,
				retryable: true
			});
			const reader = response.body.getReader();
			const decoder = new TextDecoder();
			let buffer = "";
			// Type from the most recent `event:` line; applies to the `data:`
			// line that follows it.
			let eventType = "";
			let state = {
				activeToolCall: null,
				activeToolCallIndex: -1,
				textIndex: 0,
				toolUseCount: 0,
				toolUseIndices: /* @__PURE__ */ new Set()
			};
			try {
				let streamEnded = false;
				while (!streamEnded) {
					if (signal?.aborted) break;
					const chunk = await reader.read();
					let lines;
					if (chunk.done) {
						streamEnded = true;
						// Flush the decoder and treat an unterminated final line as
						// a complete one instead of dropping it.
						buffer += decoder.decode();
						lines = buffer ? [buffer] : [];
						buffer = "";
					} else {
						buffer += decoder.decode(chunk.value, { stream: true });
						lines = buffer.split("\n");
						buffer = lines.pop() ?? "";
					}
					for (const line of lines) {
						const trimmed = line.trim();
						// The space after the colon is optional in SSE.
						if (trimmed.startsWith("event:")) {
							eventType = trimmed.slice(6).trim();
							continue;
						}
						if (!eventType || !trimmed.startsWith("data:")) continue;
						const result = processAnthropicEvent(eventType, trimmed.slice(5).trim(), state);
						let finished = false;
						for (const event of result.events) {
							yield event;
							if (event.type === "done") finished = true;
						}
						state = result.state;
						// `message_stop` already produced the terminal event; stop
						// here so consumers never receive a second "done".
						if (finished) return;
					}
				}
				// The stream ended (or was aborted) while a tool call was still
				// open: emit what was collected so far.
				const pending = state.activeToolCall;
				if (pending && pending.buffer) {
					let input;
					let parsedOk = true;
					try {
						input = JSON.parse(pending.buffer);
					} catch {
						parsedOk = false;
					}
					if (parsedOk) yield {
						type: "tool_use_end",
						callId: pending.id,
						input
					};
					else yield {
						// Report the truncated call instead of dropping it silently.
						type: "error",
						code: "TOOL_PARSE_ERROR",
						message: `解析 ${pending.name} 的工具输入失败`
					};
				}
				yield { type: "done" };
			} finally {
				try {
					// Cancel so the underlying connection is closed when we stop
					// before the body is fully consumed (early return, abort, error).
					await reader.cancel();
				} catch {
					// Cancelling an already errored/aborted stream rejects; there is
					// nothing further to clean up in that case.
				}
				reader.releaseLock();
			}
		}
	};
}
|
|
1332
|
+
/**
 * Translate a non-OK Anthropic HTTP response into a thrown error.
 *
 * Always throws; the error class depends on the status code:
 * 401/403 -> LlmAuthError, 429 -> LlmRateLimitError,
 * 529 -> LlmRateLimitError with `status` set to 529,
 * 400 with "context" in the body -> LlmContextOverflowError,
 * anything else -> LlmError (recoverable/retryable for 5xx).
 *
 * @param {Response} response - The failed response; its body is read as text.
 * @returns {Promise<never>}
 */
async function handleHttpError(response) {
	let body = "";
	try {
		body = await response.text();
	} catch {
		// Body unreadable: keep the empty string so the message is still built.
	}
	const { status } = response;
	switch (status) {
		case 401:
		case 403:
			throw new LlmAuthError(`Anthropic 认证失败 (${status}): ${body}`);
		case 429:
			throw new LlmRateLimitError(`Anthropic 速率限制 (429): ${body}`);
		case 529: {
			const overloaded = new LlmRateLimitError(`Anthropic 过载 (529): ${body}`);
			overloaded.status = 529;
			throw overloaded;
		}
		case 400:
			if (body.includes("context")) throw new LlmContextOverflowError(`Anthropic 上下文溢出: ${body}`);
			break;
	}
	// 429 was already thrown above, so at this point "retryable" reduces to
	// the same 5xx check as "recoverable".
	const isServerError = status >= 500;
	throw new LlmError(`Anthropic HTTP ${status}: ${body}`, {
		category: "llm",
		recoverable: isServerError,
		retryable: isServerError
	});
}
|
|
1354
|
+
//#endregion
|
|
1355
|
+
export { FallbackExhaustedError, Stream, buildCandidateChain, createAnthropicProvider, createDeepSeekProvider, createOpenAiProvider, getCapability, listCapabilities, runWithFallback, withRetry };
|
|
1356
|
+
|
|
1357
|
+
//# sourceMappingURL=index.mjs.map
|