mixdog 0.7.8 → 0.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/README.md +13 -10
- package/package.json +1 -1
- package/scripts/openai-oauth-catalog-smoke.mjs +53 -0
- package/setup/config-merge.mjs +0 -1
- package/setup/install.mjs +574 -384
- package/setup/mixdog-cli.mjs +30 -3
- package/setup/setup-server.mjs +11 -31
- package/setup/setup.html +3 -3
- package/setup/tui.mjs +35 -316
- package/src/agent/orchestrator/config.mjs +0 -1
- package/src/agent/orchestrator/providers/anthropic-oauth.mjs +2 -5
- package/src/agent/orchestrator/providers/anthropic.mjs +243 -86
- package/src/agent/orchestrator/providers/gemini.mjs +386 -31
- package/src/agent/orchestrator/providers/grok-oauth.mjs +2 -5
- package/src/agent/orchestrator/providers/model-catalog.mjs +146 -13
- package/src/agent/orchestrator/providers/openai-compat-stream.mjs +366 -0
- package/src/agent/orchestrator/providers/openai-compat.mjs +74 -30
- package/src/agent/orchestrator/providers/openai-oauth-ws.mjs +2 -1
- package/src/agent/orchestrator/providers/openai-oauth.mjs +59 -13
- package/src/agent/orchestrator/session/manager.mjs +18 -4
- package/src/agent/orchestrator/stall-policy.mjs +6 -0
- package/src/shared/config.mjs +1 -1
- package/src/shared/llm/cost.mjs +2 -2
- package/src/shared/open-url.mjs +37 -0
- package/src/shared/seed.mjs +20 -3
- package/src/shared/user-data-guard.mjs +3 -1
- package/setup/wizard.mjs +0 -696
|
@@ -22,6 +22,30 @@ const CATALOG_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/mode
|
|
|
22
22
|
const CATALOG_CACHE_FILE = 'litellm-catalog.json';
|
|
23
23
|
const CATALOG_TTL_MS = 24 * 60 * 60_000;
|
|
24
24
|
|
|
25
|
+
// models.dev is the second automatic pricing source. It publishes one model
// catalog per PROVIDER (cost expressed in $/M) for 140+ providers, including
// some that LiteLLM does not track yet (e.g. opencode-go). Since the payload
// is keyed provider→model, a provider-scoped lookup cannot collide:
// `deepseek-v4-pro` under `deepseek` and under `opencode-go` each resolve to
// their own rate. The 24h TTL and the disk-cache shape are shared with the
// LiteLLM catalog above.
const MODELSDEV_URL = 'https://models.dev/api.json';
const MODELSDEV_CACHE_FILE = 'modelsdev-catalog.json';
|
|
33
|
+
|
|
34
|
+
// mixdog provider id → models.dev provider id. Ids that already match
// (opencode-go / deepseek / xai / nvidia / openai / anthropic / groq /
// mistral) pass through unchanged; only the OAuth aliases and gemini→google
// need remapping.
const _MODELSDEV_PROVIDER_ALIAS = {
  'anthropic-oauth': 'anthropic',
  'openai-oauth': 'openai',
  'grok-oauth': 'xai',
  'gemini': 'google',
};

/**
 * Translate a mixdog provider id into the id used as a models.dev key.
 *
 * @param {unknown} provider - mixdog provider id; any falsy value means "no provider".
 * @returns {string|null} the lower-cased (and, where listed, aliased) provider
 *   id, or null when no provider was given.
 */
function _modelsDevProviderId(provider) {
  if (!provider) return null;
  const p = String(provider).toLowerCase();
  // Own-property lookup only: a bare `ALIAS[p] || p` would resolve inherited
  // keys such as "constructor" or "__proto__" to Object.prototype members and
  // hand a function/object back to callers that expect a string id.
  return Object.hasOwn(_MODELSDEV_PROVIDER_ALIAS, p) ? _MODELSDEV_PROVIDER_ALIAS[p] : p;
}
|
|
48
|
+
|
|
25
49
|
// Provider prefix variants used for catalog key lookup. Named constants so
|
|
26
50
|
// all three lookup sites (getModelMetadataSync, getModelMetadata, enrichModels)
|
|
27
51
|
// stay in sync. A provider needing a new prefix adds it here.
|
|
@@ -138,6 +162,85 @@ function warmFromDiskSync() {
|
|
|
138
162
|
} catch { /* disk cache unavailable — stay cold, async warm will fill later */ }
|
|
139
163
|
}
|
|
140
164
|
|
|
165
|
+
// ── models.dev catalog (second auto pricing source) ─────────────────────────
// In-memory copy of the models.dev payload; null while cold.
let _mdCache = null;
// Timestamp (epoch ms) associated with `_mdCache`; 0 while cold.
let _mdCacheAt = 0;
// Promise of the load currently in flight, if any; null otherwise.
let _mdLoadPromise = null;

/**
 * Location of the on-disk models.dev cache file.
 *
 * @returns {string} MODELSDEV_CACHE_FILE joined onto the directory returned
 *   by getPluginData().
 */
function mdCachePath() {
  const dataDir = getPluginData();
  return join(dataDir, MODELSDEV_CACHE_FILE);
}
|
|
172
|
+
/**
 * Load the models.dev catalog into the module-level cache.
 *
 * Resolution order:
 *   1. a disk cache younger than CATALOG_TTL_MS,
 *   2. a fresh download from MODELSDEV_URL (persisted to disk, best-effort),
 *   3. on download failure: whatever is already in memory, else a stale but
 *      parseable disk cache, else an empty object.
 *
 * Never rejects for I/O or network reasons; failures are logged to stderr.
 *
 * @returns {Promise<object>} the provider-keyed catalog, or `{}` when nothing
 *   is available.
 */
async function _loadModelsDevImpl() {
  // A parseable disk payload that is past its TTL. Kept so that an offline
  // run still gets pricing — the sync warm path already accepts stale disk
  // data, so the async path should not be stricter than that.
  let staleDisk = null;
  try {
    // No existsSync() pre-check: a missing file simply throws into the catch.
    const raw = JSON.parse(readFileSync(mdCachePath(), 'utf-8'));
    if (raw?.fetchedAt && raw.data) {
      if ((Date.now() - raw.fetchedAt) < CATALOG_TTL_MS) {
        _mdCache = raw.data;
        _mdCacheAt = raw.fetchedAt;
        return _mdCache;
      }
      staleDisk = raw;
    }
  } catch { /* fall through to remote */ }
  try {
    const res = await fetch(MODELSDEV_URL, { signal: AbortSignal.timeout(10_000) });
    if (!res.ok) throw new Error('HTTP ' + res.status);
    const data = await res.json();
    // Lookups index the payload as data[provider].models[id]; refuse to cache
    // (and persist for 24h) a body that is not a plain JSON object.
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      throw new Error('unexpected payload shape');
    }
    // One timestamp for both the disk record and the in-memory copy.
    const fetchedAt = Date.now();
    try {
      writeFileSync(mdCachePath(), JSON.stringify({ fetchedAt, data }));
    } catch { /* cache is best-effort */ }
    _mdCache = data;
    _mdCacheAt = fetchedAt;
    return data;
  } catch (err) {
    process.stderr.write(`[model-catalog] models.dev fetch failed: ${err.message}\n`);
    if (!_mdCache && staleDisk) {
      // Keep the original fetchedAt so the TTL check still triggers a retry
      // on the next async load.
      _mdCache = staleDisk.data;
      _mdCacheAt = staleDisk.fetchedAt;
    }
    return _mdCache || {};
  }
}
|
|
198
|
+
/**
 * Async accessor for the models.dev catalog.
 *
 * Serves the in-memory copy while it is younger than CATALOG_TTL_MS;
 * otherwise starts a load, or joins the one already in flight so concurrent
 * callers share a single load.
 *
 * @returns {Promise<object>} the catalog as produced by _loadModelsDevImpl().
 */
export async function loadModelsDevCatalog() {
  const ageMs = Date.now() - _mdCacheAt;
  if (_mdCache && ageMs < CATALOG_TTL_MS) return _mdCache;
  if (!_mdLoadPromise) {
    // Clear the slot once settled so the next expiry can start a new load.
    _mdLoadPromise = _loadModelsDevImpl().finally(() => { _mdLoadPromise = null; });
  }
  return _mdLoadPromise;
}
|
|
204
|
+
/**
 * Synchronously seed the in-memory models.dev cache from the disk cache.
 *
 * No-op when the cache is already populated. The TTL is not checked here.
 * An unreadable or unparseable file leaves the cache cold.
 */
function warmModelsDevFromDiskSync() {
  if (_mdCache) return;
  let onDisk;
  try {
    onDisk = JSON.parse(readFileSync(mdCachePath(), 'utf-8'));
  } catch {
    // cold — async loadModelsDevCatalog will fill later
    return;
  }
  if (!onDisk?.data) return;
  _mdCache = onDisk.data;
  // A record without a timestamp is stamped with the current time.
  _mdCacheAt = onDisk.fetchedAt || Date.now();
}
|
|
214
|
+
/**
 * Convert a models.dev model row (cost in $/M) into the LiteLLM-shaped row
 * that _normalize() consumes ($/token).
 *
 * Capability flags and limits are always emitted (limits may be undefined);
 * a per-token cost field is emitted only when the matching models.dev cost
 * is present (not null/undefined).
 *
 * @param {object|null|undefined} row - models.dev model entry.
 * @returns {object} LiteLLM-style metadata row.
 */
function _modelsDevRowToOverride(row) {
  const cost = (row && row.cost) || {};
  const inputModalities = row?.modalities?.input;
  const override = {
    max_input_tokens: row?.limit?.context,
    max_output_tokens: row?.limit?.output,
    mode: 'chat',
    supports_function_calling: row?.tool_call === true,
    supports_vision: Array.isArray(inputModalities) && inputModalities.includes('image'),
    supports_prompt_caching: cost.cache_read != null,
  };
  // models.dev cost key → LiteLLM per-token field.
  const perTokenFields = [
    ['input', 'input_cost_per_token'],
    ['output', 'output_cost_per_token'],
    ['cache_read', 'cache_read_input_token_cost'],
    ['cache_write', 'cache_creation_input_token_cost'],
  ];
  for (const [costKey, targetKey] of perTokenFields) {
    if (cost[costKey] != null) override[targetKey] = cost[costKey] / 1_000_000;
  }
  return override;
}
|
|
232
|
+
/**
 * Synchronous, provider-scoped metadata lookup against the models.dev cache.
 *
 * When the cache is cold it is warmed from disk; if that leaves it cold too,
 * a background load is started and this call returns null.
 *
 * @param {string} id - model id as keyed under the provider's `models` map.
 * @param {string} provider - mixdog provider id (aliased internally).
 * @returns {object|null} normalized metadata, or null when no provider was
 *   given, the cache is unavailable, or the model has no `cost` entry.
 */
function _modelsDevMetadataSync(id, provider) {
  const modelsDevProvider = _modelsDevProviderId(provider);
  if (!modelsDevProvider) return null;
  if (!_mdCache) warmModelsDevFromDiskSync();
  if (!_mdCache) {
    // Fire-and-forget: a later call benefits once the load lands.
    void loadModelsDevCatalog();
    return null;
  }
  const entry = _mdCache[modelsDevProvider]?.models?.[id];
  return entry?.cost ? _normalize(_modelsDevRowToOverride(entry)) : null;
}
|
|
243
|
+
|
|
141
244
|
/**
|
|
142
245
|
* Sync lookup. Warm order:
|
|
143
246
|
* 1. in-memory cache (hot path),
|
|
@@ -149,19 +252,37 @@ function warmFromDiskSync() {
|
|
|
149
252
|
* subsequent calls hit memory. TTL is intentionally ignored here — stale
|
|
150
253
|
* catalog beats no catalog, and the async path refreshes on schedule.
|
|
151
254
|
*/
|
|
152
|
-
export function getModelMetadataSync(id) {
|
|
255
|
+
export function getModelMetadataSync(id, provider) {
  if (!id) return null;
  const mappedProvider = provider ? _modelsDevProviderId(provider) : null;

  // 1. Manual overrides — authoritative and available offline. With a
  //    provider hint, an override only counts when it belongs to that
  //    provider, so an id shared by several providers (e.g. deepseek-v4-pro
  //    under `deepseek` vs `opencode-go`) never picks up the wrong rate.
  //    Callers passing a bare id keep the legacy, unguarded behaviour.
  const override = PRICING_OVERRIDES[id];
  if (override) {
    const belongsToProvider = !mappedProvider
      || _modelsDevProviderId(override.litellm_provider) === mappedProvider;
    if (belongsToProvider) return _normalize(override);
  }

  // 2. LiteLLM community catalog (broad mainstream coverage): exact key,
  //    then the simple provider prefixes, then the Bedrock `-v1:0` variants.
  if (!_memCache) warmFromDiskSync();
  const catalog = _memCache;
  if (catalog) {
    let hit = catalog[id];
    if (!hit) {
      for (const prefix of _CATALOG_SIMPLE_PREFIXES) {
        hit = catalog[prefix + id];
        if (hit) break;
      }
    }
    if (!hit) {
      for (const prefix of _CATALOG_BEDROCK_PREFIXES) {
        hit = catalog[prefix + id + '-v1:0'];
        if (hit) break;
      }
    }
    if (hit) return _normalize(hit);
  }

  // 3. models.dev — provider-scoped gap filler. Consulted LAST so it never
  //    shadows the sources above; it only prices what LiteLLM/overrides do
  //    not cover (e.g. opencode-go).
  if (!mappedProvider) return null;
  return _modelsDevMetadataSync(id, provider) || null;
}
|
|
@@ -209,8 +330,11 @@ export async function enrichModels(models) {
|
|
|
209
330
|
if (!meta) return m;
|
|
210
331
|
return {
|
|
211
332
|
...m,
|
|
212
|
-
|
|
213
|
-
|
|
333
|
+
// Provider-native limits are authoritative for request sizing.
|
|
334
|
+
// External catalogs are pricing/metadata fillers and may describe
|
|
335
|
+
// a public API SKU rather than the OAuth/backend route in use.
|
|
336
|
+
contextWindow: m.contextWindow || meta.contextWindow || null,
|
|
337
|
+
outputTokens: m.outputTokens || meta.outputTokens || null,
|
|
214
338
|
inputCostPerM: meta.inputCostPerM,
|
|
215
339
|
outputCostPerM: meta.outputCostPerM,
|
|
216
340
|
cacheReadCostPerM: meta.cacheReadCostPerM,
|
|
@@ -231,11 +355,20 @@ export async function enrichModels(models) {
|
|
|
231
355
|
/**
 * Drop both pricing catalogs (memory and disk) and load them again.
 *
 * @returns {Promise<object>} the result of loadCatalog() (the LiteLLM
 *   catalog); the models.dev catalog is reloaded alongside it but not
 *   returned.
 */
export async function refreshCatalog() {
  _memCache = null;
  _memCacheAt = 0;
  _mdCache = null;
  _mdCacheAt = 0;
  // Remove each disk cache independently so a failure on one file does not
  // stop the other from being cleared. LiteLLM first, then models.dev.
  for (const resolvePath of [cachePath, mdCachePath]) {
    try {
      if (existsSync(resolvePath())) {
        const { unlinkSync } = await import('fs');
        unlinkSync(resolvePath());
      }
    } catch { /* ignore */ }
  }
  const [litellm] = await Promise.all([loadCatalog(), loadModelsDevCatalog()]);
  return litellm;
}
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
import {
|
|
2
|
+
PROVIDER_FIRST_BYTE_TIMEOUT_MS,
|
|
3
|
+
PROVIDER_SSE_IDLE_TIMEOUT_MS,
|
|
4
|
+
PROVIDER_SSE_IDLE_WATCHDOG_ENABLED,
|
|
5
|
+
createTimeoutSignal,
|
|
6
|
+
providerTimeoutError,
|
|
7
|
+
} from '../stall-policy.mjs';
|
|
8
|
+
import { populateHttpStatusFromMessage } from './retry-classifier.mjs';
|
|
9
|
+
|
|
10
|
+
/**
 * Build the error used when an SSE stream ends before it is complete.
 *
 * @param {string} label - provider label used as the message prefix.
 * @param {string} [detail] - optional reason appended after a colon.
 * @returns {Error} error named `TruncatedStreamError` with
 *   `code: 'TRUNCATED_STREAM'` and `truncatedStream: true`.
 */
function truncatedCompatStreamError(label, detail) {
  const suffix = detail ? `: ${detail}` : '';
  const err = new Error(`${label} SSE stream truncated${suffix}`);
  err.name = 'TruncatedStreamError';
  err.code = 'TRUNCATED_STREAM';
  err.truncatedStream = true;
  return err;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Decode the arguments of a completed tool call.
 *
 * Completed tool_call.arguments must be valid JSON; empty/missing → {}.
 * A value that is already an object is returned as-is.
 *
 * @param {unknown} raw - arguments payload, normally a JSON string.
 * @param {string} label - provider label used in the error message.
 * @returns {*} the decoded arguments.
 * @throws {Error} a TruncatedStreamError when the text is not valid JSON.
 */
export function parseCompletedToolCallArgumentsJson(raw, label) {
  // Already-decoded arguments: String(raw) would produce "[object Object]",
  // fail JSON.parse and be misreported as a truncated stream.
  if (raw !== null && typeof raw === 'object') return raw;
  const text = typeof raw === 'string' ? raw : (raw == null ? '' : String(raw));
  const src = text === '' ? '{}' : text;
  try {
    return JSON.parse(src);
  } catch {
    throw truncatedCompatStreamError(label, 'invalid tool_call arguments JSON');
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Build the error raised when a stream produced no first event.
 *
 * @param {string} label - provider label; ` first byte` is appended to it.
 * @returns {Error} the providerTimeoutError() result, tagged with
 *   `firstByteTimeout: true`.
 */
function firstByteCompatStreamError(label) {
  return Object.assign(
    providerTimeoutError(`${label} first byte`, PROVIDER_FIRST_BYTE_TIMEOUT_MS),
    { firstByteTimeout: true },
  );
}
|
|
33
|
+
|
|
34
|
+
/**
 * Await `iterator.next()`, racing it against an abort signal and an optional
 * idle watchdog.
 *
 * @param {{ next(): Promise<{ value: any, done: boolean }> }} iterator
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - rejects the wait when aborted; if it
 *   is already aborted, `iterator.next()` is not called at all.
 * @param {number} [options.idleMs] - watchdog delay in milliseconds.
 * @param {boolean} [options.idleEnabled] - the watchdog is armed only when
 *   this is truthy and `idleMs > 0`.
 * @param {string} [options.idleLabel] - label passed to providerTimeoutError().
 * @returns {Promise<{ value: any, done: boolean }>} the iterator result.
 * @throws the signal's reason (or `Error('compat stream aborted')` when the
 *   reason is not an Error) on abort, providerTimeoutError(...) on idle
 *   timeout, or whatever `iterator.next()` rejects with.
 */
async function nextAsyncWithWatchdog(iterator, { signal, idleMs, idleEnabled, idleLabel } = {}) {
  let idleTimer = null;
  let idleTimedOut = false;
  let onAbort = null;
  const abortError = () => {
    const reason = signal.reason;
    return reason instanceof Error ? reason : new Error('compat stream aborted');
  };
  try {
    return await new Promise((resolve, reject) => {
      if (idleEnabled && idleMs > 0) {
        idleTimer = setTimeout(() => {
          idleTimedOut = true;
          // Placeholder only: the catch below raises the real timeout error.
          reject(new Error('compat SSE idle watchdog fired'));
        }, idleMs);
        // The watchdog alone must not keep the process alive.
        if (typeof idleTimer.unref === 'function') idleTimer.unref();
      }
      if (signal?.aborted) {
        reject(abortError());
        return;
      }
      if (signal) {
        onAbort = () => reject(abortError());
        signal.addEventListener('abort', onAbort, { once: true });
      }
      iterator.next().then(resolve, reject);
    });
  } catch (err) {
    if (idleTimedOut) throw providerTimeoutError(idleLabel || 'compat SSE idle', idleMs);
    throw err;
  } finally {
    // Release timer and listener on EVERY exit path. Without this, an idle
    // timeout left the abort listener attached to the caller's signal.
    if (idleTimer) clearTimeout(idleTimer);
    if (signal && onAbort) {
      try { signal.removeEventListener('abort', onAbort); } catch { /* best-effort */ }
    }
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Merge one chunk's `delta.tool_calls` fragments into the per-index
 * accumulator.
 *
 * Fragments are grouped by their `index` (0 when absent or non-numeric).
 * `id` and `type` take the latest non-empty value, the function name is set
 * once, and argument fragments are concatenated in arrival order.
 *
 * @param {Map<number, {id: string, type: string, function: {name: string, arguments: string}}>} accByIndex
 *   accumulator, mutated in place.
 * @param {Array<object>|null|undefined} deltaCalls - fragments from one chunk.
 */
function mergeToolCallDelta(accByIndex, deltaCalls) {
  for (const tc of deltaCalls || []) {
    // Skip holes / non-object entries instead of throwing on `tc.id`.
    if (tc === null || typeof tc !== 'object') continue;
    const rawIndex = Number(tc.index);
    const idx = Number.isFinite(rawIndex) ? rawIndex : 0;
    const prev = accByIndex.get(idx) || {
      id: '',
      type: 'function',
      function: { name: '', arguments: '' },
    };
    if (tc.id) prev.id = tc.id;
    if (tc.type) prev.type = tc.type;
    if (tc.function?.name && !prev.function.name) prev.function.name = tc.function.name;
    if (tc.function?.arguments) prev.function.arguments += tc.function.arguments;
    accByIndex.set(idx, prev);
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Turn the streamed tool-call accumulator into parsed tool calls.
 *
 * The accumulated entries are ordered by index, wrapped in a
 * `{ message: { tool_calls } }` choice and handed to `parseToolCalls`.
 *
 * @param {Map<number, object>} accByIndex - accumulator keyed by call index.
 * @param {(choice: object, label: string) => any} parseToolCalls
 * @param {string} label - forwarded to `parseToolCalls`.
 * @returns {any} whatever `parseToolCalls` returns, or undefined when the
 *   accumulator is empty.
 */
export function toolCallsFromStreamAcc(accByIndex, parseToolCalls, label) {
  if (!accByIndex.size) return undefined;
  const orderedIndexes = [...accByIndex.keys()].sort((left, right) => left - right);
  const orderedCalls = orderedIndexes.map((index) => accByIndex.get(index));
  return parseToolCalls({ message: { tool_calls: orderedCalls } }, label);
}
|
|
122
|
+
|
|
123
|
+
/**
 * Drain a Chat Completions stream and assemble a non-streaming-shaped
 * response from its chunks.
 *
 * Until the first chunk arrives the pending read is bound to the first-byte
 * timeout signal; afterwards it is bound to the caller's signal and, when
 * enabled, the SSE idle watchdog.
 *
 * @param {AsyncIterable<object>} stream - chat.completion.chunk objects.
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - caller cancellation.
 * @param {string} [options.label] - provider label used in error messages.
 * @param {() => void} [options.onStreamDelta] - invoked once per chunk;
 *   exceptions thrown by it are ignored.
 * @param {(choice: object, label: string) => any} [options.parseToolCalls]
 * @returns {Promise<{response: object, model: string, content: string,
 *   toolCalls: any, stopReason: string, reasoningContent: string|null,
 *   rawUsage: object|null}>}
 * @throws a first-byte error when the stream yields no chunk at all, or a
 *   truncated-stream error when no chunk carried a `finish_reason`.
 */
export async function consumeCompatChatCompletionStream(stream, { signal, label, onStreamDelta, parseToolCalls } = {}) {
  const iterator = stream[Symbol.asyncIterator]();
  const firstByteTimeout = createTimeoutSignal(signal, PROVIDER_FIRST_BYTE_TIMEOUT_MS, `${label} first byte`);
  const watchdogEnabled = PROVIDER_SSE_IDLE_WATCHDOG_ENABLED;
  const watchdogMs = PROVIDER_SSE_IDLE_TIMEOUT_MS;
  const toolAcc = new Map();
  let gotFirstChunk = false;
  let text = '';
  let reasoning = '';
  let modelName = '';
  let id = '';
  let finishReason = null;
  let usage = null;
  try {
    for (;;) {
      const step = await nextAsyncWithWatchdog(iterator, {
        // createTimeoutSignal already chains the parent signal, so the
        // first-byte signal covers caller aborts as well.
        signal: gotFirstChunk ? signal : firstByteTimeout.signal,
        idleMs: watchdogMs,
        idleEnabled: gotFirstChunk && watchdogEnabled,
        idleLabel: `${label} SSE idle`,
      });
      if (step.done) break;
      const chunk = step.value;
      if (!gotFirstChunk) {
        gotFirstChunk = true;
        firstByteTimeout.cleanup();
      }
      try { onStreamDelta?.(); } catch {}
      if (chunk?.id) id = chunk.id;
      if (chunk?.model) modelName = chunk.model;
      const delta = chunk?.choices?.[0]?.delta;
      if (delta?.content) text += delta.content;
      if (typeof delta?.reasoning_content === 'string') reasoning += delta.reasoning_content;
      mergeToolCallDelta(toolAcc, delta?.tool_calls);
      const chunkFinish = chunk?.choices?.[0]?.finish_reason;
      if (chunkFinish) finishReason = chunkFinish;
      if (chunk?.usage) usage = chunk.usage;
    }
  } finally {
    firstByteTimeout.cleanup();
  }
  // A stream that ended without any chunk is reported as a first-byte
  // failure whether or not the timer itself fired.
  if (!gotFirstChunk) throw firstByteCompatStreamError(label);
  if (!finishReason) throw truncatedCompatStreamError(label, 'no finish_reason');

  const message = { content: text || null };
  if (reasoning) message.reasoning_content = reasoning;
  // Only entries that carry an id or a function name go into the raw message.
  const rawToolCalls = [...toolAcc.keys()]
    .sort((left, right) => left - right)
    .map((index) => toolAcc.get(index))
    .filter((call) => call.id || call.function?.name);
  if (rawToolCalls.length) message.tool_calls = rawToolCalls;

  const response = {
    id: id || null,
    model: modelName || null,
    choices: [{ message, finish_reason: finishReason }],
    usage: usage || undefined,
  };
  return {
    response,
    model: modelName,
    content: text,
    toolCalls: toolCallsFromStreamAcc(toolAcc, parseToolCalls, label),
    stopReason: finishReason,
    reasoningContent: reasoning || null,
    rawUsage: usage,
  };
}
|
|
198
|
+
|
|
199
|
+
/**
 * Apply one Responses-API stream event to the accumulator `state`.
 *
 * Text deltas are appended to `state.content`; function calls are collected
 * in `state.toolCalls`. A call whose id or name is still unknown keeps a
 * `_pendingItemId` marker until `response.output_item.done` supplies the
 * missing fields. Events without a string `type`, and unknown types, are
 * ignored.
 *
 * @param {object} event - decoded stream event.
 * @param {object} state - mutable accumulator (content, model, responseId,
 *   toolCalls, pendingCalls, completed, completedResponse, sawOutput).
 * @param {object} deps
 * @param {string} deps.label - provider label for argument-parse errors.
 * @param {(resp: object, label: string) => any[]} deps.parseResponsesToolCalls
 * @param {(resp: object) => string} deps.responseOutputText
 * @param {() => void} [deps.onStreamDelta] - progress callback; exceptions
 *   thrown by it are ignored.
 * @throws {Error} on `response.failed`, `response.incomplete`, `error`, a
 *   `response.done` with status failed/incomplete, or invalid argument JSON.
 */
function handleCompatResponsesStreamEvent(event, state, { label, parseResponsesToolCalls, responseOutputText, onStreamDelta }) {
  if (!event || typeof event.type !== 'string') return;
  // Progress ping; a throwing listener must never break stream consumption.
  const notifyDelta = () => {
    try { onStreamDelta?.(); } catch {}
  };
  switch (event.type) {
    case 'response.created':
      if (event.response?.model) state.model = event.response.model;
      if (event.response?.id) state.responseId = event.response.id;
      break;
    case 'response.output_text.delta':
      state.content += event.delta || '';
      state.sawOutput = true;
      notifyDelta();
      break;
    case 'response.output_item.added':
      if (event.item?.type === 'function_call') {
        // Remember name/call_id by item id; arguments.done refers to the item id.
        state.pendingCalls.set(event.item.id || '', {
          name: event.item.name || '',
          callId: event.item.call_id || '',
        });
      }
      notifyDelta();
      break;
    case 'response.function_call_arguments.delta':
      notifyDelta();
      break;
    case 'response.function_call_arguments.done': {
      const itemId = event.item_id || '';
      const pending = state.pendingCalls.get(itemId);
      const call = {
        id: pending?.callId || event.call_id || '',
        name: pending?.name || event.name || '',
        arguments: parseCompletedToolCallArgumentsJson(event.arguments, label),
        _pendingItemId: itemId,
      };
      state.toolCalls.push(call);
      // Fully identified calls drop the marker right away.
      if (call.id && call.name) delete call._pendingItemId;
      notifyDelta();
      break;
    }
    case 'response.output_item.done': {
      const item = event.item || {};
      if (item.type === 'function_call') {
        const tc = state.toolCalls.find(t => t._pendingItemId === (item.id || ''));
        if (tc) {
          if (!tc.id && item.call_id) tc.id = item.call_id;
          if (!tc.name && item.name) tc.name = item.name;
          if (tc.id && tc.name) delete tc._pendingItemId;
        } else if (
          item.call_id
          && item.name
          // A call already resolved by arguments.done no longer carries
          // `_pendingItemId`, so the lookup above misses it. Without this
          // guard the same call would be recorded twice.
          && !state.toolCalls.some(t => t.id === item.call_id)
        ) {
          // Salvage path: no arguments.done was seen for this item.
          state.toolCalls.push({
            id: item.call_id,
            name: item.name,
            arguments: parseCompletedToolCallArgumentsJson(item.arguments, label),
          });
        }
      }
      notifyDelta();
      break;
    }
    case 'response.completed': {
      const resp = event.response || {};
      state.completed = true;
      state.completedResponse = resp;
      // The final response only fills what the deltas did not provide.
      if (!state.model && resp.model) state.model = resp.model;
      if (!state.responseId && resp.id) state.responseId = resp.id;
      if (!state.content) state.content = responseOutputText(resp);
      if (!state.toolCalls.length) {
        const parsed = parseResponsesToolCalls(resp, label);
        if (parsed?.length) state.toolCalls.push(...parsed.map(t => ({ ...t })));
      }
      notifyDelta();
      break;
    }
    case 'response.done':
      if (!event.response || event.response.status === 'completed') state.completed = true;
      else if (event.response.status === 'failed') {
        const msg = event.response?.error?.message || 'response.done failed';
        const err = new Error(`xAI Responses stream response.done failed: ${msg}`);
        populateHttpStatusFromMessage(err, msg);
        throw err;
      } else if (event.response.status === 'incomplete') {
        throw new Error(`xAI Responses stream response.done incomplete: ${event.response?.incomplete_details?.reason || 'incomplete'}`);
      }
      break;
    case 'response.failed': {
      const msg = event.response?.error?.message || event.error?.message || event.message || 'response.failed';
      const err = new Error(`xAI Responses stream response.failed: ${msg}`);
      populateHttpStatusFromMessage(err, msg);
      throw err;
    }
    case 'response.incomplete':
      throw new Error(`xAI Responses stream response.incomplete: ${event.response?.incomplete_details?.reason || 'incomplete'}`);
    case 'error': {
      const msg = event.message || event.error?.message || 'unknown';
      const err = new Error(`xAI Responses stream error: ${msg}`);
      populateHttpStatusFromMessage(err, msg);
      throw err;
    }
    default:
      break;
  }
}
|
|
299
|
+
|
|
300
|
+
/**
 * Drain a Responses-API event stream and assemble the final result.
 *
 * Until the first event arrives the pending read is bound to the first-byte
 * timeout signal; afterwards it is bound to the caller's signal and, when
 * enabled, the SSE idle watchdog. Each event is folded into a state object
 * by handleCompatResponsesStreamEvent().
 *
 * @param {AsyncIterable<object>} stream - decoded Responses stream events.
 * @param {object} [options]
 * @param {AbortSignal} [options.signal] - caller cancellation.
 * @param {string} [options.label] - provider label used in error messages.
 * @param {() => void} [options.onStreamDelta] - progress callback.
 * @param {(resp: object, label: string) => any[]} [options.parseResponsesToolCalls]
 * @param {(resp: object) => string} [options.responseOutputText]
 * @returns {Promise<{response: object, content: string, toolCalls: any[],
 *   model: string|null, responseId: string|null}>}
 * @throws a first-byte error when the stream yields no event, a
 *   truncated-stream error when it never completed, or an Error when a
 *   function call could not be matched to a call_id/name.
 */
export async function consumeCompatResponsesStream(stream, {
  signal,
  label,
  onStreamDelta,
  parseResponsesToolCalls,
  responseOutputText,
} = {}) {
  const iterator = stream[Symbol.asyncIterator]();
  const firstByteTimeout = createTimeoutSignal(signal, PROVIDER_FIRST_BYTE_TIMEOUT_MS, `${label} first byte`);
  const idleEnabled = PROVIDER_SSE_IDLE_WATCHDOG_ENABLED;
  const idleMs = PROVIDER_SSE_IDLE_TIMEOUT_MS;
  const state = {
    content: '',
    model: '',
    responseId: '',
    toolCalls: [],
    pendingCalls: new Map(),
    completed: false,
    completedResponse: null,
    sawOutput: false,
  };
  let sawFirstEvent = false;
  const deps = { label, parseResponsesToolCalls, responseOutputText, onStreamDelta };
  try {
    while (true) {
      const { value: event, done } = await nextAsyncWithWatchdog(iterator, {
        signal: sawFirstEvent ? signal : firstByteTimeout.signal,
        idleMs,
        idleEnabled: sawFirstEvent && idleEnabled,
        idleLabel: `${label} SSE idle`,
      });
      if (done) break;
      if (!sawFirstEvent) {
        sawFirstEvent = true;
        firstByteTimeout.cleanup();
      }
      handleCompatResponsesStreamEvent(event, state, deps);
    }
  } finally {
    firstByteTimeout.cleanup();
  }
  // A stream that ended without any event is reported as a first-byte
  // failure whether or not the timer itself fired.
  if (!sawFirstEvent) throw firstByteCompatStreamError(label);
  if (!state.completed) throw truncatedCompatStreamError(label, 'no response.completed');
  // Detect unresolved calls by marker PRESENCE, not truthiness: a call whose
  // item id was empty carries `_pendingItemId: ''`, which a truthiness test
  // would let through with no call_id/name (the `|| '?'` below covers it).
  const unresolved = state.toolCalls.find(t => '_pendingItemId' in t);
  if (unresolved) {
    throw new Error(`xAI Responses stream function_call salvage failed: missing call_id/name for item_id=${unresolved._pendingItemId || '?'}`);
  }
  const response = state.completedResponse || {
    id: state.responseId || null,
    model: state.model || null,
    output_text: state.content,
    output: [],
  };
  const toolCalls = state.toolCalls.length
    ? state.toolCalls.map(({ _pendingItemId, ...t }) => t)
    : parseResponsesToolCalls(response, label);
  return {
    response,
    content: state.content || responseOutputText(response),
    toolCalls,
    model: state.model || response.model || null,
    responseId: state.responseId || response.id || null,
  };
}
|