mixdog 0.7.7 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,30 @@ const CATALOG_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/mode
22
22
  const CATALOG_CACHE_FILE = 'litellm-catalog.json';
23
23
  const CATALOG_TTL_MS = 24 * 60 * 60_000;
24
24
 
25
+ // Second auto pricing source: models.dev publishes per-PROVIDER model
26
+ // catalogs (cost in $/M) for 140+ providers — including ones LiteLLM does not
27
+ // track yet (e.g. opencode-go). Because it is keyed provider→model, a
28
+ // provider-scoped lookup is collision-free: deepseek-v4-pro under `deepseek`
29
+ // and under `opencode-go` resolve to their own distinct rates. Same 24h TTL
30
+ // + disk cache shape as the LiteLLM catalog above.
31
+ const MODELSDEV_URL = 'https://models.dev/api.json';
32
+ const MODELSDEV_CACHE_FILE = 'modelsdev-catalog.json';
33
+
// mixdog provider id → models.dev provider id. Identity for ids that already
// match (opencode-go / deepseek / xai / nvidia / openai / anthropic / groq /
// mistral); only the OAuth aliases and gemini→google need remapping.
const _MODELSDEV_PROVIDER_ALIAS = {
  'anthropic-oauth': 'anthropic',
  'openai-oauth': 'openai',
  'grok-oauth': 'xai',
  'gemini': 'google',
};

/**
 * Translate a mixdog provider id into the key models.dev uses for it.
 *
 * @param {*} provider  Provider id; falsy values yield null, anything else is
 *                      stringified and lower-cased before the lookup.
 * @returns {string|null} The aliased id, or the lower-cased input when no
 *                        alias is registered.
 */
function _modelsDevProviderId(provider) {
  if (!provider) return null;
  const p = String(provider).toLowerCase();
  // Own-property lookup only: a plain `ALIAS[p] || p` would return inherited
  // Object.prototype members (e.g. the `constructor` function) for ids such
  // as "constructor" or "__proto__" instead of a provider string.
  return Object.hasOwn(_MODELSDEV_PROVIDER_ALIAS, p) ? _MODELSDEV_PROVIDER_ALIAS[p] : p;
}
48
+
25
49
  // Provider prefix variants used for catalog key lookup. Named constants so
26
50
  // all three lookup sites (getModelMetadataSync, getModelMetadata, enrichModels)
27
51
  // stay in sync. A provider needing a new prefix adds it here.
@@ -138,6 +162,85 @@ function warmFromDiskSync() {
138
162
  } catch { /* disk cache unavailable — stay cold, async warm will fill later */ }
139
163
  }
140
164
 
165
+ // ── models.dev catalog (second auto pricing source) ─────────────────────────
166
+ let _mdCache = null;
167
+ let _mdCacheAt = 0;
168
+ let _mdLoadPromise = null;
/**
 * Location of the models.dev disk cache: MODELSDEV_CACHE_FILE joined onto
 * whatever directory getPluginData() reports.
 */
function mdCachePath() {
  const dataDir = getPluginData();
  return join(dataDir, MODELSDEV_CACHE_FILE);
}
/**
 * Fill the in-memory models.dev catalog.
 *
 * Resolution order:
 *   1. disk cache younger than CATALOG_TTL_MS;
 *   2. remote fetch of MODELSDEV_URL (10s timeout), persisted best-effort;
 *   3. if the fetch fails: the catalog already in memory, else the expired
 *      disk copy, else `{}` — a stale catalog beats no catalog.
 *
 * Fetch failures are logged to stderr rather than propagated.
 *
 * @returns {Promise<object>} provider-keyed catalog (possibly empty).
 */
async function _loadModelsDevImpl() {
  // Expired-but-parseable disk payload, kept as a last-resort fallback.
  let expiredDisk = null;
  try {
    if (existsSync(mdCachePath())) {
      const raw = JSON.parse(readFileSync(mdCachePath(), 'utf-8'));
      if (raw?.data) {
        if (raw.fetchedAt && (Date.now() - raw.fetchedAt) < CATALOG_TTL_MS) {
          _mdCache = raw.data;
          _mdCacheAt = raw.fetchedAt;
          return _mdCache;
        }
        expiredDisk = raw;
      }
    }
  } catch { /* fall through to remote */ }
  try {
    const res = await fetch(MODELSDEV_URL, { signal: AbortSignal.timeout(10_000) });
    if (!res.ok) throw new Error('HTTP ' + res.status);
    const data = await res.json();
    // Refuse payloads that cannot be a provider map so garbage never
    // overwrites a usable cache on disk or in memory.
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      throw new Error('unexpected payload shape');
    }
    try {
      writeFileSync(mdCachePath(), JSON.stringify({ fetchedAt: Date.now(), data }));
    } catch { /* cache is best-effort */ }
    _mdCache = data;
    _mdCacheAt = Date.now();
    return data;
  } catch (err) {
    process.stderr.write(`[model-catalog] models.dev fetch failed: ${err.message}\n`);
    if (!_mdCache && expiredDisk) {
      // Keep the original (expired) timestamp so the next async load still
      // sees the cache as out of date and retries the network.
      _mdCache = expiredDisk.data;
      _mdCacheAt = expiredDisk.fetchedAt || 0;
    }
    return _mdCache || {};
  }
}
/**
 * Async accessor for the models.dev catalog.
 *
 * Serves the in-memory copy while it is younger than CATALOG_TTL_MS;
 * otherwise starts (or joins) a single in-flight load so concurrent callers
 * share one promise.
 */
export async function loadModelsDevCatalog() {
  const memoryIsFresh = _mdCache && (Date.now() - _mdCacheAt) < CATALOG_TTL_MS;
  if (memoryIsFresh) return _mdCache;
  if (!_mdLoadPromise) {
    // Clear the slot once settled so a later call can trigger a new load.
    _mdLoadPromise = _loadModelsDevImpl().finally(() => {
      _mdLoadPromise = null;
    });
  }
  return _mdLoadPromise;
}
/**
 * Synchronously seed the in-memory models.dev catalog from the disk cache.
 * No-op when memory is already populated or the file is missing/unparseable;
 * the TTL is not checked here.
 */
function warmModelsDevFromDiskSync() {
  if (_mdCache) return;
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(mdCachePath(), 'utf-8'));
  } catch {
    // cold — async loadModelsDevCatalog will fill later
    return;
  }
  if (!parsed?.data) return;
  _mdCache = parsed.data;
  _mdCacheAt = parsed.fetchedAt || Date.now();
}
/**
 * Convert a models.dev model row into the LiteLLM-style row that
 * _normalize() consumes.
 *
 * models.dev quotes cost per million tokens; each cost that is present is
 * divided by 1,000,000 to give a per-token rate. Cost keys that are absent
 * (null/undefined) are left off the result entirely.
 *
 * @param {object|null|undefined} row  models.dev model entry.
 * @returns {object} LiteLLM-shaped row.
 */
function _modelsDevRowToOverride(row) {
  const cost = (row && row.cost) || {};
  const inputModalities = row?.modalities?.input;
  const override = {
    max_input_tokens: row?.limit?.context,
    max_output_tokens: row?.limit?.output,
    mode: 'chat',
    supports_function_calling: row?.tool_call === true,
    supports_vision: Array.isArray(inputModalities) && inputModalities.includes('image'),
    supports_prompt_caching: cost.cache_read != null,
  };
  // [models.dev cost key, LiteLLM per-token key] — order fixes key order.
  const costFields = [
    ['input', 'input_cost_per_token'],
    ['output', 'output_cost_per_token'],
    ['cache_read', 'cache_read_input_token_cost'],
    ['cache_write', 'cache_creation_input_token_cost'],
  ];
  for (const [sourceKey, targetKey] of costFields) {
    const perMillion = cost[sourceKey];
    if (perMillion != null) override[targetKey] = perMillion / 1_000_000;
  }
  return override;
}
/**
 * Synchronous, provider-scoped models.dev lookup.
 *
 * Returns null when the provider is missing, when no catalog is available
 * yet (a background load is kicked off in that case), or when the model row
 * is absent or carries no cost data. Otherwise returns the normalized row.
 */
function _modelsDevMetadataSync(id, provider) {
  const providerKey = _modelsDevProviderId(provider);
  if (!providerKey) return null;
  if (!_mdCache) warmModelsDevFromDiskSync();
  if (!_mdCache) {
    // Nothing in memory or on disk: start the async load, answer null now.
    void loadModelsDevCatalog();
    return null;
  }
  const entry = _mdCache?.[providerKey]?.models?.[id];
  return entry && entry.cost ? _normalize(_modelsDevRowToOverride(entry)) : null;
}
243
+
141
244
  /**
142
245
  * Sync lookup. Warm order:
143
246
  * 1. in-memory cache (hot path),
@@ -149,19 +252,37 @@ function warmFromDiskSync() {
149
252
  * subsequent calls hit memory. TTL is intentionally ignored here — stale
150
253
  * catalog beats no catalog, and the async path refreshes on schedule.
151
254
  */
152
- export function getModelMetadataSync(id) {
255
+ export function getModelMetadataSync(id, provider) {
153
256
  if (!id) return null;
154
- if (PRICING_OVERRIDES[id]) return _normalize(PRICING_OVERRIDES[id]);
257
+ const mappedProvider = provider ? _modelsDevProviderId(provider) : null;
258
+ // 1. Manual overrides — authoritative + offline. Provider-guarded: when a
259
+ // provider hint is given, an override is only honoured if it belongs to
260
+ // that provider, so a model id shared across providers (e.g.
261
+ // deepseek-v4-pro under `deepseek` vs `opencode-go`) never leaks the
262
+ // wrong provider's rate. Bare-id callers keep the legacy behaviour.
263
+ const ov = PRICING_OVERRIDES[id];
264
+ if (ov && (!mappedProvider || _modelsDevProviderId(ov.litellm_provider) === mappedProvider)) {
265
+ return _normalize(ov);
266
+ }
267
+ // 2. LiteLLM community catalog (broad mainstream coverage).
155
268
  if (!_memCache) warmFromDiskSync();
156
- if (!_memCache) return null;
157
- const catalog = _memCache;
158
- if (catalog[id]) return _normalize(catalog[id]);
159
- for (const prefix of _CATALOG_SIMPLE_PREFIXES) {
160
- if (catalog[prefix + id]) return _normalize(catalog[prefix + id]);
269
+ if (_memCache) {
270
+ const catalog = _memCache;
271
+ if (catalog[id]) return _normalize(catalog[id]);
272
+ for (const prefix of _CATALOG_SIMPLE_PREFIXES) {
273
+ if (catalog[prefix + id]) return _normalize(catalog[prefix + id]);
274
+ }
275
+ for (const prefix of _CATALOG_BEDROCK_PREFIXES) {
276
+ const v1 = catalog[prefix + id + '-v1:0'];
277
+ if (v1) return _normalize(v1);
278
+ }
161
279
  }
162
- for (const prefix of _CATALOG_BEDROCK_PREFIXES) {
163
- const v1 = catalog[prefix + id + '-v1:0'];
164
- if (v1) return _normalize(v1);
280
+ // 3. models.dev — provider-scoped gap filler (collision-free, auto 24h).
281
+ // Placed LAST so it never shadows the authoritative sources above; it
282
+ // only prices what LiteLLM/overrides do not cover (e.g. opencode-go).
283
+ if (mappedProvider) {
284
+ const md = _modelsDevMetadataSync(id, provider);
285
+ if (md) return md;
165
286
  }
166
287
  return null;
167
288
  }
@@ -209,8 +330,11 @@ export async function enrichModels(models) {
209
330
  if (!meta) return m;
210
331
  return {
211
332
  ...m,
212
- contextWindow: meta.contextWindow || m.contextWindow || null,
213
- outputTokens: meta.outputTokens || m.outputTokens || null,
333
+ // Provider-native limits are authoritative for request sizing.
334
+ // External catalogs are pricing/metadata fillers and may describe
335
+ // a public API SKU rather than the OAuth/backend route in use.
336
+ contextWindow: m.contextWindow || meta.contextWindow || null,
337
+ outputTokens: m.outputTokens || meta.outputTokens || null,
214
338
  inputCostPerM: meta.inputCostPerM,
215
339
  outputCostPerM: meta.outputCostPerM,
216
340
  cacheReadCostPerM: meta.cacheReadCostPerM,
@@ -231,11 +355,20 @@ export async function enrichModels(models) {
231
355
  export async function refreshCatalog() {
232
356
  _memCache = null;
233
357
  _memCacheAt = 0;
358
+ _mdCache = null;
359
+ _mdCacheAt = 0;
234
360
  try {
235
361
  if (existsSync(cachePath())) {
236
362
  const fs = await import('fs');
237
363
  fs.unlinkSync(cachePath());
238
364
  }
239
365
  } catch { /* ignore */ }
240
- return loadCatalog();
366
+ try {
367
+ if (existsSync(mdCachePath())) {
368
+ const fs = await import('fs');
369
+ fs.unlinkSync(mdCachePath());
370
+ }
371
+ } catch { /* ignore */ }
372
+ const [litellm] = await Promise.all([loadCatalog(), loadModelsDevCatalog()]);
373
+ return litellm;
241
374
  }
@@ -0,0 +1,366 @@
1
+ import {
2
+ PROVIDER_FIRST_BYTE_TIMEOUT_MS,
3
+ PROVIDER_SSE_IDLE_TIMEOUT_MS,
4
+ PROVIDER_SSE_IDLE_WATCHDOG_ENABLED,
5
+ createTimeoutSignal,
6
+ providerTimeoutError,
7
+ } from '../stall-policy.mjs';
8
+ import { populateHttpStatusFromMessage } from './retry-classifier.mjs';
9
+
/**
 * Build the error used when an SSE stream ends before it is complete.
 * The result is tagged with name `TruncatedStreamError`, code
 * `TRUNCATED_STREAM` and `truncatedStream: true`.
 *
 * @param {string} label   Prefix for the message.
 * @param {string} [detail] Optional suffix appended after ": ".
 * @returns {Error}
 */
function truncatedCompatStreamError(label, detail) {
  const suffix = detail ? `: ${detail}` : '';
  const err = new Error(`${label} SSE stream truncated${suffix}`);
  err.name = 'TruncatedStreamError';
  err.code = 'TRUNCATED_STREAM';
  err.truncatedStream = true;
  return err;
}
16
+
/**
 * Decode the `arguments` of a completed tool call.
 *
 * - missing, empty or whitespace-only input → `{}`;
 * - an already-decoded plain object is returned untouched (stringifying it
 *   would produce "[object Object]" and misreport a complete call as a
 *   truncated stream);
 * - anything else must be valid JSON text.
 *
 * @param {*} raw         Arguments payload as received.
 * @param {string} label  Prefix for the error message.
 * @returns {*} The decoded arguments.
 * @throws {Error} TruncatedStreamError when the text is not valid JSON.
 */
export function parseCompletedToolCallArgumentsJson(raw, label) {
  if (raw !== null && typeof raw === 'object' && !Array.isArray(raw)) return raw;
  const text = typeof raw === 'string' ? raw : (raw == null ? '' : String(raw));
  // Whitespace-only is treated like empty: there is nothing to decode.
  const src = text.trim() === '' ? '{}' : text;
  try {
    return JSON.parse(src);
  } catch {
    throw truncatedCompatStreamError(label, 'invalid tool_call arguments JSON');
  }
}
27
+
/**
 * Provider timeout error for a stream that produced no first event, built
 * with PROVIDER_FIRST_BYTE_TIMEOUT_MS and flagged `firstByteTimeout: true`.
 */
function firstByteCompatStreamError(label) {
  const timeoutError = providerTimeoutError(`${label} first byte`, PROVIDER_FIRST_BYTE_TIMEOUT_MS);
  return Object.assign(timeoutError, { firstByteTimeout: true });
}
33
+
/**
 * Await `iterator.next()` while racing it against an abort signal and an
 * optional idle timer.
 *
 * @param {{ next: Function }} iterator  Async iterator to advance once.
 * @param {object} [opts]
 * @param {AbortSignal} [opts.signal]    Rejects the read when aborted; the
 *        rejection is `signal.reason` if it is an Error, else a generic one.
 * @param {number} [opts.idleMs]         Idle budget in milliseconds.
 * @param {boolean} [opts.idleEnabled]   Idle timer runs only when true and
 *        `idleMs > 0`.
 * @param {string} [opts.idleLabel]      Label for the idle timeout error.
 * @returns {Promise<{ value: *, done: boolean }>} the iterator result.
 */
async function nextAsyncWithWatchdog(iterator, { signal, idleMs, idleEnabled, idleLabel } = {}) {
  let idleTimer = null;
  let onAbort = null;
  let idleTimedOut = false;
  const abortError = () => {
    const reason = signal.reason;
    return reason instanceof Error ? reason : new Error('compat stream aborted');
  };
  try {
    return await new Promise((resolve, reject) => {
      if (signal?.aborted) {
        reject(abortError());
        return;
      }
      if (idleEnabled && idleMs > 0) {
        idleTimer = setTimeout(() => {
          // The rejection value is replaced in the catch below, so only the
          // flag matters here. (The previous code built an error with
          // code 'ETIMEDOUT' at this point that never reached the caller.)
          idleTimedOut = true;
          reject(new Error('compat SSE idle'));
        }, idleMs);
        // Do not let a pending watchdog keep the process alive.
        if (typeof idleTimer.unref === 'function') idleTimer.unref();
      }
      if (signal) {
        onAbort = () => reject(abortError());
        signal.addEventListener('abort', onAbort, { once: true });
      }
      iterator.next().then(resolve, reject);
    });
  } catch (err) {
    if (idleTimedOut) throw providerTimeoutError(idleLabel || 'compat SSE idle', idleMs);
    throw err;
  } finally {
    // Single release point for every outcome. Previously the abort listener
    // stayed attached to the signal after an idle timeout.
    if (idleTimer) clearTimeout(idleTimer);
    if (signal && onAbort) {
      try { signal.removeEventListener('abort', onAbort); } catch {}
    }
  }
}
94
+
/**
 * Fold one batch of streamed tool-call deltas into the per-index accumulator.
 *
 * For each delta: `id` and `type` overwrite, the function name is taken from
 * the first delta that carries one, and argument fragments are concatenated.
 * A missing or non-numeric `index` is treated as 0.
 *
 * @param {Map<number, object>} accByIndex  Accumulator, mutated in place.
 * @param {Iterable<object>|null|undefined} deltaCalls  `delta.tool_calls`.
 */
function mergeToolCallDelta(accByIndex, deltaCalls) {
  for (const tc of deltaCalls || []) {
    // Skip null/primitive entries: the index lookup tolerated them but the
    // property reads below would throw a TypeError.
    if (!tc || typeof tc !== 'object') continue;
    const rawIndex = Number(tc.index);
    const idx = Number.isFinite(rawIndex) ? rawIndex : 0;
    const prev = accByIndex.get(idx) || {
      id: '',
      type: 'function',
      function: { name: '', arguments: '' },
    };
    if (tc.id) prev.id = tc.id;
    if (tc.type) prev.type = tc.type;
    if (tc.function?.name && !prev.function.name) prev.function.name = tc.function.name;
    if (tc.function?.arguments) prev.function.arguments += tc.function.arguments;
    accByIndex.set(idx, prev);
  }
}
110
+
/**
 * Hand the accumulated tool calls, ordered by stream index, to
 * `parseToolCalls` wrapped as `{ message: { tool_calls } }`.
 *
 * @returns {*} `undefined` when nothing was accumulated, otherwise whatever
 *              `parseToolCalls(choice, label)` returns.
 */
export function toolCallsFromStreamAcc(accByIndex, parseToolCalls, label) {
  if (!accByIndex.size) return undefined;
  const orderedIndexes = Array.from(accByIndex.keys()).sort((left, right) => left - right);
  const orderedCalls = orderedIndexes.map((index) => accByIndex.get(index));
  return parseToolCalls({ message: { tool_calls: orderedCalls } }, label);
}
122
+
/**
 * Drain an OpenAI-compatible chat-completion stream into a single response.
 *
 * The first read is bounded by the first-byte timeout signal; subsequent
 * reads use the caller's signal plus the SSE idle watchdog (when enabled).
 * Content, reasoning content and tool-call deltas of `choices[0]` are
 * accumulated; id, model, finish_reason and usage keep the last value seen.
 *
 * @param {AsyncIterable<object>} stream  Stream of completion chunks.
 * @param {object} [opts]
 * @param {AbortSignal} [opts.signal]     Caller cancellation.
 * @param {string} [opts.label]           Prefix for error messages.
 * @param {Function} [opts.onStreamDelta] Called once per chunk; its errors
 *                                        are ignored.
 * @param {Function} [opts.parseToolCalls] `(choice, label)` → parsed calls.
 * @returns {Promise<object>} `{ response, model, content, toolCalls,
 *          stopReason, reasoningContent, rawUsage }`.
 * @throws first-byte timeout error when the stream ends without any chunk;
 *         TruncatedStreamError when no finish_reason was received.
 */
export async function consumeCompatChatCompletionStream(stream, { signal, label, onStreamDelta, parseToolCalls } = {}) {
  const iterator = stream[Symbol.asyncIterator]();
  const firstByteTimeout = createTimeoutSignal(signal, PROVIDER_FIRST_BYTE_TIMEOUT_MS, `${label} first byte`);
  const idleEnabled = PROVIDER_SSE_IDLE_WATCHDOG_ENABLED;
  const idleMs = PROVIDER_SSE_IDLE_TIMEOUT_MS;
  let sawFirstEvent = false;
  let exhausted = false;
  let content = '';
  let reasoningContent = '';
  let model = '';
  let responseId = '';
  let stopReason = null;
  let rawUsage = null;
  const toolAcc = new Map();
  try {
    while (true) {
      const { value: chunk, done } = await nextAsyncWithWatchdog(iterator, {
        // Until the first SSE chunk, bound the pending read to the
        // first-byte timer (createTimeoutSignal already chains parent).
        signal: sawFirstEvent ? signal : firstByteTimeout.signal,
        idleMs,
        idleEnabled: sawFirstEvent && idleEnabled,
        idleLabel: `${label} SSE idle`,
      });
      if (done) {
        exhausted = true;
        break;
      }
      if (!sawFirstEvent) {
        sawFirstEvent = true;
        firstByteTimeout.cleanup();
      }
      try { onStreamDelta?.(); } catch {}
      if (chunk?.id) responseId = chunk.id;
      if (chunk?.model) model = chunk.model;
      const choice = chunk?.choices?.[0];
      if (choice?.delta?.content) content += choice.delta.content;
      if (typeof choice?.delta?.reasoning_content === 'string') {
        reasoningContent += choice.delta.reasoning_content;
      }
      mergeToolCallDelta(toolAcc, choice?.delta?.tool_calls);
      if (choice?.finish_reason) stopReason = choice.finish_reason;
      if (chunk?.usage) rawUsage = chunk.usage;
    }
  } finally {
    firstByteTimeout.cleanup();
    // Manual next() calls bypass the automatic return() that `for await`
    // performs on early exit, so close the iterator ourselves when we leave
    // before it is exhausted. Not awaited: on an async generator a return()
    // queues behind a stalled next() and would block this error path.
    if (!exhausted && typeof iterator.return === 'function') {
      try {
        Promise.resolve(iterator.return()).catch(() => {});
      } catch { /* closing is best-effort */ }
    }
  }
  // Reached only when the stream ended cleanly with zero chunks (timeouts
  // and aborts reject inside the loop). The previous if/else had two
  // identical branches, so one throw preserves the behaviour.
  if (!sawFirstEvent) throw firstByteCompatStreamError(label);
  if (!stopReason) {
    throw truncatedCompatStreamError(label, 'no finish_reason');
  }
  const message = {
    content: content || null,
    ...(reasoningContent ? { reasoning_content: reasoningContent } : {}),
  };
  const rawToolCalls = [...toolAcc.entries()]
    .sort((a, b) => a[0] - b[0])
    .map(([, v]) => v)
    .filter(tc => tc.id || tc.function?.name);
  if (rawToolCalls.length) message.tool_calls = rawToolCalls;
  const response = {
    id: responseId || null,
    model: model || null,
    choices: [{ message, finish_reason: stopReason }],
    usage: rawUsage || undefined,
  };
  return {
    response,
    model,
    content,
    toolCalls: toolCallsFromStreamAcc(toolAcc, parseToolCalls, label),
    stopReason,
    reasoningContent: reasoningContent || null,
    rawUsage,
  };
}
198
+
/**
 * Apply one Responses-API stream event to the accumulator `state`.
 *
 * Events without a string `type`, and unknown types, are ignored.
 * Failure-type events (`response.failed`, `response.incomplete`, `error`,
 * and `response.done` with status failed/incomplete) throw.
 *
 * @param {object} event  Stream event.
 * @param {object} state  Mutable accumulator: content, model, responseId,
 *        toolCalls, pendingCalls (Map keyed by item id), completed,
 *        completedResponse, sawOutput.
 * @param {object} deps   `{ label, parseResponsesToolCalls,
 *        responseOutputText, onStreamDelta }`.
 */
function handleCompatResponsesStreamEvent(event, state, { label, parseResponsesToolCalls, responseOutputText, onStreamDelta }) {
  if (!event || typeof event.type !== 'string') return;
  // Progress callback; its failures must never break stream handling.
  const notify = () => {
    try { onStreamDelta?.(); } catch {}
  };
  switch (event.type) {
    case 'response.created':
      if (event.response?.model) state.model = event.response.model;
      if (event.response?.id) state.responseId = event.response.id;
      break;
    case 'response.output_text.delta':
      state.content += event.delta || '';
      state.sawOutput = true;
      notify();
      break;
    case 'response.output_item.added':
      if (event.item?.type === 'function_call') {
        // Remember name/call_id so the later arguments.done event, which is
        // looked up by item id, can be resolved.
        state.pendingCalls.set(event.item.id || '', {
          name: event.item.name || '',
          callId: event.item.call_id || '',
        });
      }
      notify();
      break;
    case 'response.function_call_arguments.delta':
      notify();
      break;
    case 'response.function_call_arguments.done': {
      const itemId = event.item_id || '';
      const pending = state.pendingCalls.get(itemId);
      const call = {
        id: pending?.callId || event.call_id || '',
        name: pending?.name || event.name || '',
        arguments: parseCompletedToolCallArgumentsJson(event.arguments, label),
        _pendingItemId: itemId,
      };
      state.toolCalls.push(call);
      // The marker stays only while id or name is still missing, so that
      // output_item.done can fill the gap.
      if (call.id && call.name) delete call._pendingItemId;
      notify();
      break;
    }
    case 'response.output_item.done': {
      const item = event.item || {};
      if (item.type === 'function_call') {
        const itemId = item.id || '';
        const unresolved = state.toolCalls.find(t => t._pendingItemId === itemId);
        if (unresolved) {
          if (!unresolved.id && item.call_id) unresolved.id = item.call_id;
          if (!unresolved.name && item.name) unresolved.name = item.name;
          if (unresolved.id && unresolved.name) delete unresolved._pendingItemId;
        } else if (
          item.call_id
          && item.name
          // A call already resolved by arguments.done has lost its
          // _pendingItemId, so match on call_id to avoid recording the same
          // call twice.
          && !state.toolCalls.some(t => t.id === item.call_id)
        ) {
          state.toolCalls.push({
            id: item.call_id,
            name: item.name,
            arguments: parseCompletedToolCallArgumentsJson(item.arguments, label),
          });
        }
      }
      notify();
      break;
    }
    case 'response.completed': {
      const resp = event.response || {};
      state.completed = true;
      state.completedResponse = resp;
      if (!state.model && resp.model) state.model = resp.model;
      if (!state.responseId && resp.id) state.responseId = resp.id;
      // Fall back to the final payload only for what the deltas did not give.
      if (!state.content) state.content = responseOutputText(resp);
      if (!state.toolCalls.length) {
        const parsed = parseResponsesToolCalls(resp, label);
        if (parsed?.length) state.toolCalls.push(...parsed.map(t => ({ ...t })));
      }
      notify();
      break;
    }
    case 'response.done':
      if (!event.response || event.response.status === 'completed') state.completed = true;
      else if (event.response.status === 'failed') {
        const msg = event.response?.error?.message || 'response.done failed';
        const err = new Error(`xAI Responses stream response.done failed: ${msg}`);
        populateHttpStatusFromMessage(err, msg);
        throw err;
      } else if (event.response.status === 'incomplete') {
        throw new Error(`xAI Responses stream response.done incomplete: ${event.response?.incomplete_details?.reason || 'incomplete'}`);
      }
      break;
    case 'response.failed': {
      const msg = event.response?.error?.message || event.error?.message || event.message || 'response.failed';
      const err = new Error(`xAI Responses stream response.failed: ${msg}`);
      populateHttpStatusFromMessage(err, msg);
      throw err;
    }
    case 'response.incomplete':
      throw new Error(`xAI Responses stream response.incomplete: ${event.response?.incomplete_details?.reason || 'incomplete'}`);
    case 'error': {
      const msg = event.message || event.error?.message || 'unknown';
      const err = new Error(`xAI Responses stream error: ${msg}`);
      populateHttpStatusFromMessage(err, msg);
      throw err;
    }
    default:
      break;
  }
}
299
+
/**
 * Drain a Responses-API event stream into a single result.
 *
 * The first read is bounded by the first-byte timeout signal; subsequent
 * reads use the caller's signal plus the SSE idle watchdog (when enabled).
 * Each event is applied to a local accumulator by
 * handleCompatResponsesStreamEvent.
 *
 * @param {AsyncIterable<object>} stream  Stream of Responses events.
 * @param {object} [opts]
 * @param {AbortSignal} [opts.signal]     Caller cancellation.
 * @param {string} [opts.label]           Prefix for error messages.
 * @param {Function} [opts.onStreamDelta] Progress callback.
 * @param {Function} [opts.parseResponsesToolCalls] `(response, label)` →
 *        tool calls, used when the events produced none.
 * @param {Function} [opts.responseOutputText] `(response)` → text, used when
 *        no text deltas were accumulated.
 * @returns {Promise<object>} `{ response, content, toolCalls, model,
 *          responseId }`.
 * @throws first-byte timeout error when the stream ends without any event;
 *         TruncatedStreamError when no completion event arrived; Error when
 *         a function call never received its call_id/name.
 */
export async function consumeCompatResponsesStream(stream, {
  signal,
  label,
  onStreamDelta,
  parseResponsesToolCalls,
  responseOutputText,
} = {}) {
  const iterator = stream[Symbol.asyncIterator]();
  const firstByteTimeout = createTimeoutSignal(signal, PROVIDER_FIRST_BYTE_TIMEOUT_MS, `${label} first byte`);
  const idleEnabled = PROVIDER_SSE_IDLE_WATCHDOG_ENABLED;
  const idleMs = PROVIDER_SSE_IDLE_TIMEOUT_MS;
  const state = {
    content: '',
    model: '',
    responseId: '',
    toolCalls: [],
    pendingCalls: new Map(),
    completed: false,
    completedResponse: null,
    sawOutput: false,
  };
  let sawFirstEvent = false;
  let exhausted = false;
  const deps = { label, parseResponsesToolCalls, responseOutputText, onStreamDelta };
  try {
    while (true) {
      const { value: event, done } = await nextAsyncWithWatchdog(iterator, {
        // Only the very first read is bound to the first-byte timer.
        signal: sawFirstEvent ? signal : firstByteTimeout.signal,
        idleMs,
        idleEnabled: sawFirstEvent && idleEnabled,
        idleLabel: `${label} SSE idle`,
      });
      if (done) {
        exhausted = true;
        break;
      }
      if (!sawFirstEvent) {
        sawFirstEvent = true;
        firstByteTimeout.cleanup();
      }
      handleCompatResponsesStreamEvent(event, state, deps);
    }
  } finally {
    firstByteTimeout.cleanup();
    // Manual next() calls bypass the automatic return() that `for await`
    // performs on early exit (timeout, abort, failure event), so close the
    // iterator ourselves. Not awaited: on an async generator a return()
    // queues behind a stalled next() and would block this error path.
    if (!exhausted && typeof iterator.return === 'function') {
      try {
        Promise.resolve(iterator.return()).catch(() => {});
      } catch { /* closing is best-effort */ }
    }
  }
  // Reached only when the stream ended cleanly with zero events (timeouts
  // and aborts reject inside the loop). The previous if/else had two
  // identical branches, so one throw preserves the behaviour.
  if (!sawFirstEvent) throw firstByteCompatStreamError(label);
  if (!state.completed) throw truncatedCompatStreamError(label, 'no response.completed');
  const unresolved = state.toolCalls.find(t => t._pendingItemId);
  if (unresolved) {
    throw new Error(`xAI Responses stream function_call salvage failed: missing call_id/name for item_id=${unresolved._pendingItemId || '?'}`);
  }
  // Without a response.completed payload, synthesize a minimal response
  // from what the deltas provided.
  const response = state.completedResponse || {
    id: state.responseId || null,
    model: state.model || null,
    output_text: state.content,
    output: [],
  };
  const toolCalls = state.toolCalls.length
    ? state.toolCalls.map(({ _pendingItemId, ...t }) => t)
    : parseResponsesToolCalls(response, label);
  return {
    response,
    content: state.content || responseOutputText(response),
    toolCalls,
    model: state.model || response.model || null,
    responseId: state.responseId || response.id || null,
  };
}