@xiaozhiclaw/provider-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/adapters/aliyun-oss-file-upload-adapter.d.ts +44 -0
  2. package/dist/adapters/aliyun-oss-file-upload-adapter.js +96 -0
  3. package/dist/adapters/gemini-file-upload-adapter.d.ts +26 -0
  4. package/dist/adapters/gemini-file-upload-adapter.js +92 -0
  5. package/dist/adapters/hub-oss-file-upload-adapter.d.ts +29 -0
  6. package/dist/adapters/hub-oss-file-upload-adapter.js +53 -0
  7. package/dist/adapters/index.d.ts +10 -0
  8. package/dist/adapters/index.js +10 -0
  9. package/dist/adapters/openai-file-upload-adapter.d.ts +38 -0
  10. package/dist/adapters/openai-file-upload-adapter.js +56 -0
  11. package/dist/adapters/volcengine-file-upload-adapter.d.ts +24 -0
  12. package/dist/adapters/volcengine-file-upload-adapter.js +45 -0
  13. package/dist/builtin-providers.d.ts +8 -0
  14. package/dist/builtin-providers.js +2237 -0
  15. package/dist/constants.d.ts +1 -0
  16. package/dist/constants.js +1 -0
  17. package/dist/credentials.d.ts +1 -0
  18. package/dist/credentials.js +8 -0
  19. package/dist/debug-transport.d.ts +12 -0
  20. package/dist/debug-transport.js +99 -0
  21. package/dist/errors.d.ts +11 -0
  22. package/dist/errors.js +12 -0
  23. package/dist/events.d.ts +48 -0
  24. package/dist/events.js +1 -0
  25. package/dist/file-upload-service.d.ts +68 -0
  26. package/dist/file-upload-service.js +110 -0
  27. package/dist/gemini-schema-utils.d.ts +17 -0
  28. package/dist/gemini-schema-utils.js +76 -0
  29. package/dist/index.d.ts +37 -0
  30. package/dist/index.js +33 -0
  31. package/dist/llm-client.d.ts +43 -0
  32. package/dist/llm-client.js +217 -0
  33. package/dist/media-client.d.ts +42 -0
  34. package/dist/media-client.js +174 -0
  35. package/dist/media-transport.d.ts +176 -0
  36. package/dist/media-transport.js +16 -0
  37. package/dist/media.d.ts +2 -0
  38. package/dist/media.js +1 -0
  39. package/dist/model-detection.d.ts +22 -0
  40. package/dist/model-detection.js +28 -0
  41. package/dist/paths.d.ts +2 -0
  42. package/dist/paths.js +11 -0
  43. package/dist/provider-def.d.ts +220 -0
  44. package/dist/provider-def.js +9 -0
  45. package/dist/provider-registry.d.ts +51 -0
  46. package/dist/provider-registry.js +130 -0
  47. package/dist/provider-tool-api.d.ts +44 -0
  48. package/dist/provider-tool-api.js +9 -0
  49. package/dist/provider-variant-resolver.d.ts +35 -0
  50. package/dist/provider-variant-resolver.js +174 -0
  51. package/dist/retry.d.ts +37 -0
  52. package/dist/retry.js +71 -0
  53. package/dist/transport.d.ts +281 -0
  54. package/dist/transport.js +27 -0
  55. package/dist/transports/anthropic-messages.d.ts +65 -0
  56. package/dist/transports/anthropic-messages.js +1004 -0
  57. package/dist/transports/gemini-cache-api.d.ts +86 -0
  58. package/dist/transports/gemini-cache-api.js +141 -0
  59. package/dist/transports/gemini-file-api.d.ts +90 -0
  60. package/dist/transports/gemini-file-api.js +164 -0
  61. package/dist/transports/gemini-generatecontent.d.ts +56 -0
  62. package/dist/transports/gemini-generatecontent.js +688 -0
  63. package/dist/transports/gemini-lyria-realtime.d.ts +117 -0
  64. package/dist/transports/gemini-lyria-realtime.js +295 -0
  65. package/dist/transports/gemini-media.d.ts +53 -0
  66. package/dist/transports/gemini-media.js +383 -0
  67. package/dist/transports/media-resolve.d.ts +50 -0
  68. package/dist/transports/media-resolve.js +91 -0
  69. package/dist/transports/minimax-media.d.ts +56 -0
  70. package/dist/transports/minimax-media.js +433 -0
  71. package/dist/transports/openai-chat.d.ts +81 -0
  72. package/dist/transports/openai-chat.js +782 -0
  73. package/dist/transports/openai-media.d.ts +24 -0
  74. package/dist/transports/openai-media.js +118 -0
  75. package/dist/transports/openai-responses.d.ts +63 -0
  76. package/dist/transports/openai-responses.js +778 -0
  77. package/dist/transports/qwen-media.d.ts +59 -0
  78. package/dist/transports/qwen-media.js +411 -0
  79. package/dist/transports/realtime-transport.d.ts +183 -0
  80. package/dist/transports/realtime-transport.js +332 -0
  81. package/dist/transports/volcengine-grounding.d.ts +58 -0
  82. package/dist/transports/volcengine-grounding.js +69 -0
  83. package/dist/transports/volcengine-media.d.ts +94 -0
  84. package/dist/transports/volcengine-media.js +801 -0
  85. package/dist/transports/volcengine-responses.d.ts +64 -0
  86. package/dist/transports/volcengine-responses.js +797 -0
  87. package/dist/transports/zhipu-media.d.ts +82 -0
  88. package/dist/transports/zhipu-media.js +522 -0
  89. package/dist/transports/zhipu-tool-api.d.ts +35 -0
  90. package/dist/transports/zhipu-tool-api.js +126 -0
  91. package/dist/wire-types.d.ts +51 -0
  92. package/dist/wire-types.js +1 -0
  93. package/package.json +33 -0
@@ -0,0 +1,782 @@
1
+ /**
2
+ * OpenAI Chat Completions Transport — SSE streaming implementation.
3
+ *
4
+ * Covers all OpenAI-compatible providers:
5
+ * DeepSeek, Qwen, Minimax, Moonshot, OpenRouter, etc.
6
+ *
7
+ * POST {baseUrl}/v1/chat/completions with stream: true
8
+ * Auth: Authorization: Bearer {apiKey}
9
+ *
10
+ * SSE format: lines prefixed with "data: ", JSON parsing per event.
11
+ *
12
+ * Adapted from admin-infer-proxy-client.ts SSE logic + Hermes openai_chat.py transport.
13
+ */
14
+ import { MEDIA_MAX_UPLOAD_SIZE } from "../constants.js";
15
+ import { isLocalUrl, resolveMediaUrl, resolveMediaUrlViaUpload } from "./media-resolve.js";
16
+ import { DEFAULT_MAX_RETRIES, STREAM_IDLE_TIMEOUT_MS, retryDelay, retrySleep, extractHttpStatus, isTransientStatus, } from "../retry.js";
17
+ import { isGPT5xModel, isGPT5NanoModel, isOpenAIReasoningModel } from "../model-detection.js";
18
+ export class OpenAIChatTransport {
19
+ baseUrl;
20
+ extraHeaders;
21
+ timeoutMs;
22
+ supportsStreamOptions;
23
+ omitZeroTemperature;
24
+ quirks;
25
+ fileUploadAdapter;
26
+ // MiniMax cumulative streaming state (鎼?.7): reasoning_details and content
27
+ // are delivered as full accumulated strings, not incremental deltas.
28
+ // Reset at the start of each stream() call.
29
+ cumulativeReasoningLen = 0;
30
+ cumulativeContentLen = 0;
31
+ constructor(config) {
32
+ if (!config.baseUrl) {
33
+ throw new Error("OpenAIChatTransport: baseUrl is required (got empty or undefined)");
34
+ }
35
+ // Strip trailing slash
36
+ this.baseUrl = config.baseUrl.replace(/\/+$/, "");
37
+ this.extraHeaders = config.extraHeaders ?? {};
38
+ this.timeoutMs = config.timeoutMs ?? 180_000;
39
+ this.supportsStreamOptions = config.supportsStreamOptions ?? true;
40
+ this.omitZeroTemperature = config.omitZeroTemperature ?? false;
41
+ this.quirks = config.quirks ?? {};
42
+ this.fileUploadAdapter = config.fileUploadAdapter;
43
+ }
44
+ async *stream(request, apiKey, signal) {
45
+ // Reset cumulative state for MiniMax reasoning_split streaming
46
+ this.cumulativeReasoningLen = 0;
47
+ this.cumulativeContentLen = 0;
48
+ // Prefix completion (DeepSeek Beta): switch to /beta endpoint
49
+ const useBeta = request.prefixMessage && this.quirks.supportsPrefixCompletion;
50
+ // altcode parity: detect version suffix in baseUrl (e.g. /v4) to avoid doubling
51
+ const hasVersionInBase = /\/v\d+$/.test(this.baseUrl);
52
+ let url;
53
+ if (useBeta) {
54
+ url = `${this.baseUrl}/beta/v1/chat/completions`;
55
+ }
56
+ else if (hasVersionInBase) {
57
+ url = `${this.baseUrl}/chat/completions`;
58
+ }
59
+ else {
60
+ url = `${this.baseUrl}/v1/chat/completions`;
61
+ }
62
+ const body = {
63
+ model: request.model,
64
+ messages: convertMessagesForOpenAI(await resolveMessagesMedia(request.messages, this.fileUploadAdapter, apiKey, signal), this.quirks),
65
+ stream: true,
66
+ };
67
+ // stream_options: supported by most providers, but some reject it
68
+ if (this.supportsStreamOptions) {
69
+ body.stream_options = { include_usage: true };
70
+ }
71
+ if (request.tools && request.tools.length > 0) {
72
+ body.tools = request.tools;
73
+ if (request.toolChoice)
74
+ body.tool_choice = request.toolChoice;
75
+ }
76
+ // Inject builtin tools only when user sends function tools.
77
+ // Provider builtins and system function tools COEXISTthey use different schema types:
78
+ // GLM: { type: "web_search", web_search: { enable: true } } vs { type: "function", function: { name: "web_search" } }
79
+ // Kimi: { type: "builtin_function", function: { name: "$web_search" } }
80
+ // Volcengine: { type: "web_search" } (Responses API)
81
+ // The LLM uses provider builtins for fast inline search (during inference, transparent),
82
+ // and system tools for explicit deep/structured search (agent executes, returns results).
83
+ // T15: session-level toggle via request.disableBuiltinTools
84
+ const injectBuiltins = !request.disableBuiltinTools;
85
+ let kimiWebSearchInjected = false;
86
+ if (injectBuiltins && this.quirks.builtinWebSearch && body.tools) {
87
+ const tools = body.tools;
88
+ if (this.quirks.builtinCodeInterpreter) {
89
+ // GLM native format
90
+ tools.push({ type: "web_search", web_search: { enable: true, search_result: true } });
91
+ }
92
+ else {
93
+ // Kimi format (kimi-ProviderMax 鎼?3.2: builtin_function.$web_search)
94
+ tools.push({ type: "builtin_function", function: { name: "$web_search" } });
95
+ kimiWebSearchInjected = true;
96
+ }
97
+ }
98
+ if (injectBuiltins && this.quirks.builtinCodeInterpreter && body.tools) {
99
+ const tools = body.tools;
100
+ tools.push({ type: "code_interpreter", code_interpreter: { sandbox: "none" } });
101
+ }
102
+ // GLM: enable tool_stream for incremental tool call argument streaming
103
+ if (this.quirks.supportsToolStream && body.tools) {
104
+ body.tool_stream = true;
105
+ }
106
+ // DeepSeek V4 thinking mode: thinking + reasoning_effort as top-level params,
107
+ // temperature must be suppressed (ignored by API but wasteful to send).
108
+ const isDeepSeekThinking = this.quirks.useEffortInsteadOfBudget && !!request.reasoning;
109
+ // Kimi K2.6: immutable sampling paramstemperature always omitted (kimi-ProviderMax 鎼?)
110
+ const isKimiK26 = request.model.startsWith("kimi-k2.6");
111
+ // GPT-5.x: temperature allowed even with reasoning (openai-ProviderMax 鎼?)
112
+ const isGPT5x = isGPT5xModel(request.model);
113
+ if (request.temperature !== undefined) {
114
+ // Some providers (e.g. Moonshot/Kimi) reject temperature=0
115
+ if (!(this.omitZeroTemperature && request.temperature === 0)) {
116
+ // o-series reasoning models, DeepSeek thinking, Kimi K2.6 suppress temperature.
117
+ // GPT-5.x explicitly allows temperature with reasoning.
118
+ if (isGPT5x || (!isOpenAIReasoningModel(request.model) && !isDeepSeekThinking && !isKimiK26)) {
119
+ body.temperature = request.temperature;
120
+ }
121
+ }
122
+ }
123
+ if (request.topP !== undefined) {
124
+ if (isGPT5x || (!isOpenAIReasoningModel(request.model) && !isDeepSeekThinking && !isKimiK26)) {
125
+ body.top_p = request.topP;
126
+ }
127
+ }
128
+ if (request.maxTokens !== undefined) {
129
+ // GPT-5.x, o-series, and Kimi K2 use max_completion_tokens (openai-ProviderMax 鎼?, kimi-ProviderMax 鎼?.12)
130
+ if (isGPT5x || isOpenAIReasoningModel(request.model) || request.model.startsWith("kimi-k2")) {
131
+ body.max_completion_tokens = request.maxTokens;
132
+ }
133
+ else {
134
+ body.max_tokens = request.maxTokens;
135
+ }
136
+ }
137
+ if (request.reasoning) {
138
+ if (isGPT5x) {
139
+ // GPT-5.x: use reasoning object with effort (supports none/low/medium/high/xhigh)
140
+ // openai-ProviderMax 鎼?: unified reasoning effort for all 5.x models.
141
+ let effort = request.reasoning.effort === "minimal" ? "none" : request.reasoning.effort;
142
+ // gpt-5.4-nano only supports up to medium effort (openai-ProviderMax 鎼?)
143
+ if (isGPT5NanoModel(request.model) && (effort === "high" || effort === "xhigh")) {
144
+ effort = "medium";
145
+ }
146
+ body.reasoning = { effort };
147
+ }
148
+ else if (isOpenAIReasoningModel(request.model)) {
149
+ // OpenAI o-series (legacy): reasoning_effort as flat string
150
+ body.reasoning_effort = request.reasoning.effort;
151
+ }
152
+ else if (this.quirks.useEffortInsteadOfBudget) {
153
+ // DeepSeek V4 OpenAI format: thinking + reasoning_effort as top-level body params.
154
+ body.thinking = { type: "enabled" };
155
+ const cap = this.quirks.maxReasoningEffort ?? "high";
156
+ const effortInput = request.reasoning.effort ?? "high";
157
+ body.reasoning_effort = effortInput === "low" || effortInput === "medium"
158
+ ? "high"
159
+ : cap;
160
+ }
161
+ else if (this.quirks.supportsThinkingParam) {
162
+ if (kimiWebSearchInjected) {
163
+ // kimi-ProviderMax 鎼?3.4: $web_search requires thinking disabled
164
+ body.thinking = { type: "disabled" };
165
+ }
166
+ else if (this.quirks.supportsToolStream) {
167
+ // GLM (zhipu-openai): thinking.type = "enabled"/"disabled" + clear_thinking
168
+ // 鎼?.2/鎼?.5 of zhipu-ProviderMax.md
169
+ body.thinking = { type: "enabled", clear_thinking: false };
170
+ }
171
+ else {
172
+ // Kimi K2: thinking.type = "enabled" (kimi-ProviderMax 鎼?/鎼?.6)
173
+ body.thinking = { type: "enabled" };
174
+ }
175
+ if (this.quirks.supportsReasoningEffort) {
176
+ body.reasoning_effort = request.reasoning.effort;
177
+ }
178
+ }
179
+ else if (this.quirks.supportsReasoningEffort) {
180
+ body.reasoning_effort = request.reasoning.effort;
181
+ }
182
+ else {
183
+ // Generic pass-through
184
+ body.reasoning = request.reasoning;
185
+ }
186
+ }
187
+ else if (kimiWebSearchInjected) {
188
+ // kimi-ProviderMax 鎼?3.4: explicitly disable thinking when web search is active
189
+ body.thinking = { type: "disabled" };
190
+ }
191
+ else if (this.quirks.supportsThinkingParam && this.quirks.disableThinkingByDefault) {
192
+ body.thinking = { type: "disabled" };
193
+ }
194
+ // Structured output / JSON mode (kimi-ProviderMax 鎼?8, volcengine-ProviderMax 鎼?6)
195
+ if (request.structuredOutput) {
196
+ if (request.structuredOutput.mode === "json_object") {
197
+ body.response_format = { type: "json_object" };
198
+ }
199
+ else if (request.structuredOutput.mode === "json_schema") {
200
+ body.response_format = {
201
+ type: "json_schema",
202
+ json_schema: {
203
+ name: request.structuredOutput.name,
204
+ strict: request.structuredOutput.strict ?? true,
205
+ schema: request.structuredOutput.schema,
206
+ },
207
+ };
208
+ }
209
+ }
210
+ // MiniMax reasoning_split: split thinking into reasoning_details field (minimax-ProviderMax 鎼?.5)
211
+ if (this.quirks.supportsReasoningSplit && request.reasoning) {
212
+ body.reasoning_split = true;
213
+ }
214
+ // GPT-5.x Predicted Output: speculative decoding for code editing (openai-ProviderMax 鎼?1)
215
+ if (request.prediction && isGPT5x) {
216
+ body.prediction = request.prediction;
217
+ }
218
+ // Prefix completion (DeepSeek Beta): append assistant message with prefix: true
219
+ if (useBeta && request.prefixMessage) {
220
+ const msgs = body.messages;
221
+ msgs.push({
222
+ role: "assistant",
223
+ content: request.prefixMessage,
224
+ prefix: true,
225
+ });
226
+ }
227
+ const headers = {
228
+ "Content-Type": "application/json",
229
+ Authorization: `Bearer ${apiKey}`,
230
+ ...this.extraHeaders,
231
+ };
232
+ // Retry loop with exponential backoff (CC withRetry parity)
233
+ let lastError = null;
234
+ for (let attempt = 0; attempt <= DEFAULT_MAX_RETRIES; attempt++) {
235
+ if (signal?.aborted)
236
+ throw new Error("Request aborted");
237
+ if (attempt > 0 && lastError) {
238
+ await retrySleep(retryDelay(attempt), signal);
239
+ }
240
+ try {
241
+ yield* this.fetchAndStream(url, headers, body, signal);
242
+ return; // Success
243
+ }
244
+ catch (err) {
245
+ lastError = err instanceof Error ? err : new Error(String(err));
246
+ const isIdleTimeout = lastError.message.includes("Stream idle timeout");
247
+ if (!isTransientStatus(extractHttpStatus(lastError)) && !isIdleTimeout)
248
+ throw lastError;
249
+ if (attempt === DEFAULT_MAX_RETRIES)
250
+ throw lastError;
251
+ }
252
+ }
253
+ }
254
+ async *fetchAndStream(url, headers, body, signal) {
255
+ const timeoutSignal = AbortSignal.timeout(this.timeoutMs);
256
+ const combinedSignal = signal
257
+ ? AbortSignal.any([signal, timeoutSignal])
258
+ : timeoutSignal;
259
+ const response = await fetch(url, {
260
+ method: "POST",
261
+ headers,
262
+ body: JSON.stringify(body),
263
+ signal: combinedSignal,
264
+ });
265
+ if (!response.ok) {
266
+ const errorBody = await response.text().catch(() => "");
267
+ const err = new Error(`LLM API error ${response.status}: ${errorBody.slice(0, 500)}`);
268
+ err.status = response.status;
269
+ throw err;
270
+ }
271
+ if (!response.body) {
272
+ throw new Error("LLM API returned no response body");
273
+ }
274
+ // altcode parity: some providers ignore stream:true and return plain JSON.
275
+ // Detect via Content-Type header and synthesize stream events.
276
+ const contentType = response.headers.get("content-type") ?? "";
277
+ if (contentType.includes("application/json") && !contentType.includes("text/event-stream")) {
278
+ yield* this.handleNonStreamingResponse(response);
279
+ return;
280
+ }
281
+ yield* this.parseSSEStreamWithWatchdog(response.body);
282
+ }
283
+ /**
284
+ * Handle non-streaming JSON response from providers that ignore stream:true.
285
+ * Synthesize the same LLMChunk events a streaming response would produce.
286
+ */
287
+ async *handleNonStreamingResponse(response) {
288
+ const data = await response.json();
289
+ // Synthesize usage (faithfully parse nested detailssame fields as streaming path)
290
+ if (data.usage) {
291
+ yield {
292
+ type: "usage",
293
+ promptTokens: data.usage.prompt_tokens ?? 0,
294
+ completionTokens: data.usage.completion_tokens ?? 0,
295
+ reasoningTokens: data.usage.completion_tokens_details?.reasoning_tokens,
296
+ cacheReadTokens: data.usage.prompt_tokens_details?.cached_tokens,
297
+ };
298
+ }
299
+ for (const choice of data.choices ?? []) {
300
+ const msg = choice.message;
301
+ if (!msg)
302
+ continue;
303
+ if (msg.reasoning_content) {
304
+ yield { type: "reasoning_delta", text: msg.reasoning_content };
305
+ }
306
+ if (msg.content) {
307
+ yield { type: "delta", text: msg.content };
308
+ }
309
+ if (msg.tool_calls) {
310
+ for (let i = 0; i < msg.tool_calls.length; i++) {
311
+ const tc = msg.tool_calls[i];
312
+ yield {
313
+ type: "tool_call_delta",
314
+ index: i,
315
+ id: tc.id,
316
+ name: tc.function.name,
317
+ arguments: tc.function.arguments,
318
+ };
319
+ }
320
+ }
321
+ if (choice.finish_reason) {
322
+ yield { type: "done", finishReason: choice.finish_reason };
323
+ }
324
+ }
325
+ }
326
+ /**
327
+ * Parse SSE stream with 90s idle watchdog (CC parity).
328
+ * If no data arrives within STREAM_IDLE_TIMEOUT_MS, throw to trigger retry.
329
+ */
330
+ async *parseSSEStreamWithWatchdog(body) {
331
+ const decoder = new TextDecoder();
332
+ let buffer = "";
333
+ let idleTimer = null;
334
+ const abortController = new AbortController();
335
+ const resetIdleTimer = () => {
336
+ if (idleTimer)
337
+ clearTimeout(idleTimer);
338
+ idleTimer = setTimeout(() => {
339
+ abortController.abort();
340
+ }, STREAM_IDLE_TIMEOUT_MS);
341
+ };
342
+ try {
343
+ resetIdleTimer();
344
+ const reader = body.getReader();
345
+ try {
346
+ while (true) {
347
+ const { done, value } = await reader.read();
348
+ if (done)
349
+ break;
350
+ if (abortController.signal.aborted) {
351
+ throw new Error("Stream idle timeout");
352
+ }
353
+ resetIdleTimer();
354
+ buffer += decoder.decode(value, { stream: true });
355
+ let newlineIdx;
356
+ while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
357
+ const line = buffer.slice(0, newlineIdx).trim();
358
+ buffer = buffer.slice(newlineIdx + 1);
359
+ if (!line)
360
+ continue;
361
+ if (line.startsWith(":"))
362
+ continue;
363
+ if (!line.startsWith("data: "))
364
+ continue;
365
+ const data = line.slice(6);
366
+ if (data === "[DONE]")
367
+ return;
368
+ let parsed;
369
+ try {
370
+ parsed = JSON.parse(data);
371
+ }
372
+ catch {
373
+ continue;
374
+ }
375
+ yield* this.processChunk(parsed);
376
+ }
377
+ }
378
+ }
379
+ finally {
380
+ reader.releaseLock();
381
+ }
382
+ }
383
+ finally {
384
+ if (idleTimer)
385
+ clearTimeout(idleTimer);
386
+ }
387
+ }
388
+ *processChunk(chunk) {
389
+ // Usage chunk (often sent at the end with stream_options)
390
+ if (chunk.usage) {
391
+ yield {
392
+ type: "usage",
393
+ promptTokens: chunk.usage.prompt_tokens ?? 0,
394
+ completionTokens: chunk.usage.completion_tokens ?? 0,
395
+ reasoningTokens: chunk.usage.completion_tokens_details?.reasoning_tokens,
396
+ cacheReadTokens: chunk.usage.prompt_tokens_details?.cached_tokens,
397
+ };
398
+ }
399
+ if (!chunk.choices || chunk.choices.length === 0)
400
+ return;
401
+ for (const choice of chunk.choices) {
402
+ const delta = choice.delta;
403
+ if (!delta)
404
+ continue;
405
+ // Reasoning content (DeepSeek-R1 style)
406
+ if (delta.reasoning_content) {
407
+ yield { type: "reasoning_delta", text: delta.reasoning_content };
408
+ }
409
+ // MiniMax reasoning_split: reasoning_details is cumulative (鎼?.7)
410
+ if (delta.reasoning_details) {
411
+ const newPart = delta.reasoning_details.slice(this.cumulativeReasoningLen);
412
+ if (newPart) {
413
+ this.cumulativeReasoningLen = delta.reasoning_details.length;
414
+ yield { type: "reasoning_delta", text: newPart };
415
+ }
416
+ }
417
+ // Text contentMiniMax cumulative mode diff (鎼?.7)
418
+ if (delta.content) {
419
+ if (this.quirks.supportsReasoningSplit) {
420
+ // MiniMax: content is cumulative string, extract only new portion
421
+ const newPart = delta.content.slice(this.cumulativeContentLen);
422
+ if (newPart) {
423
+ this.cumulativeContentLen = delta.content.length;
424
+ yield { type: "delta", text: newPart };
425
+ }
426
+ }
427
+ else {
428
+ yield { type: "delta", text: delta.content };
429
+ }
430
+ }
431
+ // Tool call deltas
432
+ if (delta.tool_calls) {
433
+ for (const tc of delta.tool_calls) {
434
+ yield {
435
+ type: "tool_call_delta",
436
+ index: tc.index,
437
+ id: tc.id,
438
+ name: tc.function?.name,
439
+ arguments: tc.function?.arguments ?? "",
440
+ };
441
+ }
442
+ }
443
+ // Finish reason
444
+ if (choice.finish_reason) {
445
+ // Map GLM-specific finish_reasons to standard values
446
+ // GLM returns: sensitive (content filtered), network_error, model_context_window_exceeded
447
+ const mapped = mapFinishReason(choice.finish_reason);
448
+ yield { type: "done", finishReason: mapped };
449
+ }
450
+ }
451
+ }
452
+ // ── FIM (Fill-In-Middle) Completion — DeepSeek Beta API ──────────────
453
+ /**
454
+ * FIM completion via /beta/v1/completions.
455
+ * Only works with DeepSeek (requires supportsPrefixCompletion quirk).
456
+ * Non-thinking mode only; max completion 4K tokens.
457
+ */
458
+ async *complete(request, apiKey, signal) {
459
+ if (!this.quirks.supportsPrefixCompletion) {
460
+ throw new Error("FIM completion is not supported by this provider");
461
+ }
462
+ const hasVersionInBase = /\/v\d+$/.test(this.baseUrl);
463
+ const url = hasVersionInBase
464
+ ? `${this.baseUrl}/beta/completions`
465
+ : `${this.baseUrl}/beta/v1/completions`;
466
+ const body = {
467
+ model: request.model,
468
+ prompt: request.prompt,
469
+ stream: true,
470
+ max_tokens: request.maxTokens ?? 4096,
471
+ };
472
+ if (request.suffix)
473
+ body.suffix = request.suffix;
474
+ if (request.temperature !== undefined)
475
+ body.temperature = request.temperature;
476
+ if (request.stop)
477
+ body.stop = request.stop;
478
+ const headers = {
479
+ "Content-Type": "application/json",
480
+ Authorization: `Bearer ${apiKey}`,
481
+ ...this.extraHeaders,
482
+ };
483
+ const timeoutSignal = AbortSignal.timeout(this.timeoutMs);
484
+ const combinedSignal = signal
485
+ ? AbortSignal.any([signal, timeoutSignal])
486
+ : timeoutSignal;
487
+ const response = await fetch(url, {
488
+ method: "POST",
489
+ headers,
490
+ body: JSON.stringify(body),
491
+ signal: combinedSignal,
492
+ });
493
+ if (!response.ok) {
494
+ const errorBody = await response.text().catch(() => "");
495
+ throw new Error(`FIM API error ${response.status}: ${errorBody.slice(0, 500)}`);
496
+ }
497
+ if (!response.body)
498
+ throw new Error("FIM API returned no response body");
499
+ const decoder = new TextDecoder();
500
+ let buffer = "";
501
+ const reader = response.body.getReader();
502
+ try {
503
+ while (true) {
504
+ const { done, value } = await reader.read();
505
+ if (done)
506
+ break;
507
+ buffer += decoder.decode(value, { stream: true });
508
+ let newlineIdx;
509
+ while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
510
+ const line = buffer.slice(0, newlineIdx).trim();
511
+ buffer = buffer.slice(newlineIdx + 1);
512
+ if (!line || line.startsWith(":") || !line.startsWith("data: "))
513
+ continue;
514
+ const data = line.slice(6);
515
+ if (data === "[DONE]")
516
+ return;
517
+ let parsed;
518
+ try {
519
+ parsed = JSON.parse(data);
520
+ }
521
+ catch {
522
+ continue;
523
+ }
524
+ for (const choice of parsed.choices ?? []) {
525
+ if (choice.text)
526
+ yield { type: "delta", text: choice.text };
527
+ if (choice.finish_reason)
528
+ yield { type: "done", finishReason: choice.finish_reason };
529
+ }
530
+ }
531
+ }
532
+ }
533
+ finally {
534
+ reader.releaseLock();
535
+ }
536
+ }
537
+ // ── #28: Kimi File Upload — POST /v1/files ───────────────────────────
538
+ /**
539
+ * Upload a file for use in conversations (Kimi File API).
540
+ * Returns a file_id that can be referenced in user messages.
541
+ * POST /v1/files with multipart/form-data.
542
+ */
543
+ async uploadFile(fileBlob, filename, purpose, apiKey, signal) {
544
+ const hasVersionInBase = /\/v\d+$/.test(this.baseUrl);
545
+ const url = hasVersionInBase
546
+ ? `${this.baseUrl}/files`
547
+ : `${this.baseUrl}/v1/files`;
548
+ const formData = new FormData();
549
+ formData.append("file", fileBlob, filename);
550
+ formData.append("purpose", purpose);
551
+ const res = await fetch(url, {
552
+ method: "POST",
553
+ headers: {
554
+ Authorization: `Bearer ${apiKey}`,
555
+ ...this.extraHeaders,
556
+ },
557
+ body: formData,
558
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
559
+ });
560
+ if (!res.ok) {
561
+ const errText = await res.text().catch(() => "");
562
+ throw new Error(`File upload error ${res.status}: ${errText}`);
563
+ }
564
+ const data = await res.json();
565
+ return {
566
+ fileId: data.id ?? "",
567
+ filename: data.filename ?? filename,
568
+ bytes: data.bytes ?? 0,
569
+ };
570
+ }
571
+ /**
572
+ * Get file content/statusGET /v1/files/{file_id}
573
+ */
574
+ async getFileInfo(fileId, apiKey, signal) {
575
+ const hasVersionInBase = /\/v\d+$/.test(this.baseUrl);
576
+ const url = hasVersionInBase
577
+ ? `${this.baseUrl}/files/${encodeURIComponent(fileId)}`
578
+ : `${this.baseUrl}/v1/files/${encodeURIComponent(fileId)}`;
579
+ const res = await fetch(url, {
580
+ method: "GET",
581
+ headers: {
582
+ Authorization: `Bearer ${apiKey}`,
583
+ ...this.extraHeaders,
584
+ },
585
+ signal: signal ?? AbortSignal.timeout(this.timeoutMs),
586
+ });
587
+ if (!res.ok) {
588
+ const errText = await res.text().catch(() => "");
589
+ throw new Error(`File info error ${res.status}: ${errText}`);
590
+ }
591
+ const data = await res.json();
592
+ return {
593
+ id: data.id ?? fileId,
594
+ filename: data.filename ?? "",
595
+ bytes: data.bytes ?? 0,
596
+ status: data.status ?? "unknown",
597
+ };
598
+ }
599
+ }
600
/**
 * Normalize a provider-specific finish_reason to a standard value.
 *
 *   sensitive                     → content_filter
 *   model_context_window_exceeded → length
 *   network_error                 → error
 *
 * Any other value is returned unchanged.
 *
 * @param {string} reason - finish_reason as reported by the provider.
 * @returns {string} The standard equivalent, or `reason` itself.
 */
function mapFinishReason(reason) {
    // A Map (not a plain object) so keys like "constructor" never hit the prototype.
    const aliases = new Map([
        ["sensitive", "content_filter"],
        ["model_context_window_exceeded", "length"],
        ["network_error", "error"],
    ]);
    return aliases.has(reason) ? aliases.get(reason) : reason;
}
616
+ // ── Message Conversion ───────────────────────────────────────────────
617
+ /**
618
+ * Pre-resolve local media URLs to base64 data URLs.
619
+ * Images and videos are left as-is (providers accept URLs and fetch server-side).
620
+ * Audio must always be base64-encoded (OpenAI input_audio.data requirement).
621
+ */
622
+ async function resolveMessagesMedia(messages, uploadAdapter, apiKey, signal) {
623
+ const needsResolution = messages.some((m) => m.audioUrls?.some(isLocalUrl) || m.imageUrls?.some(isLocalUrl));
624
+ if (!needsResolution)
625
+ return messages;
626
+ return Promise.all(messages.map(async (msg) => {
627
+ // Resolve imageUrls on both user messages and tool result messages
628
+ if (msg.role !== "user" && msg.role !== "tool")
629
+ return msg;
630
+ const patch = {};
631
+ // Audio MUST be base64 for input_audio (OpenAI API requirementno upload alternative)
632
+ if (msg.role === "user" && msg.audioUrls?.some(isLocalUrl)) {
633
+ patch.audioUrls = await Promise.all(msg.audioUrls.map((url) => isLocalUrl(url) ? resolveMediaUrl(url) : Promise.resolve(url)));
634
+ }
635
+ // Images: upload via adapter (requiredno base64 fallback)
636
+ if (msg.imageUrls?.some(isLocalUrl)) {
637
+ if (!uploadAdapter || !apiKey) {
638
+ throw new Error("FileUploadAdapter required for local image URLs. Configure OSS_ACCESS_KEY_ID/OSS_ACCESS_KEY_SECRET or QLOGICAGENT_HUB_URL.");
639
+ }
640
+ patch.imageUrls = await Promise.all(msg.imageUrls.map((url) => isLocalUrl(url)
641
+ ? resolveMediaUrlViaUpload(url, { uploadAdapter, apiKey, signal })
642
+ : Promise.resolve(url)));
643
+ }
644
+ return Object.keys(patch).length > 0 ? { ...msg, ...patch } : msg;
645
+ }));
646
+ }
647
/**
 * Convert chat messages into the OpenAI Chat Completions wire format.
 *
 * - user: plain string content, or a content-part array when the message
 *   carries images, videos, audio or files.
 * - assistant: content (null when absent), optional `reasoning_content`
 *   joined from thinkingBlocks, optional tool_calls.
 * - tool: string content, or a content-part array when it carries images.
 * - any other role: `{ role, content }` passed through.
 *
 * With `quirks.filterImageBlocks` set, imageUrls are dropped.
 *
 * NOTE: local audio/image URLs are expected to be resolved by the caller
 * beforehand; this function performs no I/O.
 *
 * @param {object[]} messages - Chat messages.
 * @param {object} quirks - Provider quirk flags (`filterImageBlocks`, `supportsDocumentVision`).
 * @returns {object[]} Wire-format messages, one per input message.
 */
function convertMessagesForOpenAI(messages, quirks) {
    return messages.map((msg) => {
        switch (msg.role) {
            case "user":
                return convertUserMessage(msg, quirks);
            case "assistant":
                return convertAssistantMessage(msg);
            case "tool":
                return convertToolMessage(msg, quirks);
            default:
                // System (and any other) messages pass through.
                return { role: msg.role, content: msg.content ?? "" };
        }
    });
}

/** Build the wire form of a user message, expanding media into content parts. */
function convertUserMessage(msg, quirks) {
    const hasImages = !quirks.filterImageBlocks && msg.imageUrls?.length > 0;
    const hasVideos = msg.videoUrls?.length > 0;
    const hasAudios = msg.audioUrls?.length > 0;
    const hasFiles = msg.fileIds?.length > 0;
    if (!hasImages && !hasVideos && !hasAudios && !hasFiles) {
        return { role: "user", content: msg.content ?? "" };
    }
    const content = [];
    if (msg.content) {
        content.push({ type: "text", text: msg.content });
    }
    if (hasImages) {
        // With several images, each one is preceded by a numbered text label.
        const multiImage = msg.imageUrls.length > 1;
        msg.imageUrls.forEach((url, imgIdx) => {
            if (multiImage) {
                content.push({ type: "text", text: `[Image ${imgIdx + 1}]` });
            }
            const imgUrl = { url };
            if (msg.imageDetail) {
                imgUrl.detail = msg.imageDetail;
            }
            content.push({ type: "image_url", image_url: imgUrl });
        });
    }
    if (hasVideos) {
        for (const url of msg.videoUrls) {
            content.push({ type: "video_url", video_url: { url } });
        }
    }
    if (hasAudios) {
        for (const url of msg.audioUrls) {
            content.push(toInputAudioPart(url, msg.audioFormat));
        }
    }
    if (hasFiles) {
        for (const file of msg.fileIds) {
            content.push(toFilePart(file, quirks));
        }
    }
    return { role: "user", content };
}

/**
 * Build an input_audio part. For `data:audio/...;base64,` URLs the base64
 * payload and format are extracted; MIME parameters (e.g. `;codecs=opus`) are
 * tolerated, "mpeg" becomes "mp3" and the WAV aliases become "wav". Any other
 * URL is passed as-is with the fallback format (default "mp3").
 */
function toInputAudioPart(url, fallbackFormat) {
    let data = url;
    let format = fallbackFormat ?? "mp3";
    if (url.startsWith("data:")) {
        const match = /^data:audio\/([^;,]+)(?:;[^;,]+)*;base64,(.+)$/.exec(url);
        if (match) {
            const subtype = match[1].toLowerCase();
            if (subtype === "mpeg") {
                format = "mp3";
            }
            else if (subtype === "x-wav" || subtype === "wave" || subtype === "vnd.wave" || subtype === "x-pn-wav") {
                format = "wav";
            }
            else {
                format = subtype;
            }
            data = match[2];
        }
    }
    return { type: "input_audio", input_audio: { data, format } };
}

/**
 * Build the content part for one attached file: a text annotation for
 * oversized files and for URL references, a `file` part for platform file ids.
 */
function toFilePart(file, quirks) {
    const mime = file.mimeType || "";
    const isUrl = file.id.startsWith("http://") || file.id.startsWith("https://");
    if (file.size != null && file.size > MEDIA_MAX_UPLOAD_SIZE) {
        const sizeLabel = `${(file.size / (1024 * 1024)).toFixed(1)}MB`;
        return { type: "text", text: `[Attached: ${file.id} (${mime || "unknown"}, ${sizeLabel}) — file too large for direct vision, use tools to process]` };
    }
    const isPdf = mime === "application/pdf" || file.id.endsWith(".pdf");
    if (quirks.supportsDocumentVision && isPdf) {
        // Document vision: only platform file ids can be referenced, not URLs.
        return isUrl
            ? { type: "text", text: `[Attached PDF: ${file.id}]` }
            : { type: "file", file: { file_id: file.id } };
    }
    if (isUrl) {
        // URL reference: annotate as text context.
        return { type: "text", text: mime ? `[Attached: ${file.id} (${mime})]` : `[Attached: ${file.id}]` };
    }
    // Platform file id.
    return { type: "file", file: { file_id: file.id } };
}

/** Build the wire form of an assistant message (thinking passback + tool calls). */
function convertAssistantMessage(msg) {
    const out = { role: "assistant" };
    // Thinking content is passed back through the reasoning_content field.
    if (msg.thinkingBlocks && msg.thinkingBlocks.length > 0) {
        out.reasoning_content = msg.thinkingBlocks.map((tb) => tb.thinking).join("");
    }
    out.content = msg.content ?? null;
    if (msg.tool_calls && msg.tool_calls.length > 0) {
        out.tool_calls = msg.tool_calls.map((tc) => ({
            id: tc.id,
            type: tc.type,
            function: { name: tc.function.name, arguments: tc.function.arguments },
        }));
    }
    return out;
}

/** Build the wire form of a tool result, using content parts when it carries images. */
function convertToolMessage(msg, quirks) {
    const toolCallId = msg.tool_call_id ?? "";
    if (msg.imageUrls && msg.imageUrls.length > 0 && !quirks.filterImageBlocks) {
        const content = [];
        if (msg.content) {
            content.push({ type: "text", text: msg.content });
        }
        for (const url of msg.imageUrls) {
            content.push({ type: "image_url", image_url: { url } });
        }
        return { role: "tool", content, tool_call_id: toolCallId };
    }
    return { role: "tool", content: msg.content ?? "", tool_call_id: toolCallId };
}