universal-llm-client 4.5.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +2 -0
  3. package/dist/ai-model.d.ts +0 -1
  4. package/dist/ai-model.js +0 -1
  5. package/dist/auditor.d.ts +0 -1
  6. package/dist/auditor.js +0 -1
  7. package/dist/client.d.ts +0 -1
  8. package/dist/client.js +0 -1
  9. package/dist/gemma-channel.d.ts +0 -1
  10. package/dist/gemma-channel.js +0 -1
  11. package/dist/gemma-diffusion.d.ts +0 -1
  12. package/dist/gemma-diffusion.js +0 -1
  13. package/dist/http.d.ts +0 -1
  14. package/dist/http.js +0 -1
  15. package/dist/index.d.ts +0 -1
  16. package/dist/index.js +0 -1
  17. package/dist/interfaces.d.ts +0 -1
  18. package/dist/interfaces.js +0 -1
  19. package/dist/mcp.d.ts +0 -1
  20. package/dist/mcp.js +0 -1
  21. package/dist/providers/anthropic.d.ts +0 -1
  22. package/dist/providers/anthropic.js +0 -1
  23. package/dist/providers/google.d.ts +0 -1
  24. package/dist/providers/google.js +0 -1
  25. package/dist/providers/index.d.ts +0 -1
  26. package/dist/providers/index.js +0 -1
  27. package/dist/providers/ollama.d.ts +0 -1
  28. package/dist/providers/ollama.js +0 -1
  29. package/dist/providers/openai.d.ts +2 -1
  30. package/dist/providers/openai.js +303 -74
  31. package/dist/router.d.ts +0 -1
  32. package/dist/router.js +0 -1
  33. package/dist/stream-decoder.d.ts +0 -1
  34. package/dist/stream-decoder.js +0 -1
  35. package/dist/structured-output.d.ts +0 -1
  36. package/dist/structured-output.js +0 -1
  37. package/dist/thinking.d.ts +0 -1
  38. package/dist/thinking.js +0 -1
  39. package/dist/tools.d.ts +0 -1
  40. package/dist/tools.js +0 -1
  41. package/dist/zod-adapter.d.ts +0 -1
  42. package/dist/zod-adapter.js +0 -1
  43. package/package.json +1 -2
  44. package/dist/ai-model.d.ts.map +0 -1
  45. package/dist/ai-model.js.map +0 -1
  46. package/dist/auditor.d.ts.map +0 -1
  47. package/dist/auditor.js.map +0 -1
  48. package/dist/client.d.ts.map +0 -1
  49. package/dist/client.js.map +0 -1
  50. package/dist/gemma-channel.d.ts.map +0 -1
  51. package/dist/gemma-channel.js.map +0 -1
  52. package/dist/gemma-diffusion.d.ts.map +0 -1
  53. package/dist/gemma-diffusion.js.map +0 -1
  54. package/dist/http.d.ts.map +0 -1
  55. package/dist/http.js.map +0 -1
  56. package/dist/index.d.ts.map +0 -1
  57. package/dist/index.js.map +0 -1
  58. package/dist/interfaces.d.ts.map +0 -1
  59. package/dist/interfaces.js.map +0 -1
  60. package/dist/mcp.d.ts.map +0 -1
  61. package/dist/mcp.js.map +0 -1
  62. package/dist/providers/anthropic.d.ts.map +0 -1
  63. package/dist/providers/anthropic.js.map +0 -1
  64. package/dist/providers/google.d.ts.map +0 -1
  65. package/dist/providers/google.js.map +0 -1
  66. package/dist/providers/index.d.ts.map +0 -1
  67. package/dist/providers/index.js.map +0 -1
  68. package/dist/providers/ollama.d.ts.map +0 -1
  69. package/dist/providers/ollama.js.map +0 -1
  70. package/dist/providers/openai.d.ts.map +0 -1
  71. package/dist/providers/openai.js.map +0 -1
  72. package/dist/router.d.ts.map +0 -1
  73. package/dist/router.js.map +0 -1
  74. package/dist/stream-decoder.d.ts.map +0 -1
  75. package/dist/stream-decoder.js.map +0 -1
  76. package/dist/structured-output.d.ts.map +0 -1
  77. package/dist/structured-output.js.map +0 -1
  78. package/dist/thinking.d.ts.map +0 -1
  79. package/dist/thinking.js.map +0 -1
  80. package/dist/tools.d.ts.map +0 -1
  81. package/dist/tools.js.map +0 -1
  82. package/dist/zod-adapter.d.ts.map +0 -1
  83. package/dist/zod-adapter.js.map +0 -1
  84. package/src/ai-model.ts +0 -400
  85. package/src/auditor.ts +0 -213
  86. package/src/client.ts +0 -402
  87. package/src/debug/debug-google-streaming.ts +0 -97
  88. package/src/debug/debug-tool-execution.ts +0 -86
  89. package/src/debug/test-lmstudio-tools.ts +0 -155
  90. package/src/demos/README.md +0 -47
  91. package/src/demos/basic/universal-llm-examples.ts +0 -161
  92. package/src/demos/diffusion-gemma/.env +0 -29
  93. package/src/demos/diffusion-gemma/.env.example +0 -27
  94. package/src/demos/diffusion-gemma/CLAUDE.md +0 -95
  95. package/src/demos/diffusion-gemma/README.md +0 -59
  96. package/src/demos/diffusion-gemma/canvas.ts +0 -1606
  97. package/src/demos/diffusion-gemma/docker-compose.yml +0 -29
  98. package/src/demos/diffusion-gemma/probe-stream.ts +0 -51
  99. package/src/demos/diffusion-gemma/probe-tools.ts +0 -55
  100. package/src/demos/diffusion-gemma/server.ts +0 -1205
  101. package/src/demos/diffusion-gemma/start-vllm.sh +0 -98
  102. package/src/demos/mcp/astrid-memory-demo.ts +0 -295
  103. package/src/demos/mcp/astrid-persona-memory.ts +0 -357
  104. package/src/demos/mcp/mcp-mongodb-demo.ts +0 -275
  105. package/src/demos/mcp/simple-astrid-memory.ts +0 -148
  106. package/src/demos/mcp/simple-mcp-demo.ts +0 -68
  107. package/src/demos/mcp/working-mcp-demo.ts +0 -62
  108. package/src/demos/model-alias-demo.ts +0 -0
  109. package/src/demos/tools/RAG_MEMORY_INTEGRATION.md +0 -267
  110. package/src/demos/tools/astrid-memory-demo.ts +0 -270
  111. package/src/demos/tools/astrid-production-memory-clean.ts +0 -785
  112. package/src/demos/tools/astrid-production-memory.ts +0 -558
  113. package/src/demos/tools/basic-translation-test.ts +0 -66
  114. package/src/demos/tools/chromadb-similarity-tuning.ts +0 -390
  115. package/src/demos/tools/clean-multilingual-conversation.ts +0 -209
  116. package/src/demos/tools/clean-translation-test.ts +0 -119
  117. package/src/demos/tools/clean-universal-multilingual-test.ts +0 -131
  118. package/src/demos/tools/complete-rag-demo.ts +0 -369
  119. package/src/demos/tools/complete-tool-demo.ts +0 -132
  120. package/src/demos/tools/demo-tool-calling.ts +0 -124
  121. package/src/demos/tools/dynamic-language-switching-test.ts +0 -251
  122. package/src/demos/tools/hybrid-thinking-test.ts +0 -154
  123. package/src/demos/tools/memory-integration-test.ts +0 -420
  124. package/src/demos/tools/multilingual-memory-system.ts +0 -802
  125. package/src/demos/tools/ondemand-translation-demo.ts +0 -655
  126. package/src/demos/tools/production-tool-demo.ts +0 -245
  127. package/src/demos/tools/revolutionary-multilingual-test.ts +0 -151
  128. package/src/demos/tools/rigorous-language-analysis.ts +0 -218
  129. package/src/demos/tools/test-universal-memory-system.ts +0 -126
  130. package/src/demos/tools/translation-integration-guide.ts +0 -346
  131. package/src/demos/tools/universal-memory-system.ts +0 -560
  132. package/src/gemma-channel.ts +0 -47
  133. package/src/gemma-diffusion.ts +0 -167
  134. package/src/http.ts +0 -261
  135. package/src/index.ts +0 -180
  136. package/src/interfaces.ts +0 -843
  137. package/src/mcp.ts +0 -345
  138. package/src/providers/anthropic.ts +0 -796
  139. package/src/providers/google.ts +0 -840
  140. package/src/providers/index.ts +0 -8
  141. package/src/providers/ollama.ts +0 -503
  142. package/src/providers/openai.ts +0 -587
  143. package/src/router.ts +0 -785
  144. package/src/stream-decoder.ts +0 -535
  145. package/src/structured-output.ts +0 -759
  146. package/src/test-scripts/test-advanced-tools.ts +0 -310
  147. package/src/test-scripts/test-google-deep-research.ts +0 -33
  148. package/src/test-scripts/test-google-streaming-enhanced.ts +0 -147
  149. package/src/test-scripts/test-google-streaming.ts +0 -63
  150. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +0 -189
  151. package/src/test-scripts/test-google-thinking.ts +0 -46
  152. package/src/test-scripts/test-mcp-config.ts +0 -28
  153. package/src/test-scripts/test-mcp-connection.ts +0 -29
  154. package/src/test-scripts/test-system-message-positions.ts +0 -163
  155. package/src/test-scripts/test-system-prompt-improvement-demo.ts +0 -83
  156. package/src/test-scripts/test-tool-calling.ts +0 -231
  157. package/src/test-scripts/test-vllm-qwen36.ts +0 -256
  158. package/src/tests/ai-model.test.ts +0 -1614
  159. package/src/tests/auditor.test.ts +0 -224
  160. package/src/tests/gemma-diffusion.test.ts +0 -115
  161. package/src/tests/http.test.ts +0 -200
  162. package/src/tests/interfaces.test.ts +0 -117
  163. package/src/tests/providers/anthropic.test.ts +0 -118
  164. package/src/tests/providers/google.test.ts +0 -841
  165. package/src/tests/providers/ollama.test.ts +0 -1034
  166. package/src/tests/providers/openai.test.ts +0 -1511
  167. package/src/tests/router.test.ts +0 -254
  168. package/src/tests/stream-decoder.test.ts +0 -263
  169. package/src/tests/structured-output.test.ts +0 -1450
  170. package/src/tests/thinking.test.ts +0 -65
  171. package/src/tests/tools.test.ts +0 -175
  172. package/src/thinking.ts +0 -73
  173. package/src/tools.ts +0 -246
  174. package/src/zod-adapter.ts +0 -72
@@ -1,587 +0,0 @@
1
- /**
2
- * Universal LLM Client v3 — OpenAI-Compatible Provider
3
- *
4
- * Implements BaseLLMClient for OpenAI-compatible APIs.
5
- * Works with: OpenAI, OpenRouter, LM Studio, LlamaCpp, vLLM, Groq, Together.
6
- */
7
-
8
- import { BaseLLMClient } from '../client.js';
9
- import { resolveThinking, isOpenAIReasoningModel } from '../thinking.js';
10
- import { httpRequest, httpStream, parseSSE, buildHeaders } from '../http.js';
11
- import { StandardChatDecoder } from '../stream-decoder.js';
12
- import {
13
- normalizeJsonSchema,
14
- getJsonSchemaFromConfig,
15
- type JSONSchema,
16
- type StructuredOutputOptions,
17
- } from '../structured-output.js';
18
- import type {
19
- LLMClientOptions,
20
- LLMChatMessage,
21
- LLMChatResponse,
22
- ChatOptions,
23
- OpenAIResponse,
24
- OpenAIModelInfo,
25
- LLMToolCall,
26
- TokenUsageInfo,
27
- } from '../interfaces.js';
28
- import type { DecodedEvent } from '../stream-decoder.js';
29
- import type { Auditor } from '../auditor.js';
30
- import { isGemmaDiffusionModel, parseGemmaDiffusionOutput } from '../gemma-diffusion.js';
31
-
32
/**
 * Provider client for OpenAI-compatible HTTP APIs.
 *
 * Talks to `<base>/chat/completions` (blocking and SSE streaming),
 * `<base>/embeddings` and `<base>/models`, where `<base>` is the configured
 * URL with `apiBasePath` (default `/v1`) appended by the constructor.
 */
export class OpenAICompatibleClient extends BaseLLMClient {
    /**
     * DiffusionGemma on trimmed vLLM builds has no server-side reasoning or
     * tool-call parser — the native channel protocol is handled client-side
     * (see gemma-diffusion.ts). Auto-detected from the model name; override
     * with `gemmaNativeProtocol` in LLMClientOptions.
     */
    private get gemmaNative(): boolean {
        return this.options.gemmaNativeProtocol ?? isGemmaDiffusionModel(this.options.model);
    }

    /**
     * Build a full endpoint URL, respecting apiBasePath (already baked into this.options.url)
     * and any queryParams provided at the provider config level.
     *
     * @param suffix Endpoint path such as `/chat/completions`; a leading
     *               slash is added when missing.
     * @returns Base path + suffix, followed by the merged query string (if any).
     */
    private buildUrl(suffix: string): string {
        const raw = this.options.url.replace(/\/+$/, '');
        // Split off any query string already on the configured base URL so the
        // path is inserted before it (avoids `host/v1?k=v/chat/completions`).
        const qIdx = raw.indexOf('?');
        const basePath = (qIdx === -1 ? raw : raw.slice(0, qIdx)).replace(/\/+$/, '');
        const existingQuery = qIdx === -1 ? '' : raw.slice(qIdx + 1);
        const path = suffix.startsWith('/') ? suffix : '/' + suffix;

        // Provider-level queryParams override same-named keys from the URL.
        const search = new URLSearchParams(existingQuery);
        const qp = this.options.queryParams;
        if (qp) {
            for (const [k, v] of Object.entries(qp)) {
                if (v != null) search.set(k, String(v));
            }
        }
        const qs = search.toString();
        return basePath + path + (qs ? `?${qs}` : '');
    }

    /**
     * @param options Client options. `url` defaults to `https://api.openai.com`;
     *                `apiBasePath` defaults to `/v1` and is skipped when set to
     *                `''` or `'/'`.
     * @param auditor Optional auditor forwarded to the base client.
     */
    constructor(options: LLMClientOptions, auditor?: Auditor) {
        const configured = options.url || 'https://api.openai.com';

        // Keep a query string on the configured URL out of the way while the
        // base path is appended, otherwise `host/v1?k=v` would turn into
        // `host/v1?k=v/v1` and corrupt the query value.
        const qIdx = configured.indexOf('?');
        const query = qIdx === -1 ? '' : configured.slice(qIdx);
        let base = (qIdx === -1 ? configured : configured.slice(0, qIdx)).replace(/\/+$/, '');

        // Respect apiBasePath (from ProviderConfig.apiBasePath). Default "/v1" for broad compatibility.
        // Set apiBasePath: '' (or '/') when you are supplying a *complete* path already
        // (e.g. full Azure ".../deployments/my-model" URL) or for non-/v1 OpenAI-compatible servers.
        const desired = options.apiBasePath;
        const shouldAppend = desired !== '' && desired !== '/';

        if (shouldAppend) {
            // Normalize to exactly one leading slash and no trailing slash
            // (so 'v1', '/v1', '//v1' and '/v1/' all become '/v1').
            const basePath = ('/' + (desired || '/v1').replace(/^\/+/, '')).replace(/\/+$/, '');
            if (!base.endsWith(basePath)) {
                base += basePath;
            }
        }

        super({ ...options, url: base + query }, auditor);
    }

    // ========================================================================
    // Chat
    // ========================================================================

    /**
     * Send a non-streaming chat completion request.
     *
     * @param messages Conversation history in OpenAI message format.
     * @param options  Per-call options (tools, structured output, sampling).
     * @returns The assistant message, optional reasoning text and token usage.
     * @throws Error when the response contains no choices; errors raised by
     *         `httpRequest` propagate unchanged.
     */
    async chat(
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): Promise<LLMChatResponse> {
        // Structured output and tools can now be used together.
        // The provider sends both response_format and tools in the request.
        // The Router handles skipping validation when the response contains tool calls.

        const url = this.buildUrl('/chat/completions');
        // Explicit per-call tools win; otherwise fall back to registered tools.
        const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);

        const body: Record<string, unknown> = {
            model: this.options.model,
            messages: this.convertMessages(messages),
            ...this.buildRequestParams(options),
        };

        // Handle structured output
        const schemaOptions = this.extractSchemaOptions(options);
        if (schemaOptions) {
            body['response_format'] = this.buildResponseFormat(schemaOptions);
        } else if (options?.responseFormat) {
            body['response_format'] = options.responseFormat;
        }

        if (tools?.length) {
            body['tools'] = tools;
            if (options?.toolChoice) {
                body['tool_choice'] = options.toolChoice;
            }
        }

        if (this.gemmaNative) {
            // Markers must survive decoding for client-side parsing,
            // and request-level tool parsing is unavailable server-side.
            body['skip_special_tokens'] = false;
            if (tools?.length) body['tool_choice'] = 'none';
        }

        const start = Date.now();
        this.auditor.record({
            timestamp: start,
            type: 'request',
            provider: 'openai',
            model: this.options.model,
        });

        const response = await httpRequest<OpenAIResponse>(url, {
            method: 'POST',
            headers: buildHeaders(this.options),
            body,
            timeout: this.options.timeout ?? 30000,
        });

        const data = response.data;
        // Optional chaining so a body without `choices` reaches the explicit
        // error below instead of failing with an opaque TypeError.
        const choice = data.choices?.[0];

        if (!choice) {
            throw new Error('No choices returned from OpenAI API');
        }

        // vLLM / OpenAI-compatible `usage` carries no timing, so derive decode
        // throughput from the client-measured wall-clock duration.
        const durationMs = Date.now() - start;
        const usage: TokenUsageInfo | undefined = data.usage
            ? {
                inputTokens: data.usage.prompt_tokens,
                outputTokens: data.usage.completion_tokens,
                totalTokens: data.usage.total_tokens,
                cachedTokens: data.usage.prompt_tokens_details?.cached_tokens,
                durationMs,
                tokensPerSecond: durationMs > 0
                    ? data.usage.completion_tokens / (durationMs / 1000)
                    : undefined,
            }
            : undefined;

        // Normalize tool calls (ensure IDs and JSON-parseable empty args exist).
        let toolCalls = choice.message.tool_calls?.map(tc => this.normalizeToolCall(tc));

        // Get content, handling null case
        let content = choice.message.content || '';
        let reasoning: string | undefined;

        // Reasoning models served over the OpenAI-compatible API (vLLM
        // `--reasoning-parser`, DeepSeek-R1, etc.) return the chain-of-thought
        // in a dedicated field instead of inline <think> tags. vLLM uses
        // `reasoning_content`; some gateways use `reasoning`.
        const serverReasoning = choice.message.reasoning ?? choice.message.reasoning_content;
        if (typeof serverReasoning === 'string' && serverReasoning.length > 0) {
            reasoning = serverReasoning;
        }

        if (this.gemmaNative && content) {
            const parsed = parseGemmaDiffusionOutput(content);
            content = parsed.content;
            if (parsed.reasoning) reasoning = parsed.reasoning;
            // Server-parsed tool calls take precedence over client-parsed ones.
            if (!toolCalls?.length && parsed.toolCalls.length) {
                toolCalls = parsed.toolCalls.map(tc => ({
                    id: this.generateToolCallId(),
                    type: 'function' as const,
                    function: { name: tc.name, arguments: tc.argumentsJson },
                }));
            }
        }

        const result: LLMChatResponse = {
            message: {
                role: 'assistant',
                content,
                tool_calls: toolCalls,
            },
            ...(reasoning !== undefined && { reasoning }),
            usage,
            provider: 'openai',
        };

        this.auditor.record({
            timestamp: Date.now(),
            type: 'response',
            provider: 'openai',
            model: this.options.model,
            duration: Date.now() - start,
            usage,
        });

        return result;
    }

    // ========================================================================
    // Streaming
    // ========================================================================

    /**
     * Stream a chat completion over SSE.
     *
     * Yields `thinking`, `text` and `tool_call` events as chunks arrive and
     * returns the assembled response when the stream ends.
     *
     * NOTE(review): unlike `chat()`, this method does not send
     * `response_format` — confirm whether streaming structured output is
     * intentionally unsupported.
     *
     * @param messages Conversation history in OpenAI message format.
     * @param options  Per-call options (tools, sampling, thinking).
     */
    async *chatStream(
        messages: LLMChatMessage[],
        options?: ChatOptions,
    ): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown> {
        /** Shape of one parsed SSE chunk, limited to the fields read below. */
        type StreamChunk = {
            choices?: Array<{
                delta?: {
                    content?: string;
                    // Reasoning-model chain-of-thought deltas (vLLM
                    // `--reasoning-parser`, DeepSeek-R1, etc.).
                    reasoning?: string;
                    reasoning_content?: string;
                    tool_calls?: Array<{
                        index: number;
                        id?: string;
                        type?: string;
                        function?: { name?: string; arguments?: string };
                    }>;
                };
                finish_reason?: string;
            }>;
            usage?: {
                prompt_tokens: number;
                completion_tokens: number;
                total_tokens: number;
                prompt_tokens_details?: {
                    cached_tokens?: number;
                };
            };
        };

        const url = this.buildUrl('/chat/completions');
        const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);

        const body: Record<string, unknown> = {
            model: this.options.model,
            messages: this.convertMessages(messages),
            stream: true,
            ...this.buildRequestParams(options),
        };

        if (tools?.length) {
            body['tools'] = tools;
            if (options?.toolChoice) {
                body['tool_choice'] = options.toolChoice;
            }
        }

        if (this.gemmaNative) {
            body['skip_special_tokens'] = false;
            if (tools?.length) body['tool_choice'] = 'none';
        }

        const start = Date.now();
        this.auditor.record({
            timestamp: start,
            type: 'stream_start',
            provider: 'openai',
            model: this.options.model,
        });

        // In gemma-native mode the decoder classifies thought-channel content,
        // so we yield ITS events (thinking vs text) instead of the raw deltas.
        const decoderEvents: DecodedEvent[] = [];
        const decoder = new StandardChatDecoder(
            this.gemmaNative ? e => decoderEvents.push(e) : () => {},
        );

        // Track accumulated tool calls across chunks
        const toolCallAccum: Map<number, {
            id: string;
            type: 'function';
            function: { name: string; arguments: string };
        }> = new Map();

        const stream = httpStream(url, {
            method: 'POST',
            headers: buildHeaders(this.options),
            body,
            timeout: this.options.timeout ?? 120000,
        });

        let usage: TokenUsageInfo | undefined;
        // Accumulates reasoning deltas from servers that stream a dedicated
        // `reasoning` / `reasoning_content` field (vLLM, DeepSeek-R1, etc.).
        let reasoningBuffer = '';

        for await (const { data } of parseSSE(stream)) {
            // Only the parse is guarded: unparseable SSE data is skipped, but
            // errors raised while handling a valid chunk (or injected by the
            // consumer through generator.throw) must not be swallowed.
            let parsed: StreamChunk | null;
            try {
                parsed = JSON.parse(data) as StreamChunk | null;
            } catch {
                continue;
            }
            // Valid JSON that is not an object (e.g. `null`) carries no chunk.
            if (parsed === null || typeof parsed !== 'object') continue;

            if (parsed.usage) {
                usage = {
                    inputTokens: parsed.usage.prompt_tokens,
                    outputTokens: parsed.usage.completion_tokens,
                    totalTokens: parsed.usage.total_tokens,
                    cachedTokens: parsed.usage.prompt_tokens_details?.cached_tokens,
                };
            }

            const delta = parsed.choices?.[0]?.delta;
            if (!delta) continue;

            // Surface server-side reasoning deltas as thinking events.
            const reasoningDelta = delta.reasoning ?? delta.reasoning_content;
            if (reasoningDelta) {
                reasoningBuffer += reasoningDelta;
                yield { type: 'thinking', content: reasoningDelta };
            }

            if (delta.content) {
                decoder.push(delta.content);
                if (this.gemmaNative) {
                    while (decoderEvents.length) yield decoderEvents.shift()!;
                } else {
                    yield { type: 'text', content: delta.content };
                }
            }

            // Accumulate streamed tool calls
            if (delta.tool_calls) {
                for (const tc of delta.tool_calls) {
                    const existing = toolCallAccum.get(tc.index);
                    if (!existing) {
                        toolCallAccum.set(tc.index, {
                            id: tc.id || this.generateToolCallId(),
                            type: 'function',
                            function: {
                                name: tc.function?.name || '',
                                arguments: tc.function?.arguments || '',
                            },
                        });
                    } else {
                        // Later chunks for the same index carry fragments to append.
                        if (tc.function?.arguments) {
                            existing.function.arguments += tc.function.arguments;
                        }
                        if (tc.function?.name) {
                            existing.function.name += tc.function.name;
                        }
                    }
                }
            }

            // Emit tool calls when stream finishes
            const finishReason = parsed.choices?.[0]?.finish_reason;
            if (finishReason === 'tool_calls' || finishReason === 'stop') {
                if (toolCallAccum.size > 0) {
                    const calls = Array.from(toolCallAccum.values())
                        .map(tc => this.normalizeToolCall(tc));
                    yield { type: 'tool_call', calls };
                }
            }
        }

        decoder.flush();
        if (this.gemmaNative) {
            while (decoderEvents.length) yield decoderEvents.shift()!;
        }

        // Augment usage with client-measured timing (vLLM streams no timing).
        if (usage) {
            const durationMs = Date.now() - start;
            usage = {
                ...usage,
                durationMs,
                tokensPerSecond: durationMs > 0
                    ? usage.outputTokens / (durationMs / 1000)
                    : undefined,
            };
        }

        this.auditor.record({
            timestamp: Date.now(),
            type: 'stream_end',
            provider: 'openai',
            model: this.options.model,
            duration: Date.now() - start,
            usage,
        });

        let finalToolCalls = toolCallAccum.size > 0
            ? Array.from(toolCallAccum.values()).map(tc => this.normalizeToolCall(tc))
            : undefined;
        let cleanContent = decoder.getCleanContent();
        // Prefer the server's dedicated reasoning field; fall back to <think>
        // tags parsed from the content stream by the decoder.
        let reasoning = reasoningBuffer || decoder.getReasoning();

        if (this.gemmaNative) {
            // Native tool-call blocks live in the text channel; extract them.
            const parsed = parseGemmaDiffusionOutput(cleanContent);
            cleanContent = parsed.content;
            if (parsed.reasoning) {
                reasoning = reasoning ? `${reasoning}\n\n${parsed.reasoning}` : parsed.reasoning;
            }
            if (!finalToolCalls?.length && parsed.toolCalls.length) {
                finalToolCalls = parsed.toolCalls.map(tc => ({
                    id: this.generateToolCallId(),
                    type: 'function' as const,
                    function: { name: tc.name, arguments: tc.argumentsJson },
                }));
                yield { type: 'tool_call', calls: finalToolCalls };
            }
        }

        return {
            message: {
                role: 'assistant',
                content: cleanContent,
                tool_calls: finalToolCalls,
            },
            reasoning,
            usage,
            provider: 'openai',
        };
    }

    /**
     * Fill in the fields a tool call needs downstream: a non-empty id,
     * `type: 'function'`, a string name and string arguments.
     */
    private normalizeToolCall(
        toolCall: Partial<LLMToolCall> & { function?: Partial<LLMToolCall['function']> },
    ): LLMToolCall {
        return {
            ...toolCall,
            id: toolCall.id || this.generateToolCallId(),
            type: 'function',
            function: {
                ...toolCall.function,
                name: toolCall.function?.name || '',
                arguments: this.normalizeToolArguments(toolCall.function?.arguments),
            },
        };
    }

    /**
     * Coerce tool arguments to a string: blank strings and null/undefined
     * become `'{}'`, non-string values are JSON-stringified.
     */
    private normalizeToolArguments(args: unknown): string {
        if (typeof args === 'string') {
            return args.trim().length > 0 ? args : '{}';
        }
        if (args == null) {
            return '{}';
        }
        // JSON.stringify returns undefined for e.g. functions; fall back to '{}'.
        return JSON.stringify(args) ?? '{}';
    }

    // ========================================================================
    // Embeddings
    // ========================================================================

    /**
     * Request an embedding for `text` from `/embeddings`.
     *
     * @returns The first embedding vector, or `[]` when the response has none.
     */
    async embed(text: string): Promise<number[]> {
        const url = this.buildUrl('/embeddings');
        const response = await httpRequest<{
            data: Array<{ embedding: number[] }>;
        }>(url, {
            method: 'POST',
            headers: buildHeaders(this.options),
            body: {
                model: this.options.model,
                input: text,
            },
            timeout: this.options.timeout ?? 30000,
        });
        return response.data.data[0]?.embedding ?? [];
    }

    // ========================================================================
    // Model Discovery
    // ========================================================================

    /**
     * List model ids from `/models`.
     *
     * Best-effort: any failure (network, timeout, unexpected payload) yields
     * an empty list instead of an error.
     */
    async getModels(): Promise<string[]> {
        const url = this.buildUrl('/models');
        try {
            const response = await httpRequest<{
                data: OpenAIModelInfo[];
            }>(url, {
                headers: buildHeaders(this.options),
                timeout: 5000,
            });
            return response.data.data.map(m => m.id);
        } catch {
            return [];
        }
    }

    // ========================================================================
    // Internals
    // ========================================================================

    /** Copy messages for the request body, replacing missing content with ''. */
    private convertMessages(messages: LLMChatMessage[]): LLMChatMessage[] {
        // OpenAI format is our canonical format, minimal conversion needed
        return messages.map(msg => ({
            ...msg,
            // Ensure content is never null/undefined
            content: msg.content ?? '',
        }));
    }

    /**
     * Merge default and per-call parameters into request body fields.
     *
     * Per-call `parameters` override `defaultParameters`; explicit
     * `temperature` / `maxTokens` override both.
     */
    private buildRequestParams(options?: ChatOptions): Record<string, unknown> {
        const params: Record<string, unknown> = {
            ...this.options.defaultParameters,
            ...options?.parameters,
        };
        if (options?.temperature !== undefined) params['temperature'] = options.temperature;
        if (options?.maxTokens !== undefined) params['max_tokens'] = options.maxTokens;

        // Unified thinking flag. Per-call overrides model config; only emitted
        // when explicitly set, so servers that reject unknown fields are
        // unaffected by default. OpenAI reasoning models (o-series / GPT-5) use
        // `reasoning_effort`; vLLM / Qwen use `chat_template_kwargs.enable_thinking`.
        // A user-supplied value (via parameters) always wins.
        const thinking = resolveThinking(options?.thinking, this.options.thinking);
        if (thinking) {
            const isOfficialOpenAI = (this.options.url ?? '').includes('api.openai.com');
            if (isOpenAIReasoningModel(this.options.model)) {
                if (params['reasoning_effort'] === undefined) {
                    params['reasoning_effort'] = thinking.enabled ? (thinking.level ?? 'medium') : 'minimal';
                }
            } else if (!isOfficialOpenAI) {
                // `chat_template_kwargs` is a vLLM/Qwen extension. Official OpenAI
                // rejects unknown body fields (and gpt-4o has no thinking toggle),
                // so only send it to self-hosted / compatible gateways.
                const existing = (params['chat_template_kwargs'] as Record<string, unknown> | undefined) ?? {};
                // Spread `existing` last so a user-supplied enable_thinking wins.
                params['chat_template_kwargs'] = { enable_thinking: thinking.enabled, ...existing };
            }
        }
        return params;
    }

    // ========================================================================
    // Structured Output Helpers
    // ========================================================================

    /**
     * Build OpenAI response_format for structured output.
     *
     * @param options Structured-output options carrying either a raw
     *                `jsonSchema` or a `schemaConfig`; `jsonSchema` wins when
     *                both are present.
     * @returns A `{ type: 'json_schema', json_schema: … }` object.
     * @throws Error when neither `jsonSchema` nor `schemaConfig` is set.
     */
    private buildResponseFormat(options: StructuredOutputOptions<unknown> & { strict?: boolean }): Record<string, unknown> {
        let jsonSchema: JSONSchema;
        let name: string;
        let description: string | undefined;

        // Prefer jsonSchema if provided (handles raw JSON Schema case)
        if (options.jsonSchema) {
            // Use raw JSON Schema
            jsonSchema = normalizeJsonSchema(options.jsonSchema);
            name = options.name || 'response';
            description = options.description;
        } else if (options.schemaConfig) {
            // Use SchemaConfig's embedded JSON Schema
            jsonSchema = getJsonSchemaFromConfig(options.schemaConfig);
            name = options.name || options.schemaConfig.name || 'response';
            description = options.description || options.schemaConfig.description;
        } else {
            // Should not happen - we check this in extractSchemaOptions
            throw new Error('Either schemaConfig or jsonSchema must be provided');
        }

        // OpenAI strict mode — configurable, defaults to true for reliable structured output
        return {
            type: 'json_schema',
            json_schema: {
                name,
                ...(description && { description }),
                schema: jsonSchema,
                strict: options.strict ?? true,
            },
        };
    }
}