universal-llm-client 4.2.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/CHANGELOG.md +142 -103
  2. package/LICENSE +21 -21
  3. package/README.md +640 -591
  4. package/dist/ai-model.d.ts +12 -1
  5. package/dist/ai-model.d.ts.map +1 -1
  6. package/dist/ai-model.js +36 -1
  7. package/dist/ai-model.js.map +1 -1
  8. package/dist/gemma-channel.d.ts +14 -0
  9. package/dist/gemma-channel.d.ts.map +1 -0
  10. package/dist/gemma-channel.js +38 -0
  11. package/dist/gemma-channel.js.map +1 -0
  12. package/dist/gemma-diffusion.d.ts +49 -0
  13. package/dist/gemma-diffusion.d.ts.map +1 -0
  14. package/dist/gemma-diffusion.js +147 -0
  15. package/dist/gemma-diffusion.js.map +1 -0
  16. package/dist/http.d.ts +4 -0
  17. package/dist/http.d.ts.map +1 -1
  18. package/dist/http.js +14 -1
  19. package/dist/http.js.map +1 -1
  20. package/dist/index.d.ts +2 -1
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +4 -0
  23. package/dist/index.js.map +1 -1
  24. package/dist/interfaces.d.ts +183 -7
  25. package/dist/interfaces.d.ts.map +1 -1
  26. package/dist/interfaces.js.map +1 -1
  27. package/dist/providers/anthropic.d.ts.map +1 -1
  28. package/dist/providers/anthropic.js +28 -3
  29. package/dist/providers/anthropic.js.map +1 -1
  30. package/dist/providers/google.d.ts +22 -1
  31. package/dist/providers/google.d.ts.map +1 -1
  32. package/dist/providers/google.js +225 -13
  33. package/dist/providers/google.js.map +1 -1
  34. package/dist/providers/ollama.d.ts +2 -0
  35. package/dist/providers/ollama.d.ts.map +1 -1
  36. package/dist/providers/ollama.js +59 -30
  37. package/dist/providers/ollama.js.map +1 -1
  38. package/dist/providers/openai.d.ts +14 -0
  39. package/dist/providers/openai.d.ts.map +1 -1
  40. package/dist/providers/openai.js +200 -22
  41. package/dist/providers/openai.js.map +1 -1
  42. package/dist/router.d.ts +2 -0
  43. package/dist/router.d.ts.map +1 -1
  44. package/dist/router.js +4 -0
  45. package/dist/router.js.map +1 -1
  46. package/dist/stream-decoder.d.ts +12 -0
  47. package/dist/stream-decoder.d.ts.map +1 -1
  48. package/dist/stream-decoder.js +182 -5
  49. package/dist/stream-decoder.js.map +1 -1
  50. package/dist/thinking.d.ts +36 -0
  51. package/dist/thinking.d.ts.map +1 -0
  52. package/dist/thinking.js +52 -0
  53. package/dist/thinking.js.map +1 -0
  54. package/package.json +118 -116
  55. package/src/ai-model.ts +400 -350
  56. package/src/auditor.ts +213 -213
  57. package/src/client.ts +402 -402
  58. package/src/debug/debug-google-streaming.ts +1 -1
  59. package/src/demos/basic/universal-llm-examples.ts +3 -3
  60. package/src/demos/diffusion-gemma/.env +29 -0
  61. package/src/demos/diffusion-gemma/.env.example +27 -0
  62. package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
  63. package/src/demos/diffusion-gemma/README.md +59 -0
  64. package/src/demos/diffusion-gemma/canvas.ts +1606 -0
  65. package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
  66. package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
  67. package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
  68. package/src/demos/diffusion-gemma/server.ts +1205 -0
  69. package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
  70. package/src/gemma-channel.ts +47 -0
  71. package/src/gemma-diffusion.ts +167 -0
  72. package/src/http.ts +261 -247
  73. package/src/index.ts +180 -161
  74. package/src/interfaces.ts +843 -657
  75. package/src/mcp.ts +345 -345
  76. package/src/providers/anthropic.ts +796 -762
  77. package/src/providers/google.ts +840 -620
  78. package/src/providers/index.ts +8 -8
  79. package/src/providers/ollama.ts +503 -469
  80. package/src/providers/openai.ts +587 -392
  81. package/src/router.ts +785 -780
  82. package/src/stream-decoder.ts +535 -361
  83. package/src/structured-output.ts +759 -759
  84. package/src/test-scripts/test-google-deep-research.ts +33 -0
  85. package/src/test-scripts/test-google-streaming-enhanced.ts +147 -147
  86. package/src/test-scripts/test-google-streaming.ts +1 -1
  87. package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -189
  88. package/src/test-scripts/test-google-thinking.ts +46 -0
  89. package/src/test-scripts/test-system-message-positions.ts +163 -163
  90. package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -83
  91. package/src/test-scripts/test-vllm-qwen36.ts +256 -0
  92. package/src/tests/ai-model.test.ts +1614 -1614
  93. package/src/tests/auditor.test.ts +224 -224
  94. package/src/tests/gemma-diffusion.test.ts +115 -0
  95. package/src/tests/http.test.ts +200 -200
  96. package/src/tests/interfaces.test.ts +117 -117
  97. package/src/tests/providers/anthropic.test.ts +118 -0
  98. package/src/tests/providers/google.test.ts +841 -660
  99. package/src/tests/providers/ollama.test.ts +1034 -954
  100. package/src/tests/providers/openai.test.ts +1511 -1122
  101. package/src/tests/router.test.ts +254 -254
  102. package/src/tests/stream-decoder.test.ts +263 -179
  103. package/src/tests/structured-output.test.ts +1450 -1450
  104. package/src/tests/thinking.test.ts +65 -0
  105. package/src/tests/tools.test.ts +175 -175
  106. package/src/thinking.ts +73 -0
  107. package/src/tools.ts +246 -246
  108. package/src/zod-adapter.ts +72 -72
@@ -1,361 +1,535 @@
1
- /**
2
- * Universal LLM Client v3 — Stream Decoder
3
- *
4
- * Pluggable interface for decoding raw LLM token streams into typed events.
5
- * Consumers select their strategy per-call: passthrough for raw speed,
6
- * standard-chat for structured tool calls, or interleaved-reasoning
7
- * for models that emit <think>/<progress> tags.
8
- */
9
-
10
- import type { LLMToolCall } from './interfaces.js';
11
-
12
- // ============================================================================
13
- // Decoded Event Types
14
- // ============================================================================
15
-
16
- /** Clean, typed events emitted by a stream decoder */
17
- export type DecodedEvent =
18
- | { type: 'text'; content: string }
19
- | { type: 'thinking'; content: string }
20
- | { type: 'progress'; content: string }
21
- | { type: 'tool_call'; calls: LLMToolCall[] };
22
-
23
- /** Callback invoked by the decoder as events become available */
24
- export type DecoderCallback = (event: DecodedEvent) => void;
25
-
26
- // ============================================================================
27
- // Decoder Interface
28
- // ============================================================================
29
-
30
- /**
31
- * Transform raw LLM tokens into clean typed events.
32
- *
33
- * Usage:
34
- * const decoder = createDecoder('standard-chat', callback);
35
- * for (const token of stream) decoder.push(token);
36
- * decoder.flush();
37
- * const clean = decoder.getCleanContent();
38
- */
39
- export interface StreamDecoder {
40
- /** Feed a raw token from the LLM stream */
41
- push(token: string): void;
42
- /** Signal end of stream — flush any buffered state */
43
- flush(): void;
44
- /** Get the accumulated clean text (all structural tags stripped) */
45
- getCleanContent(): string;
46
- /** Get accumulated reasoning/thinking content (if any) */
47
- getReasoning(): string | undefined;
48
- }
49
-
50
- // ============================================================================
51
- // Decoder Types
52
- // ============================================================================
53
-
54
- export type DecoderType = 'passthrough' | 'standard-chat' | 'interleaved-reasoning';
55
-
56
- // ============================================================================
57
- // Passthrough Decoder
58
- // ============================================================================
59
-
60
- /**
61
- * Bare-bones decoder for raw text completions.
62
- * No parsing, no tag awareness. All tokens → text events.
63
- */
64
- export class PassthroughDecoder implements StreamDecoder {
65
- private content = '';
66
- private readonly callback: DecoderCallback;
67
-
68
- constructor(callback: DecoderCallback) {
69
- this.callback = callback;
70
- }
71
-
72
- push(token: string): void {
73
- this.content += token;
74
- this.callback({ type: 'text', content: token });
75
- }
76
-
77
- flush(): void {
78
- // Nothing to flush — all tokens emitted immediately
79
- }
80
-
81
- getCleanContent(): string {
82
- return this.content;
83
- }
84
-
85
- getReasoning(): string | undefined {
86
- return undefined;
87
- }
88
- }
89
-
90
- // ============================================================================
91
- // Standard Chat Decoder
92
- // ============================================================================
93
-
94
- /**
95
- * Decoder for standard LLM chat patterns — text streaming with native
96
- * reasoning and structured API tool calls. No text-level tag parsing.
97
- *
98
- * Streamed tokens are clean text → emitted as `text` events.
99
- * Native reasoning tokens → accepted via `pushReasoning()`.
100
- * Structured tool calls → accepted via `pushToolCalls()`.
101
- */
102
- export class StandardChatDecoder implements StreamDecoder {
103
- private content = '';
104
- private reasoning = '';
105
- private readonly callback: DecoderCallback;
106
-
107
- constructor(callback: DecoderCallback) {
108
- this.callback = callback;
109
- }
110
-
111
- push(token: string): void {
112
- this.content += token;
113
- this.callback({ type: 'text', content: token });
114
- }
115
-
116
- /** Feed native reasoning tokens from the provider */
117
- pushReasoning(content: string): void {
118
- this.reasoning += content;
119
- this.callback({ type: 'thinking', content });
120
- }
121
-
122
- /** Feed structured tool calls from the provider API response */
123
- pushToolCalls(calls: LLMToolCall[]): void {
124
- this.callback({ type: 'tool_call', calls });
125
- }
126
-
127
- flush(): void {
128
- // Nothing to flush — all events emitted as they arrive
129
- }
130
-
131
- getCleanContent(): string {
132
- return this.content;
133
- }
134
-
135
- getReasoning(): string | undefined {
136
- return this.reasoning || undefined;
137
- }
138
- }
139
-
140
- // ============================================================================
141
- // Interleaved Reasoning Decoder
142
- // ============================================================================
143
-
144
- /**
145
- * Decoder for models that emit interleaved reasoning tags in text.
146
- * Parses <think>...</think> and <progress>...</progress> tags from the
147
- * raw token stream and emits typed events for each.
148
- *
149
- * Handles streaming where tags may be split across chunks.
150
- */
151
- export class InterleavedReasoningDecoder implements StreamDecoder {
152
- private buffer = '';
153
- private content = '';
154
- private reasoning = '';
155
- private readonly callback: DecoderCallback;
156
- private inThink = false;
157
- private inProgress = false;
158
-
159
- constructor(callback: DecoderCallback) {
160
- this.callback = callback;
161
- }
162
-
163
- push(token: string): void {
164
- this.buffer += token;
165
- this.processBuffer();
166
- }
167
-
168
- flush(): void {
169
- // Emit any remaining buffer content as text
170
- if (this.buffer.length > 0) {
171
- if (this.inThink) {
172
- this.reasoning += this.buffer;
173
- this.callback({ type: 'thinking', content: this.buffer });
174
- } else if (this.inProgress) {
175
- this.callback({ type: 'progress', content: this.buffer });
176
- } else {
177
- this.content += this.buffer;
178
- this.callback({ type: 'text', content: this.buffer });
179
- }
180
- this.buffer = '';
181
- }
182
- }
183
-
184
- getCleanContent(): string {
185
- return this.content;
186
- }
187
-
188
- getReasoning(): string | undefined {
189
- return this.reasoning || undefined;
190
- }
191
-
192
- private processBuffer(): void {
193
- let safety = 0;
194
- while (this.buffer.length > 0 && safety++ < 200) {
195
- if (this.inThink) {
196
- const closeIdx = this.buffer.indexOf('</think>');
197
- if (closeIdx === -1) {
198
- // Might have partial closing tag at end
199
- if (this.buffer.endsWith('<') || this.buffer.endsWith('</') ||
200
- this.buffer.endsWith('</t') || this.buffer.endsWith('</th') ||
201
- this.buffer.endsWith('</thi') || this.buffer.endsWith('</thin') ||
202
- this.buffer.endsWith('</think')) {
203
- return; // Wait for more data
204
- }
205
- this.reasoning += this.buffer;
206
- this.callback({ type: 'thinking', content: this.buffer });
207
- this.buffer = '';
208
- return;
209
- }
210
- const thinkContent = this.buffer.slice(0, closeIdx);
211
- if (thinkContent) {
212
- this.reasoning += thinkContent;
213
- this.callback({ type: 'thinking', content: thinkContent });
214
- }
215
- this.buffer = this.buffer.slice(closeIdx + 8); // '</think>'.length
216
- this.inThink = false;
217
- continue;
218
- }
219
-
220
- if (this.inProgress) {
221
- const closeIdx = this.buffer.indexOf('</progress>');
222
- if (closeIdx === -1) {
223
- if (this.couldBePartialTag(this.buffer, '</progress>')) return;
224
- this.callback({ type: 'progress', content: this.buffer });
225
- this.buffer = '';
226
- return;
227
- }
228
- const progressContent = this.buffer.slice(0, closeIdx);
229
- if (progressContent) {
230
- this.callback({ type: 'progress', content: progressContent });
231
- }
232
- this.buffer = this.buffer.slice(closeIdx + 11); // '</progress>'.length
233
- this.inProgress = false;
234
- continue;
235
- }
236
-
237
- // Look for opening tags
238
- const thinkIdx = this.buffer.indexOf('<think>');
239
- const progressIdx = this.buffer.indexOf('<progress>');
240
-
241
- // Find earliest tag
242
- const nextTag = this.findEarliest(thinkIdx, progressIdx);
243
-
244
- if (nextTag === -1) {
245
- // No complete opening tags — check for partial tag at end
246
- const lastAngle = this.buffer.lastIndexOf('<');
247
- if (lastAngle >= 0 && lastAngle > this.buffer.length - 12) {
248
- // Potential partial tag — emit text before it, keep the rest
249
- const textBefore = this.buffer.slice(0, lastAngle);
250
- if (textBefore) {
251
- this.content += textBefore;
252
- this.callback({ type: 'text', content: textBefore });
253
- }
254
- this.buffer = this.buffer.slice(lastAngle);
255
- return;
256
- }
257
- // No partial tags — emit all as text
258
- this.content += this.buffer;
259
- this.callback({ type: 'text', content: this.buffer });
260
- this.buffer = '';
261
- return;
262
- }
263
-
264
- // Emit text before the tag
265
- const textBefore = this.buffer.slice(0, nextTag);
266
- if (textBefore) {
267
- this.content += textBefore;
268
- this.callback({ type: 'text', content: textBefore });
269
- }
270
-
271
- if (nextTag === thinkIdx) {
272
- this.buffer = this.buffer.slice(nextTag + 7); // '<think>'.length
273
- this.inThink = true;
274
- } else {
275
- this.buffer = this.buffer.slice(nextTag + 10); // '<progress>'.length
276
- this.inProgress = true;
277
- }
278
- }
279
- }
280
-
281
- private findEarliest(a: number, b: number): number {
282
- if (a === -1) return b;
283
- if (b === -1) return a;
284
- return Math.min(a, b);
285
- }
286
-
287
- private couldBePartialTag(buffer: string, tag: string): boolean {
288
- for (let i = 1; i < tag.length; i++) {
289
- if (buffer.endsWith(tag.slice(0, i))) return true;
290
- }
291
- return false;
292
- }
293
- }
294
-
295
- // ============================================================================
296
- // Pluggable Decoder Registry
297
- // ============================================================================
298
-
299
- export interface DecoderOptions {
300
- /** Known tool names for text-based tool call recovery */
301
- knownToolNames?: Set<string>;
302
- }
303
-
304
- /**
305
- * Factory function that creates a StreamDecoder instance.
306
- * External code registers these via `registerDecoder()`.
307
- */
308
- export type DecoderFactory = (callback: DecoderCallback, options?: DecoderOptions) => StreamDecoder;
309
-
310
- /** Internal registry of decoder factories, keyed by decoder type name */
311
- const decoderRegistry = new Map<string, DecoderFactory>();
312
-
313
- /**
314
- * Register a custom stream decoder type.
315
- * Once registered, it can be used via `createDecoder(name, ...)` or
316
- * by passing `decoderType: name` in ChatOptions.
317
- *
318
- * @example
319
- * ```typescript
320
- * import { registerDecoder } from 'universal-llm-client';
321
- *
322
- * registerDecoder('my-decoder', (callback, options) => {
323
- * return new MyCustomDecoder(callback, options);
324
- * });
325
- * ```
326
- */
327
- export function registerDecoder(type: string, factory: DecoderFactory): void {
328
- decoderRegistry.set(type, factory);
329
- }
330
-
331
- /**
332
- * Get all registered decoder type names.
333
- */
334
- export function getRegisteredDecoders(): string[] {
335
- return Array.from(decoderRegistry.keys());
336
- }
337
-
338
- // Pre-register built-in decoders
339
- registerDecoder('passthrough', (cb) => new PassthroughDecoder(cb));
340
- registerDecoder('standard-chat', (cb) => new StandardChatDecoder(cb));
341
- registerDecoder('interleaved-reasoning', (cb) => new InterleavedReasoningDecoder(cb));
342
-
343
- /**
344
- * Create a stream decoder by type name.
345
- * Looks up the decoder in the registry (built-in + custom).
346
- *
347
- * @throws Error if the decoder type is not registered
348
- */
349
- export function createDecoder(
350
- type: DecoderType | string,
351
- callback: DecoderCallback,
352
- options?: DecoderOptions,
353
- ): StreamDecoder {
354
- const factory = decoderRegistry.get(type);
355
- if (!factory) {
356
- const available = Array.from(decoderRegistry.keys()).join(', ');
357
- throw new Error(`Unknown decoder type: "${type}". Available: ${available}`);
358
- }
359
- return factory(callback, options);
360
- }
361
-
1
+ /**
2
+ * Universal LLM Client v3 — Stream Decoder
3
+ *
4
+ * Pluggable interface for decoding raw LLM token streams into typed events.
5
+ * Consumers select their strategy per-call: passthrough for raw speed,
6
+ * standard-chat for structured tool calls, or interleaved-reasoning
7
+ * for models that emit <think>/<progress> tags.
8
+ */
9
+
10
+ import type { LLMToolCall } from './interfaces.js';
11
+ import { GEMMA_THOUGHT_OPENERS, normalizeGemmaThought } from './gemma-channel.js';
12
+
13
+ // ============================================================================
14
+ // Decoded Event Types
15
+ // ============================================================================
16
+
17
+ /** Clean, typed events emitted by a stream decoder */
18
+ export type DecodedEvent =
19
+ | { type: 'text'; content: string }
20
+ | { type: 'thinking'; content: string }
21
+ | { type: 'progress'; content: string }
22
+ | { type: 'tool_call'; calls: LLMToolCall[] };
23
+
24
+ /** Callback invoked by the decoder as events become available */
25
+ export type DecoderCallback = (event: DecodedEvent) => void;
26
+
27
+ // ============================================================================
28
+ // Decoder Interface
29
+ // ============================================================================
30
+
31
+ /**
32
+ * Transform raw LLM tokens into clean typed events.
33
+ *
34
+ * Usage:
35
+ * const decoder = createDecoder('standard-chat', callback);
36
+ * for (const token of stream) decoder.push(token);
37
+ * decoder.flush();
38
+ * const clean = decoder.getCleanContent();
39
+ */
40
+ export interface StreamDecoder {
41
+ /** Feed a raw token from the LLM stream */
42
+ push(token: string): void;
43
+ /** Signal end of stream — flush any buffered state */
44
+ flush(): void;
45
+ /** Get the accumulated clean text (all structural tags stripped) */
46
+ getCleanContent(): string;
47
+ /** Get accumulated reasoning/thinking content (if any) */
48
+ getReasoning(): string | undefined;
49
+ }
50
+
51
+ // ============================================================================
52
+ // Decoder Types
53
+ // ============================================================================
54
+
55
+ export type DecoderType = 'passthrough' | 'standard-chat' | 'interleaved-reasoning';
56
+
57
+ // ============================================================================
58
+ // Passthrough Decoder
59
+ // ============================================================================
60
+
61
+ /**
62
+ * Bare-bones decoder for raw text completions.
63
+ * No parsing, no tag awareness. All tokens → text events.
64
+ */
65
+ export class PassthroughDecoder implements StreamDecoder {
66
+ private content = '';
67
+ private readonly callback: DecoderCallback;
68
+
69
+ constructor(callback: DecoderCallback) {
70
+ this.callback = callback;
71
+ }
72
+
73
+ push(token: string): void {
74
+ this.content += token;
75
+ this.callback({ type: 'text', content: token });
76
+ }
77
+
78
+ flush(): void {
79
+ // Nothing to flush — all tokens emitted immediately
80
+ }
81
+
82
+ getCleanContent(): string {
83
+ return this.content;
84
+ }
85
+
86
+ getReasoning(): string | undefined {
87
+ return undefined;
88
+ }
89
+ }
90
+
91
+ // ============================================================================
92
+ // Standard Chat Decoder
93
+ // ============================================================================
94
+
95
+ /**
96
+ * Decoder for standard LLM chat patterns — text streaming with native
97
+ * reasoning and structured API tool calls. No text-level tag parsing.
98
+ *
99
+ * Streamed tokens are clean text → emitted as `text` events.
100
+ * Native reasoning tokens → accepted via `pushReasoning()`.
101
+ * Structured tool calls → accepted via `pushToolCalls()`.
102
+ */
103
+ export class StandardChatDecoder implements StreamDecoder {
104
+ private content = '';
105
+ private reasoning = '';
106
+ private readonly callback: DecoderCallback;
107
+ private tagBuffer = '';
108
+ private inProgressTag = false;
109
+ private progressBody = '';
110
+ private inGemmaThought = false;
111
+ private gemmaThoughtBody = '';
112
+ private gemmaThoughtClose = '';
113
+ private inToolCallTag = false;
114
+ private toolCallBody = '';
115
+ private toolCallClose = '';
116
+
117
+ constructor(callback: DecoderCallback) {
118
+ this.callback = callback;
119
+ }
120
+
121
+ push(token: string): void {
122
+ let pos = 0;
123
+
124
+ while (pos < token.length) {
125
+ if (this.inGemmaThought) {
126
+ this.gemmaThoughtBody += token.slice(pos);
127
+ const closeIdx = this.gemmaThoughtBody.indexOf(this.gemmaThoughtClose);
128
+ if (closeIdx !== -1) {
129
+ const body = this.gemmaThoughtBody.slice(0, closeIdx);
130
+ const remainder = this.gemmaThoughtBody.slice(closeIdx + this.gemmaThoughtClose.length);
131
+ this.emitReasoning(normalizeGemmaThought(body));
132
+ this.inGemmaThought = false;
133
+ this.gemmaThoughtBody = '';
134
+ this.gemmaThoughtClose = '';
135
+ if (remainder) this.push(remainder);
136
+ }
137
+ return;
138
+ }
139
+
140
+ if (this.inToolCallTag) {
141
+ this.toolCallBody += token.slice(pos);
142
+ const closeIdx = this.toolCallBody.indexOf(this.toolCallClose);
143
+ if (closeIdx !== -1) {
144
+ const body = this.toolCallBody.slice(0, closeIdx);
145
+ const remainder = this.toolCallBody.slice(closeIdx + this.toolCallClose.length);
146
+
147
+ if (body.trim()) {
148
+ try {
149
+ const normalizedJson = body.trim()
150
+ .replace(/'/g, '"')
151
+ .replace(/True/g, 'true')
152
+ .replace(/False/g, 'false')
153
+ .replace(/None/g, 'null');
154
+ const parsed = JSON.parse(normalizedJson);
155
+ const calls = Array.isArray(parsed) ? parsed : [parsed];
156
+ const validatedCalls: LLMToolCall[] = [];
157
+ for (const call of calls) {
158
+ if (call && typeof call === 'object' && call.name) {
159
+ validatedCalls.push({
160
+ id: call.id || `recovered_${Date.now()}_${Math.random().toString(36).slice(2)}`,
161
+ type: 'function',
162
+ function: {
163
+ name: call.name,
164
+ arguments: typeof call.arguments === 'string'
165
+ ? call.arguments
166
+ : JSON.stringify(call.arguments ?? {}),
167
+ }
168
+ });
169
+ }
170
+ }
171
+ if (validatedCalls.length > 0) {
172
+ this.callback({ type: 'tool_call', calls: validatedCalls });
173
+ }
174
+ } catch {
175
+ // ignore
176
+ }
177
+ }
178
+
179
+ this.inToolCallTag = false;
180
+ this.toolCallBody = '';
181
+ this.toolCallClose = '';
182
+ if (remainder) this.push(remainder);
183
+ }
184
+ return;
185
+ }
186
+
187
+ if (this.inProgressTag) {
188
+ this.progressBody += token.slice(pos);
189
+ const closeIdx = this.progressBody.indexOf('</progress>');
190
+ if (closeIdx !== -1) {
191
+ const body = this.progressBody.slice(0, closeIdx);
192
+ const remainder = this.progressBody.slice(closeIdx + '</progress>'.length);
193
+ if (body) {
194
+ this.callback({ type: 'progress', content: body });
195
+ }
196
+ this.inProgressTag = false;
197
+ this.progressBody = '';
198
+ if (remainder) this.push(remainder);
199
+ }
200
+ return;
201
+ }
202
+
203
+ if (this.tagBuffer.length > 0) {
204
+ const ch = token[pos]!;
205
+ pos++;
206
+ this.tagBuffer += ch;
207
+ if (this.matchesStructuralOpenerPrefix(this.tagBuffer)) {
208
+ if (this.tagBuffer === '<progress>') {
209
+ this.inProgressTag = true;
210
+ this.progressBody = '';
211
+ this.tagBuffer = '';
212
+ } else if (this.tagBuffer === '<tool_call|>') {
213
+ this.inToolCallTag = true;
214
+ this.toolCallBody = '';
215
+ this.toolCallClose = '<|tool_response>';
216
+ this.tagBuffer = '';
217
+ } else if (this.tagBuffer === '<|tool_response>') {
218
+ this.tagBuffer = '';
219
+ } else if (this.tagBuffer === '<|channel>thought') {
220
+ this.inGemmaThought = true;
221
+ this.gemmaThoughtBody = '';
222
+ this.gemmaThoughtClose = '<channel|>';
223
+ this.tagBuffer = '';
224
+ } else if (this.tagBuffer === '<|thought') {
225
+ this.inGemmaThought = true;
226
+ this.gemmaThoughtBody = '';
227
+ this.gemmaThoughtClose = '|>';
228
+ this.tagBuffer = '';
229
+ }
230
+ } else {
231
+ this.emitText(this.tagBuffer);
232
+ this.tagBuffer = '';
233
+ }
234
+ continue;
235
+ }
236
+
237
+ const ltIdx = token.indexOf('<', pos);
238
+ if (ltIdx === -1) {
239
+ this.emitText(token.slice(pos));
240
+ return;
241
+ }
242
+
243
+ if (ltIdx > pos) {
244
+ this.emitText(token.slice(pos, ltIdx));
245
+ }
246
+ this.tagBuffer = '<';
247
+ pos = ltIdx + 1;
248
+ }
249
+ }
250
+
251
+ private emitText(text: string): void {
252
+ if (!text) return;
253
+ this.content += text;
254
+ this.callback({ type: 'text', content: text });
255
+ }
256
+
257
+ private emitReasoning(content: string): void {
258
+ if (!content) return;
259
+ this.reasoning += content;
260
+ this.callback({ type: 'thinking', content });
261
+ }
262
+
263
+ private matchesStructuralOpenerPrefix(candidate: string): boolean {
264
+ if ('<progress>'.startsWith(candidate)) return true;
265
+ if ('<tool_call|>'.startsWith(candidate)) return true;
266
+ if ('<|tool_response>'.startsWith(candidate)) return true;
267
+ return GEMMA_THOUGHT_OPENERS.some(opener => opener.startsWith(candidate));
268
+ }
269
+
270
+ /** Feed native reasoning tokens from the provider */
271
+ pushReasoning(content: string): void {
272
+ this.emitReasoning(content);
273
+ }
274
+
275
+ /** Feed structured tool calls from the provider API response */
276
+ pushToolCalls(calls: LLMToolCall[]): void {
277
+ this.callback({ type: 'tool_call', calls });
278
+ }
279
+
280
+ flush(): void {
281
+ if (this.tagBuffer) {
282
+ this.emitText(this.tagBuffer);
283
+ this.tagBuffer = '';
284
+ }
285
+ if (this.inGemmaThought) {
286
+ this.emitReasoning(normalizeGemmaThought(this.gemmaThoughtBody));
287
+ this.inGemmaThought = false;
288
+ this.gemmaThoughtBody = '';
289
+ this.gemmaThoughtClose = '';
290
+ }
291
+ if (this.inProgressTag) {
292
+ if (this.progressBody) {
293
+ this.emitText('<progress>' + this.progressBody);
294
+ }
295
+ this.inProgressTag = false;
296
+ this.progressBody = '';
297
+ }
298
+ if (this.inToolCallTag) {
299
+ this.inToolCallTag = false;
300
+ this.toolCallBody = '';
301
+ this.toolCallClose = '';
302
+ }
303
+ }
304
+
305
+ getCleanContent(): string {
306
+ return this.content;
307
+ }
308
+
309
+ getReasoning(): string | undefined {
310
+ return this.reasoning || undefined;
311
+ }
312
+ }
313
+
314
+ // ============================================================================
315
+ // Interleaved Reasoning Decoder
316
+ // ============================================================================
317
+
318
+ /**
319
+ * Decoder for models that emit interleaved reasoning tags in text.
320
+ * Parses <think>...</think> and <progress>...</progress> tags from the
321
+ * raw token stream and emits typed events for each.
322
+ *
323
+ * Handles streaming where tags may be split across chunks.
324
+ */
325
+ export class InterleavedReasoningDecoder implements StreamDecoder {
326
+ private buffer = '';
327
+ private content = '';
328
+ private reasoning = '';
329
+ private readonly callback: DecoderCallback;
330
+ private inThink = false;
331
+ private inProgress = false;
332
+
333
+ constructor(callback: DecoderCallback) {
334
+ this.callback = callback;
335
+ }
336
+
337
+ push(token: string): void {
338
+ this.buffer += token;
339
+ this.processBuffer();
340
+ }
341
+
342
+ flush(): void {
343
+ // Emit any remaining buffer content as text
344
+ if (this.buffer.length > 0) {
345
+ if (this.inThink) {
346
+ this.reasoning += this.buffer;
347
+ this.callback({ type: 'thinking', content: this.buffer });
348
+ } else if (this.inProgress) {
349
+ this.callback({ type: 'progress', content: this.buffer });
350
+ } else {
351
+ this.content += this.buffer;
352
+ this.callback({ type: 'text', content: this.buffer });
353
+ }
354
+ this.buffer = '';
355
+ }
356
+ }
357
+
358
+ getCleanContent(): string {
359
+ return this.content;
360
+ }
361
+
362
+ getReasoning(): string | undefined {
363
+ return this.reasoning || undefined;
364
+ }
365
+
366
/**
 * Drain `this.buffer`, classifying its contents as plain text, reasoning
 * (`<think>…</think>`) or progress (`<progress>…</progress>`) and forwarding
 * each piece to `this.callback`.
 *
 * - Text outside tags is appended to `this.content` and emitted as `text`.
 * - Text inside `<think>` is appended to `this.reasoning` and emitted as `thinking`.
 * - Text inside `<progress>` is emitted as `progress` and not stored.
 *
 * When the buffer ends with something that could be the beginning of a tag,
 * the ambiguous part stays in `this.buffer` so the next chunk can complete it.
 */
private processBuffer(): void {
    const thinkOpen = '<think>';
    const thinkClose = '</think>';
    const progressOpen = '<progress>';
    const progressClose = '</progress>';

    // Backstop against runaway looping. Every pass that loops again has
    // stripped a complete tag from the buffer, so the cap only matters for a
    // chunk containing an extreme number of tags.
    let safety = 0;
    while (this.buffer.length > 0 && safety++ < 200) {
        if (this.inThink) {
            const closeIdx = this.buffer.indexOf(thinkClose);
            if (closeIdx === -1) {
                // The chunk may stop partway through '</think>': keep the
                // whole buffer until more data settles the question.
                if (this.couldBePartialTag(this.buffer, thinkClose)) return;
                this.reasoning += this.buffer;
                this.callback({ type: 'thinking', content: this.buffer });
                this.buffer = '';
                return;
            }
            const thinkContent = this.buffer.slice(0, closeIdx);
            if (thinkContent) {
                this.reasoning += thinkContent;
                this.callback({ type: 'thinking', content: thinkContent });
            }
            this.buffer = this.buffer.slice(closeIdx + thinkClose.length);
            this.inThink = false;
            continue;
        }

        if (this.inProgress) {
            const closeIdx = this.buffer.indexOf(progressClose);
            if (closeIdx === -1) {
                // Same hold-back rule as above, for '</progress>'.
                if (this.couldBePartialTag(this.buffer, progressClose)) return;
                this.callback({ type: 'progress', content: this.buffer });
                this.buffer = '';
                return;
            }
            const progressContent = this.buffer.slice(0, closeIdx);
            if (progressContent) {
                this.callback({ type: 'progress', content: progressContent });
            }
            this.buffer = this.buffer.slice(closeIdx + progressClose.length);
            this.inProgress = false;
            continue;
        }

        // Outside any tag: look for the next opening tag.
        const thinkIdx = this.buffer.indexOf(thinkOpen);
        const progressIdx = this.buffer.indexOf(progressOpen);
        const nextTag = this.findEarliest(thinkIdx, progressIdx);

        if (nextTag === -1) {
            // No complete opening tag. Hold back the tail only when it really
            // is the beginning of '<think>' or '<progress>'; any other '<'
            // (e.g. "a < b" or "<div") is ordinary text and is emitted now.
            const lastAngle = this.buffer.lastIndexOf('<');
            const tail = lastAngle === -1 ? '' : this.buffer.slice(lastAngle);
            if (tail && (thinkOpen.startsWith(tail) || progressOpen.startsWith(tail))) {
                const textBefore = this.buffer.slice(0, lastAngle);
                if (textBefore) {
                    this.content += textBefore;
                    this.callback({ type: 'text', content: textBefore });
                }
                this.buffer = tail;
                return;
            }
            this.content += this.buffer;
            this.callback({ type: 'text', content: this.buffer });
            this.buffer = '';
            return;
        }

        // Emit the text that precedes the tag, then step over the tag itself.
        const textBefore = this.buffer.slice(0, nextTag);
        if (textBefore) {
            this.content += textBefore;
            this.callback({ type: 'text', content: textBefore });
        }

        if (nextTag === thinkIdx) {
            this.buffer = this.buffer.slice(nextTag + thinkOpen.length);
            this.inThink = true;
        } else {
            this.buffer = this.buffer.slice(nextTag + progressOpen.length);
            this.inProgress = true;
        }
    }
}
454
+
455
/**
 * Pick the smaller of two `indexOf`-style results, treating `-1` as "absent".
 *
 * @returns the lower index when both are present, the present one when only
 *          one is, and `-1` when neither is.
 */
private findEarliest(a: number, b: number): number {
    const bothPresent = a !== -1 && b !== -1;
    if (bothPresent) {
        return Math.min(a, b);
    }
    return a === -1 ? b : a;
}
460
+
461
/**
 * Report whether `buffer` ends with a proper, non-empty prefix of `tag`
 * (the complete tag itself is deliberately not tested).
 */
private couldBePartialTag(buffer: string, tag: string): boolean {
    let prefixLength = 1;
    while (prefixLength < tag.length) {
        const prefix = tag.slice(0, prefixLength);
        if (buffer.endsWith(prefix)) {
            return true;
        }
        prefixLength += 1;
    }
    return false;
}
467
+ }
468
+
469
+ // ============================================================================
470
+ // Pluggable Decoder Registry
471
+ // ============================================================================
472
+
473
/** Optional settings handed to a decoder factory when a decoder is created. */
export interface DecoderOptions {
    /** Names of known tools, used for text-based tool call recovery. */
    knownToolNames?: Set<string>;
}
477
+
478
/**
 * Signature of a function that builds a StreamDecoder.
 * Factories are supplied by external code through `registerDecoder()`.
 */
export type DecoderFactory = (
    callback: DecoderCallback,
    options?: DecoderOptions,
) => StreamDecoder;
483
+
484
/** Module-private lookup table: decoder type name → factory that builds it. */
const decoderRegistry: Map<string, DecoderFactory> = new Map();
486
+
487
/**
 * Add a custom stream decoder type to the registry.
 *
 * After registration the decoder can be obtained with
 * `createDecoder(name, ...)` or selected by passing `decoderType: name`
 * in ChatOptions. Registering a name that already exists replaces the
 * previously stored factory.
 *
 * @param type    Name under which the decoder is stored.
 * @param factory Function that builds the decoder instance.
 *
 * @example
 * ```typescript
 * import { registerDecoder } from 'universal-llm-client';
 *
 * registerDecoder('my-decoder', (callback, options) => {
 *     return new MyCustomDecoder(callback, options);
 * });
 * ```
 */
export function registerDecoder(type: string, factory: DecoderFactory): void {
    decoderRegistry.set(type, factory);
}
504
+
505
/**
 * List the names of every decoder type currently in the registry.
 *
 * @returns a new array of names, in registration order.
 */
export function getRegisteredDecoders(): string[] {
    return [...decoderRegistry.keys()];
}
511
+
512
// Built-in decoders are registered as soon as this module is evaluated.
registerDecoder('passthrough', (callback) => new PassthroughDecoder(callback));
registerDecoder('standard-chat', (callback) => new StandardChatDecoder(callback));
registerDecoder('interleaved-reasoning', (callback) => new InterleavedReasoningDecoder(callback));
516
+
517
/**
 * Build a stream decoder from its registered type name.
 * Both built-in and custom registrations are consulted.
 *
 * @param type     Registered decoder type name.
 * @param callback Receiver passed to the decoder factory.
 * @param options  Optional settings forwarded to the factory unchanged.
 * @returns the decoder produced by the matching factory.
 * @throws Error if no factory is registered under `type`; the message lists
 *         the names that are available.
 */
export function createDecoder(
    type: DecoderType | string,
    callback: DecoderCallback,
    options?: DecoderOptions,
): StreamDecoder {
    const factory = decoderRegistry.get(type);
    if (factory) {
        return factory(callback, options);
    }
    const available = [...decoderRegistry.keys()].join(', ');
    throw new Error(`Unknown decoder type: "${type}". Available: ${available}`);
}
535
+