npm - universal-llm-client - Versions diffs - 4.2.0 → 4.5.0 - Mend

universal-llm-client 4.2.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

package/CHANGELOG.md +142 -103
package/LICENSE +21 -21
package/README.md +640 -591
package/dist/ai-model.d.ts +12 -1
package/dist/ai-model.d.ts.map +1 -1
package/dist/ai-model.js +36 -1
package/dist/ai-model.js.map +1 -1
package/dist/gemma-channel.d.ts +14 -0
package/dist/gemma-channel.d.ts.map +1 -0
package/dist/gemma-channel.js +38 -0
package/dist/gemma-channel.js.map +1 -0
package/dist/gemma-diffusion.d.ts +49 -0
package/dist/gemma-diffusion.d.ts.map +1 -0
package/dist/gemma-diffusion.js +147 -0
package/dist/gemma-diffusion.js.map +1 -0
package/dist/http.d.ts +4 -0
package/dist/http.d.ts.map +1 -1
package/dist/http.js +14 -1
package/dist/http.js.map +1 -1
package/dist/index.d.ts +2 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +4 -0
package/dist/index.js.map +1 -1
package/dist/interfaces.d.ts +183 -7
package/dist/interfaces.d.ts.map +1 -1
package/dist/interfaces.js.map +1 -1
package/dist/providers/anthropic.d.ts.map +1 -1
package/dist/providers/anthropic.js +28 -3
package/dist/providers/anthropic.js.map +1 -1
package/dist/providers/google.d.ts +22 -1
package/dist/providers/google.d.ts.map +1 -1
package/dist/providers/google.js +225 -13
package/dist/providers/google.js.map +1 -1
package/dist/providers/ollama.d.ts +2 -0
package/dist/providers/ollama.d.ts.map +1 -1
package/dist/providers/ollama.js +59 -30
package/dist/providers/ollama.js.map +1 -1
package/dist/providers/openai.d.ts +14 -0
package/dist/providers/openai.d.ts.map +1 -1
package/dist/providers/openai.js +200 -22
package/dist/providers/openai.js.map +1 -1
package/dist/router.d.ts +2 -0
package/dist/router.d.ts.map +1 -1
package/dist/router.js +4 -0
package/dist/router.js.map +1 -1
package/dist/stream-decoder.d.ts +12 -0
package/dist/stream-decoder.d.ts.map +1 -1
package/dist/stream-decoder.js +182 -5
package/dist/stream-decoder.js.map +1 -1
package/dist/thinking.d.ts +36 -0
package/dist/thinking.d.ts.map +1 -0
package/dist/thinking.js +52 -0
package/dist/thinking.js.map +1 -0
package/package.json +118 -116
package/src/ai-model.ts +400 -350
package/src/auditor.ts +213 -213
package/src/client.ts +402 -402
package/src/debug/debug-google-streaming.ts +1 -1
package/src/demos/basic/universal-llm-examples.ts +3 -3
package/src/demos/diffusion-gemma/.env +29 -0
package/src/demos/diffusion-gemma/.env.example +27 -0
package/src/demos/diffusion-gemma/CLAUDE.md +95 -0
package/src/demos/diffusion-gemma/README.md +59 -0
package/src/demos/diffusion-gemma/canvas.ts +1606 -0
package/src/demos/diffusion-gemma/docker-compose.yml +29 -0
package/src/demos/diffusion-gemma/probe-stream.ts +51 -0
package/src/demos/diffusion-gemma/probe-tools.ts +55 -0
package/src/demos/diffusion-gemma/server.ts +1205 -0
package/src/demos/diffusion-gemma/start-vllm.sh +98 -0
package/src/gemma-channel.ts +47 -0
package/src/gemma-diffusion.ts +167 -0
package/src/http.ts +261 -247
package/src/index.ts +180 -161
package/src/interfaces.ts +843 -657
package/src/mcp.ts +345 -345
package/src/providers/anthropic.ts +796 -762
package/src/providers/google.ts +840 -620
package/src/providers/index.ts +8 -8
package/src/providers/ollama.ts +503 -469
package/src/providers/openai.ts +587 -392
package/src/router.ts +785 -780
package/src/stream-decoder.ts +535 -361
package/src/structured-output.ts +759 -759
package/src/test-scripts/test-google-deep-research.ts +33 -0
package/src/test-scripts/test-google-streaming-enhanced.ts +147 -147
package/src/test-scripts/test-google-streaming.ts +1 -1
package/src/test-scripts/test-google-system-prompt-comprehensive.ts +189 -189
package/src/test-scripts/test-google-thinking.ts +46 -0
package/src/test-scripts/test-system-message-positions.ts +163 -163
package/src/test-scripts/test-system-prompt-improvement-demo.ts +83 -83
package/src/test-scripts/test-vllm-qwen36.ts +256 -0
package/src/tests/ai-model.test.ts +1614 -1614
package/src/tests/auditor.test.ts +224 -224
package/src/tests/gemma-diffusion.test.ts +115 -0
package/src/tests/http.test.ts +200 -200
package/src/tests/interfaces.test.ts +117 -117
package/src/tests/providers/anthropic.test.ts +118 -0
package/src/tests/providers/google.test.ts +841 -660
package/src/tests/providers/ollama.test.ts +1034 -954
package/src/tests/providers/openai.test.ts +1511 -1122
package/src/tests/router.test.ts +254 -254
package/src/tests/stream-decoder.test.ts +263 -179
package/src/tests/structured-output.test.ts +1450 -1450
package/src/tests/thinking.test.ts +65 -0
package/src/tests/tools.test.ts +175 -175
package/src/thinking.ts +73 -0
package/src/tools.ts +246 -246
package/src/zod-adapter.ts +72 -72

package/src/stream-decoder.ts CHANGED Viewed

@@ -1,361 +1,535 @@
-/**
- * Universal LLM Client v3 — Stream Decoder
- *
- * Pluggable interface for decoding raw LLM token streams into typed events.
- * Consumers select their strategy per-call: passthrough for raw speed,
- * standard-chat for structured tool calls, or interleaved-reasoning
- * for models that emit <think>/<progress> tags.
- */
-import type { LLMToolCall } from './interfaces.js';
-// ============================================================================
-// Decoded Event Types
-// ============================================================================
-/** Clean, typed events emitted by a stream decoder */
-export type DecodedEvent =
-    | { type: 'text'; content: string }
-    | { type: 'thinking'; content: string }
-    | { type: 'progress'; content: string }
-    | { type: 'tool_call'; calls: LLMToolCall[] };
-/** Callback invoked by the decoder as events become available */
-export type DecoderCallback = (event: DecodedEvent) => void;
-// ============================================================================
-// Decoder Interface
-// ============================================================================
-/**
- * Transform raw LLM tokens into clean typed events.
- *
- * Usage:
- *   const decoder = createDecoder('standard-chat', callback);
- *   for (const token of stream) decoder.push(token);
- *   decoder.flush();
- *   const clean = decoder.getCleanContent();
- */
-export interface StreamDecoder {
-    /** Feed a raw token from the LLM stream */
-    push(token: string): void;
-    /** Signal end of stream — flush any buffered state */
-    flush(): void;
-    /** Get the accumulated clean text (all structural tags stripped) */
-    getCleanContent(): string;
-    /** Get accumulated reasoning/thinking content (if any) */
-    getReasoning(): string | undefined;
-}
-// ============================================================================
-// Decoder Types
-// ============================================================================
-export type DecoderType = 'passthrough' | 'standard-chat' | 'interleaved-reasoning';
-// ============================================================================
-// Passthrough Decoder
-// ============================================================================
-/**
- * Bare-bones decoder for raw text completions.
- * No parsing, no tag awareness. All tokens → text events.
- */
-export class PassthroughDecoder implements StreamDecoder {
-    private content = '';
-    private readonly callback: DecoderCallback;
-    constructor(callback: DecoderCallback) {
-        this.callback = callback;
-    }
-    push(token: string): void {
-        this.content += token;
-        this.callback({ type: 'text', content: token });
-    }
-    flush(): void {
-        // Nothing to flush — all tokens emitted immediately
-    }
-    getCleanContent(): string {
-        return this.content;
-    }
-    getReasoning(): string | undefined {
-        return undefined;
-    }
-}
-// ============================================================================
-// Standard Chat Decoder
-// ============================================================================
-/**
- * Decoder for standard LLM chat patterns — text streaming with native
- * reasoning and structured API tool calls. No text-level tag parsing.
- *
- * Streamed tokens are clean text → emitted as `text` events.
- * Native reasoning tokens → accepted via `pushReasoning()`.
- * Structured tool calls → accepted via `pushToolCalls()`.
- */
-export class StandardChatDecoder implements StreamDecoder {
-    private content = '';
-    private reasoning = '';
-    private readonly callback: DecoderCallback;
-    constructor(callback: DecoderCallback) {
-        this.callback = callback;
-    }
-    push(token: string): void {
-        this.content += token;
-        this.callback({ type: 'text', content: token });
-    }
-    /** Feed native reasoning tokens from the provider */
-    pushReasoning(content: string): void {
-        this.reasoning += content;
-        this.callback({ type: 'thinking', content });
-    }
-    /** Feed structured tool calls from the provider API response */
-    pushToolCalls(calls: LLMToolCall[]): void {
-        this.callback({ type: 'tool_call', calls });
-    }
-    flush(): void {
-        // Nothing to flush — all events emitted as they arrive
-    }
-    getCleanContent(): string {
-        return this.content;
-    }
-    getReasoning(): string | undefined {
-        return this.reasoning || undefined;
-    }
-}
-// ============================================================================
-// Interleaved Reasoning Decoder
-// ============================================================================
-/**
- * Decoder for models that emit interleaved reasoning tags in text.
- * Parses <think>...</think> and <progress>...</progress> tags from the
- * raw token stream and emits typed events for each.
- *
- * Handles streaming where tags may be split across chunks.
- */
-export class InterleavedReasoningDecoder implements StreamDecoder {
-    private buffer = '';
-    private content = '';
-    private reasoning = '';
-    private readonly callback: DecoderCallback;
-    private inThink = false;
-    private inProgress = false;
-    constructor(callback: DecoderCallback) {
-        this.callback = callback;
-    }
-    push(token: string): void {
-        this.buffer += token;
-        this.processBuffer();
-    }
-    flush(): void {
-        // Emit any remaining buffer content as text
-        if (this.buffer.length > 0) {
-            if (this.inThink) {
-                this.reasoning += this.buffer;
-                this.callback({ type: 'thinking', content: this.buffer });
-            } else if (this.inProgress) {
-                this.callback({ type: 'progress', content: this.buffer });
-            } else {
-                this.content += this.buffer;
-                this.callback({ type: 'text', content: this.buffer });
-            }
-            this.buffer = '';
-        }
-    }
-    getCleanContent(): string {
-        return this.content;
-    }
-    getReasoning(): string | undefined {
-        return this.reasoning || undefined;
-    }
-    private processBuffer(): void {
-        let safety = 0;
-        while (this.buffer.length > 0 && safety++ < 200) {
-            if (this.inThink) {
-                const closeIdx = this.buffer.indexOf('</think>');
-                if (closeIdx === -1) {
-                    // Might have partial closing tag at end
-                    if (this.buffer.endsWith('<') || this.buffer.endsWith('</') ||
-                        this.buffer.endsWith('</t') || this.buffer.endsWith('</th') ||
-                        this.buffer.endsWith('</thi') || this.buffer.endsWith('</thin') ||
-                        this.buffer.endsWith('</think')) {
-                        return; // Wait for more data
-                    }
-                    this.reasoning += this.buffer;
-                    this.callback({ type: 'thinking', content: this.buffer });
-                    this.buffer = '';
-                    return;
-                }
-                const thinkContent = this.buffer.slice(0, closeIdx);
-                if (thinkContent) {
-                    this.reasoning += thinkContent;
-                    this.callback({ type: 'thinking', content: thinkContent });
-                }
-                this.buffer = this.buffer.slice(closeIdx + 8); // '</think>'.length
-                this.inThink = false;
-                continue;
-            }
-            if (this.inProgress) {
-                const closeIdx = this.buffer.indexOf('</progress>');
-                if (closeIdx === -1) {
-                    if (this.couldBePartialTag(this.buffer, '</progress>')) return;
-                    this.callback({ type: 'progress', content: this.buffer });
-                    this.buffer = '';
-                    return;
-                }
-                const progressContent = this.buffer.slice(0, closeIdx);
-                if (progressContent) {
-                    this.callback({ type: 'progress', content: progressContent });
-                }
-                this.buffer = this.buffer.slice(closeIdx + 11); // '</progress>'.length
-                this.inProgress = false;
-                continue;
-            }
-            // Look for opening tags
-            const thinkIdx = this.buffer.indexOf('<think>');
-            const progressIdx = this.buffer.indexOf('<progress>');
-            // Find earliest tag
-            const nextTag = this.findEarliest(thinkIdx, progressIdx);
-            if (nextTag === -1) {
-                // No complete opening tags — check for partial tag at end
-                const lastAngle = this.buffer.lastIndexOf('<');
-                if (lastAngle >= 0 && lastAngle > this.buffer.length - 12) {
-                    // Potential partial tag — emit text before it, keep the rest
-                    const textBefore = this.buffer.slice(0, lastAngle);
-                    if (textBefore) {
-                        this.content += textBefore;
-                        this.callback({ type: 'text', content: textBefore });
-                    }
-                    this.buffer = this.buffer.slice(lastAngle);
-                    return;
-                }
-                // No partial tags — emit all as text
-                this.content += this.buffer;
-                this.callback({ type: 'text', content: this.buffer });
-                this.buffer = '';
-                return;
-            }
-            // Emit text before the tag
-            const textBefore = this.buffer.slice(0, nextTag);
-            if (textBefore) {
-                this.content += textBefore;
-                this.callback({ type: 'text', content: textBefore });
-            }
-            if (nextTag === thinkIdx) {
-                this.buffer = this.buffer.slice(nextTag + 7); // '<think>'.length
-                this.inThink = true;
-            } else {
-                this.buffer = this.buffer.slice(nextTag + 10); // '<progress>'.length
-                this.inProgress = true;
-            }
-        }
-    }
-    private findEarliest(a: number, b: number): number {
-        if (a === -1) return b;
-        if (b === -1) return a;
-        return Math.min(a, b);
-    }
-    private couldBePartialTag(buffer: string, tag: string): boolean {
-        for (let i = 1; i < tag.length; i++) {
-            if (buffer.endsWith(tag.slice(0, i))) return true;
-        }
-        return false;
-    }
-}
-// ============================================================================
-// Pluggable Decoder Registry
-// ============================================================================
-export interface DecoderOptions {
-    /** Known tool names for text-based tool call recovery */
-    knownToolNames?: Set<string>;
-}
-/**
- * Factory function that creates a StreamDecoder instance.
- * External code registers these via `registerDecoder()`.
- */
-export type DecoderFactory = (callback: DecoderCallback, options?: DecoderOptions) => StreamDecoder;
-/** Internal registry of decoder factories, keyed by decoder type name */
-const decoderRegistry = new Map<string, DecoderFactory>();
-/**
- * Register a custom stream decoder type.
- * Once registered, it can be used via `createDecoder(name, ...)` or
- * by passing `decoderType: name` in ChatOptions.
- *
- * @example
- * ```typescript
- * import { registerDecoder } from 'universal-llm-client';
- *
- * registerDecoder('my-decoder', (callback, options) => {
- *   return new MyCustomDecoder(callback, options);
- * });
- * ```
- */
-export function registerDecoder(type: string, factory: DecoderFactory): void {
-    decoderRegistry.set(type, factory);
-}
-/**
- * Get all registered decoder type names.
- */
-export function getRegisteredDecoders(): string[] {
-    return Array.from(decoderRegistry.keys());
-}
-// Pre-register built-in decoders
-registerDecoder('passthrough', (cb) => new PassthroughDecoder(cb));
-registerDecoder('standard-chat', (cb) => new StandardChatDecoder(cb));
-registerDecoder('interleaved-reasoning', (cb) => new InterleavedReasoningDecoder(cb));
-/**
- * Create a stream decoder by type name.
- * Looks up the decoder in the registry (built-in + custom).
- *
- * @throws Error if the decoder type is not registered
- */
-export function createDecoder(
-    type: DecoderType | string,
-    callback: DecoderCallback,
-    options?: DecoderOptions,
-): StreamDecoder {
-    const factory = decoderRegistry.get(type);
-    if (!factory) {
-        const available = Array.from(decoderRegistry.keys()).join(', ');
-        throw new Error(`Unknown decoder type: "${type}". Available: ${available}`);
-    }
-    return factory(callback, options);
-}
+/**
+ * Universal LLM Client v3 — Stream Decoder
+ *
+ * Pluggable interface for decoding raw LLM token streams into typed events.
+ * Consumers select their strategy per-call: passthrough for raw speed,
+ * standard-chat for structured tool calls, or interleaved-reasoning
+ * for models that emit <think>/<progress> tags.
+ */
+import type { LLMToolCall } from './interfaces.js';
+import { GEMMA_THOUGHT_OPENERS, normalizeGemmaThought } from './gemma-channel.js';
+// ============================================================================
+// Decoded Event Types
+// ============================================================================
+/**
+ * Clean, typed events emitted by a stream decoder.
+ * The `type` field discriminates the variants.
+ */
+export type DecodedEvent =
+    | { type: 'text'; content: string } // answer text; the decoders in this file also add it to getCleanContent()
+    | { type: 'thinking'; content: string } // reasoning content; the decoders in this file also add it to getReasoning()
+    | { type: 'progress'; content: string } // body of a <progress> tag; not added to clean content or reasoning
+    | { type: 'tool_call'; calls: LLMToolCall[] }; // one or more tool calls delivered together
+/** Callback invoked synchronously by the decoder each time an event becomes available */
+export type DecoderCallback = (event: DecodedEvent) => void;
+// ============================================================================
+// Decoder Interface
+// ============================================================================
+/**
+ * Transform raw LLM tokens into clean typed events.
+ *
+ * Events are delivered through the callback the decoder was constructed
+ * with; the getters only expose what has been accumulated so far.
+ *
+ * Usage:
+ *   const decoder = createDecoder('standard-chat', callback);
+ *   for (const token of stream) decoder.push(token);
+ *   decoder.flush();
+ *   const clean = decoder.getCleanContent();
+ */
+export interface StreamDecoder {
+    /** Feed a raw token from the LLM stream; may trigger zero or more callback invocations */
+    push(token: string): void;
+    /** Signal end of stream — flush any buffered state. Call after the last push(). */
+    flush(): void;
+    /** Get the accumulated clean text (all structural tags stripped) */
+    getCleanContent(): string;
+    /** Get accumulated reasoning/thinking content, or undefined when there is none */
+    getReasoning(): string | undefined;
+}
+// ============================================================================
+// Decoder Types
+// ============================================================================
+export type DecoderType = 'passthrough' | 'standard-chat' | 'interleaved-reasoning'; // the three strategies named in this file's header comment
+// ============================================================================
+// Passthrough Decoder
+// ============================================================================
+/**
+ * Minimal decoder for raw text completions.
+ * Every token is forwarded untouched as a `text` event — nothing is
+ * buffered and no tags are recognised.
+ */
+export class PassthroughDecoder implements StreamDecoder {
+    private readonly callback: DecoderCallback;
+    /** Concatenation of every token pushed so far. */
+    private content = '';
+    constructor(callback: DecoderCallback) {
+        this.callback = callback;
+    }
+    /** Record the token, then hand it to the callback as-is. */
+    push(token: string): void {
+        this.content = this.content + token;
+        const event = { type: 'text' as const, content: token };
+        this.callback(event);
+    }
+    /** No-op: push() never holds anything back, so there is nothing left to emit. */
+    flush(): void {
+        return;
+    }
+    /** All text pushed so far, exactly as received. */
+    getCleanContent(): string {
+        const { content } = this;
+        return content;
+    }
+    /** This decoder never produces reasoning. */
+    getReasoning(): string | undefined {
+        return undefined;
+    }
+}
+// ============================================================================
+// Standard Chat Decoder
+// ============================================================================
+/**
+ * Decoder for standard LLM chat patterns — text streaming with native
+ * reasoning and structured API tool calls, plus recovery of a small set of
+ * structural markers that can appear inside the text stream.
+ *
+ * Plain streamed text → emitted as `text` events.
+ * Native reasoning tokens → accepted via `pushReasoning()`.
+ * Structured tool calls → accepted via `pushToolCalls()`.
+ *
+ * Markers recognised by `push()` (an opener may be split across tokens):
+ *  - `<progress>…</progress>` → one `progress` event carrying the body.
+ *  - `<tool_call|>…<|tool_response>` → body parsed as JSON (a single call or
+ *    an array of calls) and emitted as one `tool_call` event; a body that
+ *    cannot be parsed is dropped.
+ *  - a bare `<|tool_response>` → swallowed.
+ *  - `<|channel>thought…<channel|>` and `<|thought…|>` → body passed through
+ *    `normalizeGemmaThought()` and emitted as one `thinking` event.
+ *
+ * A marker body is buffered until its closing marker arrives, so each marker
+ * yields a single event rather than a stream of partial events.
+ */
+export class StandardChatDecoder implements StreamDecoder {
+    /** Everything emitted as `text` so far. */
+    private content = '';
+    /** Everything emitted as `thinking` so far. */
+    private reasoning = '';
+    private readonly callback: DecoderCallback;
+    /** Characters of a candidate opener held back so far; starts with `<` when non-empty. */
+    private tagBuffer = '';
+    private inProgressTag = false;
+    private progressBody = '';
+    private inGemmaThought = false;
+    private gemmaThoughtBody = '';
+    /** Closing marker that ends the Gemma thought currently being buffered. */
+    private gemmaThoughtClose = '';
+    private inToolCallTag = false;
+    private toolCallBody = '';
+    /** Closing marker that ends the tool-call body currently being buffered. */
+    private toolCallClose = '';
+    constructor(callback: DecoderCallback) {
+        this.callback = callback;
+    }
+    /**
+     * Feed a raw token from the LLM stream.
+     *
+     * Text outside any marker is emitted immediately. A `<` starts a candidate
+     * opener that is held back character by character until it either
+     * completes a known opener or stops being a prefix of one, in which case
+     * the held characters are released as text.
+     */
+    push(token: string): void {
+        let pos = 0;
+        while (pos < token.length) {
+            if (this.inGemmaThought) {
+                // Inside a thought: swallow the rest of the token and look for the closer.
+                this.gemmaThoughtBody += token.slice(pos);
+                const closeIdx = this.gemmaThoughtBody.indexOf(this.gemmaThoughtClose);
+                if (closeIdx !== -1) {
+                    const body = this.gemmaThoughtBody.slice(0, closeIdx);
+                    const remainder = this.gemmaThoughtBody.slice(closeIdx + this.gemmaThoughtClose.length);
+                    this.emitReasoning(normalizeGemmaThought(body));
+                    this.inGemmaThought = false;
+                    this.gemmaThoughtBody = '';
+                    this.gemmaThoughtClose = '';
+                    // Whatever follows the closer goes back through normal decoding.
+                    if (remainder) this.push(remainder);
+                }
+                return;
+            }
+            if (this.inToolCallTag) {
+                this.toolCallBody += token.slice(pos);
+                const closeIdx = this.toolCallBody.indexOf(this.toolCallClose);
+                if (closeIdx !== -1) {
+                    const body = this.toolCallBody.slice(0, closeIdx);
+                    const remainder = this.toolCallBody.slice(closeIdx + this.toolCallClose.length);
+                    const recovered = this.parseRecoveredToolCalls(body);
+                    if (recovered.length > 0) {
+                        this.callback({ type: 'tool_call', calls: recovered });
+                    }
+                    this.inToolCallTag = false;
+                    this.toolCallBody = '';
+                    this.toolCallClose = '';
+                    if (remainder) this.push(remainder);
+                }
+                return;
+            }
+            if (this.inProgressTag) {
+                this.progressBody += token.slice(pos);
+                const closeIdx = this.progressBody.indexOf('</progress>');
+                if (closeIdx !== -1) {
+                    const body = this.progressBody.slice(0, closeIdx);
+                    const remainder = this.progressBody.slice(closeIdx + '</progress>'.length);
+                    if (body) {
+                        this.callback({ type: 'progress', content: body });
+                    }
+                    this.inProgressTag = false;
+                    this.progressBody = '';
+                    if (remainder) this.push(remainder);
+                }
+                return;
+            }
+            if (this.tagBuffer.length > 0) {
+                // A candidate opener is pending: extend it by exactly one character.
+                const ch = token.charAt(pos);
+                pos++;
+                this.tagBuffer += ch;
+                if (this.matchesStructuralOpenerPrefix(this.tagBuffer)) {
+                    if (this.tagBuffer === '<progress>') {
+                        this.inProgressTag = true;
+                        this.progressBody = '';
+                        this.tagBuffer = '';
+                    } else if (this.tagBuffer === '<tool_call|>') {
+                        this.inToolCallTag = true;
+                        this.toolCallBody = '';
+                        this.toolCallClose = '<|tool_response>';
+                        this.tagBuffer = '';
+                    } else if (this.tagBuffer === '<|tool_response>') {
+                        // Stray marker with no open tool call: drop it.
+                        this.tagBuffer = '';
+                    } else if (this.tagBuffer === '<|channel>thought') {
+                        this.inGemmaThought = true;
+                        this.gemmaThoughtBody = '';
+                        this.gemmaThoughtClose = '<channel|>';
+                        this.tagBuffer = '';
+                    } else if (this.tagBuffer === '<|thought') {
+                        this.inGemmaThought = true;
+                        this.gemmaThoughtBody = '';
+                        this.gemmaThoughtClose = '|>';
+                        this.tagBuffer = '';
+                    }
+                    // Otherwise still a strict prefix of some opener: keep buffering.
+                } else if (ch === '<') {
+                    // The character that broke the candidate may itself begin a real
+                    // opener (e.g. "<<progress>"): release what came before it and
+                    // restart the candidate from this "<".
+                    this.emitText(this.tagBuffer.slice(0, -1));
+                    this.tagBuffer = '<';
+                } else {
+                    this.emitText(this.tagBuffer);
+                    this.tagBuffer = '';
+                }
+                continue;
+            }
+            const ltIdx = token.indexOf('<', pos);
+            if (ltIdx === -1) {
+                this.emitText(token.slice(pos));
+                return;
+            }
+            if (ltIdx > pos) {
+                this.emitText(token.slice(pos, ltIdx));
+            }
+            this.tagBuffer = '<';
+            pos = ltIdx + 1;
+        }
+    }
+    /**
+     * Turn the text captured between `<tool_call|>` and its closing marker
+     * into tool calls.
+     *
+     * The body is parsed as strict JSON first, so valid payloads whose strings
+     * contain apostrophes or the words True/False/None are left intact. Only
+     * when that fails is a Python-literal style body (single quotes,
+     * True/False/None) rewritten to JSON and parsed a second time.
+     *
+     * @param body Raw text captured between the markers.
+     * @returns Calls that carry a non-empty string `name`; empty when the body
+     *          is blank, unparseable, or holds no usable entry.
+     */
+    private parseRecoveredToolCalls(body: string): LLMToolCall[] {
+        const raw = body.trim();
+        if (!raw) return [];
+        let parsed: unknown;
+        try {
+            parsed = JSON.parse(raw);
+        } catch {
+            try {
+                const normalizedJson = raw
+                    .replace(/'/g, '"')
+                    .replace(/\bTrue\b/g, 'true')
+                    .replace(/\bFalse\b/g, 'false')
+                    .replace(/\bNone\b/g, 'null');
+                parsed = JSON.parse(normalizedJson);
+            } catch {
+                // Best-effort recovery: an unparseable body yields no tool call.
+                return [];
+            }
+        }
+        const candidates: unknown[] = Array.isArray(parsed) ? parsed : [parsed];
+        const calls: LLMToolCall[] = [];
+        for (const candidate of candidates) {
+            if (!candidate || typeof candidate !== 'object') continue;
+            const { id, name, arguments: args } = candidate as {
+                id?: unknown;
+                name?: unknown;
+                arguments?: unknown;
+            };
+            if (typeof name !== 'string' || !name) continue;
+            calls.push({
+                // Keep a model-supplied string id; otherwise synthesise a unique one.
+                id: typeof id === 'string' && id
+                    ? id
+                    : `recovered_${Date.now()}_${Math.random().toString(36).slice(2)}`,
+                type: 'function',
+                function: {
+                    name,
+                    // Arguments travel as a JSON string; serialise objects, default to {}.
+                    arguments: typeof args === 'string' ? args : JSON.stringify(args ?? {}),
+                },
+            });
+        }
+        return calls;
+    }
+    /** Append to the clean content and emit a `text` event; empty strings are ignored. */
+    private emitText(text: string): void {
+        if (!text) return;
+        this.content += text;
+        this.callback({ type: 'text', content: text });
+    }
+    /** Append to the reasoning and emit a `thinking` event; empty strings are ignored. */
+    private emitReasoning(content: string): void {
+        if (!content) return;
+        this.reasoning += content;
+        this.callback({ type: 'thinking', content });
+    }
+    /** True when `candidate` is a prefix of (or equal to) any recognised opener. */
+    private matchesStructuralOpenerPrefix(candidate: string): boolean {
+        if ('<progress>'.startsWith(candidate)) return true;
+        if ('<tool_call|>'.startsWith(candidate)) return true;
+        if ('<|tool_response>'.startsWith(candidate)) return true;
+        return GEMMA_THOUGHT_OPENERS.some(opener => opener.startsWith(candidate));
+    }
+    /** Feed native reasoning tokens from the provider */
+    pushReasoning(content: string): void {
+        this.emitReasoning(content);
+    }
+    /** Feed structured tool calls from the provider API response */
+    pushToolCalls(calls: LLMToolCall[]): void {
+        this.callback({ type: 'tool_call', calls });
+    }
+    /**
+     * Signal end of stream and resolve whatever is still buffered:
+     *  - a pending opener candidate is released as text;
+     *  - an unterminated Gemma thought is emitted as `thinking`;
+     *  - an unterminated, non-empty `<progress>` body is emitted as text with
+     *    the literal `<progress>` prefix restored;
+     *  - an unterminated tool-call body is discarded.
+     */
+    flush(): void {
+        if (this.tagBuffer) {
+            this.emitText(this.tagBuffer);
+            this.tagBuffer = '';
+        }
+        if (this.inGemmaThought) {
+            this.emitReasoning(normalizeGemmaThought(this.gemmaThoughtBody));
+            this.inGemmaThought = false;
+            this.gemmaThoughtBody = '';
+            this.gemmaThoughtClose = '';
+        }
+        if (this.inProgressTag) {
+            if (this.progressBody) {
+                this.emitText('<progress>' + this.progressBody);
+            }
+            this.inProgressTag = false;
+            this.progressBody = '';
+        }
+        if (this.inToolCallTag) {
+            this.inToolCallTag = false;
+            this.toolCallBody = '';
+            this.toolCallClose = '';
+        }
+    }
+    /** Accumulated `text` content; marker bodies and markers themselves are excluded. */
+    getCleanContent(): string {
+        return this.content;
+    }
+    /** Accumulated `thinking` content, or undefined when none was produced. */
+    getReasoning(): string | undefined {
+        return this.reasoning || undefined;
+    }
+}
+// ============================================================================
+// Interleaved Reasoning Decoder
+// ============================================================================
+/**
+ * Decoder for models that emit interleaved reasoning tags in text.
+ * Parses <think>...</think> and <progress>...</progress> tags from the
+ * raw token stream and emits typed events for each:
+ *   - `text`     — content outside any tag; accumulated for getCleanContent()
+ *   - `thinking` — content inside <think>; accumulated for getReasoning()
+ *   - `progress` — content inside <progress>; emitted only, never accumulated
+ *
+ * Handles streaming where tags may be split across chunks: only a trailing
+ * fragment that could still grow into the tag being looked for is held back;
+ * everything before it is emitted immediately.
+ *
+ * Tags are not nested: while inside one tag, only its own closing tag is
+ * recognised and any other markup is passed through as that tag's content.
+ */
+export class InterleavedReasoningDecoder implements StreamDecoder {
+    private static readonly THINK_OPEN = '<think>';
+    private static readonly THINK_CLOSE = '</think>';
+    private static readonly PROGRESS_OPEN = '<progress>';
+    private static readonly PROGRESS_CLOSE = '</progress>';
+    /** Received input that has not been classified and emitted yet. */
+    private buffer = '';
+    /** All `text` content emitted so far. */
+    private content = '';
+    /** All `thinking` content emitted so far. */
+    private reasoning = '';
+    private readonly callback: DecoderCallback;
+    private inThink = false;
+    private inProgress = false;
+    /**
+     * @param callback Receives one event per decoded chunk.
+     */
+    constructor(callback: DecoderCallback) {
+        this.callback = callback;
+    }
+    /**
+     * Append a raw token to the stream and emit every event that can already
+     * be decided.
+     */
+    push(token: string): void {
+        this.buffer += token;
+        this.processBuffer();
+    }
+    /**
+     * Emit whatever is still buffered (for example an unfinished tag fragment)
+     * using the current state: as `thinking` or `progress` when a tag is still
+     * open, otherwise as `text`.
+     */
+    flush(): void {
+        if (this.buffer.length === 0) return;
+        const rest = this.buffer;
+        this.buffer = '';
+        if (this.inThink) {
+            this.emit('thinking', rest);
+        } else if (this.inProgress) {
+            this.emit('progress', rest);
+        } else {
+            this.emit('text', rest);
+        }
+    }
+    /** @returns The concatenation of all `text` content emitted so far. */
+    getCleanContent(): string {
+        return this.content;
+    }
+    /** @returns The concatenated `thinking` content, or `undefined` if there is none. */
+    getReasoning(): string | undefined {
+        return this.reasoning || undefined;
+    }
+    /**
+     * Drain the buffer as far as possible. Every iteration either returns or
+     * removes a complete tag from the buffer, so the loop terminates without
+     * an iteration cap.
+     */
+    private processBuffer(): void {
+        while (this.buffer.length > 0) {
+            if (this.inThink) {
+                if (!this.consumeUntilClose('thinking', InterleavedReasoningDecoder.THINK_CLOSE)) return;
+                continue;
+            }
+            if (this.inProgress) {
+                if (!this.consumeUntilClose('progress', InterleavedReasoningDecoder.PROGRESS_CLOSE)) return;
+                continue;
+            }
+            // Look for opening tags
+            const thinkIdx = this.buffer.indexOf(InterleavedReasoningDecoder.THINK_OPEN);
+            const progressIdx = this.buffer.indexOf(InterleavedReasoningDecoder.PROGRESS_OPEN);
+            const nextTag = this.findEarliest(thinkIdx, progressIdx);
+            if (nextTag === -1) {
+                // No complete opening tag: keep only a trailing fragment that is a
+                // genuine prefix of an opening tag, emit the rest as text.
+                const held = Math.max(
+                    this.partialTagLength(this.buffer, InterleavedReasoningDecoder.THINK_OPEN),
+                    this.partialTagLength(this.buffer, InterleavedReasoningDecoder.PROGRESS_OPEN),
+                );
+                const cut = this.buffer.length - held;
+                const ready = this.buffer.slice(0, cut);
+                this.buffer = this.buffer.slice(cut);
+                this.emit('text', ready);
+                return;
+            }
+            // Text in front of the tag belongs to the visible content
+            const textBefore = this.buffer.slice(0, nextTag);
+            if (nextTag === thinkIdx) {
+                this.buffer = this.buffer.slice(nextTag + InterleavedReasoningDecoder.THINK_OPEN.length);
+                this.inThink = true;
+            } else {
+                this.buffer = this.buffer.slice(nextTag + InterleavedReasoningDecoder.PROGRESS_OPEN.length);
+                this.inProgress = true;
+            }
+            this.emit('text', textBefore);
+        }
+    }
+    /**
+     * Handle the buffer while inside a tag.
+     *
+     * @param kind Event type to emit for the tag's content.
+     * @param closeTag Closing tag that ends the current section.
+     * @returns `true` if the closing tag was found and consumed (the decoder is
+     *          back in text state); `false` if more input is needed.
+     */
+    private consumeUntilClose(kind: 'thinking' | 'progress', closeTag: string): boolean {
+        const closeIdx = this.buffer.indexOf(closeTag);
+        if (closeIdx === -1) {
+            // Hold back only a possible split closing tag at the very end
+            const held = this.partialTagLength(this.buffer, closeTag);
+            const cut = this.buffer.length - held;
+            const ready = this.buffer.slice(0, cut);
+            this.buffer = this.buffer.slice(cut);
+            this.emit(kind, ready);
+            return false;
+        }
+        const inner = this.buffer.slice(0, closeIdx);
+        this.buffer = this.buffer.slice(closeIdx + closeTag.length);
+        if (kind === 'thinking') {
+            this.inThink = false;
+        } else {
+            this.inProgress = false;
+        }
+        this.emit(kind, inner);
+        return true;
+    }
+    /**
+     * Record a chunk in the matching accumulator and forward it to the
+     * callback. Empty chunks are ignored.
+     */
+    private emit(kind: 'text' | 'thinking' | 'progress', chunk: string): void {
+        if (chunk.length === 0) return;
+        switch (kind) {
+            case 'thinking':
+                this.reasoning += chunk;
+                this.callback({ type: 'thinking', content: chunk });
+                break;
+            case 'progress':
+                // Progress updates are transient: forwarded but not accumulated
+                this.callback({ type: 'progress', content: chunk });
+                break;
+            default:
+                this.content += chunk;
+                this.callback({ type: 'text', content: chunk });
+        }
+    }
+    /** @returns The smaller of two `indexOf` results, treating -1 as "not found". */
+    private findEarliest(a: number, b: number): number {
+        if (a === -1) return b;
+        if (b === -1) return a;
+        return Math.min(a, b);
+    }
+    /**
+     * @returns Length of the longest proper prefix of `tag` that `buffer` ends
+     *          with, or 0 when the buffer does not end in a partial `tag`.
+     */
+    private partialTagLength(buffer: string, tag: string): number {
+        const longest = Math.min(buffer.length, tag.length - 1);
+        for (let len = longest; len > 0; len--) {
+            if (buffer.endsWith(tag.slice(0, len))) return len;
+        }
+        return 0;
+    }
+}
+// ============================================================================
+// Pluggable Decoder Registry
+// ============================================================================
+export interface DecoderOptions {
+    /**
+     * Known tool names for text-based tool call recovery.
+     * Optional. The options object is handed to the decoder factory as its
+     * second argument; the built-in factories registered in this file do not
+     * read it.
+     */
+    knownToolNames?: Set<string>;
+}
+/**
+ * Factory function that creates a StreamDecoder instance.
+ * External code registers these via `registerDecoder()`.
+ *
+ * `createDecoder()` invokes the factory with the caller's callback and
+ * (possibly undefined) options and returns whatever the factory returns.
+ */
+export type DecoderFactory = (callback: DecoderCallback, options?: DecoderOptions) => StreamDecoder;
+/**
+ * Internal registry of decoder factories, keyed by decoder type name.
+ * Written by `registerDecoder()`; read by `getRegisteredDecoders()` and
+ * `createDecoder()`. Not exported.
+ */
+const decoderRegistry = new Map<string, DecoderFactory>();
+/**
+ * Register a custom stream decoder type.
+ * Once registered, it can be used via `createDecoder(name, ...)` or
+ * by passing `decoderType: name` in ChatOptions.
+ *
+ * Registering a name that already exists replaces the previous factory,
+ * including the built-in ones; no error is raised.
+ *
+ * @param type Name under which the factory is stored.
+ * @param factory Function that builds the decoder for that name.
+ *
+ * @example
+ * ```typescript
+ * import { registerDecoder } from 'universal-llm-client';
+ *
+ * registerDecoder('my-decoder', (callback, options) => {
+ *   return new MyCustomDecoder(callback, options);
+ * });
+ * ```
+ */
+export function registerDecoder(type: string, factory: DecoderFactory): void {
+    decoderRegistry.set(type, factory);
+}
+/**
+ * List the names of every decoder type currently in the registry.
+ *
+ * @returns A fresh array of names in registration order; mutating it does
+ *          not affect the registry.
+ */
+export function getRegisteredDecoders(): string[] {
+    return [...decoderRegistry.keys()];
+}
+// Pre-register built-in decoders at module load so createDecoder() can resolve them without setup; each factory ignores the optional DecoderOptions argument
+registerDecoder('passthrough', (cb) => new PassthroughDecoder(cb));
+registerDecoder('standard-chat', (cb) => new StandardChatDecoder(cb));
+registerDecoder('interleaved-reasoning', (cb) => new InterleavedReasoningDecoder(cb));
+/**
+ * Instantiate a stream decoder from its registered type name.
+ * Both built-in and custom (registered) decoders are resolved the same way.
+ *
+ * @param type Registered decoder type name.
+ * @param callback Event sink handed to the decoder factory.
+ * @param options Optional settings forwarded unchanged to the factory.
+ * @returns The decoder produced by the matching factory.
+ * @throws Error if the decoder type is not registered; the message lists
+ *         the names that are available.
+ */
+export function createDecoder(
+    type: DecoderType | string,
+    callback: DecoderCallback,
+    options?: DecoderOptions,
+): StreamDecoder {
+    const factory = decoderRegistry.get(type);
+    if (factory) {
+        return factory(callback, options);
+    }
+    const available = [...decoderRegistry.keys()].join(', ');
+    throw new Error(`Unknown decoder type: "${type}". Available: ${available}`);
+}