npm - @webmcp-auto-ui/agent - Versions diffs - 2.5.24 → 2.5.26 - Mend

@webmcp-auto-ui/agent 2.5.24 → 2.5.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

package/package.json +1 -1
package/src/autoui-server.ts +17 -0
package/src/diagnostics.ts +6 -6
package/src/discovery-cache.ts +17 -3
package/src/index.ts +3 -3
package/src/loop.ts +27 -19
package/src/providers/wasm.ts +184 -330
package/src/recipes/_generated.ts +273 -0
package/src/recipes/canary-data.md +50 -0
package/src/recipes/canary-display.md +99 -0
package/src/recipes/canary-middle.md +32 -0
package/src/recipes/hummingbird-data.md +32 -0
package/src/recipes/hummingbird-display.md +36 -0
package/src/recipes/hummingbird-middle.md +18 -0
package/src/tool-layers.ts +303 -31
package/src/types.ts +6 -1
package/tests/loop.test.ts +2 -2
package/src/providers/gemma.worker.legacy.ts +0 -123
package/src/providers/litert.worker.ts +0 -294
package/src/recipes/widgets/actions.md +0 -28
package/src/recipes/widgets/alert.md +0 -27
package/src/recipes/widgets/cards.md +0 -41
package/src/recipes/widgets/carousel.md +0 -39
package/src/recipes/widgets/chart-rich.md +0 -51
package/src/recipes/widgets/chart.md +0 -32
package/src/recipes/widgets/code.md +0 -21
package/src/recipes/widgets/d3.md +0 -36
package/src/recipes/widgets/data-table.md +0 -46
package/src/recipes/widgets/gallery.md +0 -39
package/src/recipes/widgets/grid-data.md +0 -57
package/src/recipes/widgets/hemicycle.md +0 -43
package/src/recipes/widgets/js-sandbox.md +0 -32
package/src/recipes/widgets/json-viewer.md +0 -27
package/src/recipes/widgets/kv.md +0 -31
package/src/recipes/widgets/list.md +0 -24
package/src/recipes/widgets/log.md +0 -39
package/src/recipes/widgets/map.md +0 -49
package/src/recipes/widgets/profile.md +0 -49
package/src/recipes/widgets/recipe-browser.md +0 -102
package/src/recipes/widgets/sankey.md +0 -54
package/src/recipes/widgets/stat-card.md +0 -43
package/src/recipes/widgets/stat.md +0 -35
package/src/recipes/widgets/tags.md +0 -30
package/src/recipes/widgets/text.md +0 -19
package/src/recipes/widgets/timeline.md +0 -38
package/src/recipes/widgets/trombinoscope.md +0 -39

package/src/providers/wasm.ts CHANGED Viewed

@@ -5,12 +5,13 @@
  */
 import type { LLMProvider, LLMResponse, ChatMessage, ProviderTool, WasmModelId, ContentBlock } from '../types.js';
 import type { PipelineTrace } from '../pipeline-trace.js';
+import { formatGemmaToolDeclaration, gemmaValue } from '../tool-layers.js';
 export type WasmStatus = 'idle' | 'loading' | 'ready' | 'error';
 export interface WasmProviderOptions {
   model?: WasmModelId;
-  contextSize?: number;  // MediaPipe maxTokens — default 4096
+  contextSize?: number;  // MediaPipe maxTokens — default 32768
   onProgress?: (progress: number, status: string, loaded?: number, total?: number) => void;
   onStatusChange?: (status: WasmStatus) => void;
 }
@@ -23,6 +24,8 @@ const LITERT_MODELS: Record<string, { repo: string; file: string; size: number }
 export class WasmProvider implements LLMProvider {
   readonly name = 'wasm';
   readonly model: string;
+  /** Signals to the agent loop that the system prompt must be built in Gemma native syntax. */
+  readonly promptKind = 'gemma' as const;
   /** Optional pipeline trace — set externally to trace parsing strategy fallbacks */
   trace?: PipelineTrace;
@@ -83,7 +86,7 @@ export class WasmProvider implements LLMProvider {
       baseOptions: {
         modelAssetBuffer: modelStream.getReader() as unknown as Uint8Array,
       },
-      maxTokens: this.opts.contextSize ?? 4096,
+      maxTokens: this.opts.contextSize ?? 32768,
       temperature: 1.0,
       topK: 64,
     });
@@ -176,7 +179,7 @@ export class WasmProvider implements LLMProvider {
   async chat(
     messages: ChatMessage[],
     tools: ProviderTool[],
-    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number }
+    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string }
   ): Promise<LLMResponse> {
     if (this.status !== 'ready') await this.initialize();
     if (!this.inference) throw new Error('Model not initialized');
@@ -202,7 +205,7 @@ export class WasmProvider implements LLMProvider {
   private async _chat(
     messages: ChatMessage[],
     tools: ProviderTool[],
-    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number }
+    options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string }
   ): Promise<LLMResponse> {
     // Apply per-request options
     if (options?.maxTokens || options?.temperature || options?.topK) {
@@ -218,22 +221,22 @@ export class WasmProvider implements LLMProvider {
     }
     // Build Gemma chat prompt (Gemma 4 format with tool hints)
-    let prompt = this.buildPrompt(messages, tools, options?.system, options?.maxTools);
+    let prompt = this.buildPrompt(messages, tools, options?.system);
     // Aggressive clipping: Gemma struggles with long conversations — dynamic cap based on context size
-    const contextTokens = this.opts.contextSize ?? 4096;
-    const MAX_MESSAGES = contextTokens <= 4096 ? 8 : contextTokens <= 8192 ? 16 : 32;
+    const contextTokens = this.opts.contextSize ?? 32768;
+    const MAX_MESSAGES = Math.max(4, Math.floor(contextTokens / 512));
     while (messages.length > MAX_MESSAGES) {
       messages = messages.slice(1);
     }
-    prompt = this.buildPrompt(messages, tools, options?.system, options?.maxTools);
+    prompt = this.buildPrompt(messages, tools, options?.system);
     // Token-based clipping: if prompt is still too large, drop oldest messages
-    const maxPromptTokens = (this.opts.contextSize ?? 4096) - 512;
+    const maxPromptTokens = (this.opts.contextSize ?? 32768) - 512;
     try {
       while (this.inference.sizeInTokens(prompt) > maxPromptTokens && messages.length > 1) {
         messages = messages.slice(1);
-        prompt = this.buildPrompt(messages, tools, options?.system, options?.maxTools);
+        prompt = this.buildPrompt(messages, tools, options?.system);
       }
     } catch {
       // sizeInTokens not available — skip clipping
@@ -322,38 +325,10 @@ export class WasmProvider implements LLMProvider {
       }
     }
-    // Clean up hallucinated content after tool calls.
-    // Gemma often hallucinates fake <|tool_response> blocks after <tool_call|>.
-    // Strategy: keep only the FIRST complete tool call, strip everything after.
-    const firstCallStart = fullText.indexOf('<|tool_call>');
-    if (firstCallStart !== -1) {
-      const firstCallEnd = fullText.indexOf('<tool_call|>', firstCallStart);
-      if (firstCallEnd !== -1) {
-        const afterFirstCall = fullText.slice(firstCallEnd + '<tool_call|>'.length);
-        // Check if there's a REAL second tool call (not preceded by a fake tool_response)
-        const nextCallStart = afterFirstCall.indexOf('<|tool_call>');
-        if (nextCallStart !== -1) {
-          // Check if there's a fake tool_response between the two calls
-          const betweenCalls = afterFirstCall.slice(0, nextCallStart);
-          if (betweenCalls.includes('<|tool_response>') || betweenCalls.includes('<tool_response|>')) {
-            // Fake chained response — truncate after first tool call
-            fullText = fullText.slice(0, firstCallEnd + '<tool_call|>'.length);
-          }
-          // Otherwise: legitimate multi-tool call, keep both
-        } else {
-          // No second tool call — truncate any trailing hallucination
-          fullText = fullText.slice(0, firstCallEnd + '<tool_call|>'.length);
-        }
-      }
-    }
-    // Also strip any standalone <|tool_response> blocks in model output
+    // Strip any standalone <|tool_response> blocks in model output
     // (the model should never generate these — they're injected by the framework)
     fullText = fullText.replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '');
-    // Strip thinking blocks — Gemma 4 wraps reasoning in <|channel>thought\n...<channel|>
-    fullText = fullText.replace(/<\|channel>thought[\s\S]*?<channel\|>/g, '');
     const latencyMs = performance.now() - t0;
     // Use sizeInTokens for accurate token count if available
@@ -364,127 +339,34 @@ export class WasmProvider implements LLMProvider {
       }
     } catch {}
-    // Parse tool calls — supports multiple formats:
-    // 1. Gemma 4 native: <|tool_call>call:tool_name{key:<|"|>value<|"|>}<tool_call|>
-    // 2. JSON format (legacy): <|tool_call>call:tool_name{"key":"value"}<tool_call|>
-    // 3. Loose JSON: { "tool": "name", "args": {...} }
     const content: ContentBlock[] = [];
-    const gemmaToolCallRe = /<\|tool_call>call:(\w+)(\{[^]*?\})<tool_call\|>/g;
-    // Fallback: parenthesized format — call:name("arg1", {arg2})
-    const parenToolCallRe = /<\|tool_call>call:(\w+)\(([^)]*(?:\{[^]*?\}[^)]*)?)\)(?:<tool_call\|>|$)/g;
-    let match: RegExpExecArray | null;
+    const START_TAG = '<|tool_call>call:';
+    const END_TAG = '<tool_call|>';
     let foundToolCall = false;
-    while ((match = gemmaToolCallRe.exec(fullText)) !== null) {
+    let scanIdx = 0;
+    while (true) {
+      const startIdx = fullText.indexOf(START_TAG, scanIdx);
+      if (startIdx === -1) break;
+      const nameStart = startIdx + START_TAG.length;
+      const braceIdx = fullText.indexOf('{', nameStart);
+      if (braceIdx === -1) break;
+      const name = fullText.slice(nameStart, braceIdx);
+      if (!/^\w+$/.test(name)) { scanIdx = nameStart; continue; }
+      const argsBlock = WasmProvider.extractArgsBlock(fullText, braceIdx);
+      if (!argsBlock) break;
+      const afterArgs = braceIdx + argsBlock.length;
+      if (!fullText.startsWith(END_TAG, afterArgs)) { scanIdx = afterArgs; continue; }
       foundToolCall = true;
-      const toolName = match[1];
-      let toolArgs: Record<string, unknown> = {};
-      const rawArgs = match[2];
-      // Strategy 1: Extract key-value pairs using <|"|> delimiters BEFORE replacing them.
-      // This correctly handles internal quotes like: query:<|"|>SELECT data."date"<|"|>
-      toolArgs = WasmProvider.parseGemmaArgs(rawArgs);
-      // Strategy 2: If no pairs found, try simple replacement + JSON.parse
-      if (Object.keys(toolArgs).length === 0) {
-        const argsStr = rawArgs.replace(/<\|"\|>/g, '"');
-        try {
-          toolArgs = JSON.parse(argsStr);
-          this.trace?.push('parse', toolName, 'fell back to quote replacement strategy', 'warn');
-        } catch {
-          // Strategy 3: regex key:value extraction on replaced string
-          try {
-            const obj: Record<string, unknown> = {};
-            const kvRe = /(\w+)\s*:\s*(?:"([^"]*)"|([\d.]+(?:e[+-]?\d+)?)|(\[.*?\])|(true|false|null))/g;
-            let kv: RegExpExecArray | null;
-            while ((kv = kvRe.exec(argsStr)) !== null) {
-              const [, k, strVal, numVal, arrVal, litVal] = kv;
-              if (strVal !== undefined) obj[k] = strVal;
-              else if (numVal !== undefined) obj[k] = Number(numVal);
-              else if (arrVal !== undefined) { try { obj[k] = JSON.parse(arrVal); } catch { obj[k] = arrVal; } }
-              else if (litVal !== undefined) obj[k] = JSON.parse(litVal);
-            }
-            if (Object.keys(obj).length > 0) {
-              toolArgs = obj;
-              this.trace?.push('parse', toolName, 'fell back to regex key:value strategy', 'warn');
-            }
-          } catch {}
-        }
-      }
-      // P4 fix: recursively parse string fields that look like JSON objects/arrays.
-      // Gemma wraps params in <|"|>{...}<|"|> which after replacement becomes "{...}" — a string.
-      for (const [k, v] of Object.entries(toolArgs)) {
-        if (typeof v === 'string' && (v.startsWith('{') || v.startsWith('['))) {
-          try { toolArgs[k] = JSON.parse(v); } catch { /* keep as string */ }
-        }
-      }
       content.push({
         type: 'tool_use',
         id: `tc-${Date.now()}-${content.length}`,
-        name: toolName,
-        input: toolArgs,
+        name,
+        input: WasmProvider.parseGemmaArgs(argsBlock),
       });
-    }
-    // Fallback: try parenthesized format — call:component("table", {data: [...]})
-    if (!foundToolCall) {
-      while ((match = parenToolCallRe.exec(fullText)) !== null) {
-        foundToolCall = true;
-        const toolName = match[1];
-        const argsRaw = match[2].replace(/<\|"\|>/g, '"').trim();
-        let toolArgs: Record<string, unknown> = {};
-        // Parse parenthesized args: could be ("name", {params}) or just ({params})
-        try {
-          // Try wrapping in array and parsing: ["name", {params}] or [{params}]
-          const asArray = JSON.parse(`[${argsRaw}]`);
-          if (asArray.length === 2 && typeof asArray[0] === 'string' && typeof asArray[1] === 'object') {
-            // component("table", {data: [...]}) → {name: "table", params: {data: [...]}}
-            toolArgs = { name: asArray[0], params: asArray[1] };
-          } else if (asArray.length === 1 && typeof asArray[0] === 'object') {
-            toolArgs = asArray[0];
-          } else if (asArray.length >= 1) {
-            // Generic: first string arg as name, rest as params
-            toolArgs = { name: String(asArray[0]), ...(typeof asArray[1] === 'object' ? { params: asArray[1] } : {}) };
-          }
-        } catch {
-          // Last resort: try parsing the whole thing as JSON object
-          try { toolArgs = JSON.parse(argsRaw); } catch {}
-        }
-        content.push({
-          type: 'tool_use',
-          id: `tc-${Date.now()}-${content.length}`,
-          name: toolName,
-          input: toolArgs,
-        });
-      }
+      scanIdx = afterArgs + END_TAG.length;
     }
     if (!foundToolCall) {
-      // Try JSON format fallback — strip markdown code blocks first
-      let cleaned = fullText.trim();
-      const mdMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
-      if (mdMatch) cleaned = mdMatch[1].trim();
-      try {
-        const parsed = JSON.parse(cleaned) as { tool?: string; args?: Record<string, unknown> };
-        if (parsed.tool && parsed.args) {
-          foundToolCall = true;
-          content.push({
-            type: 'tool_use',
-            id: `tc-${Date.now()}`,
-            name: parsed.tool,
-            input: parsed.args,
-          });
-        }
-      } catch {}
-    }
-    if (!foundToolCall) {
-      // Extract text without tool call tags
       const cleanText = fullText.replace(/<\|tool_call>.*?<tool_call\|>/g, '').trim();
       content.push({ type: 'text', text: cleanText || fullText });
     }
@@ -505,143 +387,84 @@ export class WasmProvider implements LLMProvider {
   }
   /**
-   * Parse Gemma native tool call args, handling internal quotes in values.
-   * Extracts key-value pairs using <|"|> delimiters before any replacement,
-   * so internal quotes like data."date" are preserved correctly.
-   * Example: {schema:<|"|>assemblee<|"|>,query:<|"|>SELECT data."date"<|"|>}
+   * Extract a brace-balanced {...} block starting at text[startIdx].
+   * Ignores { and } that appear inside <|"|>...<|"|> string delimiters.
+   * Returns the full block including outer braces, or null if unbalanced.
    */
-  private static parseGemmaArgs(raw: string): Record<string, unknown> {
-    const pairs: Record<string, unknown> = {};
-    // Extract string values delimited by <|"|>
-    const kvRegex = /(\w+)\s*:\s*<\|"\|>([\s\S]*?)<\|"\|>/g;
-    let m: RegExpExecArray | null;
-    while ((m = kvRegex.exec(raw)) !== null) {
-      pairs[m[1]] = m[2];
-    }
-    // Extract numeric values (no delimiters)
-    const numRegex = /(\w+)\s*:\s*(\d+(?:\.\d+)?(?:e[+-]?\d+)?)\s*(?:[,}]|$)/g;
-    while ((m = numRegex.exec(raw)) !== null) {
-      if (!(m[1] in pairs)) pairs[m[1]] = Number(m[2]);
-    }
-    // Extract boolean/null literals
-    const litRegex = /(\w+)\s*:\s*(true|false|null)\s*(?:[,}]|$)/g;
-    while ((m = litRegex.exec(raw)) !== null) {
-      if (!(m[1] in pairs)) pairs[m[1]] = JSON.parse(m[2]);
-    }
-    // Extract inline object/array values (e.g. params:{items:[...]}, data:{a:1})
-    // Gemma often writes nested objects without <|"|> delimiters.
-    // We find key:{ or key:[ and then match balanced braces/brackets.
-    const objRe = /(\w+)\s*:\s*([{\[])/g;
-    while ((m = objRe.exec(raw)) !== null) {
-      if (m[1] in pairs) continue; // already captured by a higher-priority regex
-      const key = m[1];
-      const opener = m[2];
-      const closer = opener === '{' ? '}' : ']';
-      let depth = 1;
-      let i = m.index + m[0].length;
-      while (i < raw.length && depth > 0) {
-        const ch = raw[i];
-        if (ch === opener) depth++;
-        else if (ch !== opener && (ch === '{' || ch === '[')) depth++;
-        else if (ch === closer) depth--;
-        else if (ch !== closer && (ch === '}' || ch === ']')) depth--;
-        i++;
+  private static extractArgsBlock(text: string, startIdx: number): string | null {
+    if (text[startIdx] !== '{') return null;
+    const DELIM = '<|"|>';
+    let depth = 0;
+    let inString = false;
+    let i = startIdx;
+    while (i < text.length) {
+      if (text.startsWith(DELIM, i)) {
+        inString = !inString;
+        i += DELIM.length;
+        continue;
       }
-      const fragment = raw.slice(m.index + m[0].length - 1, i); // includes opener and closer
-      // Replace <|"|> with " for JSON parsing
-      const jsonStr = fragment.replace(/<\|"\|>/g, '"');
-      try { pairs[key] = JSON.parse(jsonStr); } catch { /* unparseable — skip */ }
-    }
-    // Try to parse string values that look like JSON objects/arrays
-    for (const [k, v] of Object.entries(pairs)) {
-      if (typeof v === 'string' && (v.startsWith('{') || v.startsWith('['))) {
-        try { pairs[k] = JSON.parse(v); } catch { /* keep as string */ }
+      if (!inString) {
+        if (text[i] === '{') depth++;
+        else if (text[i] === '}') {
+          depth--;
+          if (depth === 0) return text.slice(startIdx, i + 1);
+        }
       }
+      i++;
     }
+    return null;
+  }
-    return pairs;
+  /**
+   * Parse Gemma native tool call args by normalizing to JSON in one pass.
+   *   1. `<|"|>...<|"|>`      → `"..."`          (string delimiters)
+   *   2. Unquoted keys         → `"quoted":`      (valid JSON keys)
+   * Then `JSON.parse` handles nesting, arrays, numbers, booleans, null natively.
+   * Example: {schema:<|"|>senat<|"|>,params:{data:[{id:1}]}} → {schema:"senat",params:{data:[{id:1}]}}
+   */
+  private static parseGemmaArgs(raw: string): Record<string, unknown> {
+    const jsonStr = raw
+      .replace(/<\|"\|>([\s\S]*?)<\|"\|>/g, (_, s) => JSON.stringify(s))
+      .replace(/([{,])\s*([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":');
+    try {
+      const parsed = JSON.parse(jsonStr);
+      return (typeof parsed === 'object' && parsed !== null) ? parsed : {};
+    } catch {
+      return {};
+    }
   }
   /**
    * Format a value for Gemma 4 native tool syntax.
-   * Strings use <|"|> delimiters, numbers/booleans/null are bare.
+   * Backward-compat wrapper — delegates to the module-level `gemmaValue`
+   * exported from `tool-layers.ts` so the logic is shared with the
+   * system-prompt declaration block.
+   * @internal — kept for external callers; formatToolCall / formatToolResponse call the module-level `gemmaValue` directly
    */
-  private static gemmaValue(v: unknown): string {
-    const q = '<|"|>';
-    if (v === null || v === undefined) return 'null';
-    if (typeof v === 'number' || typeof v === 'boolean') return String(v);
-    if (Array.isArray(v)) return `[${v.map(i => WasmProvider.gemmaValue(i)).join(',')}]`;
-    if (typeof v === 'object') {
-      const entries = Object.entries(v as Record<string, unknown>)
-        .map(([k, val]) => `${k}:${WasmProvider.gemmaValue(val)}`);
-      return `{${entries.join(',')}}`;
-    }
-    return `${q}${String(v)}${q}`;
+  static gemmaValue(v: unknown): string {
+    return gemmaValue(v);
   }
   /**
    * Format a tool declaration in Gemma 4 native syntax.
+   * Backward-compat wrapper — delegates to `formatGemmaToolDeclaration`
+   * exported from `tool-layers.ts`.
+   * @internal
    */
-  private static formatToolDeclaration(tool: ProviderTool): string {
-    const q = '<|"|>';
-    let decl = `<|tool>declaration:${tool.name}{\n`;
-    decl += `  description:${q}${tool.description}${q}`;
-    const schema = tool.input_schema;
-    if (schema?.properties) {
-      const props = schema.properties as Record<string, { description?: string; type?: string; enum?: string[]; format?: string; default?: unknown }>;
-      decl += `,\n  parameters:{\n    properties:{\n`;
-      const propEntries = Object.entries(props);
-      for (let i = 0; i < propEntries.length; i++) {
-        const [key, val] = propEntries[i];
-        decl += `      ${key}:{`;
-        const parts: string[] = [];
-        if (val.description) parts.push(`description:${q}${val.description}${q}`);
-        // P1 fix: if no type specified, infer OBJECT for params-like fields to avoid
-        // Gemma wrapping the value in <|"|>...<|"|> (treating it as a string)
-        let inferredType = val.type;
-        if (!inferredType) {
-          const descLower = (val.description ?? '').toLowerCase();
-          if (descLower.includes('objet') || descLower.includes('object') || descLower.includes('parameter') || descLower.includes('paramètre') || key === 'params') {
-            inferredType = 'object';
-          } else {
-            inferredType = 'string';
-          }
-        }
-        parts.push(`type:${q}${inferredType.toUpperCase()}${q}`);
-        if (val.enum) parts.push(`enum:[${val.enum.map(e => `${q}${e}${q}`).join(',')}]`);
-        if (val.format) parts.push(`format:${q}${val.format}${q}`);
-        if (val.default !== undefined) parts.push(`default:${WasmProvider.gemmaValue(val.default)}`);
-        decl += parts.join(',');
-        decl += `}${i < propEntries.length - 1 ? ',' : ''}\n`;
-      }
-      decl += `    }`;
-      if (schema.required && Array.isArray(schema.required)) {
-        decl += `,\n    required:[${(schema.required as string[]).map(r => `${q}${r}${q}`).join(',')}]`;
-      }
-      decl += `,\n    type:${q}OBJECT${q}\n  }`;
-    }
-    decl += `\n}<tool|>`;
-    return decl;
+  static formatToolDeclaration(tool: ProviderTool): string {
+    return formatGemmaToolDeclaration(tool);
   }
   /**
    * Format a tool response in Gemma 4 native syntax.
+   * @internal — used by buildGemmaPrompt
    */
-  private static formatToolResponse(toolName: string, content: string): string {
+  static formatToolResponse(toolName: string, content: string): string {
     const q = '<|"|>';
     // Try to parse as JSON for structured output
     try {
       const parsed = JSON.parse(content);
-      return `<|tool_response>response:${toolName}${WasmProvider.gemmaValue(parsed)}<tool_response|>`;
+      return `<|tool_response>response:${toolName}${gemmaValue(parsed)}<tool_response|>`;
     } catch {
       // Plain string result
       return `<|tool_response>response:${toolName}{result:${q}${content}${q}}<tool_response|>`;
@@ -650,87 +473,118 @@ export class WasmProvider implements LLMProvider {
   /**
    * Format a tool call in Gemma 4 native syntax.
+   * @internal — used by buildGemmaPrompt
    */
-  private static formatToolCall(name: string, input: Record<string, unknown>): string {
+  static formatToolCall(name: string, input: Record<string, unknown>): string {
     const entries = Object.entries(input)
-      .map(([k, v]) => `${k}:${WasmProvider.gemmaValue(v)}`);
+      .map(([k, v]) => `${k}:${gemmaValue(v)}`);
     return `<|tool_call>call:${name}{${entries.join(',')}}<tool_call|>`;
   }
-  private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string, maxTools?: number): string {
-    const systemParts: string[] = [];
+  private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string): string {
+    return buildGemmaPrompt({ systemPrompt, tools, messages });
+  }
-    // Inject system prompt from settings if provided
-    if (systemPrompt) {
-      systemParts.push(systemPrompt);
-    }
+  destroy() {
+    this.inference?.close?.();
+    this.inference = null;
+    this.setStatus('idle');
+    this.initPromise = null;
+  }
+}
-    if (tools.length > 0) {
-      // Gemma small models struggle with too many tools — limit to most relevant
-      const MAX_TOOLS = maxTools ?? 15;
-      const limitedTools = tools.length > MAX_TOOLS
-        ? [
-            // Always include render_* tools (UI)
-            ...tools.filter(t => t.name.startsWith('render_') || t.name === 'clear_canvas').slice(0, 8),
-            // Fill with data tools
-            ...tools.filter(t => !t.name.startsWith('render_') && t.name !== 'clear_canvas').slice(0, MAX_TOOLS - 8),
-          ]
-        : tools;
-      // Native Gemma 4 tool declarations
-      systemParts.push(limitedTools.map(t => WasmProvider.formatToolDeclaration(t)).join('\n'));
-    }
+/**
+ * Input for {@link buildGemmaPrompt}.
+ *
+ * Pass `messages: []` (or omit it) to produce a preview of the system/tool
+ * portion of the prompt without any conversation turns — useful for debug
+ * panels that want to display the exact transformed prompt Gemma will see.
+ */
+export interface BuildGemmaPromptInput {
+  /** System prompt — expected to already be in Gemma native syntax (use
+   *  `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`).
+   *  The tool declarations are embedded inside this system prompt — they are
+   *  NOT re-emitted from `tools` by this function anymore. */
+  systemPrompt?: string;
+  /** Provider tools — not read by `buildGemmaPrompt` itself: the tool_use / tool_result
+   *  ID → name mapping is derived from `messages`. Declarations live inside `systemPrompt`. */
+  tools: ProviderTool[];
+  /** Conversation turns. Defaults to `[]` (preview mode — no history turns; the trailing open `<|turn>model\n` is still appended). */
+  messages?: ChatMessage[];
+}
-    // Build a map of tool_use_id → tool_name from all messages for tool_result resolution
-    const toolNameById = new Map<string, string>();
-    for (const msg of messages) {
-      if (typeof msg.content !== 'string') {
-        for (const block of msg.content as ContentBlock[]) {
-          if (block.type === 'tool_use') {
-            const b = block as { type: 'tool_use'; id: string; name: string };
-            toolNameById.set(b.id, b.name);
-          }
+/**
+ * Build the final Gemma 4 native prompt string from a system prompt, a set of
+ * provider tools, and a conversation history.
+ *
+ * This is the exact transformation applied by {@link WasmProvider} before
+ * calling LlmInference — exported so UI debug panels can display the prompt
+ * as it will actually be sent to the model.
+ *
+ * The system prompt is expected to already be in Gemma native syntax AND to
+ * already embed the `<|tool>declaration:…<tool|>` blocks inline — build it with
+ * `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`.
+ *
+ * Transformations applied:
+ * 1. Wraps the system prompt in `<|turn>system\n...\n<turn|>`. Note that no
+ *    `<|think|>` marker is inserted by this function: if Gemma 4's native
+ *    thinking mode (reasoning inside `<|channel>thought\n...<channel|>`) is
+ *    wanted, the marker presumably has to be part of `systemPrompt` already,
+ *    and any such block must be stripped by the caller — TODO confirm.
+ * 2. Serializes messages as `<|turn>user|model\n...<turn|>` with tool_use →
+ *    `<|tool_call>`, tool_result → `<|tool_response>`.
+ * 3. Terminates with an open `<|turn>model\n` for generation.
+ * 4. No explicit `<bos>` — LlmInference adds it via the tokenizer.
+ */
+export function buildGemmaPrompt(input: BuildGemmaPromptInput): string {
+  const { systemPrompt, messages = [] } = input;
+  // Build a map of tool_use_id → tool_name from all messages for tool_result resolution
+  const toolNameById = new Map<string, string>();
+  for (const msg of messages) {
+    if (typeof msg.content !== 'string') {
+      for (const block of msg.content as ContentBlock[]) {
+        if (block.type === 'tool_use') {
+          const b = block as { type: 'tool_use'; id: string; name: string };
+          toolNameById.set(b.id, b.name);
         }
       }
     }
+  }
-    const parts: string[] = [];
-    if (systemParts.length > 0) {
-      // Gemma 4 has no system role — inject system content as a user turn
-      parts.push(`<|turn>user\n${systemParts.join('\n')}<turn|>`);
-    }
-    for (const msg of messages) {
-      const role = msg.role === 'assistant' ? 'model' : 'user';
-      if (typeof msg.content === 'string') {
-        parts.push(`<|turn>${role}\n${msg.content}<turn|>`);
-      } else {
-        // Serialize all block types in Gemma 4 native format
-        const segments: string[] = [];
-        for (const block of msg.content as ContentBlock[]) {
-          if (block.type === 'text') {
-            segments.push((block as { type: 'text'; text: string }).text);
-          } else if (block.type === 'tool_use') {
-            const b = block as { type: 'tool_use'; name: string; input: Record<string, unknown> };
-            segments.push(WasmProvider.formatToolCall(b.name, b.input));
-          } else if (block.type === 'tool_result') {
-            const b = block as { type: 'tool_result'; tool_use_id: string; content: string };
-            const toolName = toolNameById.get(b.tool_use_id) ?? 'unknown';
-            segments.push(WasmProvider.formatToolResponse(toolName, b.content));
-          }
-        }
-        if (segments.length > 0) {
-          parts.push(`<|turn>${role}\n${segments.join('\n')}<turn|>`);
+  const parts: string[] = [];
+  // Gemma 4 native structure: the system prompt already embeds tool
+  // declarations inline at each STEP (built via buildSystemPromptWithAliases
+  // with providerKind: 'gemma').
+  if (systemPrompt) {
+    parts.push(`<|turn>system\n${systemPrompt}\n<turn|>`);
+  }
+  for (const msg of messages) {
+    const role = msg.role === 'assistant' ? 'model' : 'user';
+    if (typeof msg.content === 'string') {
+      parts.push(`<|turn>${role}\n${msg.content}<turn|>`);
+    } else {
+      // Serialize all block types in Gemma 4 native format
+      const segments: string[] = [];
+      for (const block of msg.content as ContentBlock[]) {
+        if (block.type === 'text') {
+          segments.push((block as { type: 'text'; text: string }).text);
+        } else if (block.type === 'tool_use') {
+          const b = block as { type: 'tool_use'; name: string; input: Record<string, unknown> };
+          segments.push(WasmProvider.formatToolCall(b.name, b.input));
+        } else if (block.type === 'tool_result') {
+          const b = block as { type: 'tool_result'; tool_use_id: string; content: string };
+          const toolName = toolNameById.get(b.tool_use_id) ?? 'unknown';
+          segments.push(WasmProvider.formatToolResponse(toolName, b.content));
         }
       }
+      if (segments.length > 0) {
+        parts.push(`<|turn>${role}\n${segments.join('\n')}<turn|>`);
+      }
     }
-    parts.push('<|turn>model\n');
-    return parts.join('\n');
-  }
-  destroy() {
-    this.inference?.close?.();
-    this.inference = null;
-    this.setStatus('idle');
-    this.initPromise = null;
   }
+  parts.push('<|turn>model\n');
+  return parts.join('\n');
 }