npm - @webmcp-auto-ui/agent - Versions diffs - 2.5.26 → 2.5.28 - Mend

@webmcp-auto-ui/agent 2.5.26 → 2.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/package.json +10 -2
package/src/autoui-server.ts +80 -65
package/src/index.ts +25 -6
package/src/loop.ts +52 -33
package/src/prompts/claude-prompt-builder.ts +81 -0
package/src/prompts/gemma4-prompt-builder.ts +205 -0
package/src/prompts/index.ts +55 -0
package/src/prompts/mistral-prompt-builder.ts +90 -0
package/src/prompts/qwen-prompt-builder.ts +90 -0
package/src/prompts/tool-call-parsers.ts +322 -0
package/src/prompts/tool-refs.ts +196 -0
package/src/providers/factory.ts +34 -3
package/src/providers/hawk-models.ts +22 -0
package/src/providers/hawk.ts +181 -0
package/src/providers/transformers-models.ts +143 -0
package/src/providers/transformers-serialize.ts +81 -0
package/src/providers/transformers.ts +329 -0
package/src/providers/transformers.worker.ts +640 -0
package/src/providers/wasm.ts +132 -332
package/src/recipes/_generated.ts +306 -0
package/src/recipes/hackathon-assemblee-nationale.md +111 -0
package/src/recipes/notebook-playbook.md +193 -0
package/src/server/hawkProxy.ts +54 -0
package/src/server/index.ts +2 -0
package/src/tool-layers.ts +7 -403
package/src/trace-observer.ts +669 -0
package/src/types.ts +17 -7
package/src/util/opfs-cache.ts +364 -0
package/src/util/storage-inventory.ts +195 -0
package/tests/gemma-prompt.test.ts +472 -0
package/tests/loop.test.ts +3 -3
package/tests/transformers-serialize.test.ts +103 -0

package/src/providers/wasm.ts CHANGED

@@ -5,7 +5,15 @@
  */
 import type { LLMProvider, LLMResponse, ChatMessage, ProviderTool, WasmModelId, ContentBlock } from '../types.js';
 import type { PipelineTrace } from '../pipeline-trace.js';
-import { formatGemmaToolDeclaration, gemmaValue } from '../tool-layers.js';
+import {
+  buildGemmaPrompt,
+  formatGemmaToolDeclaration,
+  formatToolCall,
+  formatToolResponse,
+  gemmaValue,
+} from '../prompts/gemma4-prompt-builder.js';
+import { parseToolCalls } from '../prompts/tool-call-parsers.js';
+import { loadOrDownloadModel } from '../util/opfs-cache.js';
 export type WasmStatus = 'idle' | 'loading' | 'ready' | 'error';
@@ -64,7 +72,6 @@ export class WasmProvider implements LLMProvider {
     const modelInfo = LITERT_MODELS[this.model] ?? LITERT_MODELS['gemma-e2b'];
     const { repo, file, size: expectedSize } = modelInfo;
-    const url = `https://huggingface.co/${repo}/resolve/main/${file}`;
     this.opts.onProgress?.(0, 'downloading', 0, expectedSize);
@@ -73,7 +80,15 @@ export class WasmProvider implements LLMProvider {
       'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai@0.10.27/wasm',
     );
-    const modelStream = await this.getModelStream(url, file, expectedSize);
+    const streams = await loadOrDownloadModel(
+      repo,
+      [{ path: file, expectedSize }],
+      (progress) => {
+        this.opts.onProgress?.(progress.totalProgress, progress.status, progress.loaded, progress.total);
+      },
+    );
+    const modelStream = streams.get(file);
+    if (!modelStream) throw new Error(`Model file missing: ${file}`);
     this.opts.onProgress?.(1, 'initializing', 0, 0);
@@ -94,88 +109,6 @@ export class WasmProvider implements LLMProvider {
     this.setStatus('ready');
   }
-  /**
-   * Download model with OPFS caching, returning a ReadableStream.
-   * The stream reader is passed directly to LlmInference as modelAssetBuffer
-   * to avoid buffering multi-GB models entirely in RAM.
-   */
-  private async getModelStream(
-    url: string,
-    filename: string,
-    knownSize: number,
-  ): Promise<ReadableStream<Uint8Array>> {
-    const total = knownSize;
-    const progressCb = (p: number, loaded: number, t: number) => {
-      this.opts.onProgress?.(p, 'downloading', loaded, t);
-    };
-    const root = await navigator.storage.getDirectory();
-    const modelsDir = await root.getDirectoryHandle('webmcp-models', { create: true });
-    // ── Clean orphan .crswap files (Chrome WritableStream leftovers) ──
-    try { await modelsDir.removeEntry(`${filename}.crswap`); } catch { /* no swap — OK */ }
-    // ── OPFS cache hit ───────────────────────────────────────────────
-    try {
-      const cached = await modelsDir.getFileHandle(filename);
-      const file = await cached.getFile();
-      if (file.size > 1000 && (total === 0 || Math.abs(file.size - total) < total * 0.01)) {
-        progressCb(1, file.size, file.size);
-        this.opts.onProgress?.(1, 'cached', file.size, file.size);
-        return file.stream() as ReadableStream<Uint8Array>;
-      }
-      // Corrupt cache (0 bytes or wrong size) — remove and re-download
-      await modelsDir.removeEntry(filename).catch(() => {});
-      try { await modelsDir.removeEntry(`${filename}.crswap`); } catch { /* OK */ }
-    } catch {
-      // Cache miss
-    }
-    // ── Network download (retry on 503) ───────────────────────────────
-    let response: Response | null = null;
-    for (let attempt = 0; attempt < 3; attempt++) {
-      response = await fetch(url);
-      if (response.ok) break;
-      if (response.status === 503 && attempt < 2) {
-        const wait = (attempt + 1) * 5000;
-        this.opts.onProgress?.(0, `retry in ${wait / 1000}s (503)`, 0, total);
-        await new Promise(r => setTimeout(r, wait));
-        continue;
-      }
-      throw new Error(`Download failed: ${response.status} ${response.statusText}`);
-    }
-    if (!response!.ok) throw new Error('Download failed after retries');
-    if (!response!.body) throw new Error('Response body is null');
-    const [streamForConsumer, streamForCache] = response!.body!.tee();
-    // Background OPFS cache (fire-and-forget)
-    (async () => {
-      try {
-        const handle = await modelsDir.getFileHandle(filename, { create: true });
-        const writable = await handle.createWritable();
-        await streamForCache.pipeTo(writable);
-      } catch {
-        try { await modelsDir.removeEntry(filename).catch(() => {}); } catch {}
-      }
-    })();
-    // Progress stream using known size
-    let loaded = 0;
-    const progressTransform = new TransformStream<Uint8Array, Uint8Array>({
-      transform(chunk, controller) {
-        loaded += chunk.length;
-        progressCb(total > 0 ? loaded / total : 0, loaded, total);
-        controller.enqueue(chunk);
-      },
-      flush() {
-        progressCb(1, total, total);
-      },
-    });
-    return streamForConsumer.pipeThrough(progressTransform);
-  }
   async chat(
     messages: ChatMessage[],
     tools: ProviderTool[],
@@ -260,53 +193,108 @@ export class WasmProvider implements LLMProvider {
     // even after our busy guard clears, because GPU resources release asynchronously.
     for (let attempt = 0; attempt < 5; attempt++) {
       try {
-        let lastToken = '';
-        let repeatCount = 0;
         const MAX_REPEATS = 20;
         const TOOL_CALL_MAX_CHARS = 3000;
-        const result = await this.inference.generateResponse(prompt, (partialResult: string, _done: boolean) => {
-          if (options?.signal?.aborted) {
-            this.inference?.cancelProcessing();
+        // ── Chrome M4 memory leak workaround (MediaPipe #6270) ─────────────
+        // Rather than accumulating chunks directly in a closure over the
+        // ProgressListener callback — which pins references and leaks on
+        // Chrome/Mac M4 — we bridge the callback into a ReadableStream and
+        // consume it via a ReadableStreamDefaultReader. Each chunk is fully
+        // processed and released before the next `await reader.read()`, which
+        // lets the GC reclaim intermediate strings between chunks.
+        const inference = this.inference;
+        const signal = options?.signal;
+        const streamControllerRef: { current: ReadableStreamDefaultController<string> | null } = { current: null };
+        const tokenStream = new ReadableStream<string>({
+          start(controller: ReadableStreamDefaultController<string>) {
+            streamControllerRef.current = controller;
+          },
+        });
+        const generationPromise = inference.generateResponse(prompt, (partialResult: string, done: boolean) => {
+          if (signal?.aborted) {
+            inference?.cancelProcessing();
+            try { streamControllerRef.current?.close(); } catch {}
             return;
           }
-          // Detect infinite repetition loop (e.g. Gemma repeating 't' 150 times)
-          if (partialResult === lastToken) {
-            repeatCount++;
-            if (repeatCount > MAX_REPEATS) {
-              this.inference?.cancelProcessing();
-              return;
-            }
-          } else {
-            lastToken = partialResult;
-            repeatCount = 0;
+          try { streamControllerRef.current?.enqueue(partialResult); } catch {}
+          if (done) {
+            try { streamControllerRef.current?.close(); } catch {}
           }
-          fullText += partialResult;
-          tokenCount++;
-          options?.onToken?.(partialResult);
-          // Early detect and strip fake tool_response in streaming
-          if (fullText.includes('<|tool_response>') && fullText.includes('<tool_call|>')) {
-            const lastCallEnd = fullText.lastIndexOf('<tool_call|>');
-            const responseStart = fullText.indexOf('<|tool_response>', lastCallEnd);
-            if (responseStart !== -1) {
-              // Gemma is hallucinating a response — cancel immediately
+        });
+        const reader: ReadableStreamDefaultReader<string> = tokenStream.getReader();
+        let lastToken = '';
+        let repeatCount = 0;
+        let cancelledEarly = false;
+        try {
+          while (true) {
+            const { value, done } = await reader.read();
+            if (done) break;
+            const partialResult = value ?? '';
+            // Detect infinite repetition loop (e.g. Gemma repeating 't' 150 times)
+            if (partialResult === lastToken) {
+              repeatCount++;
+              if (repeatCount > MAX_REPEATS) {
+                this.inference?.cancelProcessing();
+                cancelledEarly = true;
+                break;
+              }
+            } else {
+              lastToken = partialResult;
+              repeatCount = 0;
+            }
+            fullText += partialResult;
+            tokenCount++;
+            options?.onToken?.(partialResult);
+            // Early detect and strip fake tool_response in streaming
+            if (fullText.includes('<|tool_response>') && fullText.includes('<tool_call|>')) {
+              const lastCallEnd = fullText.lastIndexOf('<tool_call|>');
+              const responseStart = fullText.indexOf('<|tool_response>', lastCallEnd);
+              if (responseStart !== -1) {
+                // Gemma is hallucinating a response — cancel immediately
+                this.inference?.cancelProcessing();
+                // Truncate to last valid tool call
+                fullText = fullText.slice(0, lastCallEnd + '<tool_call|>'.length);
+                cancelledEarly = true;
+                break;
+              }
+            }
+            // Safety: if text grows way too long, force cancel
+            if (fullText.length > TOOL_CALL_MAX_CHARS * 2) {
               this.inference?.cancelProcessing();
-              // Truncate to last valid tool call
-              fullText = fullText.slice(0, lastCallEnd + '<tool_call|>'.length);
-              return;
+              cancelledEarly = true;
+              break;
             }
           }
-          // Safety: if text grows way too long, force cancel
-          if (fullText.length > TOOL_CALL_MAX_CHARS * 2) {
-            this.inference?.cancelProcessing();
-            return;
+        } finally {
+          try { reader.releaseLock(); } catch {}
+          if (cancelledEarly) {
+            try { streamControllerRef.current?.close(); } catch {}
           }
-        });
+        }
+        const result = await generationPromise.catch(() => '');
         // Fallback if the streaming callback didn't accumulate
         if (result && !fullText) fullText = result;
+        // Pipeline-trace event: why did generation stop?
+        // - cancelled: we aborted mid-stream (repetition loop, fake tool_response, oversized, abort signal)
+        // - maxTokens: hit the maxTokens ceiling passed via options
+        // - eos: natural end-of-stream from MediaPipe (model emitted EOS)
+        const endReason = cancelledEarly
+          ? 'cancelled'
+          : tokenCount >= (options?.maxTokens ?? 4096)
+            ? 'maxTokens'
+            : 'eos';
+        const tail = fullText.slice(-80).replace(/\n/g, '\\n');
+        console.log(`[wasm] end=${endReason} tokens=${tokenCount}/${options?.maxTokens ?? '?'} tail="${tail}"`);
+        this.trace?.push('generate', 'wasm', `end=${endReason} tokens=${tokenCount}/${options?.maxTokens ?? '?'} tail="${tail}"`, endReason === 'eos' ? 'ok' : 'warn');
         break; // Success — exit retry loop
       } catch (err) {
         const msg = String(err);
@@ -325,9 +313,15 @@ export class WasmProvider implements LLMProvider {
       }
     }
-    // Strip any standalone <|tool_response> blocks in model output
-    // (the model should never generate these — they're injected by the framework)
-    fullText = fullText.replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '');
+    // Strip hallucinated framework tokens the model should never emit on its own:
+    // - <|tool_response>...<tool_response|>  (injected by the framework, never generated)
+    // - <|channel>thought...<channel|>      (ghost thought channels if Gemma emits one
+    //   without <|think|> activation — stray artefacts from pretraining)
+    // - <|think|>                            (stray thinking-mode markers)
+    fullText = fullText
+      .replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '')
+      .replace(/<\|channel>thought[\s\S]*?<channel\|>/g, '')
+      .replace(/<\|think\|>/g, '');
     const latencyMs = performance.now() - t0;
@@ -339,37 +333,7 @@ export class WasmProvider implements LLMProvider {
       }
     } catch {}
-    const content: ContentBlock[] = [];
-    const START_TAG = '<|tool_call>call:';
-    const END_TAG = '<tool_call|>';
-    let foundToolCall = false;
-    let scanIdx = 0;
-    while (true) {
-      const startIdx = fullText.indexOf(START_TAG, scanIdx);
-      if (startIdx === -1) break;
-      const nameStart = startIdx + START_TAG.length;
-      const braceIdx = fullText.indexOf('{', nameStart);
-      if (braceIdx === -1) break;
-      const name = fullText.slice(nameStart, braceIdx);
-      if (!/^\w+$/.test(name)) { scanIdx = nameStart; continue; }
-      const argsBlock = WasmProvider.extractArgsBlock(fullText, braceIdx);
-      if (!argsBlock) break;
-      const afterArgs = braceIdx + argsBlock.length;
-      if (!fullText.startsWith(END_TAG, afterArgs)) { scanIdx = afterArgs; continue; }
-      foundToolCall = true;
-      content.push({
-        type: 'tool_use',
-        id: `tc-${Date.now()}-${content.length}`,
-        name,
-        input: WasmProvider.parseGemmaArgs(argsBlock),
-      });
-      scanIdx = afterArgs + END_TAG.length;
-    }
-    if (!foundToolCall) {
-      const cleanText = fullText.replace(/<\|tool_call>.*?<tool_call\|>/g, '').trim();
-      content.push({ type: 'text', text: cleanText || fullText });
-    }
+    const { content, foundToolCall } = parseToolCalls(fullText, 'gemma-native');
     return {
       content,
@@ -386,103 +350,30 @@ export class WasmProvider implements LLMProvider {
     };
   }
-  /**
-   * Extract a brace-balanced {...} block starting at text[startIdx].
-   * Ignores { and } that appear inside <|"|>...<|"|> string delimiters.
-   * Returns the full block including outer braces, or null if unbalanced.
-   */
-  private static extractArgsBlock(text: string, startIdx: number): string | null {
-    if (text[startIdx] !== '{') return null;
-    const DELIM = '<|"|>';
-    let depth = 0;
-    let inString = false;
-    let i = startIdx;
-    while (i < text.length) {
-      if (text.startsWith(DELIM, i)) {
-        inString = !inString;
-        i += DELIM.length;
-        continue;
-      }
-      if (!inString) {
-        if (text[i] === '{') depth++;
-        else if (text[i] === '}') {
-          depth--;
-          if (depth === 0) return text.slice(startIdx, i + 1);
-        }
-      }
-      i++;
-    }
-    return null;
-  }
-  /**
-   * Parse Gemma native tool call args by normalizing to JSON in one pass.
-   *   1. `<|"|>...<|"|>`      → `"..."`          (string delimiters)
-   *   2. Unquoted keys         → `"quoted":`      (valid JSON keys)
-   * Then `JSON.parse` handles nesting, arrays, numbers, booleans, null natively.
-   * Example: {schema:<|"|>senat<|"|>,params:{data:[{id:1}]}} → {schema:"senat",params:{data:[{id:1}]}}
-   */
-  private static parseGemmaArgs(raw: string): Record<string, unknown> {
-    const jsonStr = raw
-      .replace(/<\|"\|>([\s\S]*?)<\|"\|>/g, (_, s) => JSON.stringify(s))
-      .replace(/([{,])\s*([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":');
-    try {
-      const parsed = JSON.parse(jsonStr);
-      return (typeof parsed === 'object' && parsed !== null) ? parsed : {};
-    } catch {
-      return {};
-    }
-  }
-  /**
-   * Format a value for Gemma 4 native tool syntax.
-   * Backward-compat wrapper — delegates to the module-level `gemmaValue`
-   * exported from `tool-layers.ts` so the logic is shared with the
-   * system-prompt declaration block.
-   * @internal — used by formatToolCall / formatToolResponse
-   */
+  /** @internal — delegates to `gemmaValue` from prompts/gemma4-prompt-builder. */
   static gemmaValue(v: unknown): string {
     return gemmaValue(v);
   }
-  /**
-   * Format a tool declaration in Gemma 4 native syntax.
-   * Backward-compat wrapper — delegates to `formatGemmaToolDeclaration`
-   * exported from `tool-layers.ts`.
-   * @internal
-   */
+  /** @internal — delegates to `formatGemmaToolDeclaration` from prompts/gemma4-prompt-builder. */
   static formatToolDeclaration(tool: ProviderTool): string {
     return formatGemmaToolDeclaration(tool);
   }
-  /**
-   * Format a tool response in Gemma 4 native syntax.
-   * @internal — used by buildGemmaPrompt
-   */
-  static formatToolResponse(toolName: string, content: string): string {
-    const q = '<|"|>';
-    // Try to parse as JSON for structured output
-    try {
-      const parsed = JSON.parse(content);
-      return `<|tool_response>response:${toolName}${gemmaValue(parsed)}<tool_response|>`;
-    } catch {
-      // Plain string result
-      return `<|tool_response>response:${toolName}{result:${q}${content}${q}}<tool_response|>`;
-    }
+  /** @internal — delegates to `formatToolResponse` from prompts/gemma4-prompt-builder. */
+  static formatToolResponse(content: string): string {
+    return formatToolResponse(content);
   }
-  /**
-   * Format a tool call in Gemma 4 native syntax.
-   * @internal — used by buildGemmaPrompt
-   */
+  /** @internal — delegates to `formatToolCall` from prompts/gemma4-prompt-builder. */
   static formatToolCall(name: string, input: Record<string, unknown>): string {
-    const entries = Object.entries(input)
-      .map(([k, v]) => `${k}:${gemmaValue(v)}`);
-    return `<|tool_call>call:${name}{${entries.join(',')}}<tool_call|>`;
+    return formatToolCall(name, input);
   }
-  private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string): string {
-    return buildGemmaPrompt({ systemPrompt, tools, messages });
+  private buildPrompt(messages: ChatMessage[], _tools: ProviderTool[], systemPrompt?: string): string {
+    // `_tools` is intentionally ignored — tool declarations are embedded inline
+    // inside `systemPrompt` via buildSystemPromptWithAliases({ providerKind: 'gemma' }).
+    return buildGemmaPrompt({ systemPrompt, messages });
   }
   destroy() {
@@ -493,98 +384,7 @@ export class WasmProvider implements LLMProvider {
   }
 }
-/**
- * Input for {@link buildGemmaPrompt}.
- *
- * Pass `messages: []` (or omit it) to produce a preview of the system/tool
- * portion of the prompt without any conversation turns — useful for debug
- * panels that want to display the exact transformed prompt Gemma will see.
- */
-export interface BuildGemmaPromptInput {
-  /** System prompt — expected to already be in Gemma native syntax (use
-   *  `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`).
-   *  The tool declarations are embedded inside this system prompt — they are
-   *  NOT re-emitted from `tools` by this function anymore. */
-  systemPrompt?: string;
-  /** Provider tools — used only for message serialization (tool_use / tool_result
-   *  ID → name mapping). Declarations live inside `systemPrompt`. */
-  tools: ProviderTool[];
-  /** Conversation turns. Defaults to `[]` (preview mode — no `<|turn>` user/model blocks). */
-  messages?: ChatMessage[];
-}
-/**
- * Build the final Gemma 4 native prompt string from a system prompt, a set of
- * provider tools, and a conversation history.
- *
- * This is the exact transformation applied by {@link WasmProvider} before
- * calling LlmInference — exported so UI debug panels can display the prompt
- * as it will actually be sent to the model.
- *
- * The system prompt is expected to already be in Gemma native syntax AND to
- * already embed the `<|tool>declaration>` blocks inline — build it with
- * `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`.
- *
- * Transformations applied:
- * 1. Wraps the system prompt in `<|turn>system\n<|think|>\n...<turn|>` — this
- *    activates Gemma 4's native thinking mode so the model emits its internal
- *    reasoning inside a `<|channel>thought\n...<channel|>` block which is then
- *    stripped from the final user-visible output (see the streaming cleanup in
- *    {@link WasmProvider}).
- * 2. Serializes messages as `<|turn>user|model\n...<turn|>` with tool_use →
- *    `<|tool_call>`, tool_result → `<|tool_response>`.
- * 3. Terminates with an open `<|turn>model\n` for generation.
- * 4. No explicit `<bos>` — LlmInference adds it via the tokenizer.
- */
-export function buildGemmaPrompt(input: BuildGemmaPromptInput): string {
-  const { systemPrompt, messages = [] } = input;
-  // Build a map of tool_use_id → tool_name from all messages for tool_result resolution
-  const toolNameById = new Map<string, string>();
-  for (const msg of messages) {
-    if (typeof msg.content !== 'string') {
-      for (const block of msg.content as ContentBlock[]) {
-        if (block.type === 'tool_use') {
-          const b = block as { type: 'tool_use'; id: string; name: string };
-          toolNameById.set(b.id, b.name);
-        }
-      }
-    }
-  }
-  const parts: string[] = [];
-  // Gemma 4 native structure: the system prompt already embeds tool
-  // declarations inline at each STEP (built via buildSystemPromptWithAliases
-  // with providerKind: 'gemma').
-  if (systemPrompt) {
-    parts.push(`<|turn>system\n${systemPrompt}\n<turn|>`);
-  }
-  for (const msg of messages) {
-    const role = msg.role === 'assistant' ? 'model' : 'user';
-    if (typeof msg.content === 'string') {
-      parts.push(`<|turn>${role}\n${msg.content}<turn|>`);
-    } else {
-      // Serialize all block types in Gemma 4 native format
-      const segments: string[] = [];
-      for (const block of msg.content as ContentBlock[]) {
-        if (block.type === 'text') {
-          segments.push((block as { type: 'text'; text: string }).text);
-        } else if (block.type === 'tool_use') {
-          const b = block as { type: 'tool_use'; name: string; input: Record<string, unknown> };
-          segments.push(WasmProvider.formatToolCall(b.name, b.input));
-        } else if (block.type === 'tool_result') {
-          const b = block as { type: 'tool_result'; tool_use_id: string; content: string };
-          const toolName = toolNameById.get(b.tool_use_id) ?? 'unknown';
-          segments.push(WasmProvider.formatToolResponse(toolName, b.content));
-        }
-      }
-      if (segments.length > 0) {
-        parts.push(`<|turn>${role}\n${segments.join('\n')}<turn|>`);
-      }
-    }
-  }
-  parts.push('<|turn>model\n');
-  return parts.join('\n');
-}
+// BuildGemmaPromptInput and buildGemmaPrompt now live in
+// ../prompts/gemma4-prompt-builder.ts. Re-exported here for backward compat.
+export { buildGemmaPrompt } from '../prompts/gemma4-prompt-builder.js';
+export type { BuildGemmaPromptInput } from '../prompts/gemma4-prompt-builder.js';