@webmcp-auto-ui/agent 2.5.26 → 2.5.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,15 @@
  */
 import type { LLMProvider, LLMResponse, ChatMessage, ProviderTool, WasmModelId, ContentBlock } from '../types.js';
 import type { PipelineTrace } from '../pipeline-trace.js';
-import { formatGemmaToolDeclaration, gemmaValue } from '../tool-layers.js';
+import {
+  buildGemmaPrompt,
+  formatGemmaToolDeclaration,
+  formatToolCall,
+  formatToolResponse,
+  gemmaValue,
+} from '../prompts/gemma4-prompt-builder.js';
+import { parseToolCalls } from '../prompts/tool-call-parsers.js';
+import { loadOrDownloadModel } from '../util/opfs-cache.js';
 
 export type WasmStatus = 'idle' | 'loading' | 'ready' | 'error';
 
@@ -64,7 +72,6 @@ export class WasmProvider implements LLMProvider {
 
     const modelInfo = LITERT_MODELS[this.model] ?? LITERT_MODELS['gemma-e2b'];
     const { repo, file, size: expectedSize } = modelInfo;
-    const url = `https://huggingface.co/${repo}/resolve/main/${file}`;
 
     this.opts.onProgress?.(0, 'downloading', 0, expectedSize);
 
@@ -73,7 +80,15 @@ export class WasmProvider implements LLMProvider {
       'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai@0.10.27/wasm',
     );
 
-    const modelStream = await this.getModelStream(url, file, expectedSize);
+    const streams = await loadOrDownloadModel(
+      repo,
+      [{ path: file, expectedSize }],
+      (progress) => {
+        this.opts.onProgress?.(progress.totalProgress, progress.status, progress.loaded, progress.total);
+      },
+    );
+    const modelStream = streams.get(file);
+    if (!modelStream) throw new Error(`Model file missing: ${file}`);
 
     this.opts.onProgress?.(1, 'initializing', 0, 0);
 
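Note on the new helper: the OPFS download-and-cache logic deleted in the next hunk has moved into `../util/opfs-cache.ts`, which is outside this diff. From the call site alone, the helper's contract is roughly the following sketch; the interface names `FileRequest` and `DownloadProgress` are illustrative placeholders, not names confirmed by the package.

```ts
// Hypothetical signature inferred from the call site above. The real
// declaration lives in ../util/opfs-cache.ts and may differ.
interface FileRequest {
  path: string;          // file name inside the HF repo
  expectedSize: number;  // bytes, for cache validation and progress totals
}

interface DownloadProgress {
  totalProgress: number; // 0..1 across all requested files
  status: string;        // e.g. 'downloading' or 'cached'
  loaded: number;        // bytes received so far
  total: number;         // total expected bytes
}

declare function loadOrDownloadModel(
  repo: string,                               // Hugging Face repo id
  files: FileRequest[],                       // one entry per model asset
  onProgress?: (p: DownloadProgress) => void,
): Promise<Map<string, ReadableStream<Uint8Array>>>; // keyed by file path
```

Returning a `Map` keyed by file path lets one call fetch several model assets while still streaming each to LlmInference, preserving the stated goal of the removed `getModelStream`: never buffering multi-GB models entirely in RAM.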
@@ -94,88 +109,6 @@ export class WasmProvider implements LLMProvider {
     this.setStatus('ready');
   }
 
-  /**
-   * Download model with OPFS caching, returning a ReadableStream.
-   * The stream reader is passed directly to LlmInference as modelAssetBuffer
-   * to avoid buffering multi-GB models entirely in RAM.
-   */
-  private async getModelStream(
-    url: string,
-    filename: string,
-    knownSize: number,
-  ): Promise<ReadableStream<Uint8Array>> {
-    const total = knownSize;
-    const progressCb = (p: number, loaded: number, t: number) => {
-      this.opts.onProgress?.(p, 'downloading', loaded, t);
-    };
-
-    const root = await navigator.storage.getDirectory();
-    const modelsDir = await root.getDirectoryHandle('webmcp-models', { create: true });
-
-    // ── Clean orphan .crswap files (Chrome WritableStream leftovers) ──
-    try { await modelsDir.removeEntry(`${filename}.crswap`); } catch { /* no swap — OK */ }
-
-    // ── OPFS cache hit ───────────────────────────────────────────────
-    try {
-      const cached = await modelsDir.getFileHandle(filename);
-      const file = await cached.getFile();
-      if (file.size > 1000 && (total === 0 || Math.abs(file.size - total) < total * 0.01)) {
-        progressCb(1, file.size, file.size);
-        this.opts.onProgress?.(1, 'cached', file.size, file.size);
-        return file.stream() as ReadableStream<Uint8Array>;
-      }
-      // Corrupt cache (0 bytes or wrong size) — remove and re-download
-      await modelsDir.removeEntry(filename).catch(() => {});
-      try { await modelsDir.removeEntry(`${filename}.crswap`); } catch { /* OK */ }
-    } catch {
-      // Cache miss
-    }
-
-    // ── Network download (retry on 503) ───────────────────────────────
-    let response: Response | null = null;
-    for (let attempt = 0; attempt < 3; attempt++) {
-      response = await fetch(url);
-      if (response.ok) break;
-      if (response.status === 503 && attempt < 2) {
-        const wait = (attempt + 1) * 5000;
-        this.opts.onProgress?.(0, `retry in ${wait / 1000}s (503)`, 0, total);
-        await new Promise(r => setTimeout(r, wait));
-        continue;
-      }
-      throw new Error(`Download failed: ${response.status} ${response.statusText}`);
-    }
-    if (!response!.ok) throw new Error('Download failed after retries');
-    if (!response!.body) throw new Error('Response body is null');
-
-    const [streamForConsumer, streamForCache] = response!.body!.tee();
-
-    // Background OPFS cache (fire-and-forget)
-    (async () => {
-      try {
-        const handle = await modelsDir.getFileHandle(filename, { create: true });
-        const writable = await handle.createWritable();
-        await streamForCache.pipeTo(writable);
-      } catch {
-        try { await modelsDir.removeEntry(filename).catch(() => {}); } catch {}
-      }
-    })();
-
-    // Progress stream using known size
-    let loaded = 0;
-    const progressTransform = new TransformStream<Uint8Array, Uint8Array>({
-      transform(chunk, controller) {
-        loaded += chunk.length;
-        progressCb(total > 0 ? loaded / total : 0, loaded, total);
-        controller.enqueue(chunk);
-      },
-      flush() {
-        progressCb(1, total, total);
-      },
-    });
-
-    return streamForConsumer.pipeThrough(progressTransform);
-  }
-
   async chat(
     messages: ChatMessage[],
     tools: ProviderTool[],
@@ -260,53 +193,108 @@ export class WasmProvider implements LLMProvider {
     // even after our busy guard clears, because GPU resources release asynchronously.
     for (let attempt = 0; attempt < 5; attempt++) {
       try {
-        let lastToken = '';
-        let repeatCount = 0;
         const MAX_REPEATS = 20;
         const TOOL_CALL_MAX_CHARS = 3000;
 
-        const result = await this.inference.generateResponse(prompt, (partialResult: string, _done: boolean) => {
-          if (options?.signal?.aborted) {
-            this.inference?.cancelProcessing();
+        // ── Chrome M4 memory leak workaround (MediaPipe #6270) ─────────────
+        // Rather than accumulating chunks directly in a closure over the
+        // ProgressListener callback — which pins references and leaks on
+        // Chrome/Mac M4 — we bridge the callback into a ReadableStream and
+        // consume it via a ReadableStreamDefaultReader. Each chunk is fully
+        // processed and released before the next `await reader.read()`, which
+        // lets the GC reclaim intermediate strings between chunks.
+        const inference = this.inference;
+        const signal = options?.signal;
+        const streamControllerRef: { current: ReadableStreamDefaultController<string> | null } = { current: null };
+        const tokenStream = new ReadableStream<string>({
+          start(controller: ReadableStreamDefaultController<string>) {
+            streamControllerRef.current = controller;
+          },
+        });
+
+        const generationPromise = inference.generateResponse(prompt, (partialResult: string, done: boolean) => {
+          if (signal?.aborted) {
+            inference?.cancelProcessing();
+            try { streamControllerRef.current?.close(); } catch {}
             return;
           }
-          // Detect infinite repetition loop (e.g. Gemma repeating 't' 150 times)
-          if (partialResult === lastToken) {
-            repeatCount++;
-            if (repeatCount > MAX_REPEATS) {
-              this.inference?.cancelProcessing();
-              return;
-            }
-          } else {
-            lastToken = partialResult;
-            repeatCount = 0;
+          try { streamControllerRef.current?.enqueue(partialResult); } catch {}
+          if (done) {
+            try { streamControllerRef.current?.close(); } catch {}
           }
-          fullText += partialResult;
-          tokenCount++;
-          options?.onToken?.(partialResult);
-
-          // Early detect and strip fake tool_response in streaming
-          if (fullText.includes('<|tool_response>') && fullText.includes('<tool_call|>')) {
-            const lastCallEnd = fullText.lastIndexOf('<tool_call|>');
-            const responseStart = fullText.indexOf('<|tool_response>', lastCallEnd);
-            if (responseStart !== -1) {
-              // Gemma is hallucinating a response — cancel immediately
+        });
+
+        const reader: ReadableStreamDefaultReader<string> = tokenStream.getReader();
+        let lastToken = '';
+        let repeatCount = 0;
+        let cancelledEarly = false;
+        try {
+          while (true) {
+            const { value, done } = await reader.read();
+            if (done) break;
+            const partialResult = value ?? '';
+
+            // Detect infinite repetition loop (e.g. Gemma repeating 't' 150 times)
+            if (partialResult === lastToken) {
+              repeatCount++;
+              if (repeatCount > MAX_REPEATS) {
+                this.inference?.cancelProcessing();
+                cancelledEarly = true;
+                break;
+              }
+            } else {
+              lastToken = partialResult;
+              repeatCount = 0;
+            }
+            fullText += partialResult;
+            tokenCount++;
+            options?.onToken?.(partialResult);
+
+            // Early detect and strip fake tool_response in streaming
+            if (fullText.includes('<|tool_response>') && fullText.includes('<tool_call|>')) {
+              const lastCallEnd = fullText.lastIndexOf('<tool_call|>');
+              const responseStart = fullText.indexOf('<|tool_response>', lastCallEnd);
+              if (responseStart !== -1) {
+                // Gemma is hallucinating a response — cancel immediately
+                this.inference?.cancelProcessing();
+                // Truncate to last valid tool call
+                fullText = fullText.slice(0, lastCallEnd + '<tool_call|>'.length);
+                cancelledEarly = true;
+                break;
+              }
+            }
+
+            // Safety: if text grows way too long, force cancel
+            if (fullText.length > TOOL_CALL_MAX_CHARS * 2) {
               this.inference?.cancelProcessing();
-              // Truncate to last valid tool call
-              fullText = fullText.slice(0, lastCallEnd + '<tool_call|>'.length);
-              return;
+              cancelledEarly = true;
+              break;
             }
           }
-
-          // Safety: if text grows way too long, force cancel
-          if (fullText.length > TOOL_CALL_MAX_CHARS * 2) {
-            this.inference?.cancelProcessing();
-            return;
+        } finally {
+          try { reader.releaseLock(); } catch {}
+          if (cancelledEarly) {
+            try { streamControllerRef.current?.close(); } catch {}
           }
-        });
+        }
 
+        const result = await generationPromise.catch(() => '');
         // Fallback if the streaming callback didn't accumulate
         if (result && !fullText) fullText = result;
+
+        // Pipeline-trace event: why did generation stop?
+        // - cancelled: we aborted mid-stream (repetition loop, fake tool_response, oversized, abort signal)
+        // - maxTokens: hit the maxTokens ceiling passed via options
+        // - eos: natural end-of-stream from MediaPipe (model emitted EOS)
+        const endReason = cancelledEarly
+          ? 'cancelled'
+          : tokenCount >= (options?.maxTokens ?? 4096)
+            ? 'maxTokens'
+            : 'eos';
+        const tail = fullText.slice(-80).replace(/\n/g, '\\n');
+        console.log(`[wasm] end=${endReason} tokens=${tokenCount}/${options?.maxTokens ?? '?'} tail="${tail}"`);
+        this.trace?.push('generate', 'wasm', `end=${endReason} tokens=${tokenCount}/${options?.maxTokens ?? '?'} tail="${tail}"`, endReason === 'eos' ? 'ok' : 'warn');
+
         break; // Success — exit retry loop
       } catch (err) {
         const msg = String(err);
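The hunk above is the core of the workaround. The same callback-to-stream inversion can be shown in isolation; below is a minimal self-contained sketch, with `fakeGenerate` as a hypothetical stand-in for `LlmInference.generateResponse`, not a MediaPipe API.

```ts
// Minimal sketch of the callback → ReadableStream bridge used above.
// `fakeGenerate` stands in for the token-emitting callback API.
function fakeGenerate(onToken: (token: string, done: boolean) => void): void {
  const tokens = ['Hel', 'lo', ' wor', 'ld'];
  tokens.forEach((t, i) => onToken(t, i === tokens.length - 1));
}

async function consume(): Promise<string> {
  let controllerRef: ReadableStreamDefaultController<string> | null = null;
  const stream = new ReadableStream<string>({
    start(c) { controllerRef = c; },
  });

  // Push side: the callback only enqueues; it holds no accumulation state.
  fakeGenerate((token, done) => {
    controllerRef?.enqueue(token);
    if (done) controllerRef?.close();
  });

  // Pull side: each chunk is consumed and released between `await`s,
  // so intermediate strings become collectable, which is the point
  // of the workaround.
  const reader = stream.getReader();
  let text = '';
  while (true) {
    const { value, done } = await reader.read();
    if (done) break;
    text += value;
  }
  return text; // 'Hello world'
}
```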
@@ -325,9 +313,15 @@ export class WasmProvider implements LLMProvider {
       }
     }
 
-    // Strip any standalone <|tool_response> blocks in model output
-    // (the model should never generate these — they're injected by the framework)
-    fullText = fullText.replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '');
+    // Strip hallucinated framework tokens the model should never emit on its own:
+    // - <|tool_response>...<tool_response|> (injected by the framework, never generated)
+    // - <|channel>thought...<channel|> (ghost thought channels if Gemma emits one
+    //   without <|think|> activation — stray artefacts from pretraining)
+    // - <|think|> (stray thinking-mode markers)
+    fullText = fullText
+      .replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '')
+      .replace(/<\|channel>thought[\s\S]*?<channel\|>/g, '')
+      .replace(/<\|think\|>/g, '');
 
     const latencyMs = performance.now() - t0;
 
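As a concrete illustration of the widened cleanup, here is what the three chained `replace` calls remove. The input string is invented for illustration, not captured from a real model run.

```ts
// Illustrative input containing all three hallucinated token shapes.
const raw =
  '<|channel>thought\nThe user wants the time.<channel|>' +
  '<|think|>It is 3pm.' +
  '<|tool_response>response:clock{result:<|"|>3pm<|"|>}<tool_response|>';

const cleaned = raw
  .replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '')  // fake tool result
  .replace(/<\|channel>thought[\s\S]*?<channel\|>/g, '')       // ghost thought channel
  .replace(/<\|think\|>/g, '');                                // stray thinking marker

console.log(cleaned); // 'It is 3pm.'
```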
@@ -339,37 +333,7 @@ export class WasmProvider implements LLMProvider {
       }
     } catch {}
 
-    const content: ContentBlock[] = [];
-    const START_TAG = '<|tool_call>call:';
-    const END_TAG = '<tool_call|>';
-    let foundToolCall = false;
-    let scanIdx = 0;
-    while (true) {
-      const startIdx = fullText.indexOf(START_TAG, scanIdx);
-      if (startIdx === -1) break;
-      const nameStart = startIdx + START_TAG.length;
-      const braceIdx = fullText.indexOf('{', nameStart);
-      if (braceIdx === -1) break;
-      const name = fullText.slice(nameStart, braceIdx);
-      if (!/^\w+$/.test(name)) { scanIdx = nameStart; continue; }
-      const argsBlock = WasmProvider.extractArgsBlock(fullText, braceIdx);
-      if (!argsBlock) break;
-      const afterArgs = braceIdx + argsBlock.length;
-      if (!fullText.startsWith(END_TAG, afterArgs)) { scanIdx = afterArgs; continue; }
-      foundToolCall = true;
-      content.push({
-        type: 'tool_use',
-        id: `tc-${Date.now()}-${content.length}`,
-        name,
-        input: WasmProvider.parseGemmaArgs(argsBlock),
-      });
-      scanIdx = afterArgs + END_TAG.length;
-    }
-
-    if (!foundToolCall) {
-      const cleanText = fullText.replace(/<\|tool_call>.*?<tool_call\|>/g, '').trim();
-      content.push({ type: 'text', text: cleanText || fullText });
-    }
+    const { content, foundToolCall } = parseToolCalls(fullText, 'gemma-native');
 
     return {
       content,
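`parseToolCalls` itself lives in `../prompts/tool-call-parsers.ts` and is not shown in this diff. Judging from the destructuring at the call site and the removed inline parser it replaces, its shape is plausibly along these lines; this is a hypothetical declaration with inferred types, and the dialect union is a guess.

```ts
// Hypothetical declaration inferred from the call site above.
// ContentBlock is the package's own block type imported at the top of the file.
type ToolCallDialect = 'gemma-native' /* | possibly other dialects */;

interface ParsedToolCalls {
  // tool_use blocks, or a single text block when no tool call was found
  content: ContentBlock[];
  foundToolCall: boolean;
}

declare function parseToolCalls(
  fullText: string,
  dialect: ToolCallDialect,
): ParsedToolCalls;
```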
@@ -386,103 +350,30 @@ export class WasmProvider implements LLMProvider {
     };
   }
 
-  /**
-   * Extract a brace-balanced {...} block starting at text[startIdx].
-   * Ignores { and } that appear inside <|"|>...<|"|> string delimiters.
-   * Returns the full block including outer braces, or null if unbalanced.
-   */
-  private static extractArgsBlock(text: string, startIdx: number): string | null {
-    if (text[startIdx] !== '{') return null;
-    const DELIM = '<|"|>';
-    let depth = 0;
-    let inString = false;
-    let i = startIdx;
-    while (i < text.length) {
-      if (text.startsWith(DELIM, i)) {
-        inString = !inString;
-        i += DELIM.length;
-        continue;
-      }
-      if (!inString) {
-        if (text[i] === '{') depth++;
-        else if (text[i] === '}') {
-          depth--;
-          if (depth === 0) return text.slice(startIdx, i + 1);
-        }
-      }
-      i++;
-    }
-    return null;
-  }
-
-  /**
-   * Parse Gemma native tool call args by normalizing to JSON in one pass.
-   * 1. `<|"|>...<|"|>` → `"..."` (string delimiters)
-   * 2. Unquoted keys → `"quoted":` (valid JSON keys)
-   * Then `JSON.parse` handles nesting, arrays, numbers, booleans, null natively.
-   * Example: {schema:<|"|>senat<|"|>,params:{data:[{id:1}]}} → {schema:"senat",params:{data:[{id:1}]}}
-   */
-  private static parseGemmaArgs(raw: string): Record<string, unknown> {
-    const jsonStr = raw
-      .replace(/<\|"\|>([\s\S]*?)<\|"\|>/g, (_, s) => JSON.stringify(s))
-      .replace(/([{,])\s*([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":');
-    try {
-      const parsed = JSON.parse(jsonStr);
-      return (typeof parsed === 'object' && parsed !== null) ? parsed : {};
-    } catch {
-      return {};
-    }
-  }
-
-  /**
-   * Format a value for Gemma 4 native tool syntax.
-   * Backward-compat wrapper — delegates to the module-level `gemmaValue`
-   * exported from `tool-layers.ts` so the logic is shared with the
-   * system-prompt declaration block.
-   * @internal — used by formatToolCall / formatToolResponse
-   */
+  /** @internal — delegates to `gemmaValue` from prompts/gemma4-prompt-builder. */
   static gemmaValue(v: unknown): string {
     return gemmaValue(v);
   }
 
-  /**
-   * Format a tool declaration in Gemma 4 native syntax.
-   * Backward-compat wrapper — delegates to `formatGemmaToolDeclaration`
-   * exported from `tool-layers.ts`.
-   * @internal
-   */
+  /** @internal — delegates to `formatGemmaToolDeclaration` from prompts/gemma4-prompt-builder. */
   static formatToolDeclaration(tool: ProviderTool): string {
     return formatGemmaToolDeclaration(tool);
   }
 
-  /**
-   * Format a tool response in Gemma 4 native syntax.
-   * @internal — used by buildGemmaPrompt
-   */
-  static formatToolResponse(toolName: string, content: string): string {
-    const q = '<|"|>';
-    // Try to parse as JSON for structured output
-    try {
-      const parsed = JSON.parse(content);
-      return `<|tool_response>response:${toolName}${gemmaValue(parsed)}<tool_response|>`;
-    } catch {
-      // Plain string result
-      return `<|tool_response>response:${toolName}{result:${q}${content}${q}}<tool_response|>`;
-    }
+  /** @internal — delegates to `formatToolResponse` from prompts/gemma4-prompt-builder. */
+  static formatToolResponse(content: string): string {
+    return formatToolResponse(content);
   }
 
-  /**
-   * Format a tool call in Gemma 4 native syntax.
-   * @internal — used by buildGemmaPrompt
-   */
+  /** @internal — delegates to `formatToolCall` from prompts/gemma4-prompt-builder. */
   static formatToolCall(name: string, input: Record<string, unknown>): string {
-    const entries = Object.entries(input)
-      .map(([k, v]) => `${k}:${gemmaValue(v)}`);
-    return `<|tool_call>call:${name}{${entries.join(',')}}<tool_call|>`;
+    return formatToolCall(name, input);
   }
 
-  private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string): string {
-    return buildGemmaPrompt({ systemPrompt, tools, messages });
+  private buildPrompt(messages: ChatMessage[], _tools: ProviderTool[], systemPrompt?: string): string {
+    // `_tools` is intentionally ignored — tool declarations are embedded inline
+    // inside `systemPrompt` via buildSystemPromptWithAliases({ providerKind: 'gemma' }).
+    return buildGemmaPrompt({ systemPrompt, messages });
   }
 
   destroy() {
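The deleted bodies still document the Gemma native wire format these wrappers produce. Assuming `gemmaValue` serializes strings with `<|"|>` delimiters and numbers bare, as the removed `parseGemmaArgs` doc example suggests, the removed `formatToolCall` body implies output like the following sketch.

```ts
// Output implied by the removed formatToolCall body. The <|"|> string
// delimiter comes from the old code; the actual serialization now lives in
// prompts/gemma4-prompt-builder.ts and could differ.
WasmProvider.formatToolCall('get_weather', { city: 'Oslo', days: 3 });
// → '<|tool_call>call:get_weather{city:<|"|>Oslo<|"|>,days:3}<tool_call|>'
```

Note also the signature change visible in the hunk: `formatToolResponse` no longer takes a `toolName`, so callers that passed one must be updated to the single-argument form.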
@@ -493,98 +384,7 @@ export class WasmProvider implements LLMProvider {
   }
 }
 
-/**
- * Input for {@link buildGemmaPrompt}.
- *
- * Pass `messages: []` (or omit it) to produce a preview of the system/tool
- * portion of the prompt without any conversation turns — useful for debug
- * panels that want to display the exact transformed prompt Gemma will see.
- */
-export interface BuildGemmaPromptInput {
-  /** System prompt — expected to already be in Gemma native syntax (use
-   * `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`).
-   * The tool declarations are embedded inside this system prompt — they are
-   * NOT re-emitted from `tools` by this function anymore. */
-  systemPrompt?: string;
-  /** Provider tools — used only for message serialization (tool_use / tool_result
-   * ID → name mapping). Declarations live inside `systemPrompt`. */
-  tools: ProviderTool[];
-  /** Conversation turns. Defaults to `[]` (preview mode — no `<|turn>` user/model blocks). */
-  messages?: ChatMessage[];
-}
-
-/**
- * Build the final Gemma 4 native prompt string from a system prompt, a set of
- * provider tools, and a conversation history.
- *
- * This is the exact transformation applied by {@link WasmProvider} before
- * calling LlmInference — exported so UI debug panels can display the prompt
- * as it will actually be sent to the model.
- *
- * The system prompt is expected to already be in Gemma native syntax AND to
- * already embed the `<|tool>declaration>` blocks inline — build it with
- * `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`.
- *
- * Transformations applied:
- * 1. Wraps the system prompt in `<|turn>system\n<|think|>\n...<turn|>` — this
- *    activates Gemma 4's native thinking mode so the model emits its internal
- *    reasoning inside a `<|channel>thought\n...<channel|>` block which is then
- *    stripped from the final user-visible output (see the streaming cleanup in
- *    {@link WasmProvider}).
- * 2. Serializes messages as `<|turn>user|model\n...<turn|>` with tool_use →
- *    `<|tool_call>`, tool_result → `<|tool_response>`.
- * 3. Terminates with an open `<|turn>model\n` for generation.
- * 4. No explicit `<bos>` — LlmInference adds it via the tokenizer.
- */
-export function buildGemmaPrompt(input: BuildGemmaPromptInput): string {
-  const { systemPrompt, messages = [] } = input;
-
-  // Build a map of tool_use_id → tool_name from all messages for tool_result resolution
-  const toolNameById = new Map<string, string>();
-  for (const msg of messages) {
-    if (typeof msg.content !== 'string') {
-      for (const block of msg.content as ContentBlock[]) {
-        if (block.type === 'tool_use') {
-          const b = block as { type: 'tool_use'; id: string; name: string };
-          toolNameById.set(b.id, b.name);
-        }
-      }
-    }
-  }
-
-  const parts: string[] = [];
-
-  // Gemma 4 native structure: the system prompt already embeds tool
-  // declarations inline at each STEP (built via buildSystemPromptWithAliases
-  // with providerKind: 'gemma').
-  if (systemPrompt) {
-    parts.push(`<|turn>system\n${systemPrompt}\n<turn|>`);
-  }
-
-  for (const msg of messages) {
-    const role = msg.role === 'assistant' ? 'model' : 'user';
-    if (typeof msg.content === 'string') {
-      parts.push(`<|turn>${role}\n${msg.content}<turn|>`);
-    } else {
-      // Serialize all block types in Gemma 4 native format
-      const segments: string[] = [];
-      for (const block of msg.content as ContentBlock[]) {
-        if (block.type === 'text') {
-          segments.push((block as { type: 'text'; text: string }).text);
-        } else if (block.type === 'tool_use') {
-          const b = block as { type: 'tool_use'; name: string; input: Record<string, unknown> };
-          segments.push(WasmProvider.formatToolCall(b.name, b.input));
-        } else if (block.type === 'tool_result') {
-          const b = block as { type: 'tool_result'; tool_use_id: string; content: string };
-          const toolName = toolNameById.get(b.tool_use_id) ?? 'unknown';
-          segments.push(WasmProvider.formatToolResponse(toolName, b.content));
-        }
-      }
-      if (segments.length > 0) {
-        parts.push(`<|turn>${role}\n${segments.join('\n')}<turn|>`);
-      }
-    }
-  }
-  parts.push('<|turn>model\n');
-  return parts.join('\n');
-}
+// BuildGemmaPromptInput and buildGemmaPrompt now live in
+// ../prompts/gemma4-prompt-builder.ts. Re-exported here for backward compat.
+export { buildGemmaPrompt } from '../prompts/gemma4-prompt-builder.js';
+export type { BuildGemmaPromptInput } from '../prompts/gemma4-prompt-builder.js';
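The re-exports keep existing imports of `buildGemmaPrompt` and `BuildGemmaPromptInput` from this module working. Per the removed doc comment, passing empty `messages` yields a preview of just the system portion for debug panels; the sketch below follows the removed implementation and assumes the relocated version behaves the same.

```ts
// Preview mode, per the removed doc comment. Whether the relocated
// BuildGemmaPromptInput still requires a `tools` field is not visible in
// this diff; the new call site passes only { systemPrompt, messages }.
const preview = buildGemmaPrompt({
  systemPrompt: '...already in Gemma native syntax...',
  messages: [],
});
// Following the removed implementation, this yields:
// '<|turn>system\n...already in Gemma native syntax...\n<turn|>\n<|turn>model\n'
```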