@webmcp-auto-ui/agent 2.5.24 → 2.5.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/package.json +1 -1
  2. package/src/autoui-server.ts +17 -0
  3. package/src/diagnostics.ts +6 -6
  4. package/src/discovery-cache.ts +17 -3
  5. package/src/index.ts +3 -3
  6. package/src/loop.ts +27 -19
  7. package/src/providers/wasm.ts +184 -330
  8. package/src/recipes/_generated.ts +273 -0
  9. package/src/recipes/canary-data.md +50 -0
  10. package/src/recipes/canary-display.md +99 -0
  11. package/src/recipes/canary-middle.md +32 -0
  12. package/src/recipes/hummingbird-data.md +32 -0
  13. package/src/recipes/hummingbird-display.md +36 -0
  14. package/src/recipes/hummingbird-middle.md +18 -0
  15. package/src/tool-layers.ts +303 -31
  16. package/src/types.ts +6 -1
  17. package/tests/loop.test.ts +2 -2
  18. package/src/providers/gemma.worker.legacy.ts +0 -123
  19. package/src/providers/litert.worker.ts +0 -294
  20. package/src/recipes/widgets/actions.md +0 -28
  21. package/src/recipes/widgets/alert.md +0 -27
  22. package/src/recipes/widgets/cards.md +0 -41
  23. package/src/recipes/widgets/carousel.md +0 -39
  24. package/src/recipes/widgets/chart-rich.md +0 -51
  25. package/src/recipes/widgets/chart.md +0 -32
  26. package/src/recipes/widgets/code.md +0 -21
  27. package/src/recipes/widgets/d3.md +0 -36
  28. package/src/recipes/widgets/data-table.md +0 -46
  29. package/src/recipes/widgets/gallery.md +0 -39
  30. package/src/recipes/widgets/grid-data.md +0 -57
  31. package/src/recipes/widgets/hemicycle.md +0 -43
  32. package/src/recipes/widgets/js-sandbox.md +0 -32
  33. package/src/recipes/widgets/json-viewer.md +0 -27
  34. package/src/recipes/widgets/kv.md +0 -31
  35. package/src/recipes/widgets/list.md +0 -24
  36. package/src/recipes/widgets/log.md +0 -39
  37. package/src/recipes/widgets/map.md +0 -49
  38. package/src/recipes/widgets/profile.md +0 -49
  39. package/src/recipes/widgets/recipe-browser.md +0 -102
  40. package/src/recipes/widgets/sankey.md +0 -54
  41. package/src/recipes/widgets/stat-card.md +0 -43
  42. package/src/recipes/widgets/stat.md +0 -35
  43. package/src/recipes/widgets/tags.md +0 -30
  44. package/src/recipes/widgets/text.md +0 -19
  45. package/src/recipes/widgets/timeline.md +0 -38
  46. package/src/recipes/widgets/trombinoscope.md +0 -39
@@ -5,12 +5,13 @@
5
5
  */
6
6
  import type { LLMProvider, LLMResponse, ChatMessage, ProviderTool, WasmModelId, ContentBlock } from '../types.js';
7
7
  import type { PipelineTrace } from '../pipeline-trace.js';
8
+ import { formatGemmaToolDeclaration, gemmaValue } from '../tool-layers.js';
8
9
 
9
10
  export type WasmStatus = 'idle' | 'loading' | 'ready' | 'error';
10
11
 
11
12
  export interface WasmProviderOptions {
12
13
  model?: WasmModelId;
13
- contextSize?: number; // MediaPipe maxTokens — default 4096
14
+ contextSize?: number; // MediaPipe maxTokens — default 32768
14
15
  onProgress?: (progress: number, status: string, loaded?: number, total?: number) => void;
15
16
  onStatusChange?: (status: WasmStatus) => void;
16
17
  }
@@ -23,6 +24,8 @@ const LITERT_MODELS: Record<string, { repo: string; file: string; size: number }
23
24
  export class WasmProvider implements LLMProvider {
24
25
  readonly name = 'wasm';
25
26
  readonly model: string;
27
+ /** Signals to the agent loop that the system prompt must be built in Gemma native syntax. */
28
+ readonly promptKind = 'gemma' as const;
26
29
 
27
30
  /** Optional pipeline trace — set externally to trace parsing strategy fallbacks */
28
31
  trace?: PipelineTrace;
@@ -83,7 +86,7 @@ export class WasmProvider implements LLMProvider {
83
86
  baseOptions: {
84
87
  modelAssetBuffer: modelStream.getReader() as unknown as Uint8Array,
85
88
  },
86
- maxTokens: this.opts.contextSize ?? 4096,
89
+ maxTokens: this.opts.contextSize ?? 32768,
87
90
  temperature: 1.0,
88
91
  topK: 64,
89
92
  });
@@ -176,7 +179,7 @@ export class WasmProvider implements LLMProvider {
176
179
  async chat(
177
180
  messages: ChatMessage[],
178
181
  tools: ProviderTool[],
179
- options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number }
182
+ options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string }
180
183
  ): Promise<LLMResponse> {
181
184
  if (this.status !== 'ready') await this.initialize();
182
185
  if (!this.inference) throw new Error('Model not initialized');
@@ -202,7 +205,7 @@ export class WasmProvider implements LLMProvider {
202
205
  private async _chat(
203
206
  messages: ChatMessage[],
204
207
  tools: ProviderTool[],
205
- options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string; maxTools?: number }
208
+ options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string }
206
209
  ): Promise<LLMResponse> {
207
210
  // Apply per-request options
208
211
  if (options?.maxTokens || options?.temperature || options?.topK) {
@@ -218,22 +221,22 @@ export class WasmProvider implements LLMProvider {
218
221
  }
219
222
 
220
223
  // Build Gemma chat prompt (Gemma 4 format with tool hints)
221
- let prompt = this.buildPrompt(messages, tools, options?.system, options?.maxTools);
224
+ let prompt = this.buildPrompt(messages, tools, options?.system);
222
225
 
223
226
  // Aggressive clipping: Gemma struggles with long conversations — dynamic cap based on context size
224
- const contextTokens = this.opts.contextSize ?? 4096;
225
- const MAX_MESSAGES = contextTokens <= 4096 ? 8 : contextTokens <= 8192 ? 16 : 32;
227
+ const contextTokens = this.opts.contextSize ?? 32768;
228
+ const MAX_MESSAGES = Math.max(4, Math.floor(contextTokens / 512));
226
229
  while (messages.length > MAX_MESSAGES) {
227
230
  messages = messages.slice(1);
228
231
  }
229
- prompt = this.buildPrompt(messages, tools, options?.system, options?.maxTools);
232
+ prompt = this.buildPrompt(messages, tools, options?.system);
230
233
 
231
234
  // Token-based clipping: if prompt is still too large, drop oldest messages
232
- const maxPromptTokens = (this.opts.contextSize ?? 4096) - 512;
235
+ const maxPromptTokens = (this.opts.contextSize ?? 32768) - 512;
233
236
  try {
234
237
  while (this.inference.sizeInTokens(prompt) > maxPromptTokens && messages.length > 1) {
235
238
  messages = messages.slice(1);
236
- prompt = this.buildPrompt(messages, tools, options?.system, options?.maxTools);
239
+ prompt = this.buildPrompt(messages, tools, options?.system);
237
240
  }
238
241
  } catch {
239
242
  // sizeInTokens not available — skip clipping
@@ -322,38 +325,10 @@ export class WasmProvider implements LLMProvider {
322
325
  }
323
326
  }
324
327
 
325
- // Clean up hallucinated content after tool calls.
326
- // Gemma often hallucinates fake <|tool_response> blocks after <tool_call|>.
327
- // Strategy: keep only the FIRST complete tool call, strip everything after.
328
- const firstCallStart = fullText.indexOf('<|tool_call>');
329
- if (firstCallStart !== -1) {
330
- const firstCallEnd = fullText.indexOf('<tool_call|>', firstCallStart);
331
- if (firstCallEnd !== -1) {
332
- const afterFirstCall = fullText.slice(firstCallEnd + '<tool_call|>'.length);
333
- // Check if there's a REAL second tool call (not preceded by a fake tool_response)
334
- const nextCallStart = afterFirstCall.indexOf('<|tool_call>');
335
- if (nextCallStart !== -1) {
336
- // Check if there's a fake tool_response between the two calls
337
- const betweenCalls = afterFirstCall.slice(0, nextCallStart);
338
- if (betweenCalls.includes('<|tool_response>') || betweenCalls.includes('<tool_response|>')) {
339
- // Fake chained response — truncate after first tool call
340
- fullText = fullText.slice(0, firstCallEnd + '<tool_call|>'.length);
341
- }
342
- // Otherwise: legitimate multi-tool call, keep both
343
- } else {
344
- // No second tool call — truncate any trailing hallucination
345
- fullText = fullText.slice(0, firstCallEnd + '<tool_call|>'.length);
346
- }
347
- }
348
- }
349
-
350
- // Also strip any standalone <|tool_response> blocks in model output
328
+ // Strip any standalone <|tool_response> blocks in model output
351
329
  // (the model should never generate these — they're injected by the framework)
352
330
  fullText = fullText.replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '');
353
331
 
354
- // Strip thinking blocks — Gemma 4 wraps reasoning in <|channel>thought\n...<channel|>
355
- fullText = fullText.replace(/<\|channel>thought[\s\S]*?<channel\|>/g, '');
356
-
357
332
  const latencyMs = performance.now() - t0;
358
333
 
359
334
  // Use sizeInTokens for accurate token count if available
@@ -364,127 +339,34 @@ export class WasmProvider implements LLMProvider {
364
339
  }
365
340
  } catch {}
366
341
 
367
- // Parse tool calls — supports multiple formats:
368
- // 1. Gemma 4 native: <|tool_call>call:tool_name{key:<|"|>value<|"|>}<tool_call|>
369
- // 2. JSON format (legacy): <|tool_call>call:tool_name{"key":"value"}<tool_call|>
370
- // 3. Loose JSON: { "tool": "name", "args": {...} }
371
342
  const content: ContentBlock[] = [];
372
- const gemmaToolCallRe = /<\|tool_call>call:(\w+)(\{[^]*?\})<tool_call\|>/g;
373
- // Fallback: parenthesized format — call:name("arg1", {arg2})
374
- const parenToolCallRe = /<\|tool_call>call:(\w+)\(([^)]*(?:\{[^]*?\}[^)]*)?)\)(?:<tool_call\|>|$)/g;
375
- let match: RegExpExecArray | null;
343
+ const START_TAG = '<|tool_call>call:';
344
+ const END_TAG = '<tool_call|>';
376
345
  let foundToolCall = false;
377
-
378
- while ((match = gemmaToolCallRe.exec(fullText)) !== null) {
346
+ let scanIdx = 0;
347
+ while (true) {
348
+ const startIdx = fullText.indexOf(START_TAG, scanIdx);
349
+ if (startIdx === -1) break;
350
+ const nameStart = startIdx + START_TAG.length;
351
+ const braceIdx = fullText.indexOf('{', nameStart);
352
+ if (braceIdx === -1) break;
353
+ const name = fullText.slice(nameStart, braceIdx);
354
+ if (!/^\w+$/.test(name)) { scanIdx = nameStart; continue; }
355
+ const argsBlock = WasmProvider.extractArgsBlock(fullText, braceIdx);
356
+ if (!argsBlock) break;
357
+ const afterArgs = braceIdx + argsBlock.length;
358
+ if (!fullText.startsWith(END_TAG, afterArgs)) { scanIdx = afterArgs; continue; }
379
359
  foundToolCall = true;
380
- const toolName = match[1];
381
- let toolArgs: Record<string, unknown> = {};
382
- const rawArgs = match[2];
383
-
384
- // Strategy 1: Extract key-value pairs using <|"|> delimiters BEFORE replacing them.
385
- // This correctly handles internal quotes like: query:<|"|>SELECT data."date"<|"|>
386
- toolArgs = WasmProvider.parseGemmaArgs(rawArgs);
387
-
388
- // Strategy 2: If no pairs found, try simple replacement + JSON.parse
389
- if (Object.keys(toolArgs).length === 0) {
390
- const argsStr = rawArgs.replace(/<\|"\|>/g, '"');
391
- try {
392
- toolArgs = JSON.parse(argsStr);
393
- this.trace?.push('parse', toolName, 'fell back to quote replacement strategy', 'warn');
394
- } catch {
395
- // Strategy 3: regex key:value extraction on replaced string
396
- try {
397
- const obj: Record<string, unknown> = {};
398
- const kvRe = /(\w+)\s*:\s*(?:"([^"]*)"|([\d.]+(?:e[+-]?\d+)?)|(\[.*?\])|(true|false|null))/g;
399
- let kv: RegExpExecArray | null;
400
- while ((kv = kvRe.exec(argsStr)) !== null) {
401
- const [, k, strVal, numVal, arrVal, litVal] = kv;
402
- if (strVal !== undefined) obj[k] = strVal;
403
- else if (numVal !== undefined) obj[k] = Number(numVal);
404
- else if (arrVal !== undefined) { try { obj[k] = JSON.parse(arrVal); } catch { obj[k] = arrVal; } }
405
- else if (litVal !== undefined) obj[k] = JSON.parse(litVal);
406
- }
407
- if (Object.keys(obj).length > 0) {
408
- toolArgs = obj;
409
- this.trace?.push('parse', toolName, 'fell back to regex key:value strategy', 'warn');
410
- }
411
- } catch {}
412
- }
413
- }
414
-
415
- // P4 fix: recursively parse string fields that look like JSON objects/arrays.
416
- // Gemma wraps params in <|"|>{...}<|"|> which after replacement becomes "{...}" — a string.
417
- for (const [k, v] of Object.entries(toolArgs)) {
418
- if (typeof v === 'string' && (v.startsWith('{') || v.startsWith('['))) {
419
- try { toolArgs[k] = JSON.parse(v); } catch { /* keep as string */ }
420
- }
421
- }
422
-
423
360
  content.push({
424
361
  type: 'tool_use',
425
362
  id: `tc-${Date.now()}-${content.length}`,
426
- name: toolName,
427
- input: toolArgs,
363
+ name,
364
+ input: WasmProvider.parseGemmaArgs(argsBlock),
428
365
  });
429
- }
430
-
431
- // Fallback: try parenthesized format — call:component("table", {data: [...]})
432
- if (!foundToolCall) {
433
- while ((match = parenToolCallRe.exec(fullText)) !== null) {
434
- foundToolCall = true;
435
- const toolName = match[1];
436
- const argsRaw = match[2].replace(/<\|"\|>/g, '"').trim();
437
- let toolArgs: Record<string, unknown> = {};
438
-
439
- // Parse parenthesized args: could be ("name", {params}) or just ({params})
440
- try {
441
- // Try wrapping in array and parsing: ["name", {params}] or [{params}]
442
- const asArray = JSON.parse(`[${argsRaw}]`);
443
- if (asArray.length === 2 && typeof asArray[0] === 'string' && typeof asArray[1] === 'object') {
444
- // component("table", {data: [...]}) → {name: "table", params: {data: [...]}}
445
- toolArgs = { name: asArray[0], params: asArray[1] };
446
- } else if (asArray.length === 1 && typeof asArray[0] === 'object') {
447
- toolArgs = asArray[0];
448
- } else if (asArray.length >= 1) {
449
- // Generic: first string arg as name, rest as params
450
- toolArgs = { name: String(asArray[0]), ...(typeof asArray[1] === 'object' ? { params: asArray[1] } : {}) };
451
- }
452
- } catch {
453
- // Last resort: try parsing the whole thing as JSON object
454
- try { toolArgs = JSON.parse(argsRaw); } catch {}
455
- }
456
-
457
- content.push({
458
- type: 'tool_use',
459
- id: `tc-${Date.now()}-${content.length}`,
460
- name: toolName,
461
- input: toolArgs,
462
- });
463
- }
366
+ scanIdx = afterArgs + END_TAG.length;
464
367
  }
465
368
 
466
369
  if (!foundToolCall) {
467
- // Try JSON format fallback — strip markdown code blocks first
468
- let cleaned = fullText.trim();
469
- const mdMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
470
- if (mdMatch) cleaned = mdMatch[1].trim();
471
-
472
- try {
473
- const parsed = JSON.parse(cleaned) as { tool?: string; args?: Record<string, unknown> };
474
- if (parsed.tool && parsed.args) {
475
- foundToolCall = true;
476
- content.push({
477
- type: 'tool_use',
478
- id: `tc-${Date.now()}`,
479
- name: parsed.tool,
480
- input: parsed.args,
481
- });
482
- }
483
- } catch {}
484
- }
485
-
486
- if (!foundToolCall) {
487
- // Extract text without tool call tags
488
370
  const cleanText = fullText.replace(/<\|tool_call>.*?<tool_call\|>/g, '').trim();
489
371
  content.push({ type: 'text', text: cleanText || fullText });
490
372
  }
@@ -505,143 +387,84 @@ export class WasmProvider implements LLMProvider {
505
387
  }
506
388
 
507
389
  /**
508
- * Parse Gemma native tool call args, handling internal quotes in values.
509
- * Extracts key-value pairs using <|"|> delimiters before any replacement,
510
- * so internal quotes like data."date" are preserved correctly.
511
- * Example: {schema:<|"|>assemblee<|"|>,query:<|"|>SELECT data."date"<|"|>}
390
+ * Extract a brace-balanced {...} block starting at text[startIdx].
391
+ * Ignores { and } that appear inside <|"|>...<|"|> string delimiters.
392
+ * Returns the full block including outer braces, or null if unbalanced.
512
393
  */
513
- private static parseGemmaArgs(raw: string): Record<string, unknown> {
514
- const pairs: Record<string, unknown> = {};
515
-
516
- // Extract string values delimited by <|"|>
517
- const kvRegex = /(\w+)\s*:\s*<\|"\|>([\s\S]*?)<\|"\|>/g;
518
- let m: RegExpExecArray | null;
519
- while ((m = kvRegex.exec(raw)) !== null) {
520
- pairs[m[1]] = m[2];
521
- }
522
-
523
- // Extract numeric values (no delimiters)
524
- const numRegex = /(\w+)\s*:\s*(\d+(?:\.\d+)?(?:e[+-]?\d+)?)\s*(?:[,}]|$)/g;
525
- while ((m = numRegex.exec(raw)) !== null) {
526
- if (!(m[1] in pairs)) pairs[m[1]] = Number(m[2]);
527
- }
528
-
529
- // Extract boolean/null literals
530
- const litRegex = /(\w+)\s*:\s*(true|false|null)\s*(?:[,}]|$)/g;
531
- while ((m = litRegex.exec(raw)) !== null) {
532
- if (!(m[1] in pairs)) pairs[m[1]] = JSON.parse(m[2]);
533
- }
534
-
535
- // Extract inline object/array values (e.g. params:{items:[...]}, data:{a:1})
536
- // Gemma often writes nested objects without <|"|> delimiters.
537
- // We find key:{ or key:[ and then match balanced braces/brackets.
538
- const objRe = /(\w+)\s*:\s*([{\[])/g;
539
- while ((m = objRe.exec(raw)) !== null) {
540
- if (m[1] in pairs) continue; // already captured by a higher-priority regex
541
- const key = m[1];
542
- const opener = m[2];
543
- const closer = opener === '{' ? '}' : ']';
544
- let depth = 1;
545
- let i = m.index + m[0].length;
546
- while (i < raw.length && depth > 0) {
547
- const ch = raw[i];
548
- if (ch === opener) depth++;
549
- else if (ch !== opener && (ch === '{' || ch === '[')) depth++;
550
- else if (ch === closer) depth--;
551
- else if (ch !== closer && (ch === '}' || ch === ']')) depth--;
552
- i++;
394
+ private static extractArgsBlock(text: string, startIdx: number): string | null {
395
+ if (text[startIdx] !== '{') return null;
396
+ const DELIM = '<|"|>';
397
+ let depth = 0;
398
+ let inString = false;
399
+ let i = startIdx;
400
+ while (i < text.length) {
401
+ if (text.startsWith(DELIM, i)) {
402
+ inString = !inString;
403
+ i += DELIM.length;
404
+ continue;
553
405
  }
554
- const fragment = raw.slice(m.index + m[0].length - 1, i); // includes opener and closer
555
- // Replace <|"|> with " for JSON parsing
556
- const jsonStr = fragment.replace(/<\|"\|>/g, '"');
557
- try { pairs[key] = JSON.parse(jsonStr); } catch { /* unparseable — skip */ }
558
- }
559
-
560
- // Try to parse string values that look like JSON objects/arrays
561
- for (const [k, v] of Object.entries(pairs)) {
562
- if (typeof v === 'string' && (v.startsWith('{') || v.startsWith('['))) {
563
- try { pairs[k] = JSON.parse(v); } catch { /* keep as string */ }
406
+ if (!inString) {
407
+ if (text[i] === '{') depth++;
408
+ else if (text[i] === '}') {
409
+ depth--;
410
+ if (depth === 0) return text.slice(startIdx, i + 1);
411
+ }
564
412
  }
413
+ i++;
565
414
  }
415
+ return null;
416
+ }
566
417
 
567
- return pairs;
418
+ /**
419
+ * Parse Gemma native tool call args by normalizing to JSON in one pass.
420
+ * 1. `<|"|>...<|"|>` → `"..."` (string delimiters)
421
+ * 2. Unquoted keys → `"quoted":` (valid JSON keys)
422
+ * Then `JSON.parse` handles nesting, arrays, numbers, booleans, null natively.
423
+ * Example: {schema:<|"|>senat<|"|>,params:{data:[{id:1}]}} → {"schema":"senat","params":{"data":[{"id":1}]}}
424
+ */
425
+ private static parseGemmaArgs(raw: string): Record<string, unknown> {
426
+ const jsonStr = raw
427
+ .replace(/<\|"\|>([\s\S]*?)<\|"\|>/g, (_, s) => JSON.stringify(s))
428
+ .replace(/([{,])\s*([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":');
429
+ try {
430
+ const parsed = JSON.parse(jsonStr);
431
+ return (typeof parsed === 'object' && parsed !== null) ? parsed : {};
432
+ } catch {
433
+ return {};
434
+ }
568
435
  }
569
436
 
570
437
  /**
571
438
  * Format a value for Gemma 4 native tool syntax.
572
- * Strings use <|"|> delimiters, numbers/booleans/null are bare.
439
+ * Backward-compat wrapper — delegates to the module-level `gemmaValue`
440
+ * exported from `tool-layers.ts` so the logic is shared with the
441
+ * system-prompt declaration block.
442
+ * @internal — used by formatToolCall / formatToolResponse
573
443
  */
574
- private static gemmaValue(v: unknown): string {
575
- const q = '<|"|>';
576
- if (v === null || v === undefined) return 'null';
577
- if (typeof v === 'number' || typeof v === 'boolean') return String(v);
578
- if (Array.isArray(v)) return `[${v.map(i => WasmProvider.gemmaValue(i)).join(',')}]`;
579
- if (typeof v === 'object') {
580
- const entries = Object.entries(v as Record<string, unknown>)
581
- .map(([k, val]) => `${k}:${WasmProvider.gemmaValue(val)}`);
582
- return `{${entries.join(',')}}`;
583
- }
584
- return `${q}${String(v)}${q}`;
444
+ static gemmaValue(v: unknown): string {
445
+ return gemmaValue(v);
585
446
  }
586
447
 
587
448
  /**
588
449
  * Format a tool declaration in Gemma 4 native syntax.
450
+ * Backward-compat wrapper — delegates to `formatGemmaToolDeclaration`
451
+ * exported from `tool-layers.ts`.
452
+ * @internal
589
453
  */
590
- private static formatToolDeclaration(tool: ProviderTool): string {
591
- const q = '<|"|>';
592
- let decl = `<|tool>declaration:${tool.name}{\n`;
593
- decl += ` description:${q}${tool.description}${q}`;
594
-
595
- const schema = tool.input_schema;
596
- if (schema?.properties) {
597
- const props = schema.properties as Record<string, { description?: string; type?: string; enum?: string[]; format?: string; default?: unknown }>;
598
- decl += `,\n parameters:{\n properties:{\n`;
599
-
600
- const propEntries = Object.entries(props);
601
- for (let i = 0; i < propEntries.length; i++) {
602
- const [key, val] = propEntries[i];
603
- decl += ` ${key}:{`;
604
- const parts: string[] = [];
605
- if (val.description) parts.push(`description:${q}${val.description}${q}`);
606
- // P1 fix: if no type specified, infer OBJECT for params-like fields to avoid
607
- // Gemma wrapping the value in <|"|>...<|"|> (treating it as a string)
608
- let inferredType = val.type;
609
- if (!inferredType) {
610
- const descLower = (val.description ?? '').toLowerCase();
611
- if (descLower.includes('objet') || descLower.includes('object') || descLower.includes('parameter') || descLower.includes('paramètre') || key === 'params') {
612
- inferredType = 'object';
613
- } else {
614
- inferredType = 'string';
615
- }
616
- }
617
- parts.push(`type:${q}${inferredType.toUpperCase()}${q}`);
618
- if (val.enum) parts.push(`enum:[${val.enum.map(e => `${q}${e}${q}`).join(',')}]`);
619
- if (val.format) parts.push(`format:${q}${val.format}${q}`);
620
- if (val.default !== undefined) parts.push(`default:${WasmProvider.gemmaValue(val.default)}`);
621
- decl += parts.join(',');
622
- decl += `}${i < propEntries.length - 1 ? ',' : ''}\n`;
623
- }
624
-
625
- decl += ` }`;
626
- if (schema.required && Array.isArray(schema.required)) {
627
- decl += `,\n required:[${(schema.required as string[]).map(r => `${q}${r}${q}`).join(',')}]`;
628
- }
629
- decl += `,\n type:${q}OBJECT${q}\n }`;
630
- }
631
-
632
- decl += `\n}<tool|>`;
633
- return decl;
454
+ static formatToolDeclaration(tool: ProviderTool): string {
455
+ return formatGemmaToolDeclaration(tool);
634
456
  }
635
457
 
636
458
  /**
637
459
  * Format a tool response in Gemma 4 native syntax.
460
+ * @internal — used by buildGemmaPrompt
638
461
  */
639
- private static formatToolResponse(toolName: string, content: string): string {
462
+ static formatToolResponse(toolName: string, content: string): string {
640
463
  const q = '<|"|>';
641
464
  // Try to parse as JSON for structured output
642
465
  try {
643
466
  const parsed = JSON.parse(content);
644
- return `<|tool_response>response:${toolName}${WasmProvider.gemmaValue(parsed)}<tool_response|>`;
467
+ return `<|tool_response>response:${toolName}${gemmaValue(parsed)}<tool_response|>`;
645
468
  } catch {
646
469
  // Plain string result
647
470
  return `<|tool_response>response:${toolName}{result:${q}${content}${q}}<tool_response|>`;
@@ -650,87 +473,118 @@ export class WasmProvider implements LLMProvider {
650
473
 
651
474
  /**
652
475
  * Format a tool call in Gemma 4 native syntax.
476
+ * @internal — used by buildGemmaPrompt
653
477
  */
654
- private static formatToolCall(name: string, input: Record<string, unknown>): string {
478
+ static formatToolCall(name: string, input: Record<string, unknown>): string {
655
479
  const entries = Object.entries(input)
656
- .map(([k, v]) => `${k}:${WasmProvider.gemmaValue(v)}`);
480
+ .map(([k, v]) => `${k}:${gemmaValue(v)}`);
657
481
  return `<|tool_call>call:${name}{${entries.join(',')}}<tool_call|>`;
658
482
  }
659
483
 
660
- private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string, maxTools?: number): string {
661
- const systemParts: string[] = [];
484
+ private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string): string {
485
+ return buildGemmaPrompt({ systemPrompt, tools, messages });
486
+ }
662
487
 
663
- // Inject system prompt from settings if provided
664
- if (systemPrompt) {
665
- systemParts.push(systemPrompt);
666
- }
488
+ destroy() {
489
+ this.inference?.close?.();
490
+ this.inference = null;
491
+ this.setStatus('idle');
492
+ this.initPromise = null;
493
+ }
494
+ }
667
495
 
668
- if (tools.length > 0) {
669
- // Gemma small models struggle with too many tools — limit to most relevant
670
- const MAX_TOOLS = maxTools ?? 15;
671
- const limitedTools = tools.length > MAX_TOOLS
672
- ? [
673
- // Always include render_* tools (UI)
674
- ...tools.filter(t => t.name.startsWith('render_') || t.name === 'clear_canvas').slice(0, 8),
675
- // Fill with data tools
676
- ...tools.filter(t => !t.name.startsWith('render_') && t.name !== 'clear_canvas').slice(0, MAX_TOOLS - 8),
677
- ]
678
- : tools;
679
-
680
- // Native Gemma 4 tool declarations
681
- systemParts.push(limitedTools.map(t => WasmProvider.formatToolDeclaration(t)).join('\n'));
682
- }
496
+ /**
497
+ * Input for {@link buildGemmaPrompt}.
498
+ *
499
+ * Pass `messages: []` (or omit it) to produce a preview of the system/tool
500
+ * portion of the prompt without any conversation turns — useful for debug
501
+ * panels that want to display the exact transformed prompt Gemma will see.
502
+ */
503
+ export interface BuildGemmaPromptInput {
504
+ /** System prompt expected to already be in Gemma native syntax (use
505
+ * `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`).
506
+ * The tool declarations are embedded inside this system prompt — they are
507
+ * NOT re-emitted from `tools` by this function anymore. */
508
+ systemPrompt?: string;
509
+ /** Provider tools — currently not read by `buildGemmaPrompt`: the tool_use ID →
510
+ * name mapping is derived from `messages`, and declarations live inside `systemPrompt`. */
511
+ tools: ProviderTool[];
512
+ /** Conversation turns. Defaults to `[]` (preview mode — no `<|turn>` user/model blocks). */
513
+ messages?: ChatMessage[];
514
+ }
683
515
 
684
- // Build a map of tool_use_id → tool_name from all messages for tool_result resolution
685
- const toolNameById = new Map<string, string>();
686
- for (const msg of messages) {
687
- if (typeof msg.content !== 'string') {
688
- for (const block of msg.content as ContentBlock[]) {
689
- if (block.type === 'tool_use') {
690
- const b = block as { type: 'tool_use'; id: string; name: string };
691
- toolNameById.set(b.id, b.name);
692
- }
516
+ /**
517
+ * Build the final Gemma 4 native prompt string from a system prompt, a set of
518
+ * provider tools, and a conversation history.
519
+ *
520
+ * This is the exact transformation applied by {@link WasmProvider} before
521
+ * calling LlmInference — exported so UI debug panels can display the prompt
522
+ * as it will actually be sent to the model.
523
+ *
524
+ * The system prompt is expected to already be in Gemma native syntax AND to
525
+ * already embed the `<|tool>declaration:…<tool|>` blocks inline — build it with
526
+ * `buildSystemPromptWithAliases(layers, { providerKind: 'gemma' })`.
527
+ *
528
+ * Transformations applied:
529
+ * 1. Wraps the system prompt in `<|turn>system\n...\n<turn|>`. This function
530
+ * does not insert a `<|think|>` marker itself; when Gemma 4's native thinking
531
+ * mode is active, the model emits its internal reasoning inside a
532
+ * `<|channel>thought\n...<channel|>` block, which is expected to be stripped
533
+ * from the user-visible output (see the streaming cleanup in {@link WasmProvider}).
534
+ * 2. Serializes messages as `<|turn>user|model\n...<turn|>` with tool_use →
535
+ * `<|tool_call>`, tool_result → `<|tool_response>`.
536
+ * 3. Terminates with an open `<|turn>model\n` for generation.
537
+ * 4. No explicit `<bos>` — LlmInference adds it via the tokenizer.
538
+ */
539
+ export function buildGemmaPrompt(input: BuildGemmaPromptInput): string {
540
+ const { systemPrompt, messages = [] } = input;
541
+
542
+ // Build a map of tool_use_id → tool_name from all messages for tool_result resolution
543
+ const toolNameById = new Map<string, string>();
544
+ for (const msg of messages) {
545
+ if (typeof msg.content !== 'string') {
546
+ for (const block of msg.content as ContentBlock[]) {
547
+ if (block.type === 'tool_use') {
548
+ const b = block as { type: 'tool_use'; id: string; name: string };
549
+ toolNameById.set(b.id, b.name);
693
550
  }
694
551
  }
695
552
  }
553
+ }
696
554
 
697
- const parts: string[] = [];
698
- if (systemParts.length > 0) {
699
- // Gemma 4 has no system role — inject system content as a user turn
700
- parts.push(`<|turn>user\n${systemParts.join('\n')}<turn|>`);
701
- }
702
- for (const msg of messages) {
703
- const role = msg.role === 'assistant' ? 'model' : 'user';
704
- if (typeof msg.content === 'string') {
705
- parts.push(`<|turn>${role}\n${msg.content}<turn|>`);
706
- } else {
707
- // Serialize all block types in Gemma 4 native format
708
- const segments: string[] = [];
709
- for (const block of msg.content as ContentBlock[]) {
710
- if (block.type === 'text') {
711
- segments.push((block as { type: 'text'; text: string }).text);
712
- } else if (block.type === 'tool_use') {
713
- const b = block as { type: 'tool_use'; name: string; input: Record<string, unknown> };
714
- segments.push(WasmProvider.formatToolCall(b.name, b.input));
715
- } else if (block.type === 'tool_result') {
716
- const b = block as { type: 'tool_result'; tool_use_id: string; content: string };
717
- const toolName = toolNameById.get(b.tool_use_id) ?? 'unknown';
718
- segments.push(WasmProvider.formatToolResponse(toolName, b.content));
719
- }
720
- }
721
- if (segments.length > 0) {
722
- parts.push(`<|turn>${role}\n${segments.join('\n')}<turn|>`);
555
+ const parts: string[] = [];
556
+
557
+ // Gemma 4 native structure: the system prompt already embeds tool
558
+ // declarations inline at each STEP (built via buildSystemPromptWithAliases
559
+ // with providerKind: 'gemma').
560
+ if (systemPrompt) {
561
+ parts.push(`<|turn>system\n${systemPrompt}\n<turn|>`);
562
+ }
563
+
564
+ for (const msg of messages) {
565
+ const role = msg.role === 'assistant' ? 'model' : 'user';
566
+ if (typeof msg.content === 'string') {
567
+ parts.push(`<|turn>${role}\n${msg.content}<turn|>`);
568
+ } else {
569
+ // Serialize all block types in Gemma 4 native format
570
+ const segments: string[] = [];
571
+ for (const block of msg.content as ContentBlock[]) {
572
+ if (block.type === 'text') {
573
+ segments.push((block as { type: 'text'; text: string }).text);
574
+ } else if (block.type === 'tool_use') {
575
+ const b = block as { type: 'tool_use'; name: string; input: Record<string, unknown> };
576
+ segments.push(WasmProvider.formatToolCall(b.name, b.input));
577
+ } else if (block.type === 'tool_result') {
578
+ const b = block as { type: 'tool_result'; tool_use_id: string; content: string };
579
+ const toolName = toolNameById.get(b.tool_use_id) ?? 'unknown';
580
+ segments.push(WasmProvider.formatToolResponse(toolName, b.content));
723
581
  }
724
582
  }
583
+ if (segments.length > 0) {
584
+ parts.push(`<|turn>${role}\n${segments.join('\n')}<turn|>`);
585
+ }
725
586
  }
726
- parts.push('<|turn>model\n');
727
- return parts.join('\n');
728
- }
729
-
730
- destroy() {
731
- this.inference?.close?.();
732
- this.inference = null;
733
- this.setStatus('idle');
734
- this.initPromise = null;
735
587
  }
588
+ parts.push('<|turn>model\n');
589
+ return parts.join('\n');
736
590
  }