@webmcp-auto-ui/agent 2.5.25 → 2.5.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/autoui-server.ts +44 -0
- package/src/diagnostics.ts +6 -6
- package/src/discovery-cache.ts +17 -3
- package/src/index.ts +18 -4
- package/src/loop.ts +31 -34
- package/src/notebook-widgets/compact.ts +312 -0
- package/src/notebook-widgets/document.ts +372 -0
- package/src/notebook-widgets/editorial.ts +348 -0
- package/src/notebook-widgets/recipes/compact.md +104 -0
- package/src/notebook-widgets/recipes/document.md +100 -0
- package/src/notebook-widgets/recipes/editorial.md +104 -0
- package/src/notebook-widgets/recipes/workspace.md +94 -0
- package/src/notebook-widgets/shared.ts +1064 -0
- package/src/notebook-widgets/workspace.ts +328 -0
- package/src/prompts/claude-prompt-builder.ts +81 -0
- package/src/prompts/gemma4-prompt-builder.ts +205 -0
- package/src/prompts/index.ts +55 -0
- package/src/prompts/mistral-prompt-builder.ts +90 -0
- package/src/prompts/qwen-prompt-builder.ts +90 -0
- package/src/prompts/tool-call-parsers.ts +322 -0
- package/src/prompts/tool-refs.ts +196 -0
- package/src/providers/factory.ts +20 -3
- package/src/providers/transformers-models.ts +143 -0
- package/src/providers/transformers-serialize.ts +81 -0
- package/src/providers/transformers.ts +329 -0
- package/src/providers/transformers.worker.ts +667 -0
- package/src/providers/wasm.ts +150 -510
- package/src/recipes/_generated.ts +515 -0
- package/src/recipes/canary-data.md +50 -0
- package/src/recipes/canary-display.md +99 -0
- package/src/recipes/canary-middle.md +32 -0
- package/src/recipes/hackathon-assemblee-nationale.md +111 -0
- package/src/recipes/hummingbird-data.md +32 -0
- package/src/recipes/hummingbird-display.md +36 -0
- package/src/recipes/hummingbird-middle.md +18 -0
- package/src/recipes/notebook-playbook.md +129 -0
- package/src/tool-layers.ts +33 -157
- package/src/trace-observer.ts +669 -0
- package/src/types.ts +20 -5
- package/src/util/opfs-cache.ts +265 -0
- package/tests/gemma-prompt.test.ts +472 -0
- package/tests/loop.test.ts +5 -5
- package/tests/transformers-serialize.test.ts +103 -0
- package/src/providers/gemma.worker.legacy.ts +0 -123
- package/src/providers/litert.worker.ts +0 -294
- package/src/recipes/widgets/actions.md +0 -28
- package/src/recipes/widgets/alert.md +0 -27
- package/src/recipes/widgets/cards.md +0 -41
- package/src/recipes/widgets/carousel.md +0 -39
- package/src/recipes/widgets/chart-rich.md +0 -51
- package/src/recipes/widgets/chart.md +0 -32
- package/src/recipes/widgets/code.md +0 -21
- package/src/recipes/widgets/d3.md +0 -36
- package/src/recipes/widgets/data-table.md +0 -46
- package/src/recipes/widgets/gallery.md +0 -39
- package/src/recipes/widgets/grid-data.md +0 -57
- package/src/recipes/widgets/hemicycle.md +0 -43
- package/src/recipes/widgets/js-sandbox.md +0 -32
- package/src/recipes/widgets/json-viewer.md +0 -27
- package/src/recipes/widgets/kv.md +0 -31
- package/src/recipes/widgets/list.md +0 -24
- package/src/recipes/widgets/log.md +0 -39
- package/src/recipes/widgets/map.md +0 -49
- package/src/recipes/widgets/profile.md +0 -49
- package/src/recipes/widgets/recipe-browser.md +0 -102
- package/src/recipes/widgets/sankey.md +0 -54
- package/src/recipes/widgets/stat-card.md +0 -43
- package/src/recipes/widgets/stat.md +0 -35
- package/src/recipes/widgets/tags.md +0 -30
- package/src/recipes/widgets/text.md +0 -19
- package/src/recipes/widgets/timeline.md +0 -38
- package/src/recipes/widgets/trombinoscope.md +0 -39
package/src/providers/wasm.ts
CHANGED
|
@@ -5,12 +5,21 @@
|
|
|
5
5
|
*/
|
|
6
6
|
import type { LLMProvider, LLMResponse, ChatMessage, ProviderTool, WasmModelId, ContentBlock } from '../types.js';
|
|
7
7
|
import type { PipelineTrace } from '../pipeline-trace.js';
|
|
8
|
+
import {
|
|
9
|
+
buildGemmaPrompt,
|
|
10
|
+
formatGemmaToolDeclaration,
|
|
11
|
+
formatToolCall,
|
|
12
|
+
formatToolResponse,
|
|
13
|
+
gemmaValue,
|
|
14
|
+
} from '../prompts/gemma4-prompt-builder.js';
|
|
15
|
+
import { parseToolCalls } from '../prompts/tool-call-parsers.js';
|
|
16
|
+
import { loadOrDownloadModel } from '../util/opfs-cache.js';
|
|
8
17
|
|
|
9
18
|
export type WasmStatus = 'idle' | 'loading' | 'ready' | 'error';
|
|
10
19
|
|
|
11
20
|
export interface WasmProviderOptions {
|
|
12
21
|
model?: WasmModelId;
|
|
13
|
-
contextSize?: number; // MediaPipe maxTokens — default
|
|
22
|
+
contextSize?: number; // MediaPipe maxTokens — default 32768
|
|
14
23
|
onProgress?: (progress: number, status: string, loaded?: number, total?: number) => void;
|
|
15
24
|
onStatusChange?: (status: WasmStatus) => void;
|
|
16
25
|
}
|
|
@@ -23,6 +32,8 @@ const LITERT_MODELS: Record<string, { repo: string; file: string; size: number }
|
|
|
23
32
|
export class WasmProvider implements LLMProvider {
|
|
24
33
|
readonly name = 'wasm';
|
|
25
34
|
readonly model: string;
|
|
35
|
+
/** Signals to the agent loop that the system prompt must be built in Gemma native syntax. */
|
|
36
|
+
readonly promptKind = 'gemma' as const;
|
|
26
37
|
|
|
27
38
|
/** Optional pipeline trace — set externally to trace parsing strategy fallbacks */
|
|
28
39
|
trace?: PipelineTrace;
|
|
@@ -61,7 +72,6 @@ export class WasmProvider implements LLMProvider {
|
|
|
61
72
|
|
|
62
73
|
const modelInfo = LITERT_MODELS[this.model] ?? LITERT_MODELS['gemma-e2b'];
|
|
63
74
|
const { repo, file, size: expectedSize } = modelInfo;
|
|
64
|
-
const url = `https://huggingface.co/${repo}/resolve/main/${file}`;
|
|
65
75
|
|
|
66
76
|
this.opts.onProgress?.(0, 'downloading', 0, expectedSize);
|
|
67
77
|
|
|
@@ -70,7 +80,15 @@ export class WasmProvider implements LLMProvider {
|
|
|
70
80
|
'https://cdn.jsdelivr.net/npm/@mediapipe/tasks-genai@0.10.27/wasm',
|
|
71
81
|
);
|
|
72
82
|
|
|
73
|
-
const
|
|
83
|
+
const streams = await loadOrDownloadModel(
|
|
84
|
+
repo,
|
|
85
|
+
[{ path: file, expectedSize }],
|
|
86
|
+
(progress) => {
|
|
87
|
+
this.opts.onProgress?.(progress.totalProgress, progress.status, progress.loaded, progress.total);
|
|
88
|
+
},
|
|
89
|
+
);
|
|
90
|
+
const modelStream = streams.get(file);
|
|
91
|
+
if (!modelStream) throw new Error(`Model file missing: ${file}`);
|
|
74
92
|
|
|
75
93
|
this.opts.onProgress?.(1, 'initializing', 0, 0);
|
|
76
94
|
|
|
@@ -83,7 +101,7 @@ export class WasmProvider implements LLMProvider {
|
|
|
83
101
|
baseOptions: {
|
|
84
102
|
modelAssetBuffer: modelStream.getReader() as unknown as Uint8Array,
|
|
85
103
|
},
|
|
86
|
-
maxTokens: this.opts.contextSize ??
|
|
104
|
+
maxTokens: this.opts.contextSize ?? 32768,
|
|
87
105
|
temperature: 1.0,
|
|
88
106
|
topK: 64,
|
|
89
107
|
});
|
|
@@ -91,92 +109,10 @@ export class WasmProvider implements LLMProvider {
|
|
|
91
109
|
this.setStatus('ready');
|
|
92
110
|
}
|
|
93
111
|
|
|
94
|
-
/**
|
|
95
|
-
* Download model with OPFS caching, returning a ReadableStream.
|
|
96
|
-
* The stream reader is passed directly to LlmInference as modelAssetBuffer
|
|
97
|
-
* to avoid buffering multi-GB models entirely in RAM.
|
|
98
|
-
*/
|
|
99
|
-
private async getModelStream(
|
|
100
|
-
url: string,
|
|
101
|
-
filename: string,
|
|
102
|
-
knownSize: number,
|
|
103
|
-
): Promise<ReadableStream<Uint8Array>> {
|
|
104
|
-
const total = knownSize;
|
|
105
|
-
const progressCb = (p: number, loaded: number, t: number) => {
|
|
106
|
-
this.opts.onProgress?.(p, 'downloading', loaded, t);
|
|
107
|
-
};
|
|
108
|
-
|
|
109
|
-
const root = await navigator.storage.getDirectory();
|
|
110
|
-
const modelsDir = await root.getDirectoryHandle('webmcp-models', { create: true });
|
|
111
|
-
|
|
112
|
-
// ── Clean orphan .crswap files (Chrome WritableStream leftovers) ──
|
|
113
|
-
try { await modelsDir.removeEntry(`${filename}.crswap`); } catch { /* no swap — OK */ }
|
|
114
|
-
|
|
115
|
-
// ── OPFS cache hit ───────────────────────────────────────────────
|
|
116
|
-
try {
|
|
117
|
-
const cached = await modelsDir.getFileHandle(filename);
|
|
118
|
-
const file = await cached.getFile();
|
|
119
|
-
if (file.size > 1000 && (total === 0 || Math.abs(file.size - total) < total * 0.01)) {
|
|
120
|
-
progressCb(1, file.size, file.size);
|
|
121
|
-
this.opts.onProgress?.(1, 'cached', file.size, file.size);
|
|
122
|
-
return file.stream() as ReadableStream<Uint8Array>;
|
|
123
|
-
}
|
|
124
|
-
// Corrupt cache (0 bytes or wrong size) — remove and re-download
|
|
125
|
-
await modelsDir.removeEntry(filename).catch(() => {});
|
|
126
|
-
try { await modelsDir.removeEntry(`${filename}.crswap`); } catch { /* OK */ }
|
|
127
|
-
} catch {
|
|
128
|
-
// Cache miss
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
// ── Network download (retry on 503) ───────────────────────────────
|
|
132
|
-
let response: Response | null = null;
|
|
133
|
-
for (let attempt = 0; attempt < 3; attempt++) {
|
|
134
|
-
response = await fetch(url);
|
|
135
|
-
if (response.ok) break;
|
|
136
|
-
if (response.status === 503 && attempt < 2) {
|
|
137
|
-
const wait = (attempt + 1) * 5000;
|
|
138
|
-
this.opts.onProgress?.(0, `retry in ${wait / 1000}s (503)`, 0, total);
|
|
139
|
-
await new Promise(r => setTimeout(r, wait));
|
|
140
|
-
continue;
|
|
141
|
-
}
|
|
142
|
-
throw new Error(`Download failed: ${response.status} ${response.statusText}`);
|
|
143
|
-
}
|
|
144
|
-
if (!response!.ok) throw new Error('Download failed after retries');
|
|
145
|
-
if (!response!.body) throw new Error('Response body is null');
|
|
146
|
-
|
|
147
|
-
const [streamForConsumer, streamForCache] = response!.body!.tee();
|
|
148
|
-
|
|
149
|
-
// Background OPFS cache (fire-and-forget)
|
|
150
|
-
(async () => {
|
|
151
|
-
try {
|
|
152
|
-
const handle = await modelsDir.getFileHandle(filename, { create: true });
|
|
153
|
-
const writable = await handle.createWritable();
|
|
154
|
-
await streamForCache.pipeTo(writable);
|
|
155
|
-
} catch {
|
|
156
|
-
try { await modelsDir.removeEntry(filename).catch(() => {}); } catch {}
|
|
157
|
-
}
|
|
158
|
-
})();
|
|
159
|
-
|
|
160
|
-
// Progress stream using known size
|
|
161
|
-
let loaded = 0;
|
|
162
|
-
const progressTransform = new TransformStream<Uint8Array, Uint8Array>({
|
|
163
|
-
transform(chunk, controller) {
|
|
164
|
-
loaded += chunk.length;
|
|
165
|
-
progressCb(total > 0 ? loaded / total : 0, loaded, total);
|
|
166
|
-
controller.enqueue(chunk);
|
|
167
|
-
},
|
|
168
|
-
flush() {
|
|
169
|
-
progressCb(1, total, total);
|
|
170
|
-
},
|
|
171
|
-
});
|
|
172
|
-
|
|
173
|
-
return streamForConsumer.pipeThrough(progressTransform);
|
|
174
|
-
}
|
|
175
|
-
|
|
176
112
|
async chat(
|
|
177
113
|
messages: ChatMessage[],
|
|
178
114
|
tools: ProviderTool[],
|
|
179
|
-
options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string
|
|
115
|
+
options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string }
|
|
180
116
|
): Promise<LLMResponse> {
|
|
181
117
|
if (this.status !== 'ready') await this.initialize();
|
|
182
118
|
if (!this.inference) throw new Error('Model not initialized');
|
|
@@ -202,7 +138,7 @@ export class WasmProvider implements LLMProvider {
|
|
|
202
138
|
private async _chat(
|
|
203
139
|
messages: ChatMessage[],
|
|
204
140
|
tools: ProviderTool[],
|
|
205
|
-
options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string
|
|
141
|
+
options?: { signal?: AbortSignal; maxTokens?: number; temperature?: number; topK?: number; onToken?: (token: string) => void; system?: string }
|
|
206
142
|
): Promise<LLMResponse> {
|
|
207
143
|
// Apply per-request options
|
|
208
144
|
if (options?.maxTokens || options?.temperature || options?.topK) {
|
|
@@ -218,22 +154,22 @@ export class WasmProvider implements LLMProvider {
|
|
|
218
154
|
}
|
|
219
155
|
|
|
220
156
|
// Build Gemma chat prompt (Gemma 4 format with tool hints)
|
|
221
|
-
let prompt = this.buildPrompt(messages, tools, options?.system
|
|
157
|
+
let prompt = this.buildPrompt(messages, tools, options?.system);
|
|
222
158
|
|
|
223
159
|
// Aggressive clipping: Gemma struggles with long conversations — dynamic cap based on context size
|
|
224
|
-
const contextTokens = this.opts.contextSize ??
|
|
225
|
-
const MAX_MESSAGES =
|
|
160
|
+
const contextTokens = this.opts.contextSize ?? 32768;
|
|
161
|
+
const MAX_MESSAGES = Math.max(4, Math.floor(contextTokens / 512));
|
|
226
162
|
while (messages.length > MAX_MESSAGES) {
|
|
227
163
|
messages = messages.slice(1);
|
|
228
164
|
}
|
|
229
|
-
prompt = this.buildPrompt(messages, tools, options?.system
|
|
165
|
+
prompt = this.buildPrompt(messages, tools, options?.system);
|
|
230
166
|
|
|
231
167
|
// Token-based clipping: if prompt is still too large, drop oldest messages
|
|
232
|
-
const maxPromptTokens = (this.opts.contextSize ??
|
|
168
|
+
const maxPromptTokens = (this.opts.contextSize ?? 32768) - 512;
|
|
233
169
|
try {
|
|
234
170
|
while (this.inference.sizeInTokens(prompt) > maxPromptTokens && messages.length > 1) {
|
|
235
171
|
messages = messages.slice(1);
|
|
236
|
-
prompt = this.buildPrompt(messages, tools, options?.system
|
|
172
|
+
prompt = this.buildPrompt(messages, tools, options?.system);
|
|
237
173
|
}
|
|
238
174
|
} catch {
|
|
239
175
|
// sizeInTokens not available — skip clipping
|
|
@@ -257,53 +193,108 @@ export class WasmProvider implements LLMProvider {
|
|
|
257
193
|
// even after our busy guard clears, because GPU resources release asynchronously.
|
|
258
194
|
for (let attempt = 0; attempt < 5; attempt++) {
|
|
259
195
|
try {
|
|
260
|
-
let lastToken = '';
|
|
261
|
-
let repeatCount = 0;
|
|
262
196
|
const MAX_REPEATS = 20;
|
|
263
197
|
const TOOL_CALL_MAX_CHARS = 3000;
|
|
264
198
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
199
|
+
// ── Chrome M4 memory leak workaround (MediaPipe #6270) ─────────────
|
|
200
|
+
// Rather than accumulating chunks directly in a closure over the
|
|
201
|
+
// ProgressListener callback — which pins references and leaks on
|
|
202
|
+
// Chrome/Mac M4 — we bridge the callback into a ReadableStream and
|
|
203
|
+
// consume it via a ReadableStreamDefaultReader. Each chunk is fully
|
|
204
|
+
// processed and released before the next `await reader.read()`, which
|
|
205
|
+
// lets the GC reclaim intermediate strings between chunks.
|
|
206
|
+
const inference = this.inference;
|
|
207
|
+
const signal = options?.signal;
|
|
208
|
+
const streamControllerRef: { current: ReadableStreamDefaultController<string> | null } = { current: null };
|
|
209
|
+
const tokenStream = new ReadableStream<string>({
|
|
210
|
+
start(controller: ReadableStreamDefaultController<string>) {
|
|
211
|
+
streamControllerRef.current = controller;
|
|
212
|
+
},
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
const generationPromise = inference.generateResponse(prompt, (partialResult: string, done: boolean) => {
|
|
216
|
+
if (signal?.aborted) {
|
|
217
|
+
inference?.cancelProcessing();
|
|
218
|
+
try { streamControllerRef.current?.close(); } catch {}
|
|
268
219
|
return;
|
|
269
220
|
}
|
|
270
|
-
|
|
271
|
-
if (
|
|
272
|
-
|
|
273
|
-
if (repeatCount > MAX_REPEATS) {
|
|
274
|
-
this.inference?.cancelProcessing();
|
|
275
|
-
return;
|
|
276
|
-
}
|
|
277
|
-
} else {
|
|
278
|
-
lastToken = partialResult;
|
|
279
|
-
repeatCount = 0;
|
|
221
|
+
try { streamControllerRef.current?.enqueue(partialResult); } catch {}
|
|
222
|
+
if (done) {
|
|
223
|
+
try { streamControllerRef.current?.close(); } catch {}
|
|
280
224
|
}
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
const reader: ReadableStreamDefaultReader<string> = tokenStream.getReader();
|
|
228
|
+
let lastToken = '';
|
|
229
|
+
let repeatCount = 0;
|
|
230
|
+
let cancelledEarly = false;
|
|
231
|
+
try {
|
|
232
|
+
while (true) {
|
|
233
|
+
const { value, done } = await reader.read();
|
|
234
|
+
if (done) break;
|
|
235
|
+
const partialResult = value ?? '';
|
|
236
|
+
|
|
237
|
+
// Detect infinite repetition loop (e.g. Gemma repeating 't' 150 times)
|
|
238
|
+
if (partialResult === lastToken) {
|
|
239
|
+
repeatCount++;
|
|
240
|
+
if (repeatCount > MAX_REPEATS) {
|
|
241
|
+
this.inference?.cancelProcessing();
|
|
242
|
+
cancelledEarly = true;
|
|
243
|
+
break;
|
|
244
|
+
}
|
|
245
|
+
} else {
|
|
246
|
+
lastToken = partialResult;
|
|
247
|
+
repeatCount = 0;
|
|
248
|
+
}
|
|
249
|
+
fullText += partialResult;
|
|
250
|
+
tokenCount++;
|
|
251
|
+
options?.onToken?.(partialResult);
|
|
252
|
+
|
|
253
|
+
// Early detect and strip fake tool_response in streaming
|
|
254
|
+
if (fullText.includes('<|tool_response>') && fullText.includes('<tool_call|>')) {
|
|
255
|
+
const lastCallEnd = fullText.lastIndexOf('<tool_call|>');
|
|
256
|
+
const responseStart = fullText.indexOf('<|tool_response>', lastCallEnd);
|
|
257
|
+
if (responseStart !== -1) {
|
|
258
|
+
// Gemma is hallucinating a response — cancel immediately
|
|
259
|
+
this.inference?.cancelProcessing();
|
|
260
|
+
// Truncate to last valid tool call
|
|
261
|
+
fullText = fullText.slice(0, lastCallEnd + '<tool_call|>'.length);
|
|
262
|
+
cancelledEarly = true;
|
|
263
|
+
break;
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
// Safety: if text grows way too long, force cancel
|
|
268
|
+
if (fullText.length > TOOL_CALL_MAX_CHARS * 2) {
|
|
291
269
|
this.inference?.cancelProcessing();
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
return;
|
|
270
|
+
cancelledEarly = true;
|
|
271
|
+
break;
|
|
295
272
|
}
|
|
296
273
|
}
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
if (
|
|
300
|
-
|
|
301
|
-
return;
|
|
274
|
+
} finally {
|
|
275
|
+
try { reader.releaseLock(); } catch {}
|
|
276
|
+
if (cancelledEarly) {
|
|
277
|
+
try { streamControllerRef.current?.close(); } catch {}
|
|
302
278
|
}
|
|
303
|
-
}
|
|
279
|
+
}
|
|
304
280
|
|
|
281
|
+
const result = await generationPromise.catch(() => '');
|
|
305
282
|
// Fallback if the streaming callback didn't accumulate
|
|
306
283
|
if (result && !fullText) fullText = result;
|
|
284
|
+
|
|
285
|
+
// Pipeline-trace event: why did generation stop?
|
|
286
|
+
// - cancelled: we aborted mid-stream (repetition loop, fake tool_response, oversized, abort signal)
|
|
287
|
+
// - maxTokens: hit the maxTokens ceiling passed via options
|
|
288
|
+
// - eos: natural end-of-stream from MediaPipe (model emitted EOS)
|
|
289
|
+
const endReason = cancelledEarly
|
|
290
|
+
? 'cancelled'
|
|
291
|
+
: tokenCount >= (options?.maxTokens ?? 4096)
|
|
292
|
+
? 'maxTokens'
|
|
293
|
+
: 'eos';
|
|
294
|
+
const tail = fullText.slice(-80).replace(/\n/g, '\\n');
|
|
295
|
+
console.log(`[wasm] end=${endReason} tokens=${tokenCount}/${options?.maxTokens ?? '?'} tail="${tail}"`);
|
|
296
|
+
this.trace?.push('generate', 'wasm', `end=${endReason} tokens=${tokenCount}/${options?.maxTokens ?? '?'} tail="${tail}"`, endReason === 'eos' ? 'ok' : 'warn');
|
|
297
|
+
|
|
307
298
|
break; // Success — exit retry loop
|
|
308
299
|
} catch (err) {
|
|
309
300
|
const msg = String(err);
|
|
@@ -322,37 +313,15 @@ export class WasmProvider implements LLMProvider {
|
|
|
322
313
|
}
|
|
323
314
|
}
|
|
324
315
|
|
|
325
|
-
//
|
|
326
|
-
//
|
|
327
|
-
//
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
const nextCallStart = afterFirstCall.indexOf('<|tool_call>');
|
|
335
|
-
if (nextCallStart !== -1) {
|
|
336
|
-
// Check if there's a fake tool_response between the two calls
|
|
337
|
-
const betweenCalls = afterFirstCall.slice(0, nextCallStart);
|
|
338
|
-
if (betweenCalls.includes('<|tool_response>') || betweenCalls.includes('<tool_response|>')) {
|
|
339
|
-
// Fake chained response — truncate after first tool call
|
|
340
|
-
fullText = fullText.slice(0, firstCallEnd + '<tool_call|>'.length);
|
|
341
|
-
}
|
|
342
|
-
// Otherwise: legitimate multi-tool call, keep both
|
|
343
|
-
} else {
|
|
344
|
-
// No second tool call — truncate any trailing hallucination
|
|
345
|
-
fullText = fullText.slice(0, firstCallEnd + '<tool_call|>'.length);
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
// Also strip any standalone <|tool_response> blocks in model output
|
|
351
|
-
// (the model should never generate these — they're injected by the framework)
|
|
352
|
-
fullText = fullText.replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '');
|
|
353
|
-
|
|
354
|
-
// Strip thinking blocks — Gemma 4 wraps reasoning in <|channel>thought\n...<channel|>
|
|
355
|
-
fullText = fullText.replace(/<\|channel>thought[\s\S]*?<channel\|>/g, '');
|
|
316
|
+
// Strip hallucinated framework tokens the model should never emit on its own:
|
|
317
|
+
// - <|tool_response>...<tool_response|> (injected by the framework, never generated)
|
|
318
|
+
// - <|channel>thought...<channel|> (ghost thought channels if Gemma emits one
|
|
319
|
+
// without <|think|> activation — stray artefacts from pretraining)
|
|
320
|
+
// - <|think|> (stray thinking-mode markers)
|
|
321
|
+
fullText = fullText
|
|
322
|
+
.replace(/<\|tool_response>[\s\S]*?<tool_response\|>/g, '')
|
|
323
|
+
.replace(/<\|channel>thought[\s\S]*?<channel\|>/g, '')
|
|
324
|
+
.replace(/<\|think\|>/g, '');
|
|
356
325
|
|
|
357
326
|
const latencyMs = performance.now() - t0;
|
|
358
327
|
|
|
@@ -364,130 +333,7 @@ export class WasmProvider implements LLMProvider {
|
|
|
364
333
|
}
|
|
365
334
|
} catch {}
|
|
366
335
|
|
|
367
|
-
|
|
368
|
-
// 1. Gemma 4 native: <|tool_call>call:tool_name{key:<|"|>value<|"|>}<tool_call|>
|
|
369
|
-
// 2. JSON format (legacy): <|tool_call>call:tool_name{"key":"value"}<tool_call|>
|
|
370
|
-
// 3. Loose JSON: { "tool": "name", "args": {...} }
|
|
371
|
-
const content: ContentBlock[] = [];
|
|
372
|
-
const gemmaToolCallRe = /<\|tool_call>call:(\w+)(\{[^]*?\})<tool_call\|>/g;
|
|
373
|
-
// Fallback: parenthesized format — call:name("arg1", {arg2})
|
|
374
|
-
const parenToolCallRe = /<\|tool_call>call:(\w+)\(([^)]*(?:\{[^]*?\}[^)]*)?)\)(?:<tool_call\|>|$)/g;
|
|
375
|
-
let match: RegExpExecArray | null;
|
|
376
|
-
let foundToolCall = false;
|
|
377
|
-
|
|
378
|
-
while ((match = gemmaToolCallRe.exec(fullText)) !== null) {
|
|
379
|
-
foundToolCall = true;
|
|
380
|
-
const toolName = match[1];
|
|
381
|
-
let toolArgs: Record<string, unknown> = {};
|
|
382
|
-
const rawArgs = match[2];
|
|
383
|
-
|
|
384
|
-
// Strategy 1: Extract key-value pairs using <|"|> delimiters BEFORE replacing them.
|
|
385
|
-
// This correctly handles internal quotes like: query:<|"|>SELECT data."date"<|"|>
|
|
386
|
-
toolArgs = WasmProvider.parseGemmaArgs(rawArgs);
|
|
387
|
-
|
|
388
|
-
// Strategy 2: If no pairs found, try simple replacement + JSON.parse
|
|
389
|
-
if (Object.keys(toolArgs).length === 0) {
|
|
390
|
-
const argsStr = rawArgs.replace(/<\|"\|>/g, '"');
|
|
391
|
-
try {
|
|
392
|
-
toolArgs = JSON.parse(argsStr);
|
|
393
|
-
this.trace?.push('parse', toolName, 'fell back to quote replacement strategy', 'warn');
|
|
394
|
-
} catch {
|
|
395
|
-
// Strategy 3: regex key:value extraction on replaced string
|
|
396
|
-
try {
|
|
397
|
-
const obj: Record<string, unknown> = {};
|
|
398
|
-
const kvRe = /(\w+)\s*:\s*(?:"([^"]*)"|([\d.]+(?:e[+-]?\d+)?)|(\[.*?\])|(true|false|null))/g;
|
|
399
|
-
let kv: RegExpExecArray | null;
|
|
400
|
-
while ((kv = kvRe.exec(argsStr)) !== null) {
|
|
401
|
-
const [, k, strVal, numVal, arrVal, litVal] = kv;
|
|
402
|
-
if (strVal !== undefined) obj[k] = strVal;
|
|
403
|
-
else if (numVal !== undefined) obj[k] = Number(numVal);
|
|
404
|
-
else if (arrVal !== undefined) { try { obj[k] = JSON.parse(arrVal); } catch { obj[k] = arrVal; } }
|
|
405
|
-
else if (litVal !== undefined) obj[k] = JSON.parse(litVal);
|
|
406
|
-
}
|
|
407
|
-
if (Object.keys(obj).length > 0) {
|
|
408
|
-
toolArgs = obj;
|
|
409
|
-
this.trace?.push('parse', toolName, 'fell back to regex key:value strategy', 'warn');
|
|
410
|
-
}
|
|
411
|
-
} catch {}
|
|
412
|
-
}
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
// P4 fix: recursively parse string fields that look like JSON objects/arrays.
|
|
416
|
-
// Gemma wraps params in <|"|>{...}<|"|> which after replacement becomes "{...}" — a string.
|
|
417
|
-
for (const [k, v] of Object.entries(toolArgs)) {
|
|
418
|
-
if (typeof v === 'string' && (v.startsWith('{') || v.startsWith('['))) {
|
|
419
|
-
try { toolArgs[k] = JSON.parse(v); } catch { /* keep as string */ }
|
|
420
|
-
}
|
|
421
|
-
}
|
|
422
|
-
|
|
423
|
-
content.push({
|
|
424
|
-
type: 'tool_use',
|
|
425
|
-
id: `tc-${Date.now()}-${content.length}`,
|
|
426
|
-
name: toolName,
|
|
427
|
-
input: toolArgs,
|
|
428
|
-
});
|
|
429
|
-
}
|
|
430
|
-
|
|
431
|
-
// Fallback: try parenthesized format — call:component("table", {data: [...]})
|
|
432
|
-
if (!foundToolCall) {
|
|
433
|
-
while ((match = parenToolCallRe.exec(fullText)) !== null) {
|
|
434
|
-
foundToolCall = true;
|
|
435
|
-
const toolName = match[1];
|
|
436
|
-
const argsRaw = match[2].replace(/<\|"\|>/g, '"').trim();
|
|
437
|
-
let toolArgs: Record<string, unknown> = {};
|
|
438
|
-
|
|
439
|
-
// Parse parenthesized args: could be ("name", {params}) or just ({params})
|
|
440
|
-
try {
|
|
441
|
-
// Try wrapping in array and parsing: ["name", {params}] or [{params}]
|
|
442
|
-
const asArray = JSON.parse(`[${argsRaw}]`);
|
|
443
|
-
if (asArray.length === 2 && typeof asArray[0] === 'string' && typeof asArray[1] === 'object') {
|
|
444
|
-
// component("table", {data: [...]}) → {name: "table", params: {data: [...]}}
|
|
445
|
-
toolArgs = { name: asArray[0], params: asArray[1] };
|
|
446
|
-
} else if (asArray.length === 1 && typeof asArray[0] === 'object') {
|
|
447
|
-
toolArgs = asArray[0];
|
|
448
|
-
} else if (asArray.length >= 1) {
|
|
449
|
-
// Generic: first string arg as name, rest as params
|
|
450
|
-
toolArgs = { name: String(asArray[0]), ...(typeof asArray[1] === 'object' ? { params: asArray[1] } : {}) };
|
|
451
|
-
}
|
|
452
|
-
} catch {
|
|
453
|
-
// Last resort: try parsing the whole thing as JSON object
|
|
454
|
-
try { toolArgs = JSON.parse(argsRaw); } catch {}
|
|
455
|
-
}
|
|
456
|
-
|
|
457
|
-
content.push({
|
|
458
|
-
type: 'tool_use',
|
|
459
|
-
id: `tc-${Date.now()}-${content.length}`,
|
|
460
|
-
name: toolName,
|
|
461
|
-
input: toolArgs,
|
|
462
|
-
});
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
if (!foundToolCall) {
|
|
467
|
-
// Try JSON format fallback — strip markdown code blocks first
|
|
468
|
-
let cleaned = fullText.trim();
|
|
469
|
-
const mdMatch = cleaned.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
|
|
470
|
-
if (mdMatch) cleaned = mdMatch[1].trim();
|
|
471
|
-
|
|
472
|
-
try {
|
|
473
|
-
const parsed = JSON.parse(cleaned) as { tool?: string; args?: Record<string, unknown> };
|
|
474
|
-
if (parsed.tool && parsed.args) {
|
|
475
|
-
foundToolCall = true;
|
|
476
|
-
content.push({
|
|
477
|
-
type: 'tool_use',
|
|
478
|
-
id: `tc-${Date.now()}`,
|
|
479
|
-
name: parsed.tool,
|
|
480
|
-
input: parsed.args,
|
|
481
|
-
});
|
|
482
|
-
}
|
|
483
|
-
} catch {}
|
|
484
|
-
}
|
|
485
|
-
|
|
486
|
-
if (!foundToolCall) {
|
|
487
|
-
// Extract text without tool call tags
|
|
488
|
-
const cleanText = fullText.replace(/<\|tool_call>.*?<tool_call\|>/g, '').trim();
|
|
489
|
-
content.push({ type: 'text', text: cleanText || fullText });
|
|
490
|
-
}
|
|
336
|
+
const { content, foundToolCall } = parseToolCalls(fullText, 'gemma-native');
|
|
491
337
|
|
|
492
338
|
return {
|
|
493
339
|
content,
|
|
@@ -504,241 +350,30 @@ export class WasmProvider implements LLMProvider {
|
|
|
504
350
|
};
|
|
505
351
|
}
|
|
506
352
|
|
|
507
|
-
/**
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
* so internal quotes like data."date" are preserved correctly.
|
|
511
|
-
* Example: {schema:<|"|>assemblee<|"|>,query:<|"|>SELECT data."date"<|"|>}
|
|
512
|
-
*/
|
|
513
|
-
private static parseGemmaArgs(raw: string): Record<string, unknown> {
|
|
514
|
-
const pairs: Record<string, unknown> = {};
|
|
515
|
-
|
|
516
|
-
// Extract string values delimited by <|"|>
|
|
517
|
-
const kvRegex = /(\w+)\s*:\s*<\|"\|>([\s\S]*?)<\|"\|>/g;
|
|
518
|
-
let m: RegExpExecArray | null;
|
|
519
|
-
while ((m = kvRegex.exec(raw)) !== null) {
|
|
520
|
-
pairs[m[1]] = m[2];
|
|
521
|
-
}
|
|
522
|
-
|
|
523
|
-
// Extract numeric values (no delimiters)
|
|
524
|
-
const numRegex = /(\w+)\s*:\s*(\d+(?:\.\d+)?(?:e[+-]?\d+)?)\s*(?:[,}]|$)/g;
|
|
525
|
-
while ((m = numRegex.exec(raw)) !== null) {
|
|
526
|
-
if (!(m[1] in pairs)) pairs[m[1]] = Number(m[2]);
|
|
527
|
-
}
|
|
528
|
-
|
|
529
|
-
// Extract boolean/null literals
|
|
530
|
-
const litRegex = /(\w+)\s*:\s*(true|false|null)\s*(?:[,}]|$)/g;
|
|
531
|
-
while ((m = litRegex.exec(raw)) !== null) {
|
|
532
|
-
if (!(m[1] in pairs)) pairs[m[1]] = JSON.parse(m[2]);
|
|
533
|
-
}
|
|
534
|
-
|
|
535
|
-
// Extract inline object/array values (e.g. params:{items:[...]}, data:{a:1})
|
|
536
|
-
// Gemma often writes nested objects without <|"|> delimiters.
|
|
537
|
-
// We find key:{ or key:[ and then match balanced braces/brackets.
|
|
538
|
-
const objRe = /(\w+)\s*:\s*([{\[])/g;
|
|
539
|
-
while ((m = objRe.exec(raw)) !== null) {
|
|
540
|
-
if (m[1] in pairs) continue; // already captured by a higher-priority regex
|
|
541
|
-
const key = m[1];
|
|
542
|
-
const opener = m[2];
|
|
543
|
-
const closer = opener === '{' ? '}' : ']';
|
|
544
|
-
let depth = 1;
|
|
545
|
-
let i = m.index + m[0].length;
|
|
546
|
-
while (i < raw.length && depth > 0) {
|
|
547
|
-
const ch = raw[i];
|
|
548
|
-
if (ch === opener) depth++;
|
|
549
|
-
else if (ch !== opener && (ch === '{' || ch === '[')) depth++;
|
|
550
|
-
else if (ch === closer) depth--;
|
|
551
|
-
else if (ch !== closer && (ch === '}' || ch === ']')) depth--;
|
|
552
|
-
i++;
|
|
553
|
-
}
|
|
554
|
-
const fragment = raw.slice(m.index + m[0].length - 1, i); // includes opener and closer
|
|
555
|
-
// Replace <|"|> with " for JSON parsing
|
|
556
|
-
const jsonStr = fragment.replace(/<\|"\|>/g, '"');
|
|
557
|
-
try { pairs[key] = JSON.parse(jsonStr); } catch { /* unparseable — skip */ }
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
// Try to parse string values that look like JSON objects/arrays
|
|
561
|
-
for (const [k, v] of Object.entries(pairs)) {
|
|
562
|
-
if (typeof v === 'string' && (v.startsWith('{') || v.startsWith('['))) {
|
|
563
|
-
try { pairs[k] = JSON.parse(v); } catch { /* keep as string */ }
|
|
564
|
-
}
|
|
565
|
-
}
|
|
566
|
-
|
|
567
|
-
return pairs;
|
|
353
|
+
/** @internal — delegates to `gemmaValue` from prompts/gemma4-prompt-builder. */
|
|
354
|
+
static gemmaValue(v: unknown): string {
|
|
355
|
+
return gemmaValue(v);
|
|
568
356
|
}
|
|
569
357
|
|
|
570
|
-
/**
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
*/
|
|
574
|
-
private static gemmaValue(v: unknown): string {
|
|
575
|
-
const q = '<|"|>';
|
|
576
|
-
if (v === null || v === undefined) return 'null';
|
|
577
|
-
if (typeof v === 'number' || typeof v === 'boolean') return String(v);
|
|
578
|
-
if (Array.isArray(v)) return `[${v.map(i => WasmProvider.gemmaValue(i)).join(',')}]`;
|
|
579
|
-
if (typeof v === 'object') {
|
|
580
|
-
const entries = Object.entries(v as Record<string, unknown>)
|
|
581
|
-
.map(([k, val]) => `${k}:${WasmProvider.gemmaValue(val)}`);
|
|
582
|
-
return `{${entries.join(',')}}`;
|
|
583
|
-
}
|
|
584
|
-
return `${q}${String(v)}${q}`;
|
|
358
|
+
/** @internal — delegates to `formatGemmaToolDeclaration` from prompts/gemma4-prompt-builder. */
|
|
359
|
+
static formatToolDeclaration(tool: ProviderTool): string {
|
|
360
|
+
return formatGemmaToolDeclaration(tool);
|
|
585
361
|
}
|
|
586
362
|
|
|
587
|
-
/**
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
private static formatToolDeclaration(tool: ProviderTool): string {
|
|
591
|
-
const q = '<|"|>';
|
|
592
|
-
let decl = `<|tool>declaration:${tool.name}{\n`;
|
|
593
|
-
decl += ` description:${q}${tool.description}${q}`;
|
|
594
|
-
|
|
595
|
-
const schema = tool.input_schema;
|
|
596
|
-
if (schema?.properties) {
|
|
597
|
-
const props = schema.properties as Record<string, { description?: string; type?: string; enum?: string[]; format?: string; default?: unknown }>;
|
|
598
|
-
decl += `,\n parameters:{\n properties:{\n`;
|
|
599
|
-
|
|
600
|
-
const propEntries = Object.entries(props);
|
|
601
|
-
for (let i = 0; i < propEntries.length; i++) {
|
|
602
|
-
const [key, val] = propEntries[i];
|
|
603
|
-
decl += ` ${key}:{`;
|
|
604
|
-
const parts: string[] = [];
|
|
605
|
-
if (val.description) parts.push(`description:${q}${val.description}${q}`);
|
|
606
|
-
// P1 fix: if no type specified, infer OBJECT for params-like fields to avoid
|
|
607
|
-
// Gemma wrapping the value in <|"|>...<|"|> (treating it as a string)
|
|
608
|
-
let inferredType = val.type;
|
|
609
|
-
if (!inferredType) {
|
|
610
|
-
const descLower = (val.description ?? '').toLowerCase();
|
|
611
|
-
if (descLower.includes('objet') || descLower.includes('object') || descLower.includes('parameter') || descLower.includes('paramètre') || key === 'params') {
|
|
612
|
-
inferredType = 'object';
|
|
613
|
-
} else {
|
|
614
|
-
inferredType = 'string';
|
|
615
|
-
}
|
|
616
|
-
}
|
|
617
|
-
parts.push(`type:${q}${inferredType.toUpperCase()}${q}`);
|
|
618
|
-
if (val.enum) parts.push(`enum:[${val.enum.map(e => `${q}${e}${q}`).join(',')}]`);
|
|
619
|
-
if (val.format) parts.push(`format:${q}${val.format}${q}`);
|
|
620
|
-
if (val.default !== undefined) parts.push(`default:${WasmProvider.gemmaValue(val.default)}`);
|
|
621
|
-
decl += parts.join(',');
|
|
622
|
-
decl += `}${i < propEntries.length - 1 ? ',' : ''}\n`;
|
|
623
|
-
}
|
|
624
|
-
|
|
625
|
-
decl += ` }`;
|
|
626
|
-
if (schema.required && Array.isArray(schema.required)) {
|
|
627
|
-
decl += `,\n required:[${(schema.required as string[]).map(r => `${q}${r}${q}`).join(',')}]`;
|
|
628
|
-
}
|
|
629
|
-
decl += `,\n type:${q}OBJECT${q}\n }`;
|
|
630
|
-
}
|
|
631
|
-
|
|
632
|
-
decl += `\n}<tool|>`;
|
|
633
|
-
return decl;
|
|
363
|
+
/** @internal — delegates to `formatToolResponse` from prompts/gemma4-prompt-builder. */
|
|
364
|
+
static formatToolResponse(content: string): string {
|
|
365
|
+
return formatToolResponse(content);
|
|
634
366
|
}
|
|
635
367
|
|
|
636
|
-
/**
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
private static formatToolResponse(toolName: string, content: string): string {
|
|
640
|
-
const q = '<|"|>';
|
|
641
|
-
// Try to parse as JSON for structured output
|
|
642
|
-
try {
|
|
643
|
-
const parsed = JSON.parse(content);
|
|
644
|
-
return `<|tool_response>response:${toolName}${WasmProvider.gemmaValue(parsed)}<tool_response|>`;
|
|
645
|
-
} catch {
|
|
646
|
-
// Plain string result
|
|
647
|
-
return `<|tool_response>response:${toolName}{result:${q}${content}${q}}<tool_response|>`;
|
|
648
|
-
}
|
|
368
|
+
/** @internal — delegates to `formatToolCall` from prompts/gemma4-prompt-builder. */
|
|
369
|
+
static formatToolCall(name: string, input: Record<string, unknown>): string {
|
|
370
|
+
return formatToolCall(name, input);
|
|
649
371
|
}
|
|
650
372
|
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
const entries = Object.entries(input)
|
|
656
|
-
.map(([k, v]) => `${k}:${WasmProvider.gemmaValue(v)}`);
|
|
657
|
-
return `<|tool_call>call:${name}{${entries.join(',')}}<tool_call|>`;
|
|
658
|
-
}
|
|
659
|
-
|
|
660
|
-
private buildPrompt(messages: ChatMessage[], tools: ProviderTool[], systemPrompt?: string, maxTools?: number): string {
|
|
661
|
-
const systemParts: string[] = [];
|
|
662
|
-
|
|
663
|
-
// Inject system prompt from settings if provided.
|
|
664
|
-
// Rewrite paren syntax "tool_name()" / "tool_name(args)" to Gemma 4 native call syntax.
|
|
665
|
-
// Without this, Gemma mimics the paren syntax as plain text (regression from commit 2724b9e).
|
|
666
|
-
if (systemPrompt) {
|
|
667
|
-
const gemmaPrompt = systemPrompt.replace(
|
|
668
|
-
/\b([a-zA-Z_][a-zA-Z0-9_]*)\(([^)]*)\)/g,
|
|
669
|
-
(_full, name, args) => {
|
|
670
|
-
const trimmed = args.trim();
|
|
671
|
-
if (!trimmed) return `<|tool_call>call:${name}{}<tool_call|>`;
|
|
672
|
-
const argBody = trimmed
|
|
673
|
-
.split(',')
|
|
674
|
-
.map((a: string) => `${a.trim()}:<|"|>...<|"|>`)
|
|
675
|
-
.join(',');
|
|
676
|
-
return `<|tool_call>call:${name}{${argBody}}<tool_call|>`;
|
|
677
|
-
}
|
|
678
|
-
);
|
|
679
|
-
systemParts.push(gemmaPrompt);
|
|
680
|
-
}
|
|
681
|
-
|
|
682
|
-
if (tools.length > 0) {
|
|
683
|
-
// Gemma small models struggle with too many tools — limit to most relevant
|
|
684
|
-
const MAX_TOOLS = maxTools ?? 15;
|
|
685
|
-
const limitedTools = tools.length > MAX_TOOLS
|
|
686
|
-
? [
|
|
687
|
-
// Always include render_* tools (UI)
|
|
688
|
-
...tools.filter(t => t.name.startsWith('render_') || t.name === 'clear_canvas').slice(0, 8),
|
|
689
|
-
// Fill with data tools
|
|
690
|
-
...tools.filter(t => !t.name.startsWith('render_') && t.name !== 'clear_canvas').slice(0, MAX_TOOLS - 8),
|
|
691
|
-
]
|
|
692
|
-
: tools;
|
|
693
|
-
|
|
694
|
-
// Native Gemma 4 tool declarations
|
|
695
|
-
systemParts.push(limitedTools.map(t => WasmProvider.formatToolDeclaration(t)).join('\n'));
|
|
696
|
-
}
|
|
697
|
-
|
|
698
|
-
// Build a map of tool_use_id → tool_name from all messages for tool_result resolution
|
|
699
|
-
const toolNameById = new Map<string, string>();
|
|
700
|
-
for (const msg of messages) {
|
|
701
|
-
if (typeof msg.content !== 'string') {
|
|
702
|
-
for (const block of msg.content as ContentBlock[]) {
|
|
703
|
-
if (block.type === 'tool_use') {
|
|
704
|
-
const b = block as { type: 'tool_use'; id: string; name: string };
|
|
705
|
-
toolNameById.set(b.id, b.name);
|
|
706
|
-
}
|
|
707
|
-
}
|
|
708
|
-
}
|
|
709
|
-
}
|
|
710
|
-
|
|
711
|
-
const parts: string[] = [];
|
|
712
|
-
if (systemParts.length > 0) {
|
|
713
|
-
// Gemma 4 has no system role — inject system content as a user turn
|
|
714
|
-
parts.push(`<|turn>user\n${systemParts.join('\n')}<turn|>`);
|
|
715
|
-
}
|
|
716
|
-
for (const msg of messages) {
|
|
717
|
-
const role = msg.role === 'assistant' ? 'model' : 'user';
|
|
718
|
-
if (typeof msg.content === 'string') {
|
|
719
|
-
parts.push(`<|turn>${role}\n${msg.content}<turn|>`);
|
|
720
|
-
} else {
|
|
721
|
-
// Serialize all block types in Gemma 4 native format
|
|
722
|
-
const segments: string[] = [];
|
|
723
|
-
for (const block of msg.content as ContentBlock[]) {
|
|
724
|
-
if (block.type === 'text') {
|
|
725
|
-
segments.push((block as { type: 'text'; text: string }).text);
|
|
726
|
-
} else if (block.type === 'tool_use') {
|
|
727
|
-
const b = block as { type: 'tool_use'; name: string; input: Record<string, unknown> };
|
|
728
|
-
segments.push(WasmProvider.formatToolCall(b.name, b.input));
|
|
729
|
-
} else if (block.type === 'tool_result') {
|
|
730
|
-
const b = block as { type: 'tool_result'; tool_use_id: string; content: string };
|
|
731
|
-
const toolName = toolNameById.get(b.tool_use_id) ?? 'unknown';
|
|
732
|
-
segments.push(WasmProvider.formatToolResponse(toolName, b.content));
|
|
733
|
-
}
|
|
734
|
-
}
|
|
735
|
-
if (segments.length > 0) {
|
|
736
|
-
parts.push(`<|turn>${role}\n${segments.join('\n')}<turn|>`);
|
|
737
|
-
}
|
|
738
|
-
}
|
|
739
|
-
}
|
|
740
|
-
parts.push('<|turn>model\n');
|
|
741
|
-
return parts.join('\n');
|
|
373
|
+
private buildPrompt(messages: ChatMessage[], _tools: ProviderTool[], systemPrompt?: string): string {
|
|
374
|
+
// `_tools` is intentionally ignored — tool declarations are embedded inline
|
|
375
|
+
// inside `systemPrompt` via buildSystemPromptWithAliases({ providerKind: 'gemma' }).
|
|
376
|
+
return buildGemmaPrompt({ systemPrompt, messages });
|
|
742
377
|
}
|
|
743
378
|
|
|
744
379
|
destroy() {
|
|
@@ -748,3 +383,8 @@ export class WasmProvider implements LLMProvider {
|
|
|
748
383
|
this.initPromise = null;
|
|
749
384
|
}
|
|
750
385
|
}
|
|
386
|
+
|
|
387
|
+
// BuildGemmaPromptInput and buildGemmaPrompt now live in
|
|
388
|
+
// ../prompts/gemma4-prompt-builder.ts. Re-exported here for backward compat.
|
|
389
|
+
export { buildGemmaPrompt } from '../prompts/gemma4-prompt-builder.js';
|
|
390
|
+
export type { BuildGemmaPromptInput } from '../prompts/gemma4-prompt-builder.js';
|