@webmcp-auto-ui/agent 2.5.26 → 2.5.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -2
- package/src/autoui-server.ts +80 -65
- package/src/index.ts +25 -6
- package/src/loop.ts +52 -33
- package/src/prompts/claude-prompt-builder.ts +81 -0
- package/src/prompts/gemma4-prompt-builder.ts +205 -0
- package/src/prompts/index.ts +55 -0
- package/src/prompts/mistral-prompt-builder.ts +90 -0
- package/src/prompts/qwen-prompt-builder.ts +90 -0
- package/src/prompts/tool-call-parsers.ts +322 -0
- package/src/prompts/tool-refs.ts +196 -0
- package/src/providers/factory.ts +34 -3
- package/src/providers/hawk-models.ts +22 -0
- package/src/providers/hawk.ts +181 -0
- package/src/providers/transformers-models.ts +143 -0
- package/src/providers/transformers-serialize.ts +81 -0
- package/src/providers/transformers.ts +329 -0
- package/src/providers/transformers.worker.ts +640 -0
- package/src/providers/wasm.ts +132 -332
- package/src/recipes/_generated.ts +306 -0
- package/src/recipes/hackathon-assemblee-nationale.md +111 -0
- package/src/recipes/notebook-playbook.md +193 -0
- package/src/server/hawkProxy.ts +54 -0
- package/src/server/index.ts +2 -0
- package/src/tool-layers.ts +7 -403
- package/src/trace-observer.ts +669 -0
- package/src/types.ts +17 -7
- package/src/util/opfs-cache.ts +364 -0
- package/src/util/storage-inventory.ts +195 -0
- package/tests/gemma-prompt.test.ts +472 -0
- package/tests/loop.test.ts +3 -3
- package/tests/transformers-serialize.test.ts +103 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TransformersProvider — runs transformers.js v4 (ONNX + WebGPU) in a Web Worker.
|
|
3
|
+
*
|
|
4
|
+
* Mirrors the public surface of WasmProvider (MediaPipe) so the agent loop can
|
|
5
|
+
* swap providers freely. The heavy lifting (model load, generation, streaming,
|
|
6
|
+
* KV cache, vision preprocessing) happens inside ./transformers.worker.ts — the
|
|
7
|
+
* main thread only orchestrates postMessage traffic and exposes the standard
|
|
8
|
+
* LLMProvider contract.
|
|
9
|
+
*/
|
|
10
|
+
import type {
|
|
11
|
+
LLMProvider,
|
|
12
|
+
LLMResponse,
|
|
13
|
+
ChatMessage,
|
|
14
|
+
ProviderTool,
|
|
15
|
+
TransformersModelId,
|
|
16
|
+
ContentBlock,
|
|
17
|
+
} from '../types.js';
|
|
18
|
+
import {
|
|
19
|
+
TRANSFORMERS_MODELS,
|
|
20
|
+
type TransformersModelEntry,
|
|
21
|
+
type TransformersFamily,
|
|
22
|
+
} from './transformers-models.js';
|
|
23
|
+
import { serializeMessagesForTemplate } from './transformers-serialize.js';
|
|
24
|
+
// Qwen and Mistral no longer need dedicated prompt builders on the main thread:
|
|
25
|
+
// the worker delegates ChatML / [INST] templating to tokenizer.apply_chat_template
|
|
26
|
+
// using the chat_template baked into each model's tokenizer_config.json. We still
|
|
27
|
+
// ship the FLEX system text (produced upstream by buildSystemPromptWithAliases)
|
|
28
|
+
// as the system turn.
|
|
29
|
+
|
|
30
|
+
/** Provider/worker lifecycle: idle → loading → ready, or error on failure. */
export type TransformersStatus = 'idle' | 'loading' | 'ready' | 'error';
|
|
31
|
+
|
|
32
|
+
/** Construction options for {@link TransformersProvider}. */
export interface TransformersProviderOptions {
  /** Model to load; defaults to 'transformers-gemma-4-e2b' when omitted. */
  model?: TransformersModelId;
  /** Context window override; when omitted the model entry's contextLength is used. */
  contextSize?: number;
  /**
   * Download/load progress callback. `progress` is the worker-reported total
   * progress (presumably 0..1 — confirm against transformers.worker.ts);
   * `loaded`/`total` are forwarded only when the worker sends numbers
   * (presumably byte counts).
   */
  onProgress?: (progress: number, status: string, loaded?: number, total?: number) => void;
  /** Invoked on every lifecycle transition (see {@link TransformersStatus}). */
  onStatusChange?: (status: TransformersStatus) => void;
}
|
|
38
|
+
|
|
39
|
+
/** Prompt family tag passed to serializeMessagesForTemplate and used for Mistral's [IMG] quirk. */
type PromptKind = 'gemma' | 'qwen' | 'mistral';
|
|
40
|
+
|
|
41
|
+
function promptKindForFamily(family: TransformersFamily): PromptKind {
|
|
42
|
+
switch (family) {
|
|
43
|
+
case 'gemma4': return 'gemma';
|
|
44
|
+
case 'qwen3': return 'qwen';
|
|
45
|
+
case 'mistral': return 'mistral';
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/** Book-keeping for one in-flight worker request (a generation turn, or the synthetic '__init__' load). */
interface PendingRequest {
  /** Fulfils the caller's promise with the final response on the worker's 'done' message. */
  resolve: (value: LLMResponse) => void;
  /** Rejects the caller's promise (worker 'error' message, worker crash, or destroy()). */
  reject: (err: Error) => void;
  /** Streaming callback invoked for each 'token' message from the worker. */
  onToken?: (token: string) => void;
  /** Caller-supplied cancellation signal, if any. */
  signal?: AbortSignal;
  /** The 'abort' listener registered on `signal`; must be removed when the request settles. */
  abortHandler?: () => void;
}
|
|
56
|
+
|
|
57
|
+
export class TransformersProvider implements LLMProvider {
|
|
58
|
+
readonly name = 'transformers';
|
|
59
|
+
readonly model: TransformersModelId;
|
|
60
|
+
readonly promptKind: PromptKind;
|
|
61
|
+
|
|
62
|
+
private entry: TransformersModelEntry;
|
|
63
|
+
private worker: Worker | null = null;
|
|
64
|
+
private status: TransformersStatus = 'idle';
|
|
65
|
+
private opts: TransformersProviderOptions;
|
|
66
|
+
private initPromise: Promise<void> | null = null;
|
|
67
|
+
private pending = new Map<string, PendingRequest>();
|
|
68
|
+
private requestCounter = 0;
|
|
69
|
+
|
|
70
|
+
constructor(options: TransformersProviderOptions) {
|
|
71
|
+
this.opts = options;
|
|
72
|
+
const modelId = (options.model ?? 'transformers-gemma-4-e2b') as TransformersModelId;
|
|
73
|
+
this.model = modelId;
|
|
74
|
+
const entry = TRANSFORMERS_MODELS[modelId as keyof typeof TRANSFORMERS_MODELS];
|
|
75
|
+
if (!entry) {
|
|
76
|
+
throw new Error(`[transformers] unknown model id: ${modelId}`);
|
|
77
|
+
}
|
|
78
|
+
this.entry = entry;
|
|
79
|
+
this.promptKind = promptKindForFamily(entry.family);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
private setStatus(s: TransformersStatus) {
|
|
83
|
+
this.status = s;
|
|
84
|
+
this.opts.onStatusChange?.(s);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
private nextRequestId(): string {
|
|
88
|
+
this.requestCounter += 1;
|
|
89
|
+
return `req-${Date.now()}-${this.requestCounter}`;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
private ensureWorker(): Worker {
|
|
93
|
+
if (this.worker) return this.worker;
|
|
94
|
+
const w = new Worker(new URL('./transformers.worker.ts', import.meta.url), {
|
|
95
|
+
type: 'module',
|
|
96
|
+
});
|
|
97
|
+
w.addEventListener('message', (ev: MessageEvent) => this.handleMessage(ev.data));
|
|
98
|
+
w.addEventListener('error', (ev) => {
|
|
99
|
+
const msg = (ev as ErrorEvent).message || 'worker error';
|
|
100
|
+
this.setStatus('error');
|
|
101
|
+
for (const [, p] of this.pending) p.reject(new Error(msg));
|
|
102
|
+
this.pending.clear();
|
|
103
|
+
});
|
|
104
|
+
this.worker = w;
|
|
105
|
+
return w;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
private handleMessage(msg: any): void {
|
|
109
|
+
if (!msg || typeof msg !== 'object') return;
|
|
110
|
+
switch (msg.type) {
|
|
111
|
+
case 'progress': {
|
|
112
|
+
this.opts.onProgress?.(
|
|
113
|
+
typeof msg.totalProgress === 'number' ? msg.totalProgress : 0,
|
|
114
|
+
String(msg.status ?? 'downloading'),
|
|
115
|
+
typeof msg.loaded === 'number' ? msg.loaded : undefined,
|
|
116
|
+
typeof msg.total === 'number' ? msg.total : undefined,
|
|
117
|
+
);
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
120
|
+
case 'ready': {
|
|
121
|
+
this.setStatus('ready');
|
|
122
|
+
const resolver = this.pending.get('__init__');
|
|
123
|
+
if (resolver) {
|
|
124
|
+
this.pending.delete('__init__');
|
|
125
|
+
// The init "request" is resolved via a synthetic LLMResponse-less resolver.
|
|
126
|
+
(resolver.resolve as unknown as (v: undefined) => void)(undefined);
|
|
127
|
+
}
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
case 'error': {
|
|
131
|
+
const err = new Error(String(msg.message ?? 'worker error'));
|
|
132
|
+
const requestId: string | undefined = msg.requestId;
|
|
133
|
+
if (requestId && this.pending.has(requestId)) {
|
|
134
|
+
const p = this.pending.get(requestId)!;
|
|
135
|
+
this.pending.delete(requestId);
|
|
136
|
+
if (p.signal && p.abortHandler) p.signal.removeEventListener('abort', p.abortHandler);
|
|
137
|
+
p.reject(err);
|
|
138
|
+
} else {
|
|
139
|
+
const init = this.pending.get('__init__');
|
|
140
|
+
if (init) {
|
|
141
|
+
this.pending.delete('__init__');
|
|
142
|
+
init.reject(err);
|
|
143
|
+
}
|
|
144
|
+
this.setStatus('error');
|
|
145
|
+
}
|
|
146
|
+
return;
|
|
147
|
+
}
|
|
148
|
+
case 'warning': {
|
|
149
|
+
// Worker surfaced a non-fatal warning (e.g. WebGPU fallback to WASM).
|
|
150
|
+
this.opts.onProgress?.(
|
|
151
|
+
typeof msg.totalProgress === 'number' ? msg.totalProgress : 1,
|
|
152
|
+
`warning: ${String(msg.message ?? '')}`,
|
|
153
|
+
);
|
|
154
|
+
return;
|
|
155
|
+
}
|
|
156
|
+
case 'token': {
|
|
157
|
+
const p = this.pending.get(String(msg.requestId));
|
|
158
|
+
if (p?.onToken) p.onToken(String(msg.token ?? ''));
|
|
159
|
+
return;
|
|
160
|
+
}
|
|
161
|
+
case 'done': {
|
|
162
|
+
const requestId = String(msg.requestId);
|
|
163
|
+
const p = this.pending.get(requestId);
|
|
164
|
+
if (!p) return;
|
|
165
|
+
this.pending.delete(requestId);
|
|
166
|
+
if (p.signal && p.abortHandler) p.signal.removeEventListener('abort', p.abortHandler);
|
|
167
|
+
const content: ContentBlock[] = Array.isArray(msg.content)
|
|
168
|
+
? (msg.content as ContentBlock[])
|
|
169
|
+
: [{ type: 'text', text: String(msg.content ?? '') }];
|
|
170
|
+
const hasToolUse = content.some(b => b.type === 'tool_use');
|
|
171
|
+
p.resolve({
|
|
172
|
+
content,
|
|
173
|
+
stopReason: hasToolUse ? 'tool_use' : 'end_turn',
|
|
174
|
+
stats: msg.stats ?? { tokensPerSec: 0, totalTokens: 0, latencyMs: 0 },
|
|
175
|
+
usage: msg.usage ?? { input_tokens: 0, output_tokens: 0 },
|
|
176
|
+
});
|
|
177
|
+
return;
|
|
178
|
+
}
|
|
179
|
+
default:
|
|
180
|
+
// Unknown message type — ignore to stay forward-compatible.
|
|
181
|
+
return;
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
async initialize(): Promise<void> {
|
|
186
|
+
if (this.initPromise) return this.initPromise;
|
|
187
|
+
this.initPromise = this._init().catch((err) => {
|
|
188
|
+
this.initPromise = null;
|
|
189
|
+
throw err;
|
|
190
|
+
});
|
|
191
|
+
return this.initPromise;
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
private _init(): Promise<void> {
|
|
195
|
+
this.setStatus('loading');
|
|
196
|
+
const worker = this.ensureWorker();
|
|
197
|
+
|
|
198
|
+
return new Promise<void>((resolve, reject) => {
|
|
199
|
+
// Register a synthetic pending entry keyed by '__init__' that handleMessage
|
|
200
|
+
// resolves on 'ready' / rejects on 'error'.
|
|
201
|
+
this.pending.set('__init__', {
|
|
202
|
+
resolve: resolve as unknown as (v: LLMResponse) => void,
|
|
203
|
+
reject,
|
|
204
|
+
});
|
|
205
|
+
worker.postMessage({
|
|
206
|
+
type: 'load',
|
|
207
|
+
modelId: this.model,
|
|
208
|
+
entry: this.entry,
|
|
209
|
+
contextSize: this.opts.contextSize ?? this.entry.contextLength,
|
|
210
|
+
});
|
|
211
|
+
});
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
/** Extract a base64 data-URL image from the last user message, if any.
|
|
215
|
+
* Vision turns are always one-shot: only the latest user turn's image is used. */
|
|
216
|
+
private extractImageFromLastUserMessage(messages: ChatMessage[]): Uint8Array | undefined {
|
|
217
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
218
|
+
const m = messages[i];
|
|
219
|
+
if (m.role !== 'user') continue;
|
|
220
|
+
if (typeof m.content === 'string') return undefined;
|
|
221
|
+
const imgBlock = m.content.find(
|
|
222
|
+
(b): b is Extract<ContentBlock, { type: 'image' }> => b.type === 'image',
|
|
223
|
+
);
|
|
224
|
+
if (!imgBlock) return undefined;
|
|
225
|
+
const match = imgBlock.data.match(/^data:[^;]+;base64,(.+)$/);
|
|
226
|
+
if (!match) return undefined;
|
|
227
|
+
const bin = atob(match[1]);
|
|
228
|
+
const bytes = new Uint8Array(bin.length);
|
|
229
|
+
for (let j = 0; j < bin.length; j++) bytes[j] = bin.charCodeAt(j);
|
|
230
|
+
return bytes;
|
|
231
|
+
}
|
|
232
|
+
return undefined;
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
async chat(
|
|
236
|
+
messages: ChatMessage[],
|
|
237
|
+
_tools: ProviderTool[],
|
|
238
|
+
options?: {
|
|
239
|
+
signal?: AbortSignal;
|
|
240
|
+
cacheEnabled?: boolean;
|
|
241
|
+
system?: string;
|
|
242
|
+
maxTokens?: number;
|
|
243
|
+
temperature?: number;
|
|
244
|
+
topK?: number;
|
|
245
|
+
onToken?: (token: string) => void;
|
|
246
|
+
},
|
|
247
|
+
): Promise<LLMResponse> {
|
|
248
|
+
if (this.status !== 'ready') await this.initialize();
|
|
249
|
+
const worker = this.ensureWorker();
|
|
250
|
+
|
|
251
|
+
const image = this.entry.vision ? this.extractImageFromLastUserMessage(messages) : undefined;
|
|
252
|
+
const systemText = options?.system;
|
|
253
|
+
|
|
254
|
+
// All three families (gemma4 / qwen3 / mistral) go through
|
|
255
|
+
// tokenizer.apply_chat_template inside the worker. Each model's
|
|
256
|
+
// tokenizer_config.json ships a Jinja chat_template that emits the
|
|
257
|
+
// correct role tags (<start_of_turn> for Gemma 4, <|im_start|> for Qwen,
|
|
258
|
+
// [INST] for Mistral). Building a custom "<|turn>…<turn|>" string on the
|
|
259
|
+
// main thread for Gemma trips the tokenizer on transformers.js 4.1.0
|
|
260
|
+
// ("type N not iterable"), so we rely on apply_chat_template uniformly.
|
|
261
|
+
// WasmProvider (MediaPipe) keeps the custom builder because MediaPipe
|
|
262
|
+
// has no chat_template runtime.
|
|
263
|
+
const chatMessages: Array<{ role: string; content: string }> = [];
|
|
264
|
+
if (systemText) chatMessages.push({ role: 'system', content: systemText });
|
|
265
|
+
chatMessages.push(...serializeMessagesForTemplate(messages, this.promptKind));
|
|
266
|
+
// Mistral Pixtral requires an inline `[IMG]` placeholder in the user turn
|
|
267
|
+
// that corresponds to the attached image — the chat_template counts these
|
|
268
|
+
// tokens to allocate the vision embedding slot. Without it, the processor
|
|
269
|
+
// raises a raw Emscripten exception inside the image_processor.
|
|
270
|
+
if (this.promptKind === 'mistral' && image && chatMessages.length > 0) {
|
|
271
|
+
for (let i = chatMessages.length - 1; i >= 0; i--) {
|
|
272
|
+
if (chatMessages[i].role === 'user') {
|
|
273
|
+
chatMessages[i] = {
|
|
274
|
+
...chatMessages[i],
|
|
275
|
+
content: '[IMG]' + chatMessages[i].content,
|
|
276
|
+
};
|
|
277
|
+
break;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
const requestId = this.nextRequestId();
|
|
282
|
+
|
|
283
|
+
return new Promise<LLMResponse>((resolve, reject) => {
|
|
284
|
+
const pending: PendingRequest = {
|
|
285
|
+
resolve,
|
|
286
|
+
reject,
|
|
287
|
+
onToken: options?.onToken,
|
|
288
|
+
signal: options?.signal,
|
|
289
|
+
};
|
|
290
|
+
|
|
291
|
+
if (options?.signal) {
|
|
292
|
+
const handler = () => {
|
|
293
|
+
worker.postMessage({ type: 'abort', requestId });
|
|
294
|
+
};
|
|
295
|
+
pending.abortHandler = handler;
|
|
296
|
+
options.signal.addEventListener('abort', handler);
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
this.pending.set(requestId, pending);
|
|
300
|
+
|
|
301
|
+
const message: Record<string, unknown> = {
|
|
302
|
+
type: 'generate',
|
|
303
|
+
requestId,
|
|
304
|
+
options: {
|
|
305
|
+
maxTokens: options?.maxTokens ?? 2048,
|
|
306
|
+
temperature: options?.temperature,
|
|
307
|
+
topK: options?.topK,
|
|
308
|
+
},
|
|
309
|
+
};
|
|
310
|
+
message.chatMessages = chatMessages;
|
|
311
|
+
if (image) message.image = image;
|
|
312
|
+
worker.postMessage(message);
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
destroy(): void {
|
|
317
|
+
if (this.worker) {
|
|
318
|
+
try { this.worker.postMessage({ type: 'dispose' }); } catch {}
|
|
319
|
+
try { this.worker.terminate(); } catch {}
|
|
320
|
+
this.worker = null;
|
|
321
|
+
}
|
|
322
|
+
for (const [, p] of this.pending) {
|
|
323
|
+
p.reject(new Error('provider destroyed'));
|
|
324
|
+
}
|
|
325
|
+
this.pending.clear();
|
|
326
|
+
this.setStatus('idle');
|
|
327
|
+
this.initPromise = null;
|
|
328
|
+
}
|
|
329
|
+
}
|