@inbrowser/model 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +50 -0
- package/LICENSE +21 -0
- package/README.md +63 -0
- package/dist/adapters/agent.d.ts +19 -0
- package/dist/adapters/agent.d.ts.map +1 -0
- package/dist/adapters/agent.js +96 -0
- package/dist/adapters/agent.js.map +1 -0
- package/dist/adapters/relay.d.ts +17 -0
- package/dist/adapters/relay.d.ts.map +1 -0
- package/dist/adapters/relay.js +90 -0
- package/dist/adapters/relay.js.map +1 -0
- package/dist/engine.d.ts +35 -0
- package/dist/engine.d.ts.map +1 -0
- package/dist/engine.js +353 -0
- package/dist/engine.js.map +1 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +20 -0
- package/dist/index.js.map +1 -0
- package/dist/parse-tool-calls.d.ts +49 -0
- package/dist/parse-tool-calls.d.ts.map +1 -0
- package/dist/parse-tool-calls.js +115 -0
- package/dist/parse-tool-calls.js.map +1 -0
- package/dist/presets.d.ts +93 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/presets.js +191 -0
- package/dist/presets.js.map +1 -0
- package/dist/think.d.ts +57 -0
- package/dist/think.d.ts.map +1 -0
- package/dist/think.js +138 -0
- package/dist/think.js.map +1 -0
- package/dist/types.d.ts +291 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +17 -0
- package/dist/types.js.map +1 -0
- package/dist/worker.d.ts +62 -0
- package/dist/worker.d.ts.map +1 -0
- package/dist/worker.js +493 -0
- package/dist/worker.js.map +1 -0
- package/package.json +65 -0
package/dist/engine.js
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `createEngine` — on-device LLM engine implementation.
|
|
3
|
+
*
|
|
4
|
+
* Wires `@huggingface/transformers` v4 to the narrow `Engine` surface
|
|
5
|
+
* declared in `./types.ts`:
|
|
6
|
+
*
|
|
7
|
+
* - `ensureReady()` loads the `AutoTokenizer` + `AutoModelForCausalLM`
|
|
8
|
+
* pair from the configured HF Hub repo, mapping the runtime's
|
|
9
|
+
* `ProgressInfo` stream into `LoadProgress` events.
|
|
10
|
+
* - `generate()` applies the model's bundled chat template, sets up
|
|
11
|
+
* a `TextStreamer` whose `callback_function` pushes tokens into
|
|
12
|
+
* an async-iterator queue, and drives `model.generate()`. Yields
|
|
13
|
+
* `{ kind: 'token' }` per decoded chunk, then a terminal `usage`
|
|
14
|
+
* event with the engine's local accounting.
|
|
15
|
+
*
|
|
16
|
+
* Backend mapping: `Backend` ('auto' | 'webgpu' | 'wasm') passes
|
|
17
|
+
* straight through to Transformers.js's `DeviceType`. `dtype` is
|
|
18
|
+
* forwarded unchanged.
|
|
19
|
+
*
|
|
20
|
+
* Tool calling: `generate()` surfaces native tool calls (via `parseToolCalls`)
|
|
21
|
+
* when the preset supports tools and the caller passes `tools`; the agent-side polyfill lives in `@inbrowser/agent` (see AGENTS.md).
|
|
22
|
+
*
|
|
23
|
+
* Stop sequences: `GenerateOpts.stop` is accepted but not yet
|
|
24
|
+
* honored — needs a `StoppingCriteria` adapter. Tracked as a
|
|
25
|
+
* follow-up.
|
|
26
|
+
*/
|
|
27
|
+
import { AutoModelForCausalLM, AutoTokenizer, InterruptableStoppingCriteria, TextStreamer, env as transformersEnv, } from '@huggingface/transformers';
|
|
28
|
+
import { parseToolCalls } from './parse-tool-calls.js';
|
|
29
|
+
export function createEngine(opts) {
|
|
30
|
+
const model = opts.model;
|
|
31
|
+
const capabilities = opts.capabilities;
|
|
32
|
+
let state = 'idle';
|
|
33
|
+
let loadPromise = null;
|
|
34
|
+
let tokenizer = null;
|
|
35
|
+
let llm = null;
|
|
36
|
+
const stateSubs = new Set();
|
|
37
|
+
const loadSubs = new Set();
|
|
38
|
+
if (opts.onLoadProgress)
|
|
39
|
+
loadSubs.add(opts.onLoadProgress);
|
|
40
|
+
/**
 * Move the engine to `next` and notify every `state` subscriber.
 *
 * Re-entering the current state is a no-op, so subscribers are only
 * called on an actual change.
 */
function setState(next) {
    const changed = state !== next;
    if (changed) {
        state = next;
        stateSubs.forEach((notify) => notify(next));
    }
}
|
|
47
|
+
/** Deliver a load-progress event `p` to every registered load subscriber. */
function emitLoad(p) {
    loadSubs.forEach((notify) => notify(p));
}
|
|
51
|
+
/**
 * Translate a Transformers.js progress record into a `fetch`-phase
 * load event.
 *
 * Only `progress` records carry byte counts; `initiate` and `download`
 * are reported as zero-byte markers for the same file. Every other
 * status (including `done` and `ready`) is intentionally dropped:
 * phase transitions are emitted from `ensureReady` so their order
 * stays deterministic.
 */
function progressCallback(info) {
    switch (info.status) {
        case 'progress':
            emitLoad({
                phase: 'fetch',
                file: info.file,
                loadedBytes: info.loaded,
                totalBytes: info.total,
            });
            break;
        case 'initiate':
        case 'download':
            emitLoad({
                phase: 'fetch',
                file: info.file,
                loadedBytes: 0,
                totalBytes: 0,
            });
            break;
        default:
            // Remaining statuses are markers we do not forward.
            break;
    }
}
|
|
74
|
+
/**
 * Load the tokenizer and the model once, then move the engine to `ready`.
 *
 * Concurrent callers share one in-flight load. A failed load moves the
 * engine to `error` and clears the cached promise so a later call can
 * retry. If `dispose()` runs while the load is in flight, the load
 * rejects with `engine disposed` instead of reviving the engine: no
 * references are stored and the state stays `disposed`.
 *
 * @returns {Promise<void>} Resolves once both artifacts are loaded.
 * @throws {Error} `engine disposed` when the engine is (or becomes)
 *   disposed; otherwise whatever the underlying loaders reject with.
 */
async function ensureReady() {
    if (state === 'ready')
        return;
    if (state === 'disposed')
        throw new Error('engine disposed');
    if (loadPromise)
        return loadPromise;
    setState('loading');
    // `weightsBaseUrl` overrides the HF Hub origin for self-hosted
    // mirrors. Transformers.js exposes this as the global
    // `env.remoteHost`; we set it process-wide before load. Documented
    // limitation: with multiple engines spanning different remotes,
    // the last one to load wins. Realistic use case (one app, one
    // mirror) is unaffected.
    if (opts.weightsBaseUrl) {
        transformersEnv.remoteHost = opts.weightsBaseUrl;
    }
    const revisionOpt = model.revision ? { revision: model.revision } : {};
    loadPromise = (async () => {
        // AutoTokenizer (not AutoProcessor): text-only models like
        // SmolLM2 ship no preprocessor_config.json and AutoProcessor
        // 404s on them. Multimodal models (e.g., Gemma 4 audio) still
        // resolve via AutoTokenizer because their text tokenizer is
        // the same file — we just don't expose the audio path yet.
        const loadedTokenizer = await AutoTokenizer.from_pretrained(model.modelId, {
            ...revisionOpt,
            progress_callback: progressCallback,
        });
        // `dispose()` may have run while the tokenizer was downloading.
        if (state === 'disposed')
            throw new Error('engine disposed');
        tokenizer = loadedTokenizer;
        emitLoad({ phase: 'init', backend: opts.backend });
        const loadedModel = await AutoModelForCausalLM.from_pretrained(model.modelId, {
            dtype: opts.dtype,
            device: toDeviceOption(opts.backend),
            ...revisionOpt,
            progress_callback: progressCallback,
        });
        if (state === 'disposed') {
            // Nobody will ever own this model: release it (when the
            // runtime offers `dispose`) rather than waiting for GC.
            await loadedModel?.dispose?.();
            throw new Error('engine disposed');
        }
        llm = loadedModel;
        setState('ready');
        emitLoad({ phase: 'ready' });
    })().catch((e) => {
        loadPromise = null;
        // Never overwrite the terminal `disposed` state with `error`.
        if (state !== 'disposed')
            setState('error');
        throw e;
    });
    return loadPromise;
}
|
|
117
|
+
/**
 * Subscribe `handler` to an engine event.
 *
 * `'state'` handlers are stored with the state subscribers; any other
 * event name is stored with the load-progress subscribers.
 *
 * @returns a function that removes the handler again.
 */
function on(event, handler) {
    const registry = event === 'state' ? stateSubs : loadSubs;
    registry.add(handler);
    return () => registry.delete(handler);
}
|
|
127
|
+
/**
 * Public generation entry point: streams engine events for `messages`.
 *
 * Tools are only used when BOTH gates hold: the preset's capabilities
 * declare `supportsTools`, and the caller supplied a non-empty `tools`
 * list. In that case the raw stream is routed through `parseToolCalls`
 * so tool-call envelopes surface as `tool_call` events. Otherwise the
 * raw stream is forwarded untouched and no parsing cost is paid.
 */
async function* generate(messages, genOpts = {}) {
    const useTools = capabilities.supportsTools && genOpts.tools !== undefined && genOpts.tools.length > 0;
    const rawEvents = generateRaw(messages, genOpts, useTools);
    yield* (useTools ? parseToolCalls(rawEvents) : rawEvents);
}
|
|
145
|
+
/**
 * Core generation loop: renders the prompt, runs `model.generate()` and
 * streams the decoded text as engine events.
 *
 * Yields `{ kind: 'token', text }` per decoded chunk, then either a
 * terminal `{ kind: 'error' }` (load failure or generation failure) or a
 * terminal `{ kind: 'usage' }` with prompt/output token counts and the
 * elapsed decode time in milliseconds.
 *
 * Cancellation: aborting `genOpts.signal`, or the consumer leaving the
 * iteration early, interrupts the running `model.generate()` call through
 * an `InterruptableStoppingCriteria`, so the engine does not keep decoding
 * up to `max_new_tokens` for output nobody reads.
 *
 * Sampling: a positive `temperature` enables sampling; a temperature of
 * zero (or any non-positive / NaN value) selects greedy decoding instead
 * of dividing the logits by zero.
 *
 * @param messages  conversation to render through the chat template.
 * @param genOpts   generation options (`tools`, `enableThinking`,
 *                  `maxNewTokens`, `temperature`, `topP`, `topK`, `signal`).
 * @param useTools  whether `genOpts.tools` should be passed to the template.
 */
async function* generateRaw(messages, genOpts, useTools) {
    try {
        await ensureReady();
    }
    catch (e) {
        yield {
            kind: 'error',
            message: e instanceof Error ? e.message : String(e),
            recoverable: false,
        };
        return;
    }
    if (!tokenizer || !llm) {
        yield { kind: 'error', message: 'engine not ready', recoverable: false };
        return;
    }
    // Pin the model now: `dispose()` may null the shared reference while
    // we are suspended on the tokenizer call below.
    const activeModel = llm;
    // ── Build prompt ──────────────────────────────────────────────
    const tools = useTools ? genOpts.tools : undefined;
    // Thinking mode: opt-in via GenerateOpts.enableThinking, gated on
    // capabilities.supportsThinking.
    const useThinking = capabilities.supportsThinking && genOpts.enableThinking === true;
    const renderedPrompt = applyChatTemplate(tokenizer, messages, opts.chatTemplate, tools, useThinking);
    // Calling the tokenizer as a function returns a BatchEncoding
    // (`{ input_ids, attention_mask, ... }`) of Tensors.
    const inputs = (await tokenizer(renderedPrompt));
    const promptTokens = countTokens(inputs.input_ids);
    // ── Set up streaming queue ────────────────────────────────────
    const queue = [];
    let resolver = null;
    let producerDone = false;
    let producerError = null;
    let outputTokens = 0;
    // Resolve the promise the consumer loop is parked on, if any.
    function wakeIterator() {
        const r = resolver;
        resolver = null;
        if (r)
            r();
    }
    const pushEvent = (evt) => {
        queue.push(evt);
        wakeIterator();
    };
    // ── Cancellation wiring ───────────────────────────────────────
    const stopper = new InterruptableStoppingCriteria();
    const signal = genOpts.signal;
    const onAbort = () => {
        stopper.interrupt();
        // The consumer may be parked waiting for a token that will
        // never arrive; wake it so it can observe the abort.
        wakeIterator();
    };
    if (signal) {
        if (signal.aborted)
            stopper.interrupt();
        else
            signal.addEventListener('abort', onAbort, { once: true });
    }
    // Only expose special tokens in the output stream when the preset
    // declares thinkingTags AND thinking is enabled — that combination
    // signals a downstream parser will consume the structural tokens.
    // Otherwise the streamer's default keeps the output clean.
    const preserveSpecialTokens = useThinking && capabilities.thinkingTags !== undefined;
    const streamer = new TextStreamer(tokenizer, {
        skip_prompt: true,
        ...(preserveSpecialTokens ? { skip_special_tokens: false } : {}),
        callback_function: (text) => {
            if (text.length === 0)
                return;
            pushEvent({ kind: 'token', text });
        },
        token_callback_function: (tokenIds) => {
            outputTokens += tokenIds.length;
        },
    });
    const startedAt = performance.now();
    const generateArgs = {
        ...inputs,
        max_new_tokens: genOpts.maxNewTokens ?? 512,
        streamer,
        stopping_criteria: stopper,
    };
    if (typeof genOpts.temperature === 'number') {
        if (genOpts.temperature > 0) {
            generateArgs.do_sample = true;
            generateArgs.temperature = genOpts.temperature;
        }
        else {
            // Temperature 0 conventionally means "deterministic": use
            // greedy decoding rather than scaling logits by 1/0.
            generateArgs.do_sample = false;
        }
    }
    if (typeof genOpts.topP === 'number')
        generateArgs.top_p = genOpts.topP;
    if (typeof genOpts.topK === 'number')
        generateArgs.top_k = genOpts.topK;
    // Drive `model.generate` concurrently with the iterator drain.
    const generatePromise = (async () => {
        try {
            await activeModel.generate(generateArgs);
        }
        catch (e) {
            producerError = e;
        }
        finally {
            producerDone = true;
            wakeIterator();
        }
    })();
    try {
        while (!producerDone || queue.length > 0) {
            if (queue.length === 0) {
                // Already-queued tokens are drained before an abort
                // takes effect, so no decoded text is lost.
                if (signal?.aborted)
                    break;
                await new Promise((r) => {
                    resolver = r;
                });
            }
            const next = queue.shift();
            if (next)
                yield next;
        }
    }
    finally {
        signal?.removeEventListener('abort', onAbort);
        // Covers early consumer exit (`break` / `return()` on the
        // iterator): stop decoding instead of running to the token cap.
        // A no-op when generation has already finished.
        stopper.interrupt();
        await generatePromise;
    }
    if (producerError) {
        yield {
            kind: 'error',
            message: producerError instanceof Error ? producerError.message : String(producerError),
            recoverable: false,
        };
        return;
    }
    yield {
        kind: 'usage',
        promptTokens,
        outputTokens,
        decodeMs: Math.round(performance.now() - startedAt),
    };
}
|
|
274
|
+
/**
 * Permanently shut the engine down.
 *
 * Moves the engine to `disposed` (subscribers are notified of that final
 * transition before being removed), drops the tokenizer/model references,
 * and releases the model's backend sessions when the runtime exposes a
 * `dispose()` method on the loaded model. Calling it again is harmless:
 * the references are already cleared.
 *
 * @returns {Promise<void>} Resolves once the model's sessions are released.
 */
async function dispose() {
    setState('disposed');
    stateSubs.clear();
    loadSubs.clear();
    // Detach the references first so no new generation can start on a
    // model that is being torn down.
    const loadedModel = llm;
    tokenizer = null;
    llm = null;
    // Transformers.js models expose `dispose()` to free their ONNX
    // sessions (wasm/webgpu buffers); relying on GC alone can keep that
    // memory alive for a long time.
    if (loadedModel && typeof loadedModel.dispose === 'function') {
        await loadedModel.dispose();
    }
}
|
|
283
|
+
return {
|
|
284
|
+
get model() {
|
|
285
|
+
return model;
|
|
286
|
+
},
|
|
287
|
+
get state() {
|
|
288
|
+
return state;
|
|
289
|
+
},
|
|
290
|
+
get capabilities() {
|
|
291
|
+
return capabilities;
|
|
292
|
+
},
|
|
293
|
+
ensureReady,
|
|
294
|
+
on,
|
|
295
|
+
generate,
|
|
296
|
+
dispose,
|
|
297
|
+
};
|
|
298
|
+
}
|
|
299
|
+
/**
 * Helper for authoring presets with type checking.
 *
 * At runtime this is the identity function: the preset object is handed
 * back untouched. Its only purpose is the compile-time completeness
 * check applied to caller-defined presets.
 */
export function definePreset(p) {
    const preset = p;
    return preset;
}
|
|
307
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
308
|
+
/**
 * Convert an engine `Backend` value into the `device` option passed to
 * the model loader. The names are used as-is, so the value is returned
 * unchanged.
 */
function toDeviceOption(backend) {
    const device = backend;
    return device;
}
|
|
312
|
+
/**
 * Render `messages` into the prompt string fed to the tokenizer.
 *
 * When an `override` renderer is supplied it is called with the messages
 * and its result is returned directly. Otherwise each message is reduced
 * to `{ role, content }` (only `role` and `text` are read, so any media
 * is dropped on this text-only path) and rendered through the tokenizer's
 * `apply_chat_template` with `add_generation_prompt: true` and
 * `tokenize: false`.
 *
 * `tools` is forwarded only when it is a non-empty list, and
 * `enable_thinking: true` only when `enableThinking` is truthy, so
 * templates that know neither option see an unchanged option set.
 *
 * @throws {Error} if the template call does not return a string.
 */
function applyChatTemplate(tokenizer, messages, override, tools, enableThinking) {
    if (override) {
        return override(messages);
    }
    const conversation = messages.map(({ role, text }) => ({ role, content: text }));
    const templateOpts = {
        add_generation_prompt: true,
        tokenize: false,
    };
    if (tools && tools.length > 0) {
        templateOpts.tools = tools;
    }
    if (enableThinking) {
        templateOpts.enable_thinking = true;
    }
    const rendered = tokenizer.apply_chat_template(conversation, templateOpts);
    if (typeof rendered === 'string') {
        return rendered;
    }
    throw new Error('apply_chat_template returned non-string with tokenize:false');
}
|
|
343
|
+
/**
 * Read the sequence length from a tensor-like `input_ids` value.
 *
 * The value is expected to carry a `dims` array of at least two entries
 * (`[batch, seq_len]`); the last entry is returned. Anything else — a
 * non-object, missing or too-short `dims`, or a non-numeric last entry —
 * counts as 0 tokens.
 */
function countTokens(inputIds) {
    const shape = inputIds && typeof inputIds === 'object' ? inputIds.dims : undefined;
    if (!Array.isArray(shape) || shape.length < 2) {
        return 0;
    }
    const [seqLen] = shape.slice(-1);
    if (typeof seqLen !== 'number') {
        return 0;
    }
    return seqLen;
}
|
|
353
|
+
//# sourceMappingURL=engine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"engine.js","sourceRoot":"","sources":["../src/engine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,EACL,oBAAoB,EACpB,aAAa,EAIb,YAAY,EACZ,GAAG,IAAI,eAAe,GACvB,MAAM,2BAA2B,CAAC;AAEnC,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AAgBvD,MAAM,UAAU,YAAY,CAAC,IAAsB;IACjD,MAAM,KAAK,GAAa,IAAI,CAAC,KAAK,CAAC;IACnC,MAAM,YAAY,GAAuB,IAAI,CAAC,YAAY,CAAC;IAE3D,IAAI,KAAK,GAAgB,MAAM,CAAC;IAChC,IAAI,WAAW,GAAyB,IAAI,CAAC;IAC7C,IAAI,SAAS,GAA+B,IAAI,CAAC;IACjD,IAAI,GAAG,GAA2B,IAAI,CAAC;IAEvC,MAAM,SAAS,GAAG,IAAI,GAAG,EAA4B,CAAC;IACtD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA6B,CAAC;IACtD,IAAI,IAAI,CAAC,cAAc;QAAE,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAE3D,SAAS,QAAQ,CAAC,IAAiB;QACjC,IAAI,KAAK,KAAK,IAAI;YAAE,OAAO;QAC3B,KAAK,GAAG,IAAI,CAAC;QACb,KAAK,MAAM,GAAG,IAAI,SAAS;YAAE,GAAG,CAAC,IAAI,CAAC,CAAC;IACzC,CAAC;IAED,SAAS,QAAQ,CAAC,CAAe;QAC/B,KAAK,MAAM,GAAG,IAAI,QAAQ;YAAE,GAAG,CAAC,CAAC,CAAC,CAAC;IACrC,CAAC;IAED,SAAS,gBAAgB,CAAC,IAAkB;QAC1C,gEAAgE;QAChE,+DAA+D;QAC/D,oEAAoE;QACpE,IAAI,IAAI,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAC/B,QAAQ,CAAC;gBACP,KAAK,EAAE,OAAO;gBACd,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,IAAI,CAAC,MAAM;gBACxB,UAAU,EAAE,IAAI,CAAC,KAAK;aACvB,CAAC,CAAC;QACL,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,KAAK,UAAU,IAAI,IAAI,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YACpE,QAAQ,CAAC;gBACP,KAAK,EAAE,OAAO;gBACd,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,WAAW,EAAE,CAAC;gBACd,UAAU,EAAE,CAAC;aACd,CAAC,CAAC;QACL,CAAC;QACD,kEAAkE;QAClE,+DAA+D;IACjE,CAAC;IAED,KAAK,UAAU,WAAW;QACxB,IAAI,KAAK,KAAK,OAAO;YAAE,OAAO;QAC9B,IAAI,KAAK,KAAK,UAAU;YAAE,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;QAC7D,IAAI,WAAW;YAAE,OAAO,WAAW,CAAC;QAEpC,QAAQ,CAAC,SAAS,CAAC,CAAC;QACpB,+DAA+D;QAC/D,sDAAsD;QACtD,mEAAmE;QACnE,gEAAgE;QAChE,8DAA8D;QAC9D,yBAAyB;QACzB,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACxB,eAAe,CAAC,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC;QACnD,CAAC;QACD,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;YACxB,2DAA2D;YAC3D,6DAA6D;YAC7D,8DAA8D;YAC9D,4DAA4D;YAC5D,2DAA2D;YAC3D,SAAS,GAAG,MAAM,aAAa,CAAC,eAAe,CAAC,KAAK,CAAC,OAAO,EAAE;gBAC7D,GAAG,CAAC,KAAK,CAAC,QAAQ,CA
AC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACvD,iBAAiB,EAAE,gBAAgB;aACpC,CAAC,CAAC;YAEH,QAAQ,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;YAEnD,GAAG,GAAG,MAAM,oBAAoB,CAAC,eAAe,CAAC,KAAK,CAAC,OAAO,EAAE;gBAC9D,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,MAAM,EAAE,cAAc,CAAC,IAAI,CAAC,OAAO,CAAC;gBACpC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACvD,iBAAiB,EAAE,gBAAgB;aACpC,CAAC,CAAC;YAEH,QAAQ,CAAC,OAAO,CAAC,CAAC;YAClB,QAAQ,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;QAC/B,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;YACf,QAAQ,CAAC,OAAO,CAAC,CAAC;YAClB,WAAW,GAAG,IAAI,CAAC;YACnB,MAAM,CAAC,CAAC;QACV,CAAC,CAAC,CAAC;QACH,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,SAAS,EAAE,CACT,KAAQ,EACR,OAA2C;QAE3C,IAAI,KAAK,KAAK,OAAO,EAAE,CAAC;YACtB,MAAM,CAAC,GAAG,OAAmC,CAAC;YAC9C,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACjB,OAAO,GAAG,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACnC,CAAC;QACD,MAAM,CAAC,GAAG,OAAoC,CAAC;QAC/C,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAChB,OAAO,GAAG,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IAClC,CAAC;IAED,KAAK,SAAS,CAAC,CAAC,QAAQ,CACtB,QAAsC,EACtC,UAAwB,EAAE;QAE1B,gEAAgE;QAChE,8DAA8D;QAC9D,0DAA0D;QAC1D,iEAAiE;QACjE,8DAA8D;QAC9D,+DAA+D;QAC/D,6DAA6D;QAC7D,wBAAwB;QACxB,MAAM,QAAQ,GACZ,YAAY,CAAC,aAAa,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;QACxF,MAAM,GAAG,GAAG,WAAW,CAAC,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACrD,IAAI,QAAQ,EAAE,CAAC;YACb,KAAK,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,KAAK,CAAC,CAAC,GAAG,CAAC;QACb,CAAC;IACH,CAAC;IAED,KAAK,SAAS,CAAC,CAAC,WAAW,CACzB,QAAsC,EACtC,OAAqB,EACrB,QAAiB;QAEjB,IAAI,CAAC;YACH,MAAM,WAAW,EAAE,CAAC;QACtB,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,MAAM;gBACJ,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBACnD,WAAW,EAAE,KAAK;aACnB,CAAC;YACF,OAAO;QACT,CAAC;QAED,IAAI,CAAC,SAAS,IAAI,CAAC,GAAG,EAAE,CAAC;YACvB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OA
AO,EAAE,kBAAkB,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;YACzE,OAAO;QACT,CAAC;QAED,iEAAiE;QACjE,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAE,OAAO,CAAC,KAAiC,CAAC,CAAC,CAAC,SAAS,CAAC;QAChF,kEAAkE;QAClE,+DAA+D;QAC/D,8DAA8D;QAC9D,kEAAkE;QAClE,iDAAiD;QACjD,MAAM,WAAW,GAAG,YAAY,CAAC,gBAAgB,IAAI,OAAO,CAAC,cAAc,KAAK,IAAI,CAAC;QACrF,MAAM,cAAc,GAAG,iBAAiB,CACtC,SAAS,EACT,QAAQ,EACR,IAAI,CAAC,YAAY,EACjB,KAAK,EACL,WAAW,CACZ,CAAC;QACF,8DAA8D;QAC9D,qDAAqD;QACrD,MAAM,MAAM,GAAG,CAAC,MAAM,SAAS,CAAC,cAAc,CAAC,CAA4B,CAAC;QAC5E,MAAM,YAAY,GAAG,WAAW,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAEnD,iEAAiE;QACjE,MAAM,KAAK,GAAkB,EAAE,CAAC;QAChC,IAAI,QAAQ,GAAwB,IAAI,CAAC;QACzC,IAAI,YAAY,GAAG,KAAK,CAAC;QACzB,IAAI,aAAa,GAAY,IAAI,CAAC;QAClC,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,SAAS,YAAY;YACnB,MAAM,CAAC,GAAG,QAAQ,CAAC;YACnB,QAAQ,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC;gBAAE,CAAC,EAAE,CAAC;QACb,CAAC;QAED,MAAM,SAAS,GAAG,CAAC,GAAgB,EAAQ,EAAE;YAC3C,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAChB,YAAY,EAAE,CAAC;QACjB,CAAC,CAAC;QAEF,2DAA2D;QAC3D,4DAA4D;QAC5D,4DAA4D;QAC5D,+DAA+D;QAC/D,+DAA+D;QAC/D,0DAA0D;QAC1D,EAAE;QACF,gEAAgE;QAChE,iEAAiE;QACjE,8DAA8D;QAC9D,2DAA2D;QAC3D,kEAAkE;QAClE,MAAM,qBAAqB,GAAG,WAAW,IAAI,YAAY,CAAC,YAAY,KAAK,SAAS,CAAC;QACrF,MAAM,QAAQ,GAAG,IAAI,YAAY,CAAC,SAAS,EAAE;YAC3C,WAAW,EAAE,IAAI;YACjB,GAAG,CAAC,qBAAqB,CAAC,CAAC,CAAC,EAAE,mBAAmB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAChE,iBAAiB,EAAE,CAAC,IAAY,EAAE,EAAE;gBAClC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;oBAAE,OAAO;gBAC9B,SAAS,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YACrC,CAAC;YACD,uBAAuB,EAAE,CAAC,QAAkB,EAAE,EAAE;gBAC9C,YAAY,IAAI,QAAQ,CAAC,MAAM,CAAC;YAClC,CAAC;SACF,CAAC,CAAC;QAEH,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QACpC,MAAM,YAAY,GAA4B;YAC5C,GAAG,MAAM;YACT,cAAc,EAAE,OAAO,CAAC,YAAY,IAAI,GAAG;YAC3C,QAAQ;SACT,CAAC;QACF,IAAI,OAAO,OAAO,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;YAC5C,YAAY,CAAC,SAAS,GAAG,IAAI,CAAC;YAC9B,YAAY,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;QACjD,CAAC;QACD,IAAI,OAAO,OAAO,CAAC,IAAI,KAAK,QAAQ;YAAE,YAAY,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC;QACxE,IAAI,OAAO,OAAO,CAAC,IAAI,KAAK,QAAQ;YAAE,
YAAY,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC;QAExE,+DAA+D;QAC/D,MAAM,eAAe,GAAG,CAAC,KAAK,IAAI,EAAE;YAClC,IAAI,CAAC;gBACH,MAAM,GAAI,CAAC,QAAQ,CAAC,YAAkD,CAAC,CAAC;YAC1E,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,aAAa,GAAG,CAAC,CAAC;YACpB,CAAC;oBAAS,CAAC;gBACT,YAAY,GAAG,IAAI,CAAC;gBACpB,YAAY,EAAE,CAAC;YACjB,CAAC;QACH,CAAC,CAAC,EAAE,CAAC;QAEL,IAAI,CAAC;YACH,OAAO,CAAC,YAAY,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACzC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;oBACvB,IAAI,OAAO,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAM;oBACnC,MAAM,IAAI,OAAO,CAAO,CAAC,CAAC,EAAE,EAAE;wBAC5B,QAAQ,GAAG,CAAC,CAAC;oBACf,CAAC,CAAC,CAAC;gBACL,CAAC;gBACD,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;gBAC3B,IAAI,IAAI;oBAAE,MAAM,IAAI,CAAC;YACvB,CAAC;QACH,CAAC;gBAAS,CAAC;YACT,MAAM,eAAe,CAAC;QACxB,CAAC;QAED,IAAI,aAAa,EAAE,CAAC;YAClB,MAAM;gBACJ,IAAI,EAAE,OAAO;gBACb,OAAO,EAAE,aAAa,YAAY,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC;gBACvF,WAAW,EAAE,KAAK;aACnB,CAAC;YACF,OAAO;QACT,CAAC;QAED,MAAM;YACJ,IAAI,EAAE,OAAO;YACb,YAAY;YACZ,YAAY;YACZ,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;SACpD,CAAC;IACJ,CAAC;IAED,KAAK,UAAU,OAAO;QACpB,QAAQ,CAAC,UAAU,CAAC,CAAC;QACrB,SAAS,CAAC,KAAK,EAAE,CAAC;QAClB,QAAQ,CAAC,KAAK,EAAE,CAAC;QACjB,4DAA4D;QAC5D,uDAAuD;QACvD,SAAS,GAAG,IAAI,CAAC;QACjB,GAAG,GAAG,IAAI,CAAC;IACb,CAAC;IAED,OAAO;QACL,IAAI,KAAK;YACP,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,KAAK;YACP,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,YAAY;YACd,OAAO,YAAY,CAAC;QACtB,CAAC;QACD,WAAW;QACX,EAAE;QACF,QAAQ;QACR,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAA6C,CAAI;IAC3E,OAAO,CAAC,CAAC;AACX,CAAC;AAED,gFAAgF;AAEhF,SAAS,cAAc,CAAC,OAAgB;IACtC,+DAA+D;IAC/D,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,iBAAiB,CACxB,SAA8B,EAC9B,QAAsC,EACtC,QAAsD,EACtD,KAA+B,EAC/B,cAAwB;IAExB,IAAI,QAAQ;QAAE,OAAO,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACxC,+DAA+D;IAC/D,gEAAgE;IAChE,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACxC,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,OAAO,EAAE,CAAC,CAAC,IAAI;KAChB,CAAC,CAAC,CAAC;IACJ,kEAAkE;IAClE,mEAAmE;IACnE,k
EAAkE;IAClE,8DAA8D;IAC9D,kEAAkE;IAClE,EAAE;IACF,oEAAoE;IACpE,oEAAoE;IACpE,gEAAgE;IAChE,+DAA+D;IAC/D,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,SAAS,CAAC,mBAAmB,CAAC,YAAY,EAAE;QAC3D,qBAAqB,EAAE,IAAI;QAC3B,QAAQ,EAAE,KAAK;QACf,GAAG,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,KAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACnE,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAGrD,CAAC,CAAC;IACH,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACjC,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;IACjF,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,WAAW,CAAC,QAAiB;IACpC,oEAAoE;IACpE,IAAI,CAAC,QAAQ,IAAI,OAAO,QAAQ,KAAK,QAAQ;QAAE,OAAO,CAAC,CAAC;IACxD,MAAM,IAAI,GAAI,QAA+B,CAAC,IAAI,CAAC;IACnD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrC,OAAO,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;AACjD,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@inbrowser/model` — on-device LLM engine.
|
|
3
|
+
*
|
|
4
|
+
* Root export carries the engine factory + types + `definePreset`.
|
|
5
|
+
* Subpaths:
|
|
6
|
+
* - `@inbrowser/model/presets` — bundled Gemma 4 presets.
|
|
7
|
+
* - `@inbrowser/model/relay` — adapter to `@inbrowser/relay`.
|
|
8
|
+
* - `@inbrowser/model/agent` — adapter to `@inbrowser/agent`.
|
|
9
|
+
* - `@inbrowser/model/worker` — host/connect helpers.
|
|
10
|
+
*
|
|
11
|
+
* Spread a preset into `createEngine` to get a running engine:
|
|
12
|
+
*
|
|
13
|
+
* import { createEngine } from '@inbrowser/model';
|
|
14
|
+
* import { gemma4_E2B } from '@inbrowser/model/presets';
|
|
15
|
+
* const engine = createEngine(gemma4_E2B);
|
|
16
|
+
*/
|
|
17
|
+
export { createEngine, definePreset } from './engine.js';
|
|
18
|
+
export { parseToolCalls, type ToolCallParseOpts } from './parse-tool-calls.js';
|
|
19
|
+
export { splitThinking, type ThinkingSplitOpts } from './think.js';
|
|
20
|
+
export type { Backend, CreateEngineOpts, Dtype, Engine, EngineCapabilities, EngineEvent, EngineEventMap, EngineHooks, EngineMessage, EngineState, GenerateOpts, LoadProgress, MediaPart, ModelPreset, ModelRef, ToolSpec, } from './types.js';
|
|
21
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,cAAc,EAAE,KAAK,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAC/E,OAAO,EAAE,aAAa,EAAE,KAAK,iBAAiB,EAAE,MAAM,YAAY,CAAC;AAEnE,YAAY,EACV,OAAO,EACP,gBAAgB,EAChB,KAAK,EACL,MAAM,EACN,kBAAkB,EAClB,WAAW,EACX,cAAc,EACd,WAAW,EACX,aAAa,EACb,WAAW,EACX,YAAY,EACZ,YAAY,EACZ,SAAS,EACT,WAAW,EACX,QAAQ,EACR,QAAQ,GACT,MAAM,YAAY,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `@inbrowser/model` — on-device LLM engine.
|
|
3
|
+
*
|
|
4
|
+
* Root export carries the engine factory + types + `definePreset`.
|
|
5
|
+
* Subpaths:
|
|
6
|
+
* - `@inbrowser/model/presets` — bundled Gemma 4 presets.
|
|
7
|
+
* - `@inbrowser/model/relay` — adapter to `@inbrowser/relay`.
|
|
8
|
+
* - `@inbrowser/model/agent` — adapter to `@inbrowser/agent`.
|
|
9
|
+
* - `@inbrowser/model/worker` — host/connect helpers.
|
|
10
|
+
*
|
|
11
|
+
* Spread a preset into `createEngine` to get a running engine:
|
|
12
|
+
*
|
|
13
|
+
* import { createEngine } from '@inbrowser/model';
|
|
14
|
+
* import { gemma4_E2B } from '@inbrowser/model/presets';
|
|
15
|
+
* const engine = createEngine(gemma4_E2B);
|
|
16
|
+
*/
|
|
17
|
+
export { createEngine, definePreset } from './engine.js';
|
|
18
|
+
export { parseToolCalls } from './parse-tool-calls.js';
|
|
19
|
+
export { splitThinking } from './think.js';
|
|
20
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,cAAc,EAA0B,MAAM,uBAAuB,CAAC;AAC/E,OAAO,EAAE,aAAa,EAA0B,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `parseToolCalls` — stream transformer that detects native
|
|
3
|
+
* tool-call envelopes the model emits and re-emits them as
|
|
4
|
+
* `kind: 'tool_call'` engine events.
|
|
5
|
+
*
|
|
6
|
+
* Different model families use different envelope formats:
|
|
7
|
+
*
|
|
8
|
+
* Qwen 2/3, DeepSeek R1, Hermes-Pro:
|
|
9
|
+
* <tool_call>
|
|
10
|
+
* {"name": "func", "arguments": {...}}
|
|
11
|
+
* </tool_call>
|
|
12
|
+
*
|
|
13
|
+
* Llama 3.2+ (not yet supported — uses special tokens):
|
|
14
|
+
* <|python_tag|>{"name": "func", "parameters": {...}}<|eom_id|>
|
|
15
|
+
*
|
|
16
|
+
* Mistral v0.3+ (not yet supported):
|
|
17
|
+
* [TOOL_CALLS]{"name": "func", "arguments": {...}}[/TOOL_CALLS]
|
|
18
|
+
*
|
|
19
|
+
* Default format is `'qwen'` (handles Qwen 2/3, DeepSeek R1, Hermes).
|
|
20
|
+
* Add new formats by extending the format union.
|
|
21
|
+
*
|
|
22
|
+
* The implementation is a buffer-aware state machine mirroring
|
|
23
|
+
* `splitThinking` — partial tags split across token boundaries
|
|
24
|
+
* resolve correctly. Inside-tag content is parsed as JSON to extract
|
|
25
|
+
* `name` and `arguments`; on parse failure the args field carries
|
|
26
|
+
* `{ _raw: string }` so the caller can surface or salvage.
|
|
27
|
+
*
|
|
28
|
+
* Pass-through behavior: `thinking`, `usage`, and `error` events
|
|
29
|
+
* forward unchanged. `token` events outside an envelope forward as
|
|
30
|
+
* `token`; inside an envelope they're buffered and converted to a
|
|
31
|
+
* single `tool_call` event on close.
|
|
32
|
+
*/
|
|
33
|
+
import type { EngineEvent } from './types.js';
|
|
34
|
+
/** Options accepted by `parseToolCalls`. */
export interface ToolCallParseOpts {
    /**
     * Which tool-call envelope to look for. Defaults to `'qwen'`:
     * `<tool_call>...</tool_call>` wrapping a JSON body with `name` and
     * `arguments`. The body is parsed when the envelope closes; a body
     * that is not valid JSON is reported as
     * `{ args: { _raw: string } }` so consumers can salvage or surface it.
     */
    format?: 'qwen';
    /**
     * Custom id generator for emitted tool calls. When omitted, ids get
     * a short random suffix. Supply one in tests that need
     * deterministic ids.
     */
    generateId?: () => string;
}
|
|
48
|
+
export declare function parseToolCalls(source: AsyncIterable<EngineEvent>, opts?: ToolCallParseOpts): AsyncIterable<EngineEvent>;
|
|
49
|
+
//# sourceMappingURL=parse-tool-calls.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-tool-calls.d.ts","sourceRoot":"","sources":["../src/parse-tool-calls.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAE9C,MAAM,WAAW,iBAAiB;IAChC;;;;;OAKG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,MAAM,CAAC;CAC3B;AAgCD,wBAAuB,cAAc,CACnC,MAAM,EAAE,aAAa,CAAC,WAAW,CAAC,EAClC,IAAI,GAAE,iBAAsB,GAC3B,aAAa,CAAC,WAAW,CAAC,CA6D5B"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `parseToolCalls` — stream transformer that detects native
|
|
3
|
+
* tool-call envelopes the model emits and re-emits them as
|
|
4
|
+
* `kind: 'tool_call'` engine events.
|
|
5
|
+
*
|
|
6
|
+
* Different model families use different envelope formats:
|
|
7
|
+
*
|
|
8
|
+
* Qwen 2/3, DeepSeek R1, Hermes-Pro:
|
|
9
|
+
* <tool_call>
|
|
10
|
+
* {"name": "func", "arguments": {...}}
|
|
11
|
+
* </tool_call>
|
|
12
|
+
*
|
|
13
|
+
* Llama 3.2+ (not yet supported — uses special tokens):
|
|
14
|
+
* <|python_tag|>{"name": "func", "parameters": {...}}<|eom_id|>
|
|
15
|
+
*
|
|
16
|
+
* Mistral v0.3+ (not yet supported):
|
|
17
|
+
* [TOOL_CALLS]{"name": "func", "arguments": {...}}[/TOOL_CALLS]
|
|
18
|
+
*
|
|
19
|
+
* Default format is `'qwen'` (handles Qwen 2/3, DeepSeek R1, Hermes).
|
|
20
|
+
* Add new formats by extending the format union.
|
|
21
|
+
*
|
|
22
|
+
* The implementation is a buffer-aware state machine mirroring
|
|
23
|
+
* `splitThinking` — partial tags split across token boundaries
|
|
24
|
+
* resolve correctly. Inside-tag content is parsed as JSON to extract
|
|
25
|
+
* `name` and `arguments`; on parse failure the args field carries
|
|
26
|
+
* `{ _raw: string }` so the caller can surface or salvage.
|
|
27
|
+
*
|
|
28
|
+
* Pass-through behavior: `thinking`, `usage`, and `error` events
|
|
29
|
+
* forward unchanged. `token` events outside an envelope forward as
|
|
30
|
+
* `token`; inside an envelope they're buffered and converted to a
|
|
31
|
+
* single `tool_call` event on close.
|
|
32
|
+
*/
|
|
33
|
+
/** Marker that opens a Qwen-style tool-call envelope in the token stream. */
const DEFAULT_OPEN_TAG = '<tool_call>';
/** Marker that closes a Qwen-style tool-call envelope in the token stream. */
const DEFAULT_CLOSE_TAG = '</tool_call>';
|
|
35
|
+
/**
 * Default tool-call id generator: `tc_` followed by exactly eight base-36
 * characters.
 *
 * `Math.random().toString(36)` omits trailing zeros, so the fractional part
 * can be shorter than eight characters — and is missing entirely when the
 * draw is exactly 0, which would yield the bare id `tc_`. The suffix is
 * right-padded with `0` so every id has the same width.
 *
 * Not cryptographically secure.
 *
 * @returns {string} An id of the form `tc_xxxxxxxx`.
 */
function defaultId() {
    const suffix = Math.random().toString(36).slice(2, 10).padEnd(8, '0');
    return `tc_${suffix}`;
}
|
|
38
|
+
/**
 * Parse the text found between `<tool_call>` and `</tool_call>`.
 *
 * The body is expected to be a JSON object carrying a string `name` and an
 * `arguments` value. `parameters` is accepted as a fallback key because some
 * templates (and occasionally the model itself) use it instead.
 *
 * Anything that is not a JSON object — invalid JSON, `null`, a primitive, or
 * an array — is reported as malformed: the result has an empty `name` and
 * `args` of `{ _raw: <trimmed body> }`, so the caller can surface or salvage
 * the original text instead of receiving a silently empty call.
 *
 * @param {string} body Raw text captured inside the envelope.
 * @returns {{ name: string, args: unknown }} Extracted call; `name` is `''`
 *   when missing or not a string, `args` is `{}` when neither key is present.
 */
function parseQwenBody(body) {
    const trimmed = body.trim();
    const malformed = { name: '', args: { _raw: trimmed } };
    let parsed;
    try {
        parsed = JSON.parse(trimmed);
    }
    catch {
        // Deliberate best-effort: a broken body must not abort the stream.
        return malformed;
    }
    // Valid JSON that is not an object cannot describe a call; keep the raw
    // text rather than pretending an empty call was requested.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        return malformed;
    }
    const name = typeof parsed.name === 'string' ? parsed.name : '';
    // `??` (not `!== undefined`) so an explicit `"arguments": null` also falls
    // back to `parameters` / `{}` instead of leaking `null` to consumers.
    const args = parsed.arguments ?? parsed.parameters ?? {};
    return { name, args };
}
|
|
55
|
+
/**
 * Stream transformer that turns `<tool_call>...</tool_call>` envelopes found
 * in `token` events into single `tool_call` events.
 *
 * Behavior:
 * - Events whose `kind` is not `'token'` are forwarded unchanged, as soon as
 *   they arrive.
 * - Token text outside an envelope is re-emitted as `token` events. Only a
 *   trailing fragment that could still grow into the open tag (for example
 *   `"<tool"`) is held back until the next token decides it; all other text
 *   is emitted immediately, so it keeps its position relative to later
 *   pass-through events.
 * - Token text inside an envelope is buffered until the close tag, then
 *   parsed and emitted as one `{ kind: 'tool_call', id, name, args }` event.
 * - At end of stream, an unterminated envelope is parsed best-effort and
 *   emitted as a `tool_call`; leftover plain text is emitted as a `token`.
 *
 * @param {AsyncIterable<object>} source Engine event stream.
 * @param {{ format?: 'qwen', generateId?: () => string }} [opts]
 *   `generateId` overrides the id generator; the envelope tags are currently
 *   fixed to the Qwen-style defaults.
 * @returns {AsyncGenerator<object>} The transformed event stream.
 */
export async function* parseToolCalls(source, opts = {}) {
    const open = DEFAULT_OPEN_TAG;
    const close = DEFAULT_CLOSE_TAG;
    const genId = opts.generateId ?? defaultId;
    // Length of the longest suffix of `text` that is a proper prefix of the
    // open tag, i.e. how many trailing characters might still become a tag.
    const partialOpenLength = (text) => {
        const longest = Math.min(text.length, open.length - 1);
        for (let len = longest; len > 0; len -= 1) {
            if (open.startsWith(text.slice(text.length - len))) {
                return len;
            }
        }
        return 0;
    };
    let mode = 'normal';
    let buffer = '';
    for await (const evt of source) {
        if (evt.kind !== 'token') {
            yield evt;
            continue;
        }
        buffer += evt.text;
        while (buffer.length > 0) {
            if (mode === 'normal') {
                const idx = buffer.indexOf(open);
                if (idx === -1) {
                    // Hold back only a tail that really is a partial open tag;
                    // unconditionally withholding `open.length - 1` characters
                    // would delay plain text past later pass-through events.
                    const safeLen = buffer.length - partialOpenLength(buffer);
                    if (safeLen > 0) {
                        yield { kind: 'token', text: buffer.slice(0, safeLen) };
                        buffer = buffer.slice(safeLen);
                    }
                    break;
                }
                if (idx > 0) {
                    yield { kind: 'token', text: buffer.slice(0, idx) };
                }
                buffer = buffer.slice(idx + open.length);
                mode = 'inside';
            }
            else {
                const idx = buffer.indexOf(close);
                if (idx === -1) {
                    // Don't dribble out a partial tool-call body — wait for the
                    // close tag so exactly one tool_call event is emitted per
                    // envelope. The whole body stays in `buffer` until then.
                    break;
                }
                const body = buffer.slice(0, idx);
                const { name, args } = parseQwenBody(body);
                yield { kind: 'tool_call', id: genId(), name, args };
                buffer = buffer.slice(idx + close.length);
                mode = 'normal';
            }
        }
    }
    // Flush. If the stream ended mid-envelope (e.g. generation was cut off
    // before the close tag), parse whatever body exists and emit it.
    if (buffer.length > 0) {
        if (mode === 'inside') {
            const { name, args } = parseQwenBody(buffer);
            yield { kind: 'tool_call', id: genId(), name, args };
        }
        else {
            yield { kind: 'token', text: buffer };
        }
    }
}
|
|
115
|
+
//# sourceMappingURL=parse-tool-calls.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parse-tool-calls.js","sourceRoot":"","sources":["../src/parse-tool-calls.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAmBH,MAAM,gBAAgB,GAAG,aAAa,CAAC;AACvC,MAAM,iBAAiB,GAAG,cAAc,CAAC;AAEzC,SAAS,SAAS;IAChB,OAAO,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;AACzD,CAAC;AAOD,SAAS,aAAa,CAAC,IAAY;IACjC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAA4C,CAAC;QAC9E,MAAM,IAAI,GAAG,OAAO,MAAM,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,gEAAgE;QAChE,+DAA+D;QAC/D,YAAY;QACZ,MAAM,IAAI,GACR,MAAM,CAAC,SAAS,KAAK,SAAS;YAC5B,CAAC,CAAC,MAAM,CAAC,SAAS;YAClB,CAAC,CAAC,CAAE,MAAmC,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC;QAC9D,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,CAAC;IAC/C,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,cAAc,CACnC,MAAkC,EAClC,OAA0B,EAAE;IAE5B,MAAM,IAAI,GAAG,gBAAgB,CAAC;IAC9B,MAAM,KAAK,GAAG,iBAAiB,CAAC;IAChC,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,IAAI,SAAS,CAAC;IAE3C,IAAI,IAAI,GAAwB,QAAQ,CAAC;IACzC,IAAI,MAAM,GAAG,EAAE,CAAC;IAEhB,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAC/B,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;YACzB,MAAM,GAAG,CAAC;YACV,SAAS;QACX,CAAC;QACD,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC;QAEnB,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;gBACjC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;oBACf,6DAA6D;oBAC7D,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBAClD,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;wBAChB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,EAAE,CAAC;wBACxD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;oBACjC,CAAC;oBACD,MAAM;gBACR,CAAC;gBACD,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC;oBACZ,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;g
BACtD,CAAC;gBACD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;gBACzC,IAAI,GAAG,QAAQ,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBAClC,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;oBACf,0DAA0D;oBAC1D,yDAAyD;oBACzD,8DAA8D;oBAC9D,yDAAyD;oBACzD,MAAM;gBACR,CAAC;gBACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;gBAClC,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;gBAC3C,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gBACrD,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC;gBAC1C,IAAI,GAAG,QAAQ,CAAC;YAClB,CAAC;QACH,CAAC;IACH,CAAC;IAED,mEAAmE;IACnE,2DAA2D;IAC3D,wCAAwC;IACxC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;YACtB,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC;YAC7C,MAAM,EAAE,IAAI,EAAE,WAAW,EAAE,EAAE,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACvD,CAAC;aAAM,CAAC;YACN,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QACxC,CAAC;IACH,CAAC;AACH,CAAC"}
|