localm-web 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +81 -0
- package/README.md +15 -3
- package/dist/assets/{inference.worker-CwvQtobb.js → inference.worker-DZbXKJZY.js} +49 -5
- package/dist/assets/inference.worker-DZbXKJZY.js.map +1 -0
- package/dist/index.d.ts +189 -5
- package/dist/index.js +405 -16
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/assets/inference.worker-CwvQtobb.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,10 +1,3 @@
|
|
|
1
|
-
const DOWNLOAD_PATTERN = /\b(fetch|download|loading from cache|cache hit|param)/i;
|
|
2
|
-
const COMPILE_PATTERN = /\b(compil|shader|kernel|tensor|init|allocat|warm)/i;
|
|
3
|
-
function classifyLoadPhase(text) {
|
|
4
|
-
if (DOWNLOAD_PATTERN.test(text)) return "downloading";
|
|
5
|
-
if (COMPILE_PATTERN.test(text)) return "compiling";
|
|
6
|
-
return "loading";
|
|
7
|
-
}
|
|
8
1
|
class LocalmWebError extends Error {
|
|
9
2
|
/**
|
|
10
3
|
* @param message - Human-readable description of the error.
|
|
@@ -30,6 +23,301 @@ class QuotaExceededError extends LocalmWebError {
|
|
|
30
23
|
}
|
|
31
24
|
class BackendNotAvailableError extends LocalmWebError {
|
|
32
25
|
}
|
|
26
|
+
/**
 * Error raised by the structured-output helpers in this module: thrown when a
 * supplied JSON Schema fails the basic shape check, or when engine output
 * cannot be parsed as JSON.
 */
class StructuredOutputError extends LocalmWebError {}
|
|
28
|
+
/** Keywords in a progress message that indicate weights are being fetched or read from cache. */
const DOWNLOAD_PATTERN = /\b(fetch|download|loading from cache|cache hit|param)/i;
/** Keywords in a progress message that indicate compilation / initialization work. */
const COMPILE_PATTERN = /\b(compil|shader|kernel|tensor|init|allocat|warm)/i;

/**
 * Map a free-form progress message onto a coarse load phase.
 *
 * Rules are evaluated in order, so a message matching both patterns is
 * reported as "downloading".
 *
 * @param text - Progress/status message reported by the backend.
 * @returns "downloading", "compiling", or "loading" when no keyword matches.
 */
function classifyLoadPhase(text) {
  const phaseRules = [
    [DOWNLOAD_PATTERN, "downloading"],
    [COMPILE_PATTERN, "compiling"]
  ];
  const matchedRule = phaseRules.find(([pattern]) => pattern.test(text));
  return matchedRule ? matchedRule[1] : "loading";
}
|
|
35
|
+
/** In-flight or settled promise for the lazily imported transformers.js module; null when no import is cached. */
let transformersModulePromise$2 = null;

/**
 * Lazily import `@huggingface/transformers`, sharing one import across callers.
 *
 * A failed import is not kept in the cache: the cached promise is cleared on
 * rejection so that a later call can retry instead of replaying the same
 * failure forever.
 *
 * @returns Promise resolving to the transformers.js module namespace.
 */
async function loadTransformers$2() {
  if (!transformersModulePromise$2) {
    const pending = import("@huggingface/transformers");
    transformersModulePromise$2 = pending;
    // The caller still observes the rejection through the returned promise;
    // this handler only evicts the failed attempt from the cache.
    pending.catch(() => {
      if (transformersModulePromise$2 === pending) {
        transformersModulePromise$2 = null;
      }
    });
  }
  return transformersModulePromise$2;
}
|
|
42
|
+
/**
 * Translate generation options into keyword arguments for the text-generation
 * pipeline call.
 *
 * Only options that are not `undefined` are copied. `do_sample` is switched on
 * solely when a temperature greater than zero is supplied.
 *
 * @param options - Generation options (`maxTokens`, `temperature`, `topP`, `topK`).
 * @returns Plain object of pipeline kwargs.
 */
function buildSamplingKwargs(options) {
  const optionToKwarg = [
    ["maxTokens", "max_new_tokens"],
    ["temperature", "temperature"],
    ["topP", "top_p"],
    ["topK", "top_k"]
  ];
  const kwargs = {};
  for (const [optionName, kwargName] of optionToKwarg) {
    const value = options[optionName];
    if (value !== undefined) {
      kwargs[kwargName] = value;
    }
  }
  const { temperature } = options;
  if (temperature !== undefined && temperature > 0) {
    kwargs.do_sample = true;
  }
  return kwargs;
}
|
|
53
|
+
/**
 * Reduce each message to the `{ role, content }` pair handed to the pipeline,
 * dropping any other properties.
 *
 * @param messages - Conversation messages.
 * @returns New array of `{ role, content }` objects in the same order.
 */
function toChatMessages$1(messages) {
  const chat = [];
  for (const message of messages) {
    chat.push({ role: message.role, content: message.content });
  }
  return chat;
}
|
|
56
|
+
/**
 * Extract the generated reply from a pipeline result.
 *
 * Handles the two `generated_text` shapes this module deals with:
 * - a string, from which a leading `promptText` is stripped when present;
 * - an array of chat turns, from which the last `assistant` turn's content is
 *   returned.
 *
 * @param output - Pipeline result, or an array whose first element is the result.
 * @param promptText - Prompt to strip from the start of string output.
 * @returns The reply text, or "" when nothing usable is found.
 */
function lastAssistantContent(output, promptText) {
  const first = Array.isArray(output) ? output[0] : output;
  if (!first) {
    return "";
  }
  const { generated_text: generated } = first;
  if (typeof generated === "string") {
    if (generated.startsWith(promptText)) {
      return generated.slice(promptText.length);
    }
    return generated;
  }
  if (!Array.isArray(generated)) {
    return "";
  }
  // Search newest-first so the most recent assistant turn wins.
  const reply = [...generated].reverse().find((turn) => turn && turn.role === "assistant");
  return reply ? reply.content : "";
}
|
|
71
|
+
/**
 * Create a queue that bridges a push-style producer (callbacks) to a
 * pull-style async iterator.
 *
 * Semantics:
 * - `push(item)` delivers `item` to a waiting consumer, or buffers it.
 *   Items pushed after `end()` are dropped, so a terminated stream (for
 *   example an aborted one) stops emitting.
 * - `end(error?)` terminates the queue. Items buffered before `end()` are still
 *   delivered; afterwards the consumer receives `error` as a rejection (once),
 *   or `{ done: true }` when no error was given.
 * - A consumer that is already parked in `next()` when `end(error)` is called
 *   is rejected with that error rather than being told the stream finished
 *   normally.
 *
 * @returns `{ push, end, iterator }`, where `iterator` is an async iterable.
 */
function createAsyncQueue() {
  const buffer = [];
  // Parked `next()` calls, kept as the settle functions of their promises.
  let waiters = [];
  let finished = false;
  let pendingError = null;

  const drain = () => {
    while (buffer.length > 0 && waiters.length > 0) {
      const waiter = waiters.shift();
      waiter.resolve({ value: buffer.shift(), done: false });
    }
    if (waiters.length === 0 || !(finished || pendingError)) {
      return;
    }
    // Waiters remain, so the buffer is empty: settle every parked consumer.
    const parked = waiters;
    waiters = [];
    for (const waiter of parked) {
      if (pendingError) {
        // Report the error exactly once; any further waiter sees a normal end.
        const err = pendingError;
        pendingError = null;
        waiter.reject(err);
      } else {
        waiter.resolve({ value: void 0, done: true });
      }
    }
  };

  return {
    push(item) {
      if (finished) return;
      buffer.push(item);
      drain();
    },
    end(error) {
      finished = true;
      if (error) pendingError = error;
      drain();
    },
    iterator: {
      [Symbol.asyncIterator]() {
        return {
          next() {
            if (buffer.length > 0) {
              return Promise.resolve({ value: buffer.shift(), done: false });
            }
            if (pendingError) {
              const err = pendingError;
              pendingError = null;
              return Promise.reject(err);
            }
            if (finished) {
              return Promise.resolve({ value: void 0, done: true });
            }
            return new Promise((resolve, reject) => {
              waiters.push({ resolve, reject });
            });
          }
        };
      }
    }
  };
}
|
|
126
|
+
/**
 * Text engine backed by a transformers.js `text-generation` pipeline.
 *
 * The pipeline is created by {@link TransformersTextEngine.load} and kept in
 * `generator`; every generation method requires it to be present.
 */
class TransformersTextEngine {
  /** The loaded pipeline callable, or null before load() / after unload(). */
  generator = null;
  /** Aborted and cleared by unload(); this class never assigns it itself. */
  currentAbortController = null;

  /** @returns true once a pipeline has been loaded and not yet unloaded. */
  isLoaded() {
    return this.generator !== null;
  }

  /**
   * Create the text-generation pipeline for `modelId`.
   *
   * @param modelId - Model identifier passed to `transformers.pipeline`.
   * @param onProgress - Optional callback receiving load progress reports.
   * @throws ModelLoadError if pipeline creation (or the progress callback) throws.
   */
  async load(modelId, onProgress) {
    const transformers = await loadTransformers$2();
    try {
      const generator = await transformers.pipeline("text-generation", modelId, {
        progress_callback: (report) => {
          // Scaled by 1/100 — presumably report.progress is a percentage; confirm against transformers.js.
          const progress = typeof report.progress === "number" ? report.progress / 100 : 0;
          const text = report.status ?? "loading";
          onProgress?.({
            progress,
            text,
            loaded: 0,
            total: 0,
            phase: classifyLoadPhase(text)
          });
        }
      });
      this.generator = generator;
      onProgress?.({
        progress: 1,
        text: "Model ready.",
        loaded: 0,
        total: 0,
        phase: "ready"
      });
    } catch (err) {
      throw new ModelLoadError(`Failed to load transformers model "${modelId}".`, err);
    }
  }

  /**
   * Generate a full chat reply.
   *
   * @param messages - Conversation messages.
   * @param options - Sampling options; `signal` is only checked before start.
   * @returns Content of the last assistant turn, or "".
   * @throws ModelNotLoadedError if no pipeline is loaded.
   * @throws GenerationAbortedError if `options.signal` is already aborted.
   * @throws ModelLoadError wrapping any other generation failure.
   */
  async generate(messages, options = {}) {
    const generator = this.requireGenerator();
    if (options.signal?.aborted) {
      throw new GenerationAbortedError("Generation aborted before start.");
    }
    const chat = toChatMessages$1(messages);
    try {
      const output = await generator(chat, buildSamplingKwargs(options));
      return lastAssistantContent(output, "");
    } catch (err) {
      if (err instanceof GenerationAbortedError) throw err;
      throw new ModelLoadError("Transformers generation failed.", err);
    }
  }

  /**
   * Stream a chat reply as `{ text, index, done }` chunks.
   *
   * @param messages - Conversation messages.
   * @param options - Sampling options plus optional `signal`.
   * @throws ModelNotLoadedError if no pipeline is loaded.
   * @throws GenerationAbortedError if the signal is or becomes aborted.
   */
  async *stream(messages, options = {}) {
    const generator = this.requireGenerator();
    if (options.signal?.aborted) {
      throw new GenerationAbortedError("Generation aborted before start.");
    }
    yield* this.streamGenerated(generator, toChatMessages$1(messages), options);
  }

  /**
   * Generate a full raw-prompt completion.
   *
   * @param prompt - Prompt text.
   * @param options - Sampling options; `signal` is only checked before start.
   * @returns Generated text with a leading copy of `prompt` stripped.
   * @throws ModelNotLoadedError if no pipeline is loaded.
   * @throws GenerationAbortedError if `options.signal` is already aborted.
   * @throws ModelLoadError wrapping any other generation failure.
   */
  async complete(prompt, options = {}) {
    const generator = this.requireGenerator();
    if (options.signal?.aborted) {
      throw new GenerationAbortedError("Generation aborted before start.");
    }
    try {
      const output = await generator(prompt, buildSamplingKwargs(options));
      return lastAssistantContent(output, prompt);
    } catch (err) {
      if (err instanceof GenerationAbortedError) throw err;
      throw new ModelLoadError("Transformers completion failed.", err);
    }
  }

  /**
   * Stream a raw-prompt completion as `{ text, index, done }` chunks.
   *
   * @param prompt - Prompt text.
   * @param options - Sampling options plus optional `signal`.
   * @throws ModelNotLoadedError if no pipeline is loaded.
   * @throws GenerationAbortedError if the signal is or becomes aborted.
   */
  async *streamCompletion(prompt, options = {}) {
    const generator = this.requireGenerator();
    if (options.signal?.aborted) {
      throw new GenerationAbortedError("Generation aborted before start.");
    }
    yield* this.streamGenerated(generator, prompt, options);
  }

  /**
   * Shared streaming core for stream() and streamCompletion(): runs the
   * pipeline with a TextStreamer and relays its callbacks through an async
   * queue.
   *
   * @param generator - The loaded pipeline callable.
   * @param input - Chat messages or prompt text to pass to the pipeline.
   * @param options - Sampling options plus optional `signal`.
   */
  async *streamGenerated(generator, input, options) {
    const transformers = await loadTransformers$2();
    const signal = options.signal;
    // The signal may have fired while the module import was being awaited;
    // no "abort" event would be delivered to a listener added after that.
    if (signal?.aborted) {
      throw new GenerationAbortedError("Generation aborted before start.");
    }
    const queue = createAsyncQueue();
    let index = 0;
    const streamer = new transformers.TextStreamer(generator.tokenizer, {
      skip_prompt: true,
      skip_special_tokens: true,
      callback_function: (text) => {
        if (text) {
          queue.push({ text, index, done: false });
          index += 1;
        }
      }
    });
    const onAbort = () => {
      queue.end(new GenerationAbortedError("Generation aborted by signal."));
    };
    signal?.addEventListener("abort", onAbort, { once: true });
    // Not awaited: completion and failure are both reported through the queue.
    generator(input, { ...buildSamplingKwargs(options), streamer }).then(() => {
      queue.push({ text: "", index, done: true });
      queue.end();
    }).catch((err) => {
      queue.end(err instanceof Error ? err : new Error(String(err)));
    });
    try {
      for await (const chunk of queue.iterator) {
        yield chunk;
      }
    } finally {
      // Detach on every exit path so a long-lived signal does not accumulate listeners.
      signal?.removeEventListener("abort", onAbort);
    }
  }

  /**
   * Dispose the pipeline (when it exposes `dispose()`) and reset engine state.
   * State is reset even if disposal throws; the disposal error still propagates.
   */
  async unload() {
    const disposable = this.generator;
    try {
      if (disposable && typeof disposable.dispose === "function") {
        await disposable.dispose();
      }
    } finally {
      this.generator = null;
      this.currentAbortController?.abort();
      this.currentAbortController = null;
    }
  }

  /**
   * @returns The loaded pipeline callable.
   * @throws ModelNotLoadedError if load() has not completed.
   */
  requireGenerator() {
    if (!this.generator) {
      throw new ModelNotLoadedError(
        "TransformersTextEngine not loaded. Call load() before generation."
      );
    }
    return this.generator;
  }
}
|
|
286
|
+
/**
 * Shallow sanity check that `schema` looks like a JSON Schema object.
 *
 * This is a heuristic, not schema validation: it requires a non-null,
 * non-array object that has at least one well-known schema keyword as an own
 * enumerable key.
 *
 * @param schema - Candidate JSON Schema.
 * @throws StructuredOutputError if the value is not an object or carries none
 *   of the recognized keywords.
 */
function assertJsonSchema(schema) {
  const isObjectLike = typeof schema === "object" && schema !== null && !Array.isArray(schema);
  if (!isObjectLike) {
    throw new StructuredOutputError("jsonSchema must be a plain object describing a JSON Schema.");
  }
  const schemaKeywords = new Set([
    "type",
    "$ref",
    "oneOf",
    "anyOf",
    "allOf",
    "enum",
    "const",
    "properties"
  ]);
  const hasKeyword = Object.keys(schema).some((key) => schemaKeywords.has(key));
  if (hasKeyword) {
    return;
  }
  throw new StructuredOutputError(
    "jsonSchema does not look like a JSON Schema (missing type/$ref/oneOf/anyOf/allOf/enum/const/properties)."
  );
}
|
|
307
|
+
/**
 * Validate a JSON Schema with {@link assertJsonSchema} and serialize it.
 *
 * @param schema - JSON Schema object.
 * @returns The schema as a JSON string.
 * @throws StructuredOutputError if the schema fails the shape check.
 */
function serializeJsonSchema(schema) {
  // Validate before serializing so a malformed schema is rejected up front.
  assertJsonSchema(schema);
  const serialized = JSON.stringify(schema);
  return serialized;
}
|
|
311
|
+
/**
 * Parse engine output as JSON.
 *
 * @param text - Raw text produced by the engine.
 * @returns The parsed JSON value (not validated against any schema).
 * @throws StructuredOutputError wrapping the parse failure when `text` is not
 *   valid JSON.
 */
function parseStructuredOutput(text) {
  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (cause) {
    throw new StructuredOutputError(
      "Engine output is not valid JSON. The model may have ignored the constrained decoding directive.",
      cause
    );
  }
  return parsed;
}
|
|
33
321
|
let webllmModulePromise = null;
|
|
34
322
|
async function loadWebLLM() {
|
|
35
323
|
if (!webllmModulePromise) {
|
|
@@ -47,6 +335,15 @@ function buildSamplingParams(options) {
|
|
|
47
335
|
if (options.topP !== void 0) params.top_p = options.topP;
|
|
48
336
|
return params;
|
|
49
337
|
}
|
|
338
|
+
/**
 * Build the `response_format` value for a completion request.
 *
 * `jsonSchema` takes precedence over `json`: when a schema is supplied it is
 * validated and serialized into the `schema` field.
 *
 * @param options - Generation options (`jsonSchema`, `json`).
 * @returns A `{ type: "json_object", schema? }` object, or `undefined` when no
 *   structured output was requested.
 * @throws StructuredOutputError if `jsonSchema` fails the shape check.
 */
function buildResponseFormat(options) {
  const { jsonSchema } = options;
  if (jsonSchema !== undefined) {
    const schema = serializeJsonSchema(jsonSchema);
    return { type: "json_object", schema };
  }
  return options.json ? { type: "json_object" } : undefined;
}
|
|
50
347
|
function toChatMessages(messages) {
|
|
51
348
|
return messages.map((m) => {
|
|
52
349
|
switch (m.role) {
|
|
@@ -101,10 +398,12 @@ class WebLLMEngine {
|
|
|
101
398
|
if (options.signal?.aborted) {
|
|
102
399
|
throw new GenerationAbortedError("Generation aborted before start.");
|
|
103
400
|
}
|
|
401
|
+
const responseFormat = buildResponseFormat(options);
|
|
104
402
|
const completion = await engine.chat.completions.create({
|
|
105
403
|
...buildSamplingParams(options),
|
|
106
404
|
messages: toChatMessages(messages),
|
|
107
|
-
stream: false
|
|
405
|
+
stream: false,
|
|
406
|
+
...responseFormat ? { response_format: responseFormat } : {}
|
|
108
407
|
});
|
|
109
408
|
return completion.choices[0]?.message?.content ?? "";
|
|
110
409
|
}
|
|
@@ -113,10 +412,12 @@ class WebLLMEngine {
|
|
|
113
412
|
if (options.signal?.aborted) {
|
|
114
413
|
throw new GenerationAbortedError("Generation aborted before start.");
|
|
115
414
|
}
|
|
415
|
+
const responseFormat = buildResponseFormat(options);
|
|
116
416
|
const completion = await engine.chat.completions.create({
|
|
117
417
|
...buildSamplingParams(options),
|
|
118
418
|
messages: toChatMessages(messages),
|
|
119
|
-
stream: true
|
|
419
|
+
stream: true,
|
|
420
|
+
...responseFormat ? { response_format: responseFormat } : {}
|
|
120
421
|
});
|
|
121
422
|
let index = 0;
|
|
122
423
|
let finished = false;
|
|
@@ -150,10 +451,12 @@ class WebLLMEngine {
|
|
|
150
451
|
if (options.signal?.aborted) {
|
|
151
452
|
throw new GenerationAbortedError("Generation aborted before start.");
|
|
152
453
|
}
|
|
454
|
+
const responseFormat = buildResponseFormat(options);
|
|
153
455
|
const completion = await engine.completions.create({
|
|
154
456
|
...buildSamplingParams(options),
|
|
155
457
|
prompt,
|
|
156
|
-
stream: false
|
|
458
|
+
stream: false,
|
|
459
|
+
...responseFormat ? { response_format: responseFormat } : {}
|
|
157
460
|
});
|
|
158
461
|
return completion.choices[0]?.text ?? "";
|
|
159
462
|
}
|
|
@@ -162,10 +465,12 @@ class WebLLMEngine {
|
|
|
162
465
|
if (options.signal?.aborted) {
|
|
163
466
|
throw new GenerationAbortedError("Generation aborted before start.");
|
|
164
467
|
}
|
|
468
|
+
const responseFormat = buildResponseFormat(options);
|
|
165
469
|
const completion = await engine.completions.create({
|
|
166
470
|
...buildSamplingParams(options),
|
|
167
471
|
prompt,
|
|
168
|
-
stream: true
|
|
472
|
+
stream: true,
|
|
473
|
+
...responseFormat ? { response_format: responseFormat } : {}
|
|
169
474
|
});
|
|
170
475
|
let index = 0;
|
|
171
476
|
let finished = false;
|
|
@@ -491,6 +796,7 @@ const MODEL_PRESETS = Object.freeze({
|
|
|
491
796
|
parameters: "3.8B",
|
|
492
797
|
quantization: "q4f16_1",
|
|
493
798
|
webllmId: "Phi-3.5-mini-instruct-q4f16_1-MLC",
|
|
799
|
+
transformersId: "onnx-community/Phi-3.5-mini-instruct-onnx-web",
|
|
494
800
|
contextWindow: 4096,
|
|
495
801
|
description: "Microsoft Phi-3.5 mini, INT4 quantized for browser inference."
|
|
496
802
|
},
|
|
@@ -500,6 +806,7 @@ const MODEL_PRESETS = Object.freeze({
|
|
|
500
806
|
parameters: "1B",
|
|
501
807
|
quantization: "q4f16_1",
|
|
502
808
|
webllmId: "Llama-3.2-1B-Instruct-q4f16_1-MLC",
|
|
809
|
+
transformersId: "onnx-community/Llama-3.2-1B-Instruct",
|
|
503
810
|
contextWindow: 4096,
|
|
504
811
|
description: "Meta Llama 3.2 1B Instruct, INT4 quantized."
|
|
505
812
|
},
|
|
@@ -509,8 +816,19 @@ const MODEL_PRESETS = Object.freeze({
|
|
|
509
816
|
parameters: "1.5B",
|
|
510
817
|
quantization: "q4f16_1",
|
|
511
818
|
webllmId: "Qwen2.5-1.5B-Instruct-q4f16_1-MLC",
|
|
819
|
+
transformersId: "onnx-community/Qwen2.5-1.5B-Instruct",
|
|
512
820
|
contextWindow: 4096,
|
|
513
821
|
description: "Alibaba Qwen 2.5 1.5B Instruct, INT4 quantized."
|
|
822
|
+
},
|
|
823
|
+
"smollm2-360m-int8": {
|
|
824
|
+
id: "smollm2-360m-int8",
|
|
825
|
+
family: "SmolLM2",
|
|
826
|
+
parameters: "360M",
|
|
827
|
+
quantization: "q8",
|
|
828
|
+
webllmId: "SmolLM2-360M-Instruct-q4f16_1-MLC",
|
|
829
|
+
transformersId: "HuggingFaceTB/SmolLM2-360M-Instruct",
|
|
830
|
+
contextWindow: 2048,
|
|
831
|
+
description: "HuggingFace SmolLM2 360M Instruct — smallest viable chat model, ideal for the fallback path on low-end devices."
|
|
514
832
|
}
|
|
515
833
|
});
|
|
516
834
|
function resolveModelPreset(modelId) {
|
|
@@ -583,12 +901,33 @@ function listSupportedRerankerModels() {
|
|
|
583
901
|
function createInferenceWorker() {
|
|
584
902
|
return new Worker(new URL(
|
|
585
903
|
/* @vite-ignore */
|
|
586
|
-
"/assets/inference.worker-
|
|
904
|
+
"/assets/inference.worker-DZbXKJZY.js",
|
|
587
905
|
import.meta.url
|
|
588
906
|
), {
|
|
589
907
|
type: "module"
|
|
590
908
|
});
|
|
591
909
|
}
|
|
910
|
+
/**
 * Report whether the current environment exposes `navigator.gpu`.
 *
 * Only the presence of the `gpu` property is checked; no adapter is requested.
 *
 * @returns true when a global `navigator` exists and has a `gpu` property.
 */
function defaultWebGPUDetector() {
  if (typeof navigator === "undefined") {
    return false;
  }
  return "gpu" in navigator;
}
|
|
913
|
+
/**
 * Decide which backend should run a model.
 *
 * - "webllm" is always honoured as-is.
 * - "transformers" requires the preset to declare a `transformersId`.
 * - Any other choice (e.g. "auto") prefers WebLLM when WebGPU is available and
 *   otherwise falls back to transformers.js, which again requires a
 *   `transformersId`.
 *
 * @param choice - Requested backend.
 * @param preset - Model preset (`id`, optional `transformersId`).
 * @param webGPUAvailable - Whether WebGPU was detected.
 * @returns "webllm" or "transformers".
 * @throws BackendNotAvailableError when transformers.js is needed but the
 *   preset has no `transformersId`.
 */
function resolveBackend(choice, preset, webGPUAvailable) {
  const transformersForced = choice === "transformers";
  if (choice === "webllm" || (!transformersForced && webGPUAvailable)) {
    return "webllm";
  }
  if (preset.transformersId) {
    return "transformers";
  }
  const reason = transformersForced
    ? `Model "${preset.id}" has no transformersId — cannot run on the transformers.js backend.`
    : `WebGPU is unavailable and model "${preset.id}" has no transformersId for the fallback path.`;
  throw new BackendNotAvailableError(reason);
}
|
|
592
931
|
class LMTask {
|
|
593
932
|
constructor(engine, preset) {
|
|
594
933
|
this.engine = engine;
|
|
@@ -604,13 +943,29 @@ class LMTask {
|
|
|
604
943
|
*/
|
|
605
944
|
static async createEngine(modelId, options = {}) {
|
|
606
945
|
const preset = resolveModelPreset(modelId);
|
|
607
|
-
|
|
946
|
+
if (options.engine) {
|
|
947
|
+
if (!options.engine.isLoaded()) {
|
|
948
|
+
await options.engine.load(preset.webllmId, options.onProgress);
|
|
949
|
+
}
|
|
950
|
+
return { engine: options.engine, preset };
|
|
951
|
+
}
|
|
952
|
+
const choice = options.backend ?? "auto";
|
|
953
|
+
const resolved = resolveBackend(
|
|
954
|
+
choice,
|
|
955
|
+
preset,
|
|
956
|
+
defaultWebGPUDetector()
|
|
957
|
+
);
|
|
958
|
+
const engine = LMTask.instantiateEngine(resolved, options);
|
|
959
|
+
const loadId = resolved === "transformers" ? preset.transformersId ?? "" : preset.webllmId;
|
|
608
960
|
if (!engine.isLoaded()) {
|
|
609
|
-
await engine.load(
|
|
961
|
+
await engine.load(loadId, options.onProgress);
|
|
610
962
|
}
|
|
611
963
|
return { engine, preset };
|
|
612
964
|
}
|
|
613
|
-
static
|
|
965
|
+
static instantiateEngine(resolved, options) {
|
|
966
|
+
if (resolved === "transformers") {
|
|
967
|
+
return new TransformersTextEngine();
|
|
968
|
+
}
|
|
614
969
|
const useWorker = options.inWorker ?? true;
|
|
615
970
|
if (useWorker) {
|
|
616
971
|
return new WorkerEngine(createInferenceWorker());
|
|
@@ -633,6 +988,20 @@ class ChatReply {
|
|
|
633
988
|
this.tokensGenerated = tokensGenerated;
|
|
634
989
|
this.finishReason = finishReason;
|
|
635
990
|
}
|
|
991
|
+
/**
|
|
992
|
+
* Parse {@link ChatReply.text} as JSON.
|
|
993
|
+
*
|
|
994
|
+
* Intended for replies generated with `json: true` or `jsonSchema`.
|
|
995
|
+
* The result is cast to `T` without runtime validation; pair with Zod /
|
|
996
|
+
* Ajv on the call site if you need to verify the schema.
|
|
997
|
+
*
|
|
998
|
+
* @typeParam T - Expected parsed shape.
|
|
999
|
+
* @returns The parsed JSON value.
|
|
1000
|
+
* @throws StructuredOutputError if the text is not valid JSON.
|
|
1001
|
+
*/
|
|
1002
|
+
json() {
|
|
1003
|
+
return parseStructuredOutput(this.text);
|
|
1004
|
+
}
|
|
636
1005
|
}
|
|
637
1006
|
class CompletionResult {
|
|
638
1007
|
constructor(text, prompt, tokensGenerated, finishReason) {
|
|
@@ -641,6 +1010,19 @@ class CompletionResult {
|
|
|
641
1010
|
this.tokensGenerated = tokensGenerated;
|
|
642
1011
|
this.finishReason = finishReason;
|
|
643
1012
|
}
|
|
1013
|
+
/**
|
|
1014
|
+
* Parse {@link CompletionResult.text} as JSON.
|
|
1015
|
+
*
|
|
1016
|
+
* Intended for completions generated with `json: true` or `jsonSchema`.
|
|
1017
|
+
* The result is cast to `T` without runtime validation.
|
|
1018
|
+
*
|
|
1019
|
+
* @typeParam T - Expected parsed shape.
|
|
1020
|
+
* @returns The parsed JSON value.
|
|
1021
|
+
* @throws StructuredOutputError if the text is not valid JSON.
|
|
1022
|
+
*/
|
|
1023
|
+
json() {
|
|
1024
|
+
return parseStructuredOutput(this.text);
|
|
1025
|
+
}
|
|
644
1026
|
}
|
|
645
1027
|
class Chat extends LMTask {
|
|
646
1028
|
history = [];
|
|
@@ -1104,7 +1486,7 @@ async function* tap(stream, onChunk) {
|
|
|
1104
1486
|
yield chunk;
|
|
1105
1487
|
}
|
|
1106
1488
|
}
|
|
1107
|
-
const VERSION = "0.
|
|
1489
|
+
const VERSION = "0.5.0";
|
|
1108
1490
|
export {
|
|
1109
1491
|
BackendNotAvailableError,
|
|
1110
1492
|
Chat,
|
|
@@ -1123,18 +1505,25 @@ export {
|
|
|
1123
1505
|
QuotaExceededError,
|
|
1124
1506
|
RERANKER_PRESETS,
|
|
1125
1507
|
Reranker,
|
|
1508
|
+
StructuredOutputError,
|
|
1509
|
+
TransformersTextEngine,
|
|
1126
1510
|
UnknownModelError,
|
|
1127
1511
|
VERSION,
|
|
1128
1512
|
WebGPUUnavailableError,
|
|
1513
|
+
WebLLMEngine,
|
|
1129
1514
|
WorkerEngine,
|
|
1515
|
+
assertJsonSchema,
|
|
1130
1516
|
collectStream,
|
|
1131
1517
|
createInferenceWorker,
|
|
1132
1518
|
listSupportedEmbeddingModels,
|
|
1133
1519
|
listSupportedModels,
|
|
1134
1520
|
listSupportedRerankerModels,
|
|
1521
|
+
parseStructuredOutput,
|
|
1522
|
+
resolveBackend,
|
|
1135
1523
|
resolveEmbeddingPreset,
|
|
1136
1524
|
resolveModelPreset,
|
|
1137
1525
|
resolveRerankerPreset,
|
|
1526
|
+
serializeJsonSchema,
|
|
1138
1527
|
tap
|
|
1139
1528
|
};
|
|
1140
1529
|
//# sourceMappingURL=index.js.map
|