@tryhamster/gerbil 1.0.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +23 -0
- package/README.md +253 -0
- package/bin/cli.js +2 -0
- package/dist/auto-update-BbNHbSU1.mjs +3 -0
- package/dist/browser/index.d.mts +262 -0
- package/dist/browser/index.d.mts.map +1 -0
- package/dist/browser/index.mjs +755 -0
- package/dist/browser/index.mjs.map +1 -0
- package/dist/chrome-backend-C5Un08O4.mjs +771 -0
- package/dist/chrome-backend-C5Un08O4.mjs.map +1 -0
- package/dist/chrome-backend-CtwPENIW.mjs +3 -0
- package/dist/chunk-Ct1HF2bE.mjs +7 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +7078 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/frameworks/express.d.mts +22 -0
- package/dist/frameworks/express.d.mts.map +1 -0
- package/dist/frameworks/express.mjs +123 -0
- package/dist/frameworks/express.mjs.map +1 -0
- package/dist/frameworks/fastify.d.mts +11 -0
- package/dist/frameworks/fastify.d.mts.map +1 -0
- package/dist/frameworks/fastify.mjs +73 -0
- package/dist/frameworks/fastify.mjs.map +1 -0
- package/dist/frameworks/hono.d.mts +14 -0
- package/dist/frameworks/hono.d.mts.map +1 -0
- package/dist/frameworks/hono.mjs +82 -0
- package/dist/frameworks/hono.mjs.map +1 -0
- package/dist/frameworks/next.d.mts +31 -0
- package/dist/frameworks/next.d.mts.map +1 -0
- package/dist/frameworks/next.mjs +116 -0
- package/dist/frameworks/next.mjs.map +1 -0
- package/dist/frameworks/react.d.mts +56 -0
- package/dist/frameworks/react.d.mts.map +1 -0
- package/dist/frameworks/react.mjs +172 -0
- package/dist/frameworks/react.mjs.map +1 -0
- package/dist/frameworks/trpc.d.mts +12 -0
- package/dist/frameworks/trpc.d.mts.map +1 -0
- package/dist/frameworks/trpc.mjs +80 -0
- package/dist/frameworks/trpc.mjs.map +1 -0
- package/dist/gerbil-BfnsFWRE.mjs +644 -0
- package/dist/gerbil-BfnsFWRE.mjs.map +1 -0
- package/dist/gerbil-BjW-z7Fq.mjs +5 -0
- package/dist/gerbil-DZ1k3ChC.d.mts +138 -0
- package/dist/gerbil-DZ1k3ChC.d.mts.map +1 -0
- package/dist/index.d.mts +223 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +13 -0
- package/dist/index.mjs.map +1 -0
- package/dist/integrations/ai-sdk.d.mts +78 -0
- package/dist/integrations/ai-sdk.d.mts.map +1 -0
- package/dist/integrations/ai-sdk.mjs +199 -0
- package/dist/integrations/ai-sdk.mjs.map +1 -0
- package/dist/integrations/langchain.d.mts +41 -0
- package/dist/integrations/langchain.d.mts.map +1 -0
- package/dist/integrations/langchain.mjs +93 -0
- package/dist/integrations/langchain.mjs.map +1 -0
- package/dist/integrations/llamaindex.d.mts +45 -0
- package/dist/integrations/llamaindex.d.mts.map +1 -0
- package/dist/integrations/llamaindex.mjs +86 -0
- package/dist/integrations/llamaindex.mjs.map +1 -0
- package/dist/integrations/mcp-client.d.mts +206 -0
- package/dist/integrations/mcp-client.d.mts.map +1 -0
- package/dist/integrations/mcp-client.mjs +507 -0
- package/dist/integrations/mcp-client.mjs.map +1 -0
- package/dist/integrations/mcp.d.mts +177 -0
- package/dist/integrations/mcp.d.mts.map +1 -0
- package/dist/integrations/mcp.mjs +8 -0
- package/dist/mcp-R8kRLIKb.mjs +348 -0
- package/dist/mcp-R8kRLIKb.mjs.map +1 -0
- package/dist/models-DKULvhOr.mjs +136 -0
- package/dist/models-DKULvhOr.mjs.map +1 -0
- package/dist/models-De2-_GmQ.d.mts +22 -0
- package/dist/models-De2-_GmQ.d.mts.map +1 -0
- package/dist/one-liner-BUQR0nqq.mjs +98 -0
- package/dist/one-liner-BUQR0nqq.mjs.map +1 -0
- package/dist/skills/index.d.mts +390 -0
- package/dist/skills/index.d.mts.map +1 -0
- package/dist/skills/index.mjs +7 -0
- package/dist/skills-D3CEpgDc.mjs +630 -0
- package/dist/skills-D3CEpgDc.mjs.map +1 -0
- package/dist/tools-BsiEE6f2.mjs +567 -0
- package/dist/tools-BsiEE6f2.mjs.map +1 -0
- package/dist/types-BS1N92Jt.d.mts +183 -0
- package/dist/types-BS1N92Jt.d.mts.map +1 -0
- package/dist/utils-7vXqtq2Q.mjs +63 -0
- package/dist/utils-7vXqtq2Q.mjs.map +1 -0
- package/docs/ai-sdk.md +80 -0
- package/docs/architecture/README.md +84 -0
- package/docs/architecture/caching.md +227 -0
- package/docs/architecture/inference.md +176 -0
- package/docs/architecture/overview.md +179 -0
- package/docs/architecture/streaming.md +261 -0
- package/docs/architecture/webgpu.md +213 -0
- package/docs/browser.md +328 -0
- package/docs/cli.md +155 -0
- package/docs/frameworks.md +90 -0
- package/docs/mcp-client.md +224 -0
- package/docs/mcp.md +109 -0
- package/docs/memory.md +229 -0
- package/docs/repl.md +473 -0
- package/docs/skills.md +261 -0
- package/docs/tools.md +304 -0
- package/package.json +207 -0
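
The largest addition is `package/dist/browser/index.mjs`, the WebGPU browser entry point shown in the hunk below. For orientation, a minimal usage sketch assembled from that file's own JSDoc examples and exports (`isWebGPUSupported`, `createGerbilWorker`); option names and defaults are taken from the code in the diff, while the surrounding setup is assumed and illustrative only:

```ts
import { createGerbilWorker, isWebGPUSupported } from "@tryhamster/gerbil/browser";

async function demo() {
  // WebGPU is required; the React hooks below surface the same check as an error state.
  if (!isWebGPUSupported()) {
    console.error("WebGPU not supported. Use Chrome/Edge 113+.");
    return;
  }

  // Spins up a Web Worker that loads the model and streams tokens back.
  const gerbil = await createGerbilWorker({
    modelId: "qwen3-0.6b",                    // default model id in the code below
    onProgress: (p) => console.log(p.status), // "loading" | "downloading" | "ready" | "error"
    onToken: (token) => console.log(token.text, token.tps),
  });

  // Resolves with the decoded text once generation completes.
  const text = await gerbil.generate("Write a haiku about hamsters.", {
    maxTokens: 128,
    temperature: 0.7,
  });
  console.log(text);

  gerbil.terminate(); // stops the worker and revokes its blob URL
}

demo();
```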
package/dist/browser/index.mjs
@@ -0,0 +1,755 @@
+import { a as resolveModel, t as BUILTIN_MODELS } from "../models-DKULvhOr.mjs";
+
+//#region src/browser/index.ts
+/**
+ * Gerbil Browser Support
+ *
+ * Run LLMs directly in the browser with WebGPU acceleration.
+ *
+ * @example useChat (React)
+ * ```tsx
+ * import { useChat } from "@tryhamster/gerbil/browser";
+ *
+ * function Chat() {
+ *   const { messages, input, setInput, handleSubmit, isLoading } = useChat();
+ *
+ *   if (isLoading) return <div>Loading model...</div>;
+ *
+ *   return (
+ *     <form onSubmit={handleSubmit}>
+ *       {messages.map(m => <div key={m.id}>{m.role}: {m.content}</div>)}
+ *       <input value={input} onChange={e => setInput(e.target.value)} />
+ *     </form>
+ *   );
+ * }
+ * ```
+ *
+ * @example useCompletion (React)
+ * ```tsx
+ * import { useCompletion } from "@tryhamster/gerbil/browser";
+ *
+ * function App() {
+ *   const { complete, completion, isLoading } = useCompletion();
+ *   if (isLoading) return <div>Loading...</div>;
+ *   return <button onClick={() => complete("Write a haiku")}>{completion}</button>;
+ * }
+ * ```
+ *
+ * @example Low-level API
+ * ```ts
+ * import { createGerbilWorker } from "@tryhamster/gerbil/browser";
+ *
+ * const gerbil = await createGerbilWorker({
+ *   modelId: "qwen3-0.6b",
+ *   onToken: (token) => console.log(token.text),
+ * });
+ * await gerbil.generate("Hello!");
+ * gerbil.terminate();
+ * ```
+ */
+/**
+ * Create a Gerbil worker for streaming WebGPU inference
+ *
+ * Uses a Web Worker to keep the UI responsive during model loading
+ * and text generation, with real-time token streaming.
+ */
+async function createGerbilWorker(options = {}) {
+  const { modelId = "qwen3-0.6b", onProgress, onToken, onComplete, onError } = options;
+  const source = resolveModel(modelId);
+  return new Promise((resolve, reject) => {
+    const blob = new Blob([`
+import {
+  AutoTokenizer,
+  AutoModelForCausalLM,
+  TextStreamer,
+  InterruptableStoppingCriteria,
+} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.0";
+
+class ModelPipeline {
+  static tokenizer = null;
+  static model = null;
+  static modelId = "";
+
+  static async getInstance(modelId, options = {}, progressCallback) {
+    if (this.modelId !== modelId) {
+      this.tokenizer = null;
+      this.model = null;
+    }
+    this.modelId = modelId;
+
+    const dtype = options.dtype || "q4f16";
+    const device = options.device || "webgpu";
+
+    if (!this.tokenizer) {
+      this.tokenizer = await AutoTokenizer.from_pretrained(modelId, {
+        progress_callback: progressCallback,
+      });
+    }
+
+    if (!this.model) {
+      this.model = await AutoModelForCausalLM.from_pretrained(modelId, {
+        dtype,
+        device,
+        progress_callback: progressCallback,
+      });
+    }
+
+    return { tokenizer: this.tokenizer, model: this.model };
+  }
+}
+
+const stoppingCriteria = new InterruptableStoppingCriteria();
+let pastKeyValuesCache = null;
+
+async function load(data) {
+  const { modelId, options = {} } = data;
+  self.postMessage({ status: "loading", message: "Loading model..." });
+
+  // Track download state - if we see progress < 100, we're downloading
+  const downloadState = {
+    downloading: new Set(), // Files currently downloading
+    completed: new Set(), // Files completed
+    isDownloading: false, // True if any file needed download
+  };
+
+  try {
+    const { tokenizer, model } = await ModelPipeline.getInstance(
+      modelId,
+      options,
+      (progress) => {
+        if (progress.status === "progress" && progress.file) {
+          const pct = Math.round(progress.progress || 0);
+
+          // If we see progress < 100, this file is being downloaded (not from cache)
+          if (pct < 100) {
+            downloadState.downloading.add(progress.file);
+            downloadState.isDownloading = true;
+          } else if (pct === 100) {
+            downloadState.downloading.delete(progress.file);
+            downloadState.completed.add(progress.file);
+          }
+
+          // Only emit downloading status if actually downloading
+          if (downloadState.isDownloading) {
+            self.postMessage({
+              status: "downloading",
+              file: progress.file,
+              progress: pct,
+              downloadCount: downloadState.downloading.size,
+              totalFiles: downloadState.completed.size + downloadState.downloading.size,
+            });
+          }
+        }
+      }
+    );
+
+    self.postMessage({ status: "loading", message: "Compiling shaders..." });
+    const warmupInputs = tokenizer("a");
+    await model.generate({ ...warmupInputs, max_new_tokens: 1 });
+
+    self.postMessage({ status: "ready" });
+  } catch (error) {
+    self.postMessage({ status: "error", error: error.message || String(error) });
+  }
+}
+
+async function generate(data) {
+  const { messages, options = {} } = data;
+  const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 20, thinking = false } = options;
+
+  try {
+    const { tokenizer, model } = await ModelPipeline.getInstance(ModelPipeline.modelId, {});
+
+    const inputs = tokenizer.apply_chat_template(messages, {
+      add_generation_prompt: true,
+      return_dict: true,
+      enable_thinking: thinking,
+    });
+
+    let state = "answering";
+    const [START_THINKING_TOKEN_ID, END_THINKING_TOKEN_ID] = tokenizer.encode(
+      "<think></think>",
+      { add_special_tokens: false }
+    );
+
+    let startTime = null;
+    let numTokens = 0;
+
+    // Token callback for state tracking (receives raw token IDs)
+    const tokenCallback = (tokens) => {
+      startTime ??= performance.now();
+      numTokens++;
+
+      const tokenId = Number(tokens[0]);
+      if (tokenId === START_THINKING_TOKEN_ID) {
+        state = "thinking";
+      } else if (tokenId === END_THINKING_TOKEN_ID) {
+        state = "answering";
+      }
+    };
+
+    // Text callback for streaming (receives decoded text)
+    const streamCallback = (text) => {
+      const tps = startTime ? (numTokens / (performance.now() - startTime)) * 1000 : 0;
+      self.postMessage({ status: "token", text, state, numTokens, tps });
+    };
+
+    const streamer = new TextStreamer(tokenizer, {
+      skip_prompt: true,
+      skip_special_tokens: true,
+      callback_function: streamCallback,
+      token_callback_function: tokenCallback,
+    });
+
+    self.postMessage({ status: "start" });
+
+    const { past_key_values, sequences } = await model.generate({
+      ...inputs,
+      past_key_values: pastKeyValuesCache,
+      do_sample: temperature > 0,
+      temperature: temperature > 0 ? temperature : undefined,
+      top_p: topP,
+      top_k: topK,
+      max_new_tokens: maxTokens,
+      streamer,
+      stopping_criteria: stoppingCriteria,
+      return_dict_in_generate: true,
+    });
+
+    pastKeyValuesCache = past_key_values;
+
+    const endTime = performance.now();
+    const totalTime = startTime ? endTime - startTime : 0;
+    const decoded = tokenizer.batch_decode(sequences, { skip_special_tokens: true });
+
+    self.postMessage({
+      status: "complete",
+      text: decoded[0] || "",
+      numTokens,
+      totalTime,
+      tps: totalTime > 0 ? (numTokens / totalTime) * 1000 : 0,
+    });
+  } catch (error) {
+    self.postMessage({ status: "error", error: error.message || String(error) });
+  }
+}
+
+self.addEventListener("message", async (e) => {
+  const { type, ...data } = e.data;
+  switch (type) {
+    case "load": await load(data); break;
+    case "generate": stoppingCriteria.reset(); await generate(data); break;
+    case "interrupt": stoppingCriteria.interrupt(); break;
+    case "reset": pastKeyValuesCache = null; stoppingCriteria.reset(); break;
+  }
+});
+
+self.postMessage({ status: "init" });
+`], { type: "application/javascript" });
+    const workerUrl = URL.createObjectURL(blob);
+    const worker = new Worker(workerUrl, { type: "module" });
+    let isReady = false;
+    let currentResolve = null;
+    let currentReject = null;
+    let _generatedText = "";
+    worker.onmessage = (e) => {
+      const msg = e.data;
+      switch (msg.status) {
+        case "init":
+          worker.postMessage({
+            type: "load",
+            modelId: source.path
+          });
+          break;
+        case "loading":
+        case "downloading":
+          onProgress?.(msg);
+          break;
+        case "ready":
+          isReady = true;
+          onProgress?.(msg);
+          resolve(gerbilWorker);
+          break;
+        case "start":
+          _generatedText = "";
+          break;
+        case "token":
+          _generatedText += msg.text;
+          onToken?.(msg);
+          break;
+        case "complete":
+          onComplete?.(msg);
+          currentResolve?.(msg.text);
+          currentResolve = null;
+          currentReject = null;
+          break;
+        case "error":
+          onError?.(msg.error);
+          onProgress?.({
+            status: "error",
+            error: msg.error
+          });
+          if (currentReject) {
+            currentReject(new Error(msg.error));
+            currentResolve = null;
+            currentReject = null;
+          } else reject(new Error(msg.error));
+          break;
+      }
+    };
+    worker.onerror = (e) => {
+      const error = e.message || "Worker error";
+      onError?.(error);
+      reject(new Error(error));
+    };
+    const gerbilWorker = {
+      generate: (prompt, options$1 = {}) => {
+        return new Promise((res, rej) => {
+          currentResolve = res;
+          currentReject = rej;
+          const messages = [{
+            role: "system",
+            content: options$1.system || "You are a helpful assistant."
+          }, {
+            role: "user",
+            content: prompt
+          }];
+          worker.postMessage({
+            type: "generate",
+            messages,
+            options: {
+              maxTokens: options$1.maxTokens ?? 256,
+              temperature: options$1.temperature ?? .7,
+              topP: options$1.topP ?? .9,
+              topK: options$1.topK ?? 20,
+              thinking: options$1.thinking ?? false
+            }
+          });
+        });
+      },
+      interrupt: () => {
+        worker.postMessage({ type: "interrupt" });
+      },
+      reset: () => {
+        worker.postMessage({ type: "reset" });
+      },
+      terminate: () => {
+        worker.terminate();
+        URL.revokeObjectURL(workerUrl);
+      },
+      isReady: () => isReady
+    };
+  });
+}
+/**
+ * React hook for chat with local LLM
+ *
+ * @example
+ * ```tsx
+ * import { useChat } from "@tryhamster/gerbil/browser";
+ *
+ * function Chat() {
+ *   const { messages, input, setInput, handleSubmit, isLoading, isGenerating } = useChat();
+ *
+ *   if (isLoading) return <div>Loading model...</div>;
+ *
+ *   return (
+ *     <div>
+ *       {messages.map(m => (
+ *         <div key={m.id}>{m.role}: {m.content}</div>
+ *       ))}
+ *       <form onSubmit={handleSubmit}>
+ *         <input value={input} onChange={e => setInput(e.target.value)} />
+ *         <button disabled={isGenerating}>Send</button>
+ *       </form>
+ *     </div>
+ *   );
+ * }
+ * ```
+ */
+function useChat(options = {}) {
+  const React = globalThis.React;
+  if (!React) throw new Error("useChat requires React. Import React before using this hook.");
+  const { useState, useEffect, useRef, useCallback } = React;
+  const { model = "qwen3-0.6b", system = "You are a helpful assistant.", thinking: enableThinking = false, maxTokens = 512, temperature = .7, initialMessages = [], autoLoad = false, onReady, onError } = options;
+  const [messages, setMessages] = useState(initialMessages);
+  const [input, setInput] = useState("");
+  const [isLoading, setIsLoading] = useState(autoLoad);
+  const [loadingProgress, setLoadingProgress] = useState(null);
+  const [isGenerating, setIsGenerating] = useState(false);
+  const [thinking, setThinking] = useState("");
+  const [currentResponse, setCurrentResponse] = useState("");
+  const [tps, setTps] = useState(0);
+  const [error, setError] = useState(null);
+  const [isReady, setIsReady] = useState(false);
+  const [shouldLoad, setShouldLoad] = useState(autoLoad);
+  const workerRef = useRef(null);
+  const messageIdRef = useRef(0);
+  const mountedRef = useRef(true);
+  const load = useCallback(() => {
+    if (workerRef.current || isLoading) return;
+    setIsLoading(true);
+    setShouldLoad(true);
+  }, [isLoading]);
+  useEffect(() => {
+    if (!shouldLoad) return;
+    if (!isWebGPUSupported()) {
+      setError("WebGPU not supported. Use Chrome/Edge 113+.");
+      setIsLoading(false);
+      onError?.("WebGPU not supported");
+      return;
+    }
+    mountedRef.current = true;
+    createGerbilWorker({
+      modelId: model,
+      onProgress: (p) => {
+        if (!mountedRef.current) return;
+        setLoadingProgress(p);
+        if (p.status === "ready") {
+          setIsLoading(false);
+          setIsReady(true);
+          onReady?.();
+        }
+      },
+      onToken: (token) => {
+        if (!mountedRef.current) return;
+        setTps(token.tps);
+        if (token.state === "thinking") setThinking((t) => t + token.text);
+        else setCurrentResponse((r) => r + token.text);
+      },
+      onComplete: () => {
+        if (!mountedRef.current) return;
+        setIsGenerating(false);
+      },
+      onError: (err) => {
+        if (!mountedRef.current) return;
+        setError(err);
+        setIsGenerating(false);
+        onError?.(err);
+      }
+    }).then((worker) => {
+      if (mountedRef.current) workerRef.current = worker;
+      else worker.terminate();
+    }).catch((err) => {
+      if (mountedRef.current) {
+        setError(err.message);
+        setIsLoading(false);
+        onError?.(err.message);
+      }
+    });
+    return () => {
+      mountedRef.current = false;
+      workerRef.current?.terminate();
+    };
+  }, [model, shouldLoad]);
+  useEffect(() => {
+    if (!isGenerating && currentResponse) {
+      setMessages((msgs) => {
+        if (msgs.at(-1)?.role === "assistant") return msgs.map((m, i) => i === msgs.length - 1 ? {
+          ...m,
+          content: currentResponse,
+          thinking: thinking || void 0
+        } : m);
+        return msgs;
+      });
+      setCurrentResponse("");
+      setThinking("");
+    }
+    return () => {};
+  }, [
+    isGenerating,
+    currentResponse,
+    thinking
+  ]);
+  const pendingMessageRef = useRef(null);
+  const handleSubmit = useCallback((e) => {
+    e?.preventDefault?.();
+    if (!input.trim() || isGenerating) return;
+    const userMessage = {
+      id: `msg-${++messageIdRef.current}`,
+      role: "user",
+      content: input.trim()
+    };
+    const assistantMessage = {
+      id: `msg-${++messageIdRef.current}`,
+      role: "assistant",
+      content: ""
+    };
+    setMessages((msgs) => [
+      ...msgs,
+      userMessage,
+      assistantMessage
+    ]);
+    setInput("");
+    setCurrentResponse("");
+    setThinking("");
+    if (!workerRef.current) {
+      pendingMessageRef.current = userMessage.content;
+      load();
+      return;
+    }
+    setIsGenerating(true);
+    workerRef.current.generate(userMessage.content, {
+      system,
+      thinking: enableThinking,
+      maxTokens,
+      temperature
+    });
+  }, [
+    input,
+    isGenerating,
+    system,
+    enableThinking,
+    maxTokens,
+    temperature,
+    load
+  ]);
+  useEffect(() => {
+    if (isReady && pendingMessageRef.current && workerRef.current) {
+      const pendingContent = pendingMessageRef.current;
+      pendingMessageRef.current = null;
+      setIsGenerating(true);
+      workerRef.current.generate(pendingContent, {
+        system,
+        thinking: enableThinking,
+        maxTokens,
+        temperature
+      });
+    }
+    return () => {};
+  }, [
+    isReady,
+    system,
+    enableThinking,
+    maxTokens,
+    temperature
+  ]);
+  const stop = useCallback(() => {
+    workerRef.current?.interrupt();
+    setIsGenerating(false);
+  }, []);
+  const clear = useCallback(() => {
+    workerRef.current?.reset();
+    setMessages([]);
+    setCurrentResponse("");
+    setThinking("");
+  }, []);
+  return {
+    messages: messages.map((m, i) => {
+      if (i === messages.length - 1 && m.role === "assistant" && isGenerating) return {
+        ...m,
+        content: currentResponse,
+        thinking: thinking || void 0
+      };
+      return m;
+    }),
+    input,
+    setInput,
+    handleSubmit,
+    isLoading,
+    loadingProgress,
+    isGenerating,
+    thinking,
+    stop,
+    clear,
+    tps,
+    isReady,
+    error,
+    load
+  };
+}
+/**
+ * React hook for text completion with local LLM
+ *
+ * @example
+ * ```tsx
+ * import { useCompletion } from "@tryhamster/gerbil/browser";
+ *
+ * function App() {
+ *   const { complete, completion, isLoading, isGenerating } = useCompletion();
+ *
+ *   if (isLoading) return <div>Loading...</div>;
+ *
+ *   return (
+ *     <div>
+ *       <button onClick={() => complete("Write a haiku")}>Generate</button>
+ *       <p>{completion}</p>
+ *     </div>
+ *   );
+ * }
+ * ```
+ */
+function useCompletion(options = {}) {
+  const React = globalThis.React;
+  if (!React) throw new Error("useCompletion requires React. Import React before using this hook.");
+  const { useState, useEffect, useRef, useCallback } = React;
+  const { model = "qwen3-0.6b", system = "You are a helpful assistant.", thinking: enableThinking = false, maxTokens = 512, temperature = .7, autoLoad = false, onReady, onError } = options;
+  const [completion, setCompletion] = useState("");
+  const [thinking, setThinking] = useState("");
+  const [isLoading, setIsLoading] = useState(autoLoad);
+  const [loadingProgress, setLoadingProgress] = useState(null);
+  const [isGenerating, setIsGenerating] = useState(false);
+  const [tps, setTps] = useState(0);
+  const [error, setError] = useState(null);
+  const [isReady, setIsReady] = useState(false);
+  const [shouldLoad, setShouldLoad] = useState(autoLoad);
+  const workerRef = useRef(null);
+  const resolveRef = useRef(null);
+  const rejectRef = useRef(null);
+  const pendingPromptRef = useRef(null);
+  const mountedRef = useRef(true);
+  const load = useCallback(() => {
+    if (workerRef.current || isLoading) return;
+    setIsLoading(true);
+    setShouldLoad(true);
+  }, [isLoading]);
+  useEffect(() => {
+    if (!shouldLoad) return;
+    if (!isWebGPUSupported()) {
+      setError("WebGPU not supported. Use Chrome/Edge 113+.");
+      setIsLoading(false);
+      onError?.("WebGPU not supported");
+      return;
+    }
+    mountedRef.current = true;
+    createGerbilWorker({
+      modelId: model,
+      onProgress: (p) => {
+        if (!mountedRef.current) return;
+        setLoadingProgress(p);
+        if (p.status === "ready") {
+          setIsLoading(false);
+          setIsReady(true);
+          onReady?.();
+        }
+      },
+      onToken: (token) => {
+        if (!mountedRef.current) return;
+        setTps(token.tps);
+        if (token.state === "thinking") setThinking((t) => t + token.text);
+        else setCompletion((c) => c + token.text);
+      },
+      onComplete: (result) => {
+        if (!mountedRef.current) return;
+        setIsGenerating(false);
+        resolveRef.current?.(result.text);
+        resolveRef.current = null;
+      },
+      onError: (err) => {
+        if (!mountedRef.current) return;
+        setError(err);
+        setIsGenerating(false);
+        onError?.(err);
+      }
+    }).then((worker) => {
+      if (mountedRef.current) workerRef.current = worker;
+      else worker.terminate();
+    }).catch((err) => {
+      if (mountedRef.current) {
+        setError(err.message);
+        setIsLoading(false);
+        onError?.(err.message);
+      }
+    });
+    return () => {
+      mountedRef.current = false;
+      workerRef.current?.terminate();
+    };
+  }, [model, shouldLoad]);
+  const complete = useCallback((prompt) => {
+    return new Promise((resolve, reject) => {
+      setCompletion("");
+      setThinking("");
+      resolveRef.current = resolve;
+      rejectRef.current = reject;
+      if (!workerRef.current) {
+        pendingPromptRef.current = prompt;
+        load();
+        return;
+      }
+      setIsGenerating(true);
+      workerRef.current.generate(prompt, {
+        system,
+        thinking: enableThinking,
+        maxTokens,
+        temperature
+      });
+    });
+  }, [
+    system,
+    enableThinking,
+    maxTokens,
+    temperature,
+    load
+  ]);
+  useEffect(() => {
+    if (isReady && pendingPromptRef.current && workerRef.current) {
+      const pendingPrompt = pendingPromptRef.current;
+      pendingPromptRef.current = null;
+      setIsGenerating(true);
+      workerRef.current.generate(pendingPrompt, {
+        system,
+        thinking: enableThinking,
+        maxTokens,
+        temperature
+      });
+    }
+    return () => {};
+  }, [
+    isReady,
+    system,
+    enableThinking,
+    maxTokens,
+    temperature
+  ]);
+  return {
+    completion,
+    thinking,
+    complete,
+    isLoading,
+    loadingProgress,
+    isGenerating,
+    stop: useCallback(() => {
+      workerRef.current?.interrupt();
+      setIsGenerating(false);
+    }, []),
+    tps,
+    isReady,
+    error,
+    load
+  };
+}
+/**
+ * Check if WebGPU is supported
+ */
+function isWebGPUSupported() {
+  if (typeof navigator === "undefined") return false;
+  return "gpu" in navigator;
+}
+/**
+ * Get WebGPU adapter info
+ */
+async function getWebGPUInfo() {
+  if (!isWebGPUSupported()) return { supported: false };
+  try {
+    const adapter = await navigator.gpu.requestAdapter();
+    if (!adapter) return { supported: false };
+    const info = await adapter.requestAdapterInfo();
+    return {
+      supported: true,
+      adapter: info.vendor,
+      device: info.device
+    };
+  } catch {
+    return { supported: false };
+  }
+}
+var browser_default = {
+  isWebGPUSupported,
+  getWebGPUInfo,
+  createGerbilWorker
+};
+
+//#endregion
+export { BUILTIN_MODELS, createGerbilWorker, browser_default as default, getWebGPUInfo, isWebGPUSupported, useChat, useCompletion };
+//# sourceMappingURL=index.mjs.map