@tryhamster/gerbil 1.0.0-rc.0
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- package/LICENSE +23 -0
- package/README.md +253 -0
- package/bin/cli.js +2 -0
- package/dist/auto-update-BbNHbSU1.mjs +3 -0
- package/dist/browser/index.d.mts +262 -0
- package/dist/browser/index.d.mts.map +1 -0
- package/dist/browser/index.mjs +755 -0
- package/dist/browser/index.mjs.map +1 -0
- package/dist/chrome-backend-C5Un08O4.mjs +771 -0
- package/dist/chrome-backend-C5Un08O4.mjs.map +1 -0
- package/dist/chrome-backend-CtwPENIW.mjs +3 -0
- package/dist/chunk-Ct1HF2bE.mjs +7 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +7078 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/frameworks/express.d.mts +22 -0
- package/dist/frameworks/express.d.mts.map +1 -0
- package/dist/frameworks/express.mjs +123 -0
- package/dist/frameworks/express.mjs.map +1 -0
- package/dist/frameworks/fastify.d.mts +11 -0
- package/dist/frameworks/fastify.d.mts.map +1 -0
- package/dist/frameworks/fastify.mjs +73 -0
- package/dist/frameworks/fastify.mjs.map +1 -0
- package/dist/frameworks/hono.d.mts +14 -0
- package/dist/frameworks/hono.d.mts.map +1 -0
- package/dist/frameworks/hono.mjs +82 -0
- package/dist/frameworks/hono.mjs.map +1 -0
- package/dist/frameworks/next.d.mts +31 -0
- package/dist/frameworks/next.d.mts.map +1 -0
- package/dist/frameworks/next.mjs +116 -0
- package/dist/frameworks/next.mjs.map +1 -0
- package/dist/frameworks/react.d.mts +56 -0
- package/dist/frameworks/react.d.mts.map +1 -0
- package/dist/frameworks/react.mjs +172 -0
- package/dist/frameworks/react.mjs.map +1 -0
- package/dist/frameworks/trpc.d.mts +12 -0
- package/dist/frameworks/trpc.d.mts.map +1 -0
- package/dist/frameworks/trpc.mjs +80 -0
- package/dist/frameworks/trpc.mjs.map +1 -0
- package/dist/gerbil-BfnsFWRE.mjs +644 -0
- package/dist/gerbil-BfnsFWRE.mjs.map +1 -0
- package/dist/gerbil-BjW-z7Fq.mjs +5 -0
- package/dist/gerbil-DZ1k3ChC.d.mts +138 -0
- package/dist/gerbil-DZ1k3ChC.d.mts.map +1 -0
- package/dist/index.d.mts +223 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +13 -0
- package/dist/index.mjs.map +1 -0
- package/dist/integrations/ai-sdk.d.mts +78 -0
- package/dist/integrations/ai-sdk.d.mts.map +1 -0
- package/dist/integrations/ai-sdk.mjs +199 -0
- package/dist/integrations/ai-sdk.mjs.map +1 -0
- package/dist/integrations/langchain.d.mts +41 -0
- package/dist/integrations/langchain.d.mts.map +1 -0
- package/dist/integrations/langchain.mjs +93 -0
- package/dist/integrations/langchain.mjs.map +1 -0
- package/dist/integrations/llamaindex.d.mts +45 -0
- package/dist/integrations/llamaindex.d.mts.map +1 -0
- package/dist/integrations/llamaindex.mjs +86 -0
- package/dist/integrations/llamaindex.mjs.map +1 -0
- package/dist/integrations/mcp-client.d.mts +206 -0
- package/dist/integrations/mcp-client.d.mts.map +1 -0
- package/dist/integrations/mcp-client.mjs +507 -0
- package/dist/integrations/mcp-client.mjs.map +1 -0
- package/dist/integrations/mcp.d.mts +177 -0
- package/dist/integrations/mcp.d.mts.map +1 -0
- package/dist/integrations/mcp.mjs +8 -0
- package/dist/mcp-R8kRLIKb.mjs +348 -0
- package/dist/mcp-R8kRLIKb.mjs.map +1 -0
- package/dist/models-DKULvhOr.mjs +136 -0
- package/dist/models-DKULvhOr.mjs.map +1 -0
- package/dist/models-De2-_GmQ.d.mts +22 -0
- package/dist/models-De2-_GmQ.d.mts.map +1 -0
- package/dist/one-liner-BUQR0nqq.mjs +98 -0
- package/dist/one-liner-BUQR0nqq.mjs.map +1 -0
- package/dist/skills/index.d.mts +390 -0
- package/dist/skills/index.d.mts.map +1 -0
- package/dist/skills/index.mjs +7 -0
- package/dist/skills-D3CEpgDc.mjs +630 -0
- package/dist/skills-D3CEpgDc.mjs.map +1 -0
- package/dist/tools-BsiEE6f2.mjs +567 -0
- package/dist/tools-BsiEE6f2.mjs.map +1 -0
- package/dist/types-BS1N92Jt.d.mts +183 -0
- package/dist/types-BS1N92Jt.d.mts.map +1 -0
- package/dist/utils-7vXqtq2Q.mjs +63 -0
- package/dist/utils-7vXqtq2Q.mjs.map +1 -0
- package/docs/ai-sdk.md +80 -0
- package/docs/architecture/README.md +84 -0
- package/docs/architecture/caching.md +227 -0
- package/docs/architecture/inference.md +176 -0
- package/docs/architecture/overview.md +179 -0
- package/docs/architecture/streaming.md +261 -0
- package/docs/architecture/webgpu.md +213 -0
- package/docs/browser.md +328 -0
- package/docs/cli.md +155 -0
- package/docs/frameworks.md +90 -0
- package/docs/mcp-client.md +224 -0
- package/docs/mcp.md +109 -0
- package/docs/memory.md +229 -0
- package/docs/repl.md +473 -0
- package/docs/skills.md +261 -0
- package/docs/tools.md +304 -0
- package/package.json +207 -0
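
The remainder of the diff is `package/dist/chrome-backend-C5Un08O4.mjs`, the largest new file: it drives headless Chrome over the DevTools Protocol as a WebGPU inference worker. For orientation, here is a minimal usage sketch inferred from the `create`/`generate`/`dispose` methods in the code below. The import path is an assumption (this chunk exports `ChromeGPUBackend` under the mangled name `t`; the public entry point presumably re-exports it), so treat it as illustrative rather than documented API:

```js
// Hypothetical driver for the ChromeGPUBackend class shipped in this file.
// Assumption: the package root re-exports the class under its real name.
import { ChromeGPUBackend } from "@tryhamster/gerbil";

const backend = await ChromeGPUBackend.create({
  modelId: "onnx-community/Qwen3-0.6B-ONNX", // the default model in the source
  onProgress: (p) => console.log(p.status),  // "Starting Chrome...", "Ready (WebGPU)!"
});

const text = await backend.generate("Why is the sky blue?", {
  maxTokens: 128,
  onToken: (t) => process.stdout.write(t.text), // streamed { text, state, numTokens, tps }
});
console.log(text);

await backend.dispose();                     // releases this page; the shared Chrome stays up
await ChromeGPUBackend.closeSharedBrowser(); // full shutdown, e.g. on process exit
```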
package/dist/chrome-backend-C5Un08O4.mjs
@@ -0,0 +1,771 @@
+import { execSync } from "child_process";
+import { existsSync, mkdirSync, readFileSync, rmSync, unlinkSync, writeFileSync } from "fs";
+import { createServer } from "http";
+import { homedir } from "os";
+import { join } from "path";
+import puppeteer from "puppeteer-core";
+
+//#region src/core/chrome-backend.ts
+/**
+ * Chrome DevTools Protocol Backend for WebGPU Inference
+ *
+ * Uses headless Chrome as a WebGPU accelerator for Node.js environments.
+ * Provides the same performance as browser inference (~100+ tok/s with q4f16).
+ */
+const GERBIL_CACHE_DIR = join(homedir(), ".gerbil", "chrome-cache");
+const WS_ENDPOINT_FILE = join(GERBIL_CACHE_DIR, "ws-endpoint.txt");
+const CACHED_MODELS_FILE = join(homedir(), ".gerbil", "cached-models.json");
+/** Get list of models cached in Chrome's IndexedDB */
+function getChromeCachedModels() {
+  try {
+    if (!existsSync(CACHED_MODELS_FILE)) return [];
+    return JSON.parse(readFileSync(CACHED_MODELS_FILE, "utf-8")).models || [];
+  } catch {
+    return [];
+  }
+}
+/** Fetch model size from HuggingFace API */
+async function fetchModelSize(modelId) {
+  try {
+    const treeRes = await fetch(`https://huggingface.co/api/models/${modelId}/tree/main/onnx`);
+    if (treeRes.ok) {
+      const files = await treeRes.json();
+      const q4f16 = files.find((f) => f.path.includes("q4f16") && f.path.endsWith(".onnx"));
+      const q4 = files.find((f) => f.path.includes("q4") && !f.path.includes("f16") && f.path.endsWith(".onnx"));
+      const fp16 = files.find((f) => f.path.includes("fp16") && f.path.endsWith(".onnx"));
+      const anyOnnx = files.find((f) => f.path.endsWith(".onnx"));
+      const bestFile = q4f16 || q4 || fp16 || anyOnnx;
+      if (bestFile?.size) return bestFile.size;
+    }
+    const res = await fetch(`https://huggingface.co/api/models/${modelId}`);
+    if (res.ok) return (await res.json()).usedStorage;
+  } catch {}
+}
+/** Track a model as cached */
+function trackCachedModel(modelId, sizeBytes) {
+  try {
+    const dir = join(homedir(), ".gerbil");
+    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+    const models = getChromeCachedModels();
+    const existing = models.find((m) => m.modelId === modelId);
+    const now = (/* @__PURE__ */ new Date()).toISOString();
+    if (existing) {
+      existing.lastUsed = now;
+      if (sizeBytes) existing.sizeBytes = sizeBytes;
+    } else models.push({
+      modelId,
+      downloadedAt: now,
+      lastUsed: now,
+      sizeBytes
+    });
+    writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models }, null, 2));
+    if (!sizeBytes) fetchModelSize(modelId).then((size) => {
+      if (size) {
+        const updatedModels = getChromeCachedModels();
+        const model = updatedModels.find((m) => m.modelId === modelId);
+        if (model) {
+          model.sizeBytes = size;
+          writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models: updatedModels }, null, 2));
+        }
+      }
+    }).catch(() => {});
+  } catch {}
+}
+/** Refresh sizes for cached models that don't have them */
+async function refreshCachedModelSizes() {
+  try {
+    const models = getChromeCachedModels();
+    const needsSize = models.filter((m) => !m.sizeBytes);
+    if (needsSize.length === 0) return;
+    const batchSize = 3;
+    for (let i = 0; i < needsSize.length; i += batchSize) {
+      const batch = needsSize.slice(i, i + batchSize);
+      await Promise.all(batch.map(async (model) => {
+        const size = await fetchModelSize(model.modelId);
+        if (size) model.sizeBytes = size;
+      }));
+    }
+    writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models }, null, 2));
+  } catch {}
+}
+const GERBIL_LOCAL_PORT = 43724;
+let globalBrowser = null;
+let globalBrowserPromise = null;
+let globalServer = null;
+let globalServerPort = 0;
+let activePagesCount = 0;
+const MAX_CONCURRENT_PAGES = 5;
+const CHROME_PATHS = {
+  darwin: [
+    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+    "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
+    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"
+  ],
+  linux: [
+    "google-chrome-stable",
+    "google-chrome",
+    "chromium-browser",
+    "chromium",
+    "microsoft-edge",
+    "brave-browser"
+  ],
+  win32: [
+    "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
+    "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
+    `${process.env.LOCALAPPDATA}\\Google\\Chrome\\Application\\chrome.exe`,
+    "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
+    "C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"
+  ]
+};
+function findChrome() {
+  if (process.env.CHROME_PATH) return process.env.CHROME_PATH;
+  const platform = process.platform;
+  const paths = CHROME_PATHS[platform] || [];
+  for (const p of paths) try {
+    if (platform === "linux") {
+      execSync(`which ${p}`, { stdio: "ignore" });
+      return p;
+    }
+    if (existsSync(p)) return p;
+  } catch {}
+  throw new Error("Chrome not found. Install Chrome or set CHROME_PATH environment variable.");
+}
+function getChromeFlags(userDataDir, _debuggingPort) {
+  const flags = ["--no-sandbox", `--user-data-dir=${userDataDir}`];
+  if (process.platform === "linux") flags.push("--enable-unsafe-webgpu", "--enable-features=Vulkan", "--use-angle=vulkan", "--disable-vulkan-surface");
+  else if (process.platform === "darwin") {} else flags.push("--enable-unsafe-webgpu");
+  return flags;
+}
+function getWorkerPageHTML(modelPath) {
+  return `
+<!DOCTYPE html>
+<html>
+<head>
+<title>Gerbil WebGPU Backend</title>
+<script type="module">
+import {
+  AutoTokenizer,
+  AutoModelForCausalLM,
+  TextStreamer,
+  InterruptableStoppingCriteria,
+} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.0";
+
+class ModelPipeline {
+  static tokenizer = null;
+  static model = null;
+  static modelId = "${modelPath}";
+
+  static async getInstance(progressCallback) {
+    if (!this.tokenizer) {
+      this.tokenizer = await AutoTokenizer.from_pretrained(this.modelId, {
+        progress_callback: progressCallback,
+      });
+    }
+
+    if (!this.model) {
+      this.model = await AutoModelForCausalLM.from_pretrained(this.modelId, {
+        dtype: "q4f16",
+        device: "webgpu",
+        progress_callback: progressCallback,
+      });
+    }
+
+    return { tokenizer: this.tokenizer, model: this.model };
+  }
+}
+
+const stoppingCriteria = new InterruptableStoppingCriteria();
+let pastKeyValuesCache = null;
+let totalTokensInCache = 0;
+
+// Context length for auto-reset (Qwen3 default: 2048)
+// Cache beyond this provides no benefit and wastes memory
+const CONTEXT_LENGTH = 2048;
+
+// Auto-load model on page init
+(async function() {
+  console.log(JSON.stringify({ type: "progress", status: "Loading model..." }));
+
+  try {
+    const { tokenizer, model } = await ModelPipeline.getInstance((progress) => {
+      if (progress.status === "progress" && progress.file) {
+        console.log(JSON.stringify({
+          type: "progress",
+          status: "progress",
+          file: progress.file,
+          progress: Math.round(progress.progress || 0),
+        }));
+      }
+    });
+
+    console.log(JSON.stringify({ type: "progress", status: "Compiling shaders..." }));
+    const warmupInputs = tokenizer("a");
+    await model.generate({ ...warmupInputs, max_new_tokens: 1 });
+
+    console.log(JSON.stringify({ type: "ready" }));
+  } catch (error) {
+    console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
+  }
+})();
+
+window.gerbilGenerate = async function(messages, options = {}) {
+  const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 20, thinking = false } = options;
+
+  // Auto-reset KV cache if it exceeds context length
+  // This prevents unbounded memory growth while preserving performance
+  if (totalTokensInCache > CONTEXT_LENGTH) {
+    console.log(JSON.stringify({
+      type: "cache_reset",
+      reason: "context_exceeded",
+      tokensInCache: totalTokensInCache,
+      contextLength: CONTEXT_LENGTH
+    }));
+    pastKeyValuesCache = null;
+    totalTokensInCache = 0;
+  }
+
+  try {
+    const { tokenizer, model } = await ModelPipeline.getInstance();
+
+    const inputs = tokenizer.apply_chat_template(messages, {
+      add_generation_prompt: true,
+      return_dict: true,
+      enable_thinking: thinking,
+    });
+
+    let state = "answering";
+    let prevState = "answering";
+    const [START_THINKING_TOKEN_ID, END_THINKING_TOKEN_ID] = tokenizer.encode(
+      "<think></think>",
+      { add_special_tokens: false }
+    );
+
+    let startTime = null;
+    let numTokens = 0;
+
+    const tokenCallback = (tokens) => {
+      startTime ??= performance.now();
+      numTokens++;
+
+      const tokenId = Number(tokens[0]);
+      if (tokenId === START_THINKING_TOKEN_ID) {
+        state = "thinking";
+      } else if (tokenId === END_THINKING_TOKEN_ID) {
+        state = "answering";
+      }
+    };
+
+    const streamCallback = (text) => {
+      const tps = startTime ? (numTokens / (performance.now() - startTime)) * 1000 : 0;
+
+      // Inject <think> markers when state changes (since skip_special_tokens removes them)
+      let outputText = text;
+      if (thinking) {
+        if (state === "thinking" && prevState !== "thinking") {
+          outputText = "<think>" + text;
+        } else if (state === "answering" && prevState === "thinking") {
+          outputText = "</think>" + text;
+        }
+      }
+      prevState = state;
+
+      console.log(JSON.stringify({ type: "token", text: outputText, state, numTokens, tps }));
+    };
+
+    const streamer = new TextStreamer(tokenizer, {
+      skip_prompt: true,
+      skip_special_tokens: true,
+      callback_function: streamCallback,
+      token_callback_function: tokenCallback,
+    });
+
+    console.log(JSON.stringify({ type: "start" }));
+
+    const { past_key_values, sequences } = await model.generate({
+      ...inputs,
+      past_key_values: pastKeyValuesCache,
+      do_sample: temperature > 0,
+      temperature: temperature > 0 ? temperature : undefined,
+      top_p: topP,
+      top_k: topK,
+      max_new_tokens: maxTokens,
+      streamer,
+      stopping_criteria: stoppingCriteria,
+      return_dict_in_generate: true,
+    });
+
+    pastKeyValuesCache = past_key_values;
+
+    // Track total tokens in cache (input + generated)
+    const inputLength = inputs.input_ids.dims[1];
+    totalTokensInCache += inputLength + numTokens;
+
+    const endTime = performance.now();
+    const totalTime = startTime ? endTime - startTime : 0;
+
+    // Extract only the generated tokens (exclude input prompt)
+    const generatedTokens = sequences.slice(null, [inputLength, null]);
+    const decoded = tokenizer.batch_decode(generatedTokens, { skip_special_tokens: true });
+
+    console.log(JSON.stringify({
+      type: "complete",
+      text: decoded[0] || "",
+      numTokens,
+      totalTime,
+      tps: totalTime > 0 ? (numTokens / totalTime) * 1000 : 0,
+      tokensInCache: totalTokensInCache,
+    }));
+
+    return decoded[0] || "";
+  } catch (error) {
+    console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
+    throw error;
+  }
+};
+
+window.gerbilInterrupt = function() {
+  stoppingCriteria.interrupt();
+};
+
+window.gerbilReset = function() {
+  pastKeyValuesCache = null;
+  totalTokensInCache = 0;
+  stoppingCriteria.reset();
+  console.log(JSON.stringify({ type: "cache_reset", reason: "manual" }));
+};
+
+// Signal that the page is ready for commands
+console.log(JSON.stringify({ type: "init" }));
+<\/script>
+</head>
+<body>
+<h1>Gerbil WebGPU Backend</h1>
+<p>This page provides WebGPU inference for the Gerbil CLI.</p>
+</body>
+</html>
+`;
+}
+var ChromeGPUBackend = class ChromeGPUBackend {
+  browser = null;
+  page = null;
+  cdp = null;
+  serverPort = 0;
+  userDataDir = GERBIL_CACHE_DIR;
+  modelId;
+  isReady = false;
+  messageHandlers = /* @__PURE__ */ new Map();
+  pendingRejects = [];
+  server = null;
+  constructor(modelId) {
+    this.modelId = modelId;
+  }
+  /**
+   * Create and initialize a Chrome GPU backend
+   */
+  static async create(options = {}) {
+    const backend = new ChromeGPUBackend(options.modelId || "onnx-community/Qwen3-0.6B-ONNX");
+    await backend.launch(options);
+    return backend;
+  }
+  /**
+   * Get existing browser or launch a new one (singleton pattern)
+   * Multiple Gerbil instances share the same browser process
+   */
+  async getOrCreateBrowser(chromePath, options) {
+    if (globalBrowser?.connected) {
+      options.onProgress?.({ status: "Reusing existing Chrome..." });
+      return globalBrowser;
+    }
+    if (globalBrowserPromise) {
+      options.onProgress?.({ status: "Waiting for Chrome startup..." });
+      return globalBrowserPromise;
+    }
+    if (existsSync(WS_ENDPOINT_FILE)) try {
+      const wsEndpoint = readFileSync(WS_ENDPOINT_FILE, "utf-8").trim();
+      options.onProgress?.({ status: "Connecting to existing Chrome..." });
+      globalBrowser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });
+      return globalBrowser;
+    } catch {
+      try {
+        unlinkSync(WS_ENDPOINT_FILE);
+      } catch {}
+    }
+    globalBrowserPromise = this.launchBrowser(chromePath, options);
+    try {
+      globalBrowser = await globalBrowserPromise;
+      return globalBrowser;
+    } finally {
+      globalBrowserPromise = null;
+    }
+  }
+  /**
+   * Launch a new Chrome browser instance
+   */
+  async launchBrowser(chromePath, _options) {
+    const debuggingPort = 9222 + Math.floor(Math.random() * 1e3);
+    const lockFile = join(this.userDataDir, "SingletonLock");
+    if (existsSync(lockFile)) try {
+      unlinkSync(lockFile);
+      await new Promise((r) => setTimeout(r, 200));
+    } catch {}
+    const browser = await puppeteer.launch({
+      executablePath: chromePath,
+      headless: true,
+      args: [
+        ...getChromeFlags(this.userDataDir, debuggingPort),
+        "--enable-gpu",
+        "--no-first-run",
+        "--no-default-browser-check",
+        "--disable-background-timer-throttling",
+        "--disable-renderer-backgrounding",
+        "--disable-dev-shm-usage"
+      ],
+      handleSIGINT: false,
+      handleSIGTERM: false,
+      handleSIGHUP: false
+    });
+    writeFileSync(WS_ENDPOINT_FILE, browser.wsEndpoint());
+    browser.on("disconnected", () => {
+      globalBrowser = null;
+      try {
+        unlinkSync(WS_ENDPOINT_FILE);
+      } catch {}
+    });
+    return browser;
+  }
+  /**
+   * Launch Chrome and initialize the worker page
+   */
+  async launch(options) {
+    if (activePagesCount >= MAX_CONCURRENT_PAGES) throw new Error(`Maximum concurrent pages (${MAX_CONCURRENT_PAGES}) reached. Call dispose() on old Gerbil instances to free resources. Currently active: ${activePagesCount}`);
+    const chromePath = options.chromePath || findChrome();
+    this.userDataDir = GERBIL_CACHE_DIR;
+    if (!existsSync(this.userDataDir)) mkdirSync(this.userDataDir, { recursive: true });
+    const html = getWorkerPageHTML(this.modelId);
+    await this.startServer(html);
+    options.onProgress?.({ status: "Starting Chrome..." });
+    this.browser = await this.getOrCreateBrowser(chromePath, options);
+    this.page = await this.browser.newPage();
+    this.cdp = await this.page.createCDPSession();
+    activePagesCount++;
+    options.onProgress?.({ status: `Active pages: ${activePagesCount}/${MAX_CONCURRENT_PAGES}` });
+    this.browser.on("disconnected", () => {
+      console.error("[Chrome] Browser disconnected unexpectedly");
+      this.isReady = false;
+      this.browser = null;
+      this.page = null;
+      this.cdp = null;
+      this.rejectPendingWaits(/* @__PURE__ */ new Error("CHROME_DISCONNECTED"));
+    });
+    await this.cdp.send("Runtime.enable");
+    await this.cdp.send("Runtime.setAsyncCallStackDepth", { maxDepth: 32 });
+    this.cdp.on("Runtime.consoleAPICalled", (event) => {
+      const text = event.args.map((a) => a.value || a.description || "").join(" ");
+      if (event.type === "log" && event.args[0]?.value) try {
+        const data = JSON.parse(event.args[0].value);
+        this.handleMessage(data, options);
+      } catch {
+        if (text.length < 500 && !text.includes("Float32Array") && !text.includes("past_key_values")) {}
+      }
+      else if (event.type === "error" || event.type === "warning") {
+        if (!(text.includes("onnxruntime") || text.includes("content-length") || text.includes("Float32Array") || text.includes("past_key_values")) && text.length < 1e3) console.error(`[Chrome ${event.type}]`, text);
+      }
+    });
+    this.cdp.on("Runtime.exceptionThrown", (event) => {
+      const errText = event.exceptionDetails?.text || event.exceptionDetails?.exception?.description || "";
+      if (errText.includes("Float32Array") || errText.includes("past_key_values") || errText.length > 1e3) return;
+      console.error("[Chrome Exception]", errText);
+    });
+    await this.page.goto(`http://127.0.0.1:${this.serverPort}/`, {
+      waitUntil: "domcontentloaded",
+      timeout: 3e4
+    });
+    await this.waitForMessage("ready", 3e5);
+    this.isReady = true;
+    options.onProgress?.({ status: "Ready (WebGPU)!" });
+    trackCachedModel(this.modelId);
+  }
+  /**
+   * Handle incoming messages from the page
+   */
+  handleMessage(data, options) {
+    const { type, ...rest } = data;
+    const handler = this.messageHandlers.get(type);
+    if (handler) handler(rest);
+    if (type === "progress") options.onProgress?.(rest);
+    else if (type === "token") options.onToken?.(rest);
+  }
+  /**
+   * Wait for a specific message type
+   */
+  waitForMessage(type, timeout = 3e4) {
+    return new Promise((resolve, reject) => {
+      this.pendingRejects.push(reject);
+      const cleanup = () => {
+        clearTimeout(timer);
+        this.messageHandlers.delete(type);
+        const idx = this.pendingRejects.indexOf(reject);
+        if (idx >= 0) this.pendingRejects.splice(idx, 1);
+      };
+      const timer = setTimeout(() => {
+        cleanup();
+        reject(/* @__PURE__ */ new Error(`Timeout waiting for ${type} message`));
+      }, timeout);
+      this.messageHandlers.set(type, (data) => {
+        cleanup();
+        resolve(data);
+      });
+    });
+  }
+  /**
+   * Check if Chrome backend is still alive
+   */
+  isAlive() {
+    return this.isReady && this.browser !== null && this.page !== null;
+  }
+  /**
+   * Get Chrome backend status information
+   */
+  getStatus() {
+    let pid = null;
+    const browserProcess = this.browser?.process?.() || globalBrowser?.process?.();
+    if (browserProcess?.pid) pid = browserProcess.pid;
+    return {
+      pid,
+      port: this.serverPort || globalServerPort,
+      modelId: this.modelId,
+      startedAt: this.isReady ? /* @__PURE__ */ new Date() : null
+    };
+  }
+  /**
+   * Get Chrome memory usage via CDP Performance metrics
+   * Returns memory in bytes or null if unavailable
+   */
+  async getMemoryUsage() {
+    if (!(this.cdp && this.isReady)) return null;
+    try {
+      await this.cdp.send("Performance.enable");
+      const { metrics } = await this.cdp.send("Performance.getMetrics");
+      return {
+        jsHeapUsed: metrics.find((m) => m.name === "JSHeapUsedSize")?.value ?? 0,
+        jsHeapTotal: metrics.find((m) => m.name === "JSHeapTotalSize")?.value ?? 0
+      };
+    } catch {
+      return null;
+    }
+  }
+  /**
+   * Check memory usage and auto-cleanup if threshold exceeded
+   * @param thresholdGB Memory threshold in GB (default: 8)
+   * @returns true if cleanup was performed
+   */
+  async checkMemoryAndCleanup(thresholdGB = 8) {
+    const mem = await this.getMemoryUsage();
+    if (!mem) return false;
+    const usedGB = mem.jsHeapUsed / 1024 ** 3;
+    if (usedGB > thresholdGB) {
+      console.warn(`[Gerbil] Memory usage high (${usedGB.toFixed(1)}GB > ${thresholdGB}GB), clearing KV cache...`);
+      await this.reset();
+      return true;
+    }
+    return false;
+  }
+  /**
+   * Get memory usage in a human-readable format
+   */
+  async getMemoryStats() {
+    const mem = await this.getMemoryUsage();
+    if (!mem) return null;
+    return {
+      usedGB: mem.jsHeapUsed / 1024 ** 3,
+      totalGB: mem.jsHeapTotal / 1024 ** 3,
+      usedPercent: mem.jsHeapUsed / mem.jsHeapTotal * 100
+    };
+  }
+  /**
+   * Generate text with streaming
+   */
+  async generate(prompt, options = {}) {
+    if (!this.isAlive()) throw new Error("CHROME_BACKEND_DEAD");
+    const messages = [{
+      role: "system",
+      content: options.system || "You are a helpful assistant."
+    }, {
+      role: "user",
+      content: prompt
+    }];
+    const genOptions = {
+      maxTokens: options.maxTokens ?? 256,
+      temperature: options.temperature ?? .7,
+      topP: options.topP ?? .9,
+      topK: options.topK ?? 20,
+      thinking: options.thinking ?? false
+    };
+    if (options.onToken) this.messageHandlers.set("token", options.onToken);
+    try {
+      const resultPromise = this.page.evaluate((msgs, opts) => window.gerbilGenerate(msgs, opts), messages, genOptions);
+      const completeData = await this.waitForMessage("complete", 6e5);
+      this.messageHandlers.delete("token");
+      await resultPromise;
+      return completeData.text || "";
+    } catch (err) {
+      if (!this.isAlive()) throw new Error("CHROME_BACKEND_DEAD");
+      throw err;
+    }
+  }
+  /**
+   * Interrupt current generation
+   */
+  async interrupt() {
+    if (this.page) await this.page.evaluate("window.gerbilInterrupt()");
+  }
+  /**
+   * Reset conversation cache
+   */
+  async reset() {
+    if (this.page) await this.page.evaluate("window.gerbilReset()");
+  }
+  /**
+   * Check if backend is ready
+   */
+  ready() {
+    return this.isReady;
+  }
+  /**
+   * Start or reuse the global HTTP server
+   * Uses singleton pattern to prevent killing our own server
+   */
+  async startServer(html) {
+    if (globalServer && globalServerPort) {
+      this.server = globalServer;
+      this.serverPort = globalServerPort;
+      return;
+    }
+    return new Promise((resolve, reject) => {
+      const server = createServer((_req, res) => {
+        res.writeHead(200, { "Content-Type": "text/html" });
+        res.end(html);
+      });
+      server.on("error", (err) => {
+        if (err.code === "EADDRINUSE") {
+          this.serverPort = GERBIL_LOCAL_PORT;
+          globalServerPort = GERBIL_LOCAL_PORT;
+          resolve();
+        } else reject(err);
+      });
+      server.listen(GERBIL_LOCAL_PORT, "127.0.0.1", () => {
+        this.server = server;
+        this.serverPort = GERBIL_LOCAL_PORT;
+        globalServer = server;
+        globalServerPort = GERBIL_LOCAL_PORT;
+        resolve();
+      });
+    });
+  }
+  /**
+   * Dispose of the backend and clean up
+   * Note: We keep the shared browser running for other backends
+   */
+  async dispose() {
+    this.isReady = false;
+    this.pendingRejects = [];
+    this.messageHandlers.clear();
+    if (this.page) {
+      try {
+        await this.page.close();
+        activePagesCount = Math.max(0, activePagesCount - 1);
+      } catch {}
+      this.page = null;
+    }
+    this.cdp = null;
+    this.browser = null;
+    this.server = null;
+  }
+  /**
+   * Reject all pending waits (called on browser disconnect or dispose)
+   */
+  rejectPendingWaits(error) {
+    for (const reject of this.pendingRejects) reject(error);
+    this.pendingRejects = [];
+    this.messageHandlers.clear();
+  }
+  /**
+   * Clear the model cache (forces re-download on next start)
+   */
+  static clearCache() {
+    if (existsSync(GERBIL_CACHE_DIR)) rmSync(GERBIL_CACHE_DIR, {
+      recursive: true,
+      force: true
+    });
+  }
+  /**
+   * Get the number of active Chrome pages
+   */
+  static getActivePageCount() {
+    return activePagesCount;
+  }
+  /**
+   * Get memory usage info for all active pages
+   */
+  static getMemoryInfo() {
+    return {
+      activePagesCount,
+      maxPages: MAX_CONCURRENT_PAGES
+    };
+  }
+  /**
+   * Gracefully close the shared browser (call on process exit)
+   */
+  static async closeSharedBrowser() {
+    if (globalBrowser) {
+      try {
+        await globalBrowser.close();
+      } catch {}
+      globalBrowser = null;
+      globalBrowserPromise = null;
+    }
+    if (globalServer) {
+      globalServer.close();
+      globalServer = null;
+      globalServerPort = 0;
+    }
+    activePagesCount = 0;
+    try {
+      unlinkSync(WS_ENDPOINT_FILE);
+    } catch {}
+  }
+};
+let cleanupRegistered = false;
+function registerCleanup() {
+  if (cleanupRegistered) return;
+  cleanupRegistered = true;
+  const cleanup = () => {
+    if (globalBrowser) {
+      try {
+        const browserProcess = globalBrowser.process();
+        if (browserProcess) browserProcess.kill("SIGTERM");
+      } catch {}
+      globalBrowser = null;
+    }
+    if (globalServer) {
+      globalServer.close();
+      globalServer = null;
+    }
+  };
+  process.on("exit", cleanup);
+  process.on("SIGINT", () => {
+    cleanup();
+    process.exit(0);
+  });
+  process.on("SIGTERM", () => {
+    cleanup();
+    process.exit(0);
+  });
+}
+registerCleanup();
+
+//#endregion
+export { trackCachedModel as i, getChromeCachedModels as n, refreshCachedModelSizes as r, ChromeGPUBackend as t };
+//# sourceMappingURL=chrome-backend-C5Un08O4.mjs.map
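
Worth noting for readers of the file above: the Node side and the worker page communicate entirely through `console.log(JSON.stringify({ type, ... }))` messages, decoded from CDP `Runtime.consoleAPICalled` events. A stripped-down sketch of that pattern, independent of Gerbil (the `CHROME_PATH` environment variable is an assumption here; puppeteer-core always needs an explicit executable path):

```js
// Minimal console-as-IPC sketch: the page "sends" JSON via console.log,
// Node decodes it from the DevTools Runtime.consoleAPICalled event.
import puppeteer from "puppeteer-core";

const browser = await puppeteer.launch({
  executablePath: process.env.CHROME_PATH, // assumed to point at a Chrome binary
  headless: true,
});
const page = await browser.newPage();
const cdp = await page.createCDPSession();
await cdp.send("Runtime.enable");

cdp.on("Runtime.consoleAPICalled", (event) => {
  const raw = event.args[0]?.value;
  if (typeof raw !== "string") return;
  try {
    console.log("page event:", JSON.parse(raw)); // e.g. { type: "ready" }
  } catch {
    // Non-JSON console noise from the page; ignored, as chrome-backend does.
  }
});

// Any page script can now signal Node:
await page.evaluate(() => {
  console.log(JSON.stringify({ type: "ready" }));
});

await browser.close();
```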