@tryhamster/gerbil 1.0.0-rc.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/LICENSE +23 -0
  2. package/README.md +253 -0
  3. package/bin/cli.js +2 -0
  4. package/dist/auto-update-BbNHbSU1.mjs +3 -0
  5. package/dist/browser/index.d.mts +262 -0
  6. package/dist/browser/index.d.mts.map +1 -0
  7. package/dist/browser/index.mjs +755 -0
  8. package/dist/browser/index.mjs.map +1 -0
  9. package/dist/chrome-backend-C5Un08O4.mjs +771 -0
  10. package/dist/chrome-backend-C5Un08O4.mjs.map +1 -0
  11. package/dist/chrome-backend-CtwPENIW.mjs +3 -0
  12. package/dist/chunk-Ct1HF2bE.mjs +7 -0
  13. package/dist/cli.d.mts +1 -0
  14. package/dist/cli.mjs +7078 -0
  15. package/dist/cli.mjs.map +1 -0
  16. package/dist/frameworks/express.d.mts +22 -0
  17. package/dist/frameworks/express.d.mts.map +1 -0
  18. package/dist/frameworks/express.mjs +123 -0
  19. package/dist/frameworks/express.mjs.map +1 -0
  20. package/dist/frameworks/fastify.d.mts +11 -0
  21. package/dist/frameworks/fastify.d.mts.map +1 -0
  22. package/dist/frameworks/fastify.mjs +73 -0
  23. package/dist/frameworks/fastify.mjs.map +1 -0
  24. package/dist/frameworks/hono.d.mts +14 -0
  25. package/dist/frameworks/hono.d.mts.map +1 -0
  26. package/dist/frameworks/hono.mjs +82 -0
  27. package/dist/frameworks/hono.mjs.map +1 -0
  28. package/dist/frameworks/next.d.mts +31 -0
  29. package/dist/frameworks/next.d.mts.map +1 -0
  30. package/dist/frameworks/next.mjs +116 -0
  31. package/dist/frameworks/next.mjs.map +1 -0
  32. package/dist/frameworks/react.d.mts +56 -0
  33. package/dist/frameworks/react.d.mts.map +1 -0
  34. package/dist/frameworks/react.mjs +172 -0
  35. package/dist/frameworks/react.mjs.map +1 -0
  36. package/dist/frameworks/trpc.d.mts +12 -0
  37. package/dist/frameworks/trpc.d.mts.map +1 -0
  38. package/dist/frameworks/trpc.mjs +80 -0
  39. package/dist/frameworks/trpc.mjs.map +1 -0
  40. package/dist/gerbil-BfnsFWRE.mjs +644 -0
  41. package/dist/gerbil-BfnsFWRE.mjs.map +1 -0
  42. package/dist/gerbil-BjW-z7Fq.mjs +5 -0
  43. package/dist/gerbil-DZ1k3ChC.d.mts +138 -0
  44. package/dist/gerbil-DZ1k3ChC.d.mts.map +1 -0
  45. package/dist/index.d.mts +223 -0
  46. package/dist/index.d.mts.map +1 -0
  47. package/dist/index.mjs +13 -0
  48. package/dist/index.mjs.map +1 -0
  49. package/dist/integrations/ai-sdk.d.mts +78 -0
  50. package/dist/integrations/ai-sdk.d.mts.map +1 -0
  51. package/dist/integrations/ai-sdk.mjs +199 -0
  52. package/dist/integrations/ai-sdk.mjs.map +1 -0
  53. package/dist/integrations/langchain.d.mts +41 -0
  54. package/dist/integrations/langchain.d.mts.map +1 -0
  55. package/dist/integrations/langchain.mjs +93 -0
  56. package/dist/integrations/langchain.mjs.map +1 -0
  57. package/dist/integrations/llamaindex.d.mts +45 -0
  58. package/dist/integrations/llamaindex.d.mts.map +1 -0
  59. package/dist/integrations/llamaindex.mjs +86 -0
  60. package/dist/integrations/llamaindex.mjs.map +1 -0
  61. package/dist/integrations/mcp-client.d.mts +206 -0
  62. package/dist/integrations/mcp-client.d.mts.map +1 -0
  63. package/dist/integrations/mcp-client.mjs +507 -0
  64. package/dist/integrations/mcp-client.mjs.map +1 -0
  65. package/dist/integrations/mcp.d.mts +177 -0
  66. package/dist/integrations/mcp.d.mts.map +1 -0
  67. package/dist/integrations/mcp.mjs +8 -0
  68. package/dist/mcp-R8kRLIKb.mjs +348 -0
  69. package/dist/mcp-R8kRLIKb.mjs.map +1 -0
  70. package/dist/models-DKULvhOr.mjs +136 -0
  71. package/dist/models-DKULvhOr.mjs.map +1 -0
  72. package/dist/models-De2-_GmQ.d.mts +22 -0
  73. package/dist/models-De2-_GmQ.d.mts.map +1 -0
  74. package/dist/one-liner-BUQR0nqq.mjs +98 -0
  75. package/dist/one-liner-BUQR0nqq.mjs.map +1 -0
  76. package/dist/skills/index.d.mts +390 -0
  77. package/dist/skills/index.d.mts.map +1 -0
  78. package/dist/skills/index.mjs +7 -0
  79. package/dist/skills-D3CEpgDc.mjs +630 -0
  80. package/dist/skills-D3CEpgDc.mjs.map +1 -0
  81. package/dist/tools-BsiEE6f2.mjs +567 -0
  82. package/dist/tools-BsiEE6f2.mjs.map +1 -0
  83. package/dist/types-BS1N92Jt.d.mts +183 -0
  84. package/dist/types-BS1N92Jt.d.mts.map +1 -0
  85. package/dist/utils-7vXqtq2Q.mjs +63 -0
  86. package/dist/utils-7vXqtq2Q.mjs.map +1 -0
  87. package/docs/ai-sdk.md +80 -0
  88. package/docs/architecture/README.md +84 -0
  89. package/docs/architecture/caching.md +227 -0
  90. package/docs/architecture/inference.md +176 -0
  91. package/docs/architecture/overview.md +179 -0
  92. package/docs/architecture/streaming.md +261 -0
  93. package/docs/architecture/webgpu.md +213 -0
  94. package/docs/browser.md +328 -0
  95. package/docs/cli.md +155 -0
  96. package/docs/frameworks.md +90 -0
  97. package/docs/mcp-client.md +224 -0
  98. package/docs/mcp.md +109 -0
  99. package/docs/memory.md +229 -0
  100. package/docs/repl.md +473 -0
  101. package/docs/skills.md +261 -0
  102. package/docs/tools.md +304 -0
  103. package/package.json +207 -0
package/dist/chrome-backend-C5Un08O4.mjs
@@ -0,0 +1,771 @@
+ import { execSync } from "child_process";
+ import { existsSync, mkdirSync, readFileSync, rmSync, unlinkSync, writeFileSync } from "fs";
+ import { createServer } from "http";
+ import { homedir } from "os";
+ import { join } from "path";
+ import puppeteer from "puppeteer-core";
+
+ //#region src/core/chrome-backend.ts
+ /**
+  * Chrome DevTools Protocol Backend for WebGPU Inference
+  *
+  * Uses headless Chrome as a WebGPU accelerator for Node.js environments.
+  * Provides the same performance as browser inference (~100+ tok/s with q4f16).
+  */
+ const GERBIL_CACHE_DIR = join(homedir(), ".gerbil", "chrome-cache");
+ const WS_ENDPOINT_FILE = join(GERBIL_CACHE_DIR, "ws-endpoint.txt");
+ const CACHED_MODELS_FILE = join(homedir(), ".gerbil", "cached-models.json");
+ /** Get list of models cached in Chrome's IndexedDB */
+ function getChromeCachedModels() {
+   try {
+     if (!existsSync(CACHED_MODELS_FILE)) return [];
+     return JSON.parse(readFileSync(CACHED_MODELS_FILE, "utf-8")).models || [];
+   } catch {
+     return [];
+   }
+ }
+ /** Fetch model size from HuggingFace API */
+ async function fetchModelSize(modelId) {
+   try {
+     const treeRes = await fetch(`https://huggingface.co/api/models/${modelId}/tree/main/onnx`);
+     if (treeRes.ok) {
+       const files = await treeRes.json();
+       const q4f16 = files.find((f) => f.path.includes("q4f16") && f.path.endsWith(".onnx"));
+       const q4 = files.find((f) => f.path.includes("q4") && !f.path.includes("f16") && f.path.endsWith(".onnx"));
+       const fp16 = files.find((f) => f.path.includes("fp16") && f.path.endsWith(".onnx"));
+       const anyOnnx = files.find((f) => f.path.endsWith(".onnx"));
+       const bestFile = q4f16 || q4 || fp16 || anyOnnx;
+       if (bestFile?.size) return bestFile.size;
+     }
+     const res = await fetch(`https://huggingface.co/api/models/${modelId}`);
+     if (res.ok) return (await res.json()).usedStorage;
+   } catch {}
+ }
+ /** Track a model as cached */
+ function trackCachedModel(modelId, sizeBytes) {
+   try {
+     const dir = join(homedir(), ".gerbil");
+     if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
+     const models = getChromeCachedModels();
+     const existing = models.find((m) => m.modelId === modelId);
+     const now = (/* @__PURE__ */ new Date()).toISOString();
+     if (existing) {
+       existing.lastUsed = now;
+       if (sizeBytes) existing.sizeBytes = sizeBytes;
+     } else models.push({
+       modelId,
+       downloadedAt: now,
+       lastUsed: now,
+       sizeBytes
+     });
+     writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models }, null, 2));
+     if (!sizeBytes) fetchModelSize(modelId).then((size) => {
+       if (size) {
+         const updatedModels = getChromeCachedModels();
+         const model = updatedModels.find((m) => m.modelId === modelId);
+         if (model) {
+           model.sizeBytes = size;
+           writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models: updatedModels }, null, 2));
+         }
+       }
+     }).catch(() => {});
+   } catch {}
+ }
+ /** Refresh sizes for cached models that don't have them */
+ async function refreshCachedModelSizes() {
+   try {
+     const models = getChromeCachedModels();
+     const needsSize = models.filter((m) => !m.sizeBytes);
+     if (needsSize.length === 0) return;
+     const batchSize = 3;
+     for (let i = 0; i < needsSize.length; i += batchSize) {
+       const batch = needsSize.slice(i, i + batchSize);
+       await Promise.all(batch.map(async (model) => {
+         const size = await fetchModelSize(model.modelId);
+         if (size) model.sizeBytes = size;
+       }));
+     }
+     writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models }, null, 2));
+   } catch {}
+ }
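
// Editor's note: a minimal sketch (not part of the package) of reading the
// cached-model registry this module maintains at ~/.gerbil/cached-models.json.
// Field names (modelId, sizeBytes, lastUsed) come from trackCachedModel above.
import { existsSync, readFileSync } from "fs";
import { homedir } from "os";
import { join } from "path";

const registry = join(homedir(), ".gerbil", "cached-models.json");
const models = existsSync(registry)
  ? JSON.parse(readFileSync(registry, "utf-8")).models || []
  : [];
for (const m of models) {
  const size = m.sizeBytes ? `${(m.sizeBytes / 1024 ** 3).toFixed(2)} GB` : "size unknown";
  console.log(`${m.modelId}  ${size}  last used ${m.lastUsed}`);
}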
+ const GERBIL_LOCAL_PORT = 43724;
+ let globalBrowser = null;
+ let globalBrowserPromise = null;
+ let globalServer = null;
+ let globalServerPort = 0;
+ let activePagesCount = 0;
+ const MAX_CONCURRENT_PAGES = 5;
+ const CHROME_PATHS = {
+   darwin: [
+     "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+     "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
+     "/Applications/Chromium.app/Contents/MacOS/Chromium",
+     "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+     "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"
+   ],
+   linux: [
+     "google-chrome-stable",
+     "google-chrome",
+     "chromium-browser",
+     "chromium",
+     "microsoft-edge",
+     "brave-browser"
+   ],
+   win32: [
+     "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
+     "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
+     `${process.env.LOCALAPPDATA}\\Google\\Chrome\\Application\\chrome.exe`,
+     "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
+     "C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"
+   ]
+ };
+ function findChrome() {
+   if (process.env.CHROME_PATH) return process.env.CHROME_PATH;
+   const platform = process.platform;
+   const paths = CHROME_PATHS[platform] || [];
+   for (const p of paths) try {
+     if (platform === "linux") {
+       execSync(`which ${p}`, { stdio: "ignore" });
+       return p;
+     }
+     if (existsSync(p)) return p;
+   } catch {}
+   throw new Error("Chrome not found. Install Chrome or set CHROME_PATH environment variable.");
+ }
+ function getChromeFlags(userDataDir, _debuggingPort) {
+   const flags = ["--no-sandbox", `--user-data-dir=${userDataDir}`];
+   if (process.platform === "linux") flags.push("--enable-unsafe-webgpu", "--enable-features=Vulkan", "--use-angle=vulkan", "--disable-vulkan-surface");
+   else if (process.platform === "darwin") {} else flags.push("--enable-unsafe-webgpu");
+   return flags;
+ }
+ function getWorkerPageHTML(modelPath) {
+   return `
+ <!DOCTYPE html>
+ <html>
+ <head>
+   <title>Gerbil WebGPU Backend</title>
+   <script type="module">
+     import {
+       AutoTokenizer,
+       AutoModelForCausalLM,
+       TextStreamer,
+       InterruptableStoppingCriteria,
+     } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.0";
+
+     class ModelPipeline {
+       static tokenizer = null;
+       static model = null;
+       static modelId = "${modelPath}";
+
+       static async getInstance(progressCallback) {
+         if (!this.tokenizer) {
+           this.tokenizer = await AutoTokenizer.from_pretrained(this.modelId, {
+             progress_callback: progressCallback,
+           });
+         }
+
+         if (!this.model) {
+           this.model = await AutoModelForCausalLM.from_pretrained(this.modelId, {
+             dtype: "q4f16",
+             device: "webgpu",
+             progress_callback: progressCallback,
+           });
+         }
+
+         return { tokenizer: this.tokenizer, model: this.model };
+       }
+     }
+
+     const stoppingCriteria = new InterruptableStoppingCriteria();
+     let pastKeyValuesCache = null;
+     let totalTokensInCache = 0;
+
+     // Context length for auto-reset (Qwen3 default: 2048)
+     // Cache beyond this provides no benefit and wastes memory
+     const CONTEXT_LENGTH = 2048;
+
+     // Auto-load model on page init
+     (async function() {
+       console.log(JSON.stringify({ type: "progress", status: "Loading model..." }));
+
+       try {
+         const { tokenizer, model } = await ModelPipeline.getInstance((progress) => {
+           if (progress.status === "progress" && progress.file) {
+             console.log(JSON.stringify({
+               type: "progress",
+               status: "progress",
+               file: progress.file,
+               progress: Math.round(progress.progress || 0),
+             }));
+           }
+         });
+
+         console.log(JSON.stringify({ type: "progress", status: "Compiling shaders..." }));
+         const warmupInputs = tokenizer("a");
+         await model.generate({ ...warmupInputs, max_new_tokens: 1 });
+
+         console.log(JSON.stringify({ type: "ready" }));
+       } catch (error) {
+         console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
+       }
+     })();
+
+     window.gerbilGenerate = async function(messages, options = {}) {
+       const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 20, thinking = false } = options;
+
+       // Auto-reset KV cache if it exceeds context length
+       // This prevents unbounded memory growth while preserving performance
+       if (totalTokensInCache > CONTEXT_LENGTH) {
+         console.log(JSON.stringify({
+           type: "cache_reset",
+           reason: "context_exceeded",
+           tokensInCache: totalTokensInCache,
+           contextLength: CONTEXT_LENGTH
+         }));
+         pastKeyValuesCache = null;
+         totalTokensInCache = 0;
+       }
+
+       try {
+         const { tokenizer, model } = await ModelPipeline.getInstance();
+
+         const inputs = tokenizer.apply_chat_template(messages, {
+           add_generation_prompt: true,
+           return_dict: true,
+           enable_thinking: thinking,
+         });
+
+         let state = "answering";
+         let prevState = "answering";
+         const [START_THINKING_TOKEN_ID, END_THINKING_TOKEN_ID] = tokenizer.encode(
+           "<think></think>",
+           { add_special_tokens: false }
+         );
+
+         let startTime = null;
+         let numTokens = 0;
+
+         const tokenCallback = (tokens) => {
+           startTime ??= performance.now();
+           numTokens++;
+
+           const tokenId = Number(tokens[0]);
+           if (tokenId === START_THINKING_TOKEN_ID) {
+             state = "thinking";
+           } else if (tokenId === END_THINKING_TOKEN_ID) {
+             state = "answering";
+           }
+         };
+
+         const streamCallback = (text) => {
+           const tps = startTime ? (numTokens / (performance.now() - startTime)) * 1000 : 0;
+
+           // Inject <think> markers when state changes (since skip_special_tokens removes them)
+           let outputText = text;
+           if (thinking) {
+             if (state === "thinking" && prevState !== "thinking") {
+               outputText = "<think>" + text;
+             } else if (state === "answering" && prevState === "thinking") {
+               outputText = "</think>" + text;
+             }
+           }
+           prevState = state;
+
+           console.log(JSON.stringify({ type: "token", text: outputText, state, numTokens, tps }));
+         };
+
+         const streamer = new TextStreamer(tokenizer, {
+           skip_prompt: true,
+           skip_special_tokens: true,
+           callback_function: streamCallback,
+           token_callback_function: tokenCallback,
+         });
+
+         console.log(JSON.stringify({ type: "start" }));
+
+         const { past_key_values, sequences } = await model.generate({
+           ...inputs,
+           past_key_values: pastKeyValuesCache,
+           do_sample: temperature > 0,
+           temperature: temperature > 0 ? temperature : undefined,
+           top_p: topP,
+           top_k: topK,
+           max_new_tokens: maxTokens,
+           streamer,
+           stopping_criteria: stoppingCriteria,
+           return_dict_in_generate: true,
+         });
+
+         pastKeyValuesCache = past_key_values;
+
+         // Track total tokens in cache (input + generated)
+         const inputLength = inputs.input_ids.dims[1];
+         totalTokensInCache += inputLength + numTokens;
+
+         const endTime = performance.now();
+         const totalTime = startTime ? endTime - startTime : 0;
+
+         // Extract only the generated tokens (exclude input prompt)
+         const generatedTokens = sequences.slice(null, [inputLength, null]);
+         const decoded = tokenizer.batch_decode(generatedTokens, { skip_special_tokens: true });
+
+         console.log(JSON.stringify({
+           type: "complete",
+           text: decoded[0] || "",
+           numTokens,
+           totalTime,
+           tps: totalTime > 0 ? (numTokens / totalTime) * 1000 : 0,
+           tokensInCache: totalTokensInCache,
+         }));
+
+         return decoded[0] || "";
+       } catch (error) {
+         console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
+         throw error;
+       }
+     };
+
+     window.gerbilInterrupt = function() {
+       stoppingCriteria.interrupt();
+     };
+
+     window.gerbilReset = function() {
+       pastKeyValuesCache = null;
+       totalTokensInCache = 0;
+       stoppingCriteria.reset();
+       console.log(JSON.stringify({ type: "cache_reset", reason: "manual" }));
+     };
+
+     // Signal that the page is ready for commands
+     console.log(JSON.stringify({ type: "init" }));
+   <\/script>
+ </head>
+ <body>
+   <h1>Gerbil WebGPU Backend</h1>
+   <p>This page provides WebGPU inference for the Gerbil CLI.</p>
+ </body>
+ </html>
+ `;
+ }
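
// Editor's note: the worker page above talks to Node purely via console.log
// of JSON lines. A minimal sketch (not part of the package) of a Node-side
// dispatcher for that protocol; message types are copied from the page
// script, and the handler map is hypothetical.
function dispatchWorkerMessage(raw, handlers = {}) {
  let msg;
  try {
    msg = JSON.parse(raw);
  } catch {
    return; // non-protocol console output (e.g. onnxruntime noise)
  }
  switch (msg.type) {
    case "init":        // page script evaluated, commands can be issued
    case "progress":    // { status, file?, progress? } while downloading
    case "ready":       // model loaded and warmup generation done
    case "start":       // generation began
    case "token":       // { text, state, numTokens, tps } streamed chunk
    case "complete":    // { text, numTokens, totalTime, tps, tokensInCache }
    case "cache_reset": // { reason: "context_exceeded" | "manual" }
    case "error":       // { error }
      handlers[msg.type]?.(msg);
  }
}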
+ var ChromeGPUBackend = class ChromeGPUBackend {
+   browser = null;
+   page = null;
+   cdp = null;
+   serverPort = 0;
+   userDataDir = GERBIL_CACHE_DIR;
+   modelId;
+   isReady = false;
+   messageHandlers = /* @__PURE__ */ new Map();
+   pendingRejects = [];
+   server = null;
+   constructor(modelId) {
+     this.modelId = modelId;
+   }
+   /**
+    * Create and initialize a Chrome GPU backend
+    */
+   static async create(options = {}) {
+     const backend = new ChromeGPUBackend(options.modelId || "onnx-community/Qwen3-0.6B-ONNX");
+     await backend.launch(options);
+     return backend;
+   }
+   /**
+    * Get existing browser or launch a new one (singleton pattern)
+    * Multiple Gerbil instances share the same browser process
+    */
+   async getOrCreateBrowser(chromePath, options) {
+     if (globalBrowser?.connected) {
+       options.onProgress?.({ status: "Reusing existing Chrome..." });
+       return globalBrowser;
+     }
+     if (globalBrowserPromise) {
+       options.onProgress?.({ status: "Waiting for Chrome startup..." });
+       return globalBrowserPromise;
+     }
+     if (existsSync(WS_ENDPOINT_FILE)) try {
+       const wsEndpoint = readFileSync(WS_ENDPOINT_FILE, "utf-8").trim();
+       options.onProgress?.({ status: "Connecting to existing Chrome..." });
+       globalBrowser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });
+       return globalBrowser;
+     } catch {
+       try {
+         unlinkSync(WS_ENDPOINT_FILE);
+       } catch {}
+     }
+     globalBrowserPromise = this.launchBrowser(chromePath, options);
+     try {
+       globalBrowser = await globalBrowserPromise;
+       return globalBrowser;
+     } finally {
+       globalBrowserPromise = null;
+     }
+   }
+   /**
+    * Launch a new Chrome browser instance
+    */
+   async launchBrowser(chromePath, _options) {
+     const debuggingPort = 9222 + Math.floor(Math.random() * 1e3);
+     const lockFile = join(this.userDataDir, "SingletonLock");
+     if (existsSync(lockFile)) try {
+       unlinkSync(lockFile);
+       await new Promise((r) => setTimeout(r, 200));
+     } catch {}
+     const browser = await puppeteer.launch({
+       executablePath: chromePath,
+       headless: true,
+       args: [
+         ...getChromeFlags(this.userDataDir, debuggingPort),
+         "--enable-gpu",
+         "--no-first-run",
+         "--no-default-browser-check",
+         "--disable-background-timer-throttling",
+         "--disable-renderer-backgrounding",
+         "--disable-dev-shm-usage"
+       ],
+       handleSIGINT: false,
+       handleSIGTERM: false,
+       handleSIGHUP: false
+     });
+     writeFileSync(WS_ENDPOINT_FILE, browser.wsEndpoint());
+     browser.on("disconnected", () => {
+       globalBrowser = null;
+       try {
+         unlinkSync(WS_ENDPOINT_FILE);
+       } catch {}
+     });
+     return browser;
+   }
+   /**
+    * Launch Chrome and initialize the worker page
+    */
+   async launch(options) {
+     if (activePagesCount >= MAX_CONCURRENT_PAGES) throw new Error(`Maximum concurrent pages (${MAX_CONCURRENT_PAGES}) reached. Call dispose() on old Gerbil instances to free resources. Currently active: ${activePagesCount}`);
+     const chromePath = options.chromePath || findChrome();
+     this.userDataDir = GERBIL_CACHE_DIR;
+     if (!existsSync(this.userDataDir)) mkdirSync(this.userDataDir, { recursive: true });
+     const html = getWorkerPageHTML(this.modelId);
+     await this.startServer(html);
+     options.onProgress?.({ status: "Starting Chrome..." });
+     this.browser = await this.getOrCreateBrowser(chromePath, options);
+     this.page = await this.browser.newPage();
+     this.cdp = await this.page.createCDPSession();
+     activePagesCount++;
+     options.onProgress?.({ status: `Active pages: ${activePagesCount}/${MAX_CONCURRENT_PAGES}` });
+     this.browser.on("disconnected", () => {
+       console.error("[Chrome] Browser disconnected unexpectedly");
+       this.isReady = false;
+       this.browser = null;
+       this.page = null;
+       this.cdp = null;
+       this.rejectPendingWaits(/* @__PURE__ */ new Error("CHROME_DISCONNECTED"));
+     });
+     await this.cdp.send("Runtime.enable");
+     await this.cdp.send("Runtime.setAsyncCallStackDepth", { maxDepth: 32 });
+     this.cdp.on("Runtime.consoleAPICalled", (event) => {
+       const text = event.args.map((a) => a.value || a.description || "").join(" ");
+       if (event.type === "log" && event.args[0]?.value) try {
+         const data = JSON.parse(event.args[0].value);
+         this.handleMessage(data, options);
+       } catch {
+         if (text.length < 500 && !text.includes("Float32Array") && !text.includes("past_key_values")) {}
+       }
+       else if (event.type === "error" || event.type === "warning") {
+         if (!(text.includes("onnxruntime") || text.includes("content-length") || text.includes("Float32Array") || text.includes("past_key_values")) && text.length < 1e3) console.error(`[Chrome ${event.type}]`, text);
+       }
+     });
+     this.cdp.on("Runtime.exceptionThrown", (event) => {
+       const errText = event.exceptionDetails?.text || event.exceptionDetails?.exception?.description || "";
+       if (errText.includes("Float32Array") || errText.includes("past_key_values") || errText.length > 1e3) return;
+       console.error("[Chrome Exception]", errText);
+     });
+     await this.page.goto(`http://127.0.0.1:${this.serverPort}/`, {
+       waitUntil: "domcontentloaded",
+       timeout: 3e4
+     });
+     await this.waitForMessage("ready", 3e5);
+     this.isReady = true;
+     options.onProgress?.({ status: "Ready (WebGPU)!" });
+     trackCachedModel(this.modelId);
+   }
+   /**
+    * Handle incoming messages from the page
+    */
+   handleMessage(data, options) {
+     const { type, ...rest } = data;
+     const handler = this.messageHandlers.get(type);
+     if (handler) handler(rest);
+     if (type === "progress") options.onProgress?.(rest);
+     else if (type === "token") options.onToken?.(rest);
+   }
+   /**
+    * Wait for a specific message type
+    */
+   waitForMessage(type, timeout = 3e4) {
+     return new Promise((resolve, reject) => {
+       this.pendingRejects.push(reject);
+       const cleanup = () => {
+         clearTimeout(timer);
+         this.messageHandlers.delete(type);
+         const idx = this.pendingRejects.indexOf(reject);
+         if (idx >= 0) this.pendingRejects.splice(idx, 1);
+       };
+       const timer = setTimeout(() => {
+         cleanup();
+         reject(/* @__PURE__ */ new Error(`Timeout waiting for ${type} message`));
+       }, timeout);
+       this.messageHandlers.set(type, (data) => {
+         cleanup();
+         resolve(data);
+       });
+     });
+   }
+   /**
+    * Check if Chrome backend is still alive
+    */
+   isAlive() {
+     return this.isReady && this.browser !== null && this.page !== null;
+   }
+   /**
+    * Get Chrome backend status information
+    */
+   getStatus() {
+     let pid = null;
+     const browserProcess = this.browser?.process?.() || globalBrowser?.process?.();
+     if (browserProcess?.pid) pid = browserProcess.pid;
+     return {
+       pid,
+       port: this.serverPort || globalServerPort,
+       modelId: this.modelId,
+       startedAt: this.isReady ? /* @__PURE__ */ new Date() : null
+     };
+   }
+   /**
+    * Get Chrome memory usage via CDP Performance metrics
+    * Returns memory in bytes or null if unavailable
+    */
+   async getMemoryUsage() {
+     if (!(this.cdp && this.isReady)) return null;
+     try {
+       await this.cdp.send("Performance.enable");
+       const { metrics } = await this.cdp.send("Performance.getMetrics");
+       return {
+         jsHeapUsed: metrics.find((m) => m.name === "JSHeapUsedSize")?.value ?? 0,
+         jsHeapTotal: metrics.find((m) => m.name === "JSHeapTotalSize")?.value ?? 0
+       };
+     } catch {
+       return null;
+     }
+   }
+   /**
+    * Check memory usage and auto-cleanup if threshold exceeded
+    * @param thresholdGB Memory threshold in GB (default: 8)
+    * @returns true if cleanup was performed
+    */
+   async checkMemoryAndCleanup(thresholdGB = 8) {
+     const mem = await this.getMemoryUsage();
+     if (!mem) return false;
+     const usedGB = mem.jsHeapUsed / 1024 ** 3;
+     if (usedGB > thresholdGB) {
+       console.warn(`[Gerbil] Memory usage high (${usedGB.toFixed(1)}GB > ${thresholdGB}GB), clearing KV cache...`);
+       await this.reset();
+       return true;
+     }
+     return false;
+   }
+   /**
+    * Get memory usage in a human-readable format
+    */
+   async getMemoryStats() {
+     const mem = await this.getMemoryUsage();
+     if (!mem) return null;
+     return {
+       usedGB: mem.jsHeapUsed / 1024 ** 3,
+       totalGB: mem.jsHeapTotal / 1024 ** 3,
+       usedPercent: mem.jsHeapUsed / mem.jsHeapTotal * 100
+     };
+   }
+   /**
+    * Generate text with streaming
+    */
+   async generate(prompt, options = {}) {
+     if (!this.isAlive()) throw new Error("CHROME_BACKEND_DEAD");
+     const messages = [{
+       role: "system",
+       content: options.system || "You are a helpful assistant."
+     }, {
+       role: "user",
+       content: prompt
+     }];
+     const genOptions = {
+       maxTokens: options.maxTokens ?? 256,
+       temperature: options.temperature ?? .7,
+       topP: options.topP ?? .9,
+       topK: options.topK ?? 20,
+       thinking: options.thinking ?? false
+     };
+     if (options.onToken) this.messageHandlers.set("token", options.onToken);
+     try {
+       const resultPromise = this.page.evaluate((msgs, opts) => window.gerbilGenerate(msgs, opts), messages, genOptions);
+       const completeData = await this.waitForMessage("complete", 6e5);
+       this.messageHandlers.delete("token");
+       await resultPromise;
+       return completeData.text || "";
+     } catch (err) {
+       if (!this.isAlive()) throw new Error("CHROME_BACKEND_DEAD");
+       throw err;
+     }
+   }
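
// Editor's note: a hypothetical streaming call against generate() as defined
// above; not part of the package. The class is exported from this chunk under
// the mangled name `t` (see the export statement at the bottom of the file).
import { t as ChromeGPUBackend } from "./chrome-backend-C5Un08O4.mjs";

const backend = await ChromeGPUBackend.create({
  onProgress: (p) => console.error(p.status ?? ""),
});
const text = await backend.generate("Why is the sky blue?", {
  maxTokens: 128,
  temperature: 0.7,
  onToken: (t) => process.stdout.write(t.text), // streamed { text, state, numTokens, tps }
});
console.log("\n" + text);
await backend.dispose(); // frees this page; the shared Chrome stays up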
+   /**
+    * Interrupt current generation
+    */
+   async interrupt() {
+     if (this.page) await this.page.evaluate("window.gerbilInterrupt()");
+   }
+   /**
+    * Reset conversation cache
+    */
+   async reset() {
+     if (this.page) await this.page.evaluate("window.gerbilReset()");
+   }
+   /**
+    * Check if backend is ready
+    */
+   ready() {
+     return this.isReady;
+   }
+   /**
+    * Start or reuse the global HTTP server
+    * Uses singleton pattern to prevent killing our own server
+    */
+   async startServer(html) {
+     if (globalServer && globalServerPort) {
+       this.server = globalServer;
+       this.serverPort = globalServerPort;
+       return;
+     }
+     return new Promise((resolve, reject) => {
+       const server = createServer((_req, res) => {
+         res.writeHead(200, { "Content-Type": "text/html" });
+         res.end(html);
+       });
+       server.on("error", (err) => {
+         if (err.code === "EADDRINUSE") {
+           this.serverPort = GERBIL_LOCAL_PORT;
+           globalServerPort = GERBIL_LOCAL_PORT;
+           resolve();
+         } else reject(err);
+       });
+       server.listen(GERBIL_LOCAL_PORT, "127.0.0.1", () => {
+         this.server = server;
+         this.serverPort = GERBIL_LOCAL_PORT;
+         globalServer = server;
+         globalServerPort = GERBIL_LOCAL_PORT;
+         resolve();
+       });
+     });
+   }
+   /**
+    * Dispose of the backend and clean up
+    * Note: We keep the shared browser running for other backends
+    */
+   async dispose() {
+     this.isReady = false;
+     this.pendingRejects = [];
+     this.messageHandlers.clear();
+     if (this.page) {
+       try {
+         await this.page.close();
+         activePagesCount = Math.max(0, activePagesCount - 1);
+       } catch {}
+       this.page = null;
+     }
+     this.cdp = null;
+     this.browser = null;
+     this.server = null;
+   }
+   /**
+    * Reject all pending waits (called on browser disconnect or dispose)
+    */
+   rejectPendingWaits(error) {
+     for (const reject of this.pendingRejects) reject(error);
+     this.pendingRejects = [];
+     this.messageHandlers.clear();
+   }
+   /**
+    * Clear the model cache (forces re-download on next start)
+    */
+   static clearCache() {
+     if (existsSync(GERBIL_CACHE_DIR)) rmSync(GERBIL_CACHE_DIR, {
+       recursive: true,
+       force: true
+     });
+   }
+   /**
+    * Get the number of active Chrome pages
+    */
+   static getActivePageCount() {
+     return activePagesCount;
+   }
+   /**
+    * Get memory usage info for all active pages
+    */
+   static getMemoryInfo() {
+     return {
+       activePagesCount,
+       maxPages: MAX_CONCURRENT_PAGES
+     };
+   }
+   /**
+    * Gracefully close the shared browser (call on process exit)
+    */
+   static async closeSharedBrowser() {
+     if (globalBrowser) {
+       try {
+         await globalBrowser.close();
+       } catch {}
+       globalBrowser = null;
+       globalBrowserPromise = null;
+     }
+     if (globalServer) {
+       globalServer.close();
+       globalServer = null;
+       globalServerPort = 0;
+     }
+     activePagesCount = 0;
+     try {
+       unlinkSync(WS_ENDPOINT_FILE);
+     } catch {}
+   }
+ };
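
// Editor's note: a sketch (not part of the package) of a periodic guard built
// on getMemoryStats() and checkMemoryAndCleanup() above. Assumes `backend` is
// a live ChromeGPUBackend instance; the 8 GB threshold mirrors the method's
// default.
const guard = setInterval(async () => {
  const stats = await backend.getMemoryStats();
  if (stats) console.error(`[guard] heap ${stats.usedGB.toFixed(1)} GB (${stats.usedPercent.toFixed(0)}%)`);
  await backend.checkMemoryAndCleanup(8); // resets the page-side KV cache when exceeded
}, 60_000);
guard.unref(); // don't keep the process alive just for the guard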
+ let cleanupRegistered = false;
+ function registerCleanup() {
+   if (cleanupRegistered) return;
+   cleanupRegistered = true;
+   const cleanup = () => {
+     if (globalBrowser) {
+       try {
+         const browserProcess = globalBrowser.process();
+         if (browserProcess) browserProcess.kill("SIGTERM");
+       } catch {}
+       globalBrowser = null;
+     }
+     if (globalServer) {
+       globalServer.close();
+       globalServer = null;
+     }
+   };
+   process.on("exit", cleanup);
+   process.on("SIGINT", () => {
+     cleanup();
+     process.exit(0);
+   });
+   process.on("SIGTERM", () => {
+     cleanup();
+     process.exit(0);
+   });
+ }
+ registerCleanup();
+
+ //#endregion
+ export { trackCachedModel as i, getChromeCachedModels as n, refreshCachedModelSizes as r, ChromeGPUBackend as t };
+ //# sourceMappingURL=chrome-backend-C5Un08O4.mjs.map
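
Editor's note: the chunk's public surface is re-exported under mangled names (`t`, `n`, `r`, `i`). A hypothetical end-to-end lifecycle against those exports, including the explicit shared-browser teardown that registerCleanup() otherwise performs on process exit:

import { t as ChromeGPUBackend, n as getChromeCachedModels } from "./chrome-backend-C5Un08O4.mjs";

const backend = await ChromeGPUBackend.create();
try {
  console.log(await backend.generate("Say hi in five words."));
  console.log("cached models:", getChromeCachedModels().map((m) => m.modelId));
} finally {
  await backend.dispose();                     // release this instance's page
  await ChromeGPUBackend.closeSharedBrowser(); // tear down the shared Chrome and HTTP server
}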