@tryhamster/gerbil 1.0.0-rc.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +318 -104
  3. package/dist/architectures-C1I5V3Dt.mjs +6070 -0
  4. package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
  5. package/dist/browser/index.d.ts +276 -590
  6. package/dist/browser/index.d.ts.map +1 -1
  7. package/dist/browser/index.js +592 -2334
  8. package/dist/browser/index.js.map +1 -1
  9. package/dist/cli.mjs +625 -1098
  10. package/dist/cli.mjs.map +1 -1
  11. package/dist/defaults-9komdrbY.mjs +24 -0
  12. package/dist/defaults-9komdrbY.mjs.map +1 -0
  13. package/dist/frameworks/express.d.mts +1 -3
  14. package/dist/frameworks/express.d.mts.map +1 -1
  15. package/dist/frameworks/express.mjs +7 -7
  16. package/dist/frameworks/express.mjs.map +1 -1
  17. package/dist/frameworks/fastify.d.mts +1 -1
  18. package/dist/frameworks/fastify.d.mts.map +1 -1
  19. package/dist/frameworks/fastify.mjs +3 -3
  20. package/dist/frameworks/fastify.mjs.map +1 -1
  21. package/dist/frameworks/hono.d.mts +1 -1
  22. package/dist/frameworks/hono.d.mts.map +1 -1
  23. package/dist/frameworks/hono.mjs +4 -4
  24. package/dist/frameworks/hono.mjs.map +1 -1
  25. package/dist/frameworks/next.d.mts +3 -2
  26. package/dist/frameworks/next.d.mts.map +1 -1
  27. package/dist/frameworks/next.mjs +4 -4
  28. package/dist/frameworks/next.mjs.map +1 -1
  29. package/dist/frameworks/react.d.mts +1 -1
  30. package/dist/frameworks/trpc.d.mts +1 -1
  31. package/dist/frameworks/trpc.d.mts.map +1 -1
  32. package/dist/frameworks/trpc.mjs +4 -4
  33. package/dist/frameworks/trpc.mjs.map +1 -1
  34. package/dist/gerbil-BetB5xb0.d.mts +488 -0
  35. package/dist/gerbil-BetB5xb0.d.mts.map +1 -0
  36. package/dist/gerbil-CTZUa8EZ.mjs +4 -0
  37. package/dist/gerbil-DNniplr4.mjs +1656 -0
  38. package/dist/gerbil-DNniplr4.mjs.map +1 -0
  39. package/dist/gpu/hooks.d.mts +640 -0
  40. package/dist/gpu/hooks.d.mts.map +1 -0
  41. package/dist/gpu/hooks.mjs +1369 -0
  42. package/dist/gpu/hooks.mjs.map +1 -0
  43. package/dist/gpu/index.d.mts +2 -0
  44. package/dist/gpu/index.mjs +6 -0
  45. package/dist/gpu-DFuglcEx.mjs +3790 -0
  46. package/dist/gpu-DFuglcEx.mjs.map +1 -0
  47. package/dist/index-Dgmb2kE3.d.mts +245 -0
  48. package/dist/index-Dgmb2kE3.d.mts.map +1 -0
  49. package/dist/index-DukkJRMj.d.mts +2114 -0
  50. package/dist/index-DukkJRMj.d.mts.map +1 -0
  51. package/dist/index.d.mts +22 -487
  52. package/dist/index.d.mts.map +1 -1
  53. package/dist/index.mjs +13 -8
  54. package/dist/index.mjs.map +1 -1
  55. package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
  56. package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
  57. package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
  58. package/dist/integrations/ai-sdk.d.mts +75 -6
  59. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  60. package/dist/integrations/ai-sdk.mjs +131 -15
  61. package/dist/integrations/ai-sdk.mjs.map +1 -1
  62. package/dist/integrations/langchain.d.mts +1 -1
  63. package/dist/integrations/langchain.d.mts.map +1 -1
  64. package/dist/integrations/langchain.mjs +5 -5
  65. package/dist/integrations/langchain.mjs.map +1 -1
  66. package/dist/integrations/llamaindex.d.mts +1 -1
  67. package/dist/integrations/llamaindex.d.mts.map +1 -1
  68. package/dist/integrations/llamaindex.mjs +5 -5
  69. package/dist/integrations/llamaindex.mjs.map +1 -1
  70. package/dist/integrations/mcp-client.mjs +3 -3
  71. package/dist/integrations/mcp-client.mjs.map +1 -1
  72. package/dist/integrations/mcp.d.mts +3 -2
  73. package/dist/integrations/mcp.d.mts.map +1 -1
  74. package/dist/integrations/mcp.mjs +5 -5
  75. package/dist/{mcp-BvbriaBy.mjs → mcp-D2vvH1Xc.mjs} +4 -4
  76. package/dist/mcp-D2vvH1Xc.mjs.map +1 -0
  77. package/dist/memory/index.d.mts +3 -0
  78. package/dist/memory/index.mjs +6 -0
  79. package/dist/memory-D1P7Tmda.mjs +4 -0
  80. package/dist/memory-DVN0MnIG.mjs +132 -0
  81. package/dist/memory-DVN0MnIG.mjs.map +1 -0
  82. package/dist/memory-Dj0J1v88.mjs +294 -0
  83. package/dist/memory-Dj0J1v88.mjs.map +1 -0
  84. package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
  85. package/dist/moonshine-stt-4ojLtMq7.mjs +11962 -0
  86. package/dist/moonshine-stt-4ojLtMq7.mjs.map +1 -0
  87. package/dist/{one-liner-s-lD8rCC.mjs → one-liner-JhdIPxzF.mjs} +14 -16
  88. package/dist/one-liner-JhdIPxzF.mjs.map +1 -0
  89. package/dist/repl-BDRkwPGX.mjs +9 -0
  90. package/dist/skills/index.d.mts +270 -320
  91. package/dist/skills/index.d.mts.map +1 -1
  92. package/dist/skills/index.mjs +5 -5
  93. package/dist/{skills-CD3Orlex.mjs → skills-CU694Dc8.mjs} +187 -32
  94. package/dist/skills-CU694Dc8.mjs.map +1 -0
  95. package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
  96. package/dist/tools-DQ1mPUw5.mjs.map +1 -0
  97. package/dist/types-DQBe2lFo.d.mts +165 -0
  98. package/dist/types-DQBe2lFo.d.mts.map +1 -0
  99. package/dist/{types-CiTc7ez3.d.mts → types-LlyYILII.d.mts} +112 -14
  100. package/dist/types-LlyYILII.d.mts.map +1 -0
  101. package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
  102. package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
  103. package/dist/vector-B0panuy6.mjs +95 -0
  104. package/dist/vector-B0panuy6.mjs.map +1 -0
  105. package/docs/PROJECT-STATE.md +321 -0
  106. package/docs/adding-a-model-family.md +280 -0
  107. package/docs/ai-sdk.md +70 -61
  108. package/docs/architecture/overview.md +17 -7
  109. package/docs/browser.md +203 -8
  110. package/docs/embeddings.md +156 -0
  111. package/docs/gerbil-site-native-migration.md +217 -0
  112. package/docs/gpu-engine/architectures.md +398 -0
  113. package/docs/gpu-engine/ir.md +372 -0
  114. package/docs/gpu-engine/kernels.md +718 -0
  115. package/docs/gpu-engine/paper.html +1759 -0
  116. package/docs/gpu-engine/paper.md +2109 -0
  117. package/docs/gpu-engine/safetensors.md +312 -0
  118. package/docs/gpu-engine/tokenizer.md +302 -0
  119. package/docs/memory-rag.md +91 -0
  120. package/docs/metal-safari-intel.md +190 -0
  121. package/docs/mobile-failure-diagnosis.md +124 -0
  122. package/docs/mobile.md +99 -0
  123. package/docs/observability.md +230 -0
  124. package/docs/onnx-removal-plan.md +339 -0
  125. package/docs/research/autoresearch-portable.md +904 -0
  126. package/docs/research/dispatch-reduction-hivemind.md +84 -0
  127. package/docs/research/ios-safari-model-caching.md +117 -0
  128. package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
  129. package/docs/research/native-stt-model-selection.md +49 -0
  130. package/docs/research/native-tts-model-selection.md +90 -0
  131. package/docs/research/native-vs-chromium-decision.md +152 -0
  132. package/docs/research/nemotron-mamba2-inference.md +910 -0
  133. package/docs/research/qwen35-multimodal.md +293 -0
  134. package/docs/research/qwen36-gemma4-targets.md +337 -0
  135. package/docs/research/sota-embedding-models.md +179 -0
  136. package/docs/research/sota-mobile-models-2026.md +263 -0
  137. package/docs/research/sota-modality-models.md +202 -0
  138. package/docs/research/tps-baselines.md +71 -0
  139. package/docs/research/webgpu-m4-reference.md +104 -0
  140. package/docs/site-update-plan.md +155 -0
  141. package/docs/structured-output.md +123 -0
  142. package/docs/stt.md +63 -446
  143. package/docs/tts.md +77 -499
  144. package/docs/vision.md +100 -338
  145. package/package.json +22 -7
  146. package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
  147. package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
  148. package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
  149. package/dist/gerbil-CJ3ifloF.mjs +0 -4
  150. package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
  151. package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
  152. package/dist/gerbil-qOTe1nl2.d.mts +0 -431
  153. package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
  154. package/dist/kokoro-BNTb6egA.mjs +0 -20210
  155. package/dist/kokoro-BNTb6egA.mjs.map +0 -1
  156. package/dist/kokoro-CMOGDSgT.js +0 -20212
  157. package/dist/kokoro-CMOGDSgT.js.map +0 -1
  158. package/dist/mcp-BvbriaBy.mjs.map +0 -1
  159. package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
  160. package/dist/repl-DveXw36T.mjs +0 -9
  161. package/dist/skills-CD3Orlex.mjs.map +0 -1
  162. package/dist/stt-Bu-E23Sc.js +0 -433
  163. package/dist/stt-Bu-E23Sc.js.map +0 -1
  164. package/dist/stt-CpLYbGFd.mjs +0 -433
  165. package/dist/stt-CpLYbGFd.mjs.map +0 -1
  166. package/dist/stt-DRPLEEHB.mjs +0 -3
  167. package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
  168. package/dist/transformers.web-DiD1gTwk.js +0 -44695
  169. package/dist/transformers.web-DiD1gTwk.js.map +0 -1
  170. package/dist/transformers.web-u34VxRFM.js +0 -3
  171. package/dist/tts-CqroPaSK.js +0 -724
  172. package/dist/tts-CqroPaSK.js.map +0 -1
  173. package/dist/tts-DXgsKGCe.mjs +0 -3
  174. package/dist/tts-DeGANMNV.mjs +0 -730
  175. package/dist/tts-DeGANMNV.mjs.map +0 -1
  176. package/dist/types-CiTc7ez3.d.mts.map +0 -1
  177. /package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
  178. /package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
  179. /package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0
@@ -1,1212 +0,0 @@
1
- import { execSync } from "node:child_process";
2
- import { existsSync, mkdirSync, readFileSync, rmSync, unlinkSync, writeFileSync } from "node:fs";
3
- import { createServer } from "node:http";
4
- import { homedir } from "node:os";
5
- import { join } from "node:path";
6
- import puppeteer from "puppeteer-core";
7
-
8
- //#region src/core/chrome-backend.ts
9
- /**
10
- * Chrome DevTools Protocol Backend for WebGPU Inference
11
- *
12
- * Uses headless Chrome as a WebGPU accelerator for Node.js environments.
13
- * Provides the same performance as browser inference (~100+ tok/s with q4f16).
14
- */
15
// Chrome profile directory under the user's home; ChromeGPUBackend uses it as its default `userDataDir`.
- const GERBIL_CACHE_DIR = join(homedir(), ".gerbil", "chrome-cache");
16
// Text file holding a DevTools WebSocket endpoint; getOrCreateBrowser() reads it to reconnect to an already-running Chrome.
- const WS_ENDPOINT_FILE = join(GERBIL_CACHE_DIR, "ws-endpoint.txt");
17
// JSON manifest shaped as { models: [...] }; read by getChromeCachedModels() and rewritten by the tracking/refresh helpers.
- const CACHED_MODELS_FILE = join(homedir(), ".gerbil", "cached-models.json");
18
/**
 * Read the model entries recorded in the cached-models manifest.
 *
 * The manifest is the JSON file at CACHED_MODELS_FILE, shaped as
 * `{ "models": [...] }`. This is a best-effort read: a missing file, an
 * unreadable file, invalid JSON, or a `models` value that is not an array all
 * produce an empty list instead of an error.
 *
 * @returns {Array<object>} The recorded model entries, or `[]` when none can be read.
 */
function getChromeCachedModels() {
  try {
    if (!existsSync(CACHED_MODELS_FILE)) return [];
    const manifest = JSON.parse(readFileSync(CACHED_MODELS_FILE, "utf-8"));
    // Callers use array methods (.find / .filter) on the result, so a truthy
    // non-array `models` value (hand-edited or corrupted file) must not leak out.
    return Array.isArray(manifest?.models) ? manifest.models : [];
  } catch {
    // Deliberate best-effort: an unreadable manifest is treated as "nothing cached".
    return [];
  }
}
27
/**
 * Look up a model's context length on HuggingFace.
 *
 * `config.json` is consulted first; the first truthy value among the known
 * context-length fields (top level and `text_config`) wins. If that yields
 * nothing, `tokenizer_config.json` is consulted and its `model_max_length` is
 * used when it is below 1e6. Request and parse failures are ignored.
 *
 * @param {string} modelId - HuggingFace repository id, e.g. "org/name".
 * @returns {Promise<number|undefined>} The context length, or `undefined` when none was found.
 */
async function fetchContextLength(modelId) {
  const rawBase = `https://huggingface.co/${modelId}/raw/main`;

  try {
    const configResponse = await fetch(`${rawBase}/config.json`);
    if (configResponse.ok) {
      const modelConfig = await configResponse.json();
      const nestedText = modelConfig.text_config || {};
      // Ordered by preference; the first truthy entry is the answer.
      const candidates = [
        modelConfig.max_position_embeddings,
        nestedText.max_position_embeddings,
        modelConfig.sliding_window,
        nestedText.sliding_window,
        modelConfig.max_seq_len,
        modelConfig.max_sequence_length,
        modelConfig.n_ctx,
        modelConfig.n_positions,
      ];
      const fromConfig = candidates.find(Boolean);
      if (fromConfig) return fromConfig;
    }
  } catch {}

  try {
    const tokenizerResponse = await fetch(`${rawBase}/tokenizer_config.json`);
    if (tokenizerResponse.ok) {
      const { model_max_length: maxLength } = await tokenizerResponse.json();
      // Values of 1e6 and above are not accepted as a real limit.
      if (maxLength && maxLength < 1e6) return maxLength;
    }
  } catch {}
}
46
/**
 * Size in bytes of one HuggingFace tree entry.
 *
 * The LFS size is preferred when present and non-zero, then the plain `size`
 * field; an entry with neither counts as 0.
 *
 * @param {{ size?: number, lfs?: { size?: number } }} file - Tree entry.
 * @returns {number} The entry's size, or 0 when unknown.
 */
function getFileSize(file) {
  const lfsSize = file.lfs?.size;
  if (lfsSize) return lfsSize;
  return file.size || 0;
}
50
/**
 * Total size of the preferred ONNX weights file in a repository's `onnx/` folder.
 *
 * Preference order is q4f16, then q4 (without "f16" in the name), then fp16,
 * then the first `.onnx` file listed. Companion external-data files (paths that
 * start with `<weights path>_data`) are added to the total.
 *
 * @param {string} modelId - HuggingFace repository id.
 * @returns {Promise<number>} Summed size in bytes, or 0 when it cannot be determined.
 */
async function fetchOnnxWeightsSize(modelId) {
  try {
    const treeRes = await fetch(`https://huggingface.co/api/models/${modelId}/tree/main/onnx`);
    if (!treeRes.ok) return 0;
    const listing = await treeRes.json();
    // An error payload is an object, not a listing; treat it as "no data".
    if (!Array.isArray(listing)) return 0;
    const files = listing.filter((f) => typeof f?.path === "string");
    const onnxFiles = files.filter((f) => f.path.endsWith(".onnx"));
    const bestFile =
      onnxFiles.find((f) => f.path.includes("q4f16")) ??
      onnxFiles.find((f) => f.path.includes("q4") && !f.path.includes("f16")) ??
      onnxFiles.find((f) => f.path.includes("fp16")) ??
      onnxFiles[0];
    if (!bestFile) return 0;
    // Derive the prefix from the full path so only the trailing ".onnx" is involved.
    const externalDataPrefix = `${bestFile.path}_data`;
    return files
      .filter((f) => f.path === bestFile.path || f.path.startsWith(externalDataPrefix))
      .reduce((sum, f) => sum + getFileSize(f), 0);
  } catch {
    // Best-effort lookup: the caller falls back to the repository total.
    return 0;
  }
}

/**
 * Estimate a model's download size from the HuggingFace API.
 *
 * The size of the preferred ONNX weights (plus external data) is used when it
 * is available and non-zero. Otherwise the repository's `usedStorage` is
 * returned. A failure of the tree lookup no longer prevents that fallback.
 *
 * @param {string} modelId - HuggingFace repository id.
 * @returns {Promise<number|undefined>} Size in bytes, or `undefined` when unknown.
 */
async function fetchModelSize(modelId) {
  const onnxSize = await fetchOnnxWeightsSize(modelId);
  if (onnxSize > 0) return onnxSize;
  try {
    const res = await fetch(`https://huggingface.co/api/models/${modelId}`);
    if (res.ok) return (await res.json()).usedStorage;
  } catch {
    // Best-effort: size metadata is optional, so failures are not surfaced.
  }
  return undefined;
}
71
/**
 * Record a model in the cached-models manifest and stamp its last use.
 *
 * An existing entry gets `lastUsed` refreshed and, when truthy values are
 * supplied, its `sizeBytes` / `contextLength` overwritten; otherwise a new
 * entry is appended. The manifest is written synchronously. If size or context
 * length is still unknown afterwards, they are fetched in the background and
 * the manifest is rewritten once the lookups finish. All failures are ignored.
 *
 * @param {string} modelId - Model identifier used as the manifest key.
 * @param {number} [sizeBytes] - Known download size, if any.
 * @param {number} [contextLength] - Known context length, if any.
 * @returns {void}
 */
function trackCachedModel(modelId, sizeBytes, contextLength) {
  try {
    const gerbilHome = join(homedir(), ".gerbil");
    if (!existsSync(gerbilHome)) {
      mkdirSync(gerbilHome, { recursive: true });
    }

    const entries = getChromeCachedModels();
    const known = entries.find((entry) => entry.modelId === modelId);
    const timestamp = (/* @__PURE__ */ new Date()).toISOString();

    if (!known) {
      entries.push({
        modelId,
        downloadedAt: timestamp,
        lastUsed: timestamp,
        sizeBytes,
        contextLength
      });
    } else {
      known.lastUsed = timestamp;
      if (sizeBytes) known.sizeBytes = sizeBytes;
      if (contextLength) known.contextLength = contextLength;
    }
    writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models: entries }, null, 2));

    const missingSize = !sizeBytes && !known?.sizeBytes;
    const missingContext = !contextLength && !known?.contextLength;
    if (!missingSize && !missingContext) return;

    // Fire-and-forget backfill: the manifest is re-read when the lookups finish
    // so the update is applied to whatever is on disk at that moment.
    const backfill = async () => {
      const [size, context] = await Promise.all([
        missingSize ? fetchModelSize(modelId) : Promise.resolve(void 0),
        missingContext ? fetchContextLength(modelId) : Promise.resolve(void 0)
      ]);
      const latest = getChromeCachedModels();
      const target = latest.find((entry) => entry.modelId === modelId);
      if (!target) return;
      if (size) target.sizeBytes = size;
      if (context) target.contextLength = context;
      writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models: latest }, null, 2));
    };
    backfill().catch(() => {});
  } catch {}
}
104
/**
 * Backfill missing metadata for entries in the cached-models manifest.
 *
 * An entry is refreshed when its `sizeBytes` is missing or below 1e6, or when
 * its `contextLength` is missing. Entries are processed three at a time; only
 * the missing pieces are fetched. The manifest is rewritten once at the end,
 * and nothing is written when no entry needs a refresh. Failures are ignored.
 *
 * @returns {Promise<void>}
 */
async function refreshCachedModelSizes() {
  try {
    const MIN_EXPECTED_SIZE = 1e6;
    const BATCH_SIZE = 3;
    const sizeIsSuspect = (entry) => !entry.sizeBytes || entry.sizeBytes < MIN_EXPECTED_SIZE;

    const allEntries = getChromeCachedModels();
    const stale = allEntries.filter((entry) => sizeIsSuspect(entry) || !entry.contextLength);
    if (stale.length === 0) return;

    // Mutates the entry in place; `allEntries` holds the same objects.
    const refreshEntry = async (entry) => {
      const [size, context] = await Promise.all([
        sizeIsSuspect(entry) ? fetchModelSize(entry.modelId) : Promise.resolve(void 0),
        entry.contextLength ? Promise.resolve(void 0) : fetchContextLength(entry.modelId)
      ]);
      if (size) entry.sizeBytes = size;
      if (context) entry.contextLength = context;
    };

    for (let start = 0; start < stale.length; start += BATCH_SIZE) {
      const group = stale.slice(start, start + BATCH_SIZE);
      await Promise.all(group.map((entry) => refreshEntry(entry)));
    }

    writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models: allEntries }, null, 2));
  } catch {}
}
123
// Fixed local port number. NOTE(review): not referenced in the visible part of this module;
// presumably the port of the local HTTP server that serves the worker page — confirm.
- const GERBIL_LOCAL_PORT = 43724;
124
// Browser handle shared by all backends in this process; assigned in getOrCreateBrowser().
- let globalBrowser = null;
125
// In-flight launchBrowser() promise so concurrent callers wait on a single launch; reset to null once it settles.
- let globalBrowserPromise = null;
126
// NOTE(review): globalServer / globalServerPort / globalServerHtml are not referenced in the visible
// part of this module; presumably state of a shared local HTTP server — confirm against the rest of the file.
- let globalServer = null;
127
- let globalServerPort = 0;
128
- let globalServerHtml = "";
129
// NOTE(review): activePagesCount and MAX_CONCURRENT_PAGES are not referenced in the visible part of
// this module; presumably a cap on simultaneously open worker pages — confirm.
- let activePagesCount = 0;
130
- const MAX_CONCURRENT_PAGES = 5;
131
// Backends this process treats as live; cleanupOrphanPages() compares their `page` against the browser's open pages.
- const activeBackends = /* @__PURE__ */ new Set();
132
// Candidate browser executables keyed by process.platform, in preference order (findChrome() returns the first hit).
// darwin and win32 entries are absolute paths checked with existsSync(); linux entries are command names
// looked up with `which`. The LOCALAPPDATA entry is built when the module loads, so an unset variable
// yields a path starting with "undefined" that simply will not exist.
- const CHROME_PATHS = {
133
- darwin: [
134
- "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
135
- "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
136
- "/Applications/Chromium.app/Contents/MacOS/Chromium",
137
- "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
138
- "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser"
139
- ],
140
- linux: [
141
- "google-chrome-stable",
142
- "google-chrome",
143
- "chromium-browser",
144
- "chromium",
145
- "microsoft-edge",
146
- "brave-browser"
147
- ],
148
- win32: [
149
- "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
150
- "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
151
- `${process.env.LOCALAPPDATA}\\Google\\Chrome\\Application\\chrome.exe`,
152
- "C:\\Program Files\\Microsoft\\Edge\\Application\\msedge.exe",
153
- "C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"
154
- ]
155
- };
156
/**
 * Locate a Chromium-based browser executable.
 *
 * `CHROME_PATH` from the environment wins unconditionally (it is returned
 * without being checked). Otherwise the candidates listed in CHROME_PATHS for
 * the current platform are tried in order: on linux a candidate is accepted
 * when `which <name>` succeeds, elsewhere when the path exists on disk.
 *
 * @returns {string} The executable path, or on linux the bare command name.
 * @throws {Error} When no candidate is available.
 */
function findChrome() {
  const override = process.env.CHROME_PATH;
  if (override) return override;

  const { platform } = process;
  const candidates = CHROME_PATHS[platform] || [];

  const isAvailable = (candidate) => {
    try {
      if (platform === "linux") {
        // `which` exits non-zero when the command is absent, which makes execSync throw.
        execSync(`which ${candidate}`, { stdio: "ignore" });
        return true;
      }
      return existsSync(candidate);
    } catch {
      return false;
    }
  };

  const found = candidates.find(isAvailable);
  if (found !== undefined) return found;
  throw new Error("Chrome not found. Install Chrome or set CHROME_PATH environment variable.");
}
169
/**
 * Build the Chrome command-line flags for the current platform.
 *
 * Every platform gets `--no-sandbox` and the user-data directory. Linux adds
 * the WebGPU and Vulkan flags, macOS adds nothing, and any other platform adds
 * `--enable-unsafe-webgpu`.
 *
 * @param {string} userDataDir - Profile directory passed to `--user-data-dir`.
 * @param {number} [_debuggingPort] - Accepted but not used.
 * @returns {string[]} The flag list.
 */
function getChromeFlags(userDataDir, _debuggingPort) {
  const common = ["--no-sandbox", `--user-data-dir=${userDataDir}`];
  switch (process.platform) {
    case "linux":
      return [
        ...common,
        "--enable-unsafe-webgpu",
        "--enable-features=Vulkan",
        "--use-angle=vulkan",
        "--disable-vulkan-surface",
      ];
    case "darwin":
      return common;
    default:
      return [...common, "--enable-unsafe-webgpu"];
  }
}
175
/**
 * Build the HTML document for the in-browser inference worker page.
 *
 * The page imports transformers.js from a CDN, loads the model on startup, and
 * exposes `window.gerbilGenerate`, `window.gerbilGenerateVision`,
 * `window.gerbilInterrupt` and `window.gerbilReset`. It reports to the host by
 * `console.log`-ing JSON messages whose `type` is one of "init", "progress",
 * "ready", "start", "token", "complete", "cache_reset" or "error".
 *
 * The three arguments are embedded in the page's inline script as validated
 * literals, never as raw text: the model ID is emitted as an escaped string
 * literal, the context length as a number and the vision flag as a boolean.
 * For ordinary arguments the output is identical to plain interpolation.
 *
 * @param {string} modelPath - Model identifier loaded by the page.
 * @param {number} [contextLength=32768] - Token count above which the page drops its KV cache;
 *   a value that is not a finite number falls back to 32768.
 * @param {boolean} [isVision=false] - Whether to load the model through the vision pipeline.
 * @returns {string} The complete HTML document.
 */
function getWorkerPageHTML(modelPath, contextLength = 32768, isVision = false) {
  // JSON.stringify yields a valid JS string literal; "<" is escaped as well so the
  // value can never form "</script>" or "<!--" inside the inline script element.
  const modelIdLiteral = JSON.stringify(String(modelPath)).replace(/</g, "\\u003c");
  const numericContextLength = Number(contextLength);
  const contextLengthLiteral = Number.isFinite(numericContextLength) ? numericContextLength : 32768;
  const visionLiteral = Boolean(isVision);
  return `
<!DOCTYPE html>
<html>
<head>
<title>Gerbil WebGPU Backend</title>
<script type="module">
import {
AutoTokenizer,
AutoModelForCausalLM,
AutoProcessor,
AutoModelForImageTextToText,
RawImage,
TextStreamer,
InterruptableStoppingCriteria,
env,
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";

// Enable IndexedDB caching (prevents re-downloading models)
env.useBrowserCache = true;
env.allowLocalModels = false;

const IS_VISION = ${visionLiteral};

class ModelPipeline {
static tokenizer = null;
static processor = null;
static model = null;
static modelId = ${modelIdLiteral};
static isVision = IS_VISION;

static async getInstance(progressCallback) {
if (this.isVision) {
// Vision model: use AutoProcessor + AutoModelForImageTextToText
if (!this.processor) {
this.processor = await AutoProcessor.from_pretrained(this.modelId, {
progress_callback: progressCallback,
});
}
if (!this.model) {
this.model = await AutoModelForImageTextToText.from_pretrained(this.modelId, {
device: "webgpu",
progress_callback: progressCallback,
});
}
return {
processor: this.processor,
tokenizer: this.processor.tokenizer,
model: this.model,
isVision: true
};
} else {
// Text model: use AutoTokenizer + AutoModelForCausalLM
if (!this.tokenizer) {
this.tokenizer = await AutoTokenizer.from_pretrained(this.modelId, {
progress_callback: progressCallback,
});
}
if (!this.model) {
this.model = await AutoModelForCausalLM.from_pretrained(this.modelId, {
dtype: "q4f16",
device: "webgpu",
progress_callback: progressCallback,
});
}
return {
tokenizer: this.tokenizer,
model: this.model,
isVision: false
};
}
}
}

const stoppingCriteria = new InterruptableStoppingCriteria();
let pastKeyValuesCache = null;
let totalTokensInCache = 0;

// Context length for auto-reset (passed from model config)
const CONTEXT_LENGTH = ${contextLengthLiteral};

// Auto-load model on page init
(async function() {
console.log(JSON.stringify({ type: "progress", status: IS_VISION ? "Loading vision model..." : "Loading model..." }));

try {
const result = await ModelPipeline.getInstance((progress) => {
if (progress.status === "progress" && progress.file) {
console.log(JSON.stringify({
type: "progress",
status: "progress",
file: progress.file,
progress: Math.round(progress.progress || 0),
}));
}
});

console.log(JSON.stringify({ type: "progress", status: "Compiling shaders..." }));

// Warmup generation to compile shaders and initialize model
// Always do text warmup first
const textWarmupInputs = result.tokenizer("hello");
await result.model.generate({ ...textWarmupInputs, max_new_tokens: 1 });

// Vision models also need vision warmup
if (result.isVision) {
console.log(JSON.stringify({ type: "progress", status: "Warming up vision encoder..." }));
try {
// Create a tiny 8x8 red test image
const canvas = new OffscreenCanvas(8, 8);
const ctx = canvas.getContext('2d');
ctx.fillStyle = 'red';
ctx.fillRect(0, 0, 8, 8);
const blob = await canvas.convertToBlob({ type: 'image/png' });
const warmupImage = await RawImage.fromBlob(blob);

// Process with vision pipeline
const warmupContent = [{ type: "image" }, { type: "text", text: "hi" }];
const warmupMessages = [{ role: "user", content: warmupContent }];
const warmupPrompt = result.processor.apply_chat_template(warmupMessages);
const warmupInputs = await result.processor(warmupImage, warmupPrompt, { add_special_tokens: false });

// Run vision warmup generation
await result.model.generate({
...warmupInputs,
max_new_tokens: 1,
do_sample: false,
});
} catch {
// Vision warmup failed, text warmup was done so continue
}
}

// Set page title to model ID for cross-process identification
document.title = "Gerbil: " + ModelPipeline.modelId;

console.log(JSON.stringify({ type: "ready", isVision: result.isVision }));
} catch (error) {
console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
}
})();

// Text generation (for non-vision models or vision without images)
window.gerbilGenerate = async function(messages, options = {}) {
const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 20, thinking = false, images = [] } = options;

const result = await ModelPipeline.getInstance();

// Route to vision generation if we have images and this is a vision model
if (images.length > 0 && result.isVision) {
return window.gerbilGenerateVision(messages, images, options);
}

// Auto-reset KV cache if it exceeds context length
if (totalTokensInCache > CONTEXT_LENGTH) {
console.log(JSON.stringify({
type: "cache_reset",
reason: "context_exceeded",
tokensInCache: totalTokensInCache,
contextLength: CONTEXT_LENGTH
}));
pastKeyValuesCache = null;
totalTokensInCache = 0;
}

try {
const { tokenizer, model } = result;

const inputs = tokenizer.apply_chat_template(messages, {
add_generation_prompt: true,
return_dict: true,
enable_thinking: thinking,
});

let state = "answering";
let prevState = "answering";
const [START_THINKING_TOKEN_ID, END_THINKING_TOKEN_ID] = tokenizer.encode(
"<think></think>",
{ add_special_tokens: false }
);

let startTime = null;
let numTokens = 0;

const tokenCallback = (tokens) => {
startTime ??= performance.now();
numTokens += 1;

const tokenId = Number(tokens[0]);
if (tokenId === START_THINKING_TOKEN_ID) {
state = "thinking";
} else if (tokenId === END_THINKING_TOKEN_ID) {
state = "answering";
}
};

const streamCallback = (text) => {
const tps = startTime ? (numTokens / (performance.now() - startTime)) * 1000 : 0;

let outputText = text;
if (thinking) {
if (state === "thinking" && prevState !== "thinking") {
outputText = "<think>" + text;
} else if (state === "answering" && prevState === "thinking") {
outputText = "</think>" + text;
}
}
prevState = state;

console.log(JSON.stringify({ type: "token", text: outputText, state, numTokens, tps }));
};

const streamer = new TextStreamer(tokenizer, {
skip_prompt: true,
skip_special_tokens: true,
callback_function: streamCallback,
token_callback_function: tokenCallback,
});

console.log(JSON.stringify({ type: "start" }));

const { past_key_values, sequences } = await model.generate({
...inputs,
past_key_values: pastKeyValuesCache,
do_sample: temperature > 0,
temperature: temperature > 0 ? temperature : undefined,
top_p: topP,
top_k: topK,
max_new_tokens: maxTokens,
streamer,
stopping_criteria: stoppingCriteria,
return_dict_in_generate: true,
});

pastKeyValuesCache = past_key_values;

const inputLength = inputs.input_ids.dims[1];
totalTokensInCache += inputLength + numTokens;

const endTime = performance.now();
const totalTime = startTime ? endTime - startTime : 0;

const generatedTokens = sequences.slice(null, [inputLength, null]);
const decoded = tokenizer.batch_decode(generatedTokens, { skip_special_tokens: true });

console.log(JSON.stringify({
type: "complete",
text: decoded[0] || "",
numTokens,
totalTime,
tps: totalTime > 0 ? (numTokens / totalTime) * 1000 : 0,
tokensInCache: totalTokensInCache,
}));

return decoded[0] || "";
} catch (error) {
console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
throw error;
}
};

// Vision generation (for vision models with images)
window.gerbilGenerateVision = async function(messages, imageUrls, options = {}) {
const { maxTokens = 2048, temperature = 0.7, topP = 0.9, topK = 20 } = options;

try {
const { processor, tokenizer, model } = await ModelPipeline.getInstance();

// Build message content with image placeholders for the user prompt
const lastMessage = messages[messages.length - 1];
const content = [];
for (let i = 0; i < imageUrls.length; i += 1) {
content.push({ type: "image" });
}
content.push({ type: "text", text: lastMessage.content });

// For vision models, include a brief system instruction for concise responses
const visionMessages = [
{ role: "system", content: "You are a helpful assistant. Be concise and direct in your responses." },
{ role: "user", content }
];

// Apply chat template with generation prompt
const chatPrompt = processor.apply_chat_template(visionMessages, {
add_generation_prompt: true
});

// Load images
console.log(JSON.stringify({ type: "progress", status: "Loading images..." }));
const loadedImages = await Promise.all(
imageUrls.map(url => RawImage.fromURL(url))
);

// Process inputs
const inputs = await processor(
loadedImages.length === 1 ? loadedImages[0] : loadedImages,
chatPrompt,
{ add_special_tokens: false }
);

let startTime = null;
let numTokens = 0;

const streamCallback = (text) => {
startTime ??= performance.now();
numTokens += 1;
const tps = (numTokens / (performance.now() - startTime)) * 1000;
console.log(JSON.stringify({ type: "token", text, state: "answering", numTokens, tps }));
};

const streamer = new TextStreamer(tokenizer, {
skip_prompt: true,
skip_special_tokens: true,
callback_function: streamCallback,
});

console.log(JSON.stringify({ type: "start" }));

const outputs = await model.generate({
...inputs,
max_new_tokens: maxTokens,
do_sample: temperature > 0,
temperature: temperature > 0 ? temperature : undefined,
top_p: topP,
top_k: topK,
streamer,
stopping_criteria: stoppingCriteria,
});

// Decode output (skip prompt)
const inputLength = inputs.input_ids.dims?.at(-1) || 0;
const decoded = processor.batch_decode(
outputs.slice(null, [inputLength, null]),
{ skip_special_tokens: true }
);

const endTime = performance.now();
const totalTime = startTime ? endTime - startTime : 0;

console.log(JSON.stringify({
type: "complete",
text: decoded[0] || "",
numTokens,
totalTime,
tps: totalTime > 0 ? (numTokens / totalTime) * 1000 : 0,
}));

return decoded[0] || "";
} catch (error) {
console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
throw error;
}
};

window.gerbilInterrupt = function() {
stoppingCriteria.interrupt();
};

window.gerbilReset = function() {
pastKeyValuesCache = null;
totalTokensInCache = 0;
stoppingCriteria.reset();
console.log(JSON.stringify({ type: "cache_reset", reason: "manual" }));
};

// Signal that the page is ready for commands
console.log(JSON.stringify({ type: "init" }));
<\/script>
</head>
<body>
<h1>Gerbil WebGPU Backend</h1>
<p>This page provides WebGPU inference for the Gerbil CLI.</p>
</body>
</html>
`;
}
551
/**
 * Node-side driver for a Chrome page that runs model inference with WebGPU.
 *
 * Each instance owns one page inside a Chrome browser that is shared through
 * module-level state (`globalBrowser`, `globalServer`, `activeBackends`,
 * `activePagesCount`). The page reports back by `console.log`-ing JSON
 * messages of the form `{ type, ...payload }`, which are read through a CDP
 * session and dispatched by {@link ChromeGPUBackend#handleMessage}.
 *
 * Construct instances through {@link ChromeGPUBackend.create}.
 */
var ChromeGPUBackend = class ChromeGPUBackend {
  browser = null;
  page = null;
  cdp = null;
  server = null;
  serverPort = 0;
  userDataDir = GERBIL_CACHE_DIR;
  modelId;
  isReady = false;
  isVisionModel = false;
  /** Message type -> handler invoked with the message payload (without `type`). */
  messageHandlers = /* @__PURE__ */ new Map();
  /** One settle-with-error function per outstanding wait (see createWait). */
  pendingRejects = [];
  /** Time at which the page reported "ready"; null while not ready. */
  startedAt = null;
  /** Listener registered on the shared browser, kept so dispose() can remove it. */
  browserDisconnectHandler = null;

  /**
   * @param modelId Model identifier loaded by the worker page.
   * @param isVision Whether the model should be treated as a vision model.
   */
  constructor(modelId, isVision = false) {
    this.modelId = modelId;
    this.isVisionModel = isVision;
  }

  /**
   * Create and initialize a Chrome GPU backend.
   *
   * If initialization fails, the partially constructed backend is disposed
   * before the error is re-thrown so that no page or page slot is leaked.
   *
   * @param options Launch options (`modelId`, `isVision`, `chromePath`,
   *   `contextLength`, `onProgress`, `onToken`).
   * @returns The ready backend.
   */
  static async create(options = {}) {
    const modelId = options.modelId || "onnx-community/Qwen3-0.6B-ONNX";
    const isVision = options.isVision ?? ChromeGPUBackend.detectVisionModel(modelId);
    const backend = new ChromeGPUBackend(modelId, isVision);
    try {
      await backend.launch(options);
    } catch (err) {
      await backend.dispose();
      throw err;
    }
    return backend;
  }

  /**
   * Detect if a model is a vision model based on its ID.
   * This is a name-based heuristic: any ID matching one of the patterns
   * below (case-insensitively) is treated as a vision model.
   */
  static detectVisionModel(modelId) {
    const visionPatterns = [
      /ministral/i,
      /pixtral/i,
      /llava/i,
      /vision/i,
      /vl/i,
      /image-text/i,
      /multimodal/i
    ];
    return visionPatterns.some((pattern) => pattern.test(modelId));
  }

  /**
   * Read the JS heap metrics exposed by the CDP Performance domain.
   * Missing metrics are reported as 0.
   *
   * @param session An attached CDP session.
   * @returns `{ jsHeapUsed, jsHeapTotal }` in bytes.
   */
  static async readHeapMetrics(session) {
    await session.send("Performance.enable");
    const { metrics } = await session.send("Performance.getMetrics");
    const valueOf = (name) => metrics.find((m) => m.name === name)?.value ?? 0;
    return {
      jsHeapUsed: valueOf("JSHeapUsedSize"),
      jsHeapTotal: valueOf("JSHeapTotalSize")
    };
  }

  /**
   * Close the shared browser and HTTP server and reset the shared counters.
   * Shared by killAllBackends() and closeSharedBrowser().
   *
   * @returns true when the browser was closed without error.
   */
  static async teardownShared() {
    let browserClosed = false;
    if (globalBrowser) {
      try {
        await globalBrowser.close();
        browserClosed = true;
      } catch {}
      globalBrowser = null;
      globalBrowserPromise = null;
    }
    if (globalServer) {
      globalServer.close();
      globalServer = null;
      globalServerPort = 0;
    }
    activePagesCount = 0;
    try {
      unlinkSync(WS_ENDPOINT_FILE);
    } catch {}
    return browserClosed;
  }

  /**
   * Check if this backend is for a vision model.
   */
  isVision() {
    return this.isVisionModel;
  }

  /**
   * Clean up orphan Gerbil pages from previous sessions.
   * A page is considered a Gerbil page when its URL matches
   * `127.0.0.1:4NNNN`, and an orphan when no backend in `activeBackends`
   * owns it. Failures are swallowed: this is best-effort housekeeping.
   *
   * @returns Number of orphan pages found (0 on any error).
   */
  async cleanupOrphanPages(browser, options) {
    try {
      const ownedPages = new Set();
      for (const backend of activeBackends) if (backend.page) ownedPages.add(backend.page);
      const orphans = (await browser.pages()).filter((p) => {
        return /127\.0\.0\.1:4\d{4}/.test(p.url()) && !ownedPages.has(p);
      });
      if (orphans.length === 0) return 0;
      options.onProgress?.({ status: `Cleaning up ${orphans.length} orphan page(s)...` });
      for (const page of orphans) try {
        await page.close();
      } catch {}
      return orphans.length;
    } catch {
      return 0;
    }
  }

  /**
   * Get existing browser or launch a new one (singleton pattern).
   * Multiple Gerbil instances share the same browser process.
   *
   * Resolution order: the connected `globalBrowser`; a launch already in
   * flight (`globalBrowserPromise`); the WebSocket endpoint stored in
   * `WS_ENDPOINT_FILE`; finally a fresh launch.
   */
  async getOrCreateBrowser(chromePath, options) {
    if (globalBrowser?.connected) {
      options.onProgress?.({ status: "Reusing existing Chrome..." });
      await this.cleanupOrphanPages(globalBrowser, options);
      return globalBrowser;
    }
    if (globalBrowserPromise) {
      options.onProgress?.({ status: "Waiting for Chrome startup..." });
      return globalBrowserPromise;
    }
    if (existsSync(WS_ENDPOINT_FILE)) {
      try {
        const wsEndpoint = readFileSync(WS_ENDPOINT_FILE, "utf-8").trim();
        options.onProgress?.({ status: "Connecting to existing Chrome..." });
        globalBrowser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });
        await this.cleanupOrphanPages(globalBrowser, options);
        return globalBrowser;
      } catch {
        // The stored endpoint could not be used: drop it and launch instead.
        try {
          unlinkSync(WS_ENDPOINT_FILE);
        } catch {}
      }
    }
    globalBrowserPromise = this.launchBrowser(chromePath, options);
    try {
      globalBrowser = await globalBrowserPromise;
      return globalBrowser;
    } finally {
      globalBrowserPromise = null;
    }
  }

  /**
   * Launch a new Chrome browser instance and record its WebSocket endpoint
   * in `WS_ENDPOINT_FILE`. The file is removed again when the browser
   * disconnects.
   */
  async launchBrowser(chromePath, _options) {
    const debuggingPort = 9222 + Math.floor(Math.random() * 1e3);
    const lockFile = join(this.userDataDir, "SingletonLock");
    if (existsSync(lockFile)) {
      try {
        unlinkSync(lockFile);
        await new Promise((r) => setTimeout(r, 200));
      } catch {}
    }
    const browser = await puppeteer.launch({
      executablePath: chromePath,
      headless: true,
      args: [
        ...getChromeFlags(this.userDataDir, debuggingPort),
        "--enable-gpu",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-background-timer-throttling",
        "--disable-renderer-backgrounding",
        "--disable-dev-shm-usage"
      ],
      handleSIGINT: false,
      handleSIGTERM: false,
      handleSIGHUP: false
    });
    writeFileSync(WS_ENDPOINT_FILE, browser.wsEndpoint());
    browser.on("disconnected", () => {
      globalBrowser = null;
      try {
        unlinkSync(WS_ENDPOINT_FILE);
      } catch {}
    });
    return browser;
  }

  /**
   * Launch Chrome and initialize the worker page.
   *
   * @throws Error when `MAX_CONCURRENT_PAGES` backends are already active,
   *   or when the page does not report "ready" within 300 seconds.
   */
  async launch(options) {
    if (activePagesCount >= MAX_CONCURRENT_PAGES) throw new Error(`Maximum concurrent pages (${MAX_CONCURRENT_PAGES}) reached. Call dispose() on old Gerbil instances to free resources. Currently active: ${activePagesCount}`);
    const chromePath = options.chromePath || findChrome();
    this.userDataDir = GERBIL_CACHE_DIR;
    if (!existsSync(this.userDataDir)) mkdirSync(this.userDataDir, { recursive: true });
    const contextLength = options.contextLength || 32768;
    const html = getWorkerPageHTML(this.modelId, contextLength, this.isVisionModel);
    await this.startServer(html);
    options.onProgress?.({ status: "Starting Chrome..." });
    this.browser = await this.getOrCreateBrowser(chromePath, options);
    this.page = await this.browser.newPage();
    this.cdp = await this.page.createCDPSession();
    activePagesCount += 1;
    activeBackends.add(this);
    options.onProgress?.({ status: `Active pages: ${activePagesCount}/${MAX_CONCURRENT_PAGES}` });
    // Keep a reference to the listener so dispose() can detach it from the
    // shared browser instead of accumulating one listener per backend.
    this.browserDisconnectHandler = () => {
      this.isReady = false;
      this.startedAt = null;
      this.browser = null;
      this.page = null;
      this.cdp = null;
      this.rejectPendingWaits(/* @__PURE__ */ new Error("CHROME_DISCONNECTED"));
    };
    this.browser.on("disconnected", this.browserDisconnectHandler);
    await this.cdp.send("Runtime.enable");
    await this.cdp.send("Runtime.setAsyncCallStackDepth", { maxDepth: 32 });
    this.cdp.on("Runtime.consoleAPICalled", (event) => {
      // Only `console.log` calls whose first argument is a JSON string are
      // protocol messages; everything else is ordinary page output.
      if (event.type !== "log" || !event.args[0]?.value) return;
      try {
        this.handleMessage(JSON.parse(event.args[0].value), options);
      } catch {}
    });
    await this.page.goto(`http://127.0.0.1:${this.serverPort}/`, {
      waitUntil: "domcontentloaded",
      timeout: 3e4
    });
    await this.waitForMessage("ready", 3e5);
    this.startedAt = /* @__PURE__ */ new Date();
    this.isReady = true;
    options.onProgress?.({ status: "Ready (WebGPU)!" });
    trackCachedModel(this.modelId);
  }

  /**
   * Handle incoming messages from the page.
   * The registered handler for `data.type` (if any) runs first; "progress"
   * and "token" messages are additionally forwarded to the launch options'
   * `onProgress` / `onToken` callbacks.
   */
  handleMessage(data, options) {
    const { type, ...rest } = data;
    const handler = this.messageHandlers.get(type);
    if (handler) handler(rest);
    if (type === "progress") options.onProgress?.(rest);
    else if (type === "token") options.onToken?.(rest);
  }

  /**
   * Register a one-shot wait for a message type.
   *
   * @param type Message type to wait for.
   * @param timeout Milliseconds before the wait rejects with a timeout error.
   * @returns `{ promise, cancel }`: `promise` settles with the message
   *   payload; `cancel(error)` rejects it early and releases the timer and
   *   handler. Calling `cancel` after the wait has settled is a no-op.
   */
  createWait(type, timeout = 3e4) {
    let cancel;
    const promise = new Promise((resolve, reject) => {
      let timer;
      const release = () => {
        clearTimeout(timer);
        // Only remove the handler if it is still ours: a newer wait for the
        // same type may have replaced it.
        if (this.messageHandlers.get(type) === onMessage) this.messageHandlers.delete(type);
        const idx = this.pendingRejects.indexOf(fail);
        if (idx >= 0) this.pendingRejects.splice(idx, 1);
      };
      const onMessage = (data) => {
        release();
        resolve(data);
      };
      const fail = (error) => {
        release();
        reject(error);
      };
      timer = setTimeout(() => {
        fail(/* @__PURE__ */ new Error(`Timeout waiting for ${type} message`));
      }, timeout);
      this.pendingRejects.push(fail);
      this.messageHandlers.set(type, onMessage);
      cancel = fail;
    });
    return { promise, cancel };
  }

  /**
   * Wait for a specific message type.
   *
   * @param type Message type to wait for.
   * @param timeout Milliseconds before rejecting (default 30000).
   * @returns Promise resolving to the message payload (without `type`).
   */
  waitForMessage(type, timeout = 3e4) {
    return this.createWait(type, timeout).promise;
  }

  /**
   * Check if Chrome backend is still alive.
   */
  isAlive() {
    return this.isReady && this.browser !== null && this.page !== null;
  }

  /**
   * Get Chrome backend status information.
   *
   * @returns `{ pid, port, modelId, startedAt }`; `startedAt` is the time
   *   the page became ready, or null while the backend is not ready.
   */
  getStatus() {
    const browserProcess = this.browser?.process?.() || globalBrowser?.process?.();
    return {
      pid: browserProcess?.pid ? browserProcess.pid : null,
      port: this.serverPort || globalServerPort,
      modelId: this.modelId,
      startedAt: this.isReady ? this.startedAt : null
    };
  }

  /**
   * Get Chrome memory usage via CDP Performance metrics.
   * Returns memory in bytes or null if unavailable.
   */
  async getMemoryUsage() {
    if (!(this.cdp && this.isReady)) return null;
    try {
      return await ChromeGPUBackend.readHeapMetrics(this.cdp);
    } catch {
      return null;
    }
  }

  /**
   * Check memory usage and auto-cleanup if threshold exceeded.
   * @param thresholdGB Memory threshold in GB (default: 8)
   * @returns true if cleanup was performed
   */
  async checkMemoryAndCleanup(thresholdGB = 8) {
    const mem = await this.getMemoryUsage();
    if (!mem) return false;
    if (mem.jsHeapUsed / 1024 ** 3 <= thresholdGB) return false;
    await this.reset();
    return true;
  }

  /**
   * Get memory usage in a human-readable format.
   *
   * @returns `{ usedGB, totalGB, usedPercent }`, or null if unavailable.
   *   `usedPercent` is 0 when the reported total heap size is 0.
   */
  async getMemoryStats() {
    const mem = await this.getMemoryUsage();
    if (!mem) return null;
    return {
      usedGB: mem.jsHeapUsed / 1024 ** 3,
      totalGB: mem.jsHeapTotal / 1024 ** 3,
      usedPercent: mem.jsHeapTotal > 0 ? mem.jsHeapUsed / mem.jsHeapTotal * 100 : 0
    };
  }

  /**
   * Generate text with streaming.
   *
   * @param prompt User prompt.
   * @param options `system`, `maxTokens`, `temperature`, `topP`, `topK`,
   *   `thinking`, `images`, and `onToken` (called with each token payload).
   * @returns The generated text ("" when the page reports none).
   * @throws Error("CHROME_BACKEND_DEAD") when the backend is not alive
   *   before or after the attempt; otherwise the underlying error.
   */
  async generate(prompt, options = {}) {
    if (!this.isAlive()) throw new Error("CHROME_BACKEND_DEAD");
    const messages = [{
      role: "system",
      content: options.system || "You are a helpful assistant."
    }, {
      role: "user",
      content: prompt
    }];
    const genOptions = {
      maxTokens: options.maxTokens ?? (this.isVisionModel ? 2048 : 256),
      temperature: options.temperature ?? .7,
      topP: options.topP ?? .9,
      topK: options.topK ?? 20,
      thinking: options.thinking ?? false,
      images: options.images ?? []
    };
    if (options.onToken) this.messageHandlers.set("token", options.onToken);
    const completion = this.createWait("complete", 6e5);
    try {
      const evaluation = this.page.evaluate((msgs, opts) => window.gerbilGenerate(msgs, opts), messages, genOptions);
      // Await both together so a failing evaluate() surfaces immediately
      // instead of becoming an unhandled rejection behind a 10-minute wait.
      const [completeData] = await Promise.all([completion.promise, evaluation]);
      return completeData.text || "";
    } catch (err) {
      // Release the "complete" wait if it is still pending.
      completion.cancel(err);
      if (!this.isAlive()) throw new Error("CHROME_BACKEND_DEAD");
      throw err;
    } finally {
      this.messageHandlers.delete("token");
    }
  }

  /**
   * Interrupt current generation.
   */
  async interrupt() {
    if (this.page) await this.page.evaluate("window.gerbilInterrupt()");
  }

  /**
   * Reset conversation cache.
   */
  async reset() {
    if (this.page) await this.page.evaluate("window.gerbilReset()");
  }

  /**
   * Check if backend is ready.
   */
  ready() {
    return this.isReady;
  }

  /**
   * Start or reuse the global HTTP server.
   * Uses singleton pattern to prevent killing our own server.
   * Updates HTML content for new model loads: the server always responds
   * with the current value of `globalServerHtml`.
   *
   * When the port is already in use (EADDRINUSE) the promise still
   * resolves and the fixed port is recorded without owning a server.
   */
  async startServer(html) {
    globalServerHtml = html;
    if (globalServer && globalServerPort) {
      this.server = globalServer;
      this.serverPort = globalServerPort;
      return;
    }
    return new Promise((resolve, reject) => {
      const server = createServer((_req, res) => {
        res.writeHead(200, { "Content-Type": "text/html" });
        res.end(globalServerHtml);
      });
      server.on("error", (err) => {
        if (err.code !== "EADDRINUSE") {
          reject(err);
          return;
        }
        this.serverPort = GERBIL_LOCAL_PORT;
        globalServerPort = GERBIL_LOCAL_PORT;
        resolve();
      });
      server.listen(GERBIL_LOCAL_PORT, "127.0.0.1", () => {
        this.server = server;
        this.serverPort = GERBIL_LOCAL_PORT;
        globalServer = server;
        globalServerPort = GERBIL_LOCAL_PORT;
        resolve();
      });
    });
  }

  /**
   * Dispose of the backend and clean up.
   * Note: We keep the shared browser running for other backends.
   *
   * Outstanding waits (and therefore in-flight generate() calls) are
   * rejected rather than left pending. The page slot is released whenever
   * this backend was registered, even if closing the page fails.
   * Safe to call more than once.
   *
   * @param disconnect If true, also disconnect from shared browser (for clean script exit)
   */
  async dispose(disconnect = false) {
    this.isReady = false;
    this.startedAt = null;
    this.rejectPendingWaits(/* @__PURE__ */ new Error("CHROME_BACKEND_DEAD"));
    if (this.browser && this.browserDisconnectHandler) {
      try {
        this.browser.off?.("disconnected", this.browserDisconnectHandler);
      } catch {}
    }
    this.browserDisconnectHandler = null;
    if (this.cdp) {
      try {
        await this.cdp.detach();
      } catch {}
      this.cdp = null;
    }
    if (this.page) {
      try {
        // Navigate away first, then give the page a moment before closing.
        await this.page.goto("about:blank").catch(() => {});
        await new Promise((r) => setTimeout(r, 50));
        await this.page.close({ runBeforeUnload: false });
      } catch {}
      this.page = null;
    }
    if (activeBackends.delete(this)) activePagesCount = Math.max(0, activePagesCount - 1);
    this.browser = null;
    this.server = null;
    if (!disconnect) return;
    await new Promise((r) => setTimeout(r, 100));
    if (activeBackends.size === 0 && globalBrowser) {
      try {
        globalBrowser.disconnect();
        globalBrowser = null;
        globalBrowserPromise = null;
      } catch {}
    }
  }

  /**
   * Reject all pending waits (called on browser disconnect or dispose).
   * Each wait also releases its timer; all message handlers are cleared.
   */
  rejectPendingWaits(error) {
    const waiters = this.pendingRejects;
    this.pendingRejects = [];
    for (const fail of waiters) fail(error);
    this.messageHandlers.clear();
  }

  /**
   * Clear the model cache (forces re-download on next start).
   */
  static clearCache() {
    if (!existsSync(GERBIL_CACHE_DIR)) return;
    rmSync(GERBIL_CACHE_DIR, {
      recursive: true,
      force: true
    });
  }

  /**
   * Get the number of active Chrome pages.
   */
  static getActivePageCount() {
    return activePagesCount;
  }

  /**
   * Get the active page count together with the configured maximum.
   */
  static getMemoryInfo() {
    return {
      activePagesCount,
      maxPages: MAX_CONCURRENT_PAGES
    };
  }

  /**
   * Get global browser status (even if no active backends).
   */
  static getGlobalBrowserStatus() {
    const connected = globalBrowser?.connected ?? false;
    let pid = null;
    let wsEndpoint = null;
    if (connected) {
      const browserProcess = globalBrowser.process?.();
      if (browserProcess?.pid) pid = browserProcess.pid;
      wsEndpoint = globalBrowser.wsEndpoint();
    }
    return {
      running: connected,
      pid,
      port: globalServerPort,
      activePagesCount,
      maxPages: MAX_CONCURRENT_PAGES,
      wsEndpoint
    };
  }

  /**
   * Get total page count from Chrome (all processes).
   * Counts pages whose URL contains `127.0.0.1:<globalServerPort>`.
   */
  static async getTotalPageCount() {
    if (!globalBrowser?.connected) return 0;
    try {
      const pages = await globalBrowser.pages();
      return pages.filter((p) => p.url().includes(`127.0.0.1:${globalServerPort}`)).length;
    } catch {
      return 0;
    }
  }

  /**
   * Get all active backends with their memory usage (this process only).
   */
  static async getAllBackendsInfo() {
    const results = [];
    for (const backend of activeBackends) {
      results.push({
        modelId: backend.modelId,
        isVision: backend.isVisionModel,
        isReady: backend.isReady,
        memory: await backend.getMemoryStats()
      });
    }
    return results;
  }

  /**
   * Get ALL pages in Chrome browser (cross-process visibility).
   * This shows pages from ALL Gerbil processes sharing the browser.
   *
   * @returns One `{ url, title, isOurs, modelId, memory }` entry per page
   *   served from `127.0.0.1:<globalServerPort>`; [] on any error.
   */
  static async getAllChromePages() {
    if (!globalBrowser?.connected) return [];
    try {
      const results = [];
      for (const page of await globalBrowser.pages()) {
        const url = page.url();
        if (url === "about:blank" || !url.includes(`127.0.0.1:${globalServerPort}`)) continue;
        const title = await page.title().catch(() => "");
        let owner = null;
        for (const backend of activeBackends) if (backend.page === page) {
          owner = backend;
          break;
        }
        let modelId = null;
        let memory = null;
        if (owner) {
          modelId = owner.modelId;
          const mem = await owner.getMemoryStats();
          if (mem) memory = {
            usedGB: mem.usedGB,
            totalGB: mem.totalGB
          };
        } else {
          // Page not owned by this process: recover the model from the
          // title and probe memory through a temporary CDP session.
          if (title.startsWith("Gerbil: ")) modelId = title.replace("Gerbil: ", "");
          let session = null;
          try {
            session = await page.createCDPSession();
            const heap = await ChromeGPUBackend.readHeapMetrics(session);
            memory = {
              usedGB: heap.jsHeapUsed / 1024 ** 3,
              totalGB: heap.jsHeapTotal / 1024 ** 3
            };
          } catch {} finally {
            // Always detach so a failed probe does not leak the session.
            if (session) await session.detach().catch(() => {});
          }
        }
        results.push({
          url,
          title: title || "Gerbil WebGPU Backend",
          isOurs: owner !== null,
          modelId,
          memory
        });
      }
      return results;
    } catch {
      return [];
    }
  }

  /**
   * Kill a Chrome page by index (works cross-process).
   * The index refers to the list of pages served from
   * `127.0.0.1:<globalServerPort>`.
   *
   * @returns true when the page was disposed/closed, false otherwise.
   */
  static async killPageByIndex(index) {
    if (!globalBrowser?.connected) return false;
    try {
      const gerbilPages = (await globalBrowser.pages()).filter((p) => {
        return p.url().includes(`127.0.0.1:${globalServerPort}`);
      });
      if (index < 0 || index >= gerbilPages.length) return false;
      const page = gerbilPages[index];
      for (const backend of activeBackends) if (backend.page === page) {
        await backend.dispose();
        return true;
      }
      await page.close();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Kill a specific backend by index (this process only).
   */
  static async killBackendByIndex(index) {
    const backends = [...activeBackends];
    if (index < 0 || index >= backends.length) return false;
    try {
      await backends[index].dispose();
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Force kill all backends (for zombie cleanup).
   *
   * @returns `{ pagesKilled, browserKilled }`.
   */
  static async killAllBackends() {
    const count = activeBackends.size;
    for (const backend of [...activeBackends]) try {
      await backend.dispose();
    } catch {}
    activeBackends.clear();
    const browserKilled = await ChromeGPUBackend.teardownShared();
    return {
      pagesKilled: count,
      browserKilled
    };
  }

  /**
   * Gracefully close the shared browser (call on process exit).
   */
  static async closeSharedBrowser() {
    await ChromeGPUBackend.teardownShared();
  }
};
1181
- let cleanupRegistered = false;
1182
- function registerCleanup() {
1183
- if (cleanupRegistered) return;
1184
- cleanupRegistered = true;
1185
- const cleanup = () => {
1186
- if (globalBrowser) {
1187
- try {
1188
- const browserProcess = globalBrowser.process();
1189
- if (browserProcess) browserProcess.kill("SIGTERM");
1190
- } catch {}
1191
- globalBrowser = null;
1192
- }
1193
- if (globalServer) {
1194
- globalServer.close();
1195
- globalServer = null;
1196
- }
1197
- };
1198
- process.on("exit", cleanup);
1199
- process.on("SIGINT", () => {
1200
- cleanup();
1201
- process.exit(0);
1202
- });
1203
- process.on("SIGTERM", () => {
1204
- cleanup();
1205
- process.exit(0);
1206
- });
1207
- }
1208
- registerCleanup();
1209
-
1210
- //#endregion
1211
- export { trackCachedModel as i, getChromeCachedModels as n, refreshCachedModelSizes as r, ChromeGPUBackend as t };
1212
- //# sourceMappingURL=chrome-backend-CORwaIyC.mjs.map