@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.10

Files changed (114)
  1. package/README.md +79 -14
  2. package/dist/auto-update-S9s5-g0C.mjs +3 -0
  3. package/dist/browser/index.d.ts +1009 -0
  4. package/dist/browser/index.d.ts.map +1 -0
  5. package/dist/browser/index.js +2492 -0
  6. package/dist/browser/index.js.map +1 -0
  7. package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-CORwaIyC.mjs} +514 -73
  8. package/dist/chrome-backend-CORwaIyC.mjs.map +1 -0
  9. package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-DIKYoWj-.mjs} +1 -1
  10. package/dist/cli.mjs +3359 -647
  11. package/dist/cli.mjs.map +1 -1
  12. package/dist/frameworks/express.d.mts +1 -1
  13. package/dist/frameworks/express.mjs +3 -4
  14. package/dist/frameworks/express.mjs.map +1 -1
  15. package/dist/frameworks/fastify.d.mts +1 -1
  16. package/dist/frameworks/fastify.mjs +2 -3
  17. package/dist/frameworks/fastify.mjs.map +1 -1
  18. package/dist/frameworks/hono.d.mts +1 -1
  19. package/dist/frameworks/hono.mjs +2 -3
  20. package/dist/frameworks/hono.mjs.map +1 -1
  21. package/dist/frameworks/next.d.mts +2 -2
  22. package/dist/frameworks/next.mjs +2 -3
  23. package/dist/frameworks/next.mjs.map +1 -1
  24. package/dist/frameworks/react.d.mts +1 -1
  25. package/dist/frameworks/trpc.d.mts +1 -1
  26. package/dist/frameworks/trpc.mjs +2 -3
  27. package/dist/frameworks/trpc.mjs.map +1 -1
  28. package/dist/gerbil-DJGqq7BX.mjs +4 -0
  29. package/dist/gerbil-DoDGHe6Z.mjs +1631 -0
  30. package/dist/gerbil-DoDGHe6Z.mjs.map +1 -0
  31. package/dist/gerbil-qOTe1nl2.d.mts +431 -0
  32. package/dist/gerbil-qOTe1nl2.d.mts.map +1 -0
  33. package/dist/index.d.mts +411 -9
  34. package/dist/index.d.mts.map +1 -1
  35. package/dist/index.mjs +7 -6
  36. package/dist/index.mjs.map +1 -1
  37. package/dist/integrations/ai-sdk.d.mts +122 -4
  38. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  39. package/dist/integrations/ai-sdk.mjs +238 -11
  40. package/dist/integrations/ai-sdk.mjs.map +1 -1
  41. package/dist/integrations/langchain.d.mts +132 -2
  42. package/dist/integrations/langchain.d.mts.map +1 -1
  43. package/dist/integrations/langchain.mjs +175 -8
  44. package/dist/integrations/langchain.mjs.map +1 -1
  45. package/dist/integrations/llamaindex.d.mts +1 -1
  46. package/dist/integrations/llamaindex.mjs +2 -3
  47. package/dist/integrations/llamaindex.mjs.map +1 -1
  48. package/dist/integrations/mcp-client.mjs +4 -4
  49. package/dist/integrations/mcp-client.mjs.map +1 -1
  50. package/dist/integrations/mcp.d.mts +2 -2
  51. package/dist/integrations/mcp.d.mts.map +1 -1
  52. package/dist/integrations/mcp.mjs +5 -6
  53. package/dist/kokoro-BNTb6egA.mjs +20210 -0
  54. package/dist/kokoro-BNTb6egA.mjs.map +1 -0
  55. package/dist/kokoro-CMOGDSgT.js +20212 -0
  56. package/dist/kokoro-CMOGDSgT.js.map +1 -0
  57. package/dist/{mcp-R8kRLIKb.mjs → mcp-kzDDWIoS.mjs} +10 -37
  58. package/dist/mcp-kzDDWIoS.mjs.map +1 -0
  59. package/dist/microphone-DaMZFRuR.mjs +3 -0
  60. package/dist/{one-liner-BUQR0nqq.mjs → one-liner-DxnNs_JK.mjs} +2 -2
  61. package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-DxnNs_JK.mjs.map} +1 -1
  62. package/dist/repl-DGUw4fCc.mjs +9 -0
  63. package/dist/skills/index.d.mts +305 -14
  64. package/dist/skills/index.d.mts.map +1 -1
  65. package/dist/skills/index.mjs +5 -6
  66. package/dist/skills-DulrOPeP.mjs +1435 -0
  67. package/dist/skills-DulrOPeP.mjs.map +1 -0
  68. package/dist/stt-1WIefHwc.mjs +3 -0
  69. package/dist/stt-CG_7KB_0.mjs +434 -0
  70. package/dist/stt-CG_7KB_0.mjs.map +1 -0
  71. package/dist/stt-Dne6SENv.js +434 -0
  72. package/dist/stt-Dne6SENv.js.map +1 -0
  73. package/dist/{tools-BsiEE6f2.mjs → tools-Bi1P7Xoy.mjs} +6 -7
  74. package/dist/{tools-BsiEE6f2.mjs.map → tools-Bi1P7Xoy.mjs.map} +1 -1
  75. package/dist/transformers.web-DiD1gTwk.js +44695 -0
  76. package/dist/transformers.web-DiD1gTwk.js.map +1 -0
  77. package/dist/transformers.web-u34VxRFM.js +3 -0
  78. package/dist/tts-B1pZMlDv.mjs +3 -0
  79. package/dist/tts-C2FzKuSx.js +725 -0
  80. package/dist/tts-C2FzKuSx.js.map +1 -0
  81. package/dist/tts-CyHhcLtN.mjs +731 -0
  82. package/dist/tts-CyHhcLtN.mjs.map +1 -0
  83. package/dist/types-CiTc7ez3.d.mts +353 -0
  84. package/dist/types-CiTc7ez3.d.mts.map +1 -0
  85. package/dist/{utils-7vXqtq2Q.mjs → utils-CZBZ8dgR.mjs} +1 -1
  86. package/dist/{utils-7vXqtq2Q.mjs.map → utils-CZBZ8dgR.mjs.map} +1 -1
  87. package/docs/ai-sdk.md +137 -21
  88. package/docs/browser.md +241 -2
  89. package/docs/memory.md +72 -0
  90. package/docs/stt.md +494 -0
  91. package/docs/tts.md +569 -0
  92. package/docs/vision.md +396 -0
  93. package/package.json +21 -22
  94. package/dist/auto-update-BbNHbSU1.mjs +0 -3
  95. package/dist/browser/index.d.mts +0 -262
  96. package/dist/browser/index.d.mts.map +0 -1
  97. package/dist/browser/index.mjs +0 -755
  98. package/dist/browser/index.mjs.map +0 -1
  99. package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
  100. package/dist/gerbil-BfnsFWRE.mjs +0 -644
  101. package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
  102. package/dist/gerbil-BjW-z7Fq.mjs +0 -5
  103. package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
  104. package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
  105. package/dist/mcp-R8kRLIKb.mjs.map +0 -1
  106. package/dist/models-DKULvhOr.mjs +0 -136
  107. package/dist/models-DKULvhOr.mjs.map +0 -1
  108. package/dist/models-De2-_GmQ.d.mts +0 -22
  109. package/dist/models-De2-_GmQ.d.mts.map +0 -1
  110. package/dist/skills-D3CEpgDc.mjs +0 -630
  111. package/dist/skills-D3CEpgDc.mjs.map +0 -1
  112. package/dist/types-BS1N92Jt.d.mts +0 -183
  113. package/dist/types-BS1N92Jt.d.mts.map +0 -1
  114. package/dist/{chunk-Ct1HF2bE.mjs → chunk-CkXuGtQK.mjs} +0 -0
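The largest functional change in this range is in the Chrome WebGPU backend (chrome-backend-*.mjs, diffed below): vision-model support, per-model context-length metadata, and cross-process page management. A minimal usage sketch, assuming ChromeGPUBackend is reachable from the package root (the bundle itself exports it from the chrome-backend chunk) and using only the options visible in the diff:

  // Sketch based on the diff below; option names come from ChromeGPUBackend.create()/dispose().
  import { ChromeGPUBackend } from "@tryhamster/gerbil"; // assumed import path

  const backend = await ChromeGPUBackend.create({
    modelId: "onnx-community/Qwen3-0.6B-ONNX", // default when omitted
    contextLength: 32768, // new: forwarded into the worker page (previously hard-coded to 2048)
    // isVision is auto-detected from the model ID (ministral/pixtral/llava/vision/vl/image-text/multimodal)
    onProgress: (p) => console.log(p.status),
  });

  // ... run generations, then release the page. Passing true also disconnects
  // from the shared browser once no backends remain (new dispose(disconnect) flag).
  await backend.dispose(true);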
@@ -1,8 +1,8 @@
- import { execSync } from "child_process";
- import { existsSync, mkdirSync, readFileSync, rmSync, unlinkSync, writeFileSync } from "fs";
- import { createServer } from "http";
- import { homedir } from "os";
- import { join } from "path";
+ import { execSync } from "node:child_process";
+ import { existsSync, mkdirSync, readFileSync, rmSync, unlinkSync, writeFileSync } from "node:fs";
+ import { createServer } from "node:http";
+ import { homedir } from "node:os";
+ import { join } from "node:path";
  import puppeteer from "puppeteer-core";

  //#region src/core/chrome-backend.ts
@@ -24,6 +24,29 @@ function getChromeCachedModels() {
  return [];
  }
  }
+ /** Fetch model context length from HuggingFace (config.json preferred for actual limit) */
+ async function fetchContextLength(modelId) {
+ try {
+ const res = await fetch(`https://huggingface.co/${modelId}/raw/main/config.json`);
+ if (res.ok) {
+ const config = await res.json();
+ const textConfig = config.text_config || {};
+ const ctxLen = config.max_position_embeddings || textConfig.max_position_embeddings || config.sliding_window || textConfig.sliding_window || config.max_seq_len || config.max_sequence_length || config.n_ctx || config.n_positions;
+ if (ctxLen) return ctxLen;
+ }
+ } catch {}
+ try {
+ const tokRes = await fetch(`https://huggingface.co/${modelId}/raw/main/tokenizer_config.json`);
+ if (tokRes.ok) {
+ const tokConfig = await tokRes.json();
+ if (tokConfig.model_max_length && tokConfig.model_max_length < 1e6) return tokConfig.model_max_length;
+ }
+ } catch {}
+ }
+ /** Get file size from HuggingFace tree entry (handles both regular and LFS files) */
+ function getFileSize(file) {
+ return file.lfs?.size || file.size || 0;
+ }
  /** Fetch model size from HuggingFace API */
  async function fetchModelSize(modelId) {
  try {
@@ -35,14 +58,18 @@ async function fetchModelSize(modelId) {
  const fp16 = files.find((f) => f.path.includes("fp16") && f.path.endsWith(".onnx"));
  const anyOnnx = files.find((f) => f.path.endsWith(".onnx"));
  const bestFile = q4f16 || q4 || fp16 || anyOnnx;
- if (bestFile?.size) return bestFile.size;
+ if (bestFile) {
+ const baseName = bestFile.path.replace(".onnx", "");
+ const totalSize = files.filter((f) => f.path === bestFile.path || f.path.startsWith(`${baseName}.onnx_data`)).reduce((sum, f) => sum + getFileSize(f), 0);
+ if (totalSize > 0) return totalSize;
+ }
  }
  const res = await fetch(`https://huggingface.co/api/models/${modelId}`);
  if (res.ok) return (await res.json()).usedStorage;
  } catch {}
  }
  /** Track a model as cached */
- function trackCachedModel(modelId, sizeBytes) {
+ function trackCachedModel(modelId, sizeBytes, contextLength) {
  try {
  const dir = join(homedir(), ".gerbil");
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
@@ -52,37 +79,42 @@ function trackCachedModel(modelId, sizeBytes) {
  if (existing) {
  existing.lastUsed = now;
  if (sizeBytes) existing.sizeBytes = sizeBytes;
+ if (contextLength) existing.contextLength = contextLength;
  } else models.push({
  modelId,
  downloadedAt: now,
  lastUsed: now,
- sizeBytes
+ sizeBytes,
+ contextLength
  });
  writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models }, null, 2));
- if (!sizeBytes) fetchModelSize(modelId).then((size) => {
- if (size) {
- const updatedModels = getChromeCachedModels();
- const model = updatedModels.find((m) => m.modelId === modelId);
- if (model) {
- model.sizeBytes = size;
- writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models: updatedModels }, null, 2));
- }
+ const needsSize = !(sizeBytes || existing?.sizeBytes);
+ const needsContext = !(contextLength || existing?.contextLength);
+ if (needsSize || needsContext) Promise.all([needsSize ? fetchModelSize(modelId) : Promise.resolve(void 0), needsContext ? fetchContextLength(modelId) : Promise.resolve(void 0)]).then(([size, context]) => {
+ const updatedModels = getChromeCachedModels();
+ const model = updatedModels.find((m) => m.modelId === modelId);
+ if (model) {
+ if (size) model.sizeBytes = size;
+ if (context) model.contextLength = context;
+ writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models: updatedModels }, null, 2));
  }
  }).catch(() => {});
  } catch {}
  }
- /** Refresh sizes for cached models that don't have them */
+ /** Refresh metadata (size, context length) for cached models that need it */
  async function refreshCachedModelSizes() {
  try {
  const models = getChromeCachedModels();
- const needsSize = models.filter((m) => !m.sizeBytes);
- if (needsSize.length === 0) return;
+ const MIN_EXPECTED_SIZE = 1e6;
+ const needsRefresh = models.filter((m) => !m.sizeBytes || m.sizeBytes < MIN_EXPECTED_SIZE || !m.contextLength);
+ if (needsRefresh.length === 0) return;
  const batchSize = 3;
- for (let i = 0; i < needsSize.length; i += batchSize) {
- const batch = needsSize.slice(i, i + batchSize);
+ for (let i = 0; i < needsRefresh.length; i += batchSize) {
+ const batch = needsRefresh.slice(i, i + batchSize);
  await Promise.all(batch.map(async (model) => {
- const size = await fetchModelSize(model.modelId);
+ const [size, context] = await Promise.all([!model.sizeBytes || model.sizeBytes < MIN_EXPECTED_SIZE ? fetchModelSize(model.modelId) : Promise.resolve(void 0), model.contextLength ? Promise.resolve(void 0) : fetchContextLength(model.modelId)]);
  if (size) model.sizeBytes = size;
+ if (context) model.contextLength = context;
  }));
  }
  writeFileSync(CACHED_MODELS_FILE, JSON.stringify({ models }, null, 2));
@@ -93,8 +125,10 @@ let globalBrowser = null;
  let globalBrowserPromise = null;
  let globalServer = null;
  let globalServerPort = 0;
+ let globalServerHtml = "";
  let activePagesCount = 0;
  const MAX_CONCURRENT_PAGES = 5;
+ const activeBackends = /* @__PURE__ */ new Set();
  const CHROME_PATHS = {
  darwin: [
  "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
@@ -138,7 +172,7 @@ function getChromeFlags(userDataDir, _debuggingPort) {
  else if (process.platform === "darwin") {} else flags.push("--enable-unsafe-webgpu");
  return flags;
  }
- function getWorkerPageHTML(modelPath) {
+ function getWorkerPageHTML(modelPath, contextLength = 32768, isVision = false) {
  return `
  <!DOCTYPE html>
  <html>
@@ -148,31 +182,67 @@ function getWorkerPageHTML(modelPath) {
  import {
  AutoTokenizer,
  AutoModelForCausalLM,
+ AutoProcessor,
+ AutoModelForImageTextToText,
+ RawImage,
  TextStreamer,
  InterruptableStoppingCriteria,
- } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.0";
+ env,
+ } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.8.1";
+
+ // Enable IndexedDB caching (prevents re-downloading models)
+ env.useBrowserCache = true;
+ env.allowLocalModels = false;

+ const IS_VISION = ${isVision};
+
  class ModelPipeline {
  static tokenizer = null;
+ static processor = null;
  static model = null;
  static modelId = "${modelPath}";
+ static isVision = IS_VISION;

  static async getInstance(progressCallback) {
- if (!this.tokenizer) {
- this.tokenizer = await AutoTokenizer.from_pretrained(this.modelId, {
- progress_callback: progressCallback,
- });
- }
-
- if (!this.model) {
- this.model = await AutoModelForCausalLM.from_pretrained(this.modelId, {
- dtype: "q4f16",
- device: "webgpu",
- progress_callback: progressCallback,
- });
+ if (this.isVision) {
+ // Vision model: use AutoProcessor + AutoModelForImageTextToText
+ if (!this.processor) {
+ this.processor = await AutoProcessor.from_pretrained(this.modelId, {
+ progress_callback: progressCallback,
+ });
+ }
+ if (!this.model) {
+ this.model = await AutoModelForImageTextToText.from_pretrained(this.modelId, {
+ device: "webgpu",
+ progress_callback: progressCallback,
+ });
+ }
+ return {
+ processor: this.processor,
+ tokenizer: this.processor.tokenizer,
+ model: this.model,
+ isVision: true
+ };
+ } else {
+ // Text model: use AutoTokenizer + AutoModelForCausalLM
+ if (!this.tokenizer) {
+ this.tokenizer = await AutoTokenizer.from_pretrained(this.modelId, {
+ progress_callback: progressCallback,
+ });
+ }
+ if (!this.model) {
+ this.model = await AutoModelForCausalLM.from_pretrained(this.modelId, {
+ dtype: "q4f16",
+ device: "webgpu",
+ progress_callback: progressCallback,
+ });
+ }
+ return {
+ tokenizer: this.tokenizer,
+ model: this.model,
+ isVision: false
+ };
  }
-
- return { tokenizer: this.tokenizer, model: this.model };
  }
  }

@@ -180,16 +250,15 @@ function getWorkerPageHTML(modelPath) {
  let pastKeyValuesCache = null;
  let totalTokensInCache = 0;

- // Context length for auto-reset (Qwen3 default: 2048)
- // Cache beyond this provides no benefit and wastes memory
- const CONTEXT_LENGTH = 2048;
+ // Context length for auto-reset (passed from model config)
+ const CONTEXT_LENGTH = ${contextLength};

  // Auto-load model on page init
  (async function() {
- console.log(JSON.stringify({ type: "progress", status: "Loading model..." }));
+ console.log(JSON.stringify({ type: "progress", status: IS_VISION ? "Loading vision model..." : "Loading model..." }));

  try {
- const { tokenizer, model } = await ModelPipeline.getInstance((progress) => {
+ const result = await ModelPipeline.getInstance((progress) => {
  if (progress.status === "progress" && progress.file) {
  console.log(JSON.stringify({
  type: "progress",
@@ -201,20 +270,62 @@ function getWorkerPageHTML(modelPath) {
  });

  console.log(JSON.stringify({ type: "progress", status: "Compiling shaders..." }));
- const warmupInputs = tokenizer("a");
- await model.generate({ ...warmupInputs, max_new_tokens: 1 });
-
- console.log(JSON.stringify({ type: "ready" }));
+
+ // Warmup generation to compile shaders and initialize model
+ // Always do text warmup first
+ const textWarmupInputs = result.tokenizer("hello");
+ await result.model.generate({ ...textWarmupInputs, max_new_tokens: 1 });
+
+ // Vision models also need vision warmup
+ if (result.isVision) {
+ console.log(JSON.stringify({ type: "progress", status: "Warming up vision encoder..." }));
+ try {
+ // Create a tiny 8x8 red test image
+ const canvas = new OffscreenCanvas(8, 8);
+ const ctx = canvas.getContext('2d');
+ ctx.fillStyle = 'red';
+ ctx.fillRect(0, 0, 8, 8);
+ const blob = await canvas.convertToBlob({ type: 'image/png' });
+ const warmupImage = await RawImage.fromBlob(blob);
+
+ // Process with vision pipeline
+ const warmupContent = [{ type: "image" }, { type: "text", text: "hi" }];
+ const warmupMessages = [{ role: "user", content: warmupContent }];
+ const warmupPrompt = result.processor.apply_chat_template(warmupMessages);
+ const warmupInputs = await result.processor(warmupImage, warmupPrompt, { add_special_tokens: false });
+
+ // Run vision warmup generation
+ await result.model.generate({
+ ...warmupInputs,
+ max_new_tokens: 1,
+ do_sample: false,
+ });
+ } catch {
+ // Vision warmup failed, text warmup was done so continue
+ }
+ }
+
+ // Set page title to model ID for cross-process identification
+ document.title = "Gerbil: " + ModelPipeline.modelId;
+
+ console.log(JSON.stringify({ type: "ready", isVision: result.isVision }));
  } catch (error) {
  console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
  }
  })();

+ // Text generation (for non-vision models or vision without images)
  window.gerbilGenerate = async function(messages, options = {}) {
- const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 20, thinking = false } = options;
+ const { maxTokens = 256, temperature = 0.7, topP = 0.9, topK = 20, thinking = false, images = [] } = options;
+
+ const result = await ModelPipeline.getInstance();
+
+ // Route to vision generation if we have images and this is a vision model
+ if (images.length > 0 && result.isVision) {
+ return window.gerbilGenerateVision(messages, images, options);
+ }

  // Auto-reset KV cache if it exceeds context length
- // This prevents unbounded memory growth while preserving performance
  if (totalTokensInCache > CONTEXT_LENGTH) {
  console.log(JSON.stringify({
  type: "cache_reset",
@@ -227,7 +338,7 @@ function getWorkerPageHTML(modelPath) {
  }

  try {
- const { tokenizer, model } = await ModelPipeline.getInstance();
+ const { tokenizer, model } = result;

  const inputs = tokenizer.apply_chat_template(messages, {
  add_generation_prompt: true,
@@ -247,7 +358,7 @@ function getWorkerPageHTML(modelPath) {

  const tokenCallback = (tokens) => {
  startTime ??= performance.now();
- numTokens++;
+ numTokens += 1;

  const tokenId = Number(tokens[0]);
  if (tokenId === START_THINKING_TOKEN_ID) {
@@ -260,7 +371,6 @@ function getWorkerPageHTML(modelPath) {
  const streamCallback = (text) => {
  const tps = startTime ? (numTokens / (performance.now() - startTime)) * 1000 : 0;

- // Inject <think> markers when state changes (since skip_special_tokens removes them)
  let outputText = text;
  if (thinking) {
  if (state === "thinking" && prevState !== "thinking") {
@@ -298,14 +408,12 @@ function getWorkerPageHTML(modelPath) {

  pastKeyValuesCache = past_key_values;

- // Track total tokens in cache (input + generated)
  const inputLength = inputs.input_ids.dims[1];
  totalTokensInCache += inputLength + numTokens;

  const endTime = performance.now();
  const totalTime = startTime ? endTime - startTime : 0;

- // Extract only the generated tokens (exclude input prompt)
  const generatedTokens = sequences.slice(null, [inputLength, null]);
  const decoded = tokenizer.batch_decode(generatedTokens, { skip_special_tokens: true });

@@ -325,6 +433,99 @@ function getWorkerPageHTML(modelPath) {
  }
  };

+ // Vision generation (for vision models with images)
+ window.gerbilGenerateVision = async function(messages, imageUrls, options = {}) {
+ const { maxTokens = 2048, temperature = 0.7, topP = 0.9, topK = 20 } = options;
+
+ try {
+ const { processor, tokenizer, model } = await ModelPipeline.getInstance();
+
+ // Build message content with image placeholders for the user prompt
+ const lastMessage = messages[messages.length - 1];
+ const content = [];
+ for (let i = 0; i < imageUrls.length; i += 1) {
+ content.push({ type: "image" });
+ }
+ content.push({ type: "text", text: lastMessage.content });
+
+ // For vision models, include a brief system instruction for concise responses
+ const visionMessages = [
+ { role: "system", content: "You are a helpful assistant. Be concise and direct in your responses." },
+ { role: "user", content }
+ ];
+
+ // Apply chat template with generation prompt
+ const chatPrompt = processor.apply_chat_template(visionMessages, {
+ add_generation_prompt: true
+ });
+
+ // Load images
+ console.log(JSON.stringify({ type: "progress", status: "Loading images..." }));
+ const loadedImages = await Promise.all(
+ imageUrls.map(url => RawImage.fromURL(url))
+ );
+
+ // Process inputs
+ const inputs = await processor(
+ loadedImages.length === 1 ? loadedImages[0] : loadedImages,
+ chatPrompt,
+ { add_special_tokens: false }
+ );
+
+ let startTime = null;
+ let numTokens = 0;
+
+ const streamCallback = (text) => {
+ startTime ??= performance.now();
+ numTokens += 1;
+ const tps = (numTokens / (performance.now() - startTime)) * 1000;
+ console.log(JSON.stringify({ type: "token", text, state: "answering", numTokens, tps }));
+ };
+
+ const streamer = new TextStreamer(tokenizer, {
+ skip_prompt: true,
+ skip_special_tokens: true,
+ callback_function: streamCallback,
+ });
+
+ console.log(JSON.stringify({ type: "start" }));
+
+ const outputs = await model.generate({
+ ...inputs,
+ max_new_tokens: maxTokens,
+ do_sample: temperature > 0,
+ temperature: temperature > 0 ? temperature : undefined,
+ top_p: topP,
+ top_k: topK,
+ streamer,
+ stopping_criteria: stoppingCriteria,
+ });
+
+ // Decode output (skip prompt)
+ const inputLength = inputs.input_ids.dims?.at(-1) || 0;
+ const decoded = processor.batch_decode(
+ outputs.slice(null, [inputLength, null]),
+ { skip_special_tokens: true }
+ );
+
+ const endTime = performance.now();
+ const totalTime = startTime ? endTime - startTime : 0;
+
+ console.log(JSON.stringify({
+ type: "complete",
+ text: decoded[0] || "",
+ numTokens,
+ totalTime,
+ tps: totalTime > 0 ? (numTokens / totalTime) * 1000 : 0,
+ }));
+
+ return decoded[0] || "";
+ } catch (error) {
+ console.log(JSON.stringify({ type: "error", error: error.message || String(error) }));
+ throw error;
+ }
+ };
+
  window.gerbilInterrupt = function() {
  stoppingCriteria.interrupt();
@@ -351,31 +552,84 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  browser = null;
  page = null;
  cdp = null;
+ server = null;
  serverPort = 0;
  userDataDir = GERBIL_CACHE_DIR;
  modelId;
  isReady = false;
+ isVisionModel = false;
  messageHandlers = /* @__PURE__ */ new Map();
  pendingRejects = [];
- server = null;
- constructor(modelId) {
+ constructor(modelId, isVision = false) {
  this.modelId = modelId;
+ this.isVisionModel = isVision;
  }
  /**
  * Create and initialize a Chrome GPU backend
  */
  static async create(options = {}) {
- const backend = new ChromeGPUBackend(options.modelId || "onnx-community/Qwen3-0.6B-ONNX");
+ const modelId = options.modelId || "onnx-community/Qwen3-0.6B-ONNX";
+ const backend = new ChromeGPUBackend(modelId, options.isVision ?? ChromeGPUBackend.detectVisionModel(modelId));
  await backend.launch(options);
  return backend;
  }
  /**
+ * Detect if a model is a vision model based on its ID
+ */
+ static detectVisionModel(modelId) {
+ return [
+ /ministral/i,
+ /pixtral/i,
+ /llava/i,
+ /vision/i,
+ /vl/i,
+ /image-text/i,
+ /multimodal/i
+ ].some((pattern) => pattern.test(modelId));
+ }
+ /**
+ * Check if this backend is for a vision model
+ */
+ isVision() {
+ return this.isVisionModel;
+ }
+ /**
+ * Clean up orphan Gerbil pages from previous sessions
+ * These are pages that were left behind when process exited without proper cleanup
+ */
+ async cleanupOrphanPages(browser, options) {
+ try {
+ const gerbilPages = (await browser.pages()).filter((p) => {
+ const url = p.url();
+ return /127\.0\.0\.1:4\d{4}/.test(url);
+ });
+ const orphanCount = gerbilPages.length - activeBackends.size;
+ if (orphanCount > 0) {
+ options.onProgress?.({ status: `Cleaning up ${orphanCount} orphan page(s)...` });
+ for (const page of gerbilPages) {
+ let isOwned = false;
+ for (const backend of activeBackends) if (backend.page === page) {
+ isOwned = true;
+ break;
+ }
+ if (!isOwned) try {
+ await page.close();
+ } catch {}
+ }
+ }
+ return orphanCount;
+ } catch {
+ return 0;
+ }
+ }
+ /**
  * Get existing browser or launch a new one (singleton pattern)
  * Multiple Gerbil instances share the same browser process
  */
  async getOrCreateBrowser(chromePath, options) {
  if (globalBrowser?.connected) {
  options.onProgress?.({ status: "Reusing existing Chrome..." });
+ await this.cleanupOrphanPages(globalBrowser, options);
  return globalBrowser;
  }
  if (globalBrowserPromise) {
@@ -386,6 +640,7 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  const wsEndpoint = readFileSync(WS_ENDPOINT_FILE, "utf-8").trim();
  options.onProgress?.({ status: "Connecting to existing Chrome..." });
  globalBrowser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });
+ await this.cleanupOrphanPages(globalBrowser, options);
  return globalBrowser;
  } catch {
  try {
@@ -443,16 +698,17 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  const chromePath = options.chromePath || findChrome();
  this.userDataDir = GERBIL_CACHE_DIR;
  if (!existsSync(this.userDataDir)) mkdirSync(this.userDataDir, { recursive: true });
- const html = getWorkerPageHTML(this.modelId);
+ const contextLength = options.contextLength || 32768;
+ const html = getWorkerPageHTML(this.modelId, contextLength, this.isVisionModel);
  await this.startServer(html);
  options.onProgress?.({ status: "Starting Chrome..." });
  this.browser = await this.getOrCreateBrowser(chromePath, options);
  this.page = await this.browser.newPage();
  this.cdp = await this.page.createCDPSession();
- activePagesCount++;
+ activePagesCount += 1;
+ activeBackends.add(this);
  options.onProgress?.({ status: `Active pages: ${activePagesCount}/${MAX_CONCURRENT_PAGES}` });
  this.browser.on("disconnected", () => {
- console.error("[Chrome] Browser disconnected unexpectedly");
  this.isReady = false;
  this.browser = null;
  this.page = null;
@@ -470,13 +726,12 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  if (text.length < 500 && !text.includes("Float32Array") && !text.includes("past_key_values")) {}
  }
  else if (event.type === "error" || event.type === "warning") {
- if (!(text.includes("onnxruntime") || text.includes("content-length") || text.includes("Float32Array") || text.includes("past_key_values")) && text.length < 1e3) console.error(`[Chrome ${event.type}]`, text);
+ if (!(text.includes("onnxruntime") || text.includes("content-length") || text.includes("Float32Array") || text.includes("past_key_values")) && text.length < 1e3) {}
  }
  });
  this.cdp.on("Runtime.exceptionThrown", (event) => {
  const errText = event.exceptionDetails?.text || event.exceptionDetails?.exception?.description || "";
  if (errText.includes("Float32Array") || errText.includes("past_key_values") || errText.length > 1e3) return;
- console.error("[Chrome Exception]", errText);
  });
  await this.page.goto(`http://127.0.0.1:${this.serverPort}/`, {
  waitUntil: "domcontentloaded",
@@ -564,9 +819,7 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  async checkMemoryAndCleanup(thresholdGB = 8) {
  const mem = await this.getMemoryUsage();
  if (!mem) return false;
- const usedGB = mem.jsHeapUsed / 1024 ** 3;
- if (usedGB > thresholdGB) {
- console.warn(`[Gerbil] Memory usage high (${usedGB.toFixed(1)}GB > ${thresholdGB}GB), clearing KV cache...`);
+ if (mem.jsHeapUsed / 1024 ** 3 > thresholdGB) {
  await this.reset();
  return true;
  }
@@ -597,15 +850,16 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  content: prompt
  }];
  const genOptions = {
- maxTokens: options.maxTokens ?? 256,
+ maxTokens: options.maxTokens ?? (this.isVisionModel ? 2048 : 256),
  temperature: options.temperature ?? .7,
  topP: options.topP ?? .9,
  topK: options.topK ?? 20,
- thinking: options.thinking ?? false
+ thinking: options.thinking ?? false,
+ images: options.images ?? []
  };
  if (options.onToken) this.messageHandlers.set("token", options.onToken);
  try {
- const resultPromise = this.page.evaluate((msgs, opts) => window.gerbilGenerate(msgs, opts), messages, genOptions);
+ const resultPromise = this.page?.evaluate((msgs, opts) => window.gerbilGenerate(msgs, opts), messages, genOptions);
  const completeData = await this.waitForMessage("complete", 6e5);
  this.messageHandlers.delete("token");
  await resultPromise;
@@ -636,8 +890,10 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  /**
  * Start or reuse the global HTTP server
  * Uses singleton pattern to prevent killing our own server
+ * Updates HTML content for new model loads
  */
  async startServer(html) {
+ globalServerHtml = html;
  if (globalServer && globalServerPort) {
  this.server = globalServer;
  this.serverPort = globalServerPort;
@@ -646,7 +902,7 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  return new Promise((resolve, reject) => {
  const server = createServer((_req, res) => {
  res.writeHead(200, { "Content-Type": "text/html" });
- res.end(html);
+ res.end(globalServerHtml);
  });
  server.on("error", (err) => {
  if (err.code === "EADDRINUSE") {
@@ -667,21 +923,36 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  /**
  * Dispose of the backend and clean up
  * Note: We keep the shared browser running for other backends
+ * @param disconnect If true, also disconnect from shared browser (for clean script exit)
  */
- async dispose() {
+ async dispose(disconnect = false) {
  this.isReady = false;
  this.pendingRejects = [];
  this.messageHandlers.clear();
+ if (this.cdp) {
+ try {
+ await this.cdp.detach();
+ } catch {}
+ this.cdp = null;
+ }
  if (this.page) {
  try {
- await this.page.close();
+ await this.page.goto("about:blank").catch(() => {});
+ await new Promise((r) => setTimeout(r, 50));
+ await this.page.close({ runBeforeUnload: false });
  activePagesCount = Math.max(0, activePagesCount - 1);
  } catch {}
  this.page = null;
  }
- this.cdp = null;
+ activeBackends.delete(this);
  this.browser = null;
  this.server = null;
+ if (disconnect) await new Promise((r) => setTimeout(r, 100));
+ if (disconnect && activeBackends.size === 0 && globalBrowser) try {
+ globalBrowser.disconnect();
+ globalBrowser = null;
+ globalBrowserPromise = null;
+ } catch {}
  }
  /**
  * Reject all pending waits (called on browser disconnect or dispose)
@@ -716,6 +987,176 @@ var ChromeGPUBackend = class ChromeGPUBackend {
  };
  }
  /**
+ * Get global browser status (even if no active backends)
+ */
+ static getGlobalBrowserStatus() {
+ let pid = null;
+ let wsEndpoint = null;
+ if (globalBrowser?.connected) {
+ const browserProcess = globalBrowser.process?.();
+ if (browserProcess?.pid) pid = browserProcess.pid;
+ wsEndpoint = globalBrowser.wsEndpoint();
+ }
+ return {
+ running: globalBrowser?.connected ?? false,
+ pid,
+ port: globalServerPort,
+ activePagesCount,
+ maxPages: MAX_CONCURRENT_PAGES,
+ wsEndpoint
+ };
+ }
+ /**
+ * Get total page count from Chrome (all processes)
+ */
+ static async getTotalPageCount() {
+ if (!globalBrowser?.connected) return 0;
+ try {
+ return (await globalBrowser.pages()).filter((p) => {
+ return p.url().includes(`127.0.0.1:${globalServerPort}`);
+ }).length;
+ } catch {
+ return 0;
+ }
+ }
+ /**
+ * Get all active backends with their memory usage (this process only)
+ */
+ static async getAllBackendsInfo() {
+ const results = [];
+ for (const backend of activeBackends) {
+ const mem = await backend.getMemoryStats();
+ results.push({
+ modelId: backend.modelId,
+ isVision: backend.isVisionModel,
+ isReady: backend.isReady,
+ memory: mem
+ });
+ }
+ return results;
+ }
+ /**
+ * Get ALL pages in Chrome browser (cross-process visibility)
+ * This shows pages from ALL Gerbil processes sharing the browser
+ */
+ static async getAllChromePages() {
+ if (!globalBrowser?.connected) return [];
+ try {
+ const pages = await globalBrowser.pages();
+ const results = [];
+ for (const page of pages) {
+ const url = page.url();
+ const title = await page.title().catch(() => "");
+ if (url === "about:blank" || !url.includes(`127.0.0.1:${globalServerPort}`)) continue;
+ let modelId = null;
+ let isOurs = false;
+ let memory = null;
+ for (const backend of activeBackends) if (backend.page === page) {
+ isOurs = true;
+ modelId = backend.modelId;
+ const mem = await backend.getMemoryStats();
+ if (mem) memory = {
+ usedGB: mem.usedGB,
+ totalGB: mem.totalGB
+ };
+ break;
+ }
+ if (!isOurs) {
+ if (title.startsWith("Gerbil: ")) modelId = title.replace("Gerbil: ", "");
+ try {
+ const cdp = await page.createCDPSession();
+ await cdp.send("Performance.enable");
+ const { metrics } = await cdp.send("Performance.getMetrics");
+ const jsHeapUsed = metrics.find((m) => m.name === "JSHeapUsedSize")?.value ?? 0;
+ const jsHeapTotal = metrics.find((m) => m.name === "JSHeapTotalSize")?.value ?? 0;
+ memory = {
+ usedGB: jsHeapUsed / 1024 ** 3,
+ totalGB: jsHeapTotal / 1024 ** 3
+ };
+ await cdp.detach();
+ } catch {}
+ }
+ results.push({
+ url,
+ title: title || "Gerbil WebGPU Backend",
+ isOurs,
+ modelId,
+ memory
+ });
+ }
+ return results;
+ } catch {
+ return [];
+ }
+ }
+ /**
+ * Kill a Chrome page by index (works cross-process)
+ */
+ static async killPageByIndex(index) {
+ if (!globalBrowser?.connected) return false;
+ try {
+ const gerbilPages = (await globalBrowser.pages()).filter((p) => {
+ return p.url().includes(`127.0.0.1:${globalServerPort}`);
+ });
+ if (index < 0 || index >= gerbilPages.length) return false;
+ const page = gerbilPages[index];
+ for (const backend of activeBackends) if (backend.page === page) {
+ await backend.dispose();
+ return true;
+ }
+ await page.close();
+ return true;
+ } catch {
+ return false;
+ }
+ }
+ /**
+ * Kill a specific backend by index (this process only)
+ */
+ static async killBackendByIndex(index) {
+ const backends = [...activeBackends];
+ if (index < 0 || index >= backends.length) return false;
+ const backend = backends[index];
+ try {
+ await backend.dispose();
+ return true;
+ } catch {
+ return false;
+ }
+ }
+ /**
+ * Force kill all backends (for zombie cleanup)
+ */
+ static async killAllBackends() {
+ const count = activeBackends.size;
+ for (const backend of [...activeBackends]) try {
+ await backend.dispose();
+ } catch {}
+ activeBackends.clear();
+ let browserKilled = false;
+ if (globalBrowser) {
+ try {
+ await globalBrowser.close();
+ browserKilled = true;
+ } catch {}
+ globalBrowser = null;
+ globalBrowserPromise = null;
+ }
+ if (globalServer) {
+ globalServer.close();
+ globalServer = null;
+ globalServerPort = 0;
+ }
+ activePagesCount = 0;
+ try {
+ unlinkSync(WS_ENDPOINT_FILE);
+ } catch {}
+ return {
+ pagesKilled: count,
+ browserKilled
+ };
+ }
+ /**
  * Gracefully close the shared browser (call on process exit)
  */
  static async closeSharedBrowser() {
@@ -768,4 +1209,4 @@ registerCleanup();

  //#endregion
  export { trackCachedModel as i, getChromeCachedModels as n, refreshCachedModelSizes as r, ChromeGPUBackend as t };
- //# sourceMappingURL=chrome-backend-C5Un08O4.mjs.map
+ //# sourceMappingURL=chrome-backend-CORwaIyC.mjs.map
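
Beyond generation, the new static helpers above add a management surface over the shared browser. A sketch of how they compose, using only methods present in this diff (how the CLI in cli.mjs actually wires them up is not shown here):

  // Inspect the shared Chrome instance and every Gerbil page it hosts.
  const status = ChromeGPUBackend.getGlobalBrowserStatus();
  console.log(status.running, status.pid, `${status.activePagesCount}/${status.maxPages}`);

  // Cross-process view: pages owned by other Gerbil processes are identified via
  // their "Gerbil: <modelId>" document.title and probed over a temporary CDP session.
  for (const page of await ChromeGPUBackend.getAllChromePages()) {
    console.log(page.isOurs ? "ours" : "other process", page.modelId, page.memory?.usedGB);
  }

  // Zombie cleanup: disposes every backend, closes the shared browser and the
  // singleton HTML server, and removes the stale WS endpoint file.
  const { pagesKilled, browserKilled } = await ChromeGPUBackend.killAllBackends();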