@tryhamster/gerbil 1.0.0-rc.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +247 -84
  3. package/dist/architectures-C1I5V3Dt.mjs +6070 -0
  4. package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
  5. package/dist/browser/index.d.ts +264 -588
  6. package/dist/browser/index.d.ts.map +1 -1
  7. package/dist/browser/index.js +585 -2334
  8. package/dist/browser/index.js.map +1 -1
  9. package/dist/cli.mjs +625 -1098
  10. package/dist/cli.mjs.map +1 -1
  11. package/dist/defaults-9komdrbY.mjs +24 -0
  12. package/dist/defaults-9komdrbY.mjs.map +1 -0
  13. package/dist/frameworks/express.d.mts +1 -3
  14. package/dist/frameworks/express.d.mts.map +1 -1
  15. package/dist/frameworks/express.mjs +7 -7
  16. package/dist/frameworks/express.mjs.map +1 -1
  17. package/dist/frameworks/fastify.d.mts +1 -1
  18. package/dist/frameworks/fastify.d.mts.map +1 -1
  19. package/dist/frameworks/fastify.mjs +3 -3
  20. package/dist/frameworks/fastify.mjs.map +1 -1
  21. package/dist/frameworks/hono.d.mts +1 -1
  22. package/dist/frameworks/hono.d.mts.map +1 -1
  23. package/dist/frameworks/hono.mjs +4 -4
  24. package/dist/frameworks/hono.mjs.map +1 -1
  25. package/dist/frameworks/next.d.mts +3 -2
  26. package/dist/frameworks/next.d.mts.map +1 -1
  27. package/dist/frameworks/next.mjs +4 -4
  28. package/dist/frameworks/next.mjs.map +1 -1
  29. package/dist/frameworks/react.d.mts +1 -1
  30. package/dist/frameworks/trpc.d.mts +1 -1
  31. package/dist/frameworks/trpc.d.mts.map +1 -1
  32. package/dist/frameworks/trpc.mjs +4 -4
  33. package/dist/frameworks/trpc.mjs.map +1 -1
  34. package/dist/gerbil-BHrJJIa4.mjs +1656 -0
  35. package/dist/gerbil-BHrJJIa4.mjs.map +1 -0
  36. package/dist/gerbil-BT9fCydo.d.mts +488 -0
  37. package/dist/gerbil-BT9fCydo.d.mts.map +1 -0
  38. package/dist/gerbil-DomNfIr1.mjs +4 -0
  39. package/dist/gpu/hooks.d.mts +520 -0
  40. package/dist/gpu/hooks.d.mts.map +1 -0
  41. package/dist/gpu/hooks.mjs +1188 -0
  42. package/dist/gpu/hooks.mjs.map +1 -0
  43. package/dist/gpu/index.d.mts +2 -0
  44. package/dist/gpu/index.mjs +6 -0
  45. package/dist/gpu-33qCAtHW.mjs +3615 -0
  46. package/dist/gpu-33qCAtHW.mjs.map +1 -0
  47. package/dist/index-Dgmb2kE3.d.mts +245 -0
  48. package/dist/index-Dgmb2kE3.d.mts.map +1 -0
  49. package/dist/index-jEAL2s-A.d.mts +2022 -0
  50. package/dist/index-jEAL2s-A.d.mts.map +1 -0
  51. package/dist/index.d.mts +22 -487
  52. package/dist/index.d.mts.map +1 -1
  53. package/dist/index.mjs +13 -8
  54. package/dist/index.mjs.map +1 -1
  55. package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
  56. package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
  57. package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
  58. package/dist/integrations/ai-sdk.d.mts +75 -6
  59. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  60. package/dist/integrations/ai-sdk.mjs +131 -15
  61. package/dist/integrations/ai-sdk.mjs.map +1 -1
  62. package/dist/integrations/langchain.d.mts +1 -1
  63. package/dist/integrations/langchain.d.mts.map +1 -1
  64. package/dist/integrations/langchain.mjs +5 -5
  65. package/dist/integrations/langchain.mjs.map +1 -1
  66. package/dist/integrations/llamaindex.d.mts +1 -1
  67. package/dist/integrations/llamaindex.d.mts.map +1 -1
  68. package/dist/integrations/llamaindex.mjs +5 -5
  69. package/dist/integrations/llamaindex.mjs.map +1 -1
  70. package/dist/integrations/mcp-client.mjs +3 -3
  71. package/dist/integrations/mcp-client.mjs.map +1 -1
  72. package/dist/integrations/mcp.d.mts +3 -2
  73. package/dist/integrations/mcp.d.mts.map +1 -1
  74. package/dist/integrations/mcp.mjs +5 -5
  75. package/dist/{mcp-BvbriaBy.mjs → mcp-1DaMsaBc.mjs} +4 -4
  76. package/dist/mcp-1DaMsaBc.mjs.map +1 -0
  77. package/dist/memory/index.d.mts +3 -0
  78. package/dist/memory/index.mjs +6 -0
  79. package/dist/memory-D1P7Tmda.mjs +4 -0
  80. package/dist/memory-DVN0MnIG.mjs +132 -0
  81. package/dist/memory-DVN0MnIG.mjs.map +1 -0
  82. package/dist/memory-Dj0J1v88.mjs +294 -0
  83. package/dist/memory-Dj0J1v88.mjs.map +1 -0
  84. package/dist/moonshine-stt-BLyVoRpB.mjs +4 -0
  85. package/dist/moonshine-stt-v_P_Ci_m.mjs +11936 -0
  86. package/dist/moonshine-stt-v_P_Ci_m.mjs.map +1 -0
  87. package/dist/{one-liner-s-lD8rCC.mjs → one-liner-DnQn7HJK.mjs} +14 -16
  88. package/dist/one-liner-DnQn7HJK.mjs.map +1 -0
  89. package/dist/repl-jV5gcJFA.mjs +9 -0
  90. package/dist/skills/index.d.mts +270 -320
  91. package/dist/skills/index.d.mts.map +1 -1
  92. package/dist/skills/index.mjs +5 -5
  93. package/dist/{skills-CD3Orlex.mjs → skills-DX8D59UH.mjs} +187 -32
  94. package/dist/skills-DX8D59UH.mjs.map +1 -0
  95. package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
  96. package/dist/tools-DQ1mPUw5.mjs.map +1 -0
  97. package/dist/{types-CiTc7ez3.d.mts → types-D6FiR_oh.d.mts} +106 -12
  98. package/dist/types-D6FiR_oh.d.mts.map +1 -0
  99. package/dist/types-DQBe2lFo.d.mts +165 -0
  100. package/dist/types-DQBe2lFo.d.mts.map +1 -0
  101. package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
  102. package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
  103. package/dist/vector-B0panuy6.mjs +95 -0
  104. package/dist/vector-B0panuy6.mjs.map +1 -0
  105. package/docs/PROJECT-STATE.md +321 -0
  106. package/docs/adding-a-model-family.md +280 -0
  107. package/docs/ai-sdk.md +70 -61
  108. package/docs/architecture/overview.md +17 -7
  109. package/docs/browser.md +203 -8
  110. package/docs/embeddings.md +156 -0
  111. package/docs/gerbil-site-native-migration.md +217 -0
  112. package/docs/gpu-engine/architectures.md +398 -0
  113. package/docs/gpu-engine/ir.md +372 -0
  114. package/docs/gpu-engine/kernels.md +718 -0
  115. package/docs/gpu-engine/paper.html +1759 -0
  116. package/docs/gpu-engine/paper.md +2109 -0
  117. package/docs/gpu-engine/safetensors.md +312 -0
  118. package/docs/gpu-engine/tokenizer.md +302 -0
  119. package/docs/memory-rag.md +91 -0
  120. package/docs/metal-safari-intel.md +190 -0
  121. package/docs/mobile-failure-diagnosis.md +124 -0
  122. package/docs/mobile.md +99 -0
  123. package/docs/observability.md +230 -0
  124. package/docs/onnx-removal-plan.md +339 -0
  125. package/docs/research/autoresearch-portable.md +904 -0
  126. package/docs/research/dispatch-reduction-hivemind.md +84 -0
  127. package/docs/research/ios-safari-model-caching.md +117 -0
  128. package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
  129. package/docs/research/native-stt-model-selection.md +49 -0
  130. package/docs/research/native-tts-model-selection.md +90 -0
  131. package/docs/research/native-vs-chromium-decision.md +152 -0
  132. package/docs/research/nemotron-mamba2-inference.md +910 -0
  133. package/docs/research/qwen35-multimodal.md +293 -0
  134. package/docs/research/qwen36-gemma4-targets.md +337 -0
  135. package/docs/research/sota-embedding-models.md +179 -0
  136. package/docs/research/sota-mobile-models-2026.md +263 -0
  137. package/docs/research/sota-modality-models.md +202 -0
  138. package/docs/research/tps-baselines.md +71 -0
  139. package/docs/research/webgpu-m4-reference.md +104 -0
  140. package/docs/site-update-plan.md +155 -0
  141. package/docs/structured-output.md +123 -0
  142. package/docs/stt.md +63 -446
  143. package/docs/tts.md +77 -499
  144. package/docs/vision.md +100 -338
  145. package/package.json +22 -7
  146. package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
  147. package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
  148. package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
  149. package/dist/gerbil-CJ3ifloF.mjs +0 -4
  150. package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
  151. package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
  152. package/dist/gerbil-qOTe1nl2.d.mts +0 -431
  153. package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
  154. package/dist/kokoro-BNTb6egA.mjs +0 -20210
  155. package/dist/kokoro-BNTb6egA.mjs.map +0 -1
  156. package/dist/kokoro-DFRQ1OeM.js +0 -20212
  157. package/dist/kokoro-DFRQ1OeM.js.map +0 -1
  158. package/dist/mcp-BvbriaBy.mjs.map +0 -1
  159. package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
  160. package/dist/repl-DveXw36T.mjs +0 -9
  161. package/dist/skills-CD3Orlex.mjs.map +0 -1
  162. package/dist/stt-CpLYbGFd.mjs +0 -433
  163. package/dist/stt-CpLYbGFd.mjs.map +0 -1
  164. package/dist/stt-DRPLEEHB.mjs +0 -3
  165. package/dist/stt-Te8Qz-Ay.js +0 -433
  166. package/dist/stt-Te8Qz-Ay.js.map +0 -1
  167. package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
  168. package/dist/transformers.web-DokyH3rP.js +0 -3
  169. package/dist/transformers.web-M6mCnEYJ.js +0 -30382
  170. package/dist/transformers.web-M6mCnEYJ.js.map +0 -1
  171. package/dist/tts-C0xx3CtE.js +0 -724
  172. package/dist/tts-C0xx3CtE.js.map +0 -1
  173. package/dist/tts-DXgsKGCe.mjs +0 -3
  174. package/dist/tts-DeGANMNV.mjs +0 -730
  175. package/dist/tts-DeGANMNV.mjs.map +0 -1
  176. package/dist/types-CiTc7ez3.d.mts.map +0 -1
  177. /package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
  178. /package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
  179. /package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0
package/dist/cli.mjs CHANGED
@@ -1,17 +1,16 @@
1
1
  #!/usr/bin/env node
2
- import { t as __require } from "./chunk-CkXuGtQK.mjs";
3
- import { n as BUILTIN_MODELS, t as Gerbil } from "./gerbil-Dw4Qj77e.mjs";
4
- import { n as getChromeCachedModels, r as refreshCachedModelSizes } from "./chrome-backend-CORwaIyC.mjs";
5
- import "./utils-CZBZ8dgR.mjs";
6
- import "./one-liner-s-lD8rCC.mjs";
7
- import { D as listSkills, T as getSkillInfo, a as summarize, d as explain, k as useSkill, m as commit, s as review, v as loadProjectSkills } from "./skills-CD3Orlex.mjs";
8
- import { r as startMCPServer } from "./mcp-BvbriaBy.mjs";
9
- import { a as getToolDefinitions, c as setToolContext, i as getTool, n as executeToolCall, o as loadProjectTools, r as formatToolsForPrompt, s as parseToolCall } from "./tools-Bi1P7Xoy.mjs";
10
- import { exec, spawn, spawnSync } from "node:child_process";
2
+ import { t as __require } from "./chunk-B9cbKln6.mjs";
3
+ import { n as BUILTIN_MODELS, r as DEFAULT_MODEL, t as Gerbil } from "./gerbil-BHrJJIa4.mjs";
4
+ import "./utils-DKO55ZmZ.mjs";
5
+ import "./one-liner-DnQn7HJK.mjs";
6
+ import { n as isArchitectureSupported } from "./architectures-C1I5V3Dt.mjs";
7
+ import { A as useSkill, E as getSkillInfo, O as listSkills, a as summarize, f as explain, h as commit, s as review, y as loadProjectSkills } from "./skills-DX8D59UH.mjs";
8
+ import { r as startMCPServer } from "./mcp-1DaMsaBc.mjs";
9
+ import { a as getToolDefinitions, c as setToolContext, i as getTool, n as executeToolCall, o as loadProjectTools, r as formatToolsForPrompt, s as parseToolCall } from "./tools-DQ1mPUw5.mjs";
11
10
  import fs, { existsSync, readFileSync, unlinkSync } from "node:fs";
12
- import http from "node:http";
13
11
  import os, { tmpdir } from "node:os";
14
12
  import path, { join } from "node:path";
13
+ import { exec, spawn, spawnSync } from "node:child_process";
15
14
  import React, { useCallback, useEffect, useRef, useState } from "react";
16
15
  import chalk from "chalk";
17
16
  import { Command } from "commander";
@@ -22,9 +21,10 @@ import Spinner from "ink-spinner";
22
21
  import { Fragment, jsx, jsxs } from "react/jsx-runtime";
23
22
  import SelectInput from "ink-select-input";
24
23
  import TextInput from "ink-text-input";
24
+ import http from "node:http";
25
25
 
26
26
  //#region package.json
27
- var version = "1.0.0-rc.8";
27
+ var version = "1.0.0";
28
28
 
29
29
  //#endregion
30
30
  //#region src/cli/repl/auto-update.ts
@@ -123,120 +123,82 @@ function getGerbilModelsDir() {
123
123
  return path.join(process.cwd(), ".gerbil", "models");
124
124
  }
125
125
  /**
126
- * Get transformers.js cache directory (downloaded HF models)
126
+ * Get the native WebGPU engine cache directory (downloaded safetensors models).
127
+ * Layout: ~/.cache/gerbil/<repo>/<revision>/ — see src/gpu/model-loader.ts.
127
128
  */
128
- function getTransformersCacheDir() {
129
- const nodeModulesCache = path.join(process.cwd(), "node_modules", "@huggingface", "transformers", ".cache");
130
- if (fs.existsSync(nodeModulesCache)) return nodeModulesCache;
131
- return process.env.HF_HOME || process.env.TRANSFORMERS_CACHE || path.join(os.homedir(), ".cache", "huggingface", "hub");
129
+ function getNativeCacheDir() {
130
+ return path.join(os.homedir(), ".cache", "gerbil");
132
131
  }
133
132
  /**
134
- * Check if a model is cached locally (checks gerbil, transformers, and Chrome cache)
133
+ * Check if a model is cached locally for the native engine.
134
+ *
135
+ * The native loader stores each repo under ~/.cache/gerbil/<repo>/<revision>/
136
+ * (slashes in the repo replaced by underscores), plus the project-local
137
+ * .gerbil/models dir. A model counts as cached when a safetensors weight file
138
+ * is present somewhere under its directory.
135
139
  */
136
140
  function isModelCached(hfId) {
137
141
  const [org, model] = hfId.split("/");
138
142
  if (!(org && model)) return false;
139
- try {
140
- if (getChromeCachedModels().some((m) => m.modelId === hfId)) return true;
141
- } catch {}
142
- const cacheDirs = [getGerbilModelsDir(), getTransformersCacheDir()];
143
- for (const cacheDir of cacheDirs) {
144
- const possiblePaths = [
145
- path.join(cacheDir, org, model),
146
- path.join(cacheDir, hfId.replace("/", "--")),
147
- path.join(cacheDir, `models--${org}--${model}`)
148
- ];
149
- for (const p of possiblePaths) try {
150
- if (fs.existsSync(p)) {
151
- if (fs.statSync(p).isDirectory()) {
152
- if (fs.readdirSync(p, { recursive: true }).some((f) => {
153
- const fname = String(f).toLowerCase();
154
- return fname.endsWith(".onnx") || fname.endsWith("model.safetensors") || fname.includes("decoder_model") || fname.includes("encoder_model");
155
- })) return true;
156
- }
157
- }
158
- } catch {}
159
- }
160
- return false;
143
+ const hasSafetensors = (root) => {
144
+ try {
145
+ if (!fs.existsSync(root)) return false;
146
+ if (!fs.statSync(root).isDirectory()) return false;
147
+ return fs.readdirSync(root, { recursive: true }).some((f) => String(f).toLowerCase().endsWith(".safetensors"));
148
+ } catch {
149
+ return false;
150
+ }
151
+ };
152
+ if (hasSafetensors(path.join(getNativeCacheDir(), hfId.replace(/\//g, "_")))) return true;
153
+ const gerbilDir = getGerbilModelsDir();
154
+ return [
155
+ path.join(gerbilDir, org, model),
156
+ path.join(gerbilDir, hfId.replace(/\//g, "_")),
157
+ path.join(gerbilDir, hfId.replace("/", "--"))
158
+ ].some(hasSafetensors);
161
159
  }
162
160
  /**
163
161
  * Scan a single cache directory for models
164
162
  */
165
163
  function scanCacheDir(cacheDir, models) {
166
164
  let totalBytes = 0;
165
+ const measure = (root) => {
166
+ let total = 0;
167
+ let modelBytes = 0;
168
+ try {
169
+ const files = fs.readdirSync(root, { recursive: true });
170
+ for (const file of files) try {
171
+ const fileStat = fs.statSync(path.join(root, String(file)));
172
+ if (fileStat.isFile()) {
173
+ total += fileStat.size;
174
+ if (String(file).toLowerCase().endsWith(".safetensors")) modelBytes += fileStat.size;
175
+ }
176
+ } catch {}
177
+ } catch {}
178
+ return {
179
+ total,
180
+ modelBytes
181
+ };
182
+ };
167
183
  try {
168
184
  if (!fs.existsSync(cacheDir)) return 0;
169
- const entries = fs.readdirSync(cacheDir);
170
- for (const entry of entries) {
185
+ for (const entry of fs.readdirSync(cacheDir)) {
171
186
  const entryPath = path.join(cacheDir, entry);
172
- const entryStat = fs.statSync(entryPath);
173
- if (!entryStat.isDirectory()) continue;
174
- let modelName = entry;
175
- if (entry.startsWith("models--")) modelName = entry.replace("models--", "").replace("--", "/");
176
- const subEntries = fs.readdirSync(entryPath);
177
- for (const subEntry of subEntries) {
178
- const subPath = path.join(entryPath, subEntry);
179
- try {
180
- const subStat = fs.statSync(subPath);
181
- if (subStat.isDirectory()) {
182
- const fullModelName = `${entry}/${subEntry}`;
183
- let totalSize = 0;
184
- let modelFileSize = 0;
185
- try {
186
- const files = fs.readdirSync(subPath, { recursive: true });
187
- for (const file of files) try {
188
- const filePath = path.join(subPath, String(file));
189
- const fileStat = fs.statSync(filePath);
190
- if (fileStat.isFile()) {
191
- totalSize += fileStat.size;
192
- const fname = String(file).toLowerCase();
193
- if (fname.endsWith(".onnx") || fname.endsWith(".safetensors")) modelFileSize += fileStat.size;
194
- }
195
- } catch {}
196
- } catch {
197
- totalSize = subStat.size;
198
- }
199
- if (totalSize > 0) {
200
- totalBytes += totalSize;
201
- const lastUsed = formatTimeAgo(subStat.mtime);
202
- models.push({
203
- name: fullModelName,
204
- modelSize: formatBytes(modelFileSize || totalSize),
205
- totalSize: formatBytes(totalSize),
206
- lastUsed,
207
- location: cacheDir
208
- });
209
- }
210
- }
211
- } catch {}
212
- }
213
- if (subEntries.some((f) => f.endsWith(".onnx") || f.endsWith(".json"))) {
214
- let totalSize = 0;
215
- let modelFileSize = 0;
216
- try {
217
- const files = fs.readdirSync(entryPath, { recursive: true });
218
- for (const file of files) try {
219
- const filePath = path.join(entryPath, String(file));
220
- const fileStat = fs.statSync(filePath);
221
- if (fileStat.isFile()) {
222
- totalSize += fileStat.size;
223
- const fname = String(file).toLowerCase();
224
- if (fname.endsWith(".onnx") || fname.endsWith(".safetensors")) modelFileSize += fileStat.size;
225
- }
226
- } catch {}
227
- } catch {
228
- totalSize = entryStat.size;
187
+ try {
188
+ const entryStat = fs.statSync(entryPath);
189
+ if (!entryStat.isDirectory()) continue;
190
+ const { total, modelBytes } = measure(entryPath);
191
+ if (total > 0) {
192
+ totalBytes += total;
193
+ models.push({
194
+ name: entry.replace(/_/g, "/"),
195
+ modelSize: formatBytes(modelBytes || total),
196
+ totalSize: formatBytes(total),
197
+ lastUsed: formatTimeAgo(entryStat.mtime),
198
+ location: cacheDir
199
+ });
229
200
  }
230
- totalBytes += totalSize;
231
- const lastUsed = formatTimeAgo(entryStat.mtime);
232
- models.push({
233
- name: modelName,
234
- modelSize: formatBytes(modelFileSize || totalSize),
235
- totalSize: formatBytes(totalSize),
236
- lastUsed,
237
- location: cacheDir
238
- });
239
- }
201
+ } catch {}
240
202
  }
241
203
  } catch {}
242
204
  return totalBytes;
@@ -246,32 +208,14 @@ function scanCacheDir(cacheDir, models) {
246
208
  */
247
209
  function getCacheInfo() {
248
210
  const gerbilDir = getGerbilModelsDir();
249
- const transformersDir = getTransformersCacheDir();
211
+ const nativeDir = getNativeCacheDir();
250
212
  const models = [];
251
213
  let totalBytes = 0;
252
214
  totalBytes += scanCacheDir(gerbilDir, models);
253
- totalBytes += scanCacheDir(transformersDir, models);
254
- let hasMissingSizes = false;
255
- try {
256
- const chromeCached = getChromeCachedModels();
257
- for (const entry of chromeCached) if (!models.some((m) => m.name === entry.modelId || m.name.replace("--", "/") === entry.modelId || entry.modelId.replace("/", "--") === m.name)) {
258
- const lastUsed = entry.lastUsed ? formatTimeAgo(new Date(entry.lastUsed)) : "unknown";
259
- if (!(entry.sizeBytes && entry.contextLength)) hasMissingSizes = true;
260
- models.push({
261
- name: entry.modelId,
262
- modelSize: entry.sizeBytes ? formatBytes(entry.sizeBytes) : "~",
263
- totalSize: entry.sizeBytes ? formatBytes(entry.sizeBytes) : "~",
264
- lastUsed,
265
- location: "Chrome IndexedDB",
266
- contextLength: entry.contextLength
267
- });
268
- if (entry.sizeBytes) totalBytes += entry.sizeBytes;
269
- }
270
- if (hasMissingSizes) refreshCachedModelSizes().catch(() => {});
271
- } catch {}
215
+ totalBytes += scanCacheDir(nativeDir, models);
272
216
  const activeLocations = [...new Set(models.map((m) => m.location))];
273
217
  return {
274
- locations: activeLocations.length > 0 ? activeLocations : [gerbilDir, transformersDir],
218
+ locations: activeLocations.length > 0 ? activeLocations : [gerbilDir, nativeDir],
275
219
  totalSize: formatBytes(totalBytes),
276
220
  models: models.sort((a, b) => a.name.localeCompare(b.name))
277
221
  };
@@ -304,50 +248,35 @@ function cleanResponse(text) {
304
248
  return text.replace(/<think>[\s\S]*?<\/think>/g, "").replace(/<\/?think>/g, "").trim();
305
249
  }
306
250
  /**
307
- * Preset models with known performance
251
+ * Preset models, native WebGPU engine only.
252
+ *
253
+ * The native engine loads standard HuggingFace safetensors repos and quantizes
254
+ * to INT4 on the fly (dtype "q4"); there is no ONNX involved. Every entry here
255
+ * is an architecture the engine supports (Qwen2/Qwen3/Qwen3.5, LFM2 — see
256
+ * src/gpu/architectures/index.ts). `hfId` is the safetensors repo the loader
257
+ * pulls and the key used for native cache detection.
308
258
  */
309
259
  const PRESET_MODELS = [
310
260
  {
311
- id: "qwen3-0.6b",
312
- name: "Qwen3 0.6B",
313
- size: "~400MB",
261
+ id: "qwen3.5-0.8b",
262
+ name: "Qwen3.5 0.8B",
263
+ size: "~500MB (q4)",
314
264
  speed: "fastest",
315
- hfId: "onnx-community/Qwen3-0.6B-ONNX"
265
+ hfId: "Qwen/Qwen3.5-0.8B"
316
266
  },
317
267
  {
318
- id: "qwen2.5-0.5b",
319
- name: "Qwen2.5 0.5B",
320
- size: "~350MB",
268
+ id: "qwen3.5-2b",
269
+ name: "Qwen3.5 2B",
270
+ size: "~1.3GB (q4)",
321
271
  speed: "fast",
322
- hfId: "onnx-community/Qwen2.5-0.5B-Instruct"
272
+ hfId: "Qwen/Qwen3.5-2B"
323
273
  },
324
274
  {
325
- id: "qwen2.5-coder-0.5b",
326
- name: "Qwen2.5 Coder",
327
- size: "~400MB",
275
+ id: "lfm2.5-1.2b-thinking",
276
+ name: "LFM2.5 1.2B Thinking",
277
+ size: "~760MB (q4)",
328
278
  speed: "fast",
329
- hfId: "onnx-community/Qwen2.5-Coder-0.5B-Instruct"
330
- },
331
- {
332
- id: "smollm2-360m",
333
- name: "SmolLM2 360M",
334
- size: "~250MB",
335
- speed: "fastest",
336
- hfId: "HuggingFaceTB/SmolLM2-360M-Instruct"
337
- },
338
- {
339
- id: "smollm2-135m",
340
- name: "SmolLM2 135M",
341
- size: "~100MB",
342
- speed: "fastest",
343
- hfId: "HuggingFaceTB/SmolLM2-135M-Instruct"
344
- },
345
- {
346
- id: "phi-3-mini",
347
- name: "Phi-3 Mini",
348
- size: "~2.1GB",
349
- speed: "medium",
350
- hfId: "microsoft/Phi-3-mini-4k-instruct-onnx"
279
+ hfId: "LiquidAI/LFM2.5-1.2B-Thinking"
351
280
  }
352
281
  ];
353
282
  /**
@@ -371,19 +300,12 @@ async function fetchModelMetadata(modelId) {
371
300
  }
372
301
  } catch {}
373
302
  try {
374
- const treeRes = await fetch(`https://huggingface.co/api/models/${modelId}/tree/main/onnx`);
303
+ const treeRes = await fetch(`https://huggingface.co/api/models/${modelId}/tree/main?recursive=true`);
375
304
  if (treeRes.ok) {
376
305
  const files = await treeRes.json();
377
306
  const getSize = (f) => f.lfs?.size || f.size || 0;
378
- const q4f16 = files.find((f) => f.path.includes("q4f16") && f.path.endsWith(".onnx"));
379
- const q4 = files.find((f) => f.path.includes("q4") && !f.path.includes("f16") && f.path.endsWith(".onnx"));
380
- const fp16 = files.find((f) => f.path.includes("fp16") && f.path.endsWith(".onnx"));
381
- const anyOnnx = files.find((f) => f.path.endsWith(".onnx"));
382
- const bestFile = q4f16 || q4 || fp16 || anyOnnx;
383
- if (bestFile) {
384
- const baseName = bestFile.path.replace(".onnx", "");
385
- result.sizeBytes = files.filter((f) => f.path === bestFile.path || f.path.startsWith(`${baseName}.onnx_data`)).reduce((sum, f) => sum + getSize(f), 0);
386
- }
307
+ const safetensors = files.filter((f) => f.path.toLowerCase().endsWith(".safetensors"));
308
+ if (safetensors.length > 0) result.sizeBytes = safetensors.reduce((sum, f) => sum + getSize(f), 0);
387
309
  }
388
310
  } catch {}
389
311
  return result;
@@ -421,12 +343,12 @@ function saveBenchmark(result) {
421
343
  */
422
344
  /**
423
345
  * Normalize model ID for matching (extract core name)
424
- * e.g., "HuggingFaceTB/SmolLM2-135M-Instruct" -> "smollm2-135m"
425
- * "onnx-community/Qwen3-0.6B-ONNX" -> "qwen3-0.6b"
426
- * "qwen3-0.6b" -> "qwen3-0.6b"
346
+ * e.g., "Qwen/Qwen3.5-0.8B" -> "qwen3.5-0.8b"
347
+ * "mlx-community/Qwen3.5-0.8B-4bit" -> "qwen3.5-0.8b"
348
+ * "qwen3.5-0.8b" -> "qwen3.5-0.8b"
427
349
  */
428
350
  function normalizeModelId(id) {
429
- return (id.split("/").pop() || id).toLowerCase().replace(/-instruct$/i, "").replace(/-onnx$/i, "").replace(/-chat$/i, "").replace(/-it$/i, "");
351
+ return (id.split("/").pop() || id).toLowerCase().replace(/-instruct$/i, "").replace(/-onnx$/i, "").replace(/-4bit$/i, "").replace(/-chat$/i, "").replace(/-it$/i, "");
430
352
  }
431
353
  /**
432
354
  * Process an image path (URL or local file) and return info for attaching
@@ -508,7 +430,7 @@ function getModelBenchmarkStats(modelId) {
508
430
 
509
431
  //#endregion
510
432
  //#region src/cli/repl/views/BenchmarkView.tsx
511
- function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchModel, onSwitchDevice, onStatusChange, benchmarkResults, setBenchmarkResults, benchmarkModels, setBenchmarkModels }) {
433
+ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchModel, onStatusChange, benchmarkResults, setBenchmarkResults, benchmarkModels, setBenchmarkModels }) {
512
434
  const [status, setStatus] = useState("idle");
513
435
  const [currentRun, setCurrentRun] = useState(benchmarkResults.length);
514
436
  const [showHistory, setShowHistory] = useState(false);
@@ -564,7 +486,6 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
564
486
  setBenchmarkModels([model]);
565
487
  }
566
488
  if ((input === "m" || input === "M") && !blocked) onSwitchModel?.();
567
- if ((input === "d" || input === "D") && !blocked) onSwitchDevice?.(currentDevice === "webgpu" ? "cpu" : "webgpu");
568
489
  if ((input === "h" || input === "H") && !blocked) {
569
490
  if (!showHistory) setHistoryData(getStoredBenchmarks());
570
491
  setShowHistory(!showHistory);
@@ -593,25 +514,30 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
593
514
  "List 3 benefits of TypeScript."
594
515
  ];
595
516
  const prompt = prompts[currentRun % prompts.length];
596
- const startTime = Date.now();
597
- let firstTokenTime = 0;
598
- let gotFirstToken = false;
517
+ const startTime = performance.now();
518
+ let firstTokenAt = 0;
519
+ let lastTokenAt = 0;
520
+ let tokenCount = 0;
599
521
  const result = await gerbil.generate(prompt, {
600
522
  maxTokens: 100,
601
523
  onToken: () => {
602
- if (!gotFirstToken) {
603
- firstTokenTime = Date.now() - startTime;
604
- gotFirstToken = true;
605
- }
524
+ const now = performance.now();
525
+ tokenCount++;
526
+ if (tokenCount === 1) firstTokenAt = now;
527
+ lastTokenAt = now;
606
528
  }
607
529
  });
530
+ const endTime = performance.now();
531
+ const hasTokenEvents = tokenCount > 0;
532
+ const firstTokenMs = hasTokenEvents ? Math.round(firstTokenAt - startTime) : Math.round(result.totalTime * .15);
533
+ const tokensPerSec = hasTokenEvents ? tokenCount > 1 && lastTokenAt > firstTokenAt ? Math.round(tokenCount / (lastTokenAt - firstTokenAt) * 1e3) : Math.round(tokenCount / (endTime - startTime) * 1e3) : Math.round(result.tokensGenerated / result.totalTime * 1e3);
608
534
  const newResult = {
609
535
  model,
610
536
  device: gerbil.getDeviceMode(),
611
- tokensPerSec: Math.round(result.tokensPerSecond),
612
- firstTokenMs: firstTokenTime || Math.round(result.totalTime * .1),
613
- totalTokens: result.tokensGenerated,
614
- totalMs: Math.round(result.totalTime)
537
+ tokensPerSec,
538
+ firstTokenMs,
539
+ totalTokens: hasTokenEvents ? tokenCount : result.tokensGenerated,
540
+ totalMs: Math.round(endTime - startTime)
615
541
  };
616
542
  saveBenchmark(newResult);
617
543
  const updatedResults = [...benchmarkResults, newResult];
@@ -621,13 +547,11 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
621
547
  onResult?.(newResult.tokensPerSec);
622
548
  };
623
549
  const currentResults = benchmarkResults.filter((r) => r.model === model && r.device === currentDevice);
624
- const modelStats = [...new Set(benchmarkResults.map((r) => `${r.model}|${r.device}`))].map((key) => {
625
- const [m, d] = key.split("|");
626
- const results = benchmarkResults.filter((r) => r.model === m && r.device === d);
550
+ const modelStats = [...new Set(benchmarkResults.map((r) => r.model))].map((m) => {
551
+ const results = benchmarkResults.filter((r) => r.model === m);
627
552
  if (results.length === 0) return null;
628
553
  return {
629
554
  model: m,
630
- device: d,
631
555
  runs: results.length,
632
556
  avgTokPerSec: Math.round(results.reduce((a, r) => a + r.tokensPerSec, 0) / results.length),
633
557
  avgFirstToken: Math.round(results.reduce((a, r) => a + r.firstTokenMs, 0) / results.length)
@@ -664,7 +588,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
664
588
  /* @__PURE__ */ jsx(Text, { children: " on " }),
665
589
  /* @__PURE__ */ jsx(Text, {
666
590
  bold: true,
667
- color: currentDevice === "webgpu" ? "green" : currentDevice === "cpu" ? "yellow" : "gray",
591
+ color: "green",
668
592
  children: currentDevice.toUpperCase()
669
593
  }),
670
594
  modelStats.length > 1 && /* @__PURE__ */ jsxs(Text, {
@@ -672,7 +596,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
672
596
  children: [
673
597
  " (comparing ",
674
598
  modelStats.length,
675
- " configs)"
599
+ " models)"
676
600
  ]
677
601
  })
678
602
  ] }),
@@ -735,22 +659,12 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
735
659
  color: "green",
736
660
  children: modelStats.length > 1 ? "Comparison:" : "Averages:"
737
661
  }), modelStats.map((s) => {
738
- const isCurrent = s.model === model && s.device === currentDevice;
739
- const dColor = s.device === "webgpu" ? "green" : s.device === "cpu" ? "yellow" : "gray";
740
662
  return /* @__PURE__ */ jsxs(Box, { children: [
741
663
  /* @__PURE__ */ jsxs(Text, { children: [
742
- isCurrent ? ">" : " ",
664
+ s.model === model ? ">" : " ",
743
665
  " ",
744
666
  s.model
745
667
  ] }),
746
- /* @__PURE__ */ jsxs(Text, {
747
- color: dColor,
748
- children: [
749
- " [",
750
- s.device,
751
- "]"
752
- ]
753
- }),
754
668
  /* @__PURE__ */ jsx(Text, { children: ": " }),
755
669
  /* @__PURE__ */ jsxs(Text, {
756
670
  bold: true,
@@ -774,7 +688,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
774
688
  color: "green",
775
689
  children: " *fastest*"
776
690
  })
777
- ] }, `${s.model}-${s.device}`);
691
+ ] }, s.model);
778
692
  })]
779
693
  }), bests && benchmarkResults.length >= 2 && /* @__PURE__ */ jsxs(Box, {
780
694
  borderColor: "magenta",
@@ -803,11 +717,8 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
803
717
  /* @__PURE__ */ jsxs(Text, {
804
718
  dimColor: true,
805
719
  children: [
806
- " ",
807
- "(",
720
+ " (",
808
721
  bests.fastestRun.model,
809
- " on ",
810
- bests.fastestRun.device,
811
722
  ")"
812
723
  ]
813
724
  })
@@ -827,11 +738,8 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
827
738
  /* @__PURE__ */ jsxs(Text, {
828
739
  dimColor: true,
829
740
  children: [
830
- " ",
831
- "(",
741
+ " (",
832
742
  bests.fastestFirstToken.model,
833
- " on ",
834
- bests.fastestFirstToken.device,
835
743
  ")"
836
744
  ]
837
745
  })
@@ -851,12 +759,9 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
851
759
  dimColor: true,
852
760
  children: [
853
761
  " ",
854
- "on ",
855
- bests.bestOverall.device,
856
- " (",
762
+ "(",
857
763
  bests.bestOverall.avgTokPerSec,
858
- " tok/s,",
859
- " ",
764
+ " tok/s, ",
860
765
  bests.bestOverall.avgFirstToken,
861
766
  "ms)"
862
767
  ]
@@ -917,9 +822,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
917
822
  " ",
918
823
  "- ",
919
824
  b.model,
920
- " [",
921
- b.device,
922
- "] ",
825
+ " ",
923
826
  new Date(b.timestamp).toLocaleDateString()
924
827
  ]
925
828
  })
@@ -960,14 +863,6 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
960
863
  children: "Enter"
961
864
  }),
962
865
  " run | ",
963
- /* @__PURE__ */ jsx(Text, {
964
- color: "yellow",
965
- children: "d"
966
- }),
967
- " switch",
968
- " ",
969
- currentDevice === "webgpu" ? "CPU" : "GPU",
970
- " | ",
971
866
  /* @__PURE__ */ jsx(Text, {
972
867
  color: "yellow",
973
868
  children: "m"
@@ -1667,9 +1562,7 @@ function ChatView({ gerbil, thinkingMode, agentMode, voiceMode, onToggleThinking
1667
1562
  }, []);
1668
1563
  const modes = Object.keys(CHAT_MODES);
1669
1564
  const modelInfo = gerbil.getModelInfo();
1670
- const modelId = modelInfo?.repo;
1671
- const cachedModels = modelId ? getChromeCachedModels() : [];
1672
- const maxContext = (modelId ? cachedModels.find((m) => m.modelId === modelId) : null)?.contextLength || modelInfo?.contextLength || 32768;
1565
+ const maxContext = modelInfo?.contextLength || 32768;
1673
1566
  const currentTokens = estimateTokens(buildConversationContext(messages, mode));
1674
1567
  const contextPercent = Math.round(currentTokens / maxContext * 100);
1675
1568
  const ACTION_COUNT = supportsVision ? 5 : 4;
@@ -2035,11 +1928,9 @@ function ChatView({ gerbil, thinkingMode, agentMode, voiceMode, onToggleThinking
2035
1928
  setInput("");
2036
1929
  try {
2037
1930
  await gerbil.clearCache();
2038
- const mem = await gerbil.getMemoryUsage();
2039
- const memInfo = mem ? ` (was ${mem.usedGB.toFixed(1)}GB)` : "";
2040
1931
  setMessages((m) => [...m, {
2041
1932
  role: "system",
2042
- content: `🧹 Cache cleared${memInfo}. Conversation context reset.`
1933
+ content: "🧹 Cache cleared. Conversation context reset."
2043
1934
  }]);
2044
1935
  } catch (err) {
2045
1936
  setMessages((m) => [...m, {
@@ -2052,14 +1943,11 @@ function ChatView({ gerbil, thinkingMode, agentMode, voiceMode, onToggleThinking
2052
1943
  if (cmd === "memory" || cmd === "mem") {
2053
1944
  setInput("");
2054
1945
  try {
2055
- const mem = await gerbil.getMemoryUsage();
2056
- if (mem) setMessages((m) => [...m, {
2057
- role: "system",
2058
- content: `💾 Memory: ${mem.usedGB.toFixed(1)}GB / ${mem.totalGB.toFixed(1)}GB (${mem.usedPercent.toFixed(1)}%)`
2059
- }]);
2060
- else setMessages((m) => [...m, {
1946
+ const sys = getMemoryInfo();
1947
+ const rssMB = (process.memoryUsage().rss / 1024 / 1024).toFixed(0);
1948
+ setMessages((m) => [...m, {
2061
1949
  role: "system",
2062
- content: "Memory monitoring not available (CPU mode)"
1950
+ content: `💾 Process RSS: ${rssMB} MB · System: ${sys.used} / ${sys.total} (${sys.percentUsed}%)`
2063
1951
  }]);
2064
1952
  } catch (err) {
2065
1953
  setMessages((m) => [...m, {
@@ -3785,10 +3673,6 @@ const CAPABILITIES = [
3785
3673
  id: "text",
3786
3674
  name: "Text"
3787
3675
  },
3788
- {
3789
- id: "vision",
3790
- name: "Vision"
3791
- },
3792
3676
  {
3793
3677
  id: "tts",
3794
3678
  name: "Text to Speech"
@@ -3836,7 +3720,7 @@ const EXAMPLES = {
3836
3720
  code: `import { Gerbil } from "@tryhamster/gerbil";
3837
3721
 
3838
3722
  const g = new Gerbil();
3839
- await g.loadModel("qwen3-0.6b");
3723
+ await g.loadModel("qwen3.5-0.8b");
3840
3724
 
3841
3725
  // Generate text
3842
3726
  const result = await g.generate("Write a haiku", {
@@ -3863,13 +3747,13 @@ import { gerbil } from "@tryhamster/gerbil/ai";
3863
3747
 
3864
3748
  // Generate
3865
3749
  const { text } = await generateText({
3866
- model: gerbil("qwen3-0.6b"),
3750
+ model: gerbil("qwen3.5-0.8b"),
3867
3751
  prompt: "Explain quantum computing",
3868
3752
  });
3869
3753
 
3870
3754
  // Stream
3871
3755
  const { textStream } = streamText({
3872
- model: gerbil("qwen3-0.6b"),
3756
+ model: gerbil("qwen3.5-0.8b"),
3873
3757
  system: "You are a helpful assistant.",
3874
3758
  messages: [{ role: "user", content: "Hello!" }],
3875
3759
  });
@@ -3881,20 +3765,20 @@ for await (const chunk of textStream) {
3881
3765
  browser: {
3882
3766
  install: "npm install @tryhamster/gerbil",
3883
3767
  description: "React hooks for browser-based inference with WebGPU.",
3884
- code: `import { useChat, useCompletion } from "@tryhamster/gerbil/browser";
3768
+ code: `import { useChat, useCompletion } from "@tryhamster/gerbil/hooks";
3885
3769
 
3886
- // useChat - Full chat with message history
3770
+ // useChat - Full chat with message history (history is sent each turn)
3887
3771
  function Chat() {
3888
- const { messages, input, setInput, handleSubmit, isLoading } = useChat({
3889
- model: "qwen3-0.6b",
3772
+ const { messages, send, isGenerating, isLoading } = useChat({
3773
+ model: "qwen3.5-0.8b",
3890
3774
  thinking: true,
3891
3775
  });
3892
3776
  if (isLoading) return <div>Loading...</div>;
3893
3777
  return (
3894
- <form onSubmit={handleSubmit}>
3895
- {messages.map(m => <div key={m.id}>{m.role}: {m.content}</div>)}
3896
- <input value={input} onChange={e => setInput(e.target.value)} />
3897
- </form>
3778
+ <div>
3779
+ {messages.map((m, i) => <div key={i}>{m.role}: {m.content}</div>)}
3780
+ <button disabled={isGenerating} onClick={() => send("Hello!")}>Send</button>
3781
+ </div>
3898
3782
  );
3899
3783
  }
3900
3784
 
@@ -3912,7 +3796,7 @@ function Generator() {
3912
3796
  import { gerbil } from "@tryhamster/gerbil/next";
3913
3797
 
3914
3798
  export const POST = gerbil.handler({
3915
- model: "qwen3-0.6b",
3799
+ model: "qwen3.5-0.8b",
3916
3800
  system: "You are a helpful assistant.",
3917
3801
  });
3918
3802
 
@@ -3928,7 +3812,7 @@ import { gerbil } from "@tryhamster/gerbil/express";
3928
3812
 
3929
3813
  const app = express();
3930
3814
  app.use(express.json());
3931
- app.use("/api/ai", gerbil({ model: "qwen3-0.6b" })());
3815
+ app.use("/api/ai", gerbil({ model: "qwen3.5-0.8b" })());
3932
3816
 
3933
3817
  // POST /api/ai/generate { prompt, options }
3934
3818
  // POST /api/ai/stream { prompt, options } (SSE)
@@ -3944,7 +3828,7 @@ import { PromptTemplate } from "langchain/prompts";
3944
3828
  import { LLMChain } from "langchain/chains";
3945
3829
 
3946
3830
  const llm = new GerbilLLM({
3947
- model: "qwen3-0.6b",
3831
+ model: "qwen3.5-0.8b",
3948
3832
  temperature: 0.7,
3949
3833
  });
3950
3834
 
@@ -3953,135 +3837,6 @@ const chain = new LLMChain({ llm, prompt });
3953
3837
  const result = await chain.call({ text: "..." });`
3954
3838
  }
3955
3839
  },
3956
- vision: {
3957
- standalone: {
3958
- install: "npm install @tryhamster/gerbil",
3959
- description: "Vision models for image understanding and description.",
3960
- code: `import { Gerbil } from "@tryhamster/gerbil";
3961
-
3962
- const g = new Gerbil();
3963
- await g.loadModel("ministral-3b"); // Vision-capable model
3964
-
3965
- // Describe an image
3966
- const result = await g.generate("What's in this image?", {
3967
- images: [{ source: "https://example.com/photo.jpg" }]
3968
- });
3969
- console.log(result.text);
3970
-
3971
- // Compare images
3972
- const diff = await g.generate("What's different?", {
3973
- images: [
3974
- { source: "before.jpg" },
3975
- { source: "after.jpg" }
3976
- ]
3977
- });
3978
-
3979
- // Check if model supports vision
3980
- console.log(g.supportsVision()); // true`
3981
- },
3982
- "ai-sdk": {
3983
- install: "npm install @tryhamster/gerbil ai",
3984
- description: "AI SDK with image content parts.",
3985
- code: `import { generateText } from "ai";
3986
- import { gerbil } from "@tryhamster/gerbil/ai";
3987
-
3988
- const { text } = await generateText({
3989
- model: gerbil("ministral-3b"),
3990
- messages: [{
3991
- role: "user",
3992
- content: [
3993
- { type: "image", image: new URL("https://example.com/photo.jpg") },
3994
- { type: "text", text: "Describe this image in detail" },
3995
- ],
3996
- }],
3997
- });
3998
-
3999
- // Also accepts:
4000
- // - Base64 strings: { type: "image", image: "data:image/png;base64,..." }
4001
- // - Uint8Array: { type: "image", image: bytes, mimeType: "image/png" }`
4002
- },
4003
- browser: {
4004
- install: "npm install @tryhamster/gerbil",
4005
- description: "React hooks with image attachment support.",
4006
- code: `import { useChat } from "@tryhamster/gerbil/browser";
4007
-
4008
- function VisionChat() {
4009
- const { messages, input, setInput, handleSubmit, attachImage, attachedImages } = useChat({
4010
- model: "ministral-3b"
4011
- });
4012
-
4013
- const handleFile = (e) => {
4014
- const file = e.target.files?.[0];
4015
- if (file) {
4016
- const reader = new FileReader();
4017
- reader.onload = () => attachImage(reader.result);
4018
- reader.readAsDataURL(file);
4019
- }
4020
- };
4021
-
4022
- return (
4023
- <div>
4024
- {messages.map(m => <div key={m.id}>{m.content}</div>)}
4025
- <input type="file" accept="image/*" onChange={handleFile} />
4026
- {attachedImages.length > 0 && <span>📎 {attachedImages.length} attached</span>}
4027
- <form onSubmit={handleSubmit}>
4028
- <input value={input} onChange={e => setInput(e.target.value)} />
4029
- </form>
4030
- </div>
4031
- );
4032
- }`
4033
- },
4034
- nextjs: {
4035
- install: "npm install @tryhamster/gerbil",
4036
- description: "Next.js vision endpoint with image handling.",
4037
- code: `// app/api/vision/route.ts
4038
- import { gerbil } from "@tryhamster/gerbil/next";
4039
-
4040
- export const POST = gerbil.handler({ model: "ministral-3b" });
4041
-
4042
- // Client fetch:
4043
- // await fetch("/api/vision", {
4044
- // method: "POST",
4045
- // body: JSON.stringify({
4046
- // prompt: "What's in this image?",
4047
- // images: [{ source: dataUri }]
4048
- // })
4049
- // });`
4050
- },
4051
- express: {
4052
- install: "npm install @tryhamster/gerbil express",
4053
- description: "Express vision endpoint with image input.",
4054
- code: `import express from "express";
4055
- import { gerbil } from "@tryhamster/gerbil/express";
4056
-
4057
- const app = express();
4058
- app.use(express.json({ limit: "10mb" })); // For large images
4059
- app.use("/api/vision", gerbil({ model: "ministral-3b" })());
4060
-
4061
- // POST /api/vision/generate
4062
- // Body: { prompt: "Describe this", images: [{ source: "..." }] }
4063
-
4064
- app.listen(3000);`
4065
- },
4066
- langchain: {
4067
- install: "npm install @tryhamster/gerbil langchain",
4068
- description: "LangChain LLM with vision support.",
4069
- code: `import { GerbilLLM } from "@tryhamster/gerbil/langchain";
4070
-
4071
- const llm = new GerbilLLM({ model: "ministral-3b" });
4072
-
4073
- // Check vision support
4074
- const hasVision = await llm.supportsVision();
4075
-
4076
- // Generate with images
4077
- const result = await llm.invokeWithImages(
4078
- "Describe what you see in this image",
4079
- [{ source: "https://example.com/photo.jpg" }]
4080
- );
4081
-
4082
- console.log(result);`
4083
- }
4084
- },
4085
3840
  tts: {
4086
3841
  standalone: {
4087
3842
  install: "npm install @tryhamster/gerbil",
@@ -4132,24 +3887,26 @@ const voices = gerbil.listVoices();
4132
3887
  browser: {
4133
3888
  install: "npm install @tryhamster/gerbil",
4134
3889
  description: "React hook for browser TTS with playback.",
4135
- code: `import { useSpeech } from "@tryhamster/gerbil/browser";
3890
+ code: `import { useTTS, KANI_VOICES, type SpeakOptions } from "@tryhamster/gerbil/hooks";
3891
+ import { useState } from "react";
4136
3892
 
4137
3893
  function SpeechDemo() {
4138
- const { speak, stop, isSpeaking, isLoading, listVoices, setVoice } = useSpeech();
3894
+ const { speak, stop, isPlaying, isLoading } = useTTS();
3895
+ const [voice, setVoice] = useState<SpeakOptions["voice"]>(KANI_VOICES[0].value);
4139
3896
 
4140
3897
  if (isLoading) return <div>Loading TTS model...</div>;
4141
3898
 
4142
3899
  return (
4143
3900
  <div>
4144
- <select onChange={e => setVoice(e.target.value)}>
4145
- {listVoices().map(v => (
4146
- <option key={v.id} value={v.id}>{v.name} ({v.language})</option>
3901
+ <select value={voice} onChange={e => setVoice(e.target.value as SpeakOptions["voice"])}>
3902
+ {KANI_VOICES.map(v => (
3903
+ <option key={v.value} value={v.value}>{v.label}</option>
4147
3904
  ))}
4148
3905
  </select>
4149
- <button onClick={() => speak("Hello world!")}>
4150
- {isSpeaking ? "Speaking..." : "Speak"}
3906
+ <button onClick={() => speak("Hello world!", { voice })}>
3907
+ {isPlaying ? "Speaking..." : "Speak"}
4151
3908
  </button>
4152
- {isSpeaking && <button onClick={stop}>Stop</button>}
3909
+ {isPlaying && <button onClick={stop}>Stop</button>}
4153
3910
  </div>
4154
3911
  );
4155
3912
  }`
@@ -4273,33 +4030,32 @@ const models = gerbil.listTranscriptionModels();`
4273
4030
  browser: {
4274
4031
  install: "npm install @tryhamster/gerbil",
4275
4032
  description: "React hooks for recording and transcription.",
4276
- code: `import { useVoiceInput, useVoiceChat } from "@tryhamster/gerbil/browser";
4033
+ code: `import { useSTT, useVoiceChat } from "@tryhamster/gerbil/hooks";
4277
4034
 
4278
4035
  // Record and transcribe
4279
4036
  function VoiceInput() {
4280
- const { startRecording, stopRecording, isRecording, transcript } = useVoiceInput({
4281
- model: "whisper-tiny.en",
4282
- onTranscript: (text) => console.log("User said:", text),
4283
- });
4037
+ const { startRecording, stopRecording, isRecording, transcript } = useSTT();
4284
4038
 
4285
4039
  return (
4286
- <button onClick={isRecording ? stopRecording : startRecording}>
4287
- {isRecording ? "🔴 Stop" : "🎤 Record"}
4288
- </button>
4040
+ <div>
4041
+ <button onClick={isRecording ? stopRecording : startRecording}>
4042
+ {isRecording ? "🔴 Stop" : "🎤 Record"}
4043
+ </button>
4044
+ {transcript && <p>{transcript}</p>}
4045
+ </div>
4289
4046
  );
4290
4047
  }
4291
4048
 
4292
4049
  // Full voice conversation: STT → LLM → TTS
4293
4050
  function VoiceAssistant() {
4294
- const { startListening, stopListening, stage, messages } = useVoiceChat({
4295
- llmModel: "qwen3-0.6b",
4296
- sttModel: "whisper-tiny.en",
4297
- voice: "af_bella",
4051
+ const { start, stop, isListening, messages } = useVoiceChat({
4052
+ model: "qwen3.5-0.8b",
4053
+ voice: "en_us",
4298
4054
  });
4299
4055
 
4300
4056
  return (
4301
- <button onMouseDown={startListening} onMouseUp={stopListening}>
4302
- {stage === "idle" ? "🎤 Hold to Speak" : stage}
4057
+ <button onMouseDown={start} onMouseUp={stop}>
4058
+ {isListening ? "Listening..." : "🎤 Hold to Speak"}
4303
4059
  </button>
4304
4060
  );
4305
4061
  }`
@@ -4428,25 +4184,21 @@ const result = await g.embed("Your text here");
4428
4184
  browser: {
4429
4185
  install: "npm install @tryhamster/gerbil",
4430
4186
  description: "Browser-based embeddings with WebGPU.",
4431
- code: `import { createGerbilWorker } from "@tryhamster/gerbil/browser";
4187
+ code: `import { useEmbedding } from "@tryhamster/gerbil/hooks";
4432
4188
 
4433
- // Create worker for embeddings
4434
- const worker = await createGerbilWorker({ modelId: "qwen3-0.6b" });
4189
+ function Embeddings() {
4190
+ const { embed, similarity, isLoading } = useEmbedding();
4435
4191
 
4436
- // Note: Browser embeddings use the main model
4437
- // For dedicated embedding model, use Gerbil on server
4192
+ if (isLoading) return <div>Loading embedding model...</div>;
4438
4193
 
4439
- // Server-side approach (recommended):
4440
- // 1. Create API endpoint with Gerbil
4441
- // 2. Call from browser
4194
+ const run = async () => {
4195
+ const { vector } = await embed("Hello world"); // number[]
4196
+ const score = await similarity("cat", "kitten"); // 0..1
4197
+ console.log(vector.length, score);
4198
+ };
4442
4199
 
4443
- // app/api/embed/route.ts
4444
- // const g = new Gerbil();
4445
- // export async function POST(req) {
4446
- // const { text } = await req.json();
4447
- // const result = await g.embed(text);
4448
- // return Response.json(result);
4449
- // }`
4200
+ return <button onClick={run}>Embed</button>;
4201
+ }`
4450
4202
  },
4451
4203
  nextjs: {
4452
4204
  install: "npm install @tryhamster/gerbil",
@@ -4665,106 +4417,33 @@ function FrameworksView() {
4665
4417
  //#region src/cli/repl/views/InfoView.tsx
4666
4418
  function getDeviceLabel(deviceMode) {
4667
4419
  const platform = os.platform();
4668
- const arch = os.arch();
4669
4420
  if (deviceMode === "webgpu") {
4670
- if (platform === "darwin" && arch === "arm64") return "WebGPU (Chrome → Metal)";
4671
- if (platform === "linux") return "WebGPU (Chrome → Vulkan)";
4672
- if (platform === "win32") return "WebGPU (Chrome → D3D12)";
4673
- return "WebGPU (Chrome)";
4421
+ if (platform === "darwin") return "WebGPU (Dawn → Metal)";
4422
+ if (platform === "linux") return "WebGPU (Dawn → Vulkan)";
4423
+ if (platform === "win32") return "WebGPU (Dawn → D3D12)";
4424
+ return "WebGPU (Dawn)";
4674
4425
  }
4675
- if (platform === "darwin" && arch === "arm64") return `Apple Silicon (${deviceMode.toUpperCase()})`;
4676
- if (platform === "darwin" && arch === "x64") return `Intel Mac (${deviceMode.toUpperCase()})`;
4677
4426
  return deviceMode.toUpperCase();
4678
4427
  }
4679
4428
  function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
4680
4429
  const deviceMode = gerbil.getDeviceMode();
4681
4430
  const dtype = gerbil.getDtype();
4682
- const chromeStatus = gerbil.getChromeStatus();
4683
4431
  const deviceLabel = getDeviceLabel(deviceMode);
4684
4432
  const cacheInfo = getCacheInfo();
4685
4433
  const memInfo = getMemoryInfo();
4686
- const [webgpuInfo, setWebgpuInfo] = useState(null);
4687
- const [allPages, setAllPages] = useState([]);
4688
- const [totalPageCount, setTotalPageCount] = useState(0);
4689
- const [selectedIndex, setSelectedIndex] = useState(-1);
4690
- const [killing, setKilling] = useState(false);
4691
- const [killResult, setKillResult] = useState(null);
4692
4434
  const [ttsInfo, setTtsInfo] = useState(gerbil.getTTSModelInfo());
4693
4435
  const [sttInfo, setSttInfo] = useState(gerbil.getSTTModelInfo());
4694
- const fetchInfo = useCallback(async () => {
4695
- const [info, pages, totalPages] = await Promise.all([
4696
- Gerbil.getWebGPUProcesses(),
4697
- Gerbil.getAllChromePagesInfo(),
4698
- Gerbil.getTotalChromePageCount()
4699
- ]);
4700
- setWebgpuInfo(info);
4701
- setAllPages(pages || []);
4702
- setTotalPageCount(totalPages);
4703
- setTtsInfo(gerbil.getTTSModelInfo());
4704
- setSttInfo(gerbil.getSTTModelInfo());
4705
- const pageCount = pages?.length ?? 0;
4706
- if (selectedIndex >= pageCount) setSelectedIndex(pageCount > 0 ? 0 : -1);
4707
- }, [selectedIndex, gerbil]);
4708
4436
  useEffect(() => {
4709
- fetchInfo();
4710
- const interval = setInterval(fetchInfo, 2e3);
4437
+ const refresh = () => {
4438
+ setTtsInfo(gerbil.getTTSModelInfo());
4439
+ setSttInfo(gerbil.getSTTModelInfo());
4440
+ };
4441
+ refresh();
4442
+ const interval = setInterval(refresh, 2e3);
4711
4443
  return () => clearInterval(interval);
4712
- }, [fetchInfo]);
4713
- useInput(async (input, key) => {
4714
- if (input === "c" || input === "C") {
4715
- onGoToCache?.();
4716
- return;
4717
- }
4718
- if (input === "r" || input === "R") {
4719
- await fetchInfo();
4720
- return;
4721
- }
4722
- const pageCount = allPages.length;
4723
- if (pageCount > 0) {
4724
- if (key.downArrow || input === "j") {
4725
- setSelectedIndex((prev) => Math.min(prev + 1, pageCount - 1));
4726
- return;
4727
- }
4728
- if (key.upArrow || input === "k") {
4729
- setSelectedIndex((prev) => Math.max(prev - 1, 0));
4730
- return;
4731
- }
4732
- const num = Number.parseInt(input, 10);
4733
- if (num >= 1 && num <= 9 && num <= pageCount) {
4734
- setSelectedIndex(num - 1);
4735
- return;
4736
- }
4737
- }
4738
- if (input === "x" && !killing && selectedIndex >= 0 && allPages[selectedIndex]) {
4739
- setKilling(true);
4740
- setKillResult(null);
4741
- try {
4742
- const page = allPages[selectedIndex];
4743
- if (await Gerbil.killChromePage(selectedIndex)) setKillResult(`Killed: ${page.modelId?.split("/").pop() || "page"}${page.isOurs ? "" : " (other session)"}`);
4744
- else setKillResult("Failed to kill page");
4745
- await fetchInfo();
4746
- } catch (err) {
4747
- setKillResult(`Error: ${err.message}`);
4748
- }
4749
- setKilling(false);
4750
- setTimeout(() => setKillResult(null), 3e3);
4751
- return;
4752
- }
4753
- if (input === "K" && !killing && webgpuInfo?.browser.running) {
4754
- setKilling(true);
4755
- setKillResult(null);
4756
- try {
4757
- const result = await Gerbil.killAllWebGPU();
4758
- if (result) setKillResult(`Killed ${result.pagesKilled} page(s)${result.browserKilled ? ", browser closed" : ""}`);
4759
- else setKillResult("No WebGPU processes to kill");
4760
- await fetchInfo();
4761
- setSelectedIndex(-1);
4762
- } catch (err) {
4763
- setKillResult(`Error: ${err.message}`);
4764
- }
4765
- setKilling(false);
4766
- setTimeout(() => setKillResult(null), 3e3);
4767
- }
4444
+ }, [gerbil]);
4445
+ useInput((input) => {
4446
+ if (input === "c" || input === "C") onGoToCache?.();
4768
4447
  });
4769
4448
  return /* @__PURE__ */ jsxs(Box, {
4770
4449
  flexDirection: "column",
@@ -4803,10 +4482,18 @@ function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
4803
4482
  color: deviceMode === "webgpu" ? "green" : "yellow",
4804
4483
  children: deviceLabel
4805
4484
  })] }),
4806
- /* @__PURE__ */ jsxs(Text, { children: ["Dtype: ", /* @__PURE__ */ jsx(Text, {
4807
- color: dtype === "q4f16" ? "green" : "gray",
4808
- children: dtype
4809
- })] })
4485
+ /* @__PURE__ */ jsxs(Text, { children: [
4486
+ "Weights: ",
4487
+ /* @__PURE__ */ jsx(Text, {
4488
+ color: "green",
4489
+ children: dtype
4490
+ }),
4491
+ " ",
4492
+ /* @__PURE__ */ jsx(Text, {
4493
+ dimColor: true,
4494
+ children: "(INT4, quantized on load)"
4495
+ })
4496
+ ] })
4810
4497
  ]
4811
4498
  }),
4812
4499
  /* @__PURE__ */ jsxs(Box, {
@@ -4900,269 +4587,88 @@ function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
4900
4587
  /* @__PURE__ */ jsxs(Box, {
4901
4588
  flexDirection: "row",
4902
4589
  marginBottom: 1,
4903
- children: [
4904
- /* @__PURE__ */ jsxs(Box, {
4905
- borderColor: "gray",
4906
- borderStyle: "single",
4907
- flexDirection: "column",
4908
- marginRight: 1,
4909
- minWidth: 30,
4910
- paddingX: 1,
4911
- children: [
4912
- /* @__PURE__ */ jsx(Text, {
4913
- bold: true,
4914
- color: "cyan",
4915
- children: "Memory"
4916
- }),
4917
- /* @__PURE__ */ jsxs(Text, { children: ["Total: ", /* @__PURE__ */ jsx(Text, {
4918
- color: "gray",
4919
- children: memInfo.total
4920
- })] }),
4921
- /* @__PURE__ */ jsxs(Text, { children: [
4922
- "Used:",
4923
- " ",
4924
- /* @__PURE__ */ jsxs(Text, {
4925
- color: memInfo.percentUsed > 80 ? "red" : "yellow",
4926
- children: [
4927
- memInfo.used,
4928
- " (",
4929
- memInfo.percentUsed,
4930
- "%)"
4931
- ]
4932
- })
4933
- ] }),
4934
- /* @__PURE__ */ jsxs(Text, { children: ["Free: ", /* @__PURE__ */ jsx(Text, {
4590
+ children: [/* @__PURE__ */ jsxs(Box, {
4591
+ borderColor: "gray",
4592
+ borderStyle: "single",
4593
+ flexDirection: "column",
4594
+ marginRight: 1,
4595
+ minWidth: 30,
4596
+ paddingX: 1,
4597
+ children: [
4598
+ /* @__PURE__ */ jsx(Text, {
4599
+ bold: true,
4600
+ color: "cyan",
4601
+ children: "Memory"
4602
+ }),
4603
+ /* @__PURE__ */ jsxs(Text, { children: ["Total: ", /* @__PURE__ */ jsx(Text, {
4604
+ color: "gray",
4605
+ children: memInfo.total
4606
+ })] }),
4607
+ /* @__PURE__ */ jsxs(Text, { children: [
4608
+ "Used:",
4609
+ " ",
4610
+ /* @__PURE__ */ jsxs(Text, {
4611
+ color: memInfo.percentUsed > 80 ? "red" : "yellow",
4612
+ children: [
4613
+ memInfo.used,
4614
+ " (",
4615
+ memInfo.percentUsed,
4616
+ "%)"
4617
+ ]
4618
+ })
4619
+ ] }),
4620
+ /* @__PURE__ */ jsxs(Text, { children: ["Free: ", /* @__PURE__ */ jsx(Text, {
4621
+ color: "green",
4622
+ children: memInfo.free
4623
+ })] })
4624
+ ]
4625
+ }), /* @__PURE__ */ jsxs(Box, {
4626
+ borderColor: "gray",
4627
+ borderStyle: "single",
4628
+ flexDirection: "column",
4629
+ marginRight: 1,
4630
+ minWidth: 30,
4631
+ paddingX: 1,
4632
+ children: [
4633
+ /* @__PURE__ */ jsx(Text, {
4634
+ bold: true,
4635
+ color: "cyan",
4636
+ children: "Session Stats"
4637
+ }),
4638
+ /* @__PURE__ */ jsxs(Text, { children: ["Prompts: ", /* @__PURE__ */ jsx(Text, {
4639
+ color: "gray",
4640
+ children: stats.prompts
4641
+ })] }),
4642
+ /* @__PURE__ */ jsxs(Text, { children: ["Tokens In: ", /* @__PURE__ */ jsx(Text, {
4643
+ color: "gray",
4644
+ children: stats.tokensIn
4645
+ })] }),
4646
+ /* @__PURE__ */ jsxs(Text, { children: ["Tokens Out: ", /* @__PURE__ */ jsx(Text, {
4647
+ color: "gray",
4648
+ children: stats.tokensOut
4649
+ })] }),
4650
+ /* @__PURE__ */ jsxs(Text, { children: ["Total Time: ", /* @__PURE__ */ jsxs(Text, {
4651
+ color: "gray",
4652
+ children: [Math.round(stats.totalTimeMs / 1e3), "s"]
4653
+ })] }),
4654
+ stats.lastTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Last tok/s: ", /* @__PURE__ */ jsx(Text, {
4655
+ color: "yellow",
4656
+ children: stats.lastTokPerSec.toFixed(1)
4657
+ })] }),
4658
+ stats.avgTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Avg tok/s: ", /* @__PURE__ */ jsx(Text, {
4659
+ color: "cyan",
4660
+ children: stats.avgTokPerSec.toFixed(1)
4661
+ })] }),
4662
+ stats.benchResults.length > 0 && /* @__PURE__ */ jsxs(Text, { children: [
4663
+ "Best Bench:",
4664
+ " ",
4665
+ /* @__PURE__ */ jsxs(Text, {
4935
4666
  color: "green",
4936
- children: memInfo.free
4937
- })] })
4938
- ]
4939
- }),
4940
- /* @__PURE__ */ jsxs(Box, {
4941
- borderColor: "gray",
4942
- borderStyle: "single",
4943
- flexDirection: "column",
4944
- marginRight: 1,
4945
- minWidth: 30,
4946
- paddingX: 1,
4947
- children: [
4948
- /* @__PURE__ */ jsx(Text, {
4949
- bold: true,
4950
- color: "cyan",
4951
- children: "Session Stats"
4952
- }),
4953
- /* @__PURE__ */ jsxs(Text, { children: ["Prompts: ", /* @__PURE__ */ jsx(Text, {
4954
- color: "gray",
4955
- children: stats.prompts
4956
- })] }),
4957
- /* @__PURE__ */ jsxs(Text, { children: ["Tokens In: ", /* @__PURE__ */ jsx(Text, {
4958
- color: "gray",
4959
- children: stats.tokensIn
4960
- })] }),
4961
- /* @__PURE__ */ jsxs(Text, { children: ["Tokens Out: ", /* @__PURE__ */ jsx(Text, {
4962
- color: "gray",
4963
- children: stats.tokensOut
4964
- })] }),
4965
- /* @__PURE__ */ jsxs(Text, { children: ["Total Time: ", /* @__PURE__ */ jsxs(Text, {
4966
- color: "gray",
4967
- children: [Math.round(stats.totalTimeMs / 1e3), "s"]
4968
- })] }),
4969
- stats.lastTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Last tok/s: ", /* @__PURE__ */ jsx(Text, {
4970
- color: "yellow",
4971
- children: stats.lastTokPerSec.toFixed(1)
4972
- })] }),
4973
- stats.avgTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Avg tok/s: ", /* @__PURE__ */ jsx(Text, {
4974
- color: "cyan",
4975
- children: stats.avgTokPerSec.toFixed(1)
4976
- })] }),
4977
- stats.benchResults.length > 0 && /* @__PURE__ */ jsxs(Text, { children: [
4978
- "Best Bench:",
4979
- " ",
4980
- /* @__PURE__ */ jsxs(Text, {
4981
- color: "green",
4982
- children: [Math.max(...stats.benchResults.map((b) => b.tokPerSec)), " tok/s"]
4983
- })
4984
- ] })
4985
- ]
4986
- }),
4987
- chromeStatus && /* @__PURE__ */ jsxs(Box, {
4988
- borderColor: "green",
4989
- borderStyle: "single",
4990
- flexDirection: "column",
4991
- minWidth: 38,
4992
- paddingX: 1,
4993
- children: [
4994
- /* @__PURE__ */ jsx(Text, {
4995
- bold: true,
4996
- color: "green",
4997
- children: "WebGPU Backend"
4998
- }),
4999
- /* @__PURE__ */ jsxs(Text, { children: ["Status: ", /* @__PURE__ */ jsx(Text, {
5000
- color: "green",
5001
- children: "● Running"
5002
- })] }),
5003
- /* @__PURE__ */ jsxs(Text, { children: [
5004
- "PID:",
5005
- " ",
5006
- /* @__PURE__ */ jsx(Text, {
5007
- color: chromeStatus.pid ? "gray" : "yellow",
5008
- children: chromeStatus.pid ?? "connected (no PID)"
5009
- })
5010
- ] }),
5011
- /* @__PURE__ */ jsxs(Text, { children: ["Port: ", /* @__PURE__ */ jsx(Text, {
5012
- color: "gray",
5013
- children: chromeStatus.port
5014
- })] }),
5015
- /* @__PURE__ */ jsxs(Text, { children: [
5016
- "Model:",
5017
- " ",
5018
- /* @__PURE__ */ jsx(Text, {
5019
- color: "gray",
5020
- children: chromeStatus.modelId.length > 28 ? `...${chromeStatus.modelId.slice(-25)}` : chromeStatus.modelId
5021
- })
5022
- ] })
5023
- ]
5024
- })
5025
- ]
5026
- }),
5027
- webgpuInfo && /* @__PURE__ */ jsxs(Box, {
5028
- borderColor: webgpuInfo.browser.running ? "green" : "gray",
5029
- borderStyle: "single",
5030
- flexDirection: "column",
5031
- marginBottom: 1,
5032
- paddingX: 1,
5033
- children: [
5034
- /* @__PURE__ */ jsxs(Text, {
5035
- bold: true,
5036
- color: webgpuInfo.browser.running ? "green" : "gray",
5037
- children: ["WebGPU Processes ", /* @__PURE__ */ jsx(Text, {
5038
- dimColor: true,
5039
- children: "(all sessions)"
5040
- })]
5041
- }),
5042
- /* @__PURE__ */ jsxs(Text, { children: [
5043
- "Chrome:",
5044
- " ",
5045
- /* @__PURE__ */ jsx(Text, {
5046
- color: webgpuInfo.browser.running ? "green" : "gray",
5047
- children: webgpuInfo.browser.running ? "● Running" : "○ Not running"
5048
- })
5049
- ] }),
5050
- webgpuInfo.browser.running && /* @__PURE__ */ jsxs(Fragment, { children: [
5051
- /* @__PURE__ */ jsxs(Text, { children: [
5052
- " ",
5053
- "PID: ",
5054
- /* @__PURE__ */ jsx(Text, {
5055
- color: "gray",
5056
- children: webgpuInfo.browser.pid ?? "unknown"
5057
- })
5058
- ] }),
5059
- /* @__PURE__ */ jsxs(Text, { children: [
5060
- " ",
5061
- "Port: ",
5062
- /* @__PURE__ */ jsx(Text, {
5063
- color: "gray",
5064
- children: webgpuInfo.browser.port
5065
- })
5066
- ] }),
5067
- /* @__PURE__ */ jsxs(Text, { children: [
5068
- " ",
5069
- "Total Pages:",
5070
- " ",
5071
- /* @__PURE__ */ jsx(Text, {
5072
- color: totalPageCount > 0 ? "yellow" : "gray",
5073
- children: totalPageCount
5074
- }),
5075
- " ",
5076
- /* @__PURE__ */ jsxs(Text, {
5077
- dimColor: true,
5078
- children: [
5079
- "(",
5080
- webgpuInfo.browser.activePagesCount,
5081
- " ours)"
5082
- ]
4667
+ children: [Math.max(...stats.benchResults.map((b) => b.tokPerSec)), " tok/s"]
5083
4668
  })
5084
4669
  ] })
5085
- ] }),
5086
- allPages.length > 0 && /* @__PURE__ */ jsxs(Box, {
5087
- flexDirection: "column",
5088
- marginTop: 1,
5089
- children: [/* @__PURE__ */ jsxs(Text, {
5090
- dimColor: true,
5091
- children: [
5092
- "Active Pages ",
5093
- /* @__PURE__ */ jsx(Text, {
5094
- color: "gray",
5095
- children: "(↑↓ select, x kill, K kill all)"
5096
- }),
5097
- ":"
5098
- ]
5099
- }), allPages.map((page, i) => {
5100
- const isSelected = i === selectedIndex;
5101
- const modelName = page.modelId || "loading...";
5102
- const shortName = modelName.length > 30 ? `...${modelName.slice(-27)}` : modelName;
5103
- return /* @__PURE__ */ jsxs(Box, {
5104
- flexDirection: "column",
5105
- marginLeft: 1,
5106
- children: [/* @__PURE__ */ jsxs(Text, { children: [
5107
- /* @__PURE__ */ jsxs(Text, {
5108
- color: "gray",
5109
- children: [i + 1, "."]
5110
- }),
5111
- " ",
5112
- /* @__PURE__ */ jsx(Text, {
5113
- color: isSelected ? "cyan" : page.isOurs ? "green" : "yellow",
5114
- children: isSelected ? "▶" : page.isOurs ? "●" : "○"
5115
- }),
5116
- " ",
5117
- /* @__PURE__ */ jsx(Text, {
5118
- bold: isSelected,
5119
- inverse: isSelected,
5120
- children: shortName
5121
- }),
5122
- page.isOurs ? /* @__PURE__ */ jsxs(Text, {
5123
- color: "green",
5124
- dimColor: true,
5125
- children: [" ", "(this session)"]
5126
- }) : /* @__PURE__ */ jsxs(Text, {
5127
- color: "yellow",
5128
- dimColor: true,
5129
- children: [" ", "(other session)"]
5130
- })
5131
- ] }), page.memory && /* @__PURE__ */ jsxs(Text, {
5132
- dimColor: true,
5133
- children: [
5134
- " ",
5135
- "Heap: ",
5136
- page.memory.usedGB.toFixed(2),
5137
- "GB used of",
5138
- " ",
5139
- page.memory.totalGB.toFixed(2),
5140
- "GB"
5141
- ]
5142
- })]
5143
- }, i);
5144
- })]
5145
- }),
5146
- totalPageCount === 0 && webgpuInfo.browser.running && /* @__PURE__ */ jsx(Text, {
5147
- color: "yellow",
5148
- dimColor: true,
5149
- children: "⚠ No active pages (zombie browser?) - press K to kill"
5150
- }),
5151
- killing && /* @__PURE__ */ jsx(Box, {
5152
- marginTop: 1,
5153
- children: /* @__PURE__ */ jsx(Text, {
5154
- color: "cyan",
5155
- children: "Killing..."
5156
- })
5157
- }),
5158
- killResult && /* @__PURE__ */ jsx(Box, {
5159
- marginTop: 1,
5160
- children: /* @__PURE__ */ jsx(Text, {
5161
- color: "cyan",
5162
- children: killResult
5163
- })
5164
- })
5165
- ]
4670
+ ]
4671
+ })]
5166
4672
  }),
5167
4673
  /* @__PURE__ */ jsxs(Box, {
5168
4674
  borderColor: "gray",
@@ -5236,36 +4742,6 @@ function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
5236
4742
  children: "c"
5237
4743
  }),
5238
4744
  " cache | ",
5239
- /* @__PURE__ */ jsx(Text, {
5240
- color: "cyan",
5241
- children: "r"
5242
- }),
5243
- " refresh",
5244
- allPages.length > 0 && /* @__PURE__ */ jsxs(Fragment, { children: [
5245
- " ",
5246
- "| ",
5247
- /* @__PURE__ */ jsx(Text, {
5248
- color: "red",
5249
- children: "x"
5250
- }),
5251
- " kill selected | ",
5252
- /* @__PURE__ */ jsx(Text, {
5253
- color: "red",
5254
- children: "K"
5255
- }),
5256
- " kill all"
5257
- ] }),
5258
- webgpuInfo?.browser.running && allPages.length === 0 && /* @__PURE__ */ jsxs(Fragment, { children: [
5259
- " ",
5260
- "| ",
5261
- /* @__PURE__ */ jsx(Text, {
5262
- color: "red",
5263
- children: "K"
5264
- }),
5265
- " kill browser"
5266
- ] }),
5267
- " ",
5268
- "| ",
5269
4745
  /* @__PURE__ */ jsx(Text, {
5270
4746
  color: "gray",
5271
4747
  children: "Esc"
@@ -5354,6 +4830,29 @@ function formatContext(contextLength) {
5354
4830
  if (contextLength >= 1e3) return `${(contextLength / 1e3).toFixed(0)}K`;
5355
4831
  return `${contextLength}`;
5356
4832
  }
4833
+ /**
4834
+ * Read a model's config.json and decide whether the native WebGPU engine
4835
+ * currently supports its architecture. Uses the same `architectures` key the
4836
+ * loader reads, falling back to `model_type`. A model that is NOT supported can
4837
+ * still be selected and downloaded (discovery) — it just fails loudly at load
4838
+ * with a clear "Unsupported model architecture" message, and Gerbil may add
4839
+ * support for it later.
4840
+ */
4841
+ async function detectNativeSupport(modelId) {
4842
+ try {
4843
+ const res = await fetch(`https://huggingface.co/${modelId}/raw/main/config.json`);
4844
+ if (!res.ok) return { supported: false };
4845
+ const config = await res.json();
4846
+ const architecture = (Array.isArray(config.architectures) ? config.architectures[0] : void 0) || config.model_type;
4847
+ if (!architecture) return { supported: false };
4848
+ return {
4849
+ architecture,
4850
+ supported: isArchitectureSupported(architecture)
4851
+ };
4852
+ } catch {
4853
+ return { supported: false };
4854
+ }
4855
+ }
5357
4856
  const TTS_MODELS = [{
5358
4857
  id: "kokoro-82m",
5359
4858
  name: "Kokoro 82M",
@@ -5424,7 +4923,6 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
5424
4923
  const abortControllerRef = useRef(null);
5425
4924
  const hasFetchedOnMountRef = useRef(false);
5426
4925
  const refreshCachedModels = async () => {
5427
- await refreshCachedModelSizes().catch(() => {});
5428
4926
  const cached = /* @__PURE__ */ new Set();
5429
4927
  for (const model of PRESET_MODELS) if (isModelCached(model.hfId)) cached.add(model.id);
5430
4928
  setCachedModels(cached);
@@ -5487,10 +4985,14 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
5487
4985
  params = p.BF16 || p.F16 || p.F32 || info.safetensors.total || 0;
5488
4986
  }
5489
4987
  const metadata = await fetchModelMetadata(modelId);
4988
+ const sizeBytes = metadata.sizeBytes || info.usedStorage || 0;
4989
+ const { architecture, supported } = await detectNativeSupport(modelId);
5490
4990
  return {
5491
- sizeBytes: metadata.sizeBytes || info.usedStorage || 0,
4991
+ sizeBytes,
5492
4992
  params,
5493
- contextLength: metadata.contextLength
4993
+ contextLength: metadata.contextLength,
4994
+ architecture,
4995
+ supported
5494
4996
  };
5495
4997
  } catch {
5496
4998
  return null;
@@ -5508,6 +5010,8 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
5508
5010
  sizeBytes: result.sizeBytes,
5509
5011
  params: result.params,
5510
5012
  contextLength: result.contextLength,
5013
+ architecture: result.architecture,
5014
+ supported: result.supported,
5511
5015
  loading: false
5512
5016
  };
5513
5017
  else if (updatedModels[modelIdx]) updatedModels[modelIdx] = {
@@ -5517,6 +5021,12 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
5517
5021
  });
5518
5022
  setHfModels([...updatedModels]);
5519
5023
  }
5024
+ const detected = updatedModels.filter((m) => m.supported !== void 0);
5025
+ if (detected.length > 0 && process.env.GERBIL_DEBUG) {
5026
+ const unsupported = detected.filter((m) => m.supported === false).length;
5027
+ const pct = Math.round(unsupported / detected.length * 100);
5028
+ console.error(`[discovery] ${unsupported}/${detected.length} (${pct}%) sampled HF models are outside the native architecture whitelist`);
5029
+ }
5520
5030
  };
5521
5031
  const fetchHFModels = async (query, append = false, sort = sortMode) => {
5522
5032
  if (hfLoading) return;
@@ -5527,16 +5037,16 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
5527
5037
  try {
5528
5038
  const offset = append ? hfModels.length : 0;
5529
5039
  const params = new URLSearchParams({
5530
- filter: "onnx",
5040
+ filter: "text-generation",
5041
+ library: "safetensors",
5531
5042
  sort: "downloads",
5532
5043
  direction: "-1",
5533
5044
  limit: String(PAGE_SIZE * 2)
5534
5045
  });
5535
5046
  if (query) {
5536
5047
  const searchTerms = query.split(",").map((t) => t.trim()).filter(Boolean);
5537
- if (!searchTerms.some((t) => t.toLowerCase().includes("onnx"))) searchTerms.push("onnx");
5538
- params.set("search", searchTerms.join(" "));
5539
- } else params.set("search", "onnx");
5048
+ if (searchTerms.length > 0) params.set("search", searchTerms.join(" "));
5049
+ }
5540
5050
  if (offset > 0) params.set("skip", String(offset));
5541
5051
  const res = await fetch(`https://huggingface.co/api/models?${params}`, { signal: abortControllerRef.current.signal });
5542
5052
  if (!res.ok) throw new Error("API error");
@@ -5546,13 +5056,6 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
5546
5056
  const aDate = a.createdAt ? new Date(a.createdAt).getTime() : 0;
5547
5057
  return (b.createdAt ? new Date(b.createdAt).getTime() : 0) - aDate;
5548
5058
  });
5549
- sortedData.sort((a, b) => {
5550
- const aHasQuant = a.id.includes("q4f16") || a.id.includes("q4") || a.id.startsWith("onnx-community/");
5551
- const bHasQuant = b.id.includes("q4f16") || b.id.includes("q4") || b.id.startsWith("onnx-community/");
5552
- if (aHasQuant && !bHasQuant) return -1;
5553
- if (!aHasQuant && bHasQuant) return 1;
5554
- return 0;
5555
- });
5556
5059
  const models = sortedData.map((m) => ({
5557
5060
  id: m.id,
5558
5061
  downloads: m.downloads || 0,
@@ -6164,7 +5667,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
6164
5667
  children: [/* @__PURE__ */ jsx(Spinner, { type: "dots" }), " "]
6165
5668
  }), /* @__PURE__ */ jsx(Text, {
6166
5669
  dimColor: true,
6167
- children: "Fetching ONNX models..."
5670
+ children: "Fetching models..."
6168
5671
  })] }),
6169
5672
  hfError && /* @__PURE__ */ jsx(Text, {
6170
5673
  color: "red",
@@ -6182,17 +5685,22 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
6182
5685
  /* @__PURE__ */ jsx(Text, {
6183
5686
  bold: true,
6184
5687
  dimColor: true,
6185
- children: "Model".padEnd(44)
5688
+ children: "Model".padEnd(40)
6186
5689
  }),
6187
5690
  /* @__PURE__ */ jsx(Text, {
6188
5691
  bold: true,
6189
5692
  dimColor: true,
6190
- children: "Params".padStart(8)
5693
+ children: "Engine".padEnd(13)
6191
5694
  }),
6192
5695
  /* @__PURE__ */ jsx(Text, {
6193
5696
  bold: true,
6194
5697
  dimColor: true,
6195
- children: "Size".padStart(10)
5698
+ children: "Params".padStart(7)
5699
+ }),
5700
+ /* @__PURE__ */ jsx(Text, {
5701
+ bold: true,
5702
+ dimColor: true,
5703
+ children: "Size".padStart(9)
6196
5704
  }),
6197
5705
  /* @__PURE__ */ jsx(Text, {
6198
5706
  bold: true,
@@ -6202,7 +5710,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
6202
5710
  /* @__PURE__ */ jsx(Text, {
6203
5711
  bold: true,
6204
5712
  dimColor: true,
6205
- children: "Updated".padStart(10)
5713
+ children: "Updated".padStart(9)
6206
5714
  }),
6207
5715
  /* @__PURE__ */ jsx(Text, {
6208
5716
  bold: true,
@@ -6212,6 +5720,19 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
6212
5720
  ] }),
6213
5721
  filteredModels.map((model, i) => {
6214
5722
  const isCached = isModelCached(model.id);
5723
+ let badge;
5724
+ if (model.loading || model.supported === void 0) badge = /* @__PURE__ */ jsx(Text, {
5725
+ dimColor: true,
5726
+ children: "…".padEnd(13)
5727
+ });
5728
+ else if (model.supported) badge = /* @__PURE__ */ jsx(Text, {
5729
+ color: "green",
5730
+ children: "native ✓".padEnd(13)
5731
+ });
5732
+ else badge = /* @__PURE__ */ jsx(Text, {
5733
+ color: "yellow",
5734
+ children: "unsupported".padEnd(13)
5735
+ });
6215
5736
  return /* @__PURE__ */ jsxs(Box, { children: [
6216
5737
  /* @__PURE__ */ jsxs(Text, {
6217
5738
  color: isCached ? "green" : "gray",
@@ -6219,15 +5740,16 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
6219
5740
  }),
6220
5741
  /* @__PURE__ */ jsxs(Text, {
6221
5742
  color: i === hfSelected ? "cyan" : "white",
6222
- children: [i === hfSelected ? "> " : " ", model.id.length > 42 ? `${model.id.slice(0, 39)}...` : model.id.padEnd(42)]
5743
+ children: [i === hfSelected ? "> " : " ", model.id.length > 38 ? `${model.id.slice(0, 35)}...` : model.id.padEnd(38)]
6223
5744
  }),
5745
+ badge,
6224
5746
  /* @__PURE__ */ jsx(Text, {
6225
5747
  color: "yellow",
6226
- children: formatParams(model.params, model.id).padStart(8)
5748
+ children: formatParams(model.params, model.id).padStart(7)
6227
5749
  }),
6228
5750
  /* @__PURE__ */ jsx(Text, {
6229
5751
  color: "magenta",
6230
- children: (model.sizeBytes ? formatBytes(model.sizeBytes) : "-").padStart(10)
5752
+ children: (model.sizeBytes ? formatBytes(model.sizeBytes) : "-").padStart(9)
6231
5753
  }),
6232
5754
  /* @__PURE__ */ jsx(Text, {
6233
5755
  color: "cyan",
@@ -6235,7 +5757,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
6235
5757
  }),
6236
5758
  /* @__PURE__ */ jsx(Text, {
6237
5759
  dimColor: true,
6238
- children: formatDate(model.createdAt).padStart(10)
5760
+ children: formatDate(model.createdAt).padStart(9)
6239
5761
  }),
6240
5762
  /* @__PURE__ */ jsx(Text, {
6241
5763
  color: "gray",
@@ -6278,7 +5800,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
6278
5800
  }),
6279
5801
  !hfLoading && hfModels.length === 0 && !hfError && /* @__PURE__ */ jsx(Text, {
6280
5802
  dimColor: true,
6281
- children: "Loading ONNX models..."
5803
+ children: "Loading models..."
6282
5804
  })
6283
5805
  ] }),
6284
5806
  tab === "voice" && /* @__PURE__ */ jsxs(Box, {
@@ -6950,7 +6472,7 @@ function SkillsView({ gerbil, onCreateNew, onSwitchModel }) {
6950
6472
  const [skillsLoaded, setSkillsLoaded] = useState(false);
6951
6473
  const [cachedModels, setCachedModels] = useState([]);
6952
6474
  const [modelSelectIndex, setModelSelectIndex] = useState(0);
6953
- const [currentModel, setCurrentModel] = useState(gerbil.getModelInfo()?.id || "qwen3-0.6b");
6475
+ const [currentModel, setCurrentModel] = useState(gerbil.getModelInfo()?.id || DEFAULT_MODEL);
6954
6476
  const [attachedImage, setAttachedImage] = useState(null);
6955
6477
  const [attachedImageName, setAttachedImageName] = useState(null);
6956
6478
  const [imageError, setImageError] = useState(null);
@@ -8040,20 +7562,20 @@ function getViewPrompt(view, ctx = {}) {
8040
7562
  }
8041
7563
  }
8042
7564
  getViewPrompt("chat"), getViewPrompt("skills"), getViewPrompt("create-skill"), getViewPrompt("code");
8043
- function App({ initialView = "menu", forcedDevice } = {}) {
7565
+ function App({ initialView = "menu" } = {}) {
8044
7566
  const { exit } = useApp();
8045
7567
  const gerbilRef = useRef(null);
8046
7568
  const [state, setState] = useState({
8047
7569
  view: "menu",
8048
7570
  gerbil: null,
8049
- model: "qwen3-0.6b",
7571
+ model: DEFAULT_MODEL,
8050
7572
  loading: true,
8051
7573
  loadingMessage: "Loading model...",
8052
7574
  thinkingMode: false,
8053
7575
  agentMode: true,
8054
7576
  voiceMode: false,
8055
7577
  downloadStatus: "",
8056
- deviceMode: forcedDevice ?? (process.env.GERBIL_CPU === "1" ? "cpu" : "webgpu"),
7578
+ deviceMode: "webgpu",
8057
7579
  ttsModel: "kokoro-82m",
8058
7580
  sttModel: "whisper-tiny.en"
8059
7581
  });
@@ -8137,9 +7659,8 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8137
7659
  const loadModel = async () => {
8138
7660
  const g = new Gerbil();
8139
7661
  try {
8140
- const device = state.deviceMode;
8141
7662
  await g.loadModel(state.model, {
8142
- device,
7663
+ device: "webgpu",
8143
7664
  onProgress: (p) => {
8144
7665
  if (!mounted) return;
8145
7666
  if (p.status?.includes("Unable to determine content-length")) return;
@@ -8166,8 +7687,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8166
7687
  view: pendingView || "menu"
8167
7688
  }));
8168
7689
  if (pendingView) setPendingView(null);
8169
- if (g.getDeviceMode() === "webgpu") setTimeout(() => generateWelcome(g).catch(() => {}), 1500);
8170
- else generateWelcome(g).catch(() => {});
7690
+ setTimeout(() => generateWelcome(g).catch(() => {}), 1500);
8171
7691
  } catch (error) {
8172
7692
  if (!mounted) return;
8173
7693
  setState((s) => ({
@@ -8181,7 +7701,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8181
7701
  return () => {
8182
7702
  mounted = false;
8183
7703
  if (gerbilRef.current) {
8184
- import("./repl-DveXw36T.mjs").then(({ setCleanupPromise: setCleanupPromise$1 }) => {
7704
+ import("./repl-jV5gcJFA.mjs").then(({ setCleanupPromise: setCleanupPromise$1 }) => {
8185
7705
  setCleanupPromise$1(gerbilRef.current?.dispose(true) ?? Promise.resolve());
8186
7706
  });
8187
7707
  gerbilRef.current = null;
@@ -8271,49 +7791,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8271
7791
  return () => clearTimeout(timer);
8272
7792
  }
8273
7793
  }, [ctrlCPressed]);
8274
- const handleDeviceSwitch = async (newDevice) => {
8275
- if (newDevice === state.deviceMode) return;
8276
- setState((s) => ({
8277
- ...s,
8278
- deviceMode: newDevice,
8279
- loading: true,
8280
- loadingMessage: `Switching to ${newDevice.toUpperCase()}...`
8281
- }));
8282
- if (gerbilRef.current) {
8283
- await gerbilRef.current.dispose();
8284
- gerbilRef.current = null;
8285
- }
8286
- const g = new Gerbil();
8287
- try {
8288
- await g.loadModel(state.model, {
8289
- device: newDevice,
8290
- onProgress: (p) => {
8291
- if (p.status?.includes("Unable to determine content-length")) return;
8292
- setState((s) => ({
8293
- ...s,
8294
- loadingMessage: p.file ? `${p.file} ${p.progress || 0}%` : p.status
8295
- }));
8296
- }
8297
- });
8298
- const supportsThinking = g.getModelInfo()?.supportsThinking ?? false;
8299
- setToolContext({ generate: async (prompt) => {
8300
- return (await g.generate(prompt, { maxTokens: 200 })).text;
8301
- } });
8302
- gerbilRef.current = g;
8303
- setState((s) => ({
8304
- ...s,
8305
- gerbil: g,
8306
- loading: false,
8307
- thinkingMode: supportsThinking
8308
- }));
8309
- } catch (error) {
8310
- setState((s) => ({
8311
- ...s,
8312
- loading: false,
8313
- loadingMessage: `Error: ${error}`
8314
- }));
8315
- }
8316
- };
8317
7794
  useInput((input, key) => {
8318
7795
  if (key.ctrl && input === "c") {
8319
7796
  if (ctrlCPressed) exit();
@@ -8432,7 +7909,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8432
7909
  ...s,
8433
7910
  voiceMode: !s.voiceMode
8434
7911
  }));
8435
- if ((input === "m" || input === "M") && state.view === "menu" && !state.loading) handleDeviceSwitch(state.deviceMode === "webgpu" ? "cpu" : "webgpu");
8436
7912
  });
8437
7913
  if (state.loading) return /* @__PURE__ */ jsx(LoadingView, { message: state.loadingMessage });
8438
7914
  if (state.view === "menu") return /* @__PURE__ */ jsxs(Box, {
@@ -8486,8 +7962,8 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8486
7962
  }),
8487
7963
  /* @__PURE__ */ jsx(Text, {
8488
7964
  bold: true,
8489
- color: state.gerbil?.getDeviceMode() === "webgpu" ? "green" : "yellow",
8490
- children: state.gerbil?.getDeviceMode()?.toUpperCase() || "CPU"
7965
+ color: "green",
7966
+ children: (state.gerbil?.getDeviceMode() ?? "webgpu").toUpperCase()
8491
7967
  }),
8492
7968
  state.gerbil?.getModelInfo()?.supportsThinking ? /* @__PURE__ */ jsxs(Fragment, { children: [/* @__PURE__ */ jsx(Text, {
8493
7969
  dimColor: true,
@@ -8608,14 +8084,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8608
8084
  dimColor: true,
8609
8085
  children: "oice · "
8610
8086
  }),
8611
- /* @__PURE__ */ jsx(Text, {
8612
- color: "cyan",
8613
- children: "m"
8614
- }),
8615
- /* @__PURE__ */ jsx(Text, {
8616
- dimColor: true,
8617
- children: "ode · "
8618
- }),
8619
8087
  /* @__PURE__ */ jsx(Text, {
8620
8088
  color: "blue",
8621
8089
  children: "d"
@@ -8652,13 +8120,11 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8652
8120
  }) : null
8653
8121
  ]
8654
8122
  });
8655
- const handleModelSelect = async (model, deviceOverride) => {
8123
+ const handleModelSelect = async (model) => {
8656
8124
  const returnView = returnToBenchmark ? "benchmark" : "menu";
8657
- const newDevice = deviceOverride ?? state.deviceMode;
8658
8125
  setState((s) => ({
8659
8126
  ...s,
8660
8127
  model,
8661
- deviceMode: newDevice,
8662
8128
  view: returnView,
8663
8129
  loading: true,
8664
8130
  loadingMessage: "Switching model..."
@@ -8671,7 +8137,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8671
8137
  const g = new Gerbil();
8672
8138
  try {
8673
8139
  await g.loadModel(model, {
8674
- device: newDevice,
8140
+ device: "webgpu",
8675
8141
  onProgress: (p) => {
8676
8142
  if (p.status?.includes("Unable to determine content-length")) return;
8677
8143
  setState((s) => ({
@@ -8706,15 +8172,23 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8706
8172
  downloadStatus: `Downloading ${modelId}...`
8707
8173
  }));
8708
8174
  const tempGerbil = new Gerbil();
8175
+ let initTimer = null;
8709
8176
  try {
8710
8177
  await tempGerbil.loadModel(modelId, { onProgress: (p) => {
8711
8178
  if (p.status?.includes("Unable to determine content-length")) return;
8179
+ if (initTimer) clearTimeout(initTimer);
8712
8180
  setState((s) => ({
8713
8181
  ...s,
8714
8182
  downloadStatus: p.file ? `${p.file} ${p.progress || 0}%` : p.status || ""
8715
8183
  }));
8184
+ initTimer = setTimeout(() => {
8185
+ setState((s) => ({
8186
+ ...s,
8187
+ downloadStatus: `Initializing ${modelId}...`
8188
+ }));
8189
+ }, 600);
8716
8190
  } });
8717
- await tempGerbil.dispose();
8191
+ if (initTimer) clearTimeout(initTimer);
8718
8192
  setState((s) => ({
8719
8193
  ...s,
8720
8194
  downloadStatus: `[done] Downloaded ${modelId}`
@@ -8725,6 +8199,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8725
8199
  })), 2500);
8726
8200
  return true;
8727
8201
  } catch (error) {
8202
+ if (initTimer) clearTimeout(initTimer);
8728
8203
  setState((s) => ({
8729
8204
  ...s,
8730
8205
  downloadStatus: `[error] ${error}`
@@ -8734,6 +8209,8 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8734
8209
  downloadStatus: ""
8735
8210
  })), 3e3);
8736
8211
  return false;
8212
+ } finally {
8213
+ await tempGerbil.dispose();
8737
8214
  }
8738
8215
  };
8739
8216
  return /* @__PURE__ */ jsxs(Box, {
@@ -8973,7 +8450,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
8973
8450
  setBenchmarkStatus(status);
8974
8451
  setBenchmarkStatsState(stats);
8975
8452
  },
8976
- onSwitchDevice: handleDeviceSwitch,
8977
8453
  onSwitchModel: () => {
8978
8454
  setReturnToBenchmark(true);
8979
8455
  transitionToView("model");
@@ -9070,10 +8546,7 @@ async function startRepl(options) {
9070
8546
  process.on("SIGINT", handleSignal);
9071
8547
  process.on("SIGTERM", () => process.exit(0));
9072
8548
  try {
9073
- const { waitUntilExit } = render(/* @__PURE__ */ jsx(App, {
9074
- forcedDevice: options?.forcedDevice,
9075
- initialView: options?.initialView
9076
- }));
8549
+ const { waitUntilExit } = render(/* @__PURE__ */ jsx(App, { initialView: options?.initialView }));
9077
8550
  await waitUntilExit();
9078
8551
  if (pendingCleanup) try {
9079
8552
  await Promise.race([pendingCleanup, new Promise((r) => setTimeout(r, 500))]);
@@ -9104,97 +8577,113 @@ async function startRepl(options) {
9104
8577
  */
9105
8578
  const program = new Command();
9106
8579
  program.name("gerbil").description("🐹 Local LLM inference for Node.js").version(version);
9107
- program.argument("[prompt...]", "Prompt to generate from (opens REPL if empty)").option("-m, --model <id>", "Model to use", "qwen3-0.6b").option("-n, --max-tokens <n>", "Max tokens", "256").option("-t, --temperature <t>", "Temperature", "0.7").option("-s, --system <text>", "System prompt").option("--thinking", "Enable thinking mode").option("--stream", "Stream output").option("--json", "Output as JSON").action(async (promptParts, opts) => {
9108
- if (promptParts.length === 0) {
8580
+ program.command("generate [prompt...]", { isDefault: true }).alias("g").description("Generate text (opens the REPL when no prompt is given)").option("-m, --model <id>", "Model to use", DEFAULT_MODEL).option("-n, --max-tokens <n>", "Max tokens", "256").option("-t, --temperature <t>", "Temperature", "0.7").option("-s, --system <text>", "System prompt").option("--thinking", "Enable thinking mode").option("--stream", "Stream output").option("--json", "Output as JSON").action(async (promptParts, opts) => {
8581
+ if (!promptParts || promptParts.length === 0) {
9109
8582
  await startRepl({});
9110
8583
  return;
9111
8584
  }
9112
8585
  await runGenerate(promptParts.join(" "), opts);
9113
8586
  });
9114
- program.command("generate <prompt...>").alias("g").description("Generate text").option("-m, --model <id>", "Model to use", "qwen3-0.6b").option("-n, --max-tokens <n>", "Max tokens", "256").option("-t, --temperature <t>", "Temperature", "0.7").option("-s, --system <text>", "System prompt").option("--thinking", "Enable thinking mode").option("--stream", "Stream output").option("--json", "Output as JSON").action(async (promptParts, opts) => {
9115
- await runGenerate(promptParts.join(" "), opts);
9116
- });
9117
8587
  async function runGenerate(prompt, opts) {
9118
8588
  const g = new Gerbil();
9119
8589
  const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
9120
8590
  try {
9121
- await g.loadModel(opts.model, { onProgress: (p) => {
9122
- spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
9123
- } });
8591
+ await g.loadModel(opts.model, {
8592
+ device: "webgpu",
8593
+ onProgress: (p) => {
8594
+ spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
8595
+ }
8596
+ });
9124
8597
  spinner.succeed("Model loaded");
9125
- if (opts.thinking) {}
8598
+ const genOpts = {
8599
+ maxTokens: Number.parseInt(opts.maxTokens, 10),
8600
+ temperature: Number.parseFloat(opts.temperature),
8601
+ system: opts.system,
8602
+ thinking: opts.thinking
8603
+ };
9126
8604
  if (opts.stream) {
9127
8605
  process.stdout.write(chalk.green("Response: "));
9128
- for await (const chunk of g.stream(prompt, {
9129
- maxTokens: Number.parseInt(opts.maxTokens, 10),
9130
- temperature: Number.parseFloat(opts.temperature),
9131
- system: opts.system,
9132
- thinking: opts.thinking
9133
- })) process.stdout.write(chunk);
8606
+ for await (const chunk of g.stream(prompt, genOpts)) process.stdout.write(chunk);
8607
+ process.stdout.write("\n");
9134
8608
  } else {
9135
8609
  const genSpinner = ora("Generating...").start();
9136
- const result = await g.generate(prompt, {
9137
- maxTokens: Number.parseInt(opts.maxTokens, 10),
9138
- temperature: Number.parseFloat(opts.temperature),
9139
- system: opts.system,
9140
- thinking: opts.thinking
9141
- });
8610
+ const result = await g.generate(prompt, genOpts);
9142
8611
  genSpinner.stop();
9143
- if (opts.json) {} else if (result.thinking) {}
8612
+ if (opts.json) console.log(JSON.stringify(result, null, 2));
8613
+ else {
8614
+ if (opts.thinking && result.thinking) console.log(chalk.gray(`\n[thinking]\n${result.thinking}\n`));
8615
+ console.log(result.text);
8616
+ console.log(chalk.gray(`\n${result.tokensGenerated} tokens · ${result.tokensPerSecond.toFixed(1)} tok/s`));
8617
+ }
9144
8618
  }
9145
8619
  await g.dispose();
9146
- } catch (_e) {
9147
- spinner.fail("Error");
8620
+ } catch (e) {
8621
+ spinner.fail(`Error: ${e?.message ?? e}`);
9148
8622
  process.exit(1);
9149
8623
  }
9150
8624
  }
9151
- program.command("repl").alias("r").description("Interactive TUI with chat, skills, and more").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9152
- await startRepl({ forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0 });
8625
+ program.command("object <prompt...>").alias("obj").description("Generate a structured JSON object (with optional schema validation)").option("-m, --model <id>", "Model to use", DEFAULT_MODEL).option("-n, --max-tokens <n>", "Max tokens", "512").option("-t, --temperature <t>", "Temperature", "0.3").option("--schema <file>", "Path to a JSON file with a { required: string[] } shape to validate against").option("--retries <n>", "Max retries after the first attempt", "4").action(async (promptParts, opts) => {
8626
+ const g = new Gerbil();
8627
+ const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
8628
+ try {
8629
+ let schema;
8630
+ if (opts.schema) {
8631
+ const fs$1 = await import("node:fs");
8632
+ schema = JSON.parse(fs$1.readFileSync(opts.schema, "utf-8"));
8633
+ }
8634
+ await g.loadModel(opts.model, {
8635
+ device: "webgpu",
8636
+ onProgress: (p) => {
8637
+ spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
8638
+ }
8639
+ });
8640
+ spinner.succeed("Model loaded");
8641
+ const genSpinner = ora("Generating object...").start();
8642
+ const result = await g.generateObject(promptParts.join(" "), {
8643
+ schema,
8644
+ maxRetries: Number.parseInt(opts.retries, 10),
8645
+ maxTokens: Number.parseInt(opts.maxTokens, 10),
8646
+ sampling: { temperature: Number.parseFloat(opts.temperature) }
8647
+ });
8648
+ genSpinner.stop();
8649
+ console.log(JSON.stringify(result.object, null, 2));
8650
+ console.log(chalk.gray(`\n${result.attempts} attempt(s)`));
8651
+ await g.dispose();
8652
+ } catch (e) {
8653
+ spinner.fail(`Error: ${e?.message ?? e}`);
8654
+ process.exit(1);
8655
+ }
9153
8656
  });
9154
- program.command("chat").alias("c").description("Interactive chat (opens REPL chat view)").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9155
- await startRepl({
9156
- initialView: "chat",
9157
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9158
- });
8657
+ program.command("repl").alias("r").description("Interactive TUI with chat, skills, and more").action(async () => {
8658
+ await startRepl({});
9159
8659
  });
9160
- program.command("skills").description("Open REPL skills view").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9161
- await startRepl({
9162
- initialView: "skills",
9163
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9164
- });
8660
+ program.command("chat").alias("c").description("Interactive chat (opens REPL chat view)").action(async () => {
8661
+ await startRepl({ initialView: "chat" });
9165
8662
  });
9166
- program.command("tools").description("Open REPL tools view").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9167
- await startRepl({
9168
- initialView: "tools",
9169
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9170
- });
8663
+ program.command("skills").description("Open REPL skills view").action(async () => {
8664
+ await startRepl({ initialView: "skills" });
9171
8665
  });
9172
- program.command("model").description("Open REPL model view").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9173
- await startRepl({
9174
- initialView: "model",
9175
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9176
- });
8666
+ program.command("tools").description("Open REPL tools view").action(async () => {
8667
+ await startRepl({ initialView: "tools" });
9177
8668
  });
9178
- program.command("integrate").description("Open REPL framework integration view").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9179
- await startRepl({
9180
- initialView: "frameworks",
9181
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9182
- });
8669
+ program.command("model").description("Open REPL model view").action(async () => {
8670
+ await startRepl({ initialView: "model" });
9183
8671
  });
9184
- program.command("benchmark").description("Open REPL benchmark view").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9185
- await startRepl({
9186
- initialView: "benchmark",
9187
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9188
- });
8672
+ program.command("integrate").description("Open REPL framework integration view").action(async () => {
8673
+ await startRepl({ initialView: "frameworks" });
8674
+ });
8675
+ program.command("benchmark").description("Open REPL benchmark view").action(async () => {
8676
+ await startRepl({ initialView: "benchmark" });
9189
8677
  });
9190
8678
  program.command("commit").description("Generate commit message from staged changes").option("--type <type>", "Commit style: conventional, simple, detailed", "conventional").option("--write", "Write message to .git/COMMIT_EDITMSG").action(async (opts) => {
9191
8679
  const spinner = ora("Generating commit message...").start();
9192
8680
  try {
9193
8681
  const message = await commit({ type: opts.type });
9194
8682
  spinner.stop();
8683
+ console.log(message);
9195
8684
  if (opts.write) (await import("node:fs")).writeFileSync(".git/COMMIT_EDITMSG", message);
9196
- } catch (_e) {
9197
- spinner.fail("Error");
8685
+ } catch (e) {
8686
+ spinner.fail(`Error: ${e?.message ?? e}`);
9198
8687
  process.exit(1);
9199
8688
  }
9200
8689
  });
@@ -9202,14 +8691,15 @@ program.command("summarize <file>").description("Summarize a file").option("-l,
9202
8691
  const content = (await import("node:fs")).readFileSync(file, "utf-8");
9203
8692
  const spinner = ora("Summarizing...").start();
9204
8693
  try {
9205
- await summarize({
8694
+ const summary = await summarize({
9206
8695
  content,
9207
8696
  length: opts.length,
9208
8697
  format: opts.format
9209
8698
  });
9210
8699
  spinner.stop();
9211
- } catch (_e) {
9212
- spinner.fail("Error");
8700
+ console.log(summary);
8701
+ } catch (e) {
8702
+ spinner.fail(`Error: ${e?.message ?? e}`);
9213
8703
  process.exit(1);
9214
8704
  }
9215
8705
  });
@@ -9217,13 +8707,14 @@ program.command("explain <file>").description("Explain code").option("-l, --leve
9217
8707
  const content = (await import("node:fs")).readFileSync(file, "utf-8");
9218
8708
  const spinner = ora("Explaining...").start();
9219
8709
  try {
9220
- await explain({
8710
+ const explanation = await explain({
9221
8711
  content,
9222
8712
  level: opts.level
9223
8713
  });
9224
8714
  spinner.stop();
9225
- } catch (_e) {
9226
- spinner.fail("Error");
8715
+ console.log(explanation);
8716
+ } catch (e) {
8717
+ spinner.fail(`Error: ${e?.message ?? e}`);
9227
8718
  process.exit(1);
9228
8719
  }
9229
8720
  });
@@ -9232,13 +8723,14 @@ program.command("review <file>").description("Review code").option("-f, --focus
9232
8723
  const spinner = ora("Reviewing...").start();
9233
8724
  try {
9234
8725
  const focus = opts.focus === "all" ? ["all"] : opts.focus.split(",");
9235
- await review({
8726
+ const reviewResult = await review({
9236
8727
  code: content,
9237
8728
  focus
9238
8729
  });
9239
8730
  spinner.stop();
9240
- } catch (_e) {
9241
- spinner.fail("Error");
8731
+ console.log(reviewResult);
8732
+ } catch (e) {
8733
+ spinner.fail(`Error: ${e?.message ?? e}`);
9242
8734
  process.exit(1);
9243
8735
  }
9244
8736
  });
@@ -9324,19 +8816,19 @@ program.command("speak [text...]").description("Convert text to speech using loc
9324
8816
  process.exit(1);
9325
8817
  }
9326
8818
  });
9327
- program.command("voice [audio]").description("Voice conversation: transcribe audio, generate response, speak it back").option("-m, --model <model>", "LLM model to use", "qwen3-0.6b").option("-s, --stt-model <model>", "STT model ID", "whisper-tiny.en").option("-v, --voice <voice>", "TTS voice ID", "af_heart").option("--system <prompt>", "System prompt", "You are a helpful voice assistant. Keep responses brief and conversational.").option("--thinking", "Enable thinking mode").action(async (audioFile, opts) => {
8819
+ program.command("voice [audio]").description("Voice conversation: transcribe audio, generate response, speak it back").option("-m, --model <model>", "LLM model to use", DEFAULT_MODEL).option("-s, --stt-model <model>", "STT model ID", "whisper-tiny.en").option("-v, --voice <voice>", "TTS voice ID", "af_heart").option("--system <prompt>", "System prompt", "You are a helpful voice assistant. Keep responses brief and conversational.").option("--thinking", "Enable thinking mode").action(async (audioFile, opts) => {
9328
8820
  if (!audioFile) {
9329
8821
  console.log(chalk.cyan("\n🎙️ Gerbil Voice Chat\n"));
9330
8822
  console.log(chalk.yellow("Usage: gerbil voice <audio.wav> [options]\n"));
9331
8823
  console.log("Options:");
9332
- console.log(" -m, --model <model> LLM model (default: qwen3-0.6b)");
8824
+ console.log(` -m, --model <model> LLM model (default: ${DEFAULT_MODEL})`);
9333
8825
  console.log(" -s, --stt-model <model> STT model (default: whisper-tiny.en)");
9334
8826
  console.log(" -v, --voice <voice> TTS voice (default: af_heart)");
9335
8827
  console.log(" --system <prompt> System prompt");
9336
8828
  console.log(" --thinking Enable thinking mode");
9337
8829
  console.log("\nExample:");
9338
8830
  console.log(chalk.gray(" gerbil voice question.wav --voice bf_emma"));
9339
- console.log(chalk.gray(" gerbil voice \"what time is it.wav\" --model qwen3-1.7b\n"));
8831
+ console.log(chalk.gray(" gerbil voice \"what time is it.wav\" --model qwen3.5-2b\n"));
9340
8832
  return;
9341
8833
  }
9342
8834
  const g = new Gerbil();
@@ -9498,29 +8990,26 @@ function audioToWav(audio, sampleRate) {
9498
8990
  }
9499
8991
  return buffer;
9500
8992
  }
9501
- program.command("serve").description("Start Gerbil server (use --mcp or --http for CLI, otherwise opens REPL)").option("-m, --model <id>", "Model to use", "qwen3-0.6b").option("-p, --port <port>", "HTTP port", "3000").option("--mcp", "Start MCP server (stdio)").option("--http", "Start HTTP server (CLI mode)").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
8993
+ program.command("serve").description("Start Gerbil server (use --mcp or --http for CLI, otherwise opens REPL)").option("-m, --model <id>", "Model to use", DEFAULT_MODEL).option("-p, --port <port>", "HTTP port", "3000").option("--mcp", "Start MCP server (stdio)").option("--http", "Start HTTP server (CLI mode)").action(async (opts) => {
9502
8994
  if (!(opts.mcp || opts.http)) {
9503
- await startRepl({
9504
- initialView: "serve",
9505
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9506
- });
8995
+ await startRepl({ initialView: "serve" });
9507
8996
  return;
9508
8997
  }
9509
8998
  if (opts.mcp) await startMCPServer({ model: opts.model });
9510
8999
  else {
9511
- const { Gerbil: Gerbil$1 } = await import("./gerbil-CJ3ifloF.mjs");
9000
+ const { Gerbil: Gerbil$1 } = await import("./gerbil-DomNfIr1.mjs");
9512
9001
  const g = new Gerbil$1();
9513
9002
  const spinner = ora("Loading model...").start();
9514
9003
  await g.loadModel(opts.model);
9515
9004
  spinner.succeed("Model loaded");
9516
- (await import("node:http")).createServer(async (req, res) => {
9005
+ const server = (await import("node:http")).createServer(async (req, res) => {
9517
9006
  if (req.method === "POST" && req.url === "/generate") {
9518
9007
  let body = "";
9519
9008
  req.on("data", (chunk) => body += chunk);
9520
9009
  req.on("end", async () => {
9521
9010
  try {
9522
- const { prompt, ...opts$1 } = JSON.parse(body);
9523
- const result = await g.generate(prompt, opts$1);
9011
+ const { prompt, ...genOpts } = JSON.parse(body);
9012
+ const result = await g.generate(prompt, genOpts);
9524
9013
  res.writeHead(200, { "Content-Type": "application/json" });
9525
9014
  res.end(JSON.stringify(result));
9526
9015
  } catch (e) {
@@ -9528,6 +9017,25 @@ program.command("serve").description("Start Gerbil server (use --mcp or --http f
9528
9017
  res.end(JSON.stringify({ error: String(e) }));
9529
9018
  }
9530
9019
  });
9020
+ } else if (req.method === "POST" && req.url === "/stream") {
9021
+ let body = "";
9022
+ req.on("data", (chunk) => body += chunk);
9023
+ req.on("end", async () => {
9024
+ try {
9025
+ const { prompt, ...genOpts } = JSON.parse(body);
9026
+ res.writeHead(200, {
9027
+ "Content-Type": "text/event-stream",
9028
+ "Cache-Control": "no-cache",
9029
+ Connection: "keep-alive"
9030
+ });
9031
+ for await (const token of g.stream(prompt, genOpts)) res.write(`data: ${JSON.stringify({ token })}\n\n`);
9032
+ res.write("data: [DONE]\n\n");
9033
+ res.end();
9034
+ } catch (e) {
9035
+ res.write(`data: ${JSON.stringify({ error: String(e) })}\n\n`);
9036
+ res.end();
9037
+ }
9038
+ });
9531
9039
  } else if (req.method === "GET" && req.url === "/info") {
9532
9040
  res.writeHead(200, { "Content-Type": "application/json" });
9533
9041
  res.end(JSON.stringify(g.getInfo()));
@@ -9535,7 +9043,14 @@ program.command("serve").description("Start Gerbil server (use --mcp or --http f
9535
9043
  res.writeHead(404);
9536
9044
  res.end("Not found");
9537
9045
  }
9538
- }).listen(Number.parseInt(opts.port, 10), () => {});
9046
+ });
9047
+ const port = Number.parseInt(opts.port, 10);
9048
+ server.listen(port, () => {
9049
+ console.log(chalk.green(`\nHTTP server on http://localhost:${port}`));
9050
+ console.log(chalk.gray(" POST /generate { prompt, ...options }"));
9051
+ console.log(chalk.gray(" POST /stream { prompt, ...options } (SSE)"));
9052
+ console.log(chalk.gray(" GET /info"));
9053
+ });
9539
9054
  }
9540
9055
  });
9541
9056
  program.command("models").description("List available models").option("--search <query>", "Search models").action(async (opts) => {
@@ -9544,38 +9059,61 @@ program.command("models").description("List available models").option("--search
9544
9059
  const q = opts.search.toLowerCase();
9545
9060
  models = models.filter((m) => m.id.toLowerCase().includes(q) || m.description.toLowerCase().includes(q));
9546
9061
  }
9547
- for (const m of models) if (m.supportsThinking) {}
9062
+ console.log(chalk.cyan("\nBuilt-in models (native WebGPU engine):\n"));
9063
+ for (const m of models) {
9064
+ const flags = [
9065
+ m.supportsThinking ? chalk.magenta("thinking") : null,
9066
+ m.supportsJson ? chalk.blue("json") : null,
9067
+ m.supportsVision ? chalk.green("vision") : null
9068
+ ].filter(Boolean).join(" ");
9069
+ console.log(` ${chalk.green(m.id.padEnd(22))} ${chalk.gray((m.size || "").padStart(7))} ${m.description}`);
9070
+ if (flags) console.log(` ${" ".repeat(22)} ${flags}`);
9071
+ }
9072
+ console.log(chalk.gray("\nUse any HuggingFace repo with a supported architecture via hf:org/model.\n"));
9548
9073
  });
9549
- program.command("info").description("Show system and model info (use --cli for text output, otherwise opens REPL)").option("-m, --model <id>", "Model to load and show info for", "qwen3-0.6b").option("--cli", "Output text info instead of opening REPL").option("--cpu", "Force CPU inference mode").option("--gpu", "Force GPU/WebGPU inference mode").action(async (opts) => {
9074
+ program.command("info").description("Show system and model info (use --cli for text output, otherwise opens REPL)").option("-m, --model <id>", "Model to load and show info for", DEFAULT_MODEL).option("--cli", "Output text info instead of opening REPL").action(async (opts) => {
9550
9075
  if (!opts.cli) {
9551
- await startRepl({
9552
- initialView: "info",
9553
- forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
9554
- });
9076
+ await startRepl({ initialView: "info" });
9555
9077
  return;
9556
9078
  }
9557
9079
  const g = new Gerbil();
9558
9080
  const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
9559
9081
  try {
9560
- await g.loadModel(opts.model, { onProgress: (p) => {
9561
- spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
9562
- } });
9082
+ await g.loadModel(opts.model, {
9083
+ device: "webgpu",
9084
+ onProgress: (p) => {
9085
+ spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
9086
+ }
9087
+ });
9563
9088
  spinner.stop();
9564
- g.getInfo();
9089
+ const info = g.getInfo();
9090
+ const deviceMode = g.getDeviceMode();
9091
+ const deviceLabel = deviceMode === "webgpu" ? `${deviceMode.toUpperCase()} (Dawn)` : deviceMode.toUpperCase();
9092
+ console.log(chalk.cyan("\nSystem Info\n"));
9093
+ console.log(` Backend: ${chalk.green(info.device.backend)}`);
9094
+ console.log(` Device: ${chalk.green(deviceLabel)}`);
9095
+ console.log(` Model: ${chalk.cyan(info.model?.id ?? opts.model)}`);
9096
+ console.log(` Family: ${chalk.gray(info.model?.family ?? "unknown")}`);
9097
+ console.log(` Dtype: ${chalk.gray(g.getDtype())} (INT4 weights)`);
9098
+ console.log(` Context: ${chalk.gray(info.context.max.toLocaleString())} tokens`);
9099
+ console.log(` Thinking: ${info.model?.supportsThinking ? chalk.magenta("yes") : "no"}`);
9100
+ console.log(` Cache: ${chalk.gray(info.cache.location)}\n`);
9565
9101
  await g.dispose();
9566
- } catch (_e) {
9567
- spinner.fail("Error loading model");
9102
+ } catch (e) {
9103
+ spinner.fail(`Error loading model: ${e?.message ?? e}`);
9104
+ process.exit(1);
9568
9105
  }
9569
9106
  });
9570
9107
  program.command("cache").description("Show or manage model cache").option("--clean", "Remove all cached models").option("--older-than <days>", "Remove models older than N days").action(async (opts) => {
9571
9108
  const fs$1 = await import("node:fs");
9572
9109
  const path$1 = await import("node:path");
9573
9110
  const os$1 = await import("node:os");
9574
- const nodeModulesCache = path$1.join(process.cwd(), "node_modules", "@huggingface", "transformers", ".cache");
9575
- const cacheDir = fs$1.existsSync(nodeModulesCache) ? nodeModulesCache : process.env.HF_HOME || process.env.TRANSFORMERS_CACHE || path$1.join(os$1.homedir(), ".cache", "huggingface", "hub");
9111
+ const cacheDir = path$1.join(os$1.homedir(), ".cache", "gerbil");
9576
9112
  try {
9577
- if (!fs$1.existsSync(cacheDir)) return;
9578
- const entries = fs$1.readdirSync(cacheDir);
9113
+ if (!fs$1.existsSync(cacheDir)) {
9114
+ console.log(chalk.gray("No native model cache found."));
9115
+ return;
9116
+ }
9579
9117
  const models = [];
9580
9118
  const getSize = (dir) => {
9581
9119
  let total = 0;
@@ -9590,132 +9128,119 @@ program.command("cache").description("Show or manage model cache").option("--cle
9590
9128
  } catch {}
9591
9129
  return total;
9592
9130
  };
9593
- for (const entry of entries) {
9131
+ for (const entry of fs$1.readdirSync(cacheDir)) {
9594
9132
  const entryPath = path$1.join(cacheDir, entry);
9595
9133
  try {
9596
9134
  const entryStat = fs$1.statSync(entryPath);
9597
9135
  if (!entryStat.isDirectory()) continue;
9598
- if (entry.startsWith("models--")) {
9599
- const modelName = entry.replace("models--", "").replace("--", "/");
9600
- const size = getSize(entryPath);
9601
- models.push({
9602
- name: modelName,
9603
- size,
9604
- mtime: entryStat.mtime,
9605
- path: entryPath
9606
- });
9607
- } else {
9608
- const subEntries = fs$1.readdirSync(entryPath);
9609
- for (const subEntry of subEntries) {
9610
- const subPath = path$1.join(entryPath, subEntry);
9611
- try {
9612
- const subStat = fs$1.statSync(subPath);
9613
- if (subStat.isDirectory()) {
9614
- const modelName = `${entry}/${subEntry}`;
9615
- const size = getSize(subPath);
9616
- if (size > 0) models.push({
9617
- name: modelName,
9618
- size,
9619
- mtime: subStat.mtime,
9620
- path: subPath
9621
- });
9622
- }
9623
- } catch {}
9624
- }
9625
- }
9136
+ const size = getSize(entryPath);
9137
+ if (size > 0) models.push({
9138
+ name: entry.replace(/_/g, "/"),
9139
+ size,
9140
+ mtime: entryStat.mtime,
9141
+ path: entryPath
9142
+ });
9626
9143
  } catch {}
9627
9144
  }
9628
- if (models.length === 0) return;
9145
+ if (models.length === 0) {
9146
+ console.log(chalk.gray("No cached models."));
9147
+ return;
9148
+ }
9629
9149
  if (opts.clean || opts.olderThan) {
9630
9150
  const daysThreshold = opts.olderThan ? Number.parseInt(opts.olderThan, 10) : 0;
9631
9151
  const cutoffDate = /* @__PURE__ */ new Date(Date.now() - daysThreshold * 24 * 60 * 60 * 1e3);
9632
9152
  let removed = 0;
9633
- let _removedSize = 0;
9153
+ let removedSize = 0;
9634
9154
  for (const model of models) if (opts.clean || model.mtime < cutoffDate) try {
9635
9155
  fs$1.rmSync(model.path, {
9636
9156
  recursive: true,
9637
9157
  force: true
9638
9158
  });
9639
9159
  removed += 1;
9640
- _removedSize += model.size;
9160
+ removedSize += model.size;
9641
9161
  } catch (_e) {}
9642
- if (removed > 0) {}
9162
+ if (removed > 0) console.log(chalk.green(`Removed ${removed} model(s), freed ${formatSize(removedSize)}`));
9163
+ else console.log(chalk.gray("Nothing to remove."));
9643
9164
  return;
9644
9165
  }
9645
- let _totalSize = 0;
9166
+ console.log(chalk.cyan(`\nNative model cache (${chalk.gray(cacheDir)})\n`));
9167
+ let totalSize = 0;
9646
9168
  for (const model of models.sort((a, b) => b.mtime.getTime() - a.mtime.getTime())) {
9647
- formatTimeAgo(model.mtime);
9648
- formatSize(model.size).padStart(8);
9649
- _totalSize += model.size;
9169
+ const timeAgo = formatTimeAgo(model.mtime);
9170
+ const sizeStr = formatSize(model.size).padStart(8);
9171
+ totalSize += model.size;
9172
+ console.log(` ${model.name.padEnd(40)} ${chalk.gray(sizeStr)} ${chalk.gray(timeAgo)}`);
9650
9173
  }
9174
+ console.log(chalk.cyan(`\nTotal: ${formatSize(totalSize)}\n`));
9651
9175
  } catch (_e) {}
9652
9176
  });
9653
9177
  const formatSize = formatBytes;
9654
- program.command("cleanup").description("Clean up zombie Chrome pages and free memory").option("--kill-browser", "Also kill the shared Chrome browser (forces restart on next use)").option("--force", "Force kill all Gerbil Chrome processes (use if --kill-browser fails)").action(async (opts) => {
9655
- const { ChromeGPUBackend } = await import("./chrome-backend-DIKYoWj-.mjs");
9656
- const { execSync: execSync$1 } = await import("node:child_process");
9657
- console.log(chalk.cyan("\nChecking Chrome backend status...\n"));
9658
- let orphanPids = [];
9659
- try {
9660
- orphanPids = execSync$1("ps aux | grep \"gerbil/chrome-cache\" | grep -v grep", { encoding: "utf-8" }).trim().split("\n").filter(Boolean).map((line) => {
9661
- const parts = line.trim().split(/\s+/);
9662
- return Number.parseInt(parts[1], 10);
9663
- });
9664
- } catch {}
9665
- if (opts.force && orphanPids.length > 0) {
9666
- const spinner = ora(`Force killing ${orphanPids.length} Gerbil Chrome processes...`).start();
9178
+ program.command("cleanup").description("Clean up the native model cache and free disk space").option("--all", "Remove every cached model (frees the most space)").action(async (opts) => {
9179
+ const fs$1 = await import("node:fs");
9180
+ const path$1 = await import("node:path");
9181
+ const os$1 = await import("node:os");
9182
+ const nativeCacheDir = path$1.join(os$1.homedir(), ".cache", "gerbil");
9183
+ console.log(chalk.cyan("\nNative model cache\n"));
9184
+ if (!fs$1.existsSync(nativeCacheDir)) {
9185
+ console.log(chalk.gray("No native model cache found."));
9186
+ console.log();
9187
+ return;
9188
+ }
9189
+ const dirSize = (dir) => {
9190
+ let total = 0;
9667
9191
  try {
9668
- execSync$1("pkill -f \"gerbil/chrome-cache\"", { stdio: "ignore" });
9669
- spinner.succeed(`Force killed ${orphanPids.length} process(es)`);
9192
+ for (const item of fs$1.readdirSync(dir)) {
9193
+ const itemPath = path$1.join(dir, item);
9194
+ const stat = fs$1.statSync(itemPath);
9195
+ total += stat.isDirectory() ? dirSize(itemPath) : stat.size;
9196
+ }
9197
+ } catch {}
9198
+ return total;
9199
+ };
9200
+ const entries = fs$1.readdirSync(nativeCacheDir).filter((entry) => {
9201
+ try {
9202
+ return fs$1.statSync(path$1.join(nativeCacheDir, entry)).isDirectory();
9670
9203
  } catch {
9671
- spinner.fail("Failed to kill processes");
9204
+ return false;
9672
9205
  }
9206
+ });
9207
+ if (entries.length === 0) {
9208
+ console.log(chalk.gray("No cached models."));
9673
9209
  console.log();
9674
9210
  return;
9675
9211
  }
9676
- const status = ChromeGPUBackend.getGlobalBrowserStatus();
9677
- if (!status.running) {
9678
- if (orphanPids.length > 0) {
9679
- console.log(chalk.yellow(`Found ${orphanPids.length} orphan Chrome process(es) but no WS connection.`));
9680
- console.log(chalk.gray("Use --force to kill them: gerbil cleanup --force"));
9681
- } else console.log(chalk.gray("No Chrome backend running."));
9682
- return;
9683
- }
9684
- console.log(`Chrome PID: ${chalk.yellow(status.pid)}`);
9685
- console.log(`Server port: ${chalk.yellow(status.port)}`);
9686
- console.log(`Active pages: ${chalk.yellow(status.activePagesCount)}/${status.maxPages}`);
9687
- const pageInfo = await ChromeGPUBackend.getAllChromePages();
9688
- if (pageInfo.length > 0) {
9689
- console.log(chalk.cyan("\nGerbil pages:"));
9690
- for (let i = 0; i < pageInfo.length; i++) {
9691
- const p = pageInfo[i];
9692
- const memStr = p.memory ? `${p.memory.usedGB.toFixed(2)}GB` : "?";
9693
- const owner = p.isOurs ? chalk.green("(this process)") : chalk.yellow("(orphan)");
9694
- console.log(` [${i}] ${p.modelId || "unknown"} - ${memStr} ${owner}`);
9695
- }
9212
+ console.log(`Location: ${chalk.gray(nativeCacheDir)}`);
9213
+ let totalSize = 0;
9214
+ for (const entry of entries) {
9215
+ const size = dirSize(path$1.join(nativeCacheDir, entry));
9216
+ totalSize += size;
9217
+ console.log(` ${entry.replace(/_/g, "/")} ${chalk.gray(formatSize(size).padStart(10))}`);
9696
9218
  }
9697
- if (opts.killBrowser) {
9698
- const spinner = ora("Killing Chrome browser...").start();
9699
- const result = await ChromeGPUBackend.killAllBackends();
9700
- spinner.succeed(`Killed ${result.pagesKilled} page(s)${result.browserKilled ? " and browser" : ""}`);
9701
- } else {
9702
- const spinner = ora("Cleaning up orphan pages...").start();
9703
- let cleaned = 0;
9704
- for (let i = pageInfo.length - 1; i >= 0; i--) if (!pageInfo[i].isOurs) {
9705
- if (await ChromeGPUBackend.killPageByIndex(i)) cleaned++;
9219
+ console.log(chalk.cyan(`\nTotal: ${formatSize(totalSize)}\n`));
9220
+ if (opts.all) {
9221
+ const spinner = ora("Removing all cached models...").start();
9222
+ try {
9223
+ fs$1.rmSync(nativeCacheDir, {
9224
+ recursive: true,
9225
+ force: true
9226
+ });
9227
+ spinner.succeed(`Removed ${entries.length} model(s), freed ${formatSize(totalSize)}`);
9228
+ } catch (e) {
9229
+ spinner.fail(`Failed to remove cache: ${e.message}`);
9706
9230
  }
9707
- if (cleaned > 0) spinner.succeed(`Cleaned up ${cleaned} orphan page(s)`);
9708
- else spinner.info("No orphan pages to clean up");
9709
- }
9231
+ } else console.log(chalk.gray("Use --all to remove every cached model."));
9710
9232
  console.log();
9711
9233
  });
9712
- program.command("bench").description("Benchmark model performance").option("-m, --model <id>", "Model to benchmark", "qwen3-0.6b").option("-n, --runs <n>", "Number of runs", "3").action(async (opts) => {
9234
+ program.command("bench").description("Benchmark model performance").option("-m, --model <id>", "Model to benchmark", DEFAULT_MODEL).option("-n, --runs <n>", "Number of runs", "3").action(async (opts) => {
9713
9235
  const g = new Gerbil();
9714
9236
  const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
9715
9237
  try {
9716
- await g.loadModel(opts.model, { onProgress: (p) => {
9717
- spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
9718
- } });
9238
+ await g.loadModel(opts.model, {
9239
+ device: "webgpu",
9240
+ onProgress: (p) => {
9241
+ spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
9242
+ }
9243
+ });
9719
9244
  spinner.succeed("Model loaded");
9720
9245
  const runs = Number.parseInt(opts.runs, 10);
9721
9246
  const results = [];
@@ -9743,16 +9268,17 @@ program.command("bench").description("Benchmark model performance").option("-m,
9743
9268
  });
9744
9269
  runSpinner.succeed(`Run ${i + 1}: ${chalk.cyan(tokPerSec)} tok/s, ${chalk.yellow(`${firstTokenTime}ms`)} first token`);
9745
9270
  }
9746
- Math.round(results.reduce((a, r) => a + r.tokPerSec, 0) / results.length);
9747
- Math.round(results.reduce((a, r) => a + r.firstToken, 0) / results.length);
9271
+ const avgTokPerSec = Math.round(results.reduce((a, r) => a + r.tokPerSec, 0) / results.length);
9272
+ const avgFirstToken = Math.round(results.reduce((a, r) => a + r.firstToken, 0) / results.length);
9273
+ console.log(`\n${chalk.bold("Average")}: ${chalk.cyan(avgTokPerSec)} tok/s · ${chalk.yellow(`${avgFirstToken}ms`)} first token (${results.length} runs on ${chalk.green(g.getDeviceMode().toUpperCase())})\n`);
9748
9274
  await g.dispose();
9749
- } catch (_e) {
9750
- spinner.fail("Error");
9275
+ } catch (e) {
9276
+ spinner.fail(`Error: ${e?.message ?? e}`);
9751
9277
  process.exit(1);
9752
9278
  }
9753
9279
  });
9754
9280
  program.command("update").description("Update Gerbil to the latest version").action(async () => {
9755
- const { checkForUpdate: checkForUpdate$1, installUpdate: installUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-S9s5-g0C.mjs");
9281
+ const { checkForUpdate: checkForUpdate$1, installUpdate: installUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-BVaLXcDE.mjs");
9756
9282
  const spinner = ora("Checking for updates...").start();
9757
9283
  try {
9758
9284
  const check = await checkForUpdate$1();
@@ -9780,8 +9306,9 @@ program.parse = (...args) => {
9780
9306
  return result;
9781
9307
  };
9782
9308
  async function checkForUpdateCLI() {
9783
- const { checkForUpdate: checkForUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-S9s5-g0C.mjs");
9784
- if ((await checkForUpdate$1()).updateAvailable) {}
9309
+ const { checkForUpdate: checkForUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-BVaLXcDE.mjs");
9310
+ const check = await checkForUpdate$1();
9311
+ if (check.updateAvailable) console.error(chalk.yellow(`\nUpdate available: v${CURRENT_VERSION$1} → v${check.latestVersion}. Run ${chalk.cyan("gerbil update")}.`));
9785
9312
  }
9786
9313
  program.parse();
9787
9314