@tryhamster/gerbil 1.0.0-rc.9 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +318 -104
- package/dist/architectures-C1I5V3Dt.mjs +6070 -0
- package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
- package/dist/browser/index.d.ts +276 -590
- package/dist/browser/index.d.ts.map +1 -1
- package/dist/browser/index.js +592 -2334
- package/dist/browser/index.js.map +1 -1
- package/dist/cli.mjs +625 -1098
- package/dist/cli.mjs.map +1 -1
- package/dist/defaults-9komdrbY.mjs +24 -0
- package/dist/defaults-9komdrbY.mjs.map +1 -0
- package/dist/frameworks/express.d.mts +1 -3
- package/dist/frameworks/express.d.mts.map +1 -1
- package/dist/frameworks/express.mjs +7 -7
- package/dist/frameworks/express.mjs.map +1 -1
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.d.mts.map +1 -1
- package/dist/frameworks/fastify.mjs +3 -3
- package/dist/frameworks/fastify.mjs.map +1 -1
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.d.mts.map +1 -1
- package/dist/frameworks/hono.mjs +4 -4
- package/dist/frameworks/hono.mjs.map +1 -1
- package/dist/frameworks/next.d.mts +3 -2
- package/dist/frameworks/next.d.mts.map +1 -1
- package/dist/frameworks/next.mjs +4 -4
- package/dist/frameworks/next.mjs.map +1 -1
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts.map +1 -1
- package/dist/frameworks/trpc.mjs +4 -4
- package/dist/frameworks/trpc.mjs.map +1 -1
- package/dist/gerbil-BetB5xb0.d.mts +488 -0
- package/dist/gerbil-BetB5xb0.d.mts.map +1 -0
- package/dist/gerbil-CTZUa8EZ.mjs +4 -0
- package/dist/gerbil-DNniplr4.mjs +1656 -0
- package/dist/gerbil-DNniplr4.mjs.map +1 -0
- package/dist/gpu/hooks.d.mts +640 -0
- package/dist/gpu/hooks.d.mts.map +1 -0
- package/dist/gpu/hooks.mjs +1369 -0
- package/dist/gpu/hooks.mjs.map +1 -0
- package/dist/gpu/index.d.mts +2 -0
- package/dist/gpu/index.mjs +6 -0
- package/dist/gpu-DFuglcEx.mjs +3790 -0
- package/dist/gpu-DFuglcEx.mjs.map +1 -0
- package/dist/index-Dgmb2kE3.d.mts +245 -0
- package/dist/index-Dgmb2kE3.d.mts.map +1 -0
- package/dist/index-DukkJRMj.d.mts +2114 -0
- package/dist/index-DukkJRMj.d.mts.map +1 -0
- package/dist/index.d.mts +22 -487
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +13 -8
- package/dist/index.mjs.map +1 -1
- package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
- package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
- package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
- package/dist/integrations/ai-sdk.d.mts +75 -6
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +131 -15
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +1 -1
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +5 -5
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.d.mts.map +1 -1
- package/dist/integrations/llamaindex.mjs +5 -5
- package/dist/integrations/llamaindex.mjs.map +1 -1
- package/dist/integrations/mcp-client.mjs +3 -3
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +3 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +5 -5
- package/dist/{mcp-BvbriaBy.mjs → mcp-D2vvH1Xc.mjs} +4 -4
- package/dist/mcp-D2vvH1Xc.mjs.map +1 -0
- package/dist/memory/index.d.mts +3 -0
- package/dist/memory/index.mjs +6 -0
- package/dist/memory-D1P7Tmda.mjs +4 -0
- package/dist/memory-DVN0MnIG.mjs +132 -0
- package/dist/memory-DVN0MnIG.mjs.map +1 -0
- package/dist/memory-Dj0J1v88.mjs +294 -0
- package/dist/memory-Dj0J1v88.mjs.map +1 -0
- package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
- package/dist/moonshine-stt-4ojLtMq7.mjs +11962 -0
- package/dist/moonshine-stt-4ojLtMq7.mjs.map +1 -0
- package/dist/{one-liner-s-lD8rCC.mjs → one-liner-JhdIPxzF.mjs} +14 -16
- package/dist/one-liner-JhdIPxzF.mjs.map +1 -0
- package/dist/repl-BDRkwPGX.mjs +9 -0
- package/dist/skills/index.d.mts +270 -320
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +5 -5
- package/dist/{skills-CD3Orlex.mjs → skills-CU694Dc8.mjs} +187 -32
- package/dist/skills-CU694Dc8.mjs.map +1 -0
- package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
- package/dist/tools-DQ1mPUw5.mjs.map +1 -0
- package/dist/types-DQBe2lFo.d.mts +165 -0
- package/dist/types-DQBe2lFo.d.mts.map +1 -0
- package/dist/{types-CiTc7ez3.d.mts → types-LlyYILII.d.mts} +112 -14
- package/dist/types-LlyYILII.d.mts.map +1 -0
- package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
- package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
- package/dist/vector-B0panuy6.mjs +95 -0
- package/dist/vector-B0panuy6.mjs.map +1 -0
- package/docs/PROJECT-STATE.md +321 -0
- package/docs/adding-a-model-family.md +280 -0
- package/docs/ai-sdk.md +70 -61
- package/docs/architecture/overview.md +17 -7
- package/docs/browser.md +203 -8
- package/docs/embeddings.md +156 -0
- package/docs/gerbil-site-native-migration.md +217 -0
- package/docs/gpu-engine/architectures.md +398 -0
- package/docs/gpu-engine/ir.md +372 -0
- package/docs/gpu-engine/kernels.md +718 -0
- package/docs/gpu-engine/paper.html +1759 -0
- package/docs/gpu-engine/paper.md +2109 -0
- package/docs/gpu-engine/safetensors.md +312 -0
- package/docs/gpu-engine/tokenizer.md +302 -0
- package/docs/memory-rag.md +91 -0
- package/docs/metal-safari-intel.md +190 -0
- package/docs/mobile-failure-diagnosis.md +124 -0
- package/docs/mobile.md +99 -0
- package/docs/observability.md +230 -0
- package/docs/onnx-removal-plan.md +339 -0
- package/docs/research/autoresearch-portable.md +904 -0
- package/docs/research/dispatch-reduction-hivemind.md +84 -0
- package/docs/research/ios-safari-model-caching.md +117 -0
- package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
- package/docs/research/native-stt-model-selection.md +49 -0
- package/docs/research/native-tts-model-selection.md +90 -0
- package/docs/research/native-vs-chromium-decision.md +152 -0
- package/docs/research/nemotron-mamba2-inference.md +910 -0
- package/docs/research/qwen35-multimodal.md +293 -0
- package/docs/research/qwen36-gemma4-targets.md +337 -0
- package/docs/research/sota-embedding-models.md +179 -0
- package/docs/research/sota-mobile-models-2026.md +263 -0
- package/docs/research/sota-modality-models.md +202 -0
- package/docs/research/tps-baselines.md +71 -0
- package/docs/research/webgpu-m4-reference.md +104 -0
- package/docs/site-update-plan.md +155 -0
- package/docs/structured-output.md +123 -0
- package/docs/stt.md +63 -446
- package/docs/tts.md +77 -499
- package/docs/vision.md +100 -338
- package/package.json +22 -7
- package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
- package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
- package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
- package/dist/gerbil-CJ3ifloF.mjs +0 -4
- package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
- package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
- package/dist/gerbil-qOTe1nl2.d.mts +0 -431
- package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
- package/dist/kokoro-BNTb6egA.mjs +0 -20210
- package/dist/kokoro-BNTb6egA.mjs.map +0 -1
- package/dist/kokoro-CMOGDSgT.js +0 -20212
- package/dist/kokoro-CMOGDSgT.js.map +0 -1
- package/dist/mcp-BvbriaBy.mjs.map +0 -1
- package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
- package/dist/repl-DveXw36T.mjs +0 -9
- package/dist/skills-CD3Orlex.mjs.map +0 -1
- package/dist/stt-Bu-E23Sc.js +0 -433
- package/dist/stt-Bu-E23Sc.js.map +0 -1
- package/dist/stt-CpLYbGFd.mjs +0 -433
- package/dist/stt-CpLYbGFd.mjs.map +0 -1
- package/dist/stt-DRPLEEHB.mjs +0 -3
- package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
- package/dist/transformers.web-DiD1gTwk.js +0 -44695
- package/dist/transformers.web-DiD1gTwk.js.map +0 -1
- package/dist/transformers.web-u34VxRFM.js +0 -3
- package/dist/tts-CqroPaSK.js +0 -724
- package/dist/tts-CqroPaSK.js.map +0 -1
- package/dist/tts-DXgsKGCe.mjs +0 -3
- package/dist/tts-DeGANMNV.mjs +0 -730
- package/dist/tts-DeGANMNV.mjs.map +0 -1
- package/dist/types-CiTc7ez3.d.mts.map +0 -1
- /package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
- /package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
- /package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0
package/dist/cli.mjs
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import { t as __require } from "./chunk-
|
|
3
|
-
import { n as BUILTIN_MODELS, t as Gerbil } from "./gerbil-
|
|
4
|
-
import
|
|
5
|
-
import "./
|
|
6
|
-
import "./
|
|
7
|
-
import {
|
|
8
|
-
import { r as startMCPServer } from "./mcp-
|
|
9
|
-
import { a as getToolDefinitions, c as setToolContext, i as getTool, n as executeToolCall, o as loadProjectTools, r as formatToolsForPrompt, s as parseToolCall } from "./tools-
|
|
10
|
-
import { exec, spawn, spawnSync } from "node:child_process";
|
|
2
|
+
import { t as __require } from "./chunk-B9cbKln6.mjs";
|
|
3
|
+
import { n as BUILTIN_MODELS, r as DEFAULT_MODEL, t as Gerbil } from "./gerbil-DNniplr4.mjs";
|
|
4
|
+
import "./utils-DKO55ZmZ.mjs";
|
|
5
|
+
import "./one-liner-JhdIPxzF.mjs";
|
|
6
|
+
import { n as isArchitectureSupported } from "./architectures-C1I5V3Dt.mjs";
|
|
7
|
+
import { A as useSkill, E as getSkillInfo, O as listSkills, a as summarize, f as explain, h as commit, s as review, y as loadProjectSkills } from "./skills-CU694Dc8.mjs";
|
|
8
|
+
import { r as startMCPServer } from "./mcp-D2vvH1Xc.mjs";
|
|
9
|
+
import { a as getToolDefinitions, c as setToolContext, i as getTool, n as executeToolCall, o as loadProjectTools, r as formatToolsForPrompt, s as parseToolCall } from "./tools-DQ1mPUw5.mjs";
|
|
11
10
|
import fs, { existsSync, readFileSync, unlinkSync } from "node:fs";
|
|
12
|
-
import http from "node:http";
|
|
13
11
|
import os, { tmpdir } from "node:os";
|
|
14
12
|
import path, { join } from "node:path";
|
|
13
|
+
import { exec, spawn, spawnSync } from "node:child_process";
|
|
15
14
|
import React, { useCallback, useEffect, useRef, useState } from "react";
|
|
16
15
|
import chalk from "chalk";
|
|
17
16
|
import { Command } from "commander";
|
|
@@ -22,9 +21,10 @@ import Spinner from "ink-spinner";
|
|
|
22
21
|
import { Fragment, jsx, jsxs } from "react/jsx-runtime";
|
|
23
22
|
import SelectInput from "ink-select-input";
|
|
24
23
|
import TextInput from "ink-text-input";
|
|
24
|
+
import http from "node:http";
|
|
25
25
|
|
|
26
26
|
//#region package.json
|
|
27
|
-
var version = "1.0.
|
|
27
|
+
var version = "1.0.1";
|
|
28
28
|
|
|
29
29
|
//#endregion
|
|
30
30
|
//#region src/cli/repl/auto-update.ts
|
|
@@ -123,120 +123,82 @@ function getGerbilModelsDir() {
|
|
|
123
123
|
return path.join(process.cwd(), ".gerbil", "models");
|
|
124
124
|
}
|
|
125
125
|
/**
|
|
126
|
-
* Get
|
|
126
|
+
* Get the native WebGPU engine cache directory (downloaded safetensors models).
|
|
127
|
+
* Layout: ~/.cache/gerbil/<repo>/<revision>/ — see src/gpu/model-loader.ts.
|
|
127
128
|
*/
|
|
128
|
-
function
|
|
129
|
-
|
|
130
|
-
if (fs.existsSync(nodeModulesCache)) return nodeModulesCache;
|
|
131
|
-
return process.env.HF_HOME || process.env.TRANSFORMERS_CACHE || path.join(os.homedir(), ".cache", "huggingface", "hub");
|
|
129
|
+
function getNativeCacheDir() {
|
|
130
|
+
return path.join(os.homedir(), ".cache", "gerbil");
|
|
132
131
|
}
|
|
133
132
|
/**
|
|
134
|
-
* Check if a model is cached locally
|
|
133
|
+
* Check if a model is cached locally for the native engine.
|
|
134
|
+
*
|
|
135
|
+
* The native loader stores each repo under ~/.cache/gerbil/<repo>/<revision>/
|
|
136
|
+
* (slashes in the repo replaced by underscores), plus the project-local
|
|
137
|
+
* .gerbil/models dir. A model counts as cached when a safetensors weight file
|
|
138
|
+
* is present somewhere under its directory.
|
|
135
139
|
*/
|
|
136
140
|
function isModelCached(hfId) {
|
|
137
141
|
const [org, model] = hfId.split("/");
|
|
138
142
|
if (!(org && model)) return false;
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
})) return true;
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
} catch {}
|
|
159
|
-
}
|
|
160
|
-
return false;
|
|
143
|
+
const hasSafetensors = (root) => {
|
|
144
|
+
try {
|
|
145
|
+
if (!fs.existsSync(root)) return false;
|
|
146
|
+
if (!fs.statSync(root).isDirectory()) return false;
|
|
147
|
+
return fs.readdirSync(root, { recursive: true }).some((f) => String(f).toLowerCase().endsWith(".safetensors"));
|
|
148
|
+
} catch {
|
|
149
|
+
return false;
|
|
150
|
+
}
|
|
151
|
+
};
|
|
152
|
+
if (hasSafetensors(path.join(getNativeCacheDir(), hfId.replace(/\//g, "_")))) return true;
|
|
153
|
+
const gerbilDir = getGerbilModelsDir();
|
|
154
|
+
return [
|
|
155
|
+
path.join(gerbilDir, org, model),
|
|
156
|
+
path.join(gerbilDir, hfId.replace(/\//g, "_")),
|
|
157
|
+
path.join(gerbilDir, hfId.replace("/", "--"))
|
|
158
|
+
].some(hasSafetensors);
|
|
161
159
|
}
|
|
162
160
|
/**
|
|
163
161
|
* Scan a single cache directory for models
|
|
164
162
|
*/
|
|
165
163
|
function scanCacheDir(cacheDir, models) {
|
|
166
164
|
let totalBytes = 0;
|
|
165
|
+
const measure = (root) => {
|
|
166
|
+
let total = 0;
|
|
167
|
+
let modelBytes = 0;
|
|
168
|
+
try {
|
|
169
|
+
const files = fs.readdirSync(root, { recursive: true });
|
|
170
|
+
for (const file of files) try {
|
|
171
|
+
const fileStat = fs.statSync(path.join(root, String(file)));
|
|
172
|
+
if (fileStat.isFile()) {
|
|
173
|
+
total += fileStat.size;
|
|
174
|
+
if (String(file).toLowerCase().endsWith(".safetensors")) modelBytes += fileStat.size;
|
|
175
|
+
}
|
|
176
|
+
} catch {}
|
|
177
|
+
} catch {}
|
|
178
|
+
return {
|
|
179
|
+
total,
|
|
180
|
+
modelBytes
|
|
181
|
+
};
|
|
182
|
+
};
|
|
167
183
|
try {
|
|
168
184
|
if (!fs.existsSync(cacheDir)) return 0;
|
|
169
|
-
const
|
|
170
|
-
for (const entry of entries) {
|
|
185
|
+
for (const entry of fs.readdirSync(cacheDir)) {
|
|
171
186
|
const entryPath = path.join(cacheDir, entry);
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
try {
|
|
186
|
-
const files = fs.readdirSync(subPath, { recursive: true });
|
|
187
|
-
for (const file of files) try {
|
|
188
|
-
const filePath = path.join(subPath, String(file));
|
|
189
|
-
const fileStat = fs.statSync(filePath);
|
|
190
|
-
if (fileStat.isFile()) {
|
|
191
|
-
totalSize += fileStat.size;
|
|
192
|
-
const fname = String(file).toLowerCase();
|
|
193
|
-
if (fname.endsWith(".onnx") || fname.endsWith(".safetensors")) modelFileSize += fileStat.size;
|
|
194
|
-
}
|
|
195
|
-
} catch {}
|
|
196
|
-
} catch {
|
|
197
|
-
totalSize = subStat.size;
|
|
198
|
-
}
|
|
199
|
-
if (totalSize > 0) {
|
|
200
|
-
totalBytes += totalSize;
|
|
201
|
-
const lastUsed = formatTimeAgo(subStat.mtime);
|
|
202
|
-
models.push({
|
|
203
|
-
name: fullModelName,
|
|
204
|
-
modelSize: formatBytes(modelFileSize || totalSize),
|
|
205
|
-
totalSize: formatBytes(totalSize),
|
|
206
|
-
lastUsed,
|
|
207
|
-
location: cacheDir
|
|
208
|
-
});
|
|
209
|
-
}
|
|
210
|
-
}
|
|
211
|
-
} catch {}
|
|
212
|
-
}
|
|
213
|
-
if (subEntries.some((f) => f.endsWith(".onnx") || f.endsWith(".json"))) {
|
|
214
|
-
let totalSize = 0;
|
|
215
|
-
let modelFileSize = 0;
|
|
216
|
-
try {
|
|
217
|
-
const files = fs.readdirSync(entryPath, { recursive: true });
|
|
218
|
-
for (const file of files) try {
|
|
219
|
-
const filePath = path.join(entryPath, String(file));
|
|
220
|
-
const fileStat = fs.statSync(filePath);
|
|
221
|
-
if (fileStat.isFile()) {
|
|
222
|
-
totalSize += fileStat.size;
|
|
223
|
-
const fname = String(file).toLowerCase();
|
|
224
|
-
if (fname.endsWith(".onnx") || fname.endsWith(".safetensors")) modelFileSize += fileStat.size;
|
|
225
|
-
}
|
|
226
|
-
} catch {}
|
|
227
|
-
} catch {
|
|
228
|
-
totalSize = entryStat.size;
|
|
187
|
+
try {
|
|
188
|
+
const entryStat = fs.statSync(entryPath);
|
|
189
|
+
if (!entryStat.isDirectory()) continue;
|
|
190
|
+
const { total, modelBytes } = measure(entryPath);
|
|
191
|
+
if (total > 0) {
|
|
192
|
+
totalBytes += total;
|
|
193
|
+
models.push({
|
|
194
|
+
name: entry.replace(/_/g, "/"),
|
|
195
|
+
modelSize: formatBytes(modelBytes || total),
|
|
196
|
+
totalSize: formatBytes(total),
|
|
197
|
+
lastUsed: formatTimeAgo(entryStat.mtime),
|
|
198
|
+
location: cacheDir
|
|
199
|
+
});
|
|
229
200
|
}
|
|
230
|
-
|
|
231
|
-
const lastUsed = formatTimeAgo(entryStat.mtime);
|
|
232
|
-
models.push({
|
|
233
|
-
name: modelName,
|
|
234
|
-
modelSize: formatBytes(modelFileSize || totalSize),
|
|
235
|
-
totalSize: formatBytes(totalSize),
|
|
236
|
-
lastUsed,
|
|
237
|
-
location: cacheDir
|
|
238
|
-
});
|
|
239
|
-
}
|
|
201
|
+
} catch {}
|
|
240
202
|
}
|
|
241
203
|
} catch {}
|
|
242
204
|
return totalBytes;
|
|
@@ -246,32 +208,14 @@ function scanCacheDir(cacheDir, models) {
|
|
|
246
208
|
*/
|
|
247
209
|
function getCacheInfo() {
|
|
248
210
|
const gerbilDir = getGerbilModelsDir();
|
|
249
|
-
const
|
|
211
|
+
const nativeDir = getNativeCacheDir();
|
|
250
212
|
const models = [];
|
|
251
213
|
let totalBytes = 0;
|
|
252
214
|
totalBytes += scanCacheDir(gerbilDir, models);
|
|
253
|
-
totalBytes += scanCacheDir(
|
|
254
|
-
let hasMissingSizes = false;
|
|
255
|
-
try {
|
|
256
|
-
const chromeCached = getChromeCachedModels();
|
|
257
|
-
for (const entry of chromeCached) if (!models.some((m) => m.name === entry.modelId || m.name.replace("--", "/") === entry.modelId || entry.modelId.replace("/", "--") === m.name)) {
|
|
258
|
-
const lastUsed = entry.lastUsed ? formatTimeAgo(new Date(entry.lastUsed)) : "unknown";
|
|
259
|
-
if (!(entry.sizeBytes && entry.contextLength)) hasMissingSizes = true;
|
|
260
|
-
models.push({
|
|
261
|
-
name: entry.modelId,
|
|
262
|
-
modelSize: entry.sizeBytes ? formatBytes(entry.sizeBytes) : "~",
|
|
263
|
-
totalSize: entry.sizeBytes ? formatBytes(entry.sizeBytes) : "~",
|
|
264
|
-
lastUsed,
|
|
265
|
-
location: "Chrome IndexedDB",
|
|
266
|
-
contextLength: entry.contextLength
|
|
267
|
-
});
|
|
268
|
-
if (entry.sizeBytes) totalBytes += entry.sizeBytes;
|
|
269
|
-
}
|
|
270
|
-
if (hasMissingSizes) refreshCachedModelSizes().catch(() => {});
|
|
271
|
-
} catch {}
|
|
215
|
+
totalBytes += scanCacheDir(nativeDir, models);
|
|
272
216
|
const activeLocations = [...new Set(models.map((m) => m.location))];
|
|
273
217
|
return {
|
|
274
|
-
locations: activeLocations.length > 0 ? activeLocations : [gerbilDir,
|
|
218
|
+
locations: activeLocations.length > 0 ? activeLocations : [gerbilDir, nativeDir],
|
|
275
219
|
totalSize: formatBytes(totalBytes),
|
|
276
220
|
models: models.sort((a, b) => a.name.localeCompare(b.name))
|
|
277
221
|
};
|
|
@@ -304,50 +248,35 @@ function cleanResponse(text) {
|
|
|
304
248
|
return text.replace(/<think>[\s\S]*?<\/think>/g, "").replace(/<\/?think>/g, "").trim();
|
|
305
249
|
}
|
|
306
250
|
/**
|
|
307
|
-
* Preset models
|
|
251
|
+
* Preset models, native WebGPU engine only.
|
|
252
|
+
*
|
|
253
|
+
* The native engine loads standard HuggingFace safetensors repos and quantizes
|
|
254
|
+
* to INT4 on the fly (dtype "q4"); there is no ONNX involved. Every entry here
|
|
255
|
+
* is an architecture the engine supports (Qwen2/Qwen3/Qwen3.5, LFM2 — see
|
|
256
|
+
* src/gpu/architectures/index.ts). `hfId` is the safetensors repo the loader
|
|
257
|
+
* pulls and the key used for native cache detection.
|
|
308
258
|
*/
|
|
309
259
|
const PRESET_MODELS = [
|
|
310
260
|
{
|
|
311
|
-
id: "qwen3-0.
|
|
312
|
-
name: "Qwen3 0.
|
|
313
|
-
size: "~
|
|
261
|
+
id: "qwen3.5-0.8b",
|
|
262
|
+
name: "Qwen3.5 0.8B",
|
|
263
|
+
size: "~500MB (q4)",
|
|
314
264
|
speed: "fastest",
|
|
315
|
-
hfId: "
|
|
265
|
+
hfId: "Qwen/Qwen3.5-0.8B"
|
|
316
266
|
},
|
|
317
267
|
{
|
|
318
|
-
id: "
|
|
319
|
-
name: "
|
|
320
|
-
size: "~
|
|
268
|
+
id: "qwen3.5-2b",
|
|
269
|
+
name: "Qwen3.5 2B",
|
|
270
|
+
size: "~1.3GB (q4)",
|
|
321
271
|
speed: "fast",
|
|
322
|
-
hfId: "
|
|
272
|
+
hfId: "Qwen/Qwen3.5-2B"
|
|
323
273
|
},
|
|
324
274
|
{
|
|
325
|
-
id: "
|
|
326
|
-
name: "
|
|
327
|
-
size: "~
|
|
275
|
+
id: "lfm2.5-1.2b-thinking",
|
|
276
|
+
name: "LFM2.5 1.2B Thinking",
|
|
277
|
+
size: "~760MB (q4)",
|
|
328
278
|
speed: "fast",
|
|
329
|
-
hfId: "
|
|
330
|
-
},
|
|
331
|
-
{
|
|
332
|
-
id: "smollm2-360m",
|
|
333
|
-
name: "SmolLM2 360M",
|
|
334
|
-
size: "~250MB",
|
|
335
|
-
speed: "fastest",
|
|
336
|
-
hfId: "HuggingFaceTB/SmolLM2-360M-Instruct"
|
|
337
|
-
},
|
|
338
|
-
{
|
|
339
|
-
id: "smollm2-135m",
|
|
340
|
-
name: "SmolLM2 135M",
|
|
341
|
-
size: "~100MB",
|
|
342
|
-
speed: "fastest",
|
|
343
|
-
hfId: "HuggingFaceTB/SmolLM2-135M-Instruct"
|
|
344
|
-
},
|
|
345
|
-
{
|
|
346
|
-
id: "phi-3-mini",
|
|
347
|
-
name: "Phi-3 Mini",
|
|
348
|
-
size: "~2.1GB",
|
|
349
|
-
speed: "medium",
|
|
350
|
-
hfId: "microsoft/Phi-3-mini-4k-instruct-onnx"
|
|
279
|
+
hfId: "LiquidAI/LFM2.5-1.2B-Thinking"
|
|
351
280
|
}
|
|
352
281
|
];
|
|
353
282
|
/**
|
|
@@ -371,19 +300,12 @@ async function fetchModelMetadata(modelId) {
|
|
|
371
300
|
}
|
|
372
301
|
} catch {}
|
|
373
302
|
try {
|
|
374
|
-
const treeRes = await fetch(`https://huggingface.co/api/models/${modelId}/tree/main
|
|
303
|
+
const treeRes = await fetch(`https://huggingface.co/api/models/${modelId}/tree/main?recursive=true`);
|
|
375
304
|
if (treeRes.ok) {
|
|
376
305
|
const files = await treeRes.json();
|
|
377
306
|
const getSize = (f) => f.lfs?.size || f.size || 0;
|
|
378
|
-
const
|
|
379
|
-
|
|
380
|
-
const fp16 = files.find((f) => f.path.includes("fp16") && f.path.endsWith(".onnx"));
|
|
381
|
-
const anyOnnx = files.find((f) => f.path.endsWith(".onnx"));
|
|
382
|
-
const bestFile = q4f16 || q4 || fp16 || anyOnnx;
|
|
383
|
-
if (bestFile) {
|
|
384
|
-
const baseName = bestFile.path.replace(".onnx", "");
|
|
385
|
-
result.sizeBytes = files.filter((f) => f.path === bestFile.path || f.path.startsWith(`${baseName}.onnx_data`)).reduce((sum, f) => sum + getSize(f), 0);
|
|
386
|
-
}
|
|
307
|
+
const safetensors = files.filter((f) => f.path.toLowerCase().endsWith(".safetensors"));
|
|
308
|
+
if (safetensors.length > 0) result.sizeBytes = safetensors.reduce((sum, f) => sum + getSize(f), 0);
|
|
387
309
|
}
|
|
388
310
|
} catch {}
|
|
389
311
|
return result;
|
|
@@ -421,12 +343,12 @@ function saveBenchmark(result) {
|
|
|
421
343
|
*/
|
|
422
344
|
/**
|
|
423
345
|
* Normalize model ID for matching (extract core name)
|
|
424
|
-
* e.g., "
|
|
425
|
-
* "
|
|
426
|
-
* "qwen3-0.
|
|
346
|
+
* e.g., "Qwen/Qwen3.5-0.8B" -> "qwen3.5-0.8b"
|
|
347
|
+
* "mlx-community/Qwen3.5-0.8B-4bit" -> "qwen3.5-0.8b"
|
|
348
|
+
* "qwen3.5-0.8b" -> "qwen3.5-0.8b"
|
|
427
349
|
*/
|
|
428
350
|
function normalizeModelId(id) {
|
|
429
|
-
return (id.split("/").pop() || id).toLowerCase().replace(/-instruct$/i, "").replace(/-onnx$/i, "").replace(/-chat$/i, "").replace(/-it$/i, "");
|
|
351
|
+
return (id.split("/").pop() || id).toLowerCase().replace(/-instruct$/i, "").replace(/-onnx$/i, "").replace(/-4bit$/i, "").replace(/-chat$/i, "").replace(/-it$/i, "");
|
|
430
352
|
}
|
|
431
353
|
/**
|
|
432
354
|
* Process an image path (URL or local file) and return info for attaching
|
|
@@ -508,7 +430,7 @@ function getModelBenchmarkStats(modelId) {
|
|
|
508
430
|
|
|
509
431
|
//#endregion
|
|
510
432
|
//#region src/cli/repl/views/BenchmarkView.tsx
|
|
511
|
-
function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchModel,
|
|
433
|
+
function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchModel, onStatusChange, benchmarkResults, setBenchmarkResults, benchmarkModels, setBenchmarkModels }) {
|
|
512
434
|
const [status, setStatus] = useState("idle");
|
|
513
435
|
const [currentRun, setCurrentRun] = useState(benchmarkResults.length);
|
|
514
436
|
const [showHistory, setShowHistory] = useState(false);
|
|
@@ -564,7 +486,6 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
564
486
|
setBenchmarkModels([model]);
|
|
565
487
|
}
|
|
566
488
|
if ((input === "m" || input === "M") && !blocked) onSwitchModel?.();
|
|
567
|
-
if ((input === "d" || input === "D") && !blocked) onSwitchDevice?.(currentDevice === "webgpu" ? "cpu" : "webgpu");
|
|
568
489
|
if ((input === "h" || input === "H") && !blocked) {
|
|
569
490
|
if (!showHistory) setHistoryData(getStoredBenchmarks());
|
|
570
491
|
setShowHistory(!showHistory);
|
|
@@ -593,25 +514,30 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
593
514
|
"List 3 benefits of TypeScript."
|
|
594
515
|
];
|
|
595
516
|
const prompt = prompts[currentRun % prompts.length];
|
|
596
|
-
const startTime =
|
|
597
|
-
let
|
|
598
|
-
let
|
|
517
|
+
const startTime = performance.now();
|
|
518
|
+
let firstTokenAt = 0;
|
|
519
|
+
let lastTokenAt = 0;
|
|
520
|
+
let tokenCount = 0;
|
|
599
521
|
const result = await gerbil.generate(prompt, {
|
|
600
522
|
maxTokens: 100,
|
|
601
523
|
onToken: () => {
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
524
|
+
const now = performance.now();
|
|
525
|
+
tokenCount++;
|
|
526
|
+
if (tokenCount === 1) firstTokenAt = now;
|
|
527
|
+
lastTokenAt = now;
|
|
606
528
|
}
|
|
607
529
|
});
|
|
530
|
+
const endTime = performance.now();
|
|
531
|
+
const hasTokenEvents = tokenCount > 0;
|
|
532
|
+
const firstTokenMs = hasTokenEvents ? Math.round(firstTokenAt - startTime) : Math.round(result.totalTime * .15);
|
|
533
|
+
const tokensPerSec = hasTokenEvents ? tokenCount > 1 && lastTokenAt > firstTokenAt ? Math.round(tokenCount / (lastTokenAt - firstTokenAt) * 1e3) : Math.round(tokenCount / (endTime - startTime) * 1e3) : Math.round(result.tokensGenerated / result.totalTime * 1e3);
|
|
608
534
|
const newResult = {
|
|
609
535
|
model,
|
|
610
536
|
device: gerbil.getDeviceMode(),
|
|
611
|
-
tokensPerSec
|
|
612
|
-
firstTokenMs
|
|
613
|
-
totalTokens: result.tokensGenerated,
|
|
614
|
-
totalMs: Math.round(
|
|
537
|
+
tokensPerSec,
|
|
538
|
+
firstTokenMs,
|
|
539
|
+
totalTokens: hasTokenEvents ? tokenCount : result.tokensGenerated,
|
|
540
|
+
totalMs: Math.round(endTime - startTime)
|
|
615
541
|
};
|
|
616
542
|
saveBenchmark(newResult);
|
|
617
543
|
const updatedResults = [...benchmarkResults, newResult];
|
|
@@ -621,13 +547,11 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
621
547
|
onResult?.(newResult.tokensPerSec);
|
|
622
548
|
};
|
|
623
549
|
const currentResults = benchmarkResults.filter((r) => r.model === model && r.device === currentDevice);
|
|
624
|
-
const modelStats = [...new Set(benchmarkResults.map((r) =>
|
|
625
|
-
const
|
|
626
|
-
const results = benchmarkResults.filter((r) => r.model === m && r.device === d);
|
|
550
|
+
const modelStats = [...new Set(benchmarkResults.map((r) => r.model))].map((m) => {
|
|
551
|
+
const results = benchmarkResults.filter((r) => r.model === m);
|
|
627
552
|
if (results.length === 0) return null;
|
|
628
553
|
return {
|
|
629
554
|
model: m,
|
|
630
|
-
device: d,
|
|
631
555
|
runs: results.length,
|
|
632
556
|
avgTokPerSec: Math.round(results.reduce((a, r) => a + r.tokensPerSec, 0) / results.length),
|
|
633
557
|
avgFirstToken: Math.round(results.reduce((a, r) => a + r.firstTokenMs, 0) / results.length)
|
|
@@ -664,7 +588,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
664
588
|
/* @__PURE__ */ jsx(Text, { children: " on " }),
|
|
665
589
|
/* @__PURE__ */ jsx(Text, {
|
|
666
590
|
bold: true,
|
|
667
|
-
color:
|
|
591
|
+
color: "green",
|
|
668
592
|
children: currentDevice.toUpperCase()
|
|
669
593
|
}),
|
|
670
594
|
modelStats.length > 1 && /* @__PURE__ */ jsxs(Text, {
|
|
@@ -672,7 +596,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
672
596
|
children: [
|
|
673
597
|
" (comparing ",
|
|
674
598
|
modelStats.length,
|
|
675
|
-
"
|
|
599
|
+
" models)"
|
|
676
600
|
]
|
|
677
601
|
})
|
|
678
602
|
] }),
|
|
@@ -735,22 +659,12 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
735
659
|
color: "green",
|
|
736
660
|
children: modelStats.length > 1 ? "Comparison:" : "Averages:"
|
|
737
661
|
}), modelStats.map((s) => {
|
|
738
|
-
const isCurrent = s.model === model && s.device === currentDevice;
|
|
739
|
-
const dColor = s.device === "webgpu" ? "green" : s.device === "cpu" ? "yellow" : "gray";
|
|
740
662
|
return /* @__PURE__ */ jsxs(Box, { children: [
|
|
741
663
|
/* @__PURE__ */ jsxs(Text, { children: [
|
|
742
|
-
|
|
664
|
+
s.model === model ? ">" : " ",
|
|
743
665
|
" ",
|
|
744
666
|
s.model
|
|
745
667
|
] }),
|
|
746
|
-
/* @__PURE__ */ jsxs(Text, {
|
|
747
|
-
color: dColor,
|
|
748
|
-
children: [
|
|
749
|
-
" [",
|
|
750
|
-
s.device,
|
|
751
|
-
"]"
|
|
752
|
-
]
|
|
753
|
-
}),
|
|
754
668
|
/* @__PURE__ */ jsx(Text, { children: ": " }),
|
|
755
669
|
/* @__PURE__ */ jsxs(Text, {
|
|
756
670
|
bold: true,
|
|
@@ -774,7 +688,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
774
688
|
color: "green",
|
|
775
689
|
children: " *fastest*"
|
|
776
690
|
})
|
|
777
|
-
] },
|
|
691
|
+
] }, s.model);
|
|
778
692
|
})]
|
|
779
693
|
}), bests && benchmarkResults.length >= 2 && /* @__PURE__ */ jsxs(Box, {
|
|
780
694
|
borderColor: "magenta",
|
|
@@ -803,11 +717,8 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
803
717
|
/* @__PURE__ */ jsxs(Text, {
|
|
804
718
|
dimColor: true,
|
|
805
719
|
children: [
|
|
806
|
-
" ",
|
|
807
|
-
"(",
|
|
720
|
+
" (",
|
|
808
721
|
bests.fastestRun.model,
|
|
809
|
-
" on ",
|
|
810
|
-
bests.fastestRun.device,
|
|
811
722
|
")"
|
|
812
723
|
]
|
|
813
724
|
})
|
|
@@ -827,11 +738,8 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
827
738
|
/* @__PURE__ */ jsxs(Text, {
|
|
828
739
|
dimColor: true,
|
|
829
740
|
children: [
|
|
830
|
-
" ",
|
|
831
|
-
"(",
|
|
741
|
+
" (",
|
|
832
742
|
bests.fastestFirstToken.model,
|
|
833
|
-
" on ",
|
|
834
|
-
bests.fastestFirstToken.device,
|
|
835
743
|
")"
|
|
836
744
|
]
|
|
837
745
|
})
|
|
@@ -851,12 +759,9 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
851
759
|
dimColor: true,
|
|
852
760
|
children: [
|
|
853
761
|
" ",
|
|
854
|
-
"
|
|
855
|
-
bests.bestOverall.device,
|
|
856
|
-
" (",
|
|
762
|
+
"(",
|
|
857
763
|
bests.bestOverall.avgTokPerSec,
|
|
858
|
-
" tok/s,",
|
|
859
|
-
" ",
|
|
764
|
+
" tok/s, ",
|
|
860
765
|
bests.bestOverall.avgFirstToken,
|
|
861
766
|
"ms)"
|
|
862
767
|
]
|
|
@@ -917,9 +822,7 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
917
822
|
" ",
|
|
918
823
|
"- ",
|
|
919
824
|
b.model,
|
|
920
|
-
"
|
|
921
|
-
b.device,
|
|
922
|
-
"] ",
|
|
825
|
+
" ",
|
|
923
826
|
new Date(b.timestamp).toLocaleDateString()
|
|
924
827
|
]
|
|
925
828
|
})
|
|
@@ -960,14 +863,6 @@ function BenchmarkView({ gerbil, model, disabled = false, onResult, onSwitchMode
|
|
|
960
863
|
children: "Enter"
|
|
961
864
|
}),
|
|
962
865
|
" run | ",
|
|
963
|
-
/* @__PURE__ */ jsx(Text, {
|
|
964
|
-
color: "yellow",
|
|
965
|
-
children: "d"
|
|
966
|
-
}),
|
|
967
|
-
" switch",
|
|
968
|
-
" ",
|
|
969
|
-
currentDevice === "webgpu" ? "CPU" : "GPU",
|
|
970
|
-
" | ",
|
|
971
866
|
/* @__PURE__ */ jsx(Text, {
|
|
972
867
|
color: "yellow",
|
|
973
868
|
children: "m"
|
|
@@ -1667,9 +1562,7 @@ function ChatView({ gerbil, thinkingMode, agentMode, voiceMode, onToggleThinking
|
|
|
1667
1562
|
}, []);
|
|
1668
1563
|
const modes = Object.keys(CHAT_MODES);
|
|
1669
1564
|
const modelInfo = gerbil.getModelInfo();
|
|
1670
|
-
const
|
|
1671
|
-
const cachedModels = modelId ? getChromeCachedModels() : [];
|
|
1672
|
-
const maxContext = (modelId ? cachedModels.find((m) => m.modelId === modelId) : null)?.contextLength || modelInfo?.contextLength || 32768;
|
|
1565
|
+
const maxContext = modelInfo?.contextLength || 32768;
|
|
1673
1566
|
const currentTokens = estimateTokens(buildConversationContext(messages, mode));
|
|
1674
1567
|
const contextPercent = Math.round(currentTokens / maxContext * 100);
|
|
1675
1568
|
const ACTION_COUNT = supportsVision ? 5 : 4;
|
|
@@ -2035,11 +1928,9 @@ function ChatView({ gerbil, thinkingMode, agentMode, voiceMode, onToggleThinking
|
|
|
2035
1928
|
setInput("");
|
|
2036
1929
|
try {
|
|
2037
1930
|
await gerbil.clearCache();
|
|
2038
|
-
const mem = await gerbil.getMemoryUsage();
|
|
2039
|
-
const memInfo = mem ? ` (was ${mem.usedGB.toFixed(1)}GB)` : "";
|
|
2040
1931
|
setMessages((m) => [...m, {
|
|
2041
1932
|
role: "system",
|
|
2042
|
-
content:
|
|
1933
|
+
content: "🧹 Cache cleared. Conversation context reset."
|
|
2043
1934
|
}]);
|
|
2044
1935
|
} catch (err) {
|
|
2045
1936
|
setMessages((m) => [...m, {
|
|
@@ -2052,14 +1943,11 @@ function ChatView({ gerbil, thinkingMode, agentMode, voiceMode, onToggleThinking
|
|
|
2052
1943
|
if (cmd === "memory" || cmd === "mem") {
|
|
2053
1944
|
setInput("");
|
|
2054
1945
|
try {
|
|
2055
|
-
const
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
content: `💾 Memory: ${mem.usedGB.toFixed(1)}GB / ${mem.totalGB.toFixed(1)}GB (${mem.usedPercent.toFixed(1)}%)`
|
|
2059
|
-
}]);
|
|
2060
|
-
else setMessages((m) => [...m, {
|
|
1946
|
+
const sys = getMemoryInfo();
|
|
1947
|
+
const rssMB = (process.memoryUsage().rss / 1024 / 1024).toFixed(0);
|
|
1948
|
+
setMessages((m) => [...m, {
|
|
2061
1949
|
role: "system",
|
|
2062
|
-
content:
|
|
1950
|
+
content: `💾 Process RSS: ${rssMB} MB · System: ${sys.used} / ${sys.total} (${sys.percentUsed}%)`
|
|
2063
1951
|
}]);
|
|
2064
1952
|
} catch (err) {
|
|
2065
1953
|
setMessages((m) => [...m, {
|
|
@@ -3785,10 +3673,6 @@ const CAPABILITIES = [
|
|
|
3785
3673
|
id: "text",
|
|
3786
3674
|
name: "Text"
|
|
3787
3675
|
},
|
|
3788
|
-
{
|
|
3789
|
-
id: "vision",
|
|
3790
|
-
name: "Vision"
|
|
3791
|
-
},
|
|
3792
3676
|
{
|
|
3793
3677
|
id: "tts",
|
|
3794
3678
|
name: "Text to Speech"
|
|
@@ -3836,7 +3720,7 @@ const EXAMPLES = {
|
|
|
3836
3720
|
code: `import { Gerbil } from "@tryhamster/gerbil";
|
|
3837
3721
|
|
|
3838
3722
|
const g = new Gerbil();
|
|
3839
|
-
await g.loadModel("qwen3-0.
|
|
3723
|
+
await g.loadModel("qwen3.5-0.8b");
|
|
3840
3724
|
|
|
3841
3725
|
// Generate text
|
|
3842
3726
|
const result = await g.generate("Write a haiku", {
|
|
@@ -3863,13 +3747,13 @@ import { gerbil } from "@tryhamster/gerbil/ai";
|
|
|
3863
3747
|
|
|
3864
3748
|
// Generate
|
|
3865
3749
|
const { text } = await generateText({
|
|
3866
|
-
model: gerbil("qwen3-0.
|
|
3750
|
+
model: gerbil("qwen3.5-0.8b"),
|
|
3867
3751
|
prompt: "Explain quantum computing",
|
|
3868
3752
|
});
|
|
3869
3753
|
|
|
3870
3754
|
// Stream
|
|
3871
3755
|
const { textStream } = streamText({
|
|
3872
|
-
model: gerbil("qwen3-0.
|
|
3756
|
+
model: gerbil("qwen3.5-0.8b"),
|
|
3873
3757
|
system: "You are a helpful assistant.",
|
|
3874
3758
|
messages: [{ role: "user", content: "Hello!" }],
|
|
3875
3759
|
});
|
|
@@ -3881,20 +3765,20 @@ for await (const chunk of textStream) {
|
|
|
3881
3765
|
browser: {
|
|
3882
3766
|
install: "npm install @tryhamster/gerbil",
|
|
3883
3767
|
description: "React hooks for browser-based inference with WebGPU.",
|
|
3884
|
-
code: `import { useChat, useCompletion } from "@tryhamster/gerbil/
|
|
3768
|
+
code: `import { useChat, useCompletion } from "@tryhamster/gerbil/hooks";
|
|
3885
3769
|
|
|
3886
|
-
// useChat - Full chat with message history
|
|
3770
|
+
// useChat - Full chat with message history (history is sent each turn)
|
|
3887
3771
|
function Chat() {
|
|
3888
|
-
const { messages,
|
|
3889
|
-
model: "qwen3-0.
|
|
3772
|
+
const { messages, send, isGenerating, isLoading } = useChat({
|
|
3773
|
+
model: "qwen3.5-0.8b",
|
|
3890
3774
|
thinking: true,
|
|
3891
3775
|
});
|
|
3892
3776
|
if (isLoading) return <div>Loading...</div>;
|
|
3893
3777
|
return (
|
|
3894
|
-
<
|
|
3895
|
-
{messages.map(m => <div key={
|
|
3896
|
-
<
|
|
3897
|
-
</
|
|
3778
|
+
<div>
|
|
3779
|
+
{messages.map((m, i) => <div key={i}>{m.role}: {m.content}</div>)}
|
|
3780
|
+
<button disabled={isGenerating} onClick={() => send("Hello!")}>Send</button>
|
|
3781
|
+
</div>
|
|
3898
3782
|
);
|
|
3899
3783
|
}
|
|
3900
3784
|
|
|
@@ -3912,7 +3796,7 @@ function Generator() {
|
|
|
3912
3796
|
import { gerbil } from "@tryhamster/gerbil/next";
|
|
3913
3797
|
|
|
3914
3798
|
export const POST = gerbil.handler({
|
|
3915
|
-
model: "qwen3-0.
|
|
3799
|
+
model: "qwen3.5-0.8b",
|
|
3916
3800
|
system: "You are a helpful assistant.",
|
|
3917
3801
|
});
|
|
3918
3802
|
|
|
@@ -3928,7 +3812,7 @@ import { gerbil } from "@tryhamster/gerbil/express";
|
|
|
3928
3812
|
|
|
3929
3813
|
const app = express();
|
|
3930
3814
|
app.use(express.json());
|
|
3931
|
-
app.use("/api/ai", gerbil({ model: "qwen3-0.
|
|
3815
|
+
app.use("/api/ai", gerbil({ model: "qwen3.5-0.8b" })());
|
|
3932
3816
|
|
|
3933
3817
|
// POST /api/ai/generate { prompt, options }
|
|
3934
3818
|
// POST /api/ai/stream { prompt, options } (SSE)
|
|
@@ -3944,7 +3828,7 @@ import { PromptTemplate } from "langchain/prompts";
|
|
|
3944
3828
|
import { LLMChain } from "langchain/chains";
|
|
3945
3829
|
|
|
3946
3830
|
const llm = new GerbilLLM({
|
|
3947
|
-
model: "qwen3-0.
|
|
3831
|
+
model: "qwen3.5-0.8b",
|
|
3948
3832
|
temperature: 0.7,
|
|
3949
3833
|
});
|
|
3950
3834
|
|
|
@@ -3953,135 +3837,6 @@ const chain = new LLMChain({ llm, prompt });
|
|
|
3953
3837
|
const result = await chain.call({ text: "..." });`
|
|
3954
3838
|
}
|
|
3955
3839
|
},
|
|
3956
|
-
vision: {
|
|
3957
|
-
standalone: {
|
|
3958
|
-
install: "npm install @tryhamster/gerbil",
|
|
3959
|
-
description: "Vision models for image understanding and description.",
|
|
3960
|
-
code: `import { Gerbil } from "@tryhamster/gerbil";
|
|
3961
|
-
|
|
3962
|
-
const g = new Gerbil();
|
|
3963
|
-
await g.loadModel("ministral-3b"); // Vision-capable model
|
|
3964
|
-
|
|
3965
|
-
// Describe an image
|
|
3966
|
-
const result = await g.generate("What's in this image?", {
|
|
3967
|
-
images: [{ source: "https://example.com/photo.jpg" }]
|
|
3968
|
-
});
|
|
3969
|
-
console.log(result.text);
|
|
3970
|
-
|
|
3971
|
-
// Compare images
|
|
3972
|
-
const diff = await g.generate("What's different?", {
|
|
3973
|
-
images: [
|
|
3974
|
-
{ source: "before.jpg" },
|
|
3975
|
-
{ source: "after.jpg" }
|
|
3976
|
-
]
|
|
3977
|
-
});
|
|
3978
|
-
|
|
3979
|
-
// Check if model supports vision
|
|
3980
|
-
console.log(g.supportsVision()); // true`
|
|
3981
|
-
},
|
|
3982
|
-
"ai-sdk": {
|
|
3983
|
-
install: "npm install @tryhamster/gerbil ai",
|
|
3984
|
-
description: "AI SDK with image content parts.",
|
|
3985
|
-
code: `import { generateText } from "ai";
|
|
3986
|
-
import { gerbil } from "@tryhamster/gerbil/ai";
|
|
3987
|
-
|
|
3988
|
-
const { text } = await generateText({
|
|
3989
|
-
model: gerbil("ministral-3b"),
|
|
3990
|
-
messages: [{
|
|
3991
|
-
role: "user",
|
|
3992
|
-
content: [
|
|
3993
|
-
{ type: "image", image: new URL("https://example.com/photo.jpg") },
|
|
3994
|
-
{ type: "text", text: "Describe this image in detail" },
|
|
3995
|
-
],
|
|
3996
|
-
}],
|
|
3997
|
-
});
|
|
3998
|
-
|
|
3999
|
-
// Also accepts:
|
|
4000
|
-
// - Base64 strings: { type: "image", image: "data:image/png;base64,..." }
|
|
4001
|
-
// - Uint8Array: { type: "image", image: bytes, mimeType: "image/png" }`
|
|
4002
|
-
},
|
|
4003
|
-
browser: {
|
|
4004
|
-
install: "npm install @tryhamster/gerbil",
|
|
4005
|
-
description: "React hooks with image attachment support.",
|
|
4006
|
-
code: `import { useChat } from "@tryhamster/gerbil/browser";
|
|
4007
|
-
|
|
4008
|
-
function VisionChat() {
|
|
4009
|
-
const { messages, input, setInput, handleSubmit, attachImage, attachedImages } = useChat({
|
|
4010
|
-
model: "ministral-3b"
|
|
4011
|
-
});
|
|
4012
|
-
|
|
4013
|
-
const handleFile = (e) => {
|
|
4014
|
-
const file = e.target.files?.[0];
|
|
4015
|
-
if (file) {
|
|
4016
|
-
const reader = new FileReader();
|
|
4017
|
-
reader.onload = () => attachImage(reader.result);
|
|
4018
|
-
reader.readAsDataURL(file);
|
|
4019
|
-
}
|
|
4020
|
-
};
|
|
4021
|
-
|
|
4022
|
-
return (
|
|
4023
|
-
<div>
|
|
4024
|
-
{messages.map(m => <div key={m.id}>{m.content}</div>)}
|
|
4025
|
-
<input type="file" accept="image/*" onChange={handleFile} />
|
|
4026
|
-
{attachedImages.length > 0 && <span>📎 {attachedImages.length} attached</span>}
|
|
4027
|
-
<form onSubmit={handleSubmit}>
|
|
4028
|
-
<input value={input} onChange={e => setInput(e.target.value)} />
|
|
4029
|
-
</form>
|
|
4030
|
-
</div>
|
|
4031
|
-
);
|
|
4032
|
-
}`
|
|
4033
|
-
},
|
|
4034
|
-
nextjs: {
|
|
4035
|
-
install: "npm install @tryhamster/gerbil",
|
|
4036
|
-
description: "Next.js vision endpoint with image handling.",
|
|
4037
|
-
code: `// app/api/vision/route.ts
|
|
4038
|
-
import { gerbil } from "@tryhamster/gerbil/next";
|
|
4039
|
-
|
|
4040
|
-
export const POST = gerbil.handler({ model: "ministral-3b" });
|
|
4041
|
-
|
|
4042
|
-
// Client fetch:
|
|
4043
|
-
// await fetch("/api/vision", {
|
|
4044
|
-
// method: "POST",
|
|
4045
|
-
// body: JSON.stringify({
|
|
4046
|
-
// prompt: "What's in this image?",
|
|
4047
|
-
// images: [{ source: dataUri }]
|
|
4048
|
-
// })
|
|
4049
|
-
// });`
|
|
4050
|
-
},
|
|
4051
|
-
express: {
|
|
4052
|
-
install: "npm install @tryhamster/gerbil express",
|
|
4053
|
-
description: "Express vision endpoint with image input.",
|
|
4054
|
-
code: `import express from "express";
|
|
4055
|
-
import { gerbil } from "@tryhamster/gerbil/express";
|
|
4056
|
-
|
|
4057
|
-
const app = express();
|
|
4058
|
-
app.use(express.json({ limit: "10mb" })); // For large images
|
|
4059
|
-
app.use("/api/vision", gerbil({ model: "ministral-3b" })());
|
|
4060
|
-
|
|
4061
|
-
// POST /api/vision/generate
|
|
4062
|
-
// Body: { prompt: "Describe this", images: [{ source: "..." }] }
|
|
4063
|
-
|
|
4064
|
-
app.listen(3000);`
|
|
4065
|
-
},
|
|
4066
|
-
langchain: {
|
|
4067
|
-
install: "npm install @tryhamster/gerbil langchain",
|
|
4068
|
-
description: "LangChain LLM with vision support.",
|
|
4069
|
-
code: `import { GerbilLLM } from "@tryhamster/gerbil/langchain";
|
|
4070
|
-
|
|
4071
|
-
const llm = new GerbilLLM({ model: "ministral-3b" });
|
|
4072
|
-
|
|
4073
|
-
// Check vision support
|
|
4074
|
-
const hasVision = await llm.supportsVision();
|
|
4075
|
-
|
|
4076
|
-
// Generate with images
|
|
4077
|
-
const result = await llm.invokeWithImages(
|
|
4078
|
-
"Describe what you see in this image",
|
|
4079
|
-
[{ source: "https://example.com/photo.jpg" }]
|
|
4080
|
-
);
|
|
4081
|
-
|
|
4082
|
-
console.log(result);`
|
|
4083
|
-
}
|
|
4084
|
-
},
|
|
4085
3840
|
tts: {
|
|
4086
3841
|
standalone: {
|
|
4087
3842
|
install: "npm install @tryhamster/gerbil",
|
|
@@ -4132,24 +3887,26 @@ const voices = gerbil.listVoices();
|
|
|
4132
3887
|
browser: {
|
|
4133
3888
|
install: "npm install @tryhamster/gerbil",
|
|
4134
3889
|
description: "React hook for browser TTS with playback.",
|
|
4135
|
-
code: `import {
|
|
3890
|
+
code: `import { useTTS, KANI_VOICES, type SpeakOptions } from "@tryhamster/gerbil/hooks";
|
|
3891
|
+
import { useState } from "react";
|
|
4136
3892
|
|
|
4137
3893
|
function SpeechDemo() {
|
|
4138
|
-
const { speak, stop,
|
|
3894
|
+
const { speak, stop, isPlaying, isLoading } = useTTS();
|
|
3895
|
+
const [voice, setVoice] = useState<SpeakOptions["voice"]>(KANI_VOICES[0].value);
|
|
4139
3896
|
|
|
4140
3897
|
if (isLoading) return <div>Loading TTS model...</div>;
|
|
4141
3898
|
|
|
4142
3899
|
return (
|
|
4143
3900
|
<div>
|
|
4144
|
-
<select onChange={e => setVoice(e.target.value)}>
|
|
4145
|
-
{
|
|
4146
|
-
<option key={v.
|
|
3901
|
+
<select value={voice} onChange={e => setVoice(e.target.value as SpeakOptions["voice"])}>
|
|
3902
|
+
{KANI_VOICES.map(v => (
|
|
3903
|
+
<option key={v.value} value={v.value}>{v.label}</option>
|
|
4147
3904
|
))}
|
|
4148
3905
|
</select>
|
|
4149
|
-
<button onClick={() => speak("Hello world!")}>
|
|
4150
|
-
{
|
|
3906
|
+
<button onClick={() => speak("Hello world!", { voice })}>
|
|
3907
|
+
{isPlaying ? "Speaking..." : "Speak"}
|
|
4151
3908
|
</button>
|
|
4152
|
-
{
|
|
3909
|
+
{isPlaying && <button onClick={stop}>Stop</button>}
|
|
4153
3910
|
</div>
|
|
4154
3911
|
);
|
|
4155
3912
|
}`
|
|
@@ -4273,33 +4030,32 @@ const models = gerbil.listTranscriptionModels();`
|
|
|
4273
4030
|
browser: {
|
|
4274
4031
|
install: "npm install @tryhamster/gerbil",
|
|
4275
4032
|
description: "React hooks for recording and transcription.",
|
|
4276
|
-
code: `import {
|
|
4033
|
+
code: `import { useSTT, useVoiceChat } from "@tryhamster/gerbil/hooks";
|
|
4277
4034
|
|
|
4278
4035
|
// Record and transcribe
|
|
4279
4036
|
function VoiceInput() {
|
|
4280
|
-
const { startRecording, stopRecording, isRecording, transcript } =
|
|
4281
|
-
model: "whisper-tiny.en",
|
|
4282
|
-
onTranscript: (text) => console.log("User said:", text),
|
|
4283
|
-
});
|
|
4037
|
+
const { startRecording, stopRecording, isRecording, transcript } = useSTT();
|
|
4284
4038
|
|
|
4285
4039
|
return (
|
|
4286
|
-
<
|
|
4287
|
-
{isRecording ?
|
|
4288
|
-
|
|
4040
|
+
<div>
|
|
4041
|
+
<button onClick={isRecording ? stopRecording : startRecording}>
|
|
4042
|
+
{isRecording ? "🔴 Stop" : "🎤 Record"}
|
|
4043
|
+
</button>
|
|
4044
|
+
{transcript && <p>{transcript}</p>}
|
|
4045
|
+
</div>
|
|
4289
4046
|
);
|
|
4290
4047
|
}
|
|
4291
4048
|
|
|
4292
4049
|
// Full voice conversation: STT → LLM → TTS
|
|
4293
4050
|
function VoiceAssistant() {
|
|
4294
|
-
const {
|
|
4295
|
-
|
|
4296
|
-
|
|
4297
|
-
voice: "af_bella",
|
|
4051
|
+
const { start, stop, isListening, messages } = useVoiceChat({
|
|
4052
|
+
model: "qwen3.5-0.8b",
|
|
4053
|
+
voice: "en_us",
|
|
4298
4054
|
});
|
|
4299
4055
|
|
|
4300
4056
|
return (
|
|
4301
|
-
<button onMouseDown={
|
|
4302
|
-
{
|
|
4057
|
+
<button onMouseDown={start} onMouseUp={stop}>
|
|
4058
|
+
{isListening ? "Listening..." : "🎤 Hold to Speak"}
|
|
4303
4059
|
</button>
|
|
4304
4060
|
);
|
|
4305
4061
|
}`
|
|
@@ -4428,25 +4184,21 @@ const result = await g.embed("Your text here");
|
|
|
4428
4184
|
browser: {
|
|
4429
4185
|
install: "npm install @tryhamster/gerbil",
|
|
4430
4186
|
description: "Browser-based embeddings with WebGPU.",
|
|
4431
|
-
code: `import {
|
|
4187
|
+
code: `import { useEmbedding } from "@tryhamster/gerbil/hooks";
|
|
4432
4188
|
|
|
4433
|
-
|
|
4434
|
-
const
|
|
4189
|
+
function Embeddings() {
|
|
4190
|
+
const { embed, similarity, isLoading } = useEmbedding();
|
|
4435
4191
|
|
|
4436
|
-
|
|
4437
|
-
// For dedicated embedding model, use Gerbil on server
|
|
4192
|
+
if (isLoading) return <div>Loading embedding model...</div>;
|
|
4438
4193
|
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
|
|
4194
|
+
const run = async () => {
|
|
4195
|
+
const { vector } = await embed("Hello world"); // number[]
|
|
4196
|
+
const score = await similarity("cat", "kitten"); // 0..1
|
|
4197
|
+
console.log(vector.length, score);
|
|
4198
|
+
};
|
|
4442
4199
|
|
|
4443
|
-
|
|
4444
|
-
|
|
4445
|
-
// export async function POST(req) {
|
|
4446
|
-
// const { text } = await req.json();
|
|
4447
|
-
// const result = await g.embed(text);
|
|
4448
|
-
// return Response.json(result);
|
|
4449
|
-
// }`
|
|
4200
|
+
return <button onClick={run}>Embed</button>;
|
|
4201
|
+
}`
|
|
4450
4202
|
},
|
|
4451
4203
|
nextjs: {
|
|
4452
4204
|
install: "npm install @tryhamster/gerbil",
|
|
@@ -4665,106 +4417,33 @@ function FrameworksView() {
|
|
|
4665
4417
|
//#region src/cli/repl/views/InfoView.tsx
|
|
4666
4418
|
function getDeviceLabel(deviceMode) {
|
|
4667
4419
|
const platform = os.platform();
|
|
4668
|
-
const arch = os.arch();
|
|
4669
4420
|
if (deviceMode === "webgpu") {
|
|
4670
|
-
if (platform === "darwin"
|
|
4671
|
-
if (platform === "linux") return "WebGPU (
|
|
4672
|
-
if (platform === "win32") return "WebGPU (
|
|
4673
|
-
return "WebGPU (
|
|
4421
|
+
if (platform === "darwin") return "WebGPU (Dawn → Metal)";
|
|
4422
|
+
if (platform === "linux") return "WebGPU (Dawn → Vulkan)";
|
|
4423
|
+
if (platform === "win32") return "WebGPU (Dawn → D3D12)";
|
|
4424
|
+
return "WebGPU (Dawn)";
|
|
4674
4425
|
}
|
|
4675
|
-
if (platform === "darwin" && arch === "arm64") return `Apple Silicon (${deviceMode.toUpperCase()})`;
|
|
4676
|
-
if (platform === "darwin" && arch === "x64") return `Intel Mac (${deviceMode.toUpperCase()})`;
|
|
4677
4426
|
return deviceMode.toUpperCase();
|
|
4678
4427
|
}
|
|
4679
4428
|
function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
|
|
4680
4429
|
const deviceMode = gerbil.getDeviceMode();
|
|
4681
4430
|
const dtype = gerbil.getDtype();
|
|
4682
|
-
const chromeStatus = gerbil.getChromeStatus();
|
|
4683
4431
|
const deviceLabel = getDeviceLabel(deviceMode);
|
|
4684
4432
|
const cacheInfo = getCacheInfo();
|
|
4685
4433
|
const memInfo = getMemoryInfo();
|
|
4686
|
-
const [webgpuInfo, setWebgpuInfo] = useState(null);
|
|
4687
|
-
const [allPages, setAllPages] = useState([]);
|
|
4688
|
-
const [totalPageCount, setTotalPageCount] = useState(0);
|
|
4689
|
-
const [selectedIndex, setSelectedIndex] = useState(-1);
|
|
4690
|
-
const [killing, setKilling] = useState(false);
|
|
4691
|
-
const [killResult, setKillResult] = useState(null);
|
|
4692
4434
|
const [ttsInfo, setTtsInfo] = useState(gerbil.getTTSModelInfo());
|
|
4693
4435
|
const [sttInfo, setSttInfo] = useState(gerbil.getSTTModelInfo());
|
|
4694
|
-
const fetchInfo = useCallback(async () => {
|
|
4695
|
-
const [info, pages, totalPages] = await Promise.all([
|
|
4696
|
-
Gerbil.getWebGPUProcesses(),
|
|
4697
|
-
Gerbil.getAllChromePagesInfo(),
|
|
4698
|
-
Gerbil.getTotalChromePageCount()
|
|
4699
|
-
]);
|
|
4700
|
-
setWebgpuInfo(info);
|
|
4701
|
-
setAllPages(pages || []);
|
|
4702
|
-
setTotalPageCount(totalPages);
|
|
4703
|
-
setTtsInfo(gerbil.getTTSModelInfo());
|
|
4704
|
-
setSttInfo(gerbil.getSTTModelInfo());
|
|
4705
|
-
const pageCount = pages?.length ?? 0;
|
|
4706
|
-
if (selectedIndex >= pageCount) setSelectedIndex(pageCount > 0 ? 0 : -1);
|
|
4707
|
-
}, [selectedIndex, gerbil]);
|
|
4708
4436
|
useEffect(() => {
|
|
4709
|
-
|
|
4710
|
-
|
|
4437
|
+
const refresh = () => {
|
|
4438
|
+
setTtsInfo(gerbil.getTTSModelInfo());
|
|
4439
|
+
setSttInfo(gerbil.getSTTModelInfo());
|
|
4440
|
+
};
|
|
4441
|
+
refresh();
|
|
4442
|
+
const interval = setInterval(refresh, 2e3);
|
|
4711
4443
|
return () => clearInterval(interval);
|
|
4712
|
-
}, [
|
|
4713
|
-
useInput(
|
|
4714
|
-
if (input === "c" || input === "C")
|
|
4715
|
-
onGoToCache?.();
|
|
4716
|
-
return;
|
|
4717
|
-
}
|
|
4718
|
-
if (input === "r" || input === "R") {
|
|
4719
|
-
await fetchInfo();
|
|
4720
|
-
return;
|
|
4721
|
-
}
|
|
4722
|
-
const pageCount = allPages.length;
|
|
4723
|
-
if (pageCount > 0) {
|
|
4724
|
-
if (key.downArrow || input === "j") {
|
|
4725
|
-
setSelectedIndex((prev) => Math.min(prev + 1, pageCount - 1));
|
|
4726
|
-
return;
|
|
4727
|
-
}
|
|
4728
|
-
if (key.upArrow || input === "k") {
|
|
4729
|
-
setSelectedIndex((prev) => Math.max(prev - 1, 0));
|
|
4730
|
-
return;
|
|
4731
|
-
}
|
|
4732
|
-
const num = Number.parseInt(input, 10);
|
|
4733
|
-
if (num >= 1 && num <= 9 && num <= pageCount) {
|
|
4734
|
-
setSelectedIndex(num - 1);
|
|
4735
|
-
return;
|
|
4736
|
-
}
|
|
4737
|
-
}
|
|
4738
|
-
if (input === "x" && !killing && selectedIndex >= 0 && allPages[selectedIndex]) {
|
|
4739
|
-
setKilling(true);
|
|
4740
|
-
setKillResult(null);
|
|
4741
|
-
try {
|
|
4742
|
-
const page = allPages[selectedIndex];
|
|
4743
|
-
if (await Gerbil.killChromePage(selectedIndex)) setKillResult(`Killed: ${page.modelId?.split("/").pop() || "page"}${page.isOurs ? "" : " (other session)"}`);
|
|
4744
|
-
else setKillResult("Failed to kill page");
|
|
4745
|
-
await fetchInfo();
|
|
4746
|
-
} catch (err) {
|
|
4747
|
-
setKillResult(`Error: ${err.message}`);
|
|
4748
|
-
}
|
|
4749
|
-
setKilling(false);
|
|
4750
|
-
setTimeout(() => setKillResult(null), 3e3);
|
|
4751
|
-
return;
|
|
4752
|
-
}
|
|
4753
|
-
if (input === "K" && !killing && webgpuInfo?.browser.running) {
|
|
4754
|
-
setKilling(true);
|
|
4755
|
-
setKillResult(null);
|
|
4756
|
-
try {
|
|
4757
|
-
const result = await Gerbil.killAllWebGPU();
|
|
4758
|
-
if (result) setKillResult(`Killed ${result.pagesKilled} page(s)${result.browserKilled ? ", browser closed" : ""}`);
|
|
4759
|
-
else setKillResult("No WebGPU processes to kill");
|
|
4760
|
-
await fetchInfo();
|
|
4761
|
-
setSelectedIndex(-1);
|
|
4762
|
-
} catch (err) {
|
|
4763
|
-
setKillResult(`Error: ${err.message}`);
|
|
4764
|
-
}
|
|
4765
|
-
setKilling(false);
|
|
4766
|
-
setTimeout(() => setKillResult(null), 3e3);
|
|
4767
|
-
}
|
|
4444
|
+
}, [gerbil]);
|
|
4445
|
+
useInput((input) => {
|
|
4446
|
+
if (input === "c" || input === "C") onGoToCache?.();
|
|
4768
4447
|
});
|
|
4769
4448
|
return /* @__PURE__ */ jsxs(Box, {
|
|
4770
4449
|
flexDirection: "column",
|
|
@@ -4803,10 +4482,18 @@ function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
|
|
|
4803
4482
|
color: deviceMode === "webgpu" ? "green" : "yellow",
|
|
4804
4483
|
children: deviceLabel
|
|
4805
4484
|
})] }),
|
|
4806
|
-
/* @__PURE__ */ jsxs(Text, { children: [
|
|
4807
|
-
|
|
4808
|
-
|
|
4809
|
-
|
|
4485
|
+
/* @__PURE__ */ jsxs(Text, { children: [
|
|
4486
|
+
"Weights: ",
|
|
4487
|
+
/* @__PURE__ */ jsx(Text, {
|
|
4488
|
+
color: "green",
|
|
4489
|
+
children: dtype
|
|
4490
|
+
}),
|
|
4491
|
+
" ",
|
|
4492
|
+
/* @__PURE__ */ jsx(Text, {
|
|
4493
|
+
dimColor: true,
|
|
4494
|
+
children: "(INT4, quantized on load)"
|
|
4495
|
+
})
|
|
4496
|
+
] })
|
|
4810
4497
|
]
|
|
4811
4498
|
}),
|
|
4812
4499
|
/* @__PURE__ */ jsxs(Box, {
|
|
@@ -4900,269 +4587,88 @@ function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
|
|
|
4900
4587
|
/* @__PURE__ */ jsxs(Box, {
|
|
4901
4588
|
flexDirection: "row",
|
|
4902
4589
|
marginBottom: 1,
|
|
4903
|
-
children: [
|
|
4904
|
-
|
|
4905
|
-
|
|
4906
|
-
|
|
4907
|
-
|
|
4908
|
-
|
|
4909
|
-
|
|
4910
|
-
|
|
4911
|
-
|
|
4912
|
-
|
|
4913
|
-
|
|
4914
|
-
|
|
4915
|
-
|
|
4916
|
-
|
|
4917
|
-
|
|
4918
|
-
|
|
4919
|
-
|
|
4920
|
-
|
|
4921
|
-
|
|
4922
|
-
|
|
4923
|
-
|
|
4924
|
-
|
|
4925
|
-
|
|
4926
|
-
|
|
4927
|
-
|
|
4928
|
-
|
|
4929
|
-
|
|
4930
|
-
|
|
4931
|
-
|
|
4932
|
-
|
|
4933
|
-
|
|
4934
|
-
|
|
4590
|
+
children: [/* @__PURE__ */ jsxs(Box, {
|
|
4591
|
+
borderColor: "gray",
|
|
4592
|
+
borderStyle: "single",
|
|
4593
|
+
flexDirection: "column",
|
|
4594
|
+
marginRight: 1,
|
|
4595
|
+
minWidth: 30,
|
|
4596
|
+
paddingX: 1,
|
|
4597
|
+
children: [
|
|
4598
|
+
/* @__PURE__ */ jsx(Text, {
|
|
4599
|
+
bold: true,
|
|
4600
|
+
color: "cyan",
|
|
4601
|
+
children: "Memory"
|
|
4602
|
+
}),
|
|
4603
|
+
/* @__PURE__ */ jsxs(Text, { children: ["Total: ", /* @__PURE__ */ jsx(Text, {
|
|
4604
|
+
color: "gray",
|
|
4605
|
+
children: memInfo.total
|
|
4606
|
+
})] }),
|
|
4607
|
+
/* @__PURE__ */ jsxs(Text, { children: [
|
|
4608
|
+
"Used:",
|
|
4609
|
+
" ",
|
|
4610
|
+
/* @__PURE__ */ jsxs(Text, {
|
|
4611
|
+
color: memInfo.percentUsed > 80 ? "red" : "yellow",
|
|
4612
|
+
children: [
|
|
4613
|
+
memInfo.used,
|
|
4614
|
+
" (",
|
|
4615
|
+
memInfo.percentUsed,
|
|
4616
|
+
"%)"
|
|
4617
|
+
]
|
|
4618
|
+
})
|
|
4619
|
+
] }),
|
|
4620
|
+
/* @__PURE__ */ jsxs(Text, { children: ["Free: ", /* @__PURE__ */ jsx(Text, {
|
|
4621
|
+
color: "green",
|
|
4622
|
+
children: memInfo.free
|
|
4623
|
+
})] })
|
|
4624
|
+
]
|
|
4625
|
+
}), /* @__PURE__ */ jsxs(Box, {
|
|
4626
|
+
borderColor: "gray",
|
|
4627
|
+
borderStyle: "single",
|
|
4628
|
+
flexDirection: "column",
|
|
4629
|
+
marginRight: 1,
|
|
4630
|
+
minWidth: 30,
|
|
4631
|
+
paddingX: 1,
|
|
4632
|
+
children: [
|
|
4633
|
+
/* @__PURE__ */ jsx(Text, {
|
|
4634
|
+
bold: true,
|
|
4635
|
+
color: "cyan",
|
|
4636
|
+
children: "Session Stats"
|
|
4637
|
+
}),
|
|
4638
|
+
/* @__PURE__ */ jsxs(Text, { children: ["Prompts: ", /* @__PURE__ */ jsx(Text, {
|
|
4639
|
+
color: "gray",
|
|
4640
|
+
children: stats.prompts
|
|
4641
|
+
})] }),
|
|
4642
|
+
/* @__PURE__ */ jsxs(Text, { children: ["Tokens In: ", /* @__PURE__ */ jsx(Text, {
|
|
4643
|
+
color: "gray",
|
|
4644
|
+
children: stats.tokensIn
|
|
4645
|
+
})] }),
|
|
4646
|
+
/* @__PURE__ */ jsxs(Text, { children: ["Tokens Out: ", /* @__PURE__ */ jsx(Text, {
|
|
4647
|
+
color: "gray",
|
|
4648
|
+
children: stats.tokensOut
|
|
4649
|
+
})] }),
|
|
4650
|
+
/* @__PURE__ */ jsxs(Text, { children: ["Total Time: ", /* @__PURE__ */ jsxs(Text, {
|
|
4651
|
+
color: "gray",
|
|
4652
|
+
children: [Math.round(stats.totalTimeMs / 1e3), "s"]
|
|
4653
|
+
})] }),
|
|
4654
|
+
stats.lastTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Last tok/s: ", /* @__PURE__ */ jsx(Text, {
|
|
4655
|
+
color: "yellow",
|
|
4656
|
+
children: stats.lastTokPerSec.toFixed(1)
|
|
4657
|
+
})] }),
|
|
4658
|
+
stats.avgTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Avg tok/s: ", /* @__PURE__ */ jsx(Text, {
|
|
4659
|
+
color: "cyan",
|
|
4660
|
+
children: stats.avgTokPerSec.toFixed(1)
|
|
4661
|
+
})] }),
|
|
4662
|
+
stats.benchResults.length > 0 && /* @__PURE__ */ jsxs(Text, { children: [
|
|
4663
|
+
"Best Bench:",
|
|
4664
|
+
" ",
|
|
4665
|
+
/* @__PURE__ */ jsxs(Text, {
|
|
4935
4666
|
color: "green",
|
|
4936
|
-
children:
|
|
4937
|
-
})] })
|
|
4938
|
-
]
|
|
4939
|
-
}),
|
|
4940
|
-
/* @__PURE__ */ jsxs(Box, {
|
|
4941
|
-
borderColor: "gray",
|
|
4942
|
-
borderStyle: "single",
|
|
4943
|
-
flexDirection: "column",
|
|
4944
|
-
marginRight: 1,
|
|
4945
|
-
minWidth: 30,
|
|
4946
|
-
paddingX: 1,
|
|
4947
|
-
children: [
|
|
4948
|
-
/* @__PURE__ */ jsx(Text, {
|
|
4949
|
-
bold: true,
|
|
4950
|
-
color: "cyan",
|
|
4951
|
-
children: "Session Stats"
|
|
4952
|
-
}),
|
|
4953
|
-
/* @__PURE__ */ jsxs(Text, { children: ["Prompts: ", /* @__PURE__ */ jsx(Text, {
|
|
4954
|
-
color: "gray",
|
|
4955
|
-
children: stats.prompts
|
|
4956
|
-
})] }),
|
|
4957
|
-
/* @__PURE__ */ jsxs(Text, { children: ["Tokens In: ", /* @__PURE__ */ jsx(Text, {
|
|
4958
|
-
color: "gray",
|
|
4959
|
-
children: stats.tokensIn
|
|
4960
|
-
})] }),
|
|
4961
|
-
/* @__PURE__ */ jsxs(Text, { children: ["Tokens Out: ", /* @__PURE__ */ jsx(Text, {
|
|
4962
|
-
color: "gray",
|
|
4963
|
-
children: stats.tokensOut
|
|
4964
|
-
})] }),
|
|
4965
|
-
/* @__PURE__ */ jsxs(Text, { children: ["Total Time: ", /* @__PURE__ */ jsxs(Text, {
|
|
4966
|
-
color: "gray",
|
|
4967
|
-
children: [Math.round(stats.totalTimeMs / 1e3), "s"]
|
|
4968
|
-
})] }),
|
|
4969
|
-
stats.lastTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Last tok/s: ", /* @__PURE__ */ jsx(Text, {
|
|
4970
|
-
color: "yellow",
|
|
4971
|
-
children: stats.lastTokPerSec.toFixed(1)
|
|
4972
|
-
})] }),
|
|
4973
|
-
stats.avgTokPerSec > 0 && /* @__PURE__ */ jsxs(Text, { children: ["Avg tok/s: ", /* @__PURE__ */ jsx(Text, {
|
|
4974
|
-
color: "cyan",
|
|
4975
|
-
children: stats.avgTokPerSec.toFixed(1)
|
|
4976
|
-
})] }),
|
|
4977
|
-
stats.benchResults.length > 0 && /* @__PURE__ */ jsxs(Text, { children: [
|
|
4978
|
-
"Best Bench:",
|
|
4979
|
-
" ",
|
|
4980
|
-
/* @__PURE__ */ jsxs(Text, {
|
|
4981
|
-
color: "green",
|
|
4982
|
-
children: [Math.max(...stats.benchResults.map((b) => b.tokPerSec)), " tok/s"]
|
|
4983
|
-
})
|
|
4984
|
-
] })
|
|
4985
|
-
]
|
|
4986
|
-
}),
|
|
4987
|
-
chromeStatus && /* @__PURE__ */ jsxs(Box, {
|
|
4988
|
-
borderColor: "green",
|
|
4989
|
-
borderStyle: "single",
|
|
4990
|
-
flexDirection: "column",
|
|
4991
|
-
minWidth: 38,
|
|
4992
|
-
paddingX: 1,
|
|
4993
|
-
children: [
|
|
4994
|
-
/* @__PURE__ */ jsx(Text, {
|
|
4995
|
-
bold: true,
|
|
4996
|
-
color: "green",
|
|
4997
|
-
children: "WebGPU Backend"
|
|
4998
|
-
}),
|
|
4999
|
-
/* @__PURE__ */ jsxs(Text, { children: ["Status: ", /* @__PURE__ */ jsx(Text, {
|
|
5000
|
-
color: "green",
|
|
5001
|
-
children: "● Running"
|
|
5002
|
-
})] }),
|
|
5003
|
-
/* @__PURE__ */ jsxs(Text, { children: [
|
|
5004
|
-
"PID:",
|
|
5005
|
-
" ",
|
|
5006
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5007
|
-
color: chromeStatus.pid ? "gray" : "yellow",
|
|
5008
|
-
children: chromeStatus.pid ?? "connected (no PID)"
|
|
5009
|
-
})
|
|
5010
|
-
] }),
|
|
5011
|
-
/* @__PURE__ */ jsxs(Text, { children: ["Port: ", /* @__PURE__ */ jsx(Text, {
|
|
5012
|
-
color: "gray",
|
|
5013
|
-
children: chromeStatus.port
|
|
5014
|
-
})] }),
|
|
5015
|
-
/* @__PURE__ */ jsxs(Text, { children: [
|
|
5016
|
-
"Model:",
|
|
5017
|
-
" ",
|
|
5018
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5019
|
-
color: "gray",
|
|
5020
|
-
children: chromeStatus.modelId.length > 28 ? `...${chromeStatus.modelId.slice(-25)}` : chromeStatus.modelId
|
|
5021
|
-
})
|
|
5022
|
-
] })
|
|
5023
|
-
]
|
|
5024
|
-
})
|
|
5025
|
-
]
|
|
5026
|
-
}),
|
|
5027
|
-
webgpuInfo && /* @__PURE__ */ jsxs(Box, {
|
|
5028
|
-
borderColor: webgpuInfo.browser.running ? "green" : "gray",
|
|
5029
|
-
borderStyle: "single",
|
|
5030
|
-
flexDirection: "column",
|
|
5031
|
-
marginBottom: 1,
|
|
5032
|
-
paddingX: 1,
|
|
5033
|
-
children: [
|
|
5034
|
-
/* @__PURE__ */ jsxs(Text, {
|
|
5035
|
-
bold: true,
|
|
5036
|
-
color: webgpuInfo.browser.running ? "green" : "gray",
|
|
5037
|
-
children: ["WebGPU Processes ", /* @__PURE__ */ jsx(Text, {
|
|
5038
|
-
dimColor: true,
|
|
5039
|
-
children: "(all sessions)"
|
|
5040
|
-
})]
|
|
5041
|
-
}),
|
|
5042
|
-
/* @__PURE__ */ jsxs(Text, { children: [
|
|
5043
|
-
"Chrome:",
|
|
5044
|
-
" ",
|
|
5045
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5046
|
-
color: webgpuInfo.browser.running ? "green" : "gray",
|
|
5047
|
-
children: webgpuInfo.browser.running ? "● Running" : "○ Not running"
|
|
5048
|
-
})
|
|
5049
|
-
] }),
|
|
5050
|
-
webgpuInfo.browser.running && /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
5051
|
-
/* @__PURE__ */ jsxs(Text, { children: [
|
|
5052
|
-
" ",
|
|
5053
|
-
"PID: ",
|
|
5054
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5055
|
-
color: "gray",
|
|
5056
|
-
children: webgpuInfo.browser.pid ?? "unknown"
|
|
5057
|
-
})
|
|
5058
|
-
] }),
|
|
5059
|
-
/* @__PURE__ */ jsxs(Text, { children: [
|
|
5060
|
-
" ",
|
|
5061
|
-
"Port: ",
|
|
5062
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5063
|
-
color: "gray",
|
|
5064
|
-
children: webgpuInfo.browser.port
|
|
5065
|
-
})
|
|
5066
|
-
] }),
|
|
5067
|
-
/* @__PURE__ */ jsxs(Text, { children: [
|
|
5068
|
-
" ",
|
|
5069
|
-
"Total Pages:",
|
|
5070
|
-
" ",
|
|
5071
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5072
|
-
color: totalPageCount > 0 ? "yellow" : "gray",
|
|
5073
|
-
children: totalPageCount
|
|
5074
|
-
}),
|
|
5075
|
-
" ",
|
|
5076
|
-
/* @__PURE__ */ jsxs(Text, {
|
|
5077
|
-
dimColor: true,
|
|
5078
|
-
children: [
|
|
5079
|
-
"(",
|
|
5080
|
-
webgpuInfo.browser.activePagesCount,
|
|
5081
|
-
" ours)"
|
|
5082
|
-
]
|
|
4667
|
+
children: [Math.max(...stats.benchResults.map((b) => b.tokPerSec)), " tok/s"]
|
|
5083
4668
|
})
|
|
5084
4669
|
] })
|
|
5085
|
-
]
|
|
5086
|
-
|
|
5087
|
-
flexDirection: "column",
|
|
5088
|
-
marginTop: 1,
|
|
5089
|
-
children: [/* @__PURE__ */ jsxs(Text, {
|
|
5090
|
-
dimColor: true,
|
|
5091
|
-
children: [
|
|
5092
|
-
"Active Pages ",
|
|
5093
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5094
|
-
color: "gray",
|
|
5095
|
-
children: "(↑↓ select, x kill, K kill all)"
|
|
5096
|
-
}),
|
|
5097
|
-
":"
|
|
5098
|
-
]
|
|
5099
|
-
}), allPages.map((page, i) => {
|
|
5100
|
-
const isSelected = i === selectedIndex;
|
|
5101
|
-
const modelName = page.modelId || "loading...";
|
|
5102
|
-
const shortName = modelName.length > 30 ? `...${modelName.slice(-27)}` : modelName;
|
|
5103
|
-
return /* @__PURE__ */ jsxs(Box, {
|
|
5104
|
-
flexDirection: "column",
|
|
5105
|
-
marginLeft: 1,
|
|
5106
|
-
children: [/* @__PURE__ */ jsxs(Text, { children: [
|
|
5107
|
-
/* @__PURE__ */ jsxs(Text, {
|
|
5108
|
-
color: "gray",
|
|
5109
|
-
children: [i + 1, "."]
|
|
5110
|
-
}),
|
|
5111
|
-
" ",
|
|
5112
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5113
|
-
color: isSelected ? "cyan" : page.isOurs ? "green" : "yellow",
|
|
5114
|
-
children: isSelected ? "▶" : page.isOurs ? "●" : "○"
|
|
5115
|
-
}),
|
|
5116
|
-
" ",
|
|
5117
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5118
|
-
bold: isSelected,
|
|
5119
|
-
inverse: isSelected,
|
|
5120
|
-
children: shortName
|
|
5121
|
-
}),
|
|
5122
|
-
page.isOurs ? /* @__PURE__ */ jsxs(Text, {
|
|
5123
|
-
color: "green",
|
|
5124
|
-
dimColor: true,
|
|
5125
|
-
children: [" ", "(this session)"]
|
|
5126
|
-
}) : /* @__PURE__ */ jsxs(Text, {
|
|
5127
|
-
color: "yellow",
|
|
5128
|
-
dimColor: true,
|
|
5129
|
-
children: [" ", "(other session)"]
|
|
5130
|
-
})
|
|
5131
|
-
] }), page.memory && /* @__PURE__ */ jsxs(Text, {
|
|
5132
|
-
dimColor: true,
|
|
5133
|
-
children: [
|
|
5134
|
-
" ",
|
|
5135
|
-
"Heap: ",
|
|
5136
|
-
page.memory.usedGB.toFixed(2),
|
|
5137
|
-
"GB used of",
|
|
5138
|
-
" ",
|
|
5139
|
-
page.memory.totalGB.toFixed(2),
|
|
5140
|
-
"GB"
|
|
5141
|
-
]
|
|
5142
|
-
})]
|
|
5143
|
-
}, i);
|
|
5144
|
-
})]
|
|
5145
|
-
}),
|
|
5146
|
-
totalPageCount === 0 && webgpuInfo.browser.running && /* @__PURE__ */ jsx(Text, {
|
|
5147
|
-
color: "yellow",
|
|
5148
|
-
dimColor: true,
|
|
5149
|
-
children: "⚠ No active pages (zombie browser?) - press K to kill"
|
|
5150
|
-
}),
|
|
5151
|
-
killing && /* @__PURE__ */ jsx(Box, {
|
|
5152
|
-
marginTop: 1,
|
|
5153
|
-
children: /* @__PURE__ */ jsx(Text, {
|
|
5154
|
-
color: "cyan",
|
|
5155
|
-
children: "Killing..."
|
|
5156
|
-
})
|
|
5157
|
-
}),
|
|
5158
|
-
killResult && /* @__PURE__ */ jsx(Box, {
|
|
5159
|
-
marginTop: 1,
|
|
5160
|
-
children: /* @__PURE__ */ jsx(Text, {
|
|
5161
|
-
color: "cyan",
|
|
5162
|
-
children: killResult
|
|
5163
|
-
})
|
|
5164
|
-
})
|
|
5165
|
-
]
|
|
4670
|
+
]
|
|
4671
|
+
})]
|
|
5166
4672
|
}),
|
|
5167
4673
|
/* @__PURE__ */ jsxs(Box, {
|
|
5168
4674
|
borderColor: "gray",
|
|
@@ -5236,36 +4742,6 @@ function InfoView({ gerbil, model, modelFamily, stats, onGoToCache }) {
|
|
|
5236
4742
|
children: "c"
|
|
5237
4743
|
}),
|
|
5238
4744
|
" cache | ",
|
|
5239
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5240
|
-
color: "cyan",
|
|
5241
|
-
children: "r"
|
|
5242
|
-
}),
|
|
5243
|
-
" refresh",
|
|
5244
|
-
allPages.length > 0 && /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
5245
|
-
" ",
|
|
5246
|
-
"| ",
|
|
5247
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5248
|
-
color: "red",
|
|
5249
|
-
children: "x"
|
|
5250
|
-
}),
|
|
5251
|
-
" kill selected | ",
|
|
5252
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5253
|
-
color: "red",
|
|
5254
|
-
children: "K"
|
|
5255
|
-
}),
|
|
5256
|
-
" kill all"
|
|
5257
|
-
] }),
|
|
5258
|
-
webgpuInfo?.browser.running && allPages.length === 0 && /* @__PURE__ */ jsxs(Fragment, { children: [
|
|
5259
|
-
" ",
|
|
5260
|
-
"| ",
|
|
5261
|
-
/* @__PURE__ */ jsx(Text, {
|
|
5262
|
-
color: "red",
|
|
5263
|
-
children: "K"
|
|
5264
|
-
}),
|
|
5265
|
-
" kill browser"
|
|
5266
|
-
] }),
|
|
5267
|
-
" ",
|
|
5268
|
-
"| ",
|
|
5269
4745
|
/* @__PURE__ */ jsx(Text, {
|
|
5270
4746
|
color: "gray",
|
|
5271
4747
|
children: "Esc"
|
|
@@ -5354,6 +4830,29 @@ function formatContext(contextLength) {
|
|
|
5354
4830
|
if (contextLength >= 1e3) return `${(contextLength / 1e3).toFixed(0)}K`;
|
|
5355
4831
|
return `${contextLength}`;
|
|
5356
4832
|
}
|
|
4833
|
+
/**
 * Read a model's config.json from the Hugging Face Hub and decide whether the
 * native WebGPU engine currently supports its architecture.
 *
 * The architecture is the first entry of the config's `architectures` array,
 * falling back to `model_type` (the same keys the loader reads). A model that
 * is NOT supported can still be selected and downloaded (discovery) — it just
 * fails loudly at load with a clear "Unsupported model architecture" message,
 * and Gerbil may add support for it later.
 *
 * This function never throws: a missing id, a non-OK HTTP response, a network
 * or JSON error, or a config without a usable architecture all resolve to
 * `{ supported: false }`.
 * NOTE(review): callers therefore cannot tell "lookup failed" apart from
 * "architecture not supported" — confirm that is acceptable for the UI badge.
 *
 * @param {string} modelId - Hub repository id, e.g. "org/name".
 * @returns {Promise<{ architecture?: string, supported: boolean }>} The detected
 *   architecture (when one was found) and whether it is supported.
 */
async function detectNativeSupport(modelId) {
	// Nothing to look up; avoids requesting ".../undefined/raw/main/config.json".
	if (typeof modelId !== "string" || modelId.trim() === "") return { supported: false };
	try {
		// Encode each path segment on its own so the "/" between owner and repo
		// survives while reserved characters cannot alter the request path.
		const repoPath = modelId.split("/").map(encodeURIComponent).join("/");
		const res = await fetch(`https://huggingface.co/${repoPath}/raw/main/config.json`);
		if (!res.ok) return { supported: false };
		const config = await res.json();
		const [firstArchitecture] = Array.isArray(config?.architectures) ? config.architectures : [];
		const architecture = firstArchitecture || config?.model_type;
		// Only a non-empty string is a meaningful architecture name.
		if (typeof architecture !== "string" || architecture === "") return { supported: false };
		return {
			architecture,
			supported: isArchitectureSupported(architecture)
		};
	} catch {
		// Best-effort probe: network, encoding and parse failures count as "not supported".
		return { supported: false };
	}
}
|
|
5357
4856
|
const TTS_MODELS = [{
|
|
5358
4857
|
id: "kokoro-82m",
|
|
5359
4858
|
name: "Kokoro 82M",
|
|
@@ -5424,7 +4923,6 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
5424
4923
|
const abortControllerRef = useRef(null);
|
|
5425
4924
|
const hasFetchedOnMountRef = useRef(false);
|
|
5426
4925
|
const refreshCachedModels = async () => {
|
|
5427
|
-
await refreshCachedModelSizes().catch(() => {});
|
|
5428
4926
|
const cached = /* @__PURE__ */ new Set();
|
|
5429
4927
|
for (const model of PRESET_MODELS) if (isModelCached(model.hfId)) cached.add(model.id);
|
|
5430
4928
|
setCachedModels(cached);
|
|
@@ -5487,10 +4985,14 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
5487
4985
|
params = p.BF16 || p.F16 || p.F32 || info.safetensors.total || 0;
|
|
5488
4986
|
}
|
|
5489
4987
|
const metadata = await fetchModelMetadata(modelId);
|
|
4988
|
+
const sizeBytes = metadata.sizeBytes || info.usedStorage || 0;
|
|
4989
|
+
const { architecture, supported } = await detectNativeSupport(modelId);
|
|
5490
4990
|
return {
|
|
5491
|
-
sizeBytes
|
|
4991
|
+
sizeBytes,
|
|
5492
4992
|
params,
|
|
5493
|
-
contextLength: metadata.contextLength
|
|
4993
|
+
contextLength: metadata.contextLength,
|
|
4994
|
+
architecture,
|
|
4995
|
+
supported
|
|
5494
4996
|
};
|
|
5495
4997
|
} catch {
|
|
5496
4998
|
return null;
|
|
@@ -5508,6 +5010,8 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
5508
5010
|
sizeBytes: result.sizeBytes,
|
|
5509
5011
|
params: result.params,
|
|
5510
5012
|
contextLength: result.contextLength,
|
|
5013
|
+
architecture: result.architecture,
|
|
5014
|
+
supported: result.supported,
|
|
5511
5015
|
loading: false
|
|
5512
5016
|
};
|
|
5513
5017
|
else if (updatedModels[modelIdx]) updatedModels[modelIdx] = {
|
|
@@ -5517,6 +5021,12 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
5517
5021
|
});
|
|
5518
5022
|
setHfModels([...updatedModels]);
|
|
5519
5023
|
}
|
|
5024
|
+
const detected = updatedModels.filter((m) => m.supported !== void 0);
|
|
5025
|
+
if (detected.length > 0 && process.env.GERBIL_DEBUG) {
|
|
5026
|
+
const unsupported = detected.filter((m) => m.supported === false).length;
|
|
5027
|
+
const pct = Math.round(unsupported / detected.length * 100);
|
|
5028
|
+
console.error(`[discovery] ${unsupported}/${detected.length} (${pct}%) sampled HF models are outside the native architecture whitelist`);
|
|
5029
|
+
}
|
|
5520
5030
|
};
|
|
5521
5031
|
const fetchHFModels = async (query, append = false, sort = sortMode) => {
|
|
5522
5032
|
if (hfLoading) return;
|
|
@@ -5527,16 +5037,16 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
5527
5037
|
try {
|
|
5528
5038
|
const offset = append ? hfModels.length : 0;
|
|
5529
5039
|
const params = new URLSearchParams({
|
|
5530
|
-
filter: "
|
|
5040
|
+
filter: "text-generation",
|
|
5041
|
+
library: "safetensors",
|
|
5531
5042
|
sort: "downloads",
|
|
5532
5043
|
direction: "-1",
|
|
5533
5044
|
limit: String(PAGE_SIZE * 2)
|
|
5534
5045
|
});
|
|
5535
5046
|
if (query) {
|
|
5536
5047
|
const searchTerms = query.split(",").map((t) => t.trim()).filter(Boolean);
|
|
5537
|
-
if (
|
|
5538
|
-
|
|
5539
|
-
} else params.set("search", "onnx");
|
|
5048
|
+
if (searchTerms.length > 0) params.set("search", searchTerms.join(" "));
|
|
5049
|
+
}
|
|
5540
5050
|
if (offset > 0) params.set("skip", String(offset));
|
|
5541
5051
|
const res = await fetch(`https://huggingface.co/api/models?${params}`, { signal: abortControllerRef.current.signal });
|
|
5542
5052
|
if (!res.ok) throw new Error("API error");
|
|
@@ -5546,13 +5056,6 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
5546
5056
|
const aDate = a.createdAt ? new Date(a.createdAt).getTime() : 0;
|
|
5547
5057
|
return (b.createdAt ? new Date(b.createdAt).getTime() : 0) - aDate;
|
|
5548
5058
|
});
|
|
5549
|
-
sortedData.sort((a, b) => {
|
|
5550
|
-
const aHasQuant = a.id.includes("q4f16") || a.id.includes("q4") || a.id.startsWith("onnx-community/");
|
|
5551
|
-
const bHasQuant = b.id.includes("q4f16") || b.id.includes("q4") || b.id.startsWith("onnx-community/");
|
|
5552
|
-
if (aHasQuant && !bHasQuant) return -1;
|
|
5553
|
-
if (!aHasQuant && bHasQuant) return 1;
|
|
5554
|
-
return 0;
|
|
5555
|
-
});
|
|
5556
5059
|
const models = sortedData.map((m) => ({
|
|
5557
5060
|
id: m.id,
|
|
5558
5061
|
downloads: m.downloads || 0,
|
|
@@ -6164,7 +5667,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
6164
5667
|
children: [/* @__PURE__ */ jsx(Spinner, { type: "dots" }), " "]
|
|
6165
5668
|
}), /* @__PURE__ */ jsx(Text, {
|
|
6166
5669
|
dimColor: true,
|
|
6167
|
-
children: "Fetching
|
|
5670
|
+
children: "Fetching models..."
|
|
6168
5671
|
})] }),
|
|
6169
5672
|
hfError && /* @__PURE__ */ jsx(Text, {
|
|
6170
5673
|
color: "red",
|
|
@@ -6182,17 +5685,22 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
6182
5685
|
/* @__PURE__ */ jsx(Text, {
|
|
6183
5686
|
bold: true,
|
|
6184
5687
|
dimColor: true,
|
|
6185
|
-
children: "Model".padEnd(
|
|
5688
|
+
children: "Model".padEnd(40)
|
|
6186
5689
|
}),
|
|
6187
5690
|
/* @__PURE__ */ jsx(Text, {
|
|
6188
5691
|
bold: true,
|
|
6189
5692
|
dimColor: true,
|
|
6190
|
-
children: "
|
|
5693
|
+
children: "Engine".padEnd(13)
|
|
6191
5694
|
}),
|
|
6192
5695
|
/* @__PURE__ */ jsx(Text, {
|
|
6193
5696
|
bold: true,
|
|
6194
5697
|
dimColor: true,
|
|
6195
|
-
children: "
|
|
5698
|
+
children: "Params".padStart(7)
|
|
5699
|
+
}),
|
|
5700
|
+
/* @__PURE__ */ jsx(Text, {
|
|
5701
|
+
bold: true,
|
|
5702
|
+
dimColor: true,
|
|
5703
|
+
children: "Size".padStart(9)
|
|
6196
5704
|
}),
|
|
6197
5705
|
/* @__PURE__ */ jsx(Text, {
|
|
6198
5706
|
bold: true,
|
|
@@ -6202,7 +5710,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
6202
5710
|
/* @__PURE__ */ jsx(Text, {
|
|
6203
5711
|
bold: true,
|
|
6204
5712
|
dimColor: true,
|
|
6205
|
-
children: "Updated".padStart(
|
|
5713
|
+
children: "Updated".padStart(9)
|
|
6206
5714
|
}),
|
|
6207
5715
|
/* @__PURE__ */ jsx(Text, {
|
|
6208
5716
|
bold: true,
|
|
@@ -6212,6 +5720,19 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
6212
5720
|
] }),
|
|
6213
5721
|
filteredModels.map((model, i) => {
|
|
6214
5722
|
const isCached = isModelCached(model.id);
|
|
5723
|
+
let badge;
|
|
5724
|
+
if (model.loading || model.supported === void 0) badge = /* @__PURE__ */ jsx(Text, {
|
|
5725
|
+
dimColor: true,
|
|
5726
|
+
children: "…".padEnd(13)
|
|
5727
|
+
});
|
|
5728
|
+
else if (model.supported) badge = /* @__PURE__ */ jsx(Text, {
|
|
5729
|
+
color: "green",
|
|
5730
|
+
children: "native ✓".padEnd(13)
|
|
5731
|
+
});
|
|
5732
|
+
else badge = /* @__PURE__ */ jsx(Text, {
|
|
5733
|
+
color: "yellow",
|
|
5734
|
+
children: "unsupported".padEnd(13)
|
|
5735
|
+
});
|
|
6215
5736
|
return /* @__PURE__ */ jsxs(Box, { children: [
|
|
6216
5737
|
/* @__PURE__ */ jsxs(Text, {
|
|
6217
5738
|
color: isCached ? "green" : "gray",
|
|
@@ -6219,15 +5740,16 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
6219
5740
|
}),
|
|
6220
5741
|
/* @__PURE__ */ jsxs(Text, {
|
|
6221
5742
|
color: i === hfSelected ? "cyan" : "white",
|
|
6222
|
-
children: [i === hfSelected ? "> " : " ", model.id.length >
|
|
5743
|
+
children: [i === hfSelected ? "> " : " ", model.id.length > 38 ? `${model.id.slice(0, 35)}...` : model.id.padEnd(38)]
|
|
6223
5744
|
}),
|
|
5745
|
+
badge,
|
|
6224
5746
|
/* @__PURE__ */ jsx(Text, {
|
|
6225
5747
|
color: "yellow",
|
|
6226
|
-
children: formatParams(model.params, model.id).padStart(
|
|
5748
|
+
children: formatParams(model.params, model.id).padStart(7)
|
|
6227
5749
|
}),
|
|
6228
5750
|
/* @__PURE__ */ jsx(Text, {
|
|
6229
5751
|
color: "magenta",
|
|
6230
|
-
children: (model.sizeBytes ? formatBytes(model.sizeBytes) : "-").padStart(
|
|
5752
|
+
children: (model.sizeBytes ? formatBytes(model.sizeBytes) : "-").padStart(9)
|
|
6231
5753
|
}),
|
|
6232
5754
|
/* @__PURE__ */ jsx(Text, {
|
|
6233
5755
|
color: "cyan",
|
|
@@ -6235,7 +5757,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
6235
5757
|
}),
|
|
6236
5758
|
/* @__PURE__ */ jsx(Text, {
|
|
6237
5759
|
dimColor: true,
|
|
6238
|
-
children: formatDate(model.createdAt).padStart(
|
|
5760
|
+
children: formatDate(model.createdAt).padStart(9)
|
|
6239
5761
|
}),
|
|
6240
5762
|
/* @__PURE__ */ jsx(Text, {
|
|
6241
5763
|
color: "gray",
|
|
@@ -6278,7 +5800,7 @@ function ModelView({ currentModel, onSelect, onDownloadOnly, onTabChange, onSear
|
|
|
6278
5800
|
}),
|
|
6279
5801
|
!hfLoading && hfModels.length === 0 && !hfError && /* @__PURE__ */ jsx(Text, {
|
|
6280
5802
|
dimColor: true,
|
|
6281
|
-
children: "Loading
|
|
5803
|
+
children: "Loading models..."
|
|
6282
5804
|
})
|
|
6283
5805
|
] }),
|
|
6284
5806
|
tab === "voice" && /* @__PURE__ */ jsxs(Box, {
|
|
@@ -6950,7 +6472,7 @@ function SkillsView({ gerbil, onCreateNew, onSwitchModel }) {
|
|
|
6950
6472
|
const [skillsLoaded, setSkillsLoaded] = useState(false);
|
|
6951
6473
|
const [cachedModels, setCachedModels] = useState([]);
|
|
6952
6474
|
const [modelSelectIndex, setModelSelectIndex] = useState(0);
|
|
6953
|
-
const [currentModel, setCurrentModel] = useState(gerbil.getModelInfo()?.id ||
|
|
6475
|
+
const [currentModel, setCurrentModel] = useState(gerbil.getModelInfo()?.id || DEFAULT_MODEL);
|
|
6954
6476
|
const [attachedImage, setAttachedImage] = useState(null);
|
|
6955
6477
|
const [attachedImageName, setAttachedImageName] = useState(null);
|
|
6956
6478
|
const [imageError, setImageError] = useState(null);
|
|
@@ -8040,20 +7562,20 @@ function getViewPrompt(view, ctx = {}) {
|
|
|
8040
7562
|
}
|
|
8041
7563
|
}
|
|
8042
7564
|
getViewPrompt("chat"), getViewPrompt("skills"), getViewPrompt("create-skill"), getViewPrompt("code");
|
|
8043
|
-
function App({ initialView = "menu"
|
|
7565
|
+
function App({ initialView = "menu" } = {}) {
|
|
8044
7566
|
const { exit } = useApp();
|
|
8045
7567
|
const gerbilRef = useRef(null);
|
|
8046
7568
|
const [state, setState] = useState({
|
|
8047
7569
|
view: "menu",
|
|
8048
7570
|
gerbil: null,
|
|
8049
|
-
model:
|
|
7571
|
+
model: DEFAULT_MODEL,
|
|
8050
7572
|
loading: true,
|
|
8051
7573
|
loadingMessage: "Loading model...",
|
|
8052
7574
|
thinkingMode: false,
|
|
8053
7575
|
agentMode: true,
|
|
8054
7576
|
voiceMode: false,
|
|
8055
7577
|
downloadStatus: "",
|
|
8056
|
-
deviceMode:
|
|
7578
|
+
deviceMode: "webgpu",
|
|
8057
7579
|
ttsModel: "kokoro-82m",
|
|
8058
7580
|
sttModel: "whisper-tiny.en"
|
|
8059
7581
|
});
|
|
@@ -8137,9 +7659,8 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8137
7659
|
const loadModel = async () => {
|
|
8138
7660
|
const g = new Gerbil();
|
|
8139
7661
|
try {
|
|
8140
|
-
const device = state.deviceMode;
|
|
8141
7662
|
await g.loadModel(state.model, {
|
|
8142
|
-
device,
|
|
7663
|
+
device: "webgpu",
|
|
8143
7664
|
onProgress: (p) => {
|
|
8144
7665
|
if (!mounted) return;
|
|
8145
7666
|
if (p.status?.includes("Unable to determine content-length")) return;
|
|
@@ -8166,8 +7687,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8166
7687
|
view: pendingView || "menu"
|
|
8167
7688
|
}));
|
|
8168
7689
|
if (pendingView) setPendingView(null);
|
|
8169
|
-
|
|
8170
|
-
else generateWelcome(g).catch(() => {});
|
|
7690
|
+
setTimeout(() => generateWelcome(g).catch(() => {}), 1500);
|
|
8171
7691
|
} catch (error) {
|
|
8172
7692
|
if (!mounted) return;
|
|
8173
7693
|
setState((s) => ({
|
|
@@ -8181,7 +7701,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8181
7701
|
return () => {
|
|
8182
7702
|
mounted = false;
|
|
8183
7703
|
if (gerbilRef.current) {
|
|
8184
|
-
import("./repl-
|
|
7704
|
+
import("./repl-BDRkwPGX.mjs").then(({ setCleanupPromise: setCleanupPromise$1 }) => {
|
|
8185
7705
|
setCleanupPromise$1(gerbilRef.current?.dispose(true) ?? Promise.resolve());
|
|
8186
7706
|
});
|
|
8187
7707
|
gerbilRef.current = null;
|
|
@@ -8271,49 +7791,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8271
7791
|
return () => clearTimeout(timer);
|
|
8272
7792
|
}
|
|
8273
7793
|
}, [ctrlCPressed]);
|
|
8274
|
-
const handleDeviceSwitch = async (newDevice) => {
|
|
8275
|
-
if (newDevice === state.deviceMode) return;
|
|
8276
|
-
setState((s) => ({
|
|
8277
|
-
...s,
|
|
8278
|
-
deviceMode: newDevice,
|
|
8279
|
-
loading: true,
|
|
8280
|
-
loadingMessage: `Switching to ${newDevice.toUpperCase()}...`
|
|
8281
|
-
}));
|
|
8282
|
-
if (gerbilRef.current) {
|
|
8283
|
-
await gerbilRef.current.dispose();
|
|
8284
|
-
gerbilRef.current = null;
|
|
8285
|
-
}
|
|
8286
|
-
const g = new Gerbil();
|
|
8287
|
-
try {
|
|
8288
|
-
await g.loadModel(state.model, {
|
|
8289
|
-
device: newDevice,
|
|
8290
|
-
onProgress: (p) => {
|
|
8291
|
-
if (p.status?.includes("Unable to determine content-length")) return;
|
|
8292
|
-
setState((s) => ({
|
|
8293
|
-
...s,
|
|
8294
|
-
loadingMessage: p.file ? `${p.file} ${p.progress || 0}%` : p.status
|
|
8295
|
-
}));
|
|
8296
|
-
}
|
|
8297
|
-
});
|
|
8298
|
-
const supportsThinking = g.getModelInfo()?.supportsThinking ?? false;
|
|
8299
|
-
setToolContext({ generate: async (prompt) => {
|
|
8300
|
-
return (await g.generate(prompt, { maxTokens: 200 })).text;
|
|
8301
|
-
} });
|
|
8302
|
-
gerbilRef.current = g;
|
|
8303
|
-
setState((s) => ({
|
|
8304
|
-
...s,
|
|
8305
|
-
gerbil: g,
|
|
8306
|
-
loading: false,
|
|
8307
|
-
thinkingMode: supportsThinking
|
|
8308
|
-
}));
|
|
8309
|
-
} catch (error) {
|
|
8310
|
-
setState((s) => ({
|
|
8311
|
-
...s,
|
|
8312
|
-
loading: false,
|
|
8313
|
-
loadingMessage: `Error: ${error}`
|
|
8314
|
-
}));
|
|
8315
|
-
}
|
|
8316
|
-
};
|
|
8317
7794
|
useInput((input, key) => {
|
|
8318
7795
|
if (key.ctrl && input === "c") {
|
|
8319
7796
|
if (ctrlCPressed) exit();
|
|
@@ -8432,7 +7909,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8432
7909
|
...s,
|
|
8433
7910
|
voiceMode: !s.voiceMode
|
|
8434
7911
|
}));
|
|
8435
|
-
if ((input === "m" || input === "M") && state.view === "menu" && !state.loading) handleDeviceSwitch(state.deviceMode === "webgpu" ? "cpu" : "webgpu");
|
|
8436
7912
|
});
|
|
8437
7913
|
if (state.loading) return /* @__PURE__ */ jsx(LoadingView, { message: state.loadingMessage });
|
|
8438
7914
|
if (state.view === "menu") return /* @__PURE__ */ jsxs(Box, {
|
|
@@ -8486,8 +7962,8 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8486
7962
|
}),
|
|
8487
7963
|
/* @__PURE__ */ jsx(Text, {
|
|
8488
7964
|
bold: true,
|
|
8489
|
-
color:
|
|
8490
|
-
children: state.gerbil?.getDeviceMode()
|
|
7965
|
+
color: "green",
|
|
7966
|
+
children: (state.gerbil?.getDeviceMode() ?? "webgpu").toUpperCase()
|
|
8491
7967
|
}),
|
|
8492
7968
|
state.gerbil?.getModelInfo()?.supportsThinking ? /* @__PURE__ */ jsxs(Fragment, { children: [/* @__PURE__ */ jsx(Text, {
|
|
8493
7969
|
dimColor: true,
|
|
@@ -8608,14 +8084,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8608
8084
|
dimColor: true,
|
|
8609
8085
|
children: "oice · "
|
|
8610
8086
|
}),
|
|
8611
|
-
/* @__PURE__ */ jsx(Text, {
|
|
8612
|
-
color: "cyan",
|
|
8613
|
-
children: "m"
|
|
8614
|
-
}),
|
|
8615
|
-
/* @__PURE__ */ jsx(Text, {
|
|
8616
|
-
dimColor: true,
|
|
8617
|
-
children: "ode · "
|
|
8618
|
-
}),
|
|
8619
8087
|
/* @__PURE__ */ jsx(Text, {
|
|
8620
8088
|
color: "blue",
|
|
8621
8089
|
children: "d"
|
|
@@ -8652,13 +8120,11 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8652
8120
|
}) : null
|
|
8653
8121
|
]
|
|
8654
8122
|
});
|
|
8655
|
-
const handleModelSelect = async (model
|
|
8123
|
+
const handleModelSelect = async (model) => {
|
|
8656
8124
|
const returnView = returnToBenchmark ? "benchmark" : "menu";
|
|
8657
|
-
const newDevice = deviceOverride ?? state.deviceMode;
|
|
8658
8125
|
setState((s) => ({
|
|
8659
8126
|
...s,
|
|
8660
8127
|
model,
|
|
8661
|
-
deviceMode: newDevice,
|
|
8662
8128
|
view: returnView,
|
|
8663
8129
|
loading: true,
|
|
8664
8130
|
loadingMessage: "Switching model..."
|
|
@@ -8671,7 +8137,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8671
8137
|
const g = new Gerbil();
|
|
8672
8138
|
try {
|
|
8673
8139
|
await g.loadModel(model, {
|
|
8674
|
-
device:
|
|
8140
|
+
device: "webgpu",
|
|
8675
8141
|
onProgress: (p) => {
|
|
8676
8142
|
if (p.status?.includes("Unable to determine content-length")) return;
|
|
8677
8143
|
setState((s) => ({
|
|
@@ -8706,15 +8172,23 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8706
8172
|
downloadStatus: `Downloading ${modelId}...`
|
|
8707
8173
|
}));
|
|
8708
8174
|
const tempGerbil = new Gerbil();
|
|
8175
|
+
let initTimer = null;
|
|
8709
8176
|
try {
|
|
8710
8177
|
await tempGerbil.loadModel(modelId, { onProgress: (p) => {
|
|
8711
8178
|
if (p.status?.includes("Unable to determine content-length")) return;
|
|
8179
|
+
if (initTimer) clearTimeout(initTimer);
|
|
8712
8180
|
setState((s) => ({
|
|
8713
8181
|
...s,
|
|
8714
8182
|
downloadStatus: p.file ? `${p.file} ${p.progress || 0}%` : p.status || ""
|
|
8715
8183
|
}));
|
|
8184
|
+
initTimer = setTimeout(() => {
|
|
8185
|
+
setState((s) => ({
|
|
8186
|
+
...s,
|
|
8187
|
+
downloadStatus: `Initializing ${modelId}...`
|
|
8188
|
+
}));
|
|
8189
|
+
}, 600);
|
|
8716
8190
|
} });
|
|
8717
|
-
|
|
8191
|
+
if (initTimer) clearTimeout(initTimer);
|
|
8718
8192
|
setState((s) => ({
|
|
8719
8193
|
...s,
|
|
8720
8194
|
downloadStatus: `[done] Downloaded ${modelId}`
|
|
@@ -8725,6 +8199,7 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8725
8199
|
})), 2500);
|
|
8726
8200
|
return true;
|
|
8727
8201
|
} catch (error) {
|
|
8202
|
+
if (initTimer) clearTimeout(initTimer);
|
|
8728
8203
|
setState((s) => ({
|
|
8729
8204
|
...s,
|
|
8730
8205
|
downloadStatus: `[error] ${error}`
|
|
@@ -8734,6 +8209,8 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8734
8209
|
downloadStatus: ""
|
|
8735
8210
|
})), 3e3);
|
|
8736
8211
|
return false;
|
|
8212
|
+
} finally {
|
|
8213
|
+
await tempGerbil.dispose();
|
|
8737
8214
|
}
|
|
8738
8215
|
};
|
|
8739
8216
|
return /* @__PURE__ */ jsxs(Box, {
|
|
@@ -8973,7 +8450,6 @@ function App({ initialView = "menu", forcedDevice } = {}) {
|
|
|
8973
8450
|
setBenchmarkStatus(status);
|
|
8974
8451
|
setBenchmarkStatsState(stats);
|
|
8975
8452
|
},
|
|
8976
|
-
onSwitchDevice: handleDeviceSwitch,
|
|
8977
8453
|
onSwitchModel: () => {
|
|
8978
8454
|
setReturnToBenchmark(true);
|
|
8979
8455
|
transitionToView("model");
|
|
@@ -9070,10 +8546,7 @@ async function startRepl(options) {
|
|
|
9070
8546
|
process.on("SIGINT", handleSignal);
|
|
9071
8547
|
process.on("SIGTERM", () => process.exit(0));
|
|
9072
8548
|
try {
|
|
9073
|
-
const { waitUntilExit } = render(/* @__PURE__ */ jsx(App, {
|
|
9074
|
-
forcedDevice: options?.forcedDevice,
|
|
9075
|
-
initialView: options?.initialView
|
|
9076
|
-
}));
|
|
8549
|
+
const { waitUntilExit } = render(/* @__PURE__ */ jsx(App, { initialView: options?.initialView }));
|
|
9077
8550
|
await waitUntilExit();
|
|
9078
8551
|
if (pendingCleanup) try {
|
|
9079
8552
|
await Promise.race([pendingCleanup, new Promise((r) => setTimeout(r, 500))]);
|
|
@@ -9104,97 +8577,113 @@ async function startRepl(options) {
|
|
|
9104
8577
|
*/
|
|
9105
8578
|
const program = new Command();
|
|
9106
8579
|
program.name("gerbil").description("🐹 Local LLM inference for Node.js").version(version);
|
|
9107
|
-
program.
|
|
9108
|
-
if (promptParts.length === 0) {
|
|
8580
|
+
// Default command: `gerbil [prompt...]` / `gerbil generate [prompt...]` / `gerbil g`.
// With a prompt it runs a one-shot generation; with no prompt it opens the REPL.
program
	.command("generate [prompt...]", { isDefault: true })
	.alias("g")
	.description("Generate text (opens the REPL when no prompt is given)")
	.option("-m, --model <id>", "Model to use", DEFAULT_MODEL)
	.option("-n, --max-tokens <n>", "Max tokens", "256")
	.option("-t, --temperature <t>", "Temperature", "0.7")
	.option("-s, --system <text>", "System prompt")
	.option("--thinking", "Enable thinking mode")
	.option("--stream", "Stream output")
	.option("--json", "Output as JSON")
	.action(async (words, cliOptions) => {
		// The variadic argument is optional, so it may be absent or empty.
		const hasPrompt = Boolean(words) && words.length !== 0;
		if (hasPrompt) {
			await runGenerate(words.join(" "), cliOptions);
		} else {
			await startRepl({});
		}
	});
|
|
9114
|
-
program.command("generate <prompt...>").alias("g").description("Generate text").option("-m, --model <id>", "Model to use", "qwen3-0.6b").option("-n, --max-tokens <n>", "Max tokens", "256").option("-t, --temperature <t>", "Temperature", "0.7").option("-s, --system <text>", "System prompt").option("--thinking", "Enable thinking mode").option("--stream", "Stream output").option("--json", "Output as JSON").action(async (promptParts, opts) => {
|
|
9115
|
-
await runGenerate(promptParts.join(" "), opts);
|
|
9116
|
-
});
|
|
9117
8587
|
async function runGenerate(prompt, opts) {
|
|
9118
8588
|
const g = new Gerbil();
|
|
9119
8589
|
const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
|
|
9120
8590
|
try {
|
|
9121
|
-
await g.loadModel(opts.model, {
|
|
9122
|
-
|
|
9123
|
-
|
|
8591
|
+
await g.loadModel(opts.model, {
|
|
8592
|
+
device: "webgpu",
|
|
8593
|
+
onProgress: (p) => {
|
|
8594
|
+
spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
|
|
8595
|
+
}
|
|
8596
|
+
});
|
|
9124
8597
|
spinner.succeed("Model loaded");
|
|
9125
|
-
|
|
8598
|
+
const genOpts = {
|
|
8599
|
+
maxTokens: Number.parseInt(opts.maxTokens, 10),
|
|
8600
|
+
temperature: Number.parseFloat(opts.temperature),
|
|
8601
|
+
system: opts.system,
|
|
8602
|
+
thinking: opts.thinking
|
|
8603
|
+
};
|
|
9126
8604
|
if (opts.stream) {
|
|
9127
8605
|
process.stdout.write(chalk.green("Response: "));
|
|
9128
|
-
for await (const chunk of g.stream(prompt,
|
|
9129
|
-
|
|
9130
|
-
temperature: Number.parseFloat(opts.temperature),
|
|
9131
|
-
system: opts.system,
|
|
9132
|
-
thinking: opts.thinking
|
|
9133
|
-
})) process.stdout.write(chunk);
|
|
8606
|
+
for await (const chunk of g.stream(prompt, genOpts)) process.stdout.write(chunk);
|
|
8607
|
+
process.stdout.write("\n");
|
|
9134
8608
|
} else {
|
|
9135
8609
|
const genSpinner = ora("Generating...").start();
|
|
9136
|
-
const result = await g.generate(prompt,
|
|
9137
|
-
maxTokens: Number.parseInt(opts.maxTokens, 10),
|
|
9138
|
-
temperature: Number.parseFloat(opts.temperature),
|
|
9139
|
-
system: opts.system,
|
|
9140
|
-
thinking: opts.thinking
|
|
9141
|
-
});
|
|
8610
|
+
const result = await g.generate(prompt, genOpts);
|
|
9142
8611
|
genSpinner.stop();
|
|
9143
|
-
if (opts.json)
|
|
8612
|
+
if (opts.json) console.log(JSON.stringify(result, null, 2));
|
|
8613
|
+
else {
|
|
8614
|
+
if (opts.thinking && result.thinking) console.log(chalk.gray(`\n[thinking]\n${result.thinking}\n`));
|
|
8615
|
+
console.log(result.text);
|
|
8616
|
+
console.log(chalk.gray(`\n${result.tokensGenerated} tokens · ${result.tokensPerSecond.toFixed(1)} tok/s`));
|
|
8617
|
+
}
|
|
9144
8618
|
}
|
|
9145
8619
|
await g.dispose();
|
|
9146
|
-
} catch (
|
|
9147
|
-
spinner.fail(
|
|
8620
|
+
} catch (e) {
|
|
8621
|
+
spinner.fail(`Error: ${e?.message ?? e}`);
|
|
9148
8622
|
process.exit(1);
|
|
9149
8623
|
}
|
|
9150
8624
|
}
|
|
9151
|
-
program.command("
|
|
9152
|
-
|
|
8625
|
+
// `gerbil object <prompt...>` (alias `obj`): load a model on WebGPU, generate a
// structured JSON object (optionally validated against a schema read from a
// JSON file), print it with the attempt count, then dispose the model.
// Any failure is reported on whichever spinner is currently active and the
// process exits with status 1.
program
	.command("object <prompt...>")
	.alias("obj")
	.description("Generate a structured JSON object (with optional schema validation)")
	.option("-m, --model <id>", "Model to use", DEFAULT_MODEL)
	.option("-n, --max-tokens <n>", "Max tokens", "512")
	.option("-t, --temperature <t>", "Temperature", "0.3")
	.option("--schema <file>", "Path to a JSON file with a { required: string[] } shape to validate against")
	.option("--retries <n>", "Max retries after the first attempt", "4")
	.action(async (promptParts, opts) => {
		const g = new Gerbil();
		const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
		// Track the spinner that is live right now so an error during generation
		// stops the "Generating object..." spinner instead of leaving it running
		// while the failure is printed on the already-finished load spinner.
		let activeSpinner = spinner;
		try {
			let schema;
			if (opts.schema) {
				const { readFileSync } = await import("node:fs");
				schema = JSON.parse(readFileSync(opts.schema, "utf-8"));
			}
			await g.loadModel(opts.model, {
				device: "webgpu",
				onProgress: (p) => {
					spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
				}
			});
			spinner.succeed("Model loaded");
			const genSpinner = ora("Generating object...").start();
			activeSpinner = genSpinner;
			const result = await g.generateObject(promptParts.join(" "), {
				schema,
				maxRetries: Number.parseInt(opts.retries, 10),
				maxTokens: Number.parseInt(opts.maxTokens, 10),
				sampling: { temperature: Number.parseFloat(opts.temperature) }
			});
			genSpinner.stop();
			console.log(JSON.stringify(result.object, null, 2));
			console.log(chalk.gray(`\n${result.attempts} attempt(s)`));
			await g.dispose();
		} catch (e) {
			activeSpinner.fail(`Error: ${e?.message ?? e}`);
			process.exit(1);
		}
	});
|
|
9154
|
-
program.command("
|
|
9155
|
-
await startRepl({
|
|
9156
|
-
initialView: "chat",
|
|
9157
|
-
forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
|
|
9158
|
-
});
|
|
8657
|
+
program.command("repl").alias("r").description("Interactive TUI with chat, skills, and more").action(async () => {
|
|
8658
|
+
await startRepl({});
|
|
9159
8659
|
});
|
|
9160
|
-
program.command("
|
|
9161
|
-
await startRepl({
|
|
9162
|
-
initialView: "skills",
|
|
9163
|
-
forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
|
|
9164
|
-
});
|
|
8660
|
+
program.command("chat").alias("c").description("Interactive chat (opens REPL chat view)").action(async () => {
|
|
8661
|
+
await startRepl({ initialView: "chat" });
|
|
9165
8662
|
});
|
|
9166
|
-
program.command("
|
|
9167
|
-
await startRepl({
|
|
9168
|
-
initialView: "tools",
|
|
9169
|
-
forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
|
|
9170
|
-
});
|
|
8663
|
+
program.command("skills").description("Open REPL skills view").action(async () => {
|
|
8664
|
+
await startRepl({ initialView: "skills" });
|
|
9171
8665
|
});
|
|
9172
|
-
program.command("
|
|
9173
|
-
await startRepl({
|
|
9174
|
-
initialView: "model",
|
|
9175
|
-
forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
|
|
9176
|
-
});
|
|
8666
|
+
program.command("tools").description("Open REPL tools view").action(async () => {
|
|
8667
|
+
await startRepl({ initialView: "tools" });
|
|
9177
8668
|
});
|
|
9178
|
-
program.command("
|
|
9179
|
-
await startRepl({
|
|
9180
|
-
initialView: "frameworks",
|
|
9181
|
-
forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
|
|
9182
|
-
});
|
|
8669
|
+
program.command("model").description("Open REPL model view").action(async () => {
|
|
8670
|
+
await startRepl({ initialView: "model" });
|
|
9183
8671
|
});
|
|
9184
|
-
program.command("
|
|
9185
|
-
await startRepl({
|
|
9186
|
-
|
|
9187
|
-
|
|
9188
|
-
});
|
|
8672
|
+
program.command("integrate").description("Open REPL framework integration view").action(async () => {
|
|
8673
|
+
await startRepl({ initialView: "frameworks" });
|
|
8674
|
+
});
|
|
8675
|
+
program.command("benchmark").description("Open REPL benchmark view").action(async () => {
|
|
8676
|
+
await startRepl({ initialView: "benchmark" });
|
|
9189
8677
|
});
|
|
9190
8678
|
program.command("commit").description("Generate commit message from staged changes").option("--type <type>", "Commit style: conventional, simple, detailed", "conventional").option("--write", "Write message to .git/COMMIT_EDITMSG").action(async (opts) => {
|
|
9191
8679
|
const spinner = ora("Generating commit message...").start();
|
|
9192
8680
|
try {
|
|
9193
8681
|
const message = await commit({ type: opts.type });
|
|
9194
8682
|
spinner.stop();
|
|
8683
|
+
console.log(message);
|
|
9195
8684
|
if (opts.write) (await import("node:fs")).writeFileSync(".git/COMMIT_EDITMSG", message);
|
|
9196
|
-
} catch (
|
|
9197
|
-
spinner.fail(
|
|
8685
|
+
} catch (e) {
|
|
8686
|
+
spinner.fail(`Error: ${e?.message ?? e}`);
|
|
9198
8687
|
process.exit(1);
|
|
9199
8688
|
}
|
|
9200
8689
|
});
|
|
@@ -9202,14 +8691,15 @@ program.command("summarize <file>").description("Summarize a file").option("-l,
|
|
|
9202
8691
|
const content = (await import("node:fs")).readFileSync(file, "utf-8");
|
|
9203
8692
|
const spinner = ora("Summarizing...").start();
|
|
9204
8693
|
try {
|
|
9205
|
-
await summarize({
|
|
8694
|
+
const summary = await summarize({
|
|
9206
8695
|
content,
|
|
9207
8696
|
length: opts.length,
|
|
9208
8697
|
format: opts.format
|
|
9209
8698
|
});
|
|
9210
8699
|
spinner.stop();
|
|
9211
|
-
|
|
9212
|
-
|
|
8700
|
+
console.log(summary);
|
|
8701
|
+
} catch (e) {
|
|
8702
|
+
spinner.fail(`Error: ${e?.message ?? e}`);
|
|
9213
8703
|
process.exit(1);
|
|
9214
8704
|
}
|
|
9215
8705
|
});
|
|
@@ -9217,13 +8707,14 @@ program.command("explain <file>").description("Explain code").option("-l, --leve
|
|
|
9217
8707
|
const content = (await import("node:fs")).readFileSync(file, "utf-8");
|
|
9218
8708
|
const spinner = ora("Explaining...").start();
|
|
9219
8709
|
try {
|
|
9220
|
-
await explain({
|
|
8710
|
+
const explanation = await explain({
|
|
9221
8711
|
content,
|
|
9222
8712
|
level: opts.level
|
|
9223
8713
|
});
|
|
9224
8714
|
spinner.stop();
|
|
9225
|
-
|
|
9226
|
-
|
|
8715
|
+
console.log(explanation);
|
|
8716
|
+
} catch (e) {
|
|
8717
|
+
spinner.fail(`Error: ${e?.message ?? e}`);
|
|
9227
8718
|
process.exit(1);
|
|
9228
8719
|
}
|
|
9229
8720
|
});
|
|
@@ -9232,13 +8723,14 @@ program.command("review <file>").description("Review code").option("-f, --focus
|
|
|
9232
8723
|
const spinner = ora("Reviewing...").start();
|
|
9233
8724
|
try {
|
|
9234
8725
|
const focus = opts.focus === "all" ? ["all"] : opts.focus.split(",");
|
|
9235
|
-
await review({
|
|
8726
|
+
const reviewResult = await review({
|
|
9236
8727
|
code: content,
|
|
9237
8728
|
focus
|
|
9238
8729
|
});
|
|
9239
8730
|
spinner.stop();
|
|
9240
|
-
|
|
9241
|
-
|
|
8731
|
+
console.log(reviewResult);
|
|
8732
|
+
} catch (e) {
|
|
8733
|
+
spinner.fail(`Error: ${e?.message ?? e}`);
|
|
9242
8734
|
process.exit(1);
|
|
9243
8735
|
}
|
|
9244
8736
|
});
|
|
@@ -9324,19 +8816,19 @@ program.command("speak [text...]").description("Convert text to speech using loc
|
|
|
9324
8816
|
process.exit(1);
|
|
9325
8817
|
}
|
|
9326
8818
|
});
|
|
9327
|
-
program.command("voice [audio]").description("Voice conversation: transcribe audio, generate response, speak it back").option("-m, --model <model>", "LLM model to use",
|
|
8819
|
+
program.command("voice [audio]").description("Voice conversation: transcribe audio, generate response, speak it back").option("-m, --model <model>", "LLM model to use", DEFAULT_MODEL).option("-s, --stt-model <model>", "STT model ID", "whisper-tiny.en").option("-v, --voice <voice>", "TTS voice ID", "af_heart").option("--system <prompt>", "System prompt", "You are a helpful voice assistant. Keep responses brief and conversational.").option("--thinking", "Enable thinking mode").action(async (audioFile, opts) => {
|
|
9328
8820
|
if (!audioFile) {
|
|
9329
8821
|
console.log(chalk.cyan("\n🎙️ Gerbil Voice Chat\n"));
|
|
9330
8822
|
console.log(chalk.yellow("Usage: gerbil voice <audio.wav> [options]\n"));
|
|
9331
8823
|
console.log("Options:");
|
|
9332
|
-
console.log(
|
|
8824
|
+
console.log(` -m, --model <model> LLM model (default: ${DEFAULT_MODEL})`);
|
|
9333
8825
|
console.log(" -s, --stt-model <model> STT model (default: whisper-tiny.en)");
|
|
9334
8826
|
console.log(" -v, --voice <voice> TTS voice (default: af_heart)");
|
|
9335
8827
|
console.log(" --system <prompt> System prompt");
|
|
9336
8828
|
console.log(" --thinking Enable thinking mode");
|
|
9337
8829
|
console.log("\nExample:");
|
|
9338
8830
|
console.log(chalk.gray(" gerbil voice question.wav --voice bf_emma"));
|
|
9339
|
-
console.log(chalk.gray(" gerbil voice \"what time is it.wav\" --model qwen3-
|
|
8831
|
+
console.log(chalk.gray(" gerbil voice \"what time is it.wav\" --model qwen3.5-2b\n"));
|
|
9340
8832
|
return;
|
|
9341
8833
|
}
|
|
9342
8834
|
const g = new Gerbil();
|
|
@@ -9498,29 +8990,26 @@ function audioToWav(audio, sampleRate) {
|
|
|
9498
8990
|
}
|
|
9499
8991
|
return buffer;
|
|
9500
8992
|
}
|
|
9501
|
-
program.command("serve").description("Start Gerbil server (use --mcp or --http for CLI, otherwise opens REPL)").option("-m, --model <id>", "Model to use",
|
|
8993
|
+
program.command("serve").description("Start Gerbil server (use --mcp or --http for CLI, otherwise opens REPL)").option("-m, --model <id>", "Model to use", DEFAULT_MODEL).option("-p, --port <port>", "HTTP port", "3000").option("--mcp", "Start MCP server (stdio)").option("--http", "Start HTTP server (CLI mode)").action(async (opts) => {
|
|
9502
8994
|
if (!(opts.mcp || opts.http)) {
|
|
9503
|
-
await startRepl({
|
|
9504
|
-
initialView: "serve",
|
|
9505
|
-
forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
|
|
9506
|
-
});
|
|
8995
|
+
await startRepl({ initialView: "serve" });
|
|
9507
8996
|
return;
|
|
9508
8997
|
}
|
|
9509
8998
|
if (opts.mcp) await startMCPServer({ model: opts.model });
|
|
9510
8999
|
else {
|
|
9511
|
-
const { Gerbil: Gerbil$1 } = await import("./gerbil-
|
|
9000
|
+
const { Gerbil: Gerbil$1 } = await import("./gerbil-CTZUa8EZ.mjs");
|
|
9512
9001
|
const g = new Gerbil$1();
|
|
9513
9002
|
const spinner = ora("Loading model...").start();
|
|
9514
9003
|
await g.loadModel(opts.model);
|
|
9515
9004
|
spinner.succeed("Model loaded");
|
|
9516
|
-
(await import("node:http")).createServer(async (req, res) => {
|
|
9005
|
+
const server = (await import("node:http")).createServer(async (req, res) => {
|
|
9517
9006
|
if (req.method === "POST" && req.url === "/generate") {
|
|
9518
9007
|
let body = "";
|
|
9519
9008
|
req.on("data", (chunk) => body += chunk);
|
|
9520
9009
|
req.on("end", async () => {
|
|
9521
9010
|
try {
|
|
9522
|
-
const { prompt, ...
|
|
9523
|
-
const result = await g.generate(prompt,
|
|
9011
|
+
const { prompt, ...genOpts } = JSON.parse(body);
|
|
9012
|
+
const result = await g.generate(prompt, genOpts);
|
|
9524
9013
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
9525
9014
|
res.end(JSON.stringify(result));
|
|
9526
9015
|
} catch (e) {
|
|
@@ -9528,6 +9017,25 @@ program.command("serve").description("Start Gerbil server (use --mcp or --http f
|
|
|
9528
9017
|
res.end(JSON.stringify({ error: String(e) }));
|
|
9529
9018
|
}
|
|
9530
9019
|
});
|
|
9020
|
+
} else if (req.method === "POST" && req.url === "/stream") {
|
|
9021
|
+
let body = "";
|
|
9022
|
+
req.on("data", (chunk) => body += chunk);
|
|
9023
|
+
req.on("end", async () => {
|
|
9024
|
+
try {
|
|
9025
|
+
const { prompt, ...genOpts } = JSON.parse(body);
|
|
9026
|
+
res.writeHead(200, {
|
|
9027
|
+
"Content-Type": "text/event-stream",
|
|
9028
|
+
"Cache-Control": "no-cache",
|
|
9029
|
+
Connection: "keep-alive"
|
|
9030
|
+
});
|
|
9031
|
+
for await (const token of g.stream(prompt, genOpts)) res.write(`data: ${JSON.stringify({ token })}\n\n`);
|
|
9032
|
+
res.write("data: [DONE]\n\n");
|
|
9033
|
+
res.end();
|
|
9034
|
+
} catch (e) {
|
|
9035
|
+
res.write(`data: ${JSON.stringify({ error: String(e) })}\n\n`);
|
|
9036
|
+
res.end();
|
|
9037
|
+
}
|
|
9038
|
+
});
|
|
9531
9039
|
} else if (req.method === "GET" && req.url === "/info") {
|
|
9532
9040
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
9533
9041
|
res.end(JSON.stringify(g.getInfo()));
|
|
@@ -9535,7 +9043,14 @@ program.command("serve").description("Start Gerbil server (use --mcp or --http f
|
|
|
9535
9043
|
res.writeHead(404);
|
|
9536
9044
|
res.end("Not found");
|
|
9537
9045
|
}
|
|
9538
|
-
})
|
|
9046
|
+
});
|
|
9047
|
+
const port = Number.parseInt(opts.port, 10);
|
|
9048
|
+
server.listen(port, () => {
|
|
9049
|
+
console.log(chalk.green(`\nHTTP server on http://localhost:${port}`));
|
|
9050
|
+
console.log(chalk.gray(" POST /generate { prompt, ...options }"));
|
|
9051
|
+
console.log(chalk.gray(" POST /stream { prompt, ...options } (SSE)"));
|
|
9052
|
+
console.log(chalk.gray(" GET /info"));
|
|
9053
|
+
});
|
|
9539
9054
|
}
|
|
9540
9055
|
});
|
|
9541
9056
|
program.command("models").description("List available models").option("--search <query>", "Search models").action(async (opts) => {
|
|
@@ -9544,38 +9059,61 @@ program.command("models").description("List available models").option("--search
|
|
|
9544
9059
|
const q = opts.search.toLowerCase();
|
|
9545
9060
|
models = models.filter((m) => m.id.toLowerCase().includes(q) || m.description.toLowerCase().includes(q));
|
|
9546
9061
|
}
|
|
9547
|
-
|
|
9062
|
+
console.log(chalk.cyan("\nBuilt-in models (native WebGPU engine):\n"));
|
|
9063
|
+
for (const m of models) {
|
|
9064
|
+
const flags = [
|
|
9065
|
+
m.supportsThinking ? chalk.magenta("thinking") : null,
|
|
9066
|
+
m.supportsJson ? chalk.blue("json") : null,
|
|
9067
|
+
m.supportsVision ? chalk.green("vision") : null
|
|
9068
|
+
].filter(Boolean).join(" ");
|
|
9069
|
+
console.log(` ${chalk.green(m.id.padEnd(22))} ${chalk.gray((m.size || "").padStart(7))} ${m.description}`);
|
|
9070
|
+
if (flags) console.log(` ${" ".repeat(22)} ${flags}`);
|
|
9071
|
+
}
|
|
9072
|
+
console.log(chalk.gray("\nUse any HuggingFace repo with a supported architecture via hf:org/model.\n"));
|
|
9548
9073
|
});
|
|
9549
|
-
program.command("info").description("Show system and model info (use --cli for text output, otherwise opens REPL)").option("-m, --model <id>", "Model to load and show info for",
|
|
9074
|
+
program.command("info").description("Show system and model info (use --cli for text output, otherwise opens REPL)").option("-m, --model <id>", "Model to load and show info for", DEFAULT_MODEL).option("--cli", "Output text info instead of opening REPL").action(async (opts) => {
|
|
9550
9075
|
if (!opts.cli) {
|
|
9551
|
-
await startRepl({
|
|
9552
|
-
initialView: "info",
|
|
9553
|
-
forcedDevice: opts.cpu ? "cpu" : opts.gpu ? "webgpu" : void 0
|
|
9554
|
-
});
|
|
9076
|
+
await startRepl({ initialView: "info" });
|
|
9555
9077
|
return;
|
|
9556
9078
|
}
|
|
9557
9079
|
const g = new Gerbil();
|
|
9558
9080
|
const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
|
|
9559
9081
|
try {
|
|
9560
|
-
await g.loadModel(opts.model, {
|
|
9561
|
-
|
|
9562
|
-
|
|
9082
|
+
await g.loadModel(opts.model, {
|
|
9083
|
+
device: "webgpu",
|
|
9084
|
+
onProgress: (p) => {
|
|
9085
|
+
spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
|
|
9086
|
+
}
|
|
9087
|
+
});
|
|
9563
9088
|
spinner.stop();
|
|
9564
|
-
g.getInfo();
|
|
9089
|
+
const info = g.getInfo();
|
|
9090
|
+
const deviceMode = g.getDeviceMode();
|
|
9091
|
+
const deviceLabel = deviceMode === "webgpu" ? `${deviceMode.toUpperCase()} (Dawn)` : deviceMode.toUpperCase();
|
|
9092
|
+
console.log(chalk.cyan("\nSystem Info\n"));
|
|
9093
|
+
console.log(` Backend: ${chalk.green(info.device.backend)}`);
|
|
9094
|
+
console.log(` Device: ${chalk.green(deviceLabel)}`);
|
|
9095
|
+
console.log(` Model: ${chalk.cyan(info.model?.id ?? opts.model)}`);
|
|
9096
|
+
console.log(` Family: ${chalk.gray(info.model?.family ?? "unknown")}`);
|
|
9097
|
+
console.log(` Dtype: ${chalk.gray(g.getDtype())} (INT4 weights)`);
|
|
9098
|
+
console.log(` Context: ${chalk.gray(info.context.max.toLocaleString())} tokens`);
|
|
9099
|
+
console.log(` Thinking: ${info.model?.supportsThinking ? chalk.magenta("yes") : "no"}`);
|
|
9100
|
+
console.log(` Cache: ${chalk.gray(info.cache.location)}\n`);
|
|
9565
9101
|
await g.dispose();
|
|
9566
|
-
} catch (
|
|
9567
|
-
spinner.fail(
|
|
9102
|
+
} catch (e) {
|
|
9103
|
+
spinner.fail(`Error loading model: ${e?.message ?? e}`);
|
|
9104
|
+
process.exit(1);
|
|
9568
9105
|
}
|
|
9569
9106
|
});
|
|
9570
9107
|
program.command("cache").description("Show or manage model cache").option("--clean", "Remove all cached models").option("--older-than <days>", "Remove models older than N days").action(async (opts) => {
|
|
9571
9108
|
const fs$1 = await import("node:fs");
|
|
9572
9109
|
const path$1 = await import("node:path");
|
|
9573
9110
|
const os$1 = await import("node:os");
|
|
9574
|
-
const
|
|
9575
|
-
const cacheDir = fs$1.existsSync(nodeModulesCache) ? nodeModulesCache : process.env.HF_HOME || process.env.TRANSFORMERS_CACHE || path$1.join(os$1.homedir(), ".cache", "huggingface", "hub");
|
|
9111
|
+
const cacheDir = path$1.join(os$1.homedir(), ".cache", "gerbil");
|
|
9576
9112
|
try {
|
|
9577
|
-
if (!fs$1.existsSync(cacheDir))
|
|
9578
|
-
|
|
9113
|
+
if (!fs$1.existsSync(cacheDir)) {
|
|
9114
|
+
console.log(chalk.gray("No native model cache found."));
|
|
9115
|
+
return;
|
|
9116
|
+
}
|
|
9579
9117
|
const models = [];
|
|
9580
9118
|
const getSize = (dir) => {
|
|
9581
9119
|
let total = 0;
|
|
@@ -9590,132 +9128,119 @@ program.command("cache").description("Show or manage model cache").option("--cle
|
|
|
9590
9128
|
} catch {}
|
|
9591
9129
|
return total;
|
|
9592
9130
|
};
|
|
9593
|
-
for (const entry of
|
|
9131
|
+
for (const entry of fs$1.readdirSync(cacheDir)) {
|
|
9594
9132
|
const entryPath = path$1.join(cacheDir, entry);
|
|
9595
9133
|
try {
|
|
9596
9134
|
const entryStat = fs$1.statSync(entryPath);
|
|
9597
9135
|
if (!entryStat.isDirectory()) continue;
|
|
9598
|
-
|
|
9599
|
-
|
|
9600
|
-
|
|
9601
|
-
|
|
9602
|
-
|
|
9603
|
-
|
|
9604
|
-
|
|
9605
|
-
path: entryPath
|
|
9606
|
-
});
|
|
9607
|
-
} else {
|
|
9608
|
-
const subEntries = fs$1.readdirSync(entryPath);
|
|
9609
|
-
for (const subEntry of subEntries) {
|
|
9610
|
-
const subPath = path$1.join(entryPath, subEntry);
|
|
9611
|
-
try {
|
|
9612
|
-
const subStat = fs$1.statSync(subPath);
|
|
9613
|
-
if (subStat.isDirectory()) {
|
|
9614
|
-
const modelName = `${entry}/${subEntry}`;
|
|
9615
|
-
const size = getSize(subPath);
|
|
9616
|
-
if (size > 0) models.push({
|
|
9617
|
-
name: modelName,
|
|
9618
|
-
size,
|
|
9619
|
-
mtime: subStat.mtime,
|
|
9620
|
-
path: subPath
|
|
9621
|
-
});
|
|
9622
|
-
}
|
|
9623
|
-
} catch {}
|
|
9624
|
-
}
|
|
9625
|
-
}
|
|
9136
|
+
const size = getSize(entryPath);
|
|
9137
|
+
if (size > 0) models.push({
|
|
9138
|
+
name: entry.replace(/_/g, "/"),
|
|
9139
|
+
size,
|
|
9140
|
+
mtime: entryStat.mtime,
|
|
9141
|
+
path: entryPath
|
|
9142
|
+
});
|
|
9626
9143
|
} catch {}
|
|
9627
9144
|
}
|
|
9628
|
-
if (models.length === 0)
|
|
9145
|
+
if (models.length === 0) {
|
|
9146
|
+
console.log(chalk.gray("No cached models."));
|
|
9147
|
+
return;
|
|
9148
|
+
}
|
|
9629
9149
|
if (opts.clean || opts.olderThan) {
|
|
9630
9150
|
const daysThreshold = opts.olderThan ? Number.parseInt(opts.olderThan, 10) : 0;
|
|
9631
9151
|
const cutoffDate = /* @__PURE__ */ new Date(Date.now() - daysThreshold * 24 * 60 * 60 * 1e3);
|
|
9632
9152
|
let removed = 0;
|
|
9633
|
-
let
|
|
9153
|
+
let removedSize = 0;
|
|
9634
9154
|
for (const model of models) if (opts.clean || model.mtime < cutoffDate) try {
|
|
9635
9155
|
fs$1.rmSync(model.path, {
|
|
9636
9156
|
recursive: true,
|
|
9637
9157
|
force: true
|
|
9638
9158
|
});
|
|
9639
9159
|
removed += 1;
|
|
9640
|
-
|
|
9160
|
+
removedSize += model.size;
|
|
9641
9161
|
} catch (_e) {}
|
|
9642
|
-
if (removed > 0) {}
|
|
9162
|
+
if (removed > 0) console.log(chalk.green(`Removed ${removed} model(s), freed ${formatSize(removedSize)}`));
|
|
9163
|
+
else console.log(chalk.gray("Nothing to remove."));
|
|
9643
9164
|
return;
|
|
9644
9165
|
}
|
|
9645
|
-
|
|
9166
|
+
console.log(chalk.cyan(`\nNative model cache (${chalk.gray(cacheDir)})\n`));
|
|
9167
|
+
let totalSize = 0;
|
|
9646
9168
|
for (const model of models.sort((a, b) => b.mtime.getTime() - a.mtime.getTime())) {
|
|
9647
|
-
formatTimeAgo(model.mtime);
|
|
9648
|
-
formatSize(model.size).padStart(8);
|
|
9649
|
-
|
|
9169
|
+
const timeAgo = formatTimeAgo(model.mtime);
|
|
9170
|
+
const sizeStr = formatSize(model.size).padStart(8);
|
|
9171
|
+
totalSize += model.size;
|
|
9172
|
+
console.log(` ${model.name.padEnd(40)} ${chalk.gray(sizeStr)} ${chalk.gray(timeAgo)}`);
|
|
9650
9173
|
}
|
|
9174
|
+
console.log(chalk.cyan(`\nTotal: ${formatSize(totalSize)}\n`));
|
|
9651
9175
|
} catch (_e) {}
|
|
9652
9176
|
});
|
|
9653
9177
|
const formatSize = formatBytes;
|
|
9654
|
-
program.command("cleanup").description("Clean up
|
|
9655
|
-
const
|
|
9656
|
-
const
|
|
9657
|
-
|
|
9658
|
-
|
|
9659
|
-
|
|
9660
|
-
|
|
9661
|
-
|
|
9662
|
-
|
|
9663
|
-
|
|
9664
|
-
}
|
|
9665
|
-
|
|
9666
|
-
|
|
9178
|
+
program.command("cleanup").description("Clean up the native model cache and free disk space").option("--all", "Remove every cached model (frees the most space)").action(async (opts) => {
|
|
9179
|
+
const fs$1 = await import("node:fs");
|
|
9180
|
+
const path$1 = await import("node:path");
|
|
9181
|
+
const os$1 = await import("node:os");
|
|
9182
|
+
const nativeCacheDir = path$1.join(os$1.homedir(), ".cache", "gerbil");
|
|
9183
|
+
console.log(chalk.cyan("\nNative model cache\n"));
|
|
9184
|
+
if (!fs$1.existsSync(nativeCacheDir)) {
|
|
9185
|
+
console.log(chalk.gray("No native model cache found."));
|
|
9186
|
+
console.log();
|
|
9187
|
+
return;
|
|
9188
|
+
}
|
|
9189
|
+
const dirSize = (dir) => {
|
|
9190
|
+
let total = 0;
|
|
9667
9191
|
try {
|
|
9668
|
-
|
|
9669
|
-
|
|
9192
|
+
for (const item of fs$1.readdirSync(dir)) {
|
|
9193
|
+
const itemPath = path$1.join(dir, item);
|
|
9194
|
+
const stat = fs$1.statSync(itemPath);
|
|
9195
|
+
total += stat.isDirectory() ? dirSize(itemPath) : stat.size;
|
|
9196
|
+
}
|
|
9197
|
+
} catch {}
|
|
9198
|
+
return total;
|
|
9199
|
+
};
|
|
9200
|
+
const entries = fs$1.readdirSync(nativeCacheDir).filter((entry) => {
|
|
9201
|
+
try {
|
|
9202
|
+
return fs$1.statSync(path$1.join(nativeCacheDir, entry)).isDirectory();
|
|
9670
9203
|
} catch {
|
|
9671
|
-
|
|
9204
|
+
return false;
|
|
9672
9205
|
}
|
|
9206
|
+
});
|
|
9207
|
+
if (entries.length === 0) {
|
|
9208
|
+
console.log(chalk.gray("No cached models."));
|
|
9673
9209
|
console.log();
|
|
9674
9210
|
return;
|
|
9675
9211
|
}
|
|
9676
|
-
|
|
9677
|
-
|
|
9678
|
-
|
|
9679
|
-
|
|
9680
|
-
|
|
9681
|
-
|
|
9682
|
-
return;
|
|
9683
|
-
}
|
|
9684
|
-
console.log(`Chrome PID: ${chalk.yellow(status.pid)}`);
|
|
9685
|
-
console.log(`Server port: ${chalk.yellow(status.port)}`);
|
|
9686
|
-
console.log(`Active pages: ${chalk.yellow(status.activePagesCount)}/${status.maxPages}`);
|
|
9687
|
-
const pageInfo = await ChromeGPUBackend.getAllChromePages();
|
|
9688
|
-
if (pageInfo.length > 0) {
|
|
9689
|
-
console.log(chalk.cyan("\nGerbil pages:"));
|
|
9690
|
-
for (let i = 0; i < pageInfo.length; i++) {
|
|
9691
|
-
const p = pageInfo[i];
|
|
9692
|
-
const memStr = p.memory ? `${p.memory.usedGB.toFixed(2)}GB` : "?";
|
|
9693
|
-
const owner = p.isOurs ? chalk.green("(this process)") : chalk.yellow("(orphan)");
|
|
9694
|
-
console.log(` [${i}] ${p.modelId || "unknown"} - ${memStr} ${owner}`);
|
|
9695
|
-
}
|
|
9212
|
+
console.log(`Location: ${chalk.gray(nativeCacheDir)}`);
|
|
9213
|
+
let totalSize = 0;
|
|
9214
|
+
for (const entry of entries) {
|
|
9215
|
+
const size = dirSize(path$1.join(nativeCacheDir, entry));
|
|
9216
|
+
totalSize += size;
|
|
9217
|
+
console.log(` ${entry.replace(/_/g, "/")} ${chalk.gray(formatSize(size).padStart(10))}`);
|
|
9696
9218
|
}
|
|
9697
|
-
|
|
9698
|
-
|
|
9699
|
-
const
|
|
9700
|
-
|
|
9701
|
-
|
|
9702
|
-
|
|
9703
|
-
|
|
9704
|
-
|
|
9705
|
-
|
|
9219
|
+
console.log(chalk.cyan(`\nTotal: ${formatSize(totalSize)}\n`));
|
|
9220
|
+
if (opts.all) {
|
|
9221
|
+
const spinner = ora("Removing all cached models...").start();
|
|
9222
|
+
try {
|
|
9223
|
+
fs$1.rmSync(nativeCacheDir, {
|
|
9224
|
+
recursive: true,
|
|
9225
|
+
force: true
|
|
9226
|
+
});
|
|
9227
|
+
spinner.succeed(`Removed ${entries.length} model(s), freed ${formatSize(totalSize)}`);
|
|
9228
|
+
} catch (e) {
|
|
9229
|
+
spinner.fail(`Failed to remove cache: ${e.message}`);
|
|
9706
9230
|
}
|
|
9707
|
-
|
|
9708
|
-
else spinner.info("No orphan pages to clean up");
|
|
9709
|
-
}
|
|
9231
|
+
} else console.log(chalk.gray("Use --all to remove every cached model."));
|
|
9710
9232
|
console.log();
|
|
9711
9233
|
});
|
|
9712
|
-
program.command("bench").description("Benchmark model performance").option("-m, --model <id>", "Model to benchmark",
|
|
9234
|
+
program.command("bench").description("Benchmark model performance").option("-m, --model <id>", "Model to benchmark", DEFAULT_MODEL).option("-n, --runs <n>", "Number of runs", "3").action(async (opts) => {
|
|
9713
9235
|
const g = new Gerbil();
|
|
9714
9236
|
const spinner = ora(`Loading ${chalk.cyan(opts.model)}...`).start();
|
|
9715
9237
|
try {
|
|
9716
|
-
await g.loadModel(opts.model, {
|
|
9717
|
-
|
|
9718
|
-
|
|
9238
|
+
await g.loadModel(opts.model, {
|
|
9239
|
+
device: "webgpu",
|
|
9240
|
+
onProgress: (p) => {
|
|
9241
|
+
spinner.text = p.progress ? `${p.status} (${p.progress}%)` : p.status;
|
|
9242
|
+
}
|
|
9243
|
+
});
|
|
9719
9244
|
spinner.succeed("Model loaded");
|
|
9720
9245
|
const runs = Number.parseInt(opts.runs, 10);
|
|
9721
9246
|
const results = [];
|
|
@@ -9743,16 +9268,17 @@ program.command("bench").description("Benchmark model performance").option("-m,
|
|
|
9743
9268
|
});
|
|
9744
9269
|
runSpinner.succeed(`Run ${i + 1}: ${chalk.cyan(tokPerSec)} tok/s, ${chalk.yellow(`${firstTokenTime}ms`)} first token`);
|
|
9745
9270
|
}
|
|
9746
|
-
Math.round(results.reduce((a, r) => a + r.tokPerSec, 0) / results.length);
|
|
9747
|
-
Math.round(results.reduce((a, r) => a + r.firstToken, 0) / results.length);
|
|
9271
|
+
const avgTokPerSec = Math.round(results.reduce((a, r) => a + r.tokPerSec, 0) / results.length);
|
|
9272
|
+
const avgFirstToken = Math.round(results.reduce((a, r) => a + r.firstToken, 0) / results.length);
|
|
9273
|
+
console.log(`\n${chalk.bold("Average")}: ${chalk.cyan(avgTokPerSec)} tok/s · ${chalk.yellow(`${avgFirstToken}ms`)} first token (${results.length} runs on ${chalk.green(g.getDeviceMode().toUpperCase())})\n`);
|
|
9748
9274
|
await g.dispose();
|
|
9749
|
-
} catch (
|
|
9750
|
-
spinner.fail(
|
|
9275
|
+
} catch (e) {
|
|
9276
|
+
spinner.fail(`Error: ${e?.message ?? e}`);
|
|
9751
9277
|
process.exit(1);
|
|
9752
9278
|
}
|
|
9753
9279
|
});
|
|
9754
9280
|
program.command("update").description("Update Gerbil to the latest version").action(async () => {
|
|
9755
|
-
const { checkForUpdate: checkForUpdate$1, installUpdate: installUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-
|
|
9281
|
+
const { checkForUpdate: checkForUpdate$1, installUpdate: installUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-BVaLXcDE.mjs");
|
|
9756
9282
|
const spinner = ora("Checking for updates...").start();
|
|
9757
9283
|
try {
|
|
9758
9284
|
const check = await checkForUpdate$1();
|
|
@@ -9780,8 +9306,9 @@ program.parse = (...args) => {
|
|
|
9780
9306
|
return result;
|
|
9781
9307
|
};
|
|
9782
9308
|
async function checkForUpdateCLI() {
|
|
9783
|
-
const { checkForUpdate: checkForUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-
|
|
9784
|
-
|
|
9309
|
+
const { checkForUpdate: checkForUpdate$1, CURRENT_VERSION: CURRENT_VERSION$1 } = await import("./auto-update-BVaLXcDE.mjs");
|
|
9310
|
+
const check = await checkForUpdate$1();
|
|
9311
|
+
if (check.updateAvailable) console.error(chalk.yellow(`\nUpdate available: v${CURRENT_VERSION$1} → v${check.latestVersion}. Run ${chalk.cyan("gerbil update")}.`));
|
|
9785
9312
|
}
|
|
9786
9313
|
program.parse();
|
|
9787
9314
|
|