@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.2
This diff compares publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
- package/README.md +79 -14
- package/dist/auto-update-DsWBBnEk.mjs +3 -0
- package/dist/browser/index.d.mts +401 -5
- package/dist/browser/index.d.mts.map +1 -1
- package/dist/browser/index.mjs +1772 -146
- package/dist/browser/index.mjs.map +1 -1
- package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-JEPeM2YE.mjs} +1 -1
- package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-Y9F7W5VQ.mjs} +514 -73
- package/dist/chrome-backend-Y9F7W5VQ.mjs.map +1 -0
- package/dist/cli.mjs +3359 -646
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +3 -3
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +3 -3
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +3 -3
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +3 -3
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +3 -3
- package/dist/gerbil-DeQlX_Mt.mjs +5 -0
- package/dist/gerbil-POAz8peb.d.mts +431 -0
- package/dist/gerbil-POAz8peb.d.mts.map +1 -0
- package/dist/gerbil-yoSpRHgv.mjs +1463 -0
- package/dist/gerbil-yoSpRHgv.mjs.map +1 -0
- package/dist/index.d.mts +395 -9
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +8 -6
- package/dist/index.mjs.map +1 -1
- package/dist/integrations/ai-sdk.d.mts +122 -4
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +239 -11
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +132 -2
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +176 -8
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +3 -3
- package/dist/integrations/mcp-client.mjs +4 -4
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +6 -6
- package/dist/{mcp-R8kRLIKb.mjs → mcp-Bitg4sjX.mjs} +10 -37
- package/dist/mcp-Bitg4sjX.mjs.map +1 -0
- package/dist/microphone-D-6y9aiE.mjs +3 -0
- package/dist/{models-DKULvhOr.mjs → models-BAtL8qsA.mjs} +42 -7
- package/dist/models-BAtL8qsA.mjs.map +1 -0
- package/dist/{models-De2-_GmQ.d.mts → models-CE0fBq0U.d.mts} +2 -2
- package/dist/models-CE0fBq0U.d.mts.map +1 -0
- package/dist/{one-liner-BUQR0nqq.mjs → one-liner-B1rmFto6.mjs} +2 -2
- package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-B1rmFto6.mjs.map} +1 -1
- package/dist/repl-D20JO260.mjs +10 -0
- package/dist/skills/index.d.mts +303 -12
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +6 -6
- package/dist/skills-5DxAV-rn.mjs +1435 -0
- package/dist/skills-5DxAV-rn.mjs.map +1 -0
- package/dist/stt-Bv_dum-R.mjs +433 -0
- package/dist/stt-Bv_dum-R.mjs.map +1 -0
- package/dist/stt-KzSoNvwI.mjs +3 -0
- package/dist/{tools-BsiEE6f2.mjs → tools-IYPrqoek.mjs} +6 -7
- package/dist/{tools-BsiEE6f2.mjs.map → tools-IYPrqoek.mjs.map} +1 -1
- package/dist/tts-5yWeP_I0.mjs +3 -0
- package/dist/tts-DG6denWG.mjs +729 -0
- package/dist/tts-DG6denWG.mjs.map +1 -0
- package/dist/types-s6Py2_DL.d.mts +353 -0
- package/dist/types-s6Py2_DL.d.mts.map +1 -0
- package/dist/{utils-7vXqtq2Q.mjs → utils-CkB4Roi6.mjs} +1 -1
- package/dist/{utils-7vXqtq2Q.mjs.map → utils-CkB4Roi6.mjs.map} +1 -1
- package/docs/ai-sdk.md +137 -21
- package/docs/browser.md +241 -2
- package/docs/memory.md +72 -0
- package/docs/stt.md +494 -0
- package/docs/tts.md +569 -0
- package/docs/vision.md +396 -0
- package/package.json +17 -18
- package/dist/auto-update-BbNHbSU1.mjs +0 -3
- package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
- package/dist/gerbil-BfnsFWRE.mjs +0 -644
- package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
- package/dist/gerbil-BjW-z7Fq.mjs +0 -5
- package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
- package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
- package/dist/mcp-R8kRLIKb.mjs.map +0 -1
- package/dist/models-DKULvhOr.mjs.map +0 -1
- package/dist/models-De2-_GmQ.d.mts.map +0 -1
- package/dist/skills-D3CEpgDc.mjs +0 -630
- package/dist/skills-D3CEpgDc.mjs.map +0 -1
- package/dist/types-BS1N92Jt.d.mts +0 -183
- package/dist/types-BS1N92Jt.d.mts.map +0 -1
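
The largest structural change in this range is that the old monolithic gerbil chunk (gerbil-BfnsFWRE.mjs, shown deleted below) is replaced by the new gerbil-yoSpRHgv.mjs / gerbil-POAz8peb.d.mts pair, alongside new stt, tts, skills, and types chunks. For orientation, here is a minimal usage sketch of the Gerbil class whose old bundled form appears in the deleted chunk below. It is based only on the JSDoc and method signatures visible in that chunk; the root import path and the `Gerbil` named export are assumptions (the chunk itself exports the class as `t` for re-export elsewhere), so treat it as illustrative rather than canonical.

// Minimal sketch, assuming the package root re-exports the class as `Gerbil`;
// option names and result fields are taken from the deleted chunk below.
import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil();

// Loads a built-in model; the old code fell back from WebGPU to CPU/WASM on failure.
await g.loadModel("qwen3-0.6b", {
  device: "auto",
  onProgress: (p) => console.log(p.status),
});

// Plain text generation with the options the old chunk accepted.
const result = await g.generate("Summarize WebGPU in one sentence.", {
  maxTokens: 128,
  temperature: 0.7,
  system: "You are a helpful assistant.",
});
console.log(result.text, `${result.tokensPerSecond.toFixed(1)} tok/s`);

await g.dispose();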
package/dist/gerbil-BfnsFWRE.mjs
DELETED
@@ -1,644 +0,0 @@
import { a as resolveModel, n as createExternalModelConfig, r as getModelConfig, t as BUILTIN_MODELS } from "./models-DKULvhOr.mjs";
import { n as zodToJsonSchema, t as extractJson } from "./utils-7vXqtq2Q.mjs";
import { AutoModelForCausalLM, AutoTokenizer, env, pipeline } from "@huggingface/transformers";

//#region src/core/gerbil.ts
/**
 * Gerbil - Local GPU-accelerated LLM inference
 */
const pipeline$1 = pipeline;
function suppressNoisyWarnings(fn) {
  const originalWarn = console.warn;
  console.warn = (...args) => {
    const msg = args[0]?.toString?.() || "";
    if (msg.includes("content-length") || msg.includes("Unable to determine")) return;
    originalWarn.apply(console, args);
  };
  return fn().finally(() => {
    console.warn = originalWarn;
  });
}
const isBrowser = typeof window !== "undefined";
env.allowLocalModels = !isBrowser;
env.useBrowserCache = isBrowser;
let webgpuInitialized = false;
let webgpuAvailable = false;
/**
 * Initialize WebGPU for Node.js environments
 * Called automatically before model loading
 */
async function initNodeWebGPU() {
  if (webgpuInitialized) return webgpuAvailable;
  webgpuInitialized = true;
  if (typeof window !== "undefined") {
    webgpuAvailable = "gpu" in navigator;
    return webgpuAvailable;
  }
  try {
    const { create, globals } = await new Function("specifier", "return import(specifier)")("webgpu");
    Object.assign(globalThis, globals);
    if (!globalThis.navigator) globalThis.navigator = {};
    globalThis.navigator.gpu = create([]);
    webgpuAvailable = true;
  } catch {
    webgpuAvailable = false;
  }
  return webgpuAvailable;
}
var Gerbil = class {
  generator = null;
  tokenizer = null;
  model = null;
  embedder = null;
  currentModel = null;
  modelConfig = null;
  config;
  stats;
  useDirect = false;
  chromeBackend = null;
  _deviceMode = "cpu";
  constructor(config = {}) {
    this.config = config;
    this.stats = {
      prompts: 0,
      tokensIn: 0,
      tokensOut: 0,
      avgSpeed: 0,
      totalTime: 0,
      cacheHits: 0,
      cacheMisses: 0
    };
  }
  static listModels() {
    return Object.values(BUILTIN_MODELS);
  }
  static getModel(modelId) {
    return BUILTIN_MODELS[modelId];
  }
  /**
   * Load a model
   *
   * @example
   * ```ts
   * // Built-in model
   * await g.loadModel("qwen3-0.6b");
   *
   * // HuggingFace model
   * await g.loadModel("hf:microsoft/Phi-3-mini");
   *
   * // Local model
   * await g.loadModel("file:./models/my-model");
   * ```
   */
  async loadModel(modelId = "qwen3-0.6b", options = {}) {
    await initNodeWebGPU();
    const source = resolveModel(modelId);
    const { onProgress, device = "auto", dtype: userDtype } = options;
    let config = getModelConfig(modelId);
    if (!config) config = createExternalModelConfig(modelId, source.path);
    onProgress?.({ status: `Loading ${modelId}...` });
    const isBrowser$1 = typeof window !== "undefined";
    const fallbackDevice = isBrowser$1 ? "wasm" : "cpu";
    let tfDevice = fallbackDevice;
    if (device === "webgpu" || device === "gpu" || device === "auto") tfDevice = "webgpu";
    const dtype = userDtype ?? (tfDevice === "webgpu" ? "q4f16" : "q4");
    let isLoading = true;
    let lastFile = "";
    let lastPct = -1;
    const progressCallback = (progress) => {
      if (!isLoading) return;
      if (progress.status === "progress" && progress.file) {
        const pct = Math.round(progress.progress || 0);
        if (progress.file !== lastFile || pct >= lastPct + 5) {
          lastFile = progress.file;
          lastPct = pct;
          onProgress?.({
            status: `Downloading ${progress.file}`,
            progress: pct,
            file: progress.file
          });
        }
      }
    };
    try {
      if (isBrowser$1 && tfDevice === "webgpu") {
        onProgress?.({ status: "Loading tokenizer..." });
        this.tokenizer = await suppressNoisyWarnings(() => AutoTokenizer.from_pretrained(source.path, { progress_callback: progressCallback }));
        onProgress?.({ status: "Loading model..." });
        this.model = await suppressNoisyWarnings(() => AutoModelForCausalLM.from_pretrained(source.path, {
          dtype,
          device: tfDevice,
          progress_callback: progressCallback
        }));
        this.useDirect = true;
        this._deviceMode = "webgpu";
        isLoading = false;
        this.currentModel = modelId;
        this.modelConfig = config;
        onProgress?.({ status: "Ready (WebGPU)!" });
      } else if (!isBrowser$1 && tfDevice === "webgpu") {
        onProgress?.({ status: "Starting Chrome WebGPU backend..." });
        const { ChromeGPUBackend } = await import("./chrome-backend-CtwPENIW.mjs");
        this.chromeBackend = await ChromeGPUBackend.create({
          modelId: source.path,
          onProgress
        });
        this.useDirect = false;
        this._deviceMode = "webgpu";
        isLoading = false;
        this.currentModel = modelId;
        this.modelConfig = config;
      } else {
        const pipelineOptions = {
          dtype,
          device: tfDevice,
          progress_callback: progressCallback
        };
        this.generator = await suppressNoisyWarnings(() => pipeline$1("text-generation", source.path, pipelineOptions));
        this.useDirect = false;
        this._deviceMode = tfDevice;
        isLoading = false;
        this.currentModel = modelId;
        this.modelConfig = config;
        onProgress?.({ status: `Ready (${tfDevice.toUpperCase()})!` });
      }
    } catch (err) {
      if (tfDevice !== fallbackDevice) {
        onProgress?.({ status: `Using ${fallbackDevice.toUpperCase()}...` });
        if (this.chromeBackend) {
          await this.chromeBackend.dispose();
          this.chromeBackend = null;
        }
        this.generator = await suppressNoisyWarnings(() => pipeline$1("text-generation", source.path, {
          dtype: "q4",
          device: fallbackDevice,
          progress_callback: progressCallback
        }));
        this.useDirect = false;
        this._deviceMode = fallbackDevice;
        isLoading = false;
        this.currentModel = modelId;
        this.modelConfig = config;
        onProgress?.({ status: `Ready (${fallbackDevice.toUpperCase()})!` });
      } else throw err;
    }
  }
  /**
   * Check if a model is loaded
   */
  isLoaded() {
    return this.generator !== null || this.useDirect && this.model !== null || this.chromeBackend !== null;
  }
  /**
   * Get current model info
   */
  getModelInfo() {
    return this.modelConfig;
  }
  /**
   * Get current device mode (webgpu, cpu, or wasm)
   */
  getDeviceMode() {
    return this._deviceMode;
  }
  /**
   * Get dtype used for current model
   */
  getDtype() {
    return this._deviceMode === "webgpu" ? "q4f16" : "q4";
  }
  /**
   * Get Chrome backend status (if using WebGPU via Chrome)
   */
  getChromeStatus() {
    if (!this.chromeBackend) return null;
    return this.chromeBackend.getStatus();
  }
  /**
   * Get Chrome memory usage (if using WebGPU via Chrome)
   * Returns JS heap memory in bytes
   */
  async getChromeMemory() {
    if (!this.chromeBackend) return null;
    return this.chromeBackend.getMemoryUsage();
  }
  /**
   * Get memory usage in GB (if using WebGPU via Chrome)
   */
  async getMemoryUsage() {
    if (!this.chromeBackend) return null;
    return this.chromeBackend.getMemoryStats();
  }
  /**
   * Clear KV cache to free memory
   * This will reset the conversation context but free up memory
   */
  async clearCache() {
    if (this.chromeBackend) await this.chromeBackend.reset();
  }
  /**
   * Check memory usage and cleanup if needed
   * @param thresholdGB Memory threshold in GB (default: 8)
   * @returns true if cleanup was performed
   */
  async checkMemoryAndCleanup(thresholdGB = 8) {
    if (!this.chromeBackend) return false;
    return this.chromeBackend.checkMemoryAndCleanup(thresholdGB);
  }
  /**
   * Generate text
   */
  async generate(prompt, options = {}) {
    if (!this.isLoaded()) await this.loadModel(this.config.model || "qwen3-0.6b");
    const { maxTokens = 256, temperature = .7, topP = .9, topK = 50, thinking = false, system } = options;
    const startTime = performance.now();
    try {
      let rawText = "";
      if (this.chromeBackend) try {
        rawText = await this.chromeBackend.generate(prompt, {
          maxTokens,
          temperature,
          topP,
          topK,
          thinking,
          system,
          onToken: options.onToken ? (t) => options.onToken(t.text) : void 0
        });
      } catch (chromeErr) {
        if (chromeErr?.message === "CHROME_BACKEND_DEAD" || !this.chromeBackend?.isAlive()) {
          await this.chromeBackend?.dispose().catch(() => {});
          this.chromeBackend = null;
          this._deviceMode = "cpu";
          this.generator = await pipeline$1("text-generation", this.currentModel || "qwen3-0.6b", {
            dtype: "q4",
            device: "cpu"
          });
          return this.generate(prompt, options);
        }
        throw chromeErr;
      }
      else if (this.useDirect && this.model && this.tokenizer) {
        const messages = this.buildMessages(prompt, {
          ...options,
          thinking
        });
        const inputs = this.tokenizer.apply_chat_template(messages, {
          add_generation_prompt: true,
          return_dict: true,
          enable_thinking: thinking
        });
        const output = await this.model.generate({
          ...inputs,
          max_new_tokens: maxTokens,
          temperature: temperature > 0 ? temperature : void 0,
          top_p: topP,
          top_k: topK,
          do_sample: temperature > 0
        });
        const inputLength = inputs.input_ids.dims?.[1] || inputs.input_ids.data?.length || 0;
        const outputTokens = output.slice(null, [inputLength, null]);
        rawText = this.tokenizer.batch_decode(outputTokens, { skip_special_tokens: true })[0] || "";
        if (rawText.toLowerCase().includes("assistant")) {
          const match = rawText.match(/assistant[:\s]*([\s\S]*)/i);
          if (match) rawText = match[1].trim();
        }
      } else if (this.generator) {
        const formattedPrompt = this.formatPrompt(prompt, {
          ...options,
          thinking
        });
        const output = await this.generator(formattedPrompt, {
          max_new_tokens: maxTokens,
          temperature,
          top_p: topP,
          top_k: topK,
          do_sample: temperature > 0,
          return_full_text: false
        });
        if (Array.isArray(output) && output[0]) {
          const result = output[0];
          if (Array.isArray(result.generated_text)) rawText = result.generated_text.at(-1)?.content || "";
          else rawText = result.generated_text || "";
        }
      } else throw new Error("No model loaded");
      const totalTime = performance.now() - startTime;
      rawText = this.cleanOutput(rawText);
      const { thinking: thinkingText, response } = this.parseThinking(rawText);
      const finalThinking = thinking ? thinkingText : void 0;
      const tokensGenerated = Math.ceil(response.length / 4);
      this.stats.prompts++;
      this.stats.tokensOut += tokensGenerated;
      this.stats.totalTime += totalTime;
      this.stats.avgSpeed = this.stats.tokensOut / this.stats.totalTime * 1e3;
      return {
        text: response,
        thinking: finalThinking,
        tokensGenerated,
        tokensPerSecond: tokensGenerated / totalTime * 1e3,
        totalTime,
        finishReason: "stop",
        provider: "local",
        cached: false
      };
    } catch (_error) {
      return {
        text: "",
        tokensGenerated: 0,
        tokensPerSecond: 0,
        totalTime: performance.now() - startTime,
        finishReason: "error",
        provider: "local",
        cached: false
      };
    }
  }
  /**
   * Stream text generation (simulated token-by-token)
   *
   * Note: Yields the raw output including <think> tags if thinking mode is enabled.
   * The final result has parsed thinking separated out.
   */
  async *stream(prompt, options = {}) {
    if (!this.isLoaded()) await this.loadModel(this.config.model || "qwen3-0.6b");
    const startTime = performance.now();
    if (this.chromeBackend) {
      let fullText = "";
      const tokenQueue = [];
      let resolveNext = null;
      let done = false;
      const generatePromise = this.chromeBackend.generate(prompt, {
        ...options,
        onToken: (token) => {
          fullText += token.text;
          if (resolveNext) {
            resolveNext(token.text);
            resolveNext = null;
          } else tokenQueue.push(token.text);
        }
      }).then(() => {
        done = true;
        if (resolveNext) resolveNext(null);
      }).catch((err) => {
        done = true;
        if (resolveNext) resolveNext(null);
        throw err;
      });
      while (!done || tokenQueue.length > 0) if (tokenQueue.length > 0) {
        const token = tokenQueue.shift();
        yield token;
        options.onToken?.(token);
      } else if (!done) {
        const token = await new Promise((resolve) => {
          resolveNext = resolve;
        });
        if (token) {
          yield token;
          options.onToken?.(token);
        }
      }
      await generatePromise;
      const { thinking: thinkingText, response } = this.parseThinking(fullText);
      const tokensGenerated = Math.ceil(response.length / 4);
      const totalTime = performance.now() - startTime;
      return {
        text: response,
        thinking: options.thinking ? thinkingText : void 0,
        tokensGenerated,
        totalTime,
        tokensPerSecond: tokensGenerated / totalTime * 1e3,
        finishReason: "stop"
      };
    }
    const result = await this.generateRaw(prompt, options);
    const words = result.rawText.split(/(\s+)/);
    for (const word of words) if (word) {
      yield word;
      options.onToken?.(word);
    }
    return result.result;
  }
  /**
   * Internal: Generate with raw text access for streaming
   */
  async generateRaw(prompt, options = {}) {
    const { maxTokens = 256, temperature = .7, topP = .9, topK = 50, thinking = false } = options;
    const startTime = performance.now();
    const formattedPrompt = this.formatPrompt(prompt, {
      ...options,
      thinking
    });
    try {
      const output = await this.generator(formattedPrompt, {
        max_new_tokens: maxTokens,
        temperature,
        top_p: topP,
        top_k: topK,
        do_sample: temperature > 0,
        return_full_text: false
      });
      const totalTime = performance.now() - startTime;
      let rawText = "";
      if (Array.isArray(output) && output[0]) {
        const result = output[0];
        if (Array.isArray(result.generated_text)) rawText = result.generated_text.at(-1)?.content || "";
        else rawText = result.generated_text || "";
      }
      rawText = this.cleanOutput(rawText);
      const { thinking: thinkingText, response } = this.parseThinking(rawText);
      const finalThinking = thinking ? thinkingText : void 0;
      const tokensGenerated = Math.ceil(response.length / 4);
      this.stats.prompts++;
      this.stats.tokensOut += tokensGenerated;
      this.stats.totalTime += totalTime;
      this.stats.avgSpeed = this.stats.tokensOut / this.stats.totalTime * 1e3;
      return {
        rawText,
        result: {
          text: response,
          thinking: finalThinking,
          tokensGenerated,
          tokensPerSecond: tokensGenerated / totalTime * 1e3,
          totalTime,
          finishReason: "stop",
          provider: "local",
          cached: false
        }
      };
    } catch (_error) {
      return {
        rawText: "",
        result: {
          text: "",
          tokensGenerated: 0,
          tokensPerSecond: 0,
          totalTime: performance.now() - startTime,
          finishReason: "error",
          provider: "local",
          cached: false
        }
      };
    }
  }
  /**
   * Generate structured JSON output
   */
  async json(prompt, options) {
    const { schema, retries = 3, temperature = .3 } = options;
    const systemPrompt = `You are a JSON generator. You MUST respond with valid JSON only.
No explanations, no markdown, no code blocks. Just pure JSON.
The JSON must conform to this schema: ${JSON.stringify(zodToJsonSchema(schema))}`;
    for (let attempt = 0; attempt < retries; attempt++) {
      const result = await this.generate(prompt, {
        system: options.system || systemPrompt,
        temperature,
        maxTokens: 1e3
      });
      try {
        const jsonStr = extractJson(result.text);
        const parsed = JSON.parse(jsonStr);
        return schema.parse(parsed);
      } catch (error) {
        if (attempt === retries - 1) throw new Error(`Failed to generate valid JSON after ${retries} attempts: ${error}`);
      }
    }
    throw new Error("Failed to generate valid JSON");
  }
  /**
   * Generate embeddings
   */
  async embed(text, options = {}) {
    if (!this.embedder) this.embedder = await pipeline$1("feature-extraction", options.model || "Xenova/all-MiniLM-L6-v2");
    const startTime = performance.now();
    const output = await this.embedder(text, {
      pooling: "mean",
      normalize: options.normalize !== false
    });
    return {
      vector: Array.from(output.data),
      text,
      totalTime: performance.now() - startTime
    };
  }
  /**
   * Generate embeddings for multiple texts
   */
  async embedBatch(texts, options = {}) {
    const results = [];
    for (const text of texts) results.push(await this.embed(text, options));
    return results;
  }
  /**
   * Get session stats
   */
  getStats() {
    return { ...this.stats };
  }
  /**
   * Get system info
   */
  getInfo() {
    return {
      version: "1.0.0",
      model: this.modelConfig,
      device: {
        backend: "transformers.js",
        gpu: null,
        vram: null,
        status: this.isLoaded() ? "ready" : "loading"
      },
      context: {
        max: this.modelConfig?.contextLength || 0,
        used: 0,
        available: this.modelConfig?.contextLength || 0
      },
      cache: {
        location: "~/.gerbil/models",
        size: "0 MB",
        modelCount: 0
      }
    };
  }
  /**
   * Reset stats
   */
  resetStats() {
    this.stats = {
      prompts: 0,
      tokensIn: 0,
      tokensOut: 0,
      avgSpeed: 0,
      totalTime: 0,
      cacheHits: 0,
      cacheMisses: 0
    };
  }
  /**
   * Dispose of resources
   */
  async dispose() {
    if (this.chromeBackend) {
      try {
        await this.chromeBackend.dispose();
      } catch {}
      this.chromeBackend = null;
    }
    if (this.generator) {
      if (typeof this.generator.dispose === "function") try {
        await this.generator.dispose();
      } catch {}
      this.generator = null;
    }
    if (this.embedder) {
      if (typeof this.embedder.dispose === "function") try {
        await this.embedder.dispose();
      } catch {}
      this.embedder = null;
    }
    this.currentModel = null;
    this.modelConfig = null;
  }
  formatPrompt(prompt, options) {
    const system = options.system || "You are a helpful assistant.";
    const isQwen = this.currentModel?.includes("qwen");
    if (options.thinking && this.modelConfig?.supportsThinking) return `<|im_start|>system\n${`${system}\n\nThink step-by-step before answering. Wrap your reasoning in <think></think> tags, then provide your answer.`}<|im_end|>\n<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
    if (isQwen) return `<|im_start|>system\n${system}<|im_end|>\n<|im_start|>user\n${prompt} /no_think<|im_end|>\n<|im_start|>assistant\n`;
    return `<|im_start|>system\n${system}<|im_end|>\n<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
  }
  buildMessages(prompt, options) {
    const system = options.system || "You are a helpful assistant.";
    const messages = [];
    messages.push({
      role: "system",
      content: system
    });
    messages.push({
      role: "user",
      content: prompt
    });
    return messages;
  }
  parseThinking(text) {
    const match = text.match(/<think>([\s\S]*?)<\/think>/);
    if (match) return {
      thinking: match[1].trim(),
      response: text.replace(/<think>[\s\S]*?<\/think>/, "").trim()
    };
    const unclosedMatch = text.match(/<think>([\s\S]*)$/);
    if (unclosedMatch) {
      const thinking = unclosedMatch[1].trim();
      const response = text.replace(/<think>[\s\S]*$/, "").trim();
      return {
        thinking: thinking || void 0,
        response
      };
    }
    return { response: text.replace(/<\/?think>/g, "").trim() };
  }
  cleanOutput(text) {
    return text.replace(/<\|im_end\|>/g, "").replace(/<\|im_start\|>/g, "").replace(/<\|endoftext\|>/g, "").replace(/<\/s>/g, "").replace(/^\/no_think\s*/i, "").replace(/^assistant\s*/i, "").replace(/^\s*\/no_think\s*/gim, "").replace(/^\s*assistant\s*/gim, "").replace(/^(system|user|assistant):\s*/gim, "").trim();
  }
};

//#endregion
export { Gerbil as t };
//# sourceMappingURL=gerbil-BfnsFWRE.mjs.map