@tryhamster/gerbil 1.0.0-rc.0 → 1.0.0-rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -14
- package/dist/auto-update-DsWBBnEk.mjs +3 -0
- package/dist/browser/index.d.mts +401 -5
- package/dist/browser/index.d.mts.map +1 -1
- package/dist/browser/index.mjs +1772 -146
- package/dist/browser/index.mjs.map +1 -1
- package/dist/{chrome-backend-CtwPENIW.mjs → chrome-backend-JEPeM2YE.mjs} +1 -1
- package/dist/{chrome-backend-C5Un08O4.mjs → chrome-backend-Y9F7W5VQ.mjs} +514 -73
- package/dist/chrome-backend-Y9F7W5VQ.mjs.map +1 -0
- package/dist/cli.mjs +3359 -646
- package/dist/cli.mjs.map +1 -1
- package/dist/frameworks/express.d.mts +1 -1
- package/dist/frameworks/express.mjs +3 -3
- package/dist/frameworks/fastify.d.mts +1 -1
- package/dist/frameworks/fastify.mjs +3 -3
- package/dist/frameworks/hono.d.mts +1 -1
- package/dist/frameworks/hono.mjs +3 -3
- package/dist/frameworks/next.d.mts +2 -2
- package/dist/frameworks/next.mjs +3 -3
- package/dist/frameworks/react.d.mts +1 -1
- package/dist/frameworks/trpc.d.mts +1 -1
- package/dist/frameworks/trpc.mjs +3 -3
- package/dist/gerbil-DeQlX_Mt.mjs +5 -0
- package/dist/gerbil-POAz8peb.d.mts +431 -0
- package/dist/gerbil-POAz8peb.d.mts.map +1 -0
- package/dist/gerbil-yoSpRHgv.mjs +1463 -0
- package/dist/gerbil-yoSpRHgv.mjs.map +1 -0
- package/dist/index.d.mts +395 -9
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +8 -6
- package/dist/index.mjs.map +1 -1
- package/dist/integrations/ai-sdk.d.mts +122 -4
- package/dist/integrations/ai-sdk.d.mts.map +1 -1
- package/dist/integrations/ai-sdk.mjs +239 -11
- package/dist/integrations/ai-sdk.mjs.map +1 -1
- package/dist/integrations/langchain.d.mts +132 -2
- package/dist/integrations/langchain.d.mts.map +1 -1
- package/dist/integrations/langchain.mjs +176 -8
- package/dist/integrations/langchain.mjs.map +1 -1
- package/dist/integrations/llamaindex.d.mts +1 -1
- package/dist/integrations/llamaindex.mjs +3 -3
- package/dist/integrations/mcp-client.mjs +4 -4
- package/dist/integrations/mcp-client.mjs.map +1 -1
- package/dist/integrations/mcp.d.mts +2 -2
- package/dist/integrations/mcp.d.mts.map +1 -1
- package/dist/integrations/mcp.mjs +6 -6
- package/dist/{mcp-R8kRLIKb.mjs → mcp-Bitg4sjX.mjs} +10 -37
- package/dist/mcp-Bitg4sjX.mjs.map +1 -0
- package/dist/microphone-D-6y9aiE.mjs +3 -0
- package/dist/{models-DKULvhOr.mjs → models-BAtL8qsA.mjs} +42 -7
- package/dist/models-BAtL8qsA.mjs.map +1 -0
- package/dist/{models-De2-_GmQ.d.mts → models-CE0fBq0U.d.mts} +2 -2
- package/dist/models-CE0fBq0U.d.mts.map +1 -0
- package/dist/{one-liner-BUQR0nqq.mjs → one-liner-B1rmFto6.mjs} +2 -2
- package/dist/{one-liner-BUQR0nqq.mjs.map → one-liner-B1rmFto6.mjs.map} +1 -1
- package/dist/repl-D20JO260.mjs +10 -0
- package/dist/skills/index.d.mts +303 -12
- package/dist/skills/index.d.mts.map +1 -1
- package/dist/skills/index.mjs +6 -6
- package/dist/skills-5DxAV-rn.mjs +1435 -0
- package/dist/skills-5DxAV-rn.mjs.map +1 -0
- package/dist/stt-Bv_dum-R.mjs +433 -0
- package/dist/stt-Bv_dum-R.mjs.map +1 -0
- package/dist/stt-KzSoNvwI.mjs +3 -0
- package/dist/{tools-BsiEE6f2.mjs → tools-IYPrqoek.mjs} +6 -7
- package/dist/{tools-BsiEE6f2.mjs.map → tools-IYPrqoek.mjs.map} +1 -1
- package/dist/tts-5yWeP_I0.mjs +3 -0
- package/dist/tts-DG6denWG.mjs +729 -0
- package/dist/tts-DG6denWG.mjs.map +1 -0
- package/dist/types-s6Py2_DL.d.mts +353 -0
- package/dist/types-s6Py2_DL.d.mts.map +1 -0
- package/dist/{utils-7vXqtq2Q.mjs → utils-CkB4Roi6.mjs} +1 -1
- package/dist/{utils-7vXqtq2Q.mjs.map → utils-CkB4Roi6.mjs.map} +1 -1
- package/docs/ai-sdk.md +137 -21
- package/docs/browser.md +241 -2
- package/docs/memory.md +72 -0
- package/docs/stt.md +494 -0
- package/docs/tts.md +569 -0
- package/docs/vision.md +396 -0
- package/package.json +17 -18
- package/dist/auto-update-BbNHbSU1.mjs +0 -3
- package/dist/chrome-backend-C5Un08O4.mjs.map +0 -1
- package/dist/gerbil-BfnsFWRE.mjs +0 -644
- package/dist/gerbil-BfnsFWRE.mjs.map +0 -1
- package/dist/gerbil-BjW-z7Fq.mjs +0 -5
- package/dist/gerbil-DZ1k3ChC.d.mts +0 -138
- package/dist/gerbil-DZ1k3ChC.d.mts.map +0 -1
- package/dist/mcp-R8kRLIKb.mjs.map +0 -1
- package/dist/models-DKULvhOr.mjs.map +0 -1
- package/dist/models-De2-_GmQ.d.mts.map +0 -1
- package/dist/skills-D3CEpgDc.mjs +0 -630
- package/dist/skills-D3CEpgDc.mjs.map +0 -1
- package/dist/types-BS1N92Jt.d.mts +0 -183
- package/dist/types-BS1N92Jt.d.mts.map +0 -1
package/dist/gerbil-yoSpRHgv.mjs
@@ -0,0 +1,1463 @@
+import { i as getModelConfig, n as createExternalModelConfig, o as resolveModel, r as fetchModelContextLength, t as BUILTIN_MODELS } from "./models-BAtL8qsA.mjs";
+import { n as zodToJsonSchema, t as extractJson } from "./utils-CkB4Roi6.mjs";
+import { AutoModelForCausalLM, AutoModelForImageTextToText, AutoProcessor, AutoTokenizer, RawImage, TextStreamer, env, pipeline } from "@huggingface/transformers";
+
+//#region src/core/cache.ts
+/**
+* Generate a deterministic cache key from prompt and options.
+* Key includes all parameters that affect the output.
+*/
+function generateCacheKey(prompt, modelId, options) {
+const keyParts = [
+prompt,
+modelId,
+options.maxTokens ?? 256,
+options.temperature ?? .7,
+options.topP ?? .9,
+options.topK ?? 50,
+options.system ?? "",
+options.thinking ?? false
+];
+const str = JSON.stringify(keyParts);
+let hash = 0;
+for (let i = 0; i < str.length; i++) {
+const char = str.charCodeAt(i);
+hash = (hash << 5) - hash + char;
+hash = hash & hash;
+}
+return `gerbil:${hash.toString(16)}`;
+}
+/**
+* LRU cache with TTL expiration for inference responses.
+*/
+var ResponseCache = class {
+cache = /* @__PURE__ */ new Map();
+maxSize;
+defaultTtl;
+hits = 0;
+misses = 0;
+/**
+* Create a new response cache.
+* @param maxSize Maximum number of entries (default: 100)
+* @param defaultTtl Default TTL in ms (default: 5 minutes)
+*/
+constructor(maxSize = 100, defaultTtl = 300 * 1e3) {
+this.maxSize = maxSize;
+this.defaultTtl = defaultTtl;
+}
+/**
+* Get a cached response if it exists and hasn't expired.
+*/
+get(key) {
+const entry = this.cache.get(key);
+if (!entry) {
+this.misses++;
+return null;
+}
+if (Date.now() - entry.createdAt > entry.ttl) {
+this.cache.delete(key);
+this.misses++;
+return null;
+}
+this.cache.delete(key);
+this.cache.set(key, entry);
+this.hits++;
+return {
+...entry.result,
+cached: true
+};
+}
+/**
+* Store a response in the cache.
+*/
+set(key, result, ttl) {
+while (this.cache.size >= this.maxSize) {
+const firstKey = this.cache.keys().next().value;
+if (firstKey) this.cache.delete(firstKey);
+}
+this.cache.set(key, {
+result,
+createdAt: Date.now(),
+ttl: ttl ?? this.defaultTtl
+});
+}
+/**
+* Check if a key exists and is not expired.
+*/
+has(key) {
+const entry = this.cache.get(key);
+if (!entry) return false;
+if (Date.now() - entry.createdAt > entry.ttl) {
+this.cache.delete(key);
+return false;
+}
+return true;
+}
+/**
+* Remove a specific key from the cache.
+*/
+delete(key) {
+return this.cache.delete(key);
+}
+/**
+* Clear all entries from the cache.
+*/
+clear() {
+this.cache.clear();
+this.hits = 0;
+this.misses = 0;
+}
+/**
+* Remove all expired entries.
+*/
+prune() {
+const now = Date.now();
+let pruned = 0;
+for (const [key, entry] of this.cache) if (now - entry.createdAt > entry.ttl) {
+this.cache.delete(key);
+pruned++;
+}
+return pruned;
+}
+/**
+* Get cache statistics.
+*/
+getStats() {
+return {
+hits: this.hits,
+misses: this.misses,
+size: this.cache.size,
+maxSize: this.maxSize
+};
+}
+/**
+* Get hit rate as a percentage.
+*/
+getHitRate() {
+const total = this.hits + this.misses;
+if (total === 0) return 0;
+return this.hits / total * 100;
+}
+};
+let globalCache = null;
+/**
+* Get the global response cache instance.
+* Creates one if it doesn't exist.
+*/
+function getGlobalCache() {
+if (!globalCache) globalCache = new ResponseCache();
+return globalCache;
+}
+/**
+* Configure the global cache with custom settings.
+*/
+function configureGlobalCache(maxSize, defaultTtl) {
+globalCache = new ResponseCache(maxSize, defaultTtl);
+return globalCache;
+}
+/**
+* Clear and reset the global cache.
+*/
+function clearGlobalCache() {
+if (globalCache) globalCache.clear();
+}
+
+//#endregion
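For orientation, a minimal usage sketch of the response cache added above (illustrative only, not part of the published diff; the import path from the package's public entry point is an assumption, since this chunk exports `ResponseCache` under an aliased name):

```ts
// Illustrative sketch only -- assumes ResponseCache is re-exported from the package root.
import { ResponseCache } from "@tryhamster/gerbil";

const cache = new ResponseCache(50, 60 * 1000); // up to 50 entries, 60s TTL

// Store a result, then read it back; the getter re-inserts the entry (LRU) and marks it cached.
cache.set("gerbil:abc123", { text: "Hello!", cached: false });
const hit = cache.get("gerbil:abc123"); // { text: "Hello!", cached: true } until the TTL expires

console.log(cache.getStats());   // { hits, misses, size, maxSize }
console.log(cache.getHitRate()); // hit rate as a percentage
```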
+//#region src/core/gerbil.ts
+/**
+* Gerbil - Local GPU-accelerated LLM inference
+*/
+const pipeline$1 = pipeline;
+function suppressNoisyWarnings(fn) {
+const originalWarn = console.warn;
+console.warn = (...args) => {
+const msg = args[0]?.toString?.() || "";
+if (msg.includes("content-length") || msg.includes("Unable to determine")) return;
+originalWarn.apply(console, args);
+};
+return fn().finally(() => {
+console.warn = originalWarn;
+});
+}
+const KOKORO_VOICES_DEFAULT = [
+{
+id: "af_bella",
+name: "Bella",
+gender: "female",
+language: "en-us",
+description: "American female, warm and friendly"
+},
+{
+id: "af_sarah",
+name: "Sarah",
+gender: "female",
+language: "en-us",
+description: "American female, clear and professional"
+},
+{
+id: "af_nicole",
+name: "Nicole",
+gender: "female",
+language: "en-us",
+description: "American female, soft and gentle"
+},
+{
+id: "af_sky",
+name: "Sky",
+gender: "female",
+language: "en-us",
+description: "American female, young and energetic"
+},
+{
+id: "am_adam",
+name: "Adam",
+gender: "male",
+language: "en-us",
+description: "American male, deep and confident"
+},
+{
+id: "am_michael",
+name: "Michael",
+gender: "male",
+language: "en-us",
+description: "American male, warm and friendly"
+},
+{
+id: "bf_emma",
+name: "Emma",
+gender: "female",
+language: "en-gb",
+description: "British female, elegant and clear"
+},
+{
+id: "bf_isabella",
+name: "Isabella",
+gender: "female",
+language: "en-gb",
+description: "British female, sophisticated"
+},
+{
+id: "bm_george",
+name: "George",
+gender: "male",
+language: "en-gb",
+description: "British male, distinguished"
+},
+{
+id: "bm_lewis",
+name: "Lewis",
+gender: "male",
+language: "en-gb",
+description: "British male, friendly and warm"
+}
+];
+const isBrowser = typeof window !== "undefined";
+env.allowLocalModels = !isBrowser;
+env.useBrowserCache = isBrowser;
+let webgpuInitialized = false;
+let webgpuAvailable = false;
+/**
+* Initialize WebGPU for Node.js environments
+* Called automatically before model loading
+*/
+async function initNodeWebGPU() {
+if (webgpuInitialized) return webgpuAvailable;
+webgpuInitialized = true;
+if (typeof window !== "undefined") {
+webgpuAvailable = "gpu" in navigator;
+return webgpuAvailable;
+}
+try {
+const { create, globals } = await new Function("specifier", "return import(specifier)")("webgpu");
+Object.assign(globalThis, globals);
+if (!globalThis.navigator) globalThis.navigator = {};
+globalThis.navigator.gpu = create([]);
+webgpuAvailable = true;
+} catch {
+webgpuAvailable = false;
+}
+return webgpuAvailable;
+}
+var Gerbil = class {
+generator = null;
+tokenizer = null;
+model = null;
+embedder = null;
+currentModel = null;
+modelConfig = null;
+config;
+stats;
+useDirect = false;
+chromeBackend = null;
+_deviceMode = "cpu";
+processor = null;
+visionModel = null;
+isVisionModel = false;
+constructor(config = {}) {
+this.config = config;
+this.stats = {
+prompts: 0,
+tokensIn: 0,
+tokensOut: 0,
+avgSpeed: 0,
+totalTime: 0,
+cacheHits: 0,
+cacheMisses: 0
+};
+}
+static listModels() {
+return Object.values(BUILTIN_MODELS);
+}
+static getModel(modelId) {
+return BUILTIN_MODELS[modelId];
+}
+/**
+* Load a model
+*
+* @example
+* ```ts
+* // Built-in model
+* await g.loadModel("qwen3-0.6b");
+*
+* // HuggingFace model
+* await g.loadModel("hf:microsoft/Phi-3-mini");
+*
+* // Local model
+* await g.loadModel("file:./models/my-model");
+*
+* // Vision model
+* await g.loadModel("ministral-3b");
+* ```
+*/
+async loadModel(modelId = "qwen3-0.6b", options = {}) {
+if (this.isLoaded()) await this.dispose();
+await initNodeWebGPU();
+const source = resolveModel(modelId);
+const { onProgress, device = "auto", dtype: userDtype } = options;
+let config = getModelConfig(modelId);
+if (!config) {
+const contextLength = await fetchModelContextLength(source.path).catch(() => null);
+config = createExternalModelConfig(modelId, source.path, contextLength || void 0);
+}
+if (config.supportsVision) return this.loadVisionModel(modelId, source.path, config, options);
+onProgress?.({ status: `Loading ${modelId}...` });
+const isBrowser$1 = typeof window !== "undefined";
+const fallbackDevice = isBrowser$1 ? "wasm" : "cpu";
+let tfDevice = fallbackDevice;
+if (device === "webgpu" || device === "gpu" || device === "auto") tfDevice = "webgpu";
+const dtype = userDtype ?? (tfDevice === "webgpu" ? "q4f16" : "q4");
+let isLoading = true;
+let lastFile = "";
+let lastPct = -1;
+const progressCallback = (progress) => {
+if (!isLoading) return;
+if (progress.status === "progress" && progress.file) {
+const pct = Math.round(progress.progress || 0);
+if (progress.file !== lastFile || pct >= lastPct + 5) {
+lastFile = progress.file;
+lastPct = pct;
+onProgress?.({
+status: `Downloading ${progress.file}`,
+progress: pct,
+file: progress.file
+});
+}
+}
+};
+try {
+if (isBrowser$1 && tfDevice === "webgpu") {
+onProgress?.({ status: "Loading tokenizer..." });
+this.tokenizer = await suppressNoisyWarnings(() => AutoTokenizer.from_pretrained(source.path, { progress_callback: progressCallback }));
+onProgress?.({ status: "Loading model..." });
+this.model = await suppressNoisyWarnings(() => AutoModelForCausalLM.from_pretrained(source.path, {
+dtype,
+device: tfDevice,
+progress_callback: progressCallback
+}));
+this.useDirect = true;
+this._deviceMode = "webgpu";
+this.isVisionModel = false;
+isLoading = false;
+this.currentModel = modelId;
+this.modelConfig = config;
+onProgress?.({ status: "Ready (WebGPU)!" });
+} else if (!isBrowser$1 && tfDevice === "webgpu") {
+onProgress?.({ status: "Starting Chrome WebGPU backend..." });
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+this.chromeBackend = await ChromeGPUBackend.create({
+modelId: source.path,
+contextLength: config.contextLength,
+onProgress
+});
+this.useDirect = false;
+this._deviceMode = "webgpu";
+this.isVisionModel = false;
+isLoading = false;
+this.currentModel = modelId;
+this.modelConfig = config;
+} else {
+const pipelineOptions = {
+dtype,
+device: tfDevice,
+progress_callback: progressCallback
+};
+this.generator = await suppressNoisyWarnings(() => pipeline$1("text-generation", source.path, pipelineOptions));
+this.useDirect = false;
+this._deviceMode = tfDevice;
+this.isVisionModel = false;
+isLoading = false;
+this.currentModel = modelId;
+this.modelConfig = config;
+onProgress?.({ status: `Ready (${tfDevice.toUpperCase()})!` });
+}
+} catch (err) {
+if (tfDevice !== fallbackDevice) {
+onProgress?.({ status: `Using ${fallbackDevice.toUpperCase()}...` });
+if (this.chromeBackend) {
+await this.chromeBackend.dispose();
+this.chromeBackend = null;
+}
+this.generator = await suppressNoisyWarnings(() => pipeline$1("text-generation", source.path, {
+dtype: "q4",
+device: fallbackDevice,
+progress_callback: progressCallback
+}));
+this.useDirect = false;
+this._deviceMode = fallbackDevice;
+this.isVisionModel = false;
+isLoading = false;
+this.currentModel = modelId;
+this.modelConfig = config;
+onProgress?.({ status: `Ready (${fallbackDevice.toUpperCase()})!` });
+} else throw err;
+}
+}
+/**
+* Load a vision model (VLM)
+* Uses AutoProcessor + AutoModelForImageTextToText instead of tokenizer + causal LM
+*/
+async loadVisionModel(modelId, repoPath, config, options = {}) {
+const { onProgress, device = "auto" } = options;
+onProgress?.({ status: `Loading ${modelId} (vision model)...` });
+const isBrowser$1 = typeof window !== "undefined";
+const fallbackDevice = isBrowser$1 ? "wasm" : "cpu";
+let tfDevice = fallbackDevice;
+if (device === "webgpu" || device === "gpu" || device === "auto") tfDevice = "webgpu";
+if (!isBrowser$1 && tfDevice === "webgpu") {
+onProgress?.({ status: "Starting Chrome WebGPU backend (vision)..." });
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+this.chromeBackend = await ChromeGPUBackend.create({
+modelId: repoPath,
+contextLength: config.contextLength,
+isVision: true,
+onProgress
+});
+this.useDirect = false;
+this._deviceMode = "webgpu";
+this.isVisionModel = true;
+this.currentModel = modelId;
+this.modelConfig = config;
+return;
+}
+let lastFile = "";
+let lastPct = -1;
+const progressCallback = (progress) => {
+if (progress.status === "progress" && progress.file) {
+const pct = Math.round(progress.progress || 0);
+if (progress.file !== lastFile || pct >= lastPct + 5) {
+lastFile = progress.file;
+lastPct = pct;
+onProgress?.({
+status: `Downloading ${progress.file}`,
+progress: pct,
+file: progress.file
+});
+}
+}
+};
+try {
+onProgress?.({ status: "Loading processor..." });
+this.processor = await suppressNoisyWarnings(() => AutoProcessor.from_pretrained(repoPath, { progress_callback: progressCallback }));
+onProgress?.({ status: "Loading vision model..." });
+this.visionModel = await suppressNoisyWarnings(() => AutoModelForImageTextToText.from_pretrained(repoPath, {
+device: tfDevice,
+progress_callback: progressCallback
+}));
+this.isVisionModel = true;
+this.useDirect = true;
+this._deviceMode = tfDevice === "webgpu" ? "webgpu" : tfDevice;
+this.currentModel = modelId;
+this.modelConfig = config;
+onProgress?.({ status: `Ready (Vision, ${tfDevice.toUpperCase()})!` });
+} catch (err) {
+if (tfDevice !== fallbackDevice) {
+onProgress?.({ status: `Vision model: Using ${fallbackDevice.toUpperCase()}...` });
+this.processor = await suppressNoisyWarnings(() => AutoProcessor.from_pretrained(repoPath, { progress_callback: progressCallback }));
+this.visionModel = await suppressNoisyWarnings(() => AutoModelForImageTextToText.from_pretrained(repoPath, {
+device: fallbackDevice,
+progress_callback: progressCallback
+}));
+this.isVisionModel = true;
+this.useDirect = true;
+this._deviceMode = fallbackDevice;
+this.currentModel = modelId;
+this.modelConfig = config;
+onProgress?.({ status: `Ready (Vision, ${fallbackDevice.toUpperCase()})!` });
+} else throw err;
+}
+}
+/**
+* Check if a model is loaded
+*/
+isLoaded() {
+return this.generator !== null || this.useDirect && this.model !== null || this.chromeBackend !== null || this.isVisionModel && this.visionModel !== null;
+}
+/**
+* Check if current model supports vision
+*/
+supportsVision() {
+return this.isVisionModel && this.modelConfig?.supportsVision === true;
+}
+/**
+* Get current model info
+*/
+getModelInfo() {
+return this.modelConfig;
+}
+/**
+* Get current device mode (webgpu, cpu, or wasm)
+*/
+getDeviceMode() {
+return this._deviceMode;
+}
+/**
+* Get dtype used for current model
+*/
+getDtype() {
+return this._deviceMode === "webgpu" ? "q4f16" : "q4";
+}
+/**
+* Get response cache statistics
+*/
+getResponseCacheStats() {
+const cache = getGlobalCache();
+const stats = cache.getStats();
+return {
+hits: stats.hits,
+misses: stats.misses,
+size: stats.size,
+hitRate: cache.getHitRate()
+};
+}
+/**
+* Clear the response cache (for cached generate() results)
+*/
+clearResponseCache() {
+getGlobalCache().clear();
+}
+/**
+* Get Chrome backend status (if using WebGPU via Chrome)
+*/
+getChromeStatus() {
+if (!this.chromeBackend) return null;
+return this.chromeBackend.getStatus();
+}
+/**
+* Get Chrome memory usage (if using WebGPU via Chrome)
+* Returns JS heap memory in bytes
+*/
+async getChromeMemory() {
+if (!this.chromeBackend) return null;
+return this.chromeBackend.getMemoryUsage();
+}
+/**
+* Get memory usage in GB (if using WebGPU via Chrome)
+*/
+async getMemoryUsage() {
+if (!this.chromeBackend) return null;
+return this.chromeBackend.getMemoryStats();
+}
+/**
+* Clear KV cache to free memory
+* This will reset the conversation context but free up memory
+*/
+async clearCache() {
+if (this.chromeBackend) await this.chromeBackend.reset();
+}
+/**
+* Check memory usage and cleanup if needed
+* @param thresholdGB Memory threshold in GB (default: 8)
+* @returns true if cleanup was performed
+*/
+async checkMemoryAndCleanup(thresholdGB = 8) {
+if (!this.chromeBackend) return false;
+return this.chromeBackend.checkMemoryAndCleanup(thresholdGB);
+}
+/**
+* Generate text (automatically routes to vision generation if images provided)
+*
+* @example
+* ```ts
+* // Text generation
+* const result = await g.generate("Hello!");
+*
+* // Vision generation (with vision model)
+* const result = await g.generate("What's in this image?", {
+* images: [{ source: "https://example.com/cat.jpg" }]
+* });
+* ```
+*/
+async generate(prompt, options = {}) {
+if (!this.isLoaded()) await this.loadModel(this.config.model || "qwen3-0.6b");
+const { images } = options;
+if (images?.length && this.isVisionModel && !this.chromeBackend) return this.generateWithVision(prompt, options);
+if (images?.length && !this.isVisionModel) {}
+const { maxTokens = 256, temperature = .7, topP = .9, topK = 50, thinking = false, system, cache = false, cacheTtl } = options;
+if (cache && !options.onToken && !images?.length) {
+const cacheKey = generateCacheKey(prompt, this.currentModel || "", {
+maxTokens,
+temperature,
+topP,
+topK,
+system,
+thinking
+});
+const cached = getGlobalCache().get(cacheKey);
+if (cached) return cached;
+}
+const startTime = performance.now();
+try {
+let rawText = "";
+if (this.chromeBackend) try {
+rawText = await this.chromeBackend.generate(prompt, {
+maxTokens,
+temperature,
+topP,
+topK,
+thinking,
+system,
+images: images?.map((img) => img.source),
+onToken: options.onToken ? (t) => options.onToken?.(t.text) : void 0
+});
+} catch (chromeErr) {
+if (chromeErr?.message === "CHROME_BACKEND_DEAD" || !this.chromeBackend?.isAlive()) {
+await this.chromeBackend?.dispose().catch(() => {});
+this.chromeBackend = null;
+this._deviceMode = "cpu";
+this.generator = await pipeline$1("text-generation", this.currentModel || "qwen3-0.6b", {
+dtype: "q4",
+device: "cpu"
+});
+return this.generate(prompt, options);
+}
+throw chromeErr;
+}
+else if (this.useDirect && this.model && this.tokenizer) {
+const messages = this.buildMessages(prompt, {
+...options,
+thinking
+});
+const inputs = this.tokenizer.apply_chat_template(messages, {
+add_generation_prompt: true,
+return_dict: true,
+enable_thinking: thinking
+});
+const output = await this.model.generate({
+...inputs,
+max_new_tokens: maxTokens,
+temperature: temperature > 0 ? temperature : void 0,
+top_p: topP,
+top_k: topK,
+do_sample: temperature > 0
+});
+const inputLength = inputs.input_ids.dims?.[1] || inputs.input_ids.data?.length || 0;
+const outputTokens = output.slice(null, [inputLength, null]);
+rawText = this.tokenizer.batch_decode(outputTokens, { skip_special_tokens: true })[0] || "";
+if (rawText.toLowerCase().includes("assistant")) {
+const match = rawText.match(/assistant[:\s]*([\s\S]*)/i);
+if (match) rawText = match[1].trim();
+}
+} else if (this.generator) {
+const formattedPrompt = this.formatPrompt(prompt, {
+...options,
+thinking
+});
+const output = await this.generator(formattedPrompt, {
+max_new_tokens: maxTokens,
+temperature,
+top_p: topP,
+top_k: topK,
+do_sample: temperature > 0,
+return_full_text: false
+});
+if (Array.isArray(output) && output[0]) {
+const result$1 = output[0];
+if (Array.isArray(result$1.generated_text)) rawText = result$1.generated_text.at(-1)?.content || "";
+else rawText = result$1.generated_text || "";
+}
+} else throw new Error("No model loaded");
+const totalTime = performance.now() - startTime;
+rawText = this.cleanOutput(rawText);
+const { thinking: thinkingText, response } = this.parseThinking(rawText);
+const finalThinking = thinking ? thinkingText : void 0;
+const tokensGenerated = Math.ceil(response.length / 4);
+this.stats.prompts += 1;
+this.stats.tokensOut += tokensGenerated;
+this.stats.totalTime += totalTime;
+this.stats.avgSpeed = this.stats.tokensOut / this.stats.totalTime * 1e3;
+const result = {
+text: response,
+thinking: finalThinking,
+tokensGenerated,
+tokensPerSecond: tokensGenerated / totalTime * 1e3,
+totalTime,
+finishReason: "stop",
+provider: "local",
+cached: false
+};
+if (cache && !options.onToken && !images?.length) {
+const cacheKey = generateCacheKey(prompt, this.currentModel || "", {
+maxTokens,
+temperature,
+topP,
+topK,
+system,
+thinking
+});
+getGlobalCache().set(cacheKey, result, cacheTtl);
+}
+return result;
+} catch (_error) {
+return {
+text: "",
+tokensGenerated: 0,
+tokensPerSecond: 0,
+totalTime: performance.now() - startTime,
+finishReason: "error",
+provider: "local",
+cached: false
+};
+}
+}
+/**
+* Stream text generation (simulated token-by-token)
+*
+* Note: Yields the raw output including <think> tags if thinking mode is enabled.
+* The final result has parsed thinking separated out.
+*/
+async *stream(prompt, options = {}) {
+if (!this.isLoaded()) await this.loadModel(this.config.model || "qwen3-0.6b");
+const startTime = performance.now();
+if (this.chromeBackend) {
+let fullText = "";
+const tokenQueue = [];
+let resolveNext = null;
+let done = false;
+const generatePromise = this.chromeBackend.generate(prompt, {
+...options,
+images: options.images?.map((img) => img.source),
+onToken: (token) => {
+fullText += token.text;
+if (resolveNext) {
+resolveNext(token.text);
+resolveNext = null;
+} else tokenQueue.push(token.text);
+}
+}).then(() => {
+done = true;
+if (resolveNext) resolveNext(null);
+}).catch((err) => {
+done = true;
+if (resolveNext) resolveNext(null);
+throw err;
+});
+while (!done || tokenQueue.length > 0) if (tokenQueue.length > 0) {
+const token = tokenQueue.shift();
+yield token;
+options.onToken?.(token);
+} else if (!done) {
+const token = await new Promise((resolve) => {
+resolveNext = resolve;
+});
+if (token) {
+yield token;
+options.onToken?.(token);
+}
+}
+await generatePromise;
+const { thinking: thinkingText, response } = this.parseThinking(fullText);
+const tokensGenerated = Math.ceil(response.length / 4);
+const totalTime = performance.now() - startTime;
+return {
+text: response,
+thinking: options.thinking ? thinkingText : void 0,
+tokensGenerated,
+totalTime,
+tokensPerSecond: tokensGenerated / totalTime * 1e3,
+finishReason: "stop"
+};
+}
+const result = await this.generateRaw(prompt, options);
+const words = result.rawText.split(/(\s+)/);
+for (const word of words) if (word) {
+yield word;
+options.onToken?.(word);
+}
+return result.result;
+}
+/**
+* Internal: Generate with raw text access for streaming
+*/
+async generateRaw(prompt, options = {}) {
+const { maxTokens = 256, temperature = .7, topP = .9, topK = 50, thinking = false } = options;
+const startTime = performance.now();
+const formattedPrompt = this.formatPrompt(prompt, {
+...options,
+thinking
+});
+try {
+const output = await this.generator?.(formattedPrompt, {
+max_new_tokens: maxTokens,
+temperature,
+top_p: topP,
+top_k: topK,
+do_sample: temperature > 0,
+return_full_text: false
+});
+const totalTime = performance.now() - startTime;
+let rawText = "";
+if (Array.isArray(output) && output[0]) {
+const result = output[0];
+if (Array.isArray(result.generated_text)) rawText = result.generated_text.at(-1)?.content || "";
+else rawText = result.generated_text || "";
+}
+rawText = this.cleanOutput(rawText);
+const { thinking: thinkingText, response } = this.parseThinking(rawText);
+const finalThinking = thinking ? thinkingText : void 0;
+const tokensGenerated = Math.ceil(response.length / 4);
+this.stats.prompts += 1;
+this.stats.tokensOut += tokensGenerated;
+this.stats.totalTime += totalTime;
+this.stats.avgSpeed = this.stats.tokensOut / this.stats.totalTime * 1e3;
+return {
+rawText,
+result: {
+text: response,
+thinking: finalThinking,
+tokensGenerated,
+tokensPerSecond: tokensGenerated / totalTime * 1e3,
+totalTime,
+finishReason: "stop",
+provider: "local",
+cached: false
+}
+};
+} catch (_error) {
+return {
+rawText: "",
+result: {
+text: "",
+tokensGenerated: 0,
+tokensPerSecond: 0,
+totalTime: performance.now() - startTime,
+finishReason: "error",
+provider: "local",
+cached: false
+}
+};
+}
+}
+/**
+* Generate text from images using a vision model
+* Called automatically by generate() when images are provided
+*/
+async generateWithVision(prompt, options) {
+if (!(this.processor && this.visionModel)) throw new Error("Vision model not loaded. Load a vision model first.");
+const { images = [], maxTokens = 2048, temperature = .7, topP = .9, topK = 20, system } = options;
+const startTime = performance.now();
+try {
+const content = [];
+for (let i = 0; i < images.length; i += 1) content.push({ type: "image" });
+content.push({
+type: "text",
+text: prompt
+});
+const messages = [...system ? [{
+role: "system",
+content: system
+}] : [], {
+role: "user",
+content
+}];
+const chatPrompt = this.processor.apply_chat_template(messages);
+const loadedImages = await Promise.all(images.map(async (img) => await RawImage.fromURL(img.source)));
+const inputs = await this.processor(loadedImages.length === 1 ? loadedImages[0] : loadedImages, chatPrompt, { add_special_tokens: false });
+let fullText = "";
+const streamer = options.onToken ? new TextStreamer(this.processor.tokenizer, {
+skip_prompt: true,
+skip_special_tokens: true,
+callback_function: (text$1) => {
+fullText += text$1;
+options.onToken?.(text$1);
+}
+}) : void 0;
+const outputs = await this.visionModel.generate({
+...inputs,
+max_new_tokens: maxTokens,
+temperature: temperature > 0 ? temperature : void 0,
+top_p: topP,
+top_k: topK,
+do_sample: temperature > 0,
+...streamer ? { streamer } : {}
+});
+const inputLength = inputs.input_ids.dims?.at(-1) || 0;
+const text = this.processor.batch_decode(outputs.slice(null, [inputLength, null]), { skip_special_tokens: true })[0] || fullText || "";
+const totalTime = performance.now() - startTime;
+const tokensGenerated = Math.ceil(text.length / 4);
+this.stats.prompts += 1;
+this.stats.tokensOut += tokensGenerated;
+this.stats.totalTime += totalTime;
+this.stats.avgSpeed = this.stats.tokensOut / this.stats.totalTime * 1e3;
+return {
+text: this.cleanOutput(text),
+tokensGenerated,
+tokensPerSecond: tokensGenerated / totalTime * 1e3,
+totalTime,
+finishReason: "stop",
+provider: "local",
+cached: false
+};
+} catch (_error) {
+return {
+text: "",
+tokensGenerated: 0,
+tokensPerSecond: 0,
+totalTime: performance.now() - startTime,
+finishReason: "error",
+provider: "local",
+cached: false
+};
+}
+}
+/**
+* Generate structured JSON output
+*/
+async json(prompt, options) {
+const { schema, retries = 3, temperature = .3 } = options;
+const systemPrompt = `You are a JSON generator. You MUST respond with valid JSON only.
+No explanations, no markdown, no code blocks. Just pure JSON.
+The JSON must conform to this schema: ${JSON.stringify(zodToJsonSchema(schema))}`;
+for (let attempt = 0; attempt < retries; attempt += 1) {
+const result = await this.generate(prompt, {
+system: options.system || systemPrompt,
+temperature,
+maxTokens: 1e3
+});
+try {
+const jsonStr = extractJson(result.text);
+const parsed = JSON.parse(jsonStr);
+return schema.parse(parsed);
+} catch (error) {
+if (attempt === retries - 1) throw new Error(`Failed to generate valid JSON after ${retries} attempts: ${error}`);
+}
+}
+throw new Error("Failed to generate valid JSON");
+}
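As a quick illustration of the structured-output path above (not part of the published diff), a sketch assuming `zod` for the schema and that `Gerbil` is re-exported from the package entry point:

```ts
// Sketch only -- assumes zod is installed and Gerbil is exported from the package root.
import { z } from "zod";
import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil();
await g.loadModel("qwen3-0.6b");

const Recipe = z.object({
  name: z.string(),
  minutes: z.number(),
  ingredients: z.array(z.string()),
});

// json() prompts with a JSON-only system message, extracts the JSON from the reply,
// and validates it against the schema, retrying up to `retries` times on failure.
const recipe = await g.json("Give me a quick pasta recipe", { schema: Recipe });
console.log(recipe.name, recipe.ingredients.length);
```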
+/**
+* Generate embeddings
+*/
+async embed(text, options = {}) {
+if (!this.embedder) this.embedder = await pipeline$1("feature-extraction", options.model || "Xenova/all-MiniLM-L6-v2");
+const startTime = performance.now();
+const output = await this.embedder(text, {
+pooling: "mean",
+normalize: options.normalize !== false
+});
+return {
+vector: Array.from(output.data),
+text,
+totalTime: performance.now() - startTime
+};
+}
+/**
+* Generate embeddings for multiple texts
+*/
+async embedBatch(texts, options = {}) {
+const results = [];
+for (const text of texts) results.push(await this.embed(text, options));
+return results;
+}
+/**
+* Get session stats
+*/
+getStats() {
+return { ...this.stats };
+}
+/**
+* Get system info
+*/
+getInfo() {
+return {
+version: "1.0.0",
+model: this.modelConfig,
+device: {
+backend: "transformers.js",
+gpu: null,
+vram: null,
+status: this.isLoaded() ? "ready" : "loading"
+},
+context: {
+max: this.modelConfig?.contextLength || 0,
+used: 0,
+available: this.modelConfig?.contextLength || 0
+},
+cache: {
+location: "~/.gerbil/models",
+size: "0 MB",
+modelCount: 0
+}
+};
+}
+/**
+* Reset stats
+*/
+resetStats() {
+this.stats = {
+prompts: 0,
+tokensIn: 0,
+tokensOut: 0,
+avgSpeed: 0,
+totalTime: 0,
+cacheHits: 0,
+cacheMisses: 0
+};
+}
+tts = null;
+ttsModelId = "kokoro-82m";
+/**
+* Load TTS model for text-to-speech synthesis
+*
+* @example
+* ```ts
+* // Load default (Kokoro)
+* await g.loadTTS({ onProgress: (p) => console.log(p.status) });
+*
+* // Load Supertonic (faster, 44kHz output)
+* await g.loadTTS({ model: "supertonic-66m" });
+*
+* const result = await g.speak("Hello world");
+* // result.audio = Float32Array, result.sampleRate = 24000 or 44100
+* ```
+*/
+async loadTTS(options = {}) {
+const modelId = options.model || "kokoro-82m";
+if (this.tts && this.ttsModelId !== modelId) {
+await this.tts.dispose();
+this.tts = null;
+}
+if (this.tts?.isLoaded()) return;
+this.ttsModelId = modelId;
+const { createTTS } = await import("./tts-5yWeP_I0.mjs");
+if (!this.tts) this.tts = createTTS(modelId);
+await this.tts.load(options);
+}
+/**
+* Ensure TTS model is loaded (lazy loading)
+*/
+async ensureTTSLoaded(options) {
+if (!this.tts?.isLoaded()) await this.loadTTS(options);
+}
+/**
+* Generate speech from text
+*
+* @example
+* ```ts
+* const result = await g.speak("Hello world", { voice: "af_bella" });
+* // result.audio = Float32Array PCM
+* // result.sampleRate = 24000
+* // result.duration = seconds
+* ```
+*/
+async speak(text, options = {}) {
+await this.ensureTTSLoaded({ onProgress: options.onProgress });
+return this.tts.speak(text, options);
+}
+/**
+* Stream speech generation (yields audio chunks as they're generated)
+*
+* @example
+* ```ts
+* for await (const chunk of g.speakStream("Long text...")) {
+* // chunk.samples = Float32Array
+* // chunk.isFinal = boolean
+* playChunk(chunk);
+* }
+* ```
+*/
+async *speakStream(text, options = {}) {
+await this.ensureTTSLoaded({ onProgress: options.onProgress });
+return yield* this.tts.speakStream(text, options);
+}
+/**
+* Get list of available TTS voices
+*/
+listVoices() {
+if (!this.tts) return KOKORO_VOICES_DEFAULT;
+return this.tts.listVoices();
+}
+/**
+* Check if TTS model is loaded
+*/
+isTTSLoaded() {
+return this.tts?.isLoaded() ?? false;
+}
+/**
+* Get current TTS model info
+*/
+getTTSModelInfo() {
+if (!this.tts) return null;
+return {
+id: this.ttsModelId,
+loaded: this.tts.isLoaded(),
+device: this.tts.isLoaded() ? this.tts.getDeviceMode() : void 0
+};
+}
+/**
+* List available TTS models
+*/
+async listTTSModels() {
+const { TTS_MODELS } = await import("./tts-5yWeP_I0.mjs");
+return Object.values(TTS_MODELS).map((m) => ({
+id: m.id,
+description: m.description,
+sampleRate: m.sampleRate,
+voiceCount: m.voices.length
+}));
+}
+stt = null;
+/**
+* Load STT model for speech-to-text transcription
+*
+* @example
+* ```ts
+* await g.loadSTT({
+* onProgress: (p) => console.log(p.status)
+* });
+*
+* const result = await g.transcribe(audioData);
+* console.log(result.text);
+* ```
+*/
+async loadSTT(modelId, options = {}) {
+if (this.stt?.isLoaded()) return;
+const { WhisperSTT } = await import("./stt-KzSoNvwI.mjs");
+if (!this.stt) this.stt = new WhisperSTT(modelId);
+await this.stt.load(options);
+}
+/**
+* Ensure STT model is loaded (lazy loading)
+*/
+async ensureSTTLoaded(modelId, options) {
+if (!this.stt?.isLoaded()) await this.loadSTT(modelId, options);
+}
+/**
+* Transcribe audio to text
+*
+* @param audio - Audio data as Float32Array (16kHz mono) or Uint8Array (WAV file)
+* @param options - Transcription options
+*
+* @example
+* ```ts
+* // From Float32Array (16kHz mono)
+* const result = await g.transcribe(audioData);
+* console.log(result.text);
+*
+* // With timestamps
+* const result = await g.transcribe(audioData, { timestamps: true });
+* for (const seg of result.segments) {
+* console.log(`[${seg.start}s] ${seg.text}`);
+* }
+*
+* // From WAV file
+* const wavData = fs.readFileSync("audio.wav");
+* const result = await g.transcribe(new Uint8Array(wavData));
+* ```
+*/
+async transcribe(audio, options = {}) {
+await this.ensureSTTLoaded(void 0, { onProgress: options.onProgress });
+return this.stt.transcribe(audio, options);
+}
+/**
+* Create a streaming transcription session
+*
+* Transcribes audio in real-time by processing chunks at regular intervals.
+* Perfect for live captioning, call transcription, or real-time subtitles.
+*
+* @param options - Streaming options
+* @returns Streaming session controller
+*
+* @example
+* ```ts
+* const session = await g.createStreamingTranscription({
+* chunkDuration: 3000, // Transcribe every 3 seconds
+* onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
+* onTranscript: (fullText) => console.log("Full:", fullText),
+* });
+*
+* // Feed audio data as it comes in
+* session.feedAudio(audioChunk);
+*
+* // Start automatic interval-based transcription
+* session.start();
+*
+* // Later, stop and get final transcript
+* const finalText = await session.stop();
+* ```
+*/
+async createStreamingTranscription(options = {}) {
+await this.ensureSTTLoaded();
+return this.stt.createStreamingSession(options);
+}
+/**
+* Get list of available STT models
+*/
+async listSTTModels() {
+const { WhisperSTT } = await import("./stt-KzSoNvwI.mjs");
+return WhisperSTT.listModels();
+}
+/**
+* Check if STT model is loaded
+*/
+isSTTLoaded() {
+return this.stt?.isLoaded() ?? false;
+}
+/**
+* Get current STT model info
+*/
+getSTTModelInfo() {
+if (!this.stt) return null;
+return {
+id: this.stt.getModelInfo().id,
+loaded: this.stt.isLoaded(),
+device: this.stt.isLoaded() ? this.stt.getDeviceMode() : void 0
+};
+}
+/**
+* Record audio from microphone and transcribe
+*
+* @example
+* ```ts
+* // Record for 5 seconds and transcribe
+* const result = await g.listen(5000);
+* console.log(result.text);
+*
+* // Use with voice chat
+* const userInput = await g.listen(10000);
+* const response = await g.generate(userInput.text);
+* await g.speak(response.text);
+* ```
+*/
+async listen(durationMs = 5e3, options = {}) {
+const { Microphone, isSoxAvailable } = await import("./microphone-D-6y9aiE.mjs");
+if (!isSoxAvailable()) throw new Error("Microphone recording requires SoX. Install with:\n  macOS: brew install sox\n  Ubuntu: sudo apt install sox\n  Windows: https://sox.sourceforge.net/");
+options.onProgress?.("Starting microphone...");
+const mic = new Microphone({ sampleRate: 16e3 });
+await mic.start();
+options.onProgress?.(`Recording for ${(durationMs / 1e3).toFixed(1)}s...`);
+await new Promise((r) => setTimeout(r, durationMs));
+options.onProgress?.("Processing audio...");
+const { audio } = await mic.stop();
+options.onProgress?.("Transcribing...");
+return this.transcribe(audio, { onProgress: (p) => options.onProgress?.(p.status || "Transcribing...") });
+}
+/**
+* Check if microphone recording is available
+*/
+async isMicrophoneAvailable() {
+try {
+const { isSoxAvailable } = await import("./microphone-D-6y9aiE.mjs");
+return isSoxAvailable();
+} catch {
+return false;
+}
+}
+/**
+* Dispose of resources
+* @param disconnect If true, also disconnect from shared browser (for clean script exit)
+*/
+async dispose(disconnect = false) {
+if (this.chromeBackend) {
+try {
+await this.chromeBackend.dispose(disconnect);
+} catch {}
+this.chromeBackend = null;
+}
+if (this.generator) {
+if (typeof this.generator.dispose === "function") try {
+await this.generator.dispose();
+} catch {}
+this.generator = null;
+}
+if (this.embedder) {
+if (typeof this.embedder.dispose === "function") try {
+await this.embedder.dispose();
+} catch {}
+this.embedder = null;
+}
+if (this.visionModel) {
+if (typeof this.visionModel.dispose === "function") try {
+await this.visionModel.dispose();
+} catch {}
+this.visionModel = null;
+}
+if (this.processor) this.processor = null;
+if (this.tts) {
+try {
+await this.tts.dispose();
+} catch {}
+this.tts = null;
+}
+if (this.stt) {
+try {
+this.stt.dispose();
+} catch {}
+this.stt = null;
+}
+this.currentModel = null;
+this.modelConfig = null;
+this.isVisionModel = false;
+}
+/**
+* Shutdown the shared Chrome backend completely.
+* Call this when your script/process is done to ensure proper cleanup.
+* This closes the shared browser used for WebGPU acceleration.
+*/
+static async shutdown() {
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+await ChromeGPUBackend.closeSharedBrowser();
+}
+/**
+* Get global WebGPU process info (all active backends)
+* Useful for monitoring and debugging memory leaks
+*/
+static async getWebGPUProcesses() {
+if (typeof window !== "undefined") return null;
+try {
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+return {
+browser: ChromeGPUBackend.getGlobalBrowserStatus(),
+backends: await ChromeGPUBackend.getAllBackendsInfo()
+};
+} catch {
+return null;
+}
+}
+/**
+* Kill all WebGPU processes (for zombie cleanup)
+* Use this if you suspect memory leaks from undisposed Gerbil instances
+*/
+static async killAllWebGPU() {
+if (typeof window !== "undefined") return null;
+try {
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+return await ChromeGPUBackend.killAllBackends();
+} catch {
+return null;
+}
+}
+/**
+* Kill a specific WebGPU backend by index
+* @param index Index of the backend to kill (0-based)
+*/
+static async killWebGPUBackend(index) {
+if (typeof window !== "undefined") return false;
+try {
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+return await ChromeGPUBackend.killBackendByIndex(index);
+} catch {
+return false;
+}
+}
+/**
+* Get all Chrome pages across ALL Gerbil processes
+* This provides cross-process visibility into WebGPU backends
+*/
+static async getAllChromePagesInfo() {
+if (typeof window !== "undefined") return null;
+try {
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+return await ChromeGPUBackend.getAllChromePages();
+} catch {
+return null;
+}
+}
+/**
+* Kill a Chrome page by index (works across processes)
+* @param index Index of the page to kill (0-based)
+*/
+static async killChromePage(index) {
+if (typeof window !== "undefined") return false;
+try {
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+return await ChromeGPUBackend.killPageByIndex(index);
+} catch {
+return false;
+}
+}
+/**
+* Get total Chrome page count (all processes)
+*/
+static async getTotalChromePageCount() {
+if (typeof window !== "undefined") return 0;
+try {
+const { ChromeGPUBackend } = await import("./chrome-backend-JEPeM2YE.mjs");
+return await ChromeGPUBackend.getTotalPageCount();
+} catch {
+return 0;
+}
+}
+formatPrompt(prompt, options) {
+const system = options.system || "You are a helpful assistant.";
+const isQwen = this.currentModel?.includes("qwen");
+if (options.thinking && this.modelConfig?.supportsThinking) return `<|im_start|>system\n${`${system}\n\nThink step-by-step before answering. Wrap your reasoning in <think></think> tags, then provide your answer.`}<|im_end|>\n<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
+if (isQwen) return `<|im_start|>system\n${system}<|im_end|>\n<|im_start|>user\n${prompt} /no_think<|im_end|>\n<|im_start|>assistant\n`;
+return `<|im_start|>system\n${system}<|im_end|>\n<|im_start|>user\n${prompt}<|im_end|>\n<|im_start|>assistant\n`;
+}
+buildMessages(prompt, options) {
+const system = options.system || "You are a helpful assistant.";
+const messages = [];
+messages.push({
+role: "system",
+content: system
+});
+messages.push({
+role: "user",
+content: prompt
+});
+return messages;
+}
+parseThinking(text) {
+const match = text.match(/<think>([\s\S]*?)<\/think>/);
+if (match) return {
+thinking: match[1].trim(),
+response: text.replace(/<think>[\s\S]*?<\/think>/, "").trim()
+};
+const unclosedMatch = text.match(/<think>([\s\S]*)$/);
+if (unclosedMatch) {
+const thinking = unclosedMatch[1].trim();
+const response = text.replace(/<think>[\s\S]*$/, "").trim();
+return {
+thinking: thinking || void 0,
+response
+};
+}
+return { response: text.replace(/<\/?think>/g, "").trim() };
+}
+cleanOutput(text) {
+return text.replace(/<\|im_end\|>/g, "").replace(/<\|im_start\|>/g, "").replace(/<\|endoftext\|>/g, "").replace(/<\/s>/g, "").replace(/^\/no_think\s*/i, "").replace(/^assistant\s*/i, "").replace(/^\s*\/no_think\s*/gim, "").replace(/^\s*assistant\s*/gim, "").replace(/^(system|user|assistant):\s*/gim, "").trim();
+}
+};
+
+//#endregion
+export { getGlobalCache as a, configureGlobalCache as i, ResponseCache as n, clearGlobalCache as r, Gerbil as t };
+//# sourceMappingURL=gerbil-yoSpRHgv.mjs.map
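Taken together, the new surface in this chunk (cached generation, TTS, and STT) can be exercised roughly as follows. This is an illustrative sketch only, not part of the published package; the import path is an assumption, while the model ID, voice ID, and option names are taken from the code and embedded examples above:

```ts
// Illustrative only -- assumes the package root re-exports Gerbil.
import { Gerbil } from "@tryhamster/gerbil";

const g = new Gerbil();
await g.loadModel("qwen3-0.6b", { onProgress: (p) => console.log(p.status) });

// A second identical call with cache: true is served from the global response cache.
const a = await g.generate("Summarize WebGPU in one sentence.", { cache: true });
const b = await g.generate("Summarize WebGPU in one sentence.", { cache: true });
console.log(a.cached, b.cached); // false, true
console.log(g.getResponseCacheStats()); // { hits, misses, size, hitRate }

// Speak the answer with the default Kokoro voice, then clean up.
const speech = await g.speak(a.text, { voice: "af_bella" });
console.log(speech.sampleRate, speech.audio.length);
await g.dispose(true);
```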