npm - @tryhamster/gerbil - Versions diffs - 1.0.0-rc.8 → 1.0.0 - Mend

@tryhamster/gerbil 1.0.0-rc.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (179) hide show

package/LICENSE +1 -1
package/README.md +247 -84
package/dist/architectures-C1I5V3Dt.mjs +6070 -0
package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
package/dist/browser/index.d.ts +264 -588
package/dist/browser/index.d.ts.map +1 -1
package/dist/browser/index.js +585 -2334
package/dist/browser/index.js.map +1 -1
package/dist/cli.mjs +625 -1098
package/dist/cli.mjs.map +1 -1
package/dist/defaults-9komdrbY.mjs +24 -0
package/dist/defaults-9komdrbY.mjs.map +1 -0
package/dist/frameworks/express.d.mts +1 -3
package/dist/frameworks/express.d.mts.map +1 -1
package/dist/frameworks/express.mjs +7 -7
package/dist/frameworks/express.mjs.map +1 -1
package/dist/frameworks/fastify.d.mts +1 -1
package/dist/frameworks/fastify.d.mts.map +1 -1
package/dist/frameworks/fastify.mjs +3 -3
package/dist/frameworks/fastify.mjs.map +1 -1
package/dist/frameworks/hono.d.mts +1 -1
package/dist/frameworks/hono.d.mts.map +1 -1
package/dist/frameworks/hono.mjs +4 -4
package/dist/frameworks/hono.mjs.map +1 -1
package/dist/frameworks/next.d.mts +3 -2
package/dist/frameworks/next.d.mts.map +1 -1
package/dist/frameworks/next.mjs +4 -4
package/dist/frameworks/next.mjs.map +1 -1
package/dist/frameworks/react.d.mts +1 -1
package/dist/frameworks/trpc.d.mts +1 -1
package/dist/frameworks/trpc.d.mts.map +1 -1
package/dist/frameworks/trpc.mjs +4 -4
package/dist/frameworks/trpc.mjs.map +1 -1
package/dist/gerbil-BHrJJIa4.mjs +1656 -0
package/dist/gerbil-BHrJJIa4.mjs.map +1 -0
package/dist/gerbil-BT9fCydo.d.mts +488 -0
package/dist/gerbil-BT9fCydo.d.mts.map +1 -0
package/dist/gerbil-DomNfIr1.mjs +4 -0
package/dist/gpu/hooks.d.mts +520 -0
package/dist/gpu/hooks.d.mts.map +1 -0
package/dist/gpu/hooks.mjs +1188 -0
package/dist/gpu/hooks.mjs.map +1 -0
package/dist/gpu/index.d.mts +2 -0
package/dist/gpu/index.mjs +6 -0
package/dist/gpu-33qCAtHW.mjs +3615 -0
package/dist/gpu-33qCAtHW.mjs.map +1 -0
package/dist/index-Dgmb2kE3.d.mts +245 -0
package/dist/index-Dgmb2kE3.d.mts.map +1 -0
package/dist/index-jEAL2s-A.d.mts +2022 -0
package/dist/index-jEAL2s-A.d.mts.map +1 -0
package/dist/index.d.mts +22 -487
package/dist/index.d.mts.map +1 -1
package/dist/index.mjs +13 -8
package/dist/index.mjs.map +1 -1
package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
package/dist/integrations/ai-sdk.d.mts +75 -6
package/dist/integrations/ai-sdk.d.mts.map +1 -1
package/dist/integrations/ai-sdk.mjs +131 -15
package/dist/integrations/ai-sdk.mjs.map +1 -1
package/dist/integrations/langchain.d.mts +1 -1
package/dist/integrations/langchain.d.mts.map +1 -1
package/dist/integrations/langchain.mjs +5 -5
package/dist/integrations/langchain.mjs.map +1 -1
package/dist/integrations/llamaindex.d.mts +1 -1
package/dist/integrations/llamaindex.d.mts.map +1 -1
package/dist/integrations/llamaindex.mjs +5 -5
package/dist/integrations/llamaindex.mjs.map +1 -1
package/dist/integrations/mcp-client.mjs +3 -3
package/dist/integrations/mcp-client.mjs.map +1 -1
package/dist/integrations/mcp.d.mts +3 -2
package/dist/integrations/mcp.d.mts.map +1 -1
package/dist/integrations/mcp.mjs +5 -5
package/dist/{mcp-BvbriaBy.mjs → mcp-1DaMsaBc.mjs} +4 -4
package/dist/mcp-1DaMsaBc.mjs.map +1 -0
package/dist/memory/index.d.mts +3 -0
package/dist/memory/index.mjs +6 -0
package/dist/memory-D1P7Tmda.mjs +4 -0
package/dist/memory-DVN0MnIG.mjs +132 -0
package/dist/memory-DVN0MnIG.mjs.map +1 -0
package/dist/memory-Dj0J1v88.mjs +294 -0
package/dist/memory-Dj0J1v88.mjs.map +1 -0
package/dist/moonshine-stt-BLyVoRpB.mjs +4 -0
package/dist/moonshine-stt-v_P_Ci_m.mjs +11936 -0
package/dist/moonshine-stt-v_P_Ci_m.mjs.map +1 -0
package/dist/{one-liner-s-lD8rCC.mjs → one-liner-DnQn7HJK.mjs} +14 -16
package/dist/one-liner-DnQn7HJK.mjs.map +1 -0
package/dist/repl-jV5gcJFA.mjs +9 -0
package/dist/skills/index.d.mts +270 -320
package/dist/skills/index.d.mts.map +1 -1
package/dist/skills/index.mjs +5 -5
package/dist/{skills-CD3Orlex.mjs → skills-DX8D59UH.mjs} +187 -32
package/dist/skills-DX8D59UH.mjs.map +1 -0
package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
package/dist/tools-DQ1mPUw5.mjs.map +1 -0
package/dist/{types-CiTc7ez3.d.mts → types-D6FiR_oh.d.mts} +106 -12
package/dist/types-D6FiR_oh.d.mts.map +1 -0
package/dist/types-DQBe2lFo.d.mts +165 -0
package/dist/types-DQBe2lFo.d.mts.map +1 -0
package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
package/dist/vector-B0panuy6.mjs +95 -0
package/dist/vector-B0panuy6.mjs.map +1 -0
package/docs/PROJECT-STATE.md +321 -0
package/docs/adding-a-model-family.md +280 -0
package/docs/ai-sdk.md +70 -61
package/docs/architecture/overview.md +17 -7
package/docs/browser.md +203 -8
package/docs/embeddings.md +156 -0
package/docs/gerbil-site-native-migration.md +217 -0
package/docs/gpu-engine/architectures.md +398 -0
package/docs/gpu-engine/ir.md +372 -0
package/docs/gpu-engine/kernels.md +718 -0
package/docs/gpu-engine/paper.html +1759 -0
package/docs/gpu-engine/paper.md +2109 -0
package/docs/gpu-engine/safetensors.md +312 -0
package/docs/gpu-engine/tokenizer.md +302 -0
package/docs/memory-rag.md +91 -0
package/docs/metal-safari-intel.md +190 -0
package/docs/mobile-failure-diagnosis.md +124 -0
package/docs/mobile.md +99 -0
package/docs/observability.md +230 -0
package/docs/onnx-removal-plan.md +339 -0
package/docs/research/autoresearch-portable.md +904 -0
package/docs/research/dispatch-reduction-hivemind.md +84 -0
package/docs/research/ios-safari-model-caching.md +117 -0
package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
package/docs/research/native-stt-model-selection.md +49 -0
package/docs/research/native-tts-model-selection.md +90 -0
package/docs/research/native-vs-chromium-decision.md +152 -0
package/docs/research/nemotron-mamba2-inference.md +910 -0
package/docs/research/qwen35-multimodal.md +293 -0
package/docs/research/qwen36-gemma4-targets.md +337 -0
package/docs/research/sota-embedding-models.md +179 -0
package/docs/research/sota-mobile-models-2026.md +263 -0
package/docs/research/sota-modality-models.md +202 -0
package/docs/research/tps-baselines.md +71 -0
package/docs/research/webgpu-m4-reference.md +104 -0
package/docs/site-update-plan.md +155 -0
package/docs/structured-output.md +123 -0
package/docs/stt.md +63 -446
package/docs/tts.md +77 -499
package/docs/vision.md +100 -338
package/package.json +22 -7
package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
package/dist/gerbil-CJ3ifloF.mjs +0 -4
package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
package/dist/gerbil-qOTe1nl2.d.mts +0 -431
package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
package/dist/kokoro-BNTb6egA.mjs +0 -20210
package/dist/kokoro-BNTb6egA.mjs.map +0 -1
package/dist/kokoro-DFRQ1OeM.js +0 -20212
package/dist/kokoro-DFRQ1OeM.js.map +0 -1
package/dist/mcp-BvbriaBy.mjs.map +0 -1
package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
package/dist/repl-DveXw36T.mjs +0 -9
package/dist/skills-CD3Orlex.mjs.map +0 -1
package/dist/stt-CpLYbGFd.mjs +0 -433
package/dist/stt-CpLYbGFd.mjs.map +0 -1
package/dist/stt-DRPLEEHB.mjs +0 -3
package/dist/stt-Te8Qz-Ay.js +0 -433
package/dist/stt-Te8Qz-Ay.js.map +0 -1
package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
package/dist/transformers.web-DokyH3rP.js +0 -3
package/dist/transformers.web-M6mCnEYJ.js +0 -30382
package/dist/transformers.web-M6mCnEYJ.js.map +0 -1
package/dist/tts-C0xx3CtE.js +0 -724
package/dist/tts-C0xx3CtE.js.map +0 -1
package/dist/tts-DXgsKGCe.mjs +0 -3
package/dist/tts-DeGANMNV.mjs +0 -730
package/dist/tts-DeGANMNV.mjs.map +0 -1
package/dist/types-CiTc7ez3.d.mts.map +0 -1
/package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
/package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
/package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0

package/dist/gerbil-BHrJJIa4.mjs ADDED Viewed

@@ -0,0 +1,1656 @@
+import { n as zodToJsonSchema, t as extractJson } from "./utils-DKO55ZmZ.mjs";
+import { existsSync } from "node:fs";
+import os from "node:os";
+import path from "node:path";
+import zlib from "node:zlib";
+import PQueue from "p-queue";
+//#region src/core/cache.ts
+/**
+* Generate a deterministic cache key from prompt and options.
+* Key includes all parameters that affect the output.
+*/
+function generateCacheKey(prompt, modelId, options) {
+	const keyParts = [
+		prompt,
+		modelId,
+		options.maxTokens ?? 256,
+		options.temperature ?? .7,
+		options.topP ?? .9,
+		options.topK ?? 50,
+		options.system ?? "",
+		options.thinking ?? false
+	];
+	const str = JSON.stringify(keyParts);
+	let hash = 0;
+	for (let i = 0; i < str.length; i++) {
+		const char = str.charCodeAt(i);
+		hash = (hash << 5) - hash + char;
+		hash = hash & hash;
+	}
+	return `gerbil:${hash.toString(16)}`;
+}
+/**
+* LRU cache with TTL expiration for inference responses.
+*/
+var ResponseCache = class {
+	cache = /* @__PURE__ */ new Map();
+	maxSize;
+	defaultTtl;
+	hits = 0;
+	misses = 0;
+	/**
+	* Create a new response cache.
+	* @param maxSize Maximum number of entries (default: 100)
+	* @param defaultTtl Default TTL in ms (default: 5 minutes)
+	*/
+	constructor(maxSize = 100, defaultTtl = 300 * 1e3) {
+		this.maxSize = maxSize;
+		this.defaultTtl = defaultTtl;
+	}
+	/**
+	* Get a cached response if it exists and hasn't expired.
+	*/
+	get(key) {
+		const entry = this.cache.get(key);
+		if (!entry) {
+			this.misses++;
+			return null;
+		}
+		if (Date.now() - entry.createdAt > entry.ttl) {
+			this.cache.delete(key);
+			this.misses++;
+			return null;
+		}
+		this.cache.delete(key);
+		this.cache.set(key, entry);
+		this.hits++;
+		return {
+			...entry.result,
+			cached: true
+		};
+	}
+	/**
+	* Store a response in the cache.
+	*/
+	set(key, result, ttl) {
+		while (this.cache.size >= this.maxSize) {
+			const firstKey = this.cache.keys().next().value;
+			if (firstKey) this.cache.delete(firstKey);
+		}
+		this.cache.set(key, {
+			result,
+			createdAt: Date.now(),
+			ttl: ttl ?? this.defaultTtl
+		});
+	}
+	/**
+	* Check if a key exists and is not expired.
+	*/
+	has(key) {
+		const entry = this.cache.get(key);
+		if (!entry) return false;
+		if (Date.now() - entry.createdAt > entry.ttl) {
+			this.cache.delete(key);
+			return false;
+		}
+		return true;
+	}
+	/**
+	* Remove a specific key from the cache.
+	*/
+	delete(key) {
+		return this.cache.delete(key);
+	}
+	/**
+	* Clear all entries from the cache.
+	*/
+	clear() {
+		this.cache.clear();
+		this.hits = 0;
+		this.misses = 0;
+	}
+	/**
+	* Remove all expired entries.
+	*/
+	prune() {
+		const now = Date.now();
+		let pruned = 0;
+		for (const [key, entry] of this.cache) if (now - entry.createdAt > entry.ttl) {
+			this.cache.delete(key);
+			pruned++;
+		}
+		return pruned;
+	}
+	/**
+	* Get cache statistics.
+	*/
+	getStats() {
+		return {
+			hits: this.hits,
+			misses: this.misses,
+			size: this.cache.size,
+			maxSize: this.maxSize
+		};
+	}
+	/**
+	* Get hit rate as a percentage.
+	*/
+	getHitRate() {
+		const total = this.hits + this.misses;
+		if (total === 0) return 0;
+		return this.hits / total * 100;
+	}
+};
+let globalCache = null;
+/**
+* Get the global response cache instance.
+* Creates one if it doesn't exist.
+*/
+function getGlobalCache() {
+	if (!globalCache) globalCache = new ResponseCache();
+	return globalCache;
+}
+/**
+* Configure the global cache with custom settings.
+*/
+function configureGlobalCache(maxSize, defaultTtl) {
+	globalCache = new ResponseCache(maxSize, defaultTtl);
+	return globalCache;
+}
+/**
+* Clear and reset the global cache.
+*/
+function clearGlobalCache() {
+	if (globalCache) globalCache.clear();
+}
+//#endregion
+//#region src/core/models.ts
+/**
+* The default model used everywhere a model id is not explicitly provided
+* (CLI flags, REPL, framework adapters, integrations, one-liner). This is the
+* e2e-validated model; reference this constant instead of hard-coding the id.
+*/
+const DEFAULT_MODEL = "qwen3.5-0.8b";
+/**
+* Registry of built-in models, keyed by model id (each entry repeats its key
+* in `id`). `repo` is the path returned for "builtin" lookups; the remaining
+* fields are descriptive metadata and capability flags. Entries without a
+* `supportsVision` field (the LFM model) are treated as non-vision because the
+* flag reads as undefined.
+*/
+const BUILTIN_MODELS = {
+	"qwen3.5-0.8b": {
+		id: "qwen3.5-0.8b",
+		repo: "Qwen/Qwen3.5-0.8B",
+		description: "Qwen3.5 0.8B - Fast, multimodal (vision), 262k context, supports thinking (default)",
+		size: "~1.6GB",
+		contextLength: 262144,
+		supportsThinking: true,
+		supportsJson: true,
+		supportsVision: true,
+		family: "qwen"
+	},
+	"qwen3.5-2b": {
+		id: "qwen3.5-2b",
+		repo: "Qwen/Qwen3.5-2B",
+		description: "Qwen3.5 2B - Higher quality, multimodal (vision), 262k context, supports thinking",
+		size: "~4GB",
+		contextLength: 262144,
+		supportsThinking: true,
+		supportsJson: true,
+		supportsVision: true,
+		family: "qwen"
+	},
+	"lfm2.5-1.2b-thinking": {
+		id: "lfm2.5-1.2b-thinking",
+		repo: "LiquidAI/LFM2.5-1.2B-Thinking",
+		description: "LFM2.5 1.2B Thinking - Efficient reasoning model, 128k context",
+		size: "~2.4GB",
+		contextLength: 128e3,
+		supportsThinking: true,
+		supportsJson: false,
+		family: "other"
+	}
+};
+/**
+* Parse model identifier and resolve to source
+*
+* Supported formats:
+* - "qwen3.5-0.8b" (built-in)
+* - "hf:org/model" (HuggingFace shorthand)
+* - "https://huggingface.co/org/model" (full URL)
+* - "file:./path/to/model" (local path)
+*/
+function resolveModel(modelId) {
+	if (BUILTIN_MODELS[modelId]) return {
+		type: "builtin",
+		path: BUILTIN_MODELS[modelId].repo
+	};
+	if (modelId.startsWith("hf:")) return {
+		type: "huggingface",
+		path: modelId.slice(3)
+	};
+	if (modelId.startsWith("https://huggingface.co/")) return {
+		type: "huggingface",
+		path: modelId.replace("https://huggingface.co/", "")
+	};
+	if (modelId.startsWith("file:")) return {
+		type: "local",
+		path: modelId.slice(5)
+	};
+	if (modelId.includes("/")) return {
+		type: "huggingface",
+		path: modelId
+	};
+	return {
+		type: "huggingface",
+		path: modelId
+	};
+}
+/**
+* Get model config (built-in only)
+*/
+function getModelConfig(modelId) {
+	return BUILTIN_MODELS[modelId] || null;
+}
+const FAMILY_CONTEXT_DEFAULTS = {
+	qwen: 32768,
+	other: 32768
+};
+/**
+* Create model config for an external HuggingFace model.
+*
+* Inference is restricted to families the engine can actually run — Qwen
+* (Qwen2/Qwen3/Qwen3.5) and LFM2 (Liquid). Everything else is left as "other"
+* with conservative capability flags so the REPL doesn't advertise features the
+* engine can't deliver.
+*/
+function createExternalModelConfig(modelId, repo, contextLength) {
+	const repoLower = repo.toLowerCase();
+	let family = "other";
+	if (repoLower.includes("qwen")) family = "qwen";
+	const isLiquid = repoLower.includes("lfm") || repoLower.includes("liquid");
+	const isQwen = family === "qwen";
+	return {
+		id: modelId,
+		repo,
+		description: `External model: ${repo}`,
+		size: "Unknown",
+		contextLength: contextLength || FAMILY_CONTEXT_DEFAULTS[family] || 32768,
+		supportsThinking: isQwen || isLiquid,
+		supportsJson: isQwen,
+		family
+	};
+}
+/**
+* Fetch context length from HuggingFace model config
+*/
+async function fetchModelContextLength(repo) {
+	try {
+		const res = await fetch(`https://huggingface.co/${repo}/raw/main/config.json`);
+		if (!res.ok) return null;
+		const config = await res.json();
+		return config.max_position_embeddings || config.n_positions || config.max_seq_len || config.sliding_window || config.context_length || null;
+	} catch {
+		return null;
+	}
+}
+/**
+* List all built-in models
+*/
+function listBuiltinModels() {
+	return Object.values(BUILTIN_MODELS);
+}
+//#endregion
+//#region src/core/gerbil.ts
+/**
+* Gerbil - Local GPU-accelerated LLM inference
+*/
+/**
+* Minimal PNG decoder: 8-bit, non-interlaced, color type 2 (RGB) or 6 (RGBA).
+* Returns packed RGB pixels for the native vision encoder. Replaces the
+* transformers.js RawImage decoder for the common PNG case.
+*/
+function decodePng(buf) {
+	const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
+	const w = view.getUint32(16);
+	const h = view.getUint32(20);
+	const bitDepth = buf[24];
+	const colorType = buf[25];
+	if (bitDepth !== 8 || colorType !== 2 && colorType !== 6) throw new Error(`Unsupported PNG (bitDepth=${bitDepth} colorType=${colorType}); need 8-bit RGB/RGBA`);
+	const channels = colorType === 6 ? 4 : 3;
+	let off = 8;
+	const idat = [];
+	while (off < buf.length) {
+		const len = view.getUint32(off);
+		const type = String.fromCharCode(buf[off + 4], buf[off + 5], buf[off + 6], buf[off + 7]);
+		if (type === "IDAT") idat.push(buf.subarray(off + 8, off + 8 + len));
+		off += 12 + len;
+		if (type === "IEND") break;
+	}
+	const raw = zlib.inflateSync(Buffer.concat(idat));
+	const stride = w * channels;
+	const out = new Uint8Array(w * h * 3);
+	const line = new Uint8Array(stride);
+	const prev = new Uint8Array(stride);
+	let p = 0;
+	for (let y = 0; y < h; y += 1) {
+		const filter = raw[p];
+		p += 1;
+		for (let i = 0; i < stride; i += 1) {
+			const x = raw[p];
+			p += 1;
+			const a = i >= channels ? line[i - channels] : 0;
+			const b = prev[i];
+			const c = i >= channels ? prev[i - channels] : 0;
+			let v;
+			switch (filter) {
+				case 0:
+					v = x;
+					break;
+				case 1:
+					v = x + a;
+					break;
+				case 2:
+					v = x + b;
+					break;
+				case 3:
+					v = x + (a + b >> 1);
+					break;
+				case 4: {
+					const pp = a + b - c;
+					const pa = Math.abs(pp - a);
+					const pb = Math.abs(pp - b);
+					const pc = Math.abs(pp - c);
+					let pred = c;
+					if (pa <= pb && pa <= pc) pred = a;
+					else if (pb <= pc) pred = b;
+					v = x + pred;
+					break;
+				}
+				default: throw new Error(`bad PNG filter ${filter}`);
+			}
+			line[i] = v & 255;
+		}
+		for (let x = 0; x < w; x += 1) {
+			out[(y * w + x) * 3 + 0] = line[x * channels + 0];
+			out[(y * w + x) * 3 + 1] = line[x * channels + 1];
+			out[(y * w + x) * 3 + 2] = line[x * channels + 2];
+		}
+		prev.set(line);
+	}
+	return {
+		pixels: out,
+		width: w,
+		height: h
+	};
+}
+/**
+* Static catalogue of ten English voices: six "en-us" and four "en-gb".
+* In every entry the id prefix mirrors the metadata: first letter a/b for
+* en-us/en-gb, second letter f/m for female/male.
+* NOTE(review): nothing in the visible part of this file reads this constant,
+* and the TTS helpers further down mention a different checkpoint
+* (Kani-TTS-2) - confirm where the list is still consumed.
+*/
+const KOKORO_VOICES_DEFAULT = [
+	{
+		id: "af_bella",
+		name: "Bella",
+		gender: "female",
+		language: "en-us",
+		description: "American female, warm and friendly"
+	},
+	{
+		id: "af_sarah",
+		name: "Sarah",
+		gender: "female",
+		language: "en-us",
+		description: "American female, clear and professional"
+	},
+	{
+		id: "af_nicole",
+		name: "Nicole",
+		gender: "female",
+		language: "en-us",
+		description: "American female, soft and gentle"
+	},
+	{
+		id: "af_sky",
+		name: "Sky",
+		gender: "female",
+		language: "en-us",
+		description: "American female, young and energetic"
+	},
+	{
+		id: "am_adam",
+		name: "Adam",
+		gender: "male",
+		language: "en-us",
+		description: "American male, deep and confident"
+	},
+	{
+		id: "am_michael",
+		name: "Michael",
+		gender: "male",
+		language: "en-us",
+		description: "American male, warm and friendly"
+	},
+	{
+		id: "bf_emma",
+		name: "Emma",
+		gender: "female",
+		language: "en-gb",
+		description: "British female, elegant and clear"
+	},
+	{
+		id: "bf_isabella",
+		name: "Isabella",
+		gender: "female",
+		language: "en-gb",
+		description: "British female, sophisticated"
+	},
+	{
+		id: "bm_george",
+		name: "George",
+		gender: "male",
+		language: "en-gb",
+		description: "British male, distinguished"
+	},
+	{
+		id: "bm_lewis",
+		name: "Lewis",
+		gender: "male",
+		language: "en-gb",
+		description: "British male, friendly and warm"
+	}
+];
+var Gerbil = class {
+	currentModel = null;
+	modelConfig = null;
+	config;
+	stats;
+	_deviceMode = "cpu";
+	webgpuEngine = null;
+	nativeEmbedEngine = null;
+	nativeEmbedRepo = null;
+	nativeSTT = null;
+	nativeTTSEngine = null;
+	isVisionModel = false;
+	queue;
+	telemetry;
+	constructor(config = {}) {
+		this.config = config;
+		this.stats = {
+			prompts: 0,
+			tokensIn: 0,
+			tokensOut: 0,
+			avgSpeed: 0,
+			totalTime: 0,
+			cacheHits: 0,
+			cacheMisses: 0
+		};
+		const concurrency = config.concurrency || {};
+		this.queue = new PQueue({
+			concurrency: concurrency.maxConcurrent ?? 1,
+			timeout: concurrency.timeout ?? 3e5
+		});
+		this.telemetry = config.telemetry || {};
+	}
+	/**
+	* Forward an error to the telemetry `onError` hook, if one is configured.
+	* Anything the hook throws is deliberately discarded so that reporting
+	* can never mask the original failure.
+	*/
+	reportError(error, context) {
+		try {
+			const { telemetry } = this;
+			const handler = telemetry.onError;
+			if (handler != null) handler.call(telemetry, error, context);
+		} catch {}
+	}
+	/**
+	* Whether the native (src/gpu) WebGPU engine should be used for a capability
+	* (embed / transcribe / speak / vision). The native WebGPU engine is the only
+	* inference backend, so this is always true; kept as a seam for callers.
+	*
+	* @returns Always `true`; takes no arguments and reads no instance state.
+	*/
+	preferNative() {
+		return true;
+	}
+	static listModels() {
+		return Object.values(BUILTIN_MODELS);
+	}
+	static getModel(modelId) {
+		return BUILTIN_MODELS[modelId];
+	}
+	/**
+	* Load a model
+	*
+	* Steps, in order: dispose any model that is already loaded, resolve the id
+	* to a source, take the built-in config or synthesize one for an external
+	* repo (fetching its context length), then either delegate to
+	* loadVisionModel() when the config has `supportsVision`, or create the
+	* native WebGPU engine from the repo. Legacy "onnx-community/...-ONNX" repo
+	* names are mapped to the corresponding Qwen repos before loading.
+	*
+	* Only `dtype: "q4"` is forwarded to the engine; any other dtype is passed
+	* as undefined. `maxSeqLen` is `options.contextLength`, else the config's
+	* context length, else 4096.
+	*
+	* NOTE(review): the vision branch returns before the try/catch below, so
+	* `telemetry.onModelLoad` and reportError() are not invoked by this method
+	* for vision-capable models - and both Qwen built-ins, including the
+	* default, are flagged `supportsVision`. Confirm this is intended.
+	*
+	* @param modelId Model identifier (default: DEFAULT_MODEL).
+	* @param options `onProgress`, `device` (default "auto"), `dtype`,
+	*   `contextLength`.
+	* @throws Error when `device` is "cpu" or "gpu"; engine creation errors are
+	*   reported to telemetry and rethrown.
+	*
+	* @example
+	* ```ts
+	* // Built-in model
+	* await g.loadModel("qwen3.5-0.8b");
+	*
+	* // HuggingFace model (a repo name containing "qwen" is detected as the
+	* // Qwen family; other repos get conservative capability flags)
+	* await g.loadModel("hf:Qwen/Qwen3-0.6B");
+	*
+	* // Local model
+	* await g.loadModel("file:./models/my-model");
+	*
+	* // Vision model (built-ins flagged supportsVision take the vision path)
+	* await g.loadModel("qwen3.5-2b");
+	* ```
+	*/
+	async loadModel(modelId = DEFAULT_MODEL, options = {}) {
+		const loadStartTime = performance.now();
+		if (this.isLoaded()) await this.dispose();
+		const source = resolveModel(modelId);
+		const { onProgress, device = "auto", dtype: userDtype } = options;
+		let config = getModelConfig(modelId);
+		if (!config) {
+			const contextLength = await fetchModelContextLength(source.path).catch(() => null);
+			config = createExternalModelConfig(modelId, source.path, contextLength || void 0);
+		}
+		if (config.supportsVision) return this.loadVisionModel(modelId, source.path, config, options);
+		if (device === "cpu" || device === "gpu") throw new Error("Gerbil requires WebGPU. CPU/WASM and the legacy ONNX backend have been removed; use device \"webgpu\" or \"auto\".");
+		onProgress?.({ status: `Loading ${modelId}...` });
+		try {
+			onProgress?.({ status: "Initializing WebGPU engine..." });
+			const { WebGPUEngine } = await import("./gpu/index.mjs");
+			let hfRepo = source.path;
+			if (hfRepo.includes("onnx-community/") || hfRepo.includes("-ONNX")) hfRepo = {
+				"onnx-community/Qwen3.5-0.8B-ONNX": "Qwen/Qwen3.5-0.8B",
+				"onnx-community/Qwen3-0.6B-ONNX": "Qwen/Qwen3-0.6B",
+				"onnx-community/Qwen3-1.7B-ONNX": "Qwen/Qwen3-1.7B",
+				"onnx-community/Qwen3.5-2B-ONNX": "Qwen/Qwen3.5-2B"
+			}[hfRepo] || hfRepo;
+			const gpuDtype = userDtype === "q4" ? "q4" : void 0;
+			this.webgpuEngine = await WebGPUEngine.create({
+				repo: hfRepo,
+				maxSeqLen: options.contextLength ?? config.contextLength ?? 4096,
+				dtype: gpuDtype,
+				onProgress: (loaded, total, message) => {
+					onProgress?.({
+						status: message,
+						progress: total > 0 ? Math.round(loaded / total * 100) : void 0
+					});
+				}
+			});
+			this._deviceMode = "webgpu";
+			this.isVisionModel = false;
+			this.currentModel = modelId;
+			this.modelConfig = config;
+			onProgress?.({ status: "Ready (WebGPU Native)!" });
+			if (this.telemetry.onModelLoad) try {
+				this.telemetry.onModelLoad({
+					modelId,
+					loadTimeMs: performance.now() - loadStartTime,
+					fromCache: false,
+					device: this._deviceMode,
+					success: true
+				});
+			} catch {}
+		} catch (err) {
+			this.reportError(err instanceof Error ? err : new Error(String(err)), {
+				operation: "load",
+				modelId
+			});
+			if (this.telemetry.onModelLoad) try {
+				this.telemetry.onModelLoad({
+					modelId,
+					loadTimeMs: performance.now() - loadStartTime,
+					fromCache: false,
+					device: this._deviceMode,
+					success: false,
+					error: err instanceof Error ? err.message : String(err)
+				});
+			} catch {}
+			throw err;
+		}
+	}
+	/**
+	* Load a vision model (VLM) on the native WebGPU engine.
+	* The native engine loads the vision-capable safetensors checkpoint directly
+	* and builds its ViT tower on demand (enableVision: true). describeImage() then
+	* runs encode → splice → decode entirely in WebGPU compute.
+	*
+	* Unlike loadModel(), this method does not forward `dtype`, has no
+	* try/catch, and does not call `telemetry.onModelLoad` or reportError();
+	* failures propagate straight to the caller.
+	*
+	* @param modelId Identifier recorded as the current model.
+	* @param repoPath Repo to load; the legacy
+	*   "onnx-community/Qwen3.5-0.8B-ONNX" name is mapped to "Qwen/Qwen3.5-0.8B".
+	* @param config Model config stored on the instance; supplies the fallback
+	*   context length.
+	* @param options `onProgress`, `device` (default "auto"), `contextLength`.
+	* @throws Error when `device` is "cpu" or "gpu".
+	*/
+	async loadVisionModel(modelId, repoPath, config, options = {}) {
+		const { onProgress, device = "auto" } = options;
+		onProgress?.({ status: `Loading ${modelId} (vision model)...` });
+		if (device === "cpu" || device === "gpu") throw new Error("Gerbil vision models require WebGPU. CPU/WASM and the legacy ONNX backend have been removed; use device \"webgpu\" or \"auto\".");
+		onProgress?.({ status: "Initializing WebGPU vision engine..." });
+		const { WebGPUEngine } = await import("./gpu/index.mjs");
+		let visRepo = repoPath;
+		if (visRepo.includes("onnx-community/") || visRepo.includes("-ONNX")) visRepo = { "onnx-community/Qwen3.5-0.8B-ONNX": "Qwen/Qwen3.5-0.8B" }[visRepo] || visRepo;
+		this.webgpuEngine = await WebGPUEngine.create({
+			repo: visRepo,
+			enableVision: true,
+			maxSeqLen: options.contextLength ?? config.contextLength ?? 4096,
+			onProgress: (loaded, total, message) => onProgress?.({
+				status: message,
+				progress: total > 0 ? Math.round(loaded / total * 100) : void 0
+			})
+		});
+		this._deviceMode = "webgpu";
+		this.isVisionModel = true;
+		this.currentModel = modelId;
+		this.modelConfig = config;
+		onProgress?.({ status: "Ready (Vision, WebGPU Native)!" });
+	}
+	/**
+	* Check if a model is loaded
+	*
+	* @returns true while a WebGPU engine instance is held (`webgpuEngine` is
+	*   not null), regardless of whether it is a text or vision model.
+	*/
+	isLoaded() {
+		return this.webgpuEngine !== null;
+	}
+	/**
+	* Check if current model supports vision
+	*
+	* Both conditions must hold: the model was loaded through the vision path
+	* (`isVisionModel`) and its config has `supportsVision` set to exactly true.
+	*/
+	supportsVision() {
+		return this.isVisionModel && this.modelConfig?.supportsVision === true;
+	}
+	/**
+	* Get current model info
+	*
+	* @returns The config record stored by the last successful load, or null
+	*   if none has been stored (the field's initial value).
+	*/
+	getModelInfo() {
+		return this.modelConfig;
+	}
+	/**
+	* Get current device mode.
+	*
+	* The field starts as "cpu" and is set to "webgpu" by the load methods
+	* visible in this part of the file. NOTE(review): the previous wording also
+	* listed "wasm"; no visible code assigns that value - confirm before
+	* relying on it.
+	*/
+	getDeviceMode() {
+		return this._deviceMode;
+	}
+	/**
+	* Get the in-memory weight quantization the native engine uses for the loaded
+	* model. The WebGPU engine quantizes weights to INT4 ("q4") on load; the KV
+	* cache precision (f16/f32) is separate and device-detected.
+	*
+	* @returns The constant string "q4"; the value does not depend on which
+	*   model, if any, is loaded or on the `dtype` option passed to loadModel().
+	*/
+	getDtype() {
+		return "q4";
+	}
+	/**
+	* Get response cache statistics
+	*/
+	getResponseCacheStats() {
+		const cache = getGlobalCache();
+		const stats = cache.getStats();
+		return {
+			hits: stats.hits,
+			misses: stats.misses,
+			size: stats.size,
+			hitRate: cache.getHitRate()
+		};
+	}
+	/**
+	* Clear the response cache (for cached generate() results)
+	*/
+	clearResponseCache() {
+		getGlobalCache().clear();
+	}
+	/**
+	* Check if a model is cached (downloaded) without loading it
+	*
+	* @example
+	* ```ts
+	* if (await g.isModelCached("qwen3.5-0.8b")) {
+	*   console.log("Model ready, will load instantly");
+	* } else {
+	*   console.log("Model needs to download (~400MB)");
+	* }
+	* ```
+	*/
+	async isModelCached(modelId) {
+		const source = resolveModel(modelId);
+		return this.isNativeRepoCached(source.path);
+	}
+	/**
+	* Check whether the native WebGPU engine has a repo cached on disk.
+	* The native loader stores files under ~/.cache/gerbil/<repo>/<revision>/.
+	*/
+	isNativeRepoCached(repo, revision = "main") {
+		try {
+			const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
+			if (!home) return false;
+			const modelDir = path.join(home, ".cache", "gerbil", repo.replace(/\//g, "_"), revision);
+			return existsSync(path.join(modelDir, "config.json".replace(/\//g, "_")));
+		} catch {
+			return false;
+		}
+	}
+	/**
+	* Preload a model (download without initializing for inference)
+	*
+	* Use this to download models ahead of time, e.g., during app startup,
+	* so users don't wait when they first use AI.
+	*
+	* @example
+	* ```ts
+	* // Preload for later (download only, free memory)
+	* await g.preloadModel("qwen3.5-0.8b", {
+	*   onProgress: (p) => console.log(p.status, p.progress),
+	* });
+	*
+	* // Preload and keep in memory for instant use
+	* await g.preloadModel("qwen3.5-0.8b", { keepLoaded: true });
+	* await g.generate("Hello"); // Instant, no loading needed
+	* ```
+	*/
+	async preloadModel(modelId, options = {}) {
+		resolveModel(modelId);
+		const { onProgress, keepLoaded = false } = options;
+		if (keepLoaded && this.isLoaded() && this.currentModel === modelId) {
+			onProgress?.({ status: "Model already loaded" });
+			return;
+		}
+		if (!keepLoaded && await this.isModelCached(modelId)) {
+			onProgress?.({ status: "Model already cached" });
+			return;
+		}
+		if (keepLoaded) {
+			await this.loadModel(modelId, { onProgress });
+			return;
+		}
+		onProgress?.({ status: `Preloading ${modelId}...` });
+		await this.loadModel(modelId, { onProgress });
+		await this.dispose();
+		onProgress?.({ status: "Preload complete" });
+	}
+	/**
+	* Check if the native TTS model is cached. The native engine always uses the
+	* Kani-TTS-2 checkpoint, so `modelId` is accepted for API compatibility only.
+	*/
+	async isTTSCached(_modelId) {
+		const { DEFAULT_MODELS } = await import("./gpu/index.mjs");
+		return this.isNativeRepoCached(DEFAULT_MODELS.tts);
+	}
+	/**
+	* Preload the native TTS model (downloads Kani-TTS-2 weights to disk cache).
+	*
+	* @param modelId - Accepted for API compatibility; native TTS uses Kani-TTS-2.
+	* @param options.keepLoaded - Keep the engine in memory for instant use.
+	*/
+	async preloadTTS(modelId, options = {}) {
+		const { onProgress, keepLoaded = false } = options;
+		if (keepLoaded && this.isTTSLoaded()) {
+			onProgress?.({ status: "TTS model already loaded" });
+			return;
+		}
+		if (!keepLoaded && await this.isTTSCached(modelId)) {
+			onProgress?.({ status: "TTS model already cached" });
+			return;
+		}
+		onProgress?.({ status: "Preloading TTS model..." });
+		await this.ensureNativeTTSEngine();
+		if (!keepLoaded && this.nativeTTSEngine) {
+			try {
+				this.nativeTTSEngine.destroy();
+			} catch {}
+			this.nativeTTSEngine = null;
+		}
+		onProgress?.({ status: "Preload complete" });
+	}
+	/**
+	* Check if the native STT model is cached. The native engine always uses the
+	* Moonshine checkpoint, so `modelId` is accepted for API compatibility only.
+	*/
+	async isSTTCached(_modelId) {
+		const { DEFAULT_MODELS } = await import("./gpu/index.mjs");
+		return this.isNativeRepoCached(DEFAULT_MODELS.stt);
+	}
+	/**
+	* Preload the native STT model (downloads Moonshine weights to disk cache).
+	*
+	* @param modelId - Accepted for API compatibility; native STT uses Moonshine.
+	* @param options.keepLoaded - Keep the engine in memory for instant use.
+	*/
+	async preloadSTT(modelId, options = {}) {
+		const { onProgress, keepLoaded = false } = options;
+		if (keepLoaded && this.isSTTLoaded()) {
+			onProgress?.({ status: "STT model already loaded" });
+			return;
+		}
+		if (!keepLoaded && await this.isSTTCached(modelId)) {
+			onProgress?.({ status: "STT model already cached" });
+			return;
+		}
+		onProgress?.({ status: "Preloading STT model..." });
+		await this.ensureNativeSTT();
+		if (!keepLoaded && this.nativeSTT) {
+			try {
+				this.nativeSTT.destroy?.();
+			} catch {}
+			this.nativeSTT = null;
+		}
+		onProgress?.({ status: "Preload complete" });
+	}
+	/**
+	* Check if a native embedding model is cached. Defaults to the native
+	* EmbeddingGemma checkpoint when no repo is provided.
+	*/
+	async isEmbeddingCached(modelId) {
+		const { DEFAULT_MODELS } = await import("./gpu/index.mjs");
+		return this.isNativeRepoCached(modelId || DEFAULT_MODELS.embedding);
+	}
+	/**
+	* Preload a native embedding model (downloads weights to disk cache).
+	*
+	* @param modelId - Embedding repo (default: native EmbeddingGemma).
+	* @param options.keepLoaded - Keep the engine in memory for instant use.
+	*/
+	async preloadEmbedding(modelId, options = {}) {
+		const { onProgress, keepLoaded = false } = options;
+		if (keepLoaded && this.nativeEmbedEngine) {
+			onProgress?.({ status: "Embedding model already loaded" });
+			return;
+		}
+		if (!keepLoaded && await this.isEmbeddingCached(modelId)) {
+			onProgress?.({ status: "Embedding model already cached" });
+			return;
+		}
+		onProgress?.({ status: "Preloading embedding model..." });
+		await this.ensureNativeEmbedEngine(modelId);
+		if (!keepLoaded && this.nativeEmbedEngine) {
+			try {
+				this.nativeEmbedEngine.destroy();
+			} catch {}
+			this.nativeEmbedEngine = null;
+			this.nativeEmbedRepo = null;
+		}
+		onProgress?.({ status: "Preload complete" });
+	}
+	/**
+	* Clear KV cache to free memory.
+	* The native engine manages its own KV cache; this is a no-op kept for API
+	* compatibility.
+	*/
+	async clearCache() { /* intentionally empty: nothing is cleared here */ }
+	/**
+	* Generate text (automatically routes to vision generation if images provided)
+	*
+	* @example
+	* ```ts
+	* // Text generation
+	* const result = await g.generate("Hello!");
+	*
+	* // Vision generation (with vision model)
+	* const result = await g.generate("What's in this image?", {
+	*   images: [{ source: "https://example.com/cat.jpg" }]
+	* });
+	* ```
+	*/
+	async generate(prompt, options = {}) {
+		const queueStartTime = performance.now();
+		try {
+			return await this.queue.add(async () => {
+				const queueWaitTime = performance.now() - queueStartTime;
+				if (queueWaitTime > 100 && this.telemetry.onQueueWait) try {
+					this.telemetry.onQueueWait(queueWaitTime);
+				} catch {}
+				const generatedResult = await this.generateInternal(prompt, options);
+				if (this.telemetry.onGenerate) try {
+					this.telemetry.onGenerate({
+						modelId: this.currentModel || "unknown",
+						result: generatedResult,
+						cached: generatedResult.cached ?? false,
+						queueTimeMs: queueWaitTime > 100 ? queueWaitTime : void 0
+					});
+				} catch {}
+				return generatedResult;
+			});
+		} catch (error) {
+			if (this.telemetry.onError) try {
+				this.telemetry.onError(error instanceof Error ? error : new Error(String(error)), {
+					method: "generate",
+					modelId: this.currentModel || "unknown",
+					prompt: prompt.slice(0, 100),
+					queueWaitTime: performance.now() - queueStartTime
+				});
+			} catch {}
+			throw error;
+		}
+	}
+	/**
+	* Internal generate implementation (called within queue)
+	*/
+	async generateInternal(prompt, options = {}) {
+		if (!this.isLoaded()) await this.loadModel(this.config.model || DEFAULT_MODEL);
+		const { images } = options;
+		if (images?.length && this.isVisionModel) return this.generateWithVision(prompt, options);
+		const { maxTokens = 256, temperature = .7, topP = .9, topK = 50, thinking = false, system, cache = false, cacheTtl } = options;
+		if (cache && !options.onToken && !images?.length) {
+			const cacheKey = generateCacheKey(prompt, this.currentModel || "", {
+				maxTokens,
+				temperature,
+				topP,
+				topK,
+				system,
+				thinking
+			});
+			const cached = getGlobalCache().get(cacheKey);
+			if (cached) return cached;
+		}
+		const startTime = performance.now();
+		try {
+			let rawText = "";
+			let engineTokensGenerated = 0;
+			let engineTokensPerSecond = 0;
+			if (this.webgpuEngine) {
+				const result$1 = await this.webgpuEngine.generate(prompt, {
+					maxTokens,
+					sampling: {
+						temperature,
+						topP,
+						topK
+					},
+					systemPrompt: system,
+					onToken: options.onToken ? (t) => options.onToken?.(t) : void 0
+				});
+				rawText = result$1.text;
+				engineTokensGenerated = result$1.tokensGenerated;
+				engineTokensPerSecond = result$1.tokensPerSecond;
+			} else throw new Error("No model loaded");
+			const totalTime = performance.now() - startTime;
+			rawText = this.cleanOutput(rawText);
+			const { thinking: thinkingText, response } = this.parseThinking(rawText);
+			const finalThinking = thinking ? thinkingText : void 0;
+			const tokensGenerated = engineTokensGenerated;
+			this.stats.prompts += 1;
+			this.stats.tokensOut += tokensGenerated;
+			this.stats.totalTime += totalTime;
+			this.stats.avgSpeed = this.stats.tokensOut / this.stats.totalTime * 1e3;
+			const result = {
+				text: response,
+				thinking: finalThinking,
+				tokensGenerated,
+				tokensPerSecond: engineTokensPerSecond,
+				totalTime,
+				finishReason: "stop",
+				provider: "local",
+				cached: false
+			};
+			if (cache && !options.onToken && !images?.length) {
+				const cacheKey = generateCacheKey(prompt, this.currentModel || "", {
+					maxTokens,
+					temperature,
+					topP,
+					topK,
+					system,
+					thinking
+				});
+				getGlobalCache().set(cacheKey, result, cacheTtl);
+			}
+			return result;
+		} catch (error) {
+			this.reportError(error instanceof Error ? error : new Error(String(error)), {
+				operation: "generate",
+				modelId: this.currentModel || void 0
+			});
+			return {
+				text: "",
+				tokensGenerated: 0,
+				tokensPerSecond: 0,
+				totalTime: performance.now() - startTime,
+				finishReason: "error",
+				provider: "local",
+				cached: false
+			};
+		}
+	}
+	/**
+	* Stream text generation, yielding each token as the engine emits it
+	*
+	* Note: Yields the raw output including <think> tags if thinking mode is enabled.
+	* The final result has parsed thinking separated out.
+	*/
+	async *stream(prompt, options = {}) {
+		if (!this.isLoaded()) await this.loadModel(this.config.model || DEFAULT_MODEL);
+		const startTime = performance.now();
+		if (this.webgpuEngine) {
+			let fullText = "";
+			const tokenQueue = [];
+			let resolveNext = null;
+			let done = false;
+			let engineTokensGenerated = 0;
+			let engineTokensPerSecond = 0;
+			const generatePromise = this.webgpuEngine.generate(prompt, {
+				...options,
+				sampling: {
+					temperature: options.temperature,
+					topP: options.topP,
+					topK: options.topK
+				},
+				systemPrompt: options.system,
+				onToken: (token) => {
+					fullText += token;
+					if (resolveNext) {
+						resolveNext(token);
+						resolveNext = null;
+					} else tokenQueue.push(token);
+				}
+			}).then((result) => {
+				engineTokensGenerated = result.tokensGenerated;
+				engineTokensPerSecond = result.tokensPerSecond;
+				done = true;
+				if (resolveNext) resolveNext(null);
+			}).catch((err) => {
+				done = true;
+				if (resolveNext) resolveNext(null);
+				throw err;
+			});
+			while (!done || tokenQueue.length > 0) if (tokenQueue.length > 0) {
+				const token = tokenQueue.shift();
+				yield token;
+				options.onToken?.(token);
+			} else if (!done) {
+				const token = await new Promise((resolve) => {
+					resolveNext = resolve;
+				});
+				if (token) {
+					yield token;
+					options.onToken?.(token);
+				}
+			}
+			await generatePromise;
+			const { thinking: thinkingText, response } = this.parseThinking(fullText);
+			const totalTime = performance.now() - startTime;
+			return {
+				text: response,
+				thinking: options.thinking ? thinkingText : void 0,
+				tokensGenerated: engineTokensGenerated,
+				totalTime,
+				tokensPerSecond: engineTokensPerSecond,
+				finishReason: "stop"
+			};
+		}
+		throw new Error("No model loaded");
+	}
+	/**
+	* Generate text from images using a vision model
+	* Called automatically by generate() when images are provided
+	*/
+	async generateWithVision(prompt, options) {
+		if (!(this.webgpuEngine && typeof this.webgpuEngine.describeImage === "function")) throw new Error("Vision model not loaded. Load a vision-capable model with device 'webgpu' first.");
+		const imgs = options.images ?? [];
+		if (imgs.length !== 1) throw new Error(`Native WebGPU vision supports exactly one image per request (got ${imgs.length}).`);
+		const startTime = performance.now();
+		const { pixels, width, height } = await this.decodeImageToPixels(imgs[0].source);
+		const result = await this.webgpuEngine.describeImage({
+			pixels,
+			width,
+			height
+		}, prompt, {
+			maxTokens: options.maxTokens ?? 512,
+			sampling: {
+				temperature: options.temperature ?? .7,
+				topP: options.topP ?? .9,
+				topK: options.topK ?? 20
+			},
+			onToken: options.onToken ? (t) => options.onToken?.(t) : void 0
+		});
+		const totalTime = performance.now() - startTime;
+		this.stats.prompts += 1;
+		this.stats.tokensOut += result.tokensGenerated;
+		this.stats.totalTime += totalTime;
+		this.stats.avgSpeed = this.stats.tokensOut / this.stats.totalTime * 1e3;
+		return {
+			text: this.cleanOutput(result.text),
+			tokensGenerated: result.tokensGenerated,
+			tokensPerSecond: result.tokensPerSecond,
+			totalTime,
+			finishReason: "stop",
+			provider: "local",
+			cached: false
+		};
+	}
+	/**
+	* Decode an image source (http(s) URL, file path, or data URI) to raw RGB
+	* pixels for the native vision encoder. Supports 8-bit non-interlaced PNG
+	* (color types 2/RGB and 6/RGBA). Other formats throw a clear error — callers
+	* can pre-decode and use the lower-level WebGPUEngine.describeImage() with
+	* pixels directly.
+	*/
+	async decodeImageToPixels(source) {
+		const bytes = await this.fetchImageBytes(source);
+		if (!(bytes.length > 8 && bytes[0] === 137 && bytes[1] === 80 && bytes[2] === 78 && bytes[3] === 71)) throw new Error("Native vision currently decodes PNG images only. For other formats, pre-decode to RGB pixels and call the GPU engine's describeImage() directly.");
+		return decodePng(bytes);
+	}
+	/** Fetch an image source to raw bytes (URL, data URI, or local file path). */
+	async fetchImageBytes(source) {
+		if (source.startsWith("data:")) {
+			const comma = source.indexOf(",");
+			const meta = source.slice(5, comma);
+			const data = source.slice(comma + 1);
+			if (meta.includes("base64")) return Uint8Array.from(Buffer.from(data, "base64"));
+			return Uint8Array.from(Buffer.from(decodeURIComponent(data), "binary"));
+		}
+		if (source.startsWith("http://") || source.startsWith("https://")) {
+			const res = await fetch(source);
+			if (!res.ok) throw new Error(`Failed to fetch image (${res.status}): ${source}`);
+			return new Uint8Array(await res.arrayBuffer());
+		}
+		const { readFile } = await import("node:fs/promises");
+		return new Uint8Array(await readFile(source));
+	}
+	/**
+	* Generate structured JSON output
+	*/
+	async json(prompt, options) {
+		const { schema, retries = 3, temperature = .3 } = options;
+		const systemPrompt = `You are a JSON generator. You MUST respond with valid JSON only.
+No explanations, no markdown, no code blocks. Just pure JSON.
+The JSON must conform to this schema: ${JSON.stringify(zodToJsonSchema(schema))}`;
+		for (let attempt = 0; attempt < retries; attempt += 1) {
+			const result = await this.generate(prompt, {
+				system: options.system || systemPrompt,
+				temperature,
+				maxTokens: 1e3
+			});
+			try {
+				const jsonStr = extractJson(result.text);
+				const parsed = JSON.parse(jsonStr);
+				return schema.parse(parsed);
+			} catch (error) {
+				if (attempt === retries - 1) throw new Error(`Failed to generate valid JSON after ${retries} attempts: ${error}`);
+			}
+		}
+		throw new Error("Failed to generate valid JSON");
+	}
+	/**
+	* Generate a structured object via the native engine's retrying
+	* `generateObject` (extract JSON → validate → retry with a nudge).
+	*
+	* Unlike {@link json} (which is Zod-driven), this passes through to the engine
+	* and accepts either a predicate validator `(o) => boolean` or a minimal
+	* `{ required: [...] }` schema; omit `schema` to accept any valid JSON.
+	*
+	* @example
+	* ```ts
+	* const { object } = await g.generateObject<{ name: string; age: number }>(
+	*   'Extract {name, age} from: "I am Sarah, 28"',
+	*   { schema: { required: ["name", "age"] } },
+	* );
+	* ```
+	*/
+	async generateObject(prompt, options = {}) {
+		if (!this.isLoaded()) await this.loadModel(this.config.model || DEFAULT_MODEL);
+		if (!this.webgpuEngine) throw new Error("No model loaded");
+		return this.webgpuEngine.generateObject(prompt, options);
+	}
+	/**
+	* Generate embeddings
+	*/
+	async embed(text, options = {}) {
+		if (!this.preferNative()) throw new Error("Embeddings require WebGPU. CPU/WASM and the legacy ONNX backend have been removed.");
+		const native = await this.ensureNativeEmbedEngine(options.model);
+		const startTime = performance.now();
+		const vec = await native.embed(text);
+		return {
+			vector: Array.from(vec),
+			text,
+			totalTime: performance.now() - startTime
+		};
+	}
+	/**
+	* Lazily build (or reuse) the native embedding engine. Re-creates it when the
+	* requested repo differs from the cached one. The default native embedding
+	* model is resolved by the engine itself (EmbeddingGemma) when no repo given.
+	*/
+	async ensureNativeEmbedEngine(repo) {
+		if (this.nativeEmbedEngine && (!repo || repo === this.nativeEmbedRepo)) return this.nativeEmbedEngine;
+		if (this.nativeEmbedEngine) {
+			try {
+				this.nativeEmbedEngine.destroy();
+			} catch {}
+			this.nativeEmbedEngine = null;
+			this.nativeEmbedRepo = null;
+		}
+		const { WebGPUEngine } = await import("./gpu/index.mjs");
+		this.nativeEmbedEngine = await WebGPUEngine.create({
+			repo,
+			embedding: true
+		});
+		this.nativeEmbedRepo = repo ?? null;
+		return this.nativeEmbedEngine;
+	}
+	/**
+	* Generate embeddings for multiple texts
+	*/
+	async embedBatch(texts, options = {}) {
+		const results = [];
+		for (const text of texts) results.push(await this.embed(text, options));
+		return results;
+	}
+	/**
+	* Compute cosine similarity between two vectors
+	*
+	* @example
+	* ```ts
+	* const sim = g.cosineSimilarity([1, 0, 0], [1, 0, 0]); // 1.0
+	* const sim2 = g.cosineSimilarity([1, 0, 0], [0, 1, 0]); // 0.0
+	* ```
+	*/
+	cosineSimilarity(a, b) {
+		if (a.length !== b.length) throw new Error(`Vector dimensions must match: ${a.length} vs ${b.length}`);
+		let dotProduct = 0;
+		let normA = 0;
+		let normB = 0;
+		for (let i = 0; i < a.length; i++) {
+			dotProduct += a[i] * b[i];
+			normA += a[i] * a[i];
+			normB += b[i] * b[i];
+		}
+		const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
+		if (magnitude === 0) return 0;
+		return dotProduct / magnitude;
+	}
+	/**
+	* Compare similarity between two texts
+	*
+	* @example
+	* ```ts
+	* const result = await g.similarity("Hello world", "Hi there");
+	* console.log(result.score); // 0.85
+	* ```
+	*/
+	async similarity(textA, textB, options = {}) {
+		const startTime = performance.now();
+		const [embedA, embedB] = await Promise.all([this.embed(textA, options), this.embed(textB, options)]);
+		return {
+			score: this.cosineSimilarity(embedA.vector, embedB.vector),
+			textA,
+			textB,
+			totalTime: performance.now() - startTime
+		};
+	}
+	/**
+	* Semantic search - find most similar texts from a corpus
+	*
+	* @example
+	* ```ts
+	* const results = await g.search("capital of France", [
+	*   "Paris is beautiful",
+	*   "London is in England",
+	*   "Dogs are pets"
+	* ]);
+	* // [{ text: "Paris is beautiful", score: 0.89, index: 0 }, ...]
+	* ```
+	*/
+	async search(query, corpus, options = {}) {
+		const { topK = corpus.length, ...embedOptions } = options;
+		const queryEmbedding = await this.embed(query, embedOptions);
+		return (await this.embedBatch(corpus, embedOptions)).map((doc, index) => ({
+			text: doc.text,
+			score: this.cosineSimilarity(queryEmbedding.vector, doc.vector),
+			index
+		})).sort((a, b) => b.score - a.score).slice(0, topK);
+	}
+	/**
+	* Find the nearest text to an embedding vector
+	*
+	* @example
+	* ```ts
+	* const embedding = (await g.embed("dog")).vector;
+	* const match = await g.findNearest(embedding, ["cat", "car", "tree"]);
+	* // { text: "cat", score: 0.85, index: 0 }
+	* ```
+	*/
+	async findNearest(embedding, candidates, options = {}) {
+		const { topK = candidates.length, ...embedOptions } = options;
+		return (await this.embedBatch(candidates, embedOptions)).map((doc, index) => ({
+			text: doc.text,
+			score: this.cosineSimilarity(embedding, doc.vector),
+			index
+		})).sort((a, b) => b.score - a.score).slice(0, topK);
+	}
+	/**
+	* Get session stats
+	*/
+	getStats() {
+		return { ...this.stats };
+	}
+	/**
+	* Get system info
+	*/
+	getInfo() {
+		return {
+			version: "1.0.0",
+			model: this.modelConfig,
+			device: {
+				backend: "webgpu-native",
+				gpu: null,
+				vram: null,
+				status: this.isLoaded() ? "ready" : "loading"
+			},
+			context: {
+				max: this.modelConfig?.contextLength || 0,
+				used: 0,
+				available: this.modelConfig?.contextLength || 0
+			},
+			cache: {
+				location: "~/.cache/gerbil",
+				size: "0 MB",
+				modelCount: 0
+			}
+		};
+	}
+	/**
+	* Reset stats
+	*/
+	resetStats() {
+		this.stats = {
+			prompts: 0,
+			tokensIn: 0,
+			tokensOut: 0,
+			avgSpeed: 0,
+			totalTime: 0,
+			cacheHits: 0,
+			cacheMisses: 0
+		};
+	}
+	/** Identifier reported for the TTS model by getTTSModelInfo() and listTTSModels(). */
+	ttsModelId = "kani-tts-2";
+	/**
+	* Load the native TTS model (Kani-TTS-2) for text-to-speech synthesis.
+	*
+	* @example
+	* ```ts
+	* await g.loadTTS({ onProgress: (p) => console.log(p.status) });
+	* const result = await g.speak("Hello world");
+	* // result.audio = Float32Array PCM, result.sampleRate = 22050
+	* ```
+	*/
+	async loadTTS(_options = {}) {
+		// `_options` is accepted but never read; loading is delegated entirely
+		// to ensureNativeTTSEngine(), whose return value is discarded.
+		await this.ensureNativeTTSEngine();
+	}
+	/**
+	* Ensure TTS model is loaded (lazy loading)
+	*/
+	async ensureTTSLoaded(_options) {
+		// `_options` is accepted but never read; same delegation as loadTTS().
+		await this.ensureNativeTTSEngine();
+	}
+	/**
+	* Generate speech from text using the native Kani-TTS-2 WebGPU engine.
+	*
+	* @example
+	* ```ts
+	* const result = await g.speak("Hello world");
+	* // result.audio = Float32Array PCM, result.sampleRate = 22050
+	* ```
+	*/
+	async speak(text, options = {}) {
+		if (!this.preferNative()) throw new Error("Speech synthesis requires WebGPU. CPU/WASM and the legacy ONNX backend have been removed.");
+		const native = await this.ensureNativeTTSEngine();
+		const startTime = performance.now();
+		const out = await native.speak(text, {});
+		return {
+			audio: out.pcm,
+			sampleRate: out.sampleRate,
+			duration: out.audioSeconds,
+			voice: options.voice ?? "default",
+			totalTime: performance.now() - startTime
+		};
+	}
+	/** Lazily build (or reuse) the native Kani-TTS WebGPUEngine (default repo). */
+	async ensureNativeTTSEngine() {
+		if (!this.nativeTTSEngine) {
+			const { WebGPUEngine, DEFAULT_MODELS } = await import("./gpu/index.mjs");
+			this.nativeTTSEngine = await WebGPUEngine.create({ repo: DEFAULT_MODELS.tts });
+		}
+		return this.nativeTTSEngine;
+	}
+	/**
+	* Stream speech generation. The native engine synthesizes the full clip, so a
+	* single final audio chunk is yielded.
+	*/
+	async *speakStream(text, options = {}) {
+		const result = await this.speak(text, options);
+		yield {
+			samples: result.audio,
+			sampleRate: result.sampleRate,
+			index: 0,
+			isFinal: true
+		};
+		return result;
+	}
+	/**
+	* Get list of available TTS voices (native Kani-TTS-2 default voice).
+	*/
+	listVoices() {
+		// Returns the shared KOKORO_VOICES_DEFAULT reference itself, not a copy.
+		return KOKORO_VOICES_DEFAULT;
+	}
+	/**
+	* Check if TTS model is loaded
+	*/
+	isTTSLoaded() {
+		// Strict comparison against null: any other value counts as loaded.
+		return this.nativeTTSEngine !== null;
+	}
+	/**
+	* Get current TTS model info
+	*/
+	getTTSModelInfo() {
+		if (!this.nativeTTSEngine) return null;
+		return {
+			id: this.ttsModelId,
+			loaded: true,
+			device: "webgpu"
+		};
+	}
+	/**
+	* List available TTS models (native Kani-TTS-2).
+	*/
+	async listTTSModels() {
+		return [{
+			id: this.ttsModelId,
+			description: "Kani-TTS-2 native WebGPU TTS",
+			sampleRate: 22050,
+			voiceCount: 1
+		}];
+	}
+	/**
+	* Load the native STT model (Moonshine) for speech-to-text transcription.
+	*
+	* @example
+	* ```ts
+	* await g.loadSTT();
+	* const result = await g.transcribe(audioData);
+	* console.log(result.text);
+	* ```
+	*/
+	async loadSTT(_modelId, _options = {}) {
+		// Both parameters are accepted but never read; loading is delegated
+		// entirely to ensureNativeSTT(), whose return value is discarded.
+		await this.ensureNativeSTT();
+	}
+	/**
+	* Ensure STT model is loaded (lazy loading)
+	*/
+	async ensureSTTLoaded(_modelId, _options) {
+		// Both parameters are accepted but never read; same delegation as loadSTT().
+		await this.ensureNativeSTT();
+	}
+	/**
+	* Transcribe audio to text
+	*
+	* @param audio - Audio data as a 16 kHz mono Float32Array (PCM); any other type, including Uint8Array WAV bytes, is rejected with an error
+	* @param options - Transcription options
+	*
+	* @example
+	* ```ts
+	* // From Float32Array (16kHz mono)
+	* const result = await g.transcribe(audioData);
+	* console.log(result.text);
+	*
+	* // Timestamps are not produced by the native engine, so this throws:
+	* // await g.transcribe(audioData, { timestamps: true });
+	*
+	* // WAV bytes are rejected as well; decode them to 16 kHz mono
+	* // Float32Array PCM before calling transcribe(). This throws:
+	* // await g.transcribe(new Uint8Array(fs.readFileSync("audio.wav")));
+	* ```
+	*/
+	async transcribe(audio, options = {}) {
+		if (!this.preferNative()) throw new Error("Transcription requires WebGPU. CPU/WASM and the legacy ONNX backend have been removed.");
+		if (!(audio instanceof Float32Array)) throw new Error("Native transcription requires 16 kHz mono Float32Array PCM. Decode WAV bytes to PCM first.");
+		if (options.timestamps) throw new Error("Native transcription does not produce timestamps.");
+		const native = await this.ensureNativeSTT();
+		const startTime = performance.now();
+		const out = await native.transcribe(audio);
+		return {
+			text: out.text,
+			language: options.language ?? "en",
+			duration: out.audioSeconds,
+			totalTime: performance.now() - startTime
+		};
+	}
+	/** Lazily build (or reuse) the native MoonshineSTT engine (default repo). */
+	async ensureNativeSTT() {
+		if (!this.nativeSTT) {
+			const { MoonshineSTT } = await import("./moonshine-stt-BLyVoRpB.mjs");
+			this.nativeSTT = await MoonshineSTT.create();
+		}
+		return this.nativeSTT;
+	}
+	/**
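+	* Create a streaming transcription session (NOT supported by the native
+	* WebGPU STT engine: this method always throws; use transcribe() instead)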
+	*
+	* Transcribes audio in real-time by processing chunks at regular intervals.
+	* Perfect for live captioning, call transcription, or real-time subtitles.
+	*
+	* @param options - Streaming options
+	* @returns Streaming session controller
+	*
+	* @example
+	* ```ts
+	* const session = await g.createStreamingTranscription({
+	*   chunkDuration: 3000,  // Transcribe every 3 seconds
+	*   onChunk: (text, idx) => console.log(`Chunk ${idx}: ${text}`),
+	*   onTranscript: (fullText) => console.log("Full:", fullText),
+	* });
+	*
+	* // Feed audio data as it comes in
+	* session.feedAudio(audioChunk);
+	*
+	* // Start automatic interval-based transcription
+	* session.start();
+	*
+	* // Later, stop and get final transcript
+	* const finalText = await session.stop();
+	* ```
+	*/
+	async createStreamingTranscription(_options = {}) {
+		throw new Error("Streaming transcription is not supported by the native WebGPU STT engine. Use transcribe() on buffered 16 kHz Float32Array PCM instead.");
+	}
+	/**
+	* Get list of available STT models (native Moonshine).
+	*/
+	async listSTTModels() {
+		return [{
+			id: "moonshine-base",
+			repo: "UsefulSensors/moonshine-base",
+			description: "Moonshine native WebGPU STT",
+			size: "61M",
+			multilingual: false,
+			languages: ["en"],
+			sampleRate: 16e3
+		}];
+	}
+	/**
+	* Check if STT model is loaded
+	*/
+	isSTTLoaded() {
+		// Strict comparison against null: any other value counts as loaded.
+		return this.nativeSTT !== null;
+	}
+	/**
+	* Get current STT model info
+	*/
+	getSTTModelInfo() {
+		if (!this.nativeSTT) return null;
+		return {
+			id: "moonshine-base",
+			loaded: true,
+			device: "webgpu"
+		};
+	}
+	/**
+	* Record audio from microphone and transcribe
+	*
+	* @example
+	* ```ts
+	* // Record for 5 seconds and transcribe
+	* const result = await g.listen(5000);
+	* console.log(result.text);
+	*
+	* // Use with voice chat
+	* const userInput = await g.listen(10000);
+	* const response = await g.generate(userInput.text);
+	* await g.speak(response.text);
+	* ```
+	*/
+	async listen(durationMs = 5e3, options = {}) {
+		const { Microphone, isSoxAvailable } = await import("./microphone-Bqmoz9_K.mjs");
+		if (!isSoxAvailable()) throw new Error("Microphone recording requires SoX. Install with:\n  macOS: brew install sox\n  Ubuntu: sudo apt install sox\n  Windows: https://sox.sourceforge.net/");
+		options.onProgress?.("Starting microphone...");
+		const mic = new Microphone({ sampleRate: 16e3 });
+		await mic.start();
+		options.onProgress?.(`Recording for ${(durationMs / 1e3).toFixed(1)}s...`);
+		await new Promise((r) => setTimeout(r, durationMs));
+		options.onProgress?.("Processing audio...");
+		const { audio } = await mic.stop();
+		options.onProgress?.("Transcribing...");
+		return this.transcribe(audio, { onProgress: (p) => options.onProgress?.(p.status || "Transcribing...") });
+	}
+	/**
+	* Check if microphone recording is available
+	*/
+	async isMicrophoneAvailable() {
+		try {
+			const { isSoxAvailable } = await import("./microphone-Bqmoz9_K.mjs");
+			return isSoxAvailable();
+		} catch {
+			return false;
+		}
+	}
+	/**
+	* Dispose of resources (releases all native WebGPU engines and their devices).
+	* @param _disconnect Accepted for API compatibility; no longer used.
+	*/
+	async dispose(_disconnect = false) {
+		if (this.webgpuEngine) {
+			try {
+				this.webgpuEngine.destroy();
+			} catch {}
+			this.webgpuEngine = null;
+		}
+		if (this.nativeEmbedEngine) {
+			try {
+				this.nativeEmbedEngine.destroy();
+			} catch {}
+			this.nativeEmbedEngine = null;
+			this.nativeEmbedRepo = null;
+		}
+		if (this.nativeSTT) {
+			try {
+				this.nativeSTT.destroy?.();
+			} catch {}
+			this.nativeSTT = null;
+		}
+		if (this.nativeTTSEngine) {
+			try {
+				this.nativeTTSEngine.destroy();
+			} catch {}
+			this.nativeTTSEngine = null;
+		}
+		this.currentModel = null;
+		this.modelConfig = null;
+		this.isVisionModel = false;
+	}
+	/**
+	* @deprecated The shared Chrome backend was removed; this is now a no-op.
+	*/
+	static async shutdown() { /* intentionally empty: deprecated no-op */ }
+	parseThinking(text) {
+		const match = text.match(/<think>([\s\S]*?)<\/think>/);
+		if (match) return {
+			thinking: match[1].trim(),
+			response: text.replace(/<think>[\s\S]*?<\/think>/, "").trim()
+		};
+		const unclosedMatch = text.match(/<think>([\s\S]*)$/);
+		if (unclosedMatch) {
+			const thinking = unclosedMatch[1].trim();
+			const response = text.replace(/<think>[\s\S]*$/, "").trim();
+			return {
+				thinking: thinking || void 0,
+				response
+			};
+		}
+		return { response: text.replace(/<\/?think>/g, "").trim() };
+	}
+	cleanOutput(text) {
+		return text.replace(/<\|im_end\|>/g, "").replace(/<\|im_start\|>/g, "").replace(/<\|endoftext\|>/g, "").replace(/<\/s>/g, "").replace(/^\/no_think\s*/i, "").replace(/^assistant\s*/i, "").replace(/^\s*\/no_think\s*/gim, "").replace(/^\s*assistant\s*/gim, "").replace(/^(system|user|assistant):\s*/gim, "").trim();
+	}
+};
+//#endregion
+export { resolveModel as a, configureGlobalCache as c, listBuiltinModels as i, getGlobalCache as l, BUILTIN_MODELS as n, ResponseCache as o, DEFAULT_MODEL as r, clearGlobalCache as s, Gerbil as t };
+//# sourceMappingURL=gerbil-BHrJJIa4.mjs.map