npm - @tryhamster/gerbil - Versions diffs - 1.0.0-rc.9 → 1.0.1 - Mend

@tryhamster/gerbil 1.0.0-rc.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (179) hide show

package/LICENSE +1 -1
package/README.md +318 -104
package/dist/architectures-C1I5V3Dt.mjs +6070 -0
package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
package/dist/browser/index.d.ts +276 -590
package/dist/browser/index.d.ts.map +1 -1
package/dist/browser/index.js +592 -2334
package/dist/browser/index.js.map +1 -1
package/dist/cli.mjs +625 -1098
package/dist/cli.mjs.map +1 -1
package/dist/defaults-9komdrbY.mjs +24 -0
package/dist/defaults-9komdrbY.mjs.map +1 -0
package/dist/frameworks/express.d.mts +1 -3
package/dist/frameworks/express.d.mts.map +1 -1
package/dist/frameworks/express.mjs +7 -7
package/dist/frameworks/express.mjs.map +1 -1
package/dist/frameworks/fastify.d.mts +1 -1
package/dist/frameworks/fastify.d.mts.map +1 -1
package/dist/frameworks/fastify.mjs +3 -3
package/dist/frameworks/fastify.mjs.map +1 -1
package/dist/frameworks/hono.d.mts +1 -1
package/dist/frameworks/hono.d.mts.map +1 -1
package/dist/frameworks/hono.mjs +4 -4
package/dist/frameworks/hono.mjs.map +1 -1
package/dist/frameworks/next.d.mts +3 -2
package/dist/frameworks/next.d.mts.map +1 -1
package/dist/frameworks/next.mjs +4 -4
package/dist/frameworks/next.mjs.map +1 -1
package/dist/frameworks/react.d.mts +1 -1
package/dist/frameworks/trpc.d.mts +1 -1
package/dist/frameworks/trpc.d.mts.map +1 -1
package/dist/frameworks/trpc.mjs +4 -4
package/dist/frameworks/trpc.mjs.map +1 -1
package/dist/gerbil-BetB5xb0.d.mts +488 -0
package/dist/gerbil-BetB5xb0.d.mts.map +1 -0
package/dist/gerbil-CTZUa8EZ.mjs +4 -0
package/dist/gerbil-DNniplr4.mjs +1656 -0
package/dist/gerbil-DNniplr4.mjs.map +1 -0
package/dist/gpu/hooks.d.mts +640 -0
package/dist/gpu/hooks.d.mts.map +1 -0
package/dist/gpu/hooks.mjs +1369 -0
package/dist/gpu/hooks.mjs.map +1 -0
package/dist/gpu/index.d.mts +2 -0
package/dist/gpu/index.mjs +6 -0
package/dist/gpu-DFuglcEx.mjs +3790 -0
package/dist/gpu-DFuglcEx.mjs.map +1 -0
package/dist/index-Dgmb2kE3.d.mts +245 -0
package/dist/index-Dgmb2kE3.d.mts.map +1 -0
package/dist/index-DukkJRMj.d.mts +2114 -0
package/dist/index-DukkJRMj.d.mts.map +1 -0
package/dist/index.d.mts +22 -487
package/dist/index.d.mts.map +1 -1
package/dist/index.mjs +13 -8
package/dist/index.mjs.map +1 -1
package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
package/dist/integrations/ai-sdk.d.mts +75 -6
package/dist/integrations/ai-sdk.d.mts.map +1 -1
package/dist/integrations/ai-sdk.mjs +131 -15
package/dist/integrations/ai-sdk.mjs.map +1 -1
package/dist/integrations/langchain.d.mts +1 -1
package/dist/integrations/langchain.d.mts.map +1 -1
package/dist/integrations/langchain.mjs +5 -5
package/dist/integrations/langchain.mjs.map +1 -1
package/dist/integrations/llamaindex.d.mts +1 -1
package/dist/integrations/llamaindex.d.mts.map +1 -1
package/dist/integrations/llamaindex.mjs +5 -5
package/dist/integrations/llamaindex.mjs.map +1 -1
package/dist/integrations/mcp-client.mjs +3 -3
package/dist/integrations/mcp-client.mjs.map +1 -1
package/dist/integrations/mcp.d.mts +3 -2
package/dist/integrations/mcp.d.mts.map +1 -1
package/dist/integrations/mcp.mjs +5 -5
package/dist/{mcp-BvbriaBy.mjs → mcp-D2vvH1Xc.mjs} +4 -4
package/dist/mcp-D2vvH1Xc.mjs.map +1 -0
package/dist/memory/index.d.mts +3 -0
package/dist/memory/index.mjs +6 -0
package/dist/memory-D1P7Tmda.mjs +4 -0
package/dist/memory-DVN0MnIG.mjs +132 -0
package/dist/memory-DVN0MnIG.mjs.map +1 -0
package/dist/memory-Dj0J1v88.mjs +294 -0
package/dist/memory-Dj0J1v88.mjs.map +1 -0
package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
package/dist/moonshine-stt-4ojLtMq7.mjs +11962 -0
package/dist/moonshine-stt-4ojLtMq7.mjs.map +1 -0
package/dist/{one-liner-s-lD8rCC.mjs → one-liner-JhdIPxzF.mjs} +14 -16
package/dist/one-liner-JhdIPxzF.mjs.map +1 -0
package/dist/repl-BDRkwPGX.mjs +9 -0
package/dist/skills/index.d.mts +270 -320
package/dist/skills/index.d.mts.map +1 -1
package/dist/skills/index.mjs +5 -5
package/dist/{skills-CD3Orlex.mjs → skills-CU694Dc8.mjs} +187 -32
package/dist/skills-CU694Dc8.mjs.map +1 -0
package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
package/dist/tools-DQ1mPUw5.mjs.map +1 -0
package/dist/types-DQBe2lFo.d.mts +165 -0
package/dist/types-DQBe2lFo.d.mts.map +1 -0
package/dist/{types-CiTc7ez3.d.mts → types-LlyYILII.d.mts} +112 -14
package/dist/types-LlyYILII.d.mts.map +1 -0
package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
package/dist/vector-B0panuy6.mjs +95 -0
package/dist/vector-B0panuy6.mjs.map +1 -0
package/docs/PROJECT-STATE.md +321 -0
package/docs/adding-a-model-family.md +280 -0
package/docs/ai-sdk.md +70 -61
package/docs/architecture/overview.md +17 -7
package/docs/browser.md +203 -8
package/docs/embeddings.md +156 -0
package/docs/gerbil-site-native-migration.md +217 -0
package/docs/gpu-engine/architectures.md +398 -0
package/docs/gpu-engine/ir.md +372 -0
package/docs/gpu-engine/kernels.md +718 -0
package/docs/gpu-engine/paper.html +1759 -0
package/docs/gpu-engine/paper.md +2109 -0
package/docs/gpu-engine/safetensors.md +312 -0
package/docs/gpu-engine/tokenizer.md +302 -0
package/docs/memory-rag.md +91 -0
package/docs/metal-safari-intel.md +190 -0
package/docs/mobile-failure-diagnosis.md +124 -0
package/docs/mobile.md +99 -0
package/docs/observability.md +230 -0
package/docs/onnx-removal-plan.md +339 -0
package/docs/research/autoresearch-portable.md +904 -0
package/docs/research/dispatch-reduction-hivemind.md +84 -0
package/docs/research/ios-safari-model-caching.md +117 -0
package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
package/docs/research/native-stt-model-selection.md +49 -0
package/docs/research/native-tts-model-selection.md +90 -0
package/docs/research/native-vs-chromium-decision.md +152 -0
package/docs/research/nemotron-mamba2-inference.md +910 -0
package/docs/research/qwen35-multimodal.md +293 -0
package/docs/research/qwen36-gemma4-targets.md +337 -0
package/docs/research/sota-embedding-models.md +179 -0
package/docs/research/sota-mobile-models-2026.md +263 -0
package/docs/research/sota-modality-models.md +202 -0
package/docs/research/tps-baselines.md +71 -0
package/docs/research/webgpu-m4-reference.md +104 -0
package/docs/site-update-plan.md +155 -0
package/docs/structured-output.md +123 -0
package/docs/stt.md +63 -446
package/docs/tts.md +77 -499
package/docs/vision.md +100 -338
package/package.json +22 -7
package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
package/dist/gerbil-CJ3ifloF.mjs +0 -4
package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
package/dist/gerbil-qOTe1nl2.d.mts +0 -431
package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
package/dist/kokoro-BNTb6egA.mjs +0 -20210
package/dist/kokoro-BNTb6egA.mjs.map +0 -1
package/dist/kokoro-CMOGDSgT.js +0 -20212
package/dist/kokoro-CMOGDSgT.js.map +0 -1
package/dist/mcp-BvbriaBy.mjs.map +0 -1
package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
package/dist/repl-DveXw36T.mjs +0 -9
package/dist/skills-CD3Orlex.mjs.map +0 -1
package/dist/stt-Bu-E23Sc.js +0 -433
package/dist/stt-Bu-E23Sc.js.map +0 -1
package/dist/stt-CpLYbGFd.mjs +0 -433
package/dist/stt-CpLYbGFd.mjs.map +0 -1
package/dist/stt-DRPLEEHB.mjs +0 -3
package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
package/dist/transformers.web-DiD1gTwk.js +0 -44695
package/dist/transformers.web-DiD1gTwk.js.map +0 -1
package/dist/transformers.web-u34VxRFM.js +0 -3
package/dist/tts-CqroPaSK.js +0 -724
package/dist/tts-CqroPaSK.js.map +0 -1
package/dist/tts-DXgsKGCe.mjs +0 -3
package/dist/tts-DeGANMNV.mjs +0 -730
package/dist/tts-DeGANMNV.mjs.map +0 -1
package/dist/types-CiTc7ez3.d.mts.map +0 -1
/package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
/package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
/package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0

package/dist/gpu/hooks.mjs ADDED Viewed

@@ -0,0 +1,1369 @@
+import { n as resolveDefaultRepo, t as DEFAULT_MODELS } from "../defaults-9komdrbY.mjs";
+import { useCallback, useEffect, useRef, useState } from "react";
+//#region src/browser/use-engine.ts
+/**
+* React hook for native WebGPU inference in the browser.
+*
+* Uses gerbil's WebGPUEngine directly on the main thread — no web worker,
+* no ONNX Runtime, no transformers.js. Pure WGSL compute shaders.
+*
+* Handles the full engine lifecycle for you:
+*  - loads the model (lazily or on mount),
+*  - hot-swaps when you change `model`/`dtype`/`enableVision`/`embedding`,
+*  - SHARES one engine across every component that asks for the same config
+*    (reference-counted) so you never upload the same weights to the GPU twice,
+*  - disposes when the last consumer unmounts.
+*
+* @example
+* ```tsx
+* import { useEngine } from "@tryhamster/gerbil/browser";
+*
+* function App() {
+*   const { complete, completion, isLoading, isGenerating, tps } = useEngine({
+*     model: "mlx-community/Qwen3.5-0.8B-4bit",
+*     autoLoad: true,
+*   });
+*
+*   if (isLoading) return <div>Loading model...</div>;
+*   return (
+*     <div>
+*       <button onClick={() => complete("What is 2+2?")}>Generate</button>
+*       <p>{completion}</p>
+*       {isGenerating && <span>{tps?.toFixed(1)} tok/s</span>}
+*     </div>
+*   );
+* }
+* ```
+*/
+/**
+* Map an error to a coarse failure category by inspecting its message.
+*
+* Matching is a case-insensitive substring search over `err.message`. Rules
+* are tried top to bottom and the first rule with a matching phrase wins.
+*
+* @param {Error} err - Error whose `message` is inspected.
+* @returns {string} "no-webgpu", "no-adapter", "device-lost", "oom",
+*   "network", "timeout", or "unknown" when no rule matches.
+*/
+function classifyError(err) {
+	const text = err.message.toLowerCase();
+	const rules = [
+		["no-webgpu", ["webgpu is not available", "not supported"]],
+		["no-adapter", ["no webgpu adapter", "no gpu adapter"]],
+		["device-lost", ["device lost", "device was destroyed"]],
+		["oom", ["out of memory", "allocation failed", "buffer size"]],
+		["network", ["fetch", "network", "cors"]],
+		["timeout", ["timeout", "timed out"]]
+	];
+	for (const [kind, phrases] of rules) {
+		if (phrases.some((phrase) => text.includes(phrase))) return kind;
+	}
+	return "unknown";
+}
+/**
+* Look up the user-facing hint that goes with an error category.
+*
+* @param {string} kind - Category as produced by `classifyError`.
+* @returns {string} A one-sentence hint, or "" for "unknown" and any
+*   unrecognised value.
+*/
+function getErrorGuidance(kind) {
+	const hints = new Map([
+		["no-webgpu", "WebGPU requires Safari 26+ (iOS 26+), Chrome 113+, or Firefox 141+."],
+		["no-adapter", "No GPU found. Try closing other browser tabs that might be using the GPU."],
+		["device-lost", "GPU device was lost (tab may have been backgrounded). Please reload."],
+		["oom", "Not enough GPU memory. Try a smaller model or close other tabs."],
+		["network", "Failed to download model. Check your internet connection."],
+		["timeout", "Model loading timed out. Check your connection or try a smaller model."]
+	]);
+	return hints.get(kind) ?? "";
+}
+/**
+* Pick the default maximum sequence length for the current environment.
+*
+* @returns {number} 2048 when `navigator.userAgent` mentions iPhone, iPad,
+*   iPod or Android (case-insensitive); 4096 otherwise, including when no
+*   `navigator` global exists.
+*/
+function getDefaultMaxSeqLen() {
+	const standardLen = 4096;
+	const mobileLen = 2048;
+	if (typeof navigator === "undefined") return standardLen;
+	const looksMobile = /iPhone|iPad|iPod|Android/i.test(navigator.userAgent);
+	return looksMobile ? mobileLen : standardLen;
+}
+/**
+* Registry of shared engines, keyed by configuration string. Each entry is
+* `{ promise, engine, refs, disposeTimer }`.
+*/
+const SHARED_ENGINES = /* @__PURE__ */ new Map();
+/** Delay (ms) between the last release of an engine and its teardown. */
+const ENGINE_DISPOSE_GRACE_MS = 3e4;
+/** Minimum time (ms) before a failed load of the same key is attempted again. */
+const RETRY_COOLDOWN_MS = 3e3;
+/**
+* Take one reference on the engine registered under `key`, creating it via
+* `factory` if no entry exists yet.
+*
+* A pending grace-period teardown for the entry is cancelled. If the factory
+* promise rejects, the entry is removed so the next acquire starts fresh.
+*
+* @param {string} key - Registry key identifying the engine configuration.
+* @param {Function} factory - Called with no arguments, only when the entry
+*   is created; must return a promise for the engine.
+* @returns {Promise} The entry's engine promise (shared by all holders).
+*/
+function acquireSharedEngine(key, factory) {
+	let entry = SHARED_ENGINES.get(key);
+	if (!entry) {
+		const created = {
+			promise: factory(),
+			engine: null,
+			refs: 0,
+			disposeTimer: null
+		};
+		created.promise.then((eng) => {
+			created.engine = eng;
+		}).catch(() => {
+			// Evict only our own entry. If this entry was already removed and a
+			// newer one registered under the same key, a late rejection here
+			// must not delete the newer, possibly live, entry.
+			if (SHARED_ENGINES.get(key) === created) SHARED_ENGINES.delete(key);
+		});
+		SHARED_ENGINES.set(key, created);
+		entry = created;
+	}
+	if (entry.disposeTimer) {
+		clearTimeout(entry.disposeTimer);
+		entry.disposeTimer = null;
+	}
+	entry.refs += 1;
+	return entry.promise;
+}
+/**
+* Give back one reference on the engine registered under `key`.
+*
+* When the count reaches zero a teardown is scheduled after
+* `ENGINE_DISPOSE_GRACE_MS`; a re-acquire inside that window cancels it.
+* Unknown keys are ignored.
+*
+* @param {string} key - Registry key previously passed to `acquireSharedEngine`.
+* @returns {void}
+*/
+function releaseSharedEngine(key) {
+	const entry = SHARED_ENGINES.get(key);
+	if (!entry) return;
+	// Clamp at zero: a stray extra release must not push the count negative,
+	// otherwise the next acquire would sit at zero and could be torn down
+	// while still in use.
+	entry.refs = Math.max(0, entry.refs - 1);
+	if (entry.refs > 0 || entry.disposeTimer) return;
+	entry.disposeTimer = setTimeout(() => {
+		entry.disposeTimer = null;
+		if (entry.refs > 0) return;
+		// Same identity guard as in acquire: never remove a newer entry that
+		// replaced this one under the same key.
+		if (SHARED_ENGINES.get(key) === entry) SHARED_ENGINES.delete(key);
+		// Best-effort teardown; a failed load or a throwing destroy() is ignored.
+		entry.promise.then((eng) => eng.destroy()).catch(() => {});
+	}, ENGINE_DISPOSE_GRACE_MS);
+}
+/**
+* Load an image from a URL or data URL and return its pixels as packed RGB.
+*
+* The image is drawn onto a canvas sized so its longer side is at most 448
+* pixels (it is never enlarged, and each side is at least 1 pixel). The alpha
+* channel read back from the canvas is dropped.
+*
+* @param {string} src - Image URL or data URL.
+* @returns {Promise<{pixels: Uint8ClampedArray, width: number, height: number}>}
+*   Three bytes per pixel plus the canvas dimensions.
+* @throws {Error} If the image fails to load or no 2D context is available.
+*/
+async function decodeImage(src) {
+	const maxSide = 448;
+	const image = new Image();
+	image.crossOrigin = "anonymous";
+	await new Promise((resolve, reject) => {
+		image.onload = () => resolve();
+		image.onerror = () => reject(/* @__PURE__ */ new Error("Failed to load image."));
+		// Handlers are attached before `src` is set.
+		image.src = src;
+	});
+	const longestSide = Math.max(image.naturalWidth, image.naturalHeight);
+	const scale = Math.min(1, maxSide / longestSide);
+	const canvas = document.createElement("canvas");
+	canvas.width = Math.max(1, Math.round(image.naturalWidth * scale));
+	canvas.height = Math.max(1, Math.round(image.naturalHeight * scale));
+	const context2d = canvas.getContext("2d");
+	if (!context2d) throw new Error("Could not get 2D canvas context for image decode.");
+	context2d.drawImage(image, 0, 0, canvas.width, canvas.height);
+	const { data: rgba } = context2d.getImageData(0, 0, canvas.width, canvas.height);
+	const pixels = new Uint8ClampedArray(canvas.width * canvas.height * 3);
+	// Copy R, G, B of every RGBA quad and skip the alpha byte.
+	const quadCount = Math.ceil(rgba.length / 4);
+	for (let p = 0; p < quadCount; p++) {
+		const from = p * 4;
+		const to = p * 3;
+		pixels[to] = rgba[from];
+		pixels[to + 1] = rgba[from + 1];
+		pixels[to + 2] = rgba[from + 2];
+	}
+	return {
+		pixels,
+		width: canvas.width,
+		height: canvas.height
+	};
+}
+/**
+* General-purpose hook that loads a WebGPU engine and exposes generation,
+* vision and embedding helpers together with React state describing progress.
+*
+* The engine comes from the module-level shared registry
+* (`acquireSharedEngine` / `releaseSharedEngine`), keyed by
+* `model|dtype|enableVision|embedding|maxSeqLen`. This hook holds at most one
+* registry reference at a time; `heldKeyRef` records which key it is for, and
+* every code path that clears or replaces `heldKeyRef` releases that
+* reference first.
+*
+* @param {object} [options]
+* @param {string} [options.model] - Model repo; passed through `resolveDefaultRepo`.
+* @param {number} [options.maxSeqLen] - Defaults to `getDefaultMaxSeqLen()` at load time.
+* @param {string} [options.dtype="auto"] - Forwarded to `WebGPUEngine.create`.
+* @param {boolean} [options.autoLoad=false] - Call `load()` once on mount.
+* @param {boolean} [options.enableVision=false] - Forwarded to `WebGPUEngine.create`.
+* @param {boolean} [options.embedding=false] - Forwarded to `WebGPUEngine.create`.
+* @param {number} [options.loadingTimeout=300000] - Milliseconds before a pending load fails as timed out.
+* @param {Function} [options.onReady] - Called with no arguments after a successful load.
+* @param {Function} [options.onError] - Called as `(error, kind)` whenever an error is reported.
+* @returns {object} Actions (`complete`, `autocomplete`, `rewrite`,
+*   `generateWithTools`, `generateObject`, `describeImage`, `embed`,
+*   `similarity`, `load`, `stop`, `dispose`) and state (`completion`,
+*   `isLoading`, `loadingProgress`, `isGenerating`, `tps`, `attempts`,
+*   `error`, `errorKind`, `isReady`).
+*/
+function useEngine(options = {}) {
+	const { model: modelOption, maxSeqLen, dtype = "auto", autoLoad = false, enableVision = false, embedding = false, loadingTimeout = 3e5, onReady, onError } = options;
+	const model = resolveDefaultRepo({
+		repo: modelOption,
+		embedding,
+		enableVision
+	});
+	const engineRef = useRef(null);
+	// Set by stop(); checked by the streaming token callbacks.
+	const stoppedRef = useRef(false);
+	const timeoutRef = useRef(null);
+	// Registry key this hook currently holds a reference for (null = none).
+	const heldKeyRef = useRef(null);
+	// { key, at } of the most recent failed load, used for the retry cooldown.
+	const failedKeyRef = useRef(null);
+	const [isLoading, setIsLoading] = useState(false);
+	const [loadingProgress, setLoadingProgress] = useState(null);
+	const [isGenerating, setIsGenerating] = useState(false);
+	const [isReady, setIsReady] = useState(false);
+	const [completion, setCompletion] = useState("");
+	const [tps, setTps] = useState(null);
+	const [attempts, setAttempts] = useState(0);
+	const [error, setError] = useState(null);
+	const [errorKind, setErrorKind] = useState(null);
+	const modelKey = `${model}|${dtype}|${enableVision}|${embedding}|${maxSeqLen ?? "auto"}`;
+	// Normalise any thrown value to an Error, publish it (message plus
+	// guidance) as state, and notify onError.
+	const fail = useCallback((e) => {
+		const err = e instanceof Error ? e : new Error(String(e));
+		const kind = classifyError(err);
+		const guidance = getErrorGuidance(kind);
+		setError(guidance ? `${err.message} ${guidance}` : err.message);
+		setErrorKind(kind);
+		setIsLoading(false);
+		setLoadingProgress(null);
+		onError?.(err, kind);
+	}, [onError]);
+	// Load (or join) the shared engine for the current modelKey. Returns
+	// without doing anything when an engine is already attached, a load for
+	// this key is already in flight, or the key failed within the cooldown.
+	const load = useCallback(async () => {
+		const failed = failedKeyRef.current;
+		const inCooldown = failed?.key === modelKey && Date.now() - failed.at < RETRY_COOLDOWN_MS;
+		if (engineRef.current || heldKeyRef.current === modelKey || inCooldown) return;
+		if (typeof navigator === "undefined" || !("gpu" in navigator)) {
+			failedKeyRef.current = {
+				key: modelKey,
+				at: Date.now()
+			};
+			fail(/* @__PURE__ */ new Error("WebGPU is not available in this browser."));
+			return;
+		}
+		setIsLoading(true);
+		setError(null);
+		setErrorKind(null);
+		setLoadingProgress({ status: "Initializing WebGPU engine..." });
+		if (timeoutRef.current) clearTimeout(timeoutRef.current);
+		const timeoutPromise = new Promise((_, reject) => {
+			timeoutRef.current = setTimeout(() => reject(/* @__PURE__ */ new Error("Model loading timed out. The download may be too slow.")), loadingTimeout);
+		});
+		const key = modelKey;
+		// If a reference for a different key is still held (an older load is
+		// in flight), give it back before taking the new one so heldKeyRef
+		// always stands for exactly one outstanding reference.
+		if (heldKeyRef.current !== null) releaseSharedEngine(heldKeyRef.current);
+		heldKeyRef.current = key;
+		const factory = async () => {
+			const { WebGPUEngine } = await import("./index.mjs");
+			return WebGPUEngine.create({
+				repo: model,
+				maxSeqLen: maxSeqLen ?? getDefaultMaxSeqLen(),
+				dtype,
+				enableVision,
+				embedding,
+				onProgress: (loaded, total, message) => {
+					setLoadingProgress({
+						status: message,
+						progress: total > 0 ? Math.round(loaded / total * 100) : void 0
+					});
+				}
+			});
+		};
+		try {
+			const engine = await Promise.race([acquireSharedEngine(key, factory), timeoutPromise]);
+			if (timeoutRef.current) clearTimeout(timeoutRef.current);
+			// Superseded while loading (model switch, dispose() or unmount).
+			// Whoever cleared or replaced heldKeyRef already released our
+			// reference, so releasing again here would steal a reference from
+			// another consumer of the same engine.
+			if (heldKeyRef.current !== key) return;
+			engineRef.current = engine;
+			failedKeyRef.current = null;
+			if (typeof window !== "undefined") window.__gerbilEngine = engine;
+			setIsReady(true);
+			setIsLoading(false);
+			setLoadingProgress(null);
+			onReady?.();
+		} catch (e) {
+			if (timeoutRef.current) clearTimeout(timeoutRef.current);
+			failedKeyRef.current = {
+				key,
+				at: Date.now()
+			};
+			// Release only while we still own the reference (see above).
+			if (heldKeyRef.current === key) {
+				heldKeyRef.current = null;
+				releaseSharedEngine(key);
+			}
+			fail(e);
+		}
+	}, [
+		modelKey,
+		model,
+		maxSeqLen,
+		dtype,
+		enableVision,
+		embedding,
+		loadingTimeout,
+		onReady,
+		fail
+	]);
+	// Stop publishing streamed tokens to state. This only flips a flag read by
+	// the onToken callbacks; nothing is passed to the engine.
+	const stop = useCallback(() => {
+		stoppedRef.current = true;
+	}, []);
+	// Detach from the engine and give back this hook's registry reference.
+	const dispose = useCallback(() => {
+		if (timeoutRef.current) clearTimeout(timeoutRef.current);
+		engineRef.current = null;
+		failedKeyRef.current = null;
+		setIsReady(false);
+		if (heldKeyRef.current) {
+			releaseSharedEngine(heldKeyRef.current);
+			heldKeyRef.current = null;
+		}
+	}, []);
+	// Streamed text generation. Resolves to the final text; if the engine
+	// throws, the error is reported via fail() and the text streamed so far is
+	// returned instead.
+	const complete = useCallback(async (prompt, opts = {}) => {
+		const engine = engineRef.current;
+		if (!engine) throw new Error("Engine not loaded. Call load() first.");
+		setIsGenerating(true);
+		setCompletion("");
+		setTps(null);
+		stoppedRef.current = false;
+		let fullText = "";
+		try {
+			const result = await engine.generate(prompt, {
+				maxTokens: opts.maxTokens ?? 256,
+				sampling: { temperature: opts.temperature ?? .7 },
+				systemPrompt: opts.system,
+				stopSequences: opts.stopSequences,
+				onToken: (token, meta) => {
+					if (stoppedRef.current) return;
+					fullText += token;
+					setCompletion(fullText);
+					if (meta) setTps(meta.tps);
+				}
+			});
+			setTps(result.tokensPerSecond);
+			setIsGenerating(false);
+			return result.text;
+		} catch (e) {
+			setIsGenerating(false);
+			fail(e);
+			return fullText;
+		}
+	}, [fail]);
+	// Thin pass-throughs to the engine; they do not touch hook state.
+	const autocomplete = useCallback(async (prefix, opts) => {
+		const engine = engineRef.current;
+		if (!engine) throw new Error("Engine not loaded. Call load() first.");
+		return engine.autocomplete(prefix, opts);
+	}, []);
+	const rewrite = useCallback(async (text, opts) => {
+		const engine = engineRef.current;
+		if (!engine) throw new Error("Engine not loaded. Call load() first.");
+		return engine.rewrite(text, opts);
+	}, []);
+	const generateWithTools = useCallback((prompt, opts) => {
+		const engine = engineRef.current;
+		if (!engine) throw new Error("Engine not loaded. Call load() first.");
+		return engine.generateWithTools(prompt, opts);
+	}, []);
+	// Structured output. Unlike complete(), a failure is reported via fail()
+	// AND rethrown (always as an Error).
+	const generateObject = useCallback(async (prompt, opts = {}) => {
+		const engine = engineRef.current;
+		if (!engine) throw new Error("Engine not loaded. Call load() first.");
+		setIsGenerating(true);
+		setCompletion("");
+		setTps(null);
+		setAttempts(0);
+		stoppedRef.current = false;
+		try {
+			const result = await engine.generateObject(prompt, {
+				schema: opts.schema,
+				maxRetries: opts.maxRetries,
+				maxTokens: opts.maxTokens ?? 256,
+				sampling: { temperature: opts.temperature ?? .7 },
+				systemPrompt: opts.system,
+				stopSequences: opts.stopSequences
+			});
+			setCompletion(result.text);
+			setAttempts(result.attempts);
+			setIsGenerating(false);
+			return {
+				object: result.object,
+				attempts: result.attempts
+			};
+		} catch (e) {
+			setIsGenerating(false);
+			fail(e);
+			throw e instanceof Error ? e : new Error(String(e));
+		}
+	}, [fail]);
+	// Image in, streamed text out. `image` is either a URL / data URL string
+	// (decoded with decodeImage) or an already decoded value that is passed to
+	// the engine as-is.
+	const describeImage = useCallback(async (image, prompt = "Describe this image.", opts = {}) => {
+		const engine = engineRef.current;
+		if (!engine) throw new Error("Engine not loaded. Call load() first.");
+		if (!engine.hasVision) throw new Error("Engine was not created with enableVision: true.");
+		setIsGenerating(true);
+		setCompletion("");
+		setTps(null);
+		stoppedRef.current = false;
+		let decoded;
+		try {
+			decoded = typeof image === "string" ? await decodeImage(image) : image;
+		} catch (e) {
+			// Decoding failed before generation started: clear the busy flag
+			// (it would otherwise stay true forever) and surface the original
+			// rejection to the caller.
+			setIsGenerating(false);
+			throw e;
+		}
+		let fullText = "";
+		try {
+			const result = await engine.describeImage(decoded, prompt, {
+				maxTokens: opts.maxTokens ?? 150,
+				sampling: { temperature: opts.temperature ?? .7 },
+				systemPrompt: opts.system,
+				stopSequences: opts.stopSequences,
+				onToken: (token, meta) => {
+					if (stoppedRef.current) return;
+					fullText += token;
+					setCompletion(fullText);
+					if (meta) setTps(meta.tps);
+				}
+			});
+			setTps(result.tokensPerSecond);
+			setIsGenerating(false);
+			return result.text;
+		} catch (e) {
+			setIsGenerating(false);
+			fail(e);
+			return fullText;
+		}
+	}, [fail]);
+	const embed = useCallback(async (text, opts = {}) => {
+		const engine = engineRef.current;
+		if (!engine) throw new Error("Engine not loaded. Call load() first.");
+		if (!engine.isEmbedding) throw new Error("Engine was not created with embedding: true.");
+		return engine.embed(text, { taskType: opts.taskType ?? "query" });
+	}, []);
+	// Dot product of the two embeddings (`a` embedded as "query", `b` as
+	// "document"). NOTE(review): this equals cosine similarity only if the
+	// engine returns unit-length vectors — confirm against the engine.
+	const similarity = useCallback(async (a, b) => {
+		const [va, vb] = await Promise.all([embed(a, { taskType: "query" }), embed(b, { taskType: "document" })]);
+		let dot = 0;
+		const n = Math.min(va.length, vb.length);
+		for (let i = 0; i < n; i++) dot += va[i] * vb[i];
+		return dot;
+	}, [embed]);
+	// Mount: optional auto-load. Unmount: drop the engine and release the
+	// registry reference.
+	useEffect(() => {
+		if (autoLoad) load();
+		return () => {
+			if (timeoutRef.current) clearTimeout(timeoutRef.current);
+			engineRef.current = null;
+			if (heldKeyRef.current) {
+				releaseSharedEngine(heldKeyRef.current);
+				heldKeyRef.current = null;
+			}
+		};
+	}, []);
+	// Hot-swap: when the configuration key changes while a reference is held,
+	// release the old engine, reset state and load the new configuration.
+	useEffect(() => {
+		if (heldKeyRef.current === modelKey) return;
+		failedKeyRef.current = null;
+		if (heldKeyRef.current === null) return;
+		engineRef.current = null;
+		releaseSharedEngine(heldKeyRef.current);
+		heldKeyRef.current = null;
+		setIsReady(false);
+		setCompletion("");
+		setTps(null);
+		setError(null);
+		setErrorKind(null);
+		load();
+	}, [modelKey]);
+	return {
+		complete,
+		autocomplete,
+		rewrite,
+		generateWithTools,
+		generateObject,
+		describeImage,
+		embed,
+		similarity,
+		completion,
+		isLoading,
+		loadingProgress,
+		isGenerating,
+		tps,
+		attempts,
+		error,
+		errorKind,
+		isReady,
+		load,
+		stop,
+		dispose
+	};
+}
+//#endregion
+//#region src/browser/use-agent.ts
+/**
+* React hook that runs a tool-calling agent in the browser.
+*
+* It wraps `useEngine().generateWithTools` and keeps the loop's UI state: a
+* running flag, the list of steps reported through `onStep`, and the final
+* answer text. The latest `tools` value is read at call time, so changing the
+* tools between renders does not require a new `run` function.
+*
+* @example
+* ```tsx
+* import { useAgent } from "@tryhamster/gerbil/gpu/hooks";
+*
+* const { run, steps, answer, isRunning } = useAgent({
+*   model: "mlx-community/Qwen3.5-0.8B-4bit",
+*   tools: [weatherTool],
+* });
+* await run("What's the weather in Paris?");
+* ```
+*
+* @param {object} options - `model`, `tools`, `maxSteps` (default 5) and
+*   `autoLoad` (default false).
+* @returns {object} `run`, `steps`, `answer`, `isRunning`, `isReady`, `load`,
+*   `reset`, `error`.
+*/
+function useAgent(options) {
+	const { model, tools, maxSteps = 5, autoLoad = false } = options;
+	const eng = useEngine({
+		model,
+		autoLoad
+	});
+	const [steps, setSteps] = useState([]);
+	const [answer, setAnswer] = useState("");
+	const [isRunning, setIsRunning] = useState(false);
+	// Mirror `tools` into a ref on every render so `run` sees the newest list.
+	const latestTools = useRef(tools);
+	latestTools.current = tools;
+	const appendStep = (step) => setSteps((soFar) => [...soFar, step]);
+	const run = useCallback(async (prompt) => {
+		// Start from a clean trace for each run.
+		setIsRunning(true);
+		setSteps([]);
+		setAnswer("");
+		try {
+			if (!eng.isReady) await eng.load();
+			const outcome = await eng.generateWithTools(prompt, {
+				tools: latestTools.current,
+				maxSteps,
+				onStep: appendStep
+			});
+			const finalText = outcome.text;
+			setAnswer(finalText);
+			return finalText;
+		} finally {
+			// Clear the flag on success and on failure alike.
+			setIsRunning(false);
+		}
+	}, [eng, maxSteps]);
+	const reset = useCallback(() => {
+		setSteps([]);
+		setAnswer("");
+	}, []);
+	return {
+		run,
+		steps,
+		answer,
+		isRunning,
+		isReady: eng.isReady,
+		load: eng.load,
+		reset,
+		error: eng.error
+	};
+}
+//#endregion
+//#region src/browser/use-autocomplete.ts
+/**
+* React hook for debounced inline autocomplete ("ghost text").
+*
+* The hook debounces input, allows one request in flight at a time, and drops
+* a response when the text it was requested for is no longer the latest one.
+* The component only renders `suggestion` and wires up accept/dismiss. It is
+* built on `useEngine`.
+*
+* @example
+* ```tsx
+* import { useAutocomplete } from "@tryhamster/gerbil/gpu/hooks";
+*
+* const { suggestion, onInput, accept, dismiss } = useAutocomplete({
+*   model: "mlx-community/Qwen3.5-0.8B-4bit",
+* });
+* // <input onChange={(e) => onInput(e.target.value)} />
+* // render `suggestion` as ghost text; Tab → accept(), Esc → dismiss()
+* ```
+*
+* @param {object} options - `model`, `debounceMs` (default 550), `minChars`
+*   (default 8), `maxTokens` (default 16), `temperature` (default 0.3) and
+*   `autoLoad` (default false).
+* @returns {object} `suggestion`, `isFetching`, `isReady`, `load`, `onInput`,
+*   `accept`, `dismiss`, `error`.
+*/
+function useAutocomplete(options) {
+	const { model, debounceMs = 550, minChars = 8, maxTokens = 16, temperature = .3, autoLoad = false } = options;
+	const eng = useEngine({
+		model,
+		autoLoad
+	});
+	const [suggestion, setSuggestion] = useState("");
+	const [isFetching, setIsFetching] = useState(false);
+	const timerRef = useRef(null);
+	const busyRef = useRef(false);
+	// Text the current/last request was issued for; "" after accept/dismiss.
+	const askedForRef = useRef("");
+	const request = useCallback(async (text) => {
+		// Skip while another request is running or the input is too short.
+		if (busyRef.current || text.trim().length < minChars) return;
+		busyRef.current = true;
+		askedForRef.current = text;
+		setIsFetching(true);
+		try {
+			if (!eng.isReady) await eng.load();
+			if (!eng.error) {
+				const proposed = await eng.autocomplete(text, {
+					maxTokens,
+					temperature
+				});
+				// Publish only if this text is still the one that was asked for.
+				const stillWanted = askedForRef.current === text;
+				if (stillWanted && proposed) setSuggestion(proposed);
+			}
+		} finally {
+			busyRef.current = false;
+			setIsFetching(false);
+		}
+	}, [
+		eng,
+		minChars,
+		maxTokens,
+		temperature
+	]);
+	const onInput = useCallback((text) => {
+		// Every keystroke clears the old suggestion and restarts the debounce.
+		setSuggestion("");
+		if (timerRef.current) clearTimeout(timerRef.current);
+		const longEnough = text.trim().length >= minChars;
+		if (longEnough) timerRef.current = setTimeout(() => void request(text), debounceMs);
+	}, [
+		request,
+		minChars,
+		debounceMs
+	]);
+	const accept = useCallback(() => {
+		const accepted = suggestion;
+		setSuggestion("");
+		askedForRef.current = "";
+		return accepted;
+	}, [suggestion]);
+	const dismiss = useCallback(() => {
+		setSuggestion("");
+		askedForRef.current = "";
+	}, []);
+	// Cancel a pending debounce timer on unmount.
+	useEffect(() => {
+		return () => {
+			if (timerRef.current) clearTimeout(timerRef.current);
+		};
+	}, []);
+	return {
+		suggestion,
+		isFetching,
+		isReady: eng.isReady,
+		load: eng.load,
+		onInput,
+		accept,
+		dismiss,
+		error: eng.error
+	};
+}
+//#endregion
+//#region src/browser/use-memory.ts
+/**
+* React hook for on-device memory / RAG.
+*
+* Wraps the `@tryhamster/gerbil/memory` module with a native embedder (running
+* on the WebGPU engine) and a persistent IndexedDB store, so an agent can
+* remember things across turns AND across sessions — with zero server.
+*
+* ```tsx
+* import { useMemory } from "@tryhamster/gerbil/hooks";
+*
+* const memory = useMemory();
+* await memory.add("The user prefers TypeScript.");
+* const { context } = await memory.recall("what does the user like?", { tokenBudget: 256 });
+* ```
+*
+* The embedding model and the memory module are both imported lazily, so this
+* hook adds nothing to your bundle until it's used. Initialisation happens on
+* the first call to any memory method; concurrent callers share one
+* initialisation, and a failed initialisation is retried on the next call.
+*
+* @param {object} [options] - `model` (embedding model) and `namespace`
+*   (IndexedDB database name, default "gerbil-memory").
+* @returns {object} `add`, `recall`, `search`, `get`, `remove`, `clear`,
+*   `size`, plus `isLoading`, `loadingProgress`, `isReady`, `error`.
+*/
+function useMemory(options = {}) {
+	const { model, namespace = "gerbil-memory" } = options;
+	const embedder = useEngine({
+		model,
+		embedding: true,
+		autoLoad: false
+	});
+	// The created memory instance, once initialisation has succeeded.
+	const memRef = useRef(null);
+	// The in-flight initialisation promise, shared by concurrent callers.
+	const initRef = useRef(null);
+	const [isReady, setIsReady] = useState(false);
+	const ensure = useCallback(() => {
+		if (memRef.current) return Promise.resolve(memRef.current);
+		if (initRef.current) return initRef.current;
+		const pending = (async () => {
+			if (!embedder.isReady) await embedder.load();
+			const [{ createMemory }, { createIndexedDBStore }] = await Promise.all([import("../memory-D1P7Tmda.mjs"), import("../indexeddb-store-ClH12Xnl.mjs")]);
+			const mem = createMemory({
+				embed: async (texts) => Promise.all(texts.map((t) => embedder.embed(t))),
+				store: createIndexedDBStore({ dbName: namespace })
+			});
+			memRef.current = mem;
+			setIsReady(true);
+			return mem;
+		})();
+		initRef.current = pending;
+		// Do not cache a failure: without this reset every later call would
+		// receive the same rejected promise and could never retry.
+		pending.catch(() => {
+			if (initRef.current === pending) initRef.current = null;
+		});
+		return pending;
+	}, [embedder, namespace]);
+	return {
+		add: useCallback(async (text, opts) => (await ensure()).add(text, opts), [ensure]),
+		recall: useCallback(async (query, opts) => (await ensure()).recall(query, opts), [ensure]),
+		search: useCallback(async (query, opts) => (await ensure()).search(query, opts), [ensure]),
+		get: useCallback(async (id) => (await ensure()).get(id), [ensure]),
+		remove: useCallback(async (id) => (await ensure()).delete(id), [ensure]),
+		clear: useCallback(async () => (await ensure()).clear(), [ensure]),
+		size: useCallback(async () => (await ensure()).size(), [ensure]),
+		isLoading: embedder.isLoading,
+		loadingProgress: embedder.loadingProgress,
+		isReady,
+		error: embedder.error
+	};
+}
+//#endregion
+//#region src/browser/use-modalities.ts
+/**
+* Per-modality convenience hooks built on {@link useEngine}.
+*
+* `useEngine` is the general/advanced hook (it can do text, vision, and
+* embeddings via options). These wrappers give each modality a focused,
+* self-documenting surface so app code reads cleanly:
+*
+* ```tsx
+* import { useText, useVision, useEmbedding } from "@tryhamster/gerbil/gpu/hooks";
+*
+* const { complete } = useText();                 // text generation
+* const { describeImage } = useVision();          // image → text
+* const { embed, similarity } = useEmbedding();   // text → vector
+* ```
+*
+* They share the same engine registry as `useEngine`, so requesting the same
+* model from several places loads it once.
+*/
+/**
+* Text generation: the text-only subset of `useEngine`.
+*
+* @param {object} [options] - Passed to `useEngine` unchanged.
+* @returns {object} `complete`, `completion`, `isLoading`, `loadingProgress`,
+*   `isGenerating`, `tps`, `error`, `errorKind`, `isReady`, `load`, `stop`,
+*   `dispose`.
+*/
+function useText(options = {}) {
+	const { complete, completion, isLoading, loadingProgress, isGenerating, tps, error, errorKind, isReady, load, stop, dispose } = useEngine(options);
+	return {
+		complete,
+		completion,
+		isLoading,
+		loadingProgress,
+		isGenerating,
+		tps,
+		error,
+		errorKind,
+		isReady,
+		load,
+		stop,
+		dispose
+	};
+}
+/**
+* Structured-output generation on top of `useEngine().generateObject`.
+*
+* `generate` loads the engine if it is not ready, asks the engine for an
+* object, stores it in `object`, and resolves to it. The number of attempts
+* the engine reported is exposed as `attempts`.
+*
+* ```tsx
+* const { object, generate, isGenerating } = useObject<{ name: string; age: number }>();
+* await generate('Extract {name, age} from: "I am Sarah, 28"', {
+*   schema: { required: ["name", "age"] },
+* });
+* // object === { name: "Sarah", age: 28 }
+* ```
+*
+* @param {object} [options] - Passed to `useEngine` unchanged.
+* @returns {object} `object`, `generate`, `attempts`, `isLoading`,
+*   `loadingProgress`, `isGenerating`, `error`, `errorKind`, `isReady`,
+*   `load`, `stop`, `dispose`.
+*/
+function useObject(options = {}) {
+	const eng = useEngine(options);
+	const [object, setObject] = useState(null);
+	const generate = useCallback(async (prompt, opts) => {
+		if (!eng.isReady) await eng.load();
+		const { object: produced } = await eng.generateObject(prompt, opts);
+		setObject(produced);
+		return produced;
+	}, [eng]);
+	const { attempts, isLoading, loadingProgress, isGenerating, error, errorKind, isReady, load, stop, dispose } = eng;
+	return {
+		object,
+		generate,
+		attempts,
+		isLoading,
+		loadingProgress,
+		isGenerating,
+		error,
+		errorKind,
+		isReady,
+		load,
+		stop,
+		dispose
+	};
+}
+/**
+* Image understanding (image in → text out): `useEngine` with
+* `enableVision` forced to true.
+*
+* @param {object} [options] - Passed to `useEngine`; any `enableVision`
+*   value supplied here is overridden.
+* @returns {object} `describeImage`, `completion`, `isLoading`,
+*   `loadingProgress`, `isGenerating`, `tps`, `error`, `errorKind`,
+*   `isReady`, `load`, `stop`, `dispose`.
+*/
+function useVision(options = {}) {
+	const visionOptions = {
+		...options,
+		enableVision: true
+	};
+	const { describeImage, completion, isLoading, loadingProgress, isGenerating, tps, error, errorKind, isReady, load, stop, dispose } = useEngine(visionOptions);
+	return {
+		describeImage,
+		completion,
+		isLoading,
+		loadingProgress,
+		isGenerating,
+		tps,
+		error,
+		errorKind,
+		isReady,
+		load,
+		stop,
+		dispose
+	};
+}
+/** Text embeddings + similarity. */
+function useEmbedding(options = {}) {
+	const e = useEngine({
+		...options,
+		embedding: true
+	});
+	return {
+		embed: e.embed,
+		similarity: e.similarity,
+		isLoading: e.isLoading,
+		loadingProgress: e.loadingProgress,
+		error: e.error,
+		errorKind: e.errorKind,
+		isReady: e.isReady,
+		load: e.load,
+		dispose: e.dispose
+	};
+}
+/**
+* Conversational chat hook — manages the message list and streams replies.
+* Multi-turn context is handled for you (the full history is sent each turn).
+*
+* ```tsx
+* const { messages, send, isGenerating } = useChat();
+* <button onClick={() => send("Hello!")}>Send</button>
+* ```
+*/
+function useChat(options = {}) {
+	const { system, ...engineOptions } = options;
+	const e = useEngine(engineOptions);
+	const [messages, setMessages] = useState([]);
+	const messagesRef = useRef([]);
+	messagesRef.current = messages;
+	useEffect(() => {
+		if (!e.isGenerating) return;
+		setMessages((prev) => {
+			if (prev.length === 0 || prev[prev.length - 1].role !== "assistant") return prev;
+			const copy = prev.slice();
+			copy[copy.length - 1] = {
+				role: "assistant",
+				content: e.completion
+			};
+			return copy;
+		});
+	}, [e.completion, e.isGenerating]);
+	const run = useCallback(async (history, opts) => {
+		setMessages([...history, {
+			role: "assistant",
+			content: ""
+		}]);
+		if (!e.isReady) await e.load();
+		const turns = system ? [{
+			role: "system",
+			content: system
+		}, ...history] : history;
+		const full = await e.complete(turns, {
+			...opts,
+			system: opts.system ?? system
+		});
+		setMessages((prev) => {
+			if (prev.length === 0) return prev;
+			const copy = prev.slice();
+			copy[copy.length - 1] = {
+				role: "assistant",
+				content: full
+			};
+			return copy;
+		});
+		return full;
+	}, [e, system]);
+	const send = useCallback(async (text, opts = {}) => {
+		if (!text.trim() || e.isGenerating) return "";
+		return run([...messagesRef.current, {
+			role: "user",
+			content: text
+		}], opts);
+	}, [e.isGenerating, run]);
+	return {
+		messages,
+		send,
+		sendMessage: send,
+		regenerate: useCallback(async (opts = {}) => {
+			if (e.isGenerating) return "";
+			const msgs = messagesRef.current.slice();
+			while (msgs.length > 0 && msgs[msgs.length - 1].role === "assistant") msgs.pop();
+			if (msgs.length === 0) return "";
+			return run(msgs, opts);
+		}, [e.isGenerating, run]),
+		setMessages: useCallback((next) => setMessages(next), []),
+		clear: useCallback(() => setMessages([]), []),
+		status: e.error ? "error" : e.isGenerating ? e.completion.length === 0 ? "submitted" : "streaming" : "ready",
+		isGenerating: e.isGenerating,
+		isLoading: e.isLoading,
+		loadingProgress: e.loadingProgress,
+		isReady: e.isReady,
+		tps: e.tps,
+		error: e.error,
+		errorKind: e.errorKind,
+		stop: e.stop,
+		load: e.load
+	};
+}
+/**
+* Single-prompt streaming completion with built-in input state — a near
+* drop-in for the Vercel AI SDK's `useCompletion`, running on-device.
+*/
+function useCompletion(options = {}) {
+	const t = useText(options);
+	const [input, setInput] = useState("");
+	const complete = useCallback(async (prompt, opts) => {
+		if (!t.isReady) await t.load();
+		return t.complete(prompt, opts);
+	}, [t]);
+	const handleInputChange = useCallback((e) => setInput(e.target.value), []);
+	const handleSubmit = useCallback((e) => {
+		e?.preventDefault?.();
+		const value = input;
+		if (!value.trim()) return;
+		setInput("");
+		complete(value);
+	}, [input, complete]);
+	return {
+		completion: t.completion,
+		complete,
+		input,
+		setInput,
+		handleInputChange,
+		handleSubmit,
+		isLoading: t.isGenerating,
+		isReady: t.isReady,
+		loadingProgress: t.loadingProgress,
+		stop: t.stop,
+		error: t.error,
+		load: t.load
+	};
+}
+//#endregion
+//#region src/browser/use-stt.ts
+/**
+* React hook for native speech-to-text in the browser.
+*
+* Wraps `MoonshineSTT` — raw 16 kHz mono PCM in, transcript out (encoder-decoder
+* ASR, no streaming/partial API). This hook captures mic audio between
+* start/stop, resamples it to 16 kHz mono, and runs a single transcribe() on the
+* finalized utterance. The GPU engine is dynamically imported so it stays out of
+* the main bundle until STT is actually used.
+*
+* @example
+* ```tsx
+* import { useSTT } from "@tryhamster/gerbil/gpu/hooks";
+*
+* const { startRecording, stopRecording, transcript, isRecording } = useSTT();
+* ```
+*/
+/** Sample rate in Hz (16 kHz) that captured audio is resampled to before transcription. */
+const MOONSHINE_SAMPLE_RATE = 16e3;
+/** Downmix to mono and linearly resample a Float32 buffer to 16 kHz. */
+function toMono16k(channels, inputRate) {
+	const inLen = channels[0]?.length ?? 0;
+	const mono = new Float32Array(inLen);
+	for (const ch of channels) for (let i = 0; i < inLen; i++) mono[i] += ch[i] / channels.length;
+	if (inputRate === MOONSHINE_SAMPLE_RATE) return mono;
+	const ratio = MOONSHINE_SAMPLE_RATE / inputRate;
+	const outLen = Math.max(0, Math.floor(inLen * ratio));
+	const out = new Float32Array(outLen);
+	for (let i = 0; i < outLen; i++) {
+		const srcPos = i / ratio;
+		const i0 = Math.floor(srcPos);
+		const i1 = Math.min(i0 + 1, inLen - 1);
+		const frac = srcPos - i0;
+		out[i] = mono[i0] * (1 - frac) + mono[i1] * frac;
+	}
+	return out;
+}
+function useSTT(options = {}) {
+	const { repo = DEFAULT_MODELS.stt, autoLoad = false, onReady, onError, onNoSpeech } = options;
+	const sttRef = useRef(null);
+	const loadingRef = useRef(false);
+	const mediaStreamRef = useRef(null);
+	const audioCtxRef = useRef(null);
+	const sourceRef = useRef(null);
+	const processorRef = useRef(null);
+	const chunksRef = useRef([]);
+	const sampleRateRef = useRef(MOONSHINE_SAMPLE_RATE);
+	const [isLoading, setIsLoading] = useState(false);
+	const [loadingProgress, setLoadingProgress] = useState(null);
+	const [isReady, setIsReady] = useState(false);
+	const [isRecording, setIsRecording] = useState(false);
+	const [isTranscribing, setIsTranscribing] = useState(false);
+	const [transcript, setTranscript] = useState("");
+	const [audioSeconds, setAudioSeconds] = useState(null);
+	const [noSpeech, setNoSpeech] = useState(false);
+	const [error, setError] = useState(null);
+	const load = useCallback(async () => {
+		if (sttRef.current || loadingRef.current) return;
+		loadingRef.current = true;
+		if (typeof navigator === "undefined" || !("gpu" in navigator)) {
+			loadingRef.current = false;
+			const err = /* @__PURE__ */ new Error("WebGPU is not available in this browser. Native speech-to-text requires Chrome/Edge 113+, Firefox 141+, or Safari 26+.");
+			setError(err.message);
+			onError?.(err);
+			return;
+		}
+		setIsLoading(true);
+		setError(null);
+		setLoadingProgress({ status: "Initializing speech-to-text..." });
+		try {
+			const { MoonshineSTT } = await import("./index.mjs");
+			sttRef.current = await MoonshineSTT.create({
+				repo,
+				onProgress: (loaded, total, message) => {
+					setLoadingProgress({
+						status: message,
+						progress: total > 0 ? Math.round(loaded / total * 100) : void 0
+					});
+				}
+			});
+			setIsReady(true);
+			setIsLoading(false);
+			setLoadingProgress(null);
+			onReady?.();
+		} catch (e) {
+			loadingRef.current = false;
+			const err = e instanceof Error ? e : new Error(String(e));
+			setError(err.message);
+			setIsLoading(false);
+			setLoadingProgress(null);
+			onError?.(err);
+		}
+	}, [
+		repo,
+		onReady,
+		onError
+	]);
+	const teardownCapture = useCallback(() => {
+		processorRef.current?.disconnect();
+		sourceRef.current?.disconnect();
+		if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
+		for (const t of mediaStreamRef.current?.getTracks() ?? []) t.stop();
+		processorRef.current = null;
+		sourceRef.current = null;
+		audioCtxRef.current = null;
+		mediaStreamRef.current = null;
+	}, []);
+	const startRecording = useCallback(async () => {
+		if (isRecording) return;
+		if (!sttRef.current) await load();
+		if (!sttRef.current) return;
+		setTranscript("");
+		setAudioSeconds(null);
+		setNoSpeech(false);
+		setError(null);
+		chunksRef.current = [];
+		let stream;
+		try {
+			stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+		} catch (e) {
+			const err = e instanceof Error ? e : new Error(String(e));
+			const name = err.name;
+			if (name === "NotAllowedError" || name === "SecurityError") setError("Microphone access denied. Allow mic access for this site and try again.");
+			else if (name === "NotFoundError" || name === "DevicesNotFoundError") setError("No microphone found. Connect a mic and try again.");
+			else setError(err.message);
+			onError?.(err);
+			return;
+		}
+		mediaStreamRef.current = stream;
+		const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
+		if (!AudioCtx) return;
+		const ctx = new AudioCtx();
+		audioCtxRef.current = ctx;
+		sampleRateRef.current = ctx.sampleRate;
+		const source = ctx.createMediaStreamSource(stream);
+		sourceRef.current = source;
+		const processor = ctx.createScriptProcessor(4096, 1, 1);
+		processorRef.current = processor;
+		processor.onaudioprocess = (ev) => {
+			const input = ev.inputBuffer.getChannelData(0);
+			chunksRef.current.push(new Float32Array(input));
+		};
+		source.connect(processor);
+		processor.connect(ctx.destination);
+		setIsRecording(true);
+	}, [
+		isRecording,
+		load,
+		onError
+	]);
+	const stopRecording = useCallback(async () => {
+		if (!isRecording) return;
+		setIsRecording(false);
+		const inputRate = sampleRateRef.current;
+		const captured = chunksRef.current;
+		chunksRef.current = [];
+		teardownCapture();
+		const total = captured.reduce((n, c) => n + c.length, 0);
+		const joined = new Float32Array(total);
+		let off = 0;
+		for (const c of captured) {
+			joined.set(c, off);
+			off += c.length;
+		}
+		const pcm = toMono16k([joined], inputRate);
+		if (pcm.length < 127) {
+			setError("Recording was too short. Hold the mic a moment longer.");
+			return;
+		}
+		setIsTranscribing(true);
+		setError(null);
+		try {
+			const result = await sttRef.current.transcribe(pcm);
+			setAudioSeconds(result.audioSeconds);
+			setNoSpeech(result.noSpeech);
+			setTranscript(result.text);
+			if (result.noSpeech) onNoSpeech?.();
+		} catch (e) {
+			const err = e instanceof Error ? e : new Error(String(e));
+			setError(err.message);
+			onError?.(err);
+		} finally {
+			setIsTranscribing(false);
+		}
+	}, [
+		isRecording,
+		teardownCapture,
+		onError,
+		onNoSpeech
+	]);
+	const dispose = useCallback(() => {
+		teardownCapture();
+		if (sttRef.current) {
+			sttRef.current.destroy?.();
+			sttRef.current = null;
+			loadingRef.current = false;
+			setIsReady(false);
+		}
+	}, [teardownCapture]);
+	useEffect(() => {
+		if (autoLoad) load();
+		return () => {
+			teardownCapture();
+			if (sttRef.current) {
+				sttRef.current.destroy?.();
+				sttRef.current = null;
+			}
+		};
+	}, []);
+	return {
+		load,
+		startRecording,
+		stopRecording,
+		dispose,
+		isLoading,
+		loadingProgress,
+		isReady,
+		isRecording,
+		isTranscribing,
+		transcript,
+		audioSeconds,
+		noSpeech,
+		error
+	};
+}
+//#endregion
+//#region src/browser/use-tts.ts
+/**
+* React hook for native text-to-speech in the browser.
+*
+* Wraps the engine's `speak()` (Kani-TTS-2) — the codec-LM backbone emits
+* NanoCodec audio tokens, the NanoCodec decoder turns them into 22.05 kHz mono
+* PCM, and this hook plays it through the Web Audio API (and keeps the clip for
+* instant replay). The GPU engine is dynamically imported so it stays out of the
+* main bundle until TTS is actually used.
+*
+* @example
+* ```tsx
+* import { useTTS } from "@tryhamster/gerbil/gpu/hooks";
+*
+* const { speak, isSynthesizing, isPlaying } = useTTS();
+* <button onClick={() => speak("Hello from on-device TTS.")}>Speak</button>
+* ```
+*/
+/**
+* Sample rate in Hz (22.05 kHz) used to build the playback buffer when the
+* engine's `speak()` result does not report a `sampleRate`.
+*/
+const KANI_SAMPLE_RATE = 22050;
+/**
+* Built-in voices. Kani-TTS-2-en takes an `en_us`-style language tag prepended
+* to the text; the English checkpoint ships the US-English voice.
+* Each entry's `value` is what `useTTS().speak()` accepts as `opts.voice`.
+*/
+const KANI_VOICES = [{
+	value: "en_us",
+	label: "English (US)"
+}];
+/** Build an AudioBuffer from mono Float32 PCM at the given sample rate. */
+function pcmToAudioBuffer(ctx, pcm, sampleRate) {
+	const buffer = ctx.createBuffer(1, pcm.length, sampleRate);
+	buffer.getChannelData(0).set(pcm);
+	return buffer;
+}
+function useTTS(options = {}) {
+	const { repo = DEFAULT_MODELS.tts, autoLoad = false, onReady, onError } = options;
+	const engineRef = useRef(null);
+	const loadingRef = useRef(false);
+	const audioCtxRef = useRef(null);
+	const sourceRef = useRef(null);
+	const bufferRef = useRef(null);
+	const [isLoading, setIsLoading] = useState(false);
+	const [loadingProgress, setLoadingProgress] = useState(null);
+	const [isReady, setIsReady] = useState(false);
+	const [isSynthesizing, setIsSynthesizing] = useState(false);
+	const [isPlaying, setIsPlaying] = useState(false);
+	const [hasAudio, setHasAudio] = useState(false);
+	const [audioSeconds, setAudioSeconds] = useState(null);
+	const [rtf, setRtf] = useState(null);
+	const [error, setError] = useState(null);
+	const load = useCallback(async () => {
+		if (engineRef.current || loadingRef.current) return;
+		loadingRef.current = true;
+		if (typeof navigator === "undefined" || !("gpu" in navigator)) {
+			loadingRef.current = false;
+			const err = /* @__PURE__ */ new Error("WebGPU is not available in this browser. Native text-to-speech requires Chrome/Edge 113+, Firefox 141+, or Safari 26+.");
+			setError(err.message);
+			onError?.(err);
+			return;
+		}
+		setIsLoading(true);
+		setError(null);
+		setLoadingProgress({ status: "Initializing TTS..." });
+		try {
+			const { WebGPUEngine } = await import("./index.mjs");
+			engineRef.current = await WebGPUEngine.create({
+				repo,
+				onProgress: (loaded, total, message) => {
+					setLoadingProgress({
+						status: message,
+						progress: total > 0 ? Math.round(loaded / total * 100) : void 0
+					});
+				}
+			});
+			setIsReady(true);
+			setIsLoading(false);
+			setLoadingProgress(null);
+			onReady?.();
+		} catch (e) {
+			loadingRef.current = false;
+			const err = e instanceof Error ? e : new Error(String(e));
+			setError(err.message);
+			setIsLoading(false);
+			setLoadingProgress(null);
+			onError?.(err);
+		}
+	}, [
+		repo,
+		onReady,
+		onError
+	]);
+	const stop = useCallback(() => {
+		if (sourceRef.current) {
+			try {
+				sourceRef.current.onended = null;
+				sourceRef.current.stop();
+			} catch {}
+			sourceRef.current = null;
+		}
+		setIsPlaying(false);
+	}, []);
+	const playBuffer = useCallback(async () => {
+		const buffer = bufferRef.current;
+		if (!buffer) return;
+		const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
+		if (!AudioCtx) return;
+		if (!audioCtxRef.current || audioCtxRef.current.state === "closed") audioCtxRef.current = new AudioCtx();
+		const ctx = audioCtxRef.current;
+		if (!ctx) return;
+		if (ctx.state === "suspended") await ctx.resume();
+		stop();
+		const source = ctx.createBufferSource();
+		source.buffer = buffer;
+		source.connect(ctx.destination);
+		source.onended = () => {
+			setIsPlaying(false);
+			sourceRef.current = null;
+		};
+		sourceRef.current = source;
+		setIsPlaying(true);
+		source.start();
+	}, [stop]);
+	const speak = useCallback(async (text, opts = {}) => {
+		if (!text.trim()) return;
+		{
+			const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
+			if (AudioCtx) {
+				if (!audioCtxRef.current || audioCtxRef.current.state === "closed") audioCtxRef.current = new AudioCtx();
+				const ctx = audioCtxRef.current;
+				if (ctx.state === "suspended") ctx.resume();
+				try {
+					const warm = ctx.createBufferSource();
+					warm.buffer = ctx.createBuffer(1, 1, ctx.sampleRate);
+					warm.connect(ctx.destination);
+					warm.start(0);
+				} catch {}
+			}
+		}
+		if (!engineRef.current) await load();
+		const engine = engineRef.current;
+		if (!engine) return;
+		setIsSynthesizing(true);
+		setError(null);
+		try {
+			const t0 = performance.now();
+			const { pcm, sampleRate, audioSeconds: secs } = await engine.speak(text, {
+				languageTag: opts.voice ?? "en_us",
+				temperature: opts.temperature ?? 1,
+				topP: opts.topP ?? .95,
+				repetitionPenalty: opts.repetitionPenalty ?? 1.1
+			});
+			const wall = (performance.now() - t0) / 1e3;
+			const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
+			if (AudioCtx && (!audioCtxRef.current || audioCtxRef.current.state === "closed")) audioCtxRef.current = new AudioCtx();
+			if (audioCtxRef.current) bufferRef.current = pcmToAudioBuffer(audioCtxRef.current, pcm, sampleRate ?? KANI_SAMPLE_RATE);
+			setHasAudio(true);
+			setAudioSeconds(secs);
+			setRtf(wall > 0 ? secs / wall : null);
+			setIsSynthesizing(false);
+			await playBuffer();
+		} catch (e) {
+			const err = e instanceof Error ? e : new Error(String(e));
+			setError(err.message);
+			setIsSynthesizing(false);
+			onError?.(err);
+		}
+	}, [
+		load,
+		playBuffer,
+		onError
+	]);
+	const replay = useCallback(async () => {
+		if (!bufferRef.current) return;
+		await playBuffer();
+	}, [playBuffer]);
+	const dispose = useCallback(() => {
+		stop();
+		if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
+		audioCtxRef.current = null;
+		bufferRef.current = null;
+		if (engineRef.current) {
+			engineRef.current.destroy?.();
+			engineRef.current = null;
+			loadingRef.current = false;
+			setIsReady(false);
+		}
+	}, [stop]);
+	useEffect(() => {
+		if (autoLoad) load();
+		return () => {
+			if (sourceRef.current) {
+				try {
+					sourceRef.current.onended = null;
+					sourceRef.current.stop();
+				} catch {}
+				sourceRef.current = null;
+			}
+			if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
+			if (engineRef.current) {
+				engineRef.current.destroy?.();
+				engineRef.current = null;
+			}
+		};
+	}, []);
+	return {
+		load,
+		speak,
+		replay,
+		stop,
+		dispose,
+		isLoading,
+		loadingProgress,
+		isReady,
+		isSynthesizing,
+		isPlaying,
+		hasAudio,
+		audioSeconds,
+		rtf,
+		error
+	};
+}
+//#endregion
+//#region src/browser/use-voice-chat.ts
+/**
+* React hook for a fully on-device voice assistant: speak to it, it transcribes,
+* thinks, and speaks back — no cloud, no API keys. Composes {@link useSTT},
+* {@link useChat}, and {@link useTTS} into one flow.
+*
+* ```tsx
+* import { useVoiceChat } from "@tryhamster/gerbil/hooks";
+*
+* const vc = useVoiceChat();
+* <button onMouseDown={vc.start} onMouseUp={vc.stop}>
+*   {vc.isListening ? "Listening…" : "Hold to talk"}
+* </button>
+* // vc.messages renders the conversation; replies are spoken automatically.
+* ```
+*
+* This is Gerbil-unique — a private, offline voice loop the cloud SDKs can't do.
+*/
+function useVoiceChat(options = {}) {
+	const { sttModel, ttsModel, voice, speak = true, ...chatOptions } = options;
+	const stt = useSTT({ repo: sttModel });
+	const chat = useChat(chatOptions);
+	const tts = useTTS({ repo: ttsModel });
+	const processedRef = useRef("");
+	useEffect(() => {
+		const text = stt.transcript.trim();
+		if (!text || stt.isTranscribing || text === processedRef.current) return;
+		processedRef.current = text;
+		(async () => {
+			const reply = await chat.send(text);
+			if (speak && reply.trim()) await tts.speak(reply, voice ? { voice } : void 0);
+		})();
+	}, [stt.transcript, stt.isTranscribing]);
+	const start = useCallback(() => stt.startRecording(), [stt]);
+	const stop = useCallback(() => stt.stopRecording(), [stt]);
+	return {
+		messages: chat.messages,
+		start,
+		stop,
+		stopSpeaking: tts.stop,
+		clear: chat.clear,
+		isListening: stt.isRecording,
+		isTranscribing: stt.isTranscribing,
+		isThinking: chat.isGenerating,
+		isSpeaking: tts.isSynthesizing || tts.isPlaying,
+		transcript: stt.transcript,
+		isLoading: stt.isLoading || chat.isLoading || tts.isLoading,
+		isReady: chat.isReady,
+		error: stt.error ?? chat.error ?? tts.error
+	};
+}
+//#endregion
+export { KANI_VOICES, useAgent, useAutocomplete, useChat, useCompletion, useEmbedding, useEngine, useMemory, useObject, useSTT, useTTS, useText, useVision, useVoiceChat };
+//# sourceMappingURL=hooks.mjs.map