@tryhamster/gerbil 1.0.0-rc.9 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +318 -104
  3. package/dist/architectures-C1I5V3Dt.mjs +6070 -0
  4. package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
  5. package/dist/browser/index.d.ts +276 -590
  6. package/dist/browser/index.d.ts.map +1 -1
  7. package/dist/browser/index.js +592 -2334
  8. package/dist/browser/index.js.map +1 -1
  9. package/dist/cli.mjs +625 -1098
  10. package/dist/cli.mjs.map +1 -1
  11. package/dist/defaults-9komdrbY.mjs +24 -0
  12. package/dist/defaults-9komdrbY.mjs.map +1 -0
  13. package/dist/frameworks/express.d.mts +1 -3
  14. package/dist/frameworks/express.d.mts.map +1 -1
  15. package/dist/frameworks/express.mjs +7 -7
  16. package/dist/frameworks/express.mjs.map +1 -1
  17. package/dist/frameworks/fastify.d.mts +1 -1
  18. package/dist/frameworks/fastify.d.mts.map +1 -1
  19. package/dist/frameworks/fastify.mjs +3 -3
  20. package/dist/frameworks/fastify.mjs.map +1 -1
  21. package/dist/frameworks/hono.d.mts +1 -1
  22. package/dist/frameworks/hono.d.mts.map +1 -1
  23. package/dist/frameworks/hono.mjs +4 -4
  24. package/dist/frameworks/hono.mjs.map +1 -1
  25. package/dist/frameworks/next.d.mts +3 -2
  26. package/dist/frameworks/next.d.mts.map +1 -1
  27. package/dist/frameworks/next.mjs +4 -4
  28. package/dist/frameworks/next.mjs.map +1 -1
  29. package/dist/frameworks/react.d.mts +1 -1
  30. package/dist/frameworks/trpc.d.mts +1 -1
  31. package/dist/frameworks/trpc.d.mts.map +1 -1
  32. package/dist/frameworks/trpc.mjs +4 -4
  33. package/dist/frameworks/trpc.mjs.map +1 -1
  34. package/dist/gerbil-BetB5xb0.d.mts +488 -0
  35. package/dist/gerbil-BetB5xb0.d.mts.map +1 -0
  36. package/dist/gerbil-CTZUa8EZ.mjs +4 -0
  37. package/dist/gerbil-DNniplr4.mjs +1656 -0
  38. package/dist/gerbil-DNniplr4.mjs.map +1 -0
  39. package/dist/gpu/hooks.d.mts +640 -0
  40. package/dist/gpu/hooks.d.mts.map +1 -0
  41. package/dist/gpu/hooks.mjs +1369 -0
  42. package/dist/gpu/hooks.mjs.map +1 -0
  43. package/dist/gpu/index.d.mts +2 -0
  44. package/dist/gpu/index.mjs +6 -0
  45. package/dist/gpu-DFuglcEx.mjs +3790 -0
  46. package/dist/gpu-DFuglcEx.mjs.map +1 -0
  47. package/dist/index-Dgmb2kE3.d.mts +245 -0
  48. package/dist/index-Dgmb2kE3.d.mts.map +1 -0
  49. package/dist/index-DukkJRMj.d.mts +2114 -0
  50. package/dist/index-DukkJRMj.d.mts.map +1 -0
  51. package/dist/index.d.mts +22 -487
  52. package/dist/index.d.mts.map +1 -1
  53. package/dist/index.mjs +13 -8
  54. package/dist/index.mjs.map +1 -1
  55. package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
  56. package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
  57. package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
  58. package/dist/integrations/ai-sdk.d.mts +75 -6
  59. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  60. package/dist/integrations/ai-sdk.mjs +131 -15
  61. package/dist/integrations/ai-sdk.mjs.map +1 -1
  62. package/dist/integrations/langchain.d.mts +1 -1
  63. package/dist/integrations/langchain.d.mts.map +1 -1
  64. package/dist/integrations/langchain.mjs +5 -5
  65. package/dist/integrations/langchain.mjs.map +1 -1
  66. package/dist/integrations/llamaindex.d.mts +1 -1
  67. package/dist/integrations/llamaindex.d.mts.map +1 -1
  68. package/dist/integrations/llamaindex.mjs +5 -5
  69. package/dist/integrations/llamaindex.mjs.map +1 -1
  70. package/dist/integrations/mcp-client.mjs +3 -3
  71. package/dist/integrations/mcp-client.mjs.map +1 -1
  72. package/dist/integrations/mcp.d.mts +3 -2
  73. package/dist/integrations/mcp.d.mts.map +1 -1
  74. package/dist/integrations/mcp.mjs +5 -5
  75. package/dist/{mcp-BvbriaBy.mjs → mcp-D2vvH1Xc.mjs} +4 -4
  76. package/dist/mcp-D2vvH1Xc.mjs.map +1 -0
  77. package/dist/memory/index.d.mts +3 -0
  78. package/dist/memory/index.mjs +6 -0
  79. package/dist/memory-D1P7Tmda.mjs +4 -0
  80. package/dist/memory-DVN0MnIG.mjs +132 -0
  81. package/dist/memory-DVN0MnIG.mjs.map +1 -0
  82. package/dist/memory-Dj0J1v88.mjs +294 -0
  83. package/dist/memory-Dj0J1v88.mjs.map +1 -0
  84. package/dist/moonshine-stt-17dpP1kr.mjs +4 -0
  85. package/dist/moonshine-stt-4ojLtMq7.mjs +11962 -0
  86. package/dist/moonshine-stt-4ojLtMq7.mjs.map +1 -0
  87. package/dist/{one-liner-s-lD8rCC.mjs → one-liner-JhdIPxzF.mjs} +14 -16
  88. package/dist/one-liner-JhdIPxzF.mjs.map +1 -0
  89. package/dist/repl-BDRkwPGX.mjs +9 -0
  90. package/dist/skills/index.d.mts +270 -320
  91. package/dist/skills/index.d.mts.map +1 -1
  92. package/dist/skills/index.mjs +5 -5
  93. package/dist/{skills-CD3Orlex.mjs → skills-CU694Dc8.mjs} +187 -32
  94. package/dist/skills-CU694Dc8.mjs.map +1 -0
  95. package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
  96. package/dist/tools-DQ1mPUw5.mjs.map +1 -0
  97. package/dist/types-DQBe2lFo.d.mts +165 -0
  98. package/dist/types-DQBe2lFo.d.mts.map +1 -0
  99. package/dist/{types-CiTc7ez3.d.mts → types-LlyYILII.d.mts} +112 -14
  100. package/dist/types-LlyYILII.d.mts.map +1 -0
  101. package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
  102. package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
  103. package/dist/vector-B0panuy6.mjs +95 -0
  104. package/dist/vector-B0panuy6.mjs.map +1 -0
  105. package/docs/PROJECT-STATE.md +321 -0
  106. package/docs/adding-a-model-family.md +280 -0
  107. package/docs/ai-sdk.md +70 -61
  108. package/docs/architecture/overview.md +17 -7
  109. package/docs/browser.md +203 -8
  110. package/docs/embeddings.md +156 -0
  111. package/docs/gerbil-site-native-migration.md +217 -0
  112. package/docs/gpu-engine/architectures.md +398 -0
  113. package/docs/gpu-engine/ir.md +372 -0
  114. package/docs/gpu-engine/kernels.md +718 -0
  115. package/docs/gpu-engine/paper.html +1759 -0
  116. package/docs/gpu-engine/paper.md +2109 -0
  117. package/docs/gpu-engine/safetensors.md +312 -0
  118. package/docs/gpu-engine/tokenizer.md +302 -0
  119. package/docs/memory-rag.md +91 -0
  120. package/docs/metal-safari-intel.md +190 -0
  121. package/docs/mobile-failure-diagnosis.md +124 -0
  122. package/docs/mobile.md +99 -0
  123. package/docs/observability.md +230 -0
  124. package/docs/onnx-removal-plan.md +339 -0
  125. package/docs/research/autoresearch-portable.md +904 -0
  126. package/docs/research/dispatch-reduction-hivemind.md +84 -0
  127. package/docs/research/ios-safari-model-caching.md +117 -0
  128. package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
  129. package/docs/research/native-stt-model-selection.md +49 -0
  130. package/docs/research/native-tts-model-selection.md +90 -0
  131. package/docs/research/native-vs-chromium-decision.md +152 -0
  132. package/docs/research/nemotron-mamba2-inference.md +910 -0
  133. package/docs/research/qwen35-multimodal.md +293 -0
  134. package/docs/research/qwen36-gemma4-targets.md +337 -0
  135. package/docs/research/sota-embedding-models.md +179 -0
  136. package/docs/research/sota-mobile-models-2026.md +263 -0
  137. package/docs/research/sota-modality-models.md +202 -0
  138. package/docs/research/tps-baselines.md +71 -0
  139. package/docs/research/webgpu-m4-reference.md +104 -0
  140. package/docs/site-update-plan.md +155 -0
  141. package/docs/structured-output.md +123 -0
  142. package/docs/stt.md +63 -446
  143. package/docs/tts.md +77 -499
  144. package/docs/vision.md +100 -338
  145. package/package.json +22 -7
  146. package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
  147. package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
  148. package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
  149. package/dist/gerbil-CJ3ifloF.mjs +0 -4
  150. package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
  151. package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
  152. package/dist/gerbil-qOTe1nl2.d.mts +0 -431
  153. package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
  154. package/dist/kokoro-BNTb6egA.mjs +0 -20210
  155. package/dist/kokoro-BNTb6egA.mjs.map +0 -1
  156. package/dist/kokoro-CMOGDSgT.js +0 -20212
  157. package/dist/kokoro-CMOGDSgT.js.map +0 -1
  158. package/dist/mcp-BvbriaBy.mjs.map +0 -1
  159. package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
  160. package/dist/repl-DveXw36T.mjs +0 -9
  161. package/dist/skills-CD3Orlex.mjs.map +0 -1
  162. package/dist/stt-Bu-E23Sc.js +0 -433
  163. package/dist/stt-Bu-E23Sc.js.map +0 -1
  164. package/dist/stt-CpLYbGFd.mjs +0 -433
  165. package/dist/stt-CpLYbGFd.mjs.map +0 -1
  166. package/dist/stt-DRPLEEHB.mjs +0 -3
  167. package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
  168. package/dist/transformers.web-DiD1gTwk.js +0 -44695
  169. package/dist/transformers.web-DiD1gTwk.js.map +0 -1
  170. package/dist/transformers.web-u34VxRFM.js +0 -3
  171. package/dist/tts-CqroPaSK.js +0 -724
  172. package/dist/tts-CqroPaSK.js.map +0 -1
  173. package/dist/tts-DXgsKGCe.mjs +0 -3
  174. package/dist/tts-DeGANMNV.mjs +0 -730
  175. package/dist/tts-DeGANMNV.mjs.map +0 -1
  176. package/dist/types-CiTc7ez3.d.mts.map +0 -1
  177. /package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
  178. /package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
  179. /package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0
@@ -0,0 +1,1369 @@
1
+ import { n as resolveDefaultRepo, t as DEFAULT_MODELS } from "../defaults-9komdrbY.mjs";
2
+ import { useCallback, useEffect, useRef, useState } from "react";
3
+
4
+ //#region src/browser/use-engine.ts
5
+ /**
6
+ * React hook for native WebGPU inference in the browser.
7
+ *
8
+ * Uses gerbil's WebGPUEngine directly on the main thread — no web worker,
9
+ * no ONNX Runtime, no transformers.js. Pure WGSL compute shaders.
10
+ *
11
+ * Handles the full engine lifecycle for you:
12
+ * - loads the model (lazily or on mount),
13
+ * - hot-swaps when you change `model`/`dtype`/`enableVision`/`embedding`,
14
+ * - SHARES one engine across every component that asks for the same config
15
+ * (reference-counted) so you never upload the same weights to the GPU twice,
16
+ * - disposes when the last consumer unmounts.
17
+ *
18
+ * @example
19
+ * ```tsx
20
+ * import { useEngine } from "@tryhamster/gerbil/browser";
21
+ *
22
+ * function App() {
23
+ * const { complete, completion, isLoading, isGenerating, tps } = useEngine({
24
+ * model: "mlx-community/Qwen3.5-0.8B-4bit",
25
+ * autoLoad: true,
26
+ * });
27
+ *
28
+ * if (isLoading) return <div>Loading model...</div>;
29
+ * return (
30
+ * <div>
31
+ * <button onClick={() => complete("What is 2+2?")}>Generate</button>
32
+ * <p>{completion}</p>
33
+ * {isGenerating && <span>{tps?.toFixed(1)} tok/s</span>}
34
+ * </div>
35
+ * );
36
+ * }
37
+ * ```
38
+ */
39
/**
 * Map a failure onto a coarse error category by looking for known substrings
 * in its message (case-insensitive). Rules are checked in order; the first
 * match wins.
 *
 * @param {unknown} err - The thrown value. Normally an Error, but strings and
 *   values without a usable `message` are tolerated and classified too.
 * @returns {"no-webgpu"|"no-adapter"|"device-lost"|"oom"|"network"|"timeout"|"unknown"}
 */
function classifyError(err) {
  // Thrown values are not guaranteed to be Error instances; never dereference
  // `.message` blindly or classification itself becomes a second failure.
  const raw = typeof err === "string" ? err : err?.message;
  const msg = typeof raw === "string" ? raw.toLowerCase() : "";
  const rules = [
    ["no-webgpu", ["webgpu is not available", "not supported"]],
    ["no-adapter", ["no webgpu adapter", "no gpu adapter"]],
    ["device-lost", ["device lost", "device was destroyed"]],
    ["oom", ["out of memory", "allocation failed", "buffer size"]],
    ["network", ["fetch", "network", "cors"]],
    ["timeout", ["timeout", "timed out"]],
  ];
  for (const [kind, needles] of rules) {
    if (needles.some((needle) => msg.includes(needle))) return kind;
  }
  return "unknown";
}
49
// User-facing remediation hints, keyed by the error kind they belong to.
const GUIDANCE_BY_ERROR_KIND = new Map([
  ["no-webgpu", "WebGPU requires Safari 26+ (iOS 26+), Chrome 113+, or Firefox 141+."],
  ["no-adapter", "No GPU found. Try closing other browser tabs that might be using the GPU."],
  ["device-lost", "GPU device was lost (tab may have been backgrounded). Please reload."],
  ["oom", "Not enough GPU memory. Try a smaller model or close other tabs."],
  ["network", "Failed to download model. Check your internet connection."],
  ["timeout", "Model loading timed out. Check your connection or try a smaller model."],
]);

/**
 * Look up the remediation hint for an error kind.
 *
 * @param {string} kind - An error kind such as "oom" or "network".
 * @returns {string} The hint, or an empty string when the kind has none.
 */
function getErrorGuidance(kind) {
  return GUIDANCE_BY_ERROR_KIND.get(kind) ?? "";
}
60
/**
 * Pick the default maximum sequence length for the current environment.
 *
 * @returns {number} 2048 when the user agent string names a phone/tablet
 *   platform (iPhone, iPad, iPod, Android); 4096 otherwise, including when no
 *   `navigator` global exists.
 */
function getDefaultMaxSeqLen() {
  const desktopDefault = 4096;
  const mobileDefault = 2048;
  if (typeof navigator === "undefined") return desktopDefault;
  const looksMobile = /iPhone|iPad|iPod|Android/i.test(navigator.userAgent);
  if (looksMobile) return mobileDefault;
  return desktopDefault;
}
64
/**
 * Registry of engines shared between hook instances, keyed by configuration.
 * Each entry is `{ promise, engine, refs, disposeTimer }`:
 *  - `promise`      the (possibly still pending) result of the factory,
 *  - `engine`       the resolved engine, or null while loading,
 *  - `refs`         number of consumers currently holding the entry,
 *  - `disposeTimer` pending grace-period timer handle, or null.
 */
const SHARED_ENGINES = /* @__PURE__ */ new Map();
/** How long an unreferenced engine is kept alive before it is destroyed. */
const ENGINE_DISPOSE_GRACE_MS = 3e4;
/** Minimum delay before a failed configuration may be loaded again. */
const RETRY_COOLDOWN_MS = 3e3;

/**
 * Take a reference on the shared engine for `key`, creating it with `factory`
 * if no entry exists yet. Cancels any pending disposal of that entry.
 *
 * @param {string} key - Configuration key identifying the engine.
 * @param {() => Promise<object>} factory - Creates the engine; only invoked
 *   when the registry has no entry for `key`.
 * @returns {Promise<object>} The entry's promise, shared by all holders.
 */
function acquireSharedEngine(key, factory) {
  let entry = SHARED_ENGINES.get(key);
  if (!entry) {
    const created = {
      promise: factory(),
      engine: null,
      refs: 0,
      disposeTimer: null
    };
    created.promise.then((eng) => {
      created.engine = eng;
    }).catch(() => {
      if (created.disposeTimer) {
        clearTimeout(created.disposeTimer);
        created.disposeTimer = null;
      }
      // Only evict our own entry: by the time a slow load fails, this entry
      // may already have been disposed and a newer one registered under the
      // same key. Deleting by key alone would orphan that newer engine.
      if (SHARED_ENGINES.get(key) === created) SHARED_ENGINES.delete(key);
    });
    SHARED_ENGINES.set(key, created);
    entry = created;
  }
  if (entry.disposeTimer) {
    clearTimeout(entry.disposeTimer);
    entry.disposeTimer = null;
  }
  entry.refs += 1;
  return entry.promise;
}

/**
 * Drop one reference on the shared engine for `key`. When the last reference
 * goes away the engine is destroyed after `ENGINE_DISPOSE_GRACE_MS`, unless it
 * is re-acquired in the meantime. Unknown keys are ignored.
 *
 * @param {string} key - Configuration key identifying the engine.
 */
function releaseSharedEngine(key) {
  const entry = SHARED_ENGINES.get(key);
  if (!entry) return;
  // Clamp at zero so an unbalanced extra release cannot drive the count
  // negative and make a later holder look like "no holders".
  entry.refs = Math.max(0, entry.refs - 1);
  if (entry.refs > 0 || entry.disposeTimer) return;
  entry.disposeTimer = setTimeout(() => {
    entry.disposeTimer = null;
    if (entry.refs > 0) return;
    // Same identity guard as above: never remove somebody else's entry.
    if (SHARED_ENGINES.get(key) === entry) SHARED_ENGINES.delete(key);
    // Best-effort teardown; a load that failed has nothing to destroy.
    entry.promise.then((eng) => eng.destroy()).catch(() => {});
  }, ENGINE_DISPOSE_GRACE_MS);
}
103
/**
 * Decode an image URL / data URL into tightly packed RGB pixels.
 *
 * The image is loaded through an `Image` element (CORS mode "anonymous"),
 * drawn onto a canvas scaled down so its longest side is at most 448 px
 * (never scaled up), and the alpha channel is dropped.
 *
 * @param {string} src - Image URL or data URL.
 * @returns {Promise<{pixels: Uint8ClampedArray, width: number, height: number}>}
 * @throws {Error} When the image fails to load or no 2D context is available.
 */
async function decodeImage(src) {
  const maxSide = 448;
  const picture = new Image();
  picture.crossOrigin = "anonymous";
  const loaded = new Promise((resolve, reject) => {
    picture.onload = () => resolve();
    picture.onerror = () => reject(new Error("Failed to load image."));
  });
  picture.src = src;
  await loaded;

  const longestSide = Math.max(picture.naturalWidth, picture.naturalHeight);
  const scale = Math.min(1, maxSide / longestSide);
  const canvas = document.createElement("canvas");
  canvas.width = Math.max(1, Math.round(picture.naturalWidth * scale));
  canvas.height = Math.max(1, Math.round(picture.naturalHeight * scale));
  const { width, height } = canvas;

  const context2d = canvas.getContext("2d");
  if (!context2d) throw new Error("Could not get 2D canvas context for image decode.");
  context2d.drawImage(picture, 0, 0, width, height);

  const rgba = context2d.getImageData(0, 0, width, height).data;
  const pixels = new Uint8ClampedArray(width * height * 3);
  // Copy R, G, B of every pixel and skip the fourth (alpha) byte.
  const pixelCount = Math.ceil(rgba.length / 4);
  for (let p = 0; p < pixelCount; p++) {
    const from = p * 4;
    const to = p * 3;
    pixels[to] = rgba[from];
    pixels[to + 1] = rgba[from + 1];
    pixels[to + 2] = rgba[from + 2];
  }
  return { pixels, width, height };
}
132
/**
 * React hook that loads a WebGPU engine (shared through the reference-counted
 * engine registry) and exposes generation helpers plus loading/error state.
 *
 * Ownership rule for the shared engine: `heldKeyRef` names the registry key
 * this hook instance currently holds a reference on. Whoever clears or
 * replaces `heldKeyRef` releases that reference, exactly once.
 *
 * @param {object} [options]
 * @param {string} [options.model] - Model repo; resolved through `resolveDefaultRepo`.
 * @param {number} [options.maxSeqLen] - Defaults to `getDefaultMaxSeqLen()`.
 * @param {string} [options.dtype="auto"]
 * @param {boolean} [options.autoLoad=false] - Start loading on mount.
 * @param {boolean} [options.enableVision=false]
 * @param {boolean} [options.embedding=false]
 * @param {number} [options.loadingTimeout=300000] - Milliseconds before a load is reported as timed out.
 * @param {() => void} [options.onReady] - Called once the engine is ready.
 * @param {(err: Error, kind: string) => void} [options.onError] - Called with the error and its classified kind.
 * @returns {object} Generation helpers (`complete`, `autocomplete`, `rewrite`,
 *   `generateWithTools`, `generateObject`, `describeImage`, `embed`,
 *   `similarity`), state (`completion`, `isLoading`, `loadingProgress`,
 *   `isGenerating`, `tps`, `attempts`, `error`, `errorKind`, `isReady`) and
 *   lifecycle controls (`load`, `stop`, `dispose`).
 */
function useEngine(options = {}) {
  const { model: modelOption, maxSeqLen, dtype = "auto", autoLoad = false, enableVision = false, embedding = false, loadingTimeout = 3e5, onReady, onError } = options;
  const model = resolveDefaultRepo({
    repo: modelOption,
    embedding,
    enableVision
  });
  const engineRef = useRef(null);
  const stoppedRef = useRef(false);
  const timeoutRef = useRef(null);
  // Registry key this instance currently holds a reference on (null = none).
  const heldKeyRef = useRef(null);
  // `{ key, at }` of the most recent failed load, used for the retry cooldown.
  const failedKeyRef = useRef(null);
  const [isLoading, setIsLoading] = useState(false);
  const [loadingProgress, setLoadingProgress] = useState(null);
  const [isGenerating, setIsGenerating] = useState(false);
  const [isReady, setIsReady] = useState(false);
  const [completion, setCompletion] = useState("");
  const [tps, setTps] = useState(null);
  const [attempts, setAttempts] = useState(0);
  const [error, setError] = useState(null);
  const [errorKind, setErrorKind] = useState(null);
  const modelKey = `${model}|${dtype}|${enableVision}|${embedding}|${maxSeqLen ?? "auto"}`;

  // Record a failure in state (message plus guidance) and notify `onError`.
  const fail = useCallback((e) => {
    const err = e instanceof Error ? e : new Error(String(e));
    const kind = classifyError(err);
    const guidance = getErrorGuidance(kind);
    setError(guidance ? `${err.message} ${guidance}` : err.message);
    setErrorKind(kind);
    setIsLoading(false);
    setLoadingProgress(null);
    onError?.(err, kind);
  }, [onError]);

  // Load (or join) the shared engine for the current configuration. No-op when
  // an engine is already loaded, the same key is already being loaded, or the
  // key failed less than RETRY_COOLDOWN_MS ago.
  const load = useCallback(async () => {
    const failed = failedKeyRef.current;
    const inCooldown = failed?.key === modelKey && Date.now() - failed.at < RETRY_COOLDOWN_MS;
    if (engineRef.current || heldKeyRef.current === modelKey || inCooldown) return;
    if (typeof navigator === "undefined" || !("gpu" in navigator)) {
      failedKeyRef.current = {
        key: modelKey,
        at: Date.now()
      };
      fail(/* @__PURE__ */ new Error("WebGPU is not available in this browser."));
      return;
    }
    setIsLoading(true);
    setError(null);
    setErrorKind(null);
    setLoadingProgress({ status: "Initializing WebGPU engine..." });
    if (timeoutRef.current) clearTimeout(timeoutRef.current);
    // Keep this load's timer in a local: a superseded load must only ever
    // clear its own timer, never the one belonging to the load that replaced it.
    let timer = null;
    const timeoutPromise = new Promise((_, reject) => {
      timer = setTimeout(() => reject(/* @__PURE__ */ new Error("Model loading timed out. The download may be too slow.")), loadingTimeout);
    });
    timeoutRef.current = timer;
    const clearOwnTimeout = () => {
      clearTimeout(timer);
      if (timeoutRef.current === timer) timeoutRef.current = null;
    };
    const key = modelKey;
    // If a different key is still held (an older load in flight), give its
    // reference back before taking over; that load will see it was superseded.
    const previousKey = heldKeyRef.current;
    if (previousKey !== null && previousKey !== key) releaseSharedEngine(previousKey);
    heldKeyRef.current = key;
    const factory = async () => {
      const { WebGPUEngine } = await import("./index.mjs");
      return WebGPUEngine.create({
        repo: model,
        maxSeqLen: maxSeqLen ?? getDefaultMaxSeqLen(),
        dtype,
        enableVision,
        embedding,
        onProgress: (loaded, total, message) => {
          setLoadingProgress({
            status: message,
            progress: total > 0 ? Math.round(loaded / total * 100) : void 0
          });
        }
      });
    };
    try {
      const engine = await Promise.race([acquireSharedEngine(key, factory), timeoutPromise]);
      clearOwnTimeout();
      // Superseded while loading (model switch, dispose, or unmount). Whoever
      // replaced `heldKeyRef` already released our reference; releasing again
      // here would steal a reference from another holder of the same engine.
      if (heldKeyRef.current !== key) return;
      engineRef.current = engine;
      failedKeyRef.current = null;
      if (typeof window !== "undefined") window.__gerbilEngine = engine;
      setIsReady(true);
      setIsLoading(false);
      setLoadingProgress(null);
      onReady?.();
    } catch (e) {
      clearOwnTimeout();
      failedKeyRef.current = {
        key,
        at: Date.now()
      };
      // Same rule as the success path: a superseded load neither releases
      // again nor reports an error for a configuration nobody is waiting on.
      if (heldKeyRef.current !== key) return;
      heldKeyRef.current = null;
      releaseSharedEngine(key);
      fail(e);
    }
  }, [
    modelKey,
    model,
    maxSeqLen,
    dtype,
    enableVision,
    embedding,
    loadingTimeout,
    onReady,
    fail
  ]);

  // Stop surfacing streamed tokens from the current generation.
  const stop = useCallback(() => {
    stoppedRef.current = true;
  }, []);

  // Drop this instance's engine reference and reset readiness.
  const dispose = useCallback(() => {
    if (timeoutRef.current) clearTimeout(timeoutRef.current);
    engineRef.current = null;
    failedKeyRef.current = null;
    setIsReady(false);
    if (heldKeyRef.current) {
      releaseSharedEngine(heldKeyRef.current);
      heldKeyRef.current = null;
    }
  }, []);

  // Stream a completion; resolves with the final text, or with whatever was
  // streamed so far when generation fails (the error is reported via `fail`).
  const complete = useCallback(async (prompt, opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    setIsGenerating(true);
    setCompletion("");
    setTps(null);
    stoppedRef.current = false;
    let fullText = "";
    try {
      const result = await engine.generate(prompt, {
        maxTokens: opts.maxTokens ?? 256,
        sampling: { temperature: opts.temperature ?? .7 },
        systemPrompt: opts.system,
        stopSequences: opts.stopSequences,
        onToken: (token, meta) => {
          if (stoppedRef.current) return;
          fullText += token;
          setCompletion(fullText);
          if (meta) setTps(meta.tps);
        }
      });
      setTps(result.tokensPerSecond);
      setIsGenerating(false);
      return result.text;
    } catch (e) {
      setIsGenerating(false);
      fail(e);
      return fullText;
    }
  }, [fail]);

  const autocomplete = useCallback(async (prefix, opts) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    return engine.autocomplete(prefix, opts);
  }, []);

  const rewrite = useCallback(async (text, opts) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    return engine.rewrite(text, opts);
  }, []);

  const generateWithTools = useCallback((prompt, opts) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    return engine.generateWithTools(prompt, opts);
  }, []);

  // Structured output; unlike `complete`, failures are reported AND rethrown.
  const generateObject = useCallback(async (prompt, opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    setIsGenerating(true);
    setCompletion("");
    setTps(null);
    setAttempts(0);
    stoppedRef.current = false;
    try {
      const result = await engine.generateObject(prompt, {
        schema: opts.schema,
        maxRetries: opts.maxRetries,
        maxTokens: opts.maxTokens ?? 256,
        sampling: { temperature: opts.temperature ?? .7 },
        systemPrompt: opts.system,
        stopSequences: opts.stopSequences
      });
      setCompletion(result.text);
      setAttempts(result.attempts);
      setIsGenerating(false);
      return {
        object: result.object,
        attempts: result.attempts
      };
    } catch (e) {
      setIsGenerating(false);
      fail(e);
      throw e instanceof Error ? e : new Error(String(e));
    }
  }, [fail]);

  // Describe an image given as a URL/data URL string or as already-decoded pixels.
  const describeImage = useCallback(async (image, prompt = "Describe this image.", opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    if (!engine.hasVision) throw new Error("Engine was not created with enableVision: true.");
    setIsGenerating(true);
    setCompletion("");
    setTps(null);
    stoppedRef.current = false;
    let decoded;
    try {
      decoded = typeof image === "string" ? await decodeImage(image) : image;
    } catch (e) {
      // Decoding still rejects to the caller as before, but must not leave the
      // hook stuck in the "generating" state.
      setIsGenerating(false);
      throw e;
    }
    let fullText = "";
    try {
      const result = await engine.describeImage(decoded, prompt, {
        maxTokens: opts.maxTokens ?? 150,
        sampling: { temperature: opts.temperature ?? .7 },
        systemPrompt: opts.system,
        stopSequences: opts.stopSequences,
        onToken: (token, meta) => {
          if (stoppedRef.current) return;
          fullText += token;
          setCompletion(fullText);
          if (meta) setTps(meta.tps);
        }
      });
      setTps(result.tokensPerSecond);
      setIsGenerating(false);
      return result.text;
    } catch (e) {
      setIsGenerating(false);
      fail(e);
      return fullText;
    }
  }, [fail]);

  const embed = useCallback(async (text, opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    if (!engine.isEmbedding) throw new Error("Engine was not created with embedding: true.");
    return engine.embed(text, { taskType: opts.taskType ?? "query" });
  }, []);

  // Dot product of the two embeddings (`a` embedded as query, `b` as document).
  const similarity = useCallback(async (a, b) => {
    const [va, vb] = await Promise.all([embed(a, { taskType: "query" }), embed(b, { taskType: "document" })]);
    let dot = 0;
    const n = Math.min(va.length, vb.length);
    for (let i = 0; i < n; i++) dot += va[i] * vb[i];
    return dot;
  }, [embed]);

  // Mount: optionally auto-load. Unmount: give the shared engine reference back.
  useEffect(() => {
    if (autoLoad) load();
    return () => {
      if (timeoutRef.current) clearTimeout(timeoutRef.current);
      engineRef.current = null;
      if (heldKeyRef.current) {
        releaseSharedEngine(heldKeyRef.current);
        heldKeyRef.current = null;
      }
    };
  }, []);

  // Hot-swap: when the configuration key changes while an engine is held (or
  // loading), release it, reset state and load the new configuration.
  useEffect(() => {
    if (heldKeyRef.current === modelKey) return;
    failedKeyRef.current = null;
    if (heldKeyRef.current === null) return;
    engineRef.current = null;
    releaseSharedEngine(heldKeyRef.current);
    heldKeyRef.current = null;
    setIsReady(false);
    setCompletion("");
    setTps(null);
    setError(null);
    setErrorKind(null);
    load();
  }, [modelKey]);

  return {
    complete,
    autocomplete,
    rewrite,
    generateWithTools,
    generateObject,
    describeImage,
    embed,
    similarity,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    attempts,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  };
}
418
+
419
+ //#endregion
420
+ //#region src/browser/use-agent.ts
421
/**
 * React hook for agentic tool-calling in the browser.
 *
 * Owns the agent loop state (running flag, step trace, final answer) on top of
 * `useEngine().generateWithTools`. You supply the model + tools; the hook runs the
 * generate → call-tool → feed-result loop and exposes the steps for trace UIs.
 *
 * @example
 * ```tsx
 * import { useAgent } from "@tryhamster/gerbil/gpu/hooks";
 *
 * const { run, steps, answer, isRunning } = useAgent({
 *   model: "mlx-community/Qwen3.5-0.8B-4bit",
 *   tools: [weatherTool],
 * });
 * await run("What's the weather in Paris?");
 * ```
 *
 * @param {object} options
 * @param {string} [options.model] - Model passed through to `useEngine`.
 * @param {Array} options.tools - Tools offered to the model; the latest array is used on each run.
 * @param {number} [options.maxSteps=5]
 * @param {boolean} [options.autoLoad=false]
 * @returns {{run: Function, steps: Array, answer: string, isRunning: boolean,
 *   isReady: boolean, load: Function, reset: Function, error: (string|null)}}
 */
function useAgent(options) {
  const { model, tools, maxSteps = 5, autoLoad = false } = options;
  // Pull out the individual members: the object returned by `useEngine` is a
  // fresh literal on every render, so depending on it as a whole would give
  // `run` a new identity on every render and defeat `useCallback`.
  const { isReady, load, generateWithTools, error } = useEngine({
    model,
    autoLoad
  });
  const [steps, setSteps] = useState([]);
  const [answer, setAnswer] = useState("");
  const [isRunning, setIsRunning] = useState(false);
  // Keep the newest tools in a ref so `run` does not need to be re-created
  // whenever the caller passes a new array.
  const toolsRef = useRef(tools);
  toolsRef.current = tools;
  const run = useCallback(async (prompt) => {
    setIsRunning(true);
    setSteps([]);
    setAnswer("");
    try {
      if (!isReady) await load();
      const { text } = await generateWithTools(prompt, {
        tools: toolsRef.current,
        maxSteps,
        onStep: (step) => setSteps((prev) => [...prev, step])
      });
      setAnswer(text);
      return text;
    } finally {
      setIsRunning(false);
    }
  }, [
    isReady,
    load,
    generateWithTools,
    maxSteps
  ]);
  const reset = useCallback(() => {
    setSteps([]);
    setAnswer("");
  }, []);
  return {
    run,
    steps,
    answer,
    isRunning,
    isReady,
    load,
    reset,
    error
  };
}
482
+
483
+ //#endregion
484
+ //#region src/browser/use-autocomplete.ts
485
/**
 * React hook for debounced inline autocomplete (ghost text).
 *
 * Owns the debounce, in-flight, and stale-response guards so a component only has
 * to render the suggestion and handle accept/dismiss. Built on `useEngine`, so it
 * shares the same reference-counted engine as other hooks.
 *
 * @example
 * ```tsx
 * import { useAutocomplete } from "@tryhamster/gerbil/gpu/hooks";
 *
 * const { suggestion, onInput, accept, dismiss } = useAutocomplete({
 *   model: "mlx-community/Qwen3.5-0.8B-4bit",
 * });
 * // <input onChange={(e) => onInput(e.target.value)} />
 * // render `suggestion` as ghost text; Tab → accept(), Esc → dismiss()
 * ```
 *
 * @param {object} options
 * @param {string} [options.model]
 * @param {number} [options.debounceMs=550] - Quiet time after the last input before requesting.
 * @param {number} [options.minChars=8] - Minimum trimmed input length that triggers a request.
 * @param {number} [options.maxTokens=16]
 * @param {number} [options.temperature=0.3]
 * @param {boolean} [options.autoLoad=false]
 * @returns {{suggestion: string, isFetching: boolean, isReady: boolean, load: Function,
 *   onInput: Function, accept: Function, dismiss: Function, error: (string|null)}}
 */
function useAutocomplete(options) {
  const { model, debounceMs = 550, minChars = 8, maxTokens = 16, temperature = .3, autoLoad = false } = options;
  // Destructure so the callbacks below depend on stable members rather than on
  // the per-render object returned by `useEngine`.
  const { isReady, load, autocomplete, error } = useEngine({
    model,
    autoLoad
  });
  const [suggestion, setSuggestion] = useState("");
  const [isFetching, setIsFetching] = useState(false);
  const debounceRef = useRef(null);
  const inFlightRef = useRef(false);
  // Text the awaited/displayed suggestion belongs to; "" invalidates it.
  const requestedForRef = useRef("");
  // Newest text that arrived while another request was in flight.
  const queuedRef = useRef(null);
  const request = useCallback(async (text) => {
    if (text.trim().length < minChars) return;
    if (inFlightRef.current) {
      // One request at a time: remember the newest text and serve it as soon
      // as the current request settles, instead of dropping it.
      queuedRef.current = text;
      return;
    }
    inFlightRef.current = true;
    requestedForRef.current = text;
    setIsFetching(true);
    try {
      if (!isReady) await load();
      if (error) return;
      const out = await autocomplete(text, {
        maxTokens,
        temperature
      });
      if (requestedForRef.current !== text) return;
      if (out) setSuggestion(out);
    } catch {
      // Ghost text is best-effort and this runs from a timer with nobody
      // awaiting it, so a rejection here would only surface as an unhandled
      // rejection. Load failures are already exposed through `error`.
    } finally {
      inFlightRef.current = false;
      setIsFetching(false);
      const queued = queuedRef.current;
      queuedRef.current = null;
      if (queued !== null && queued !== text) void request(queued);
    }
  }, [
    isReady,
    load,
    autocomplete,
    error,
    minChars,
    maxTokens,
    temperature
  ]);
  const onInput = useCallback((text) => {
    setSuggestion("");
    if (debounceRef.current) clearTimeout(debounceRef.current);
    queuedRef.current = null;
    // The input changed: a result still in flight for the previous text must
    // not be shown against the new text.
    if (text !== requestedForRef.current) requestedForRef.current = "";
    if (text.trim().length < minChars) return;
    debounceRef.current = setTimeout(() => void request(text), debounceMs);
  }, [
    request,
    minChars,
    debounceMs
  ]);
  const accept = useCallback(() => {
    const s = suggestion;
    setSuggestion("");
    requestedForRef.current = "";
    queuedRef.current = null;
    return s;
  }, [suggestion]);
  const dismiss = useCallback(() => {
    setSuggestion("");
    requestedForRef.current = "";
    queuedRef.current = null;
  }, []);
  useEffect(() => () => {
    if (debounceRef.current) clearTimeout(debounceRef.current);
    queuedRef.current = null;
  }, []);
  return {
    suggestion,
    isFetching,
    isReady,
    load,
    onInput,
    accept,
    dismiss,
    error
  };
}
573
+
574
+ //#endregion
575
+ //#region src/browser/use-memory.ts
576
/**
 * React hook for on-device memory / RAG.
 *
 * Wraps the `@tryhamster/gerbil/memory` module with a native embedder (running
 * on the WebGPU engine) and a persistent IndexedDB store, so an agent can
 * remember things across turns AND across sessions — with zero server.
 *
 * ```tsx
 * import { useMemory } from "@tryhamster/gerbil/gpu/hooks";
 *
 * const memory = useMemory();
 * await memory.add("The user prefers TypeScript.");
 * const { context } = await memory.recall("what does the user like?", { tokenBudget: 256 });
 * ```
 *
 * The embedding model and the memory module are both imported lazily, so this
 * hook adds nothing to your bundle until it's used.
 *
 * @param {object} [options]
 * @param {string} [options.model] - Embedding model passed through to `useEngine`.
 * @param {string} [options.namespace="gerbil-memory"] - IndexedDB database name.
 * @returns {object} `add`, `recall`, `search`, `get`, `remove`, `clear`, `size`
 *   (each initializes the memory on first use) plus `isLoading`,
 *   `loadingProgress`, `isReady` and `error`.
 */
function useMemory(options = {}) {
  const { model, namespace = "gerbil-memory" } = options;
  const embedder = useEngine({
    model,
    embedding: true,
    autoLoad: false
  });
  // Stable members only: depending on the whole `embedder` object (new on every
  // render) would re-create `ensure` and every callback below on each render.
  const { isReady: isEmbedderReady, load: loadEmbedder, embed } = embedder;
  const memRef = useRef(null);
  const initRef = useRef(null);
  const [isReady, setIsReady] = useState(false);
  // Lazily create the memory once; concurrent callers share the same promise.
  const ensure = useCallback(() => {
    if (memRef.current) return Promise.resolve(memRef.current);
    if (initRef.current) return initRef.current;
    const init = (async () => {
      if (!isEmbedderReady) await loadEmbedder();
      const [{ createMemory }, { createIndexedDBStore }] = await Promise.all([import("../memory-D1P7Tmda.mjs"), import("../indexeddb-store-ClH12Xnl.mjs")]);
      const mem = createMemory({
        embed: async (texts) => Promise.all(texts.map((t) => embed(t))),
        store: createIndexedDBStore({ dbName: namespace })
      });
      memRef.current = mem;
      setIsReady(true);
      return mem;
    })();
    initRef.current = init;
    // Do not cache a failed initialization: forget the rejected promise so the
    // next call retries instead of failing forever. Callers still see the rejection.
    init.catch(() => {
      if (initRef.current === init) initRef.current = null;
    });
    return init;
  }, [
    isEmbedderReady,
    loadEmbedder,
    embed,
    namespace
  ]);
  return {
    add: useCallback(async (text, opts) => (await ensure()).add(text, opts), [ensure]),
    recall: useCallback(async (query, opts) => (await ensure()).recall(query, opts), [ensure]),
    search: useCallback(async (query, opts) => (await ensure()).search(query, opts), [ensure]),
    get: useCallback(async (id) => (await ensure()).get(id), [ensure]),
    remove: useCallback(async (id) => (await ensure()).delete(id), [ensure]),
    clear: useCallback(async () => (await ensure()).clear(), [ensure]),
    size: useCallback(async () => (await ensure()).size(), [ensure]),
    isLoading: embedder.isLoading,
    loadingProgress: embedder.loadingProgress,
    isReady,
    error: embedder.error
  };
}
634
+
635
+ //#endregion
636
+ //#region src/browser/use-modalities.ts
637
+ /**
638
+ * Per-modality convenience hooks built on {@link useEngine}.
639
+ *
640
+ * `useEngine` is the general/advanced hook (it can do text, vision, and
641
+ * embeddings via options). These wrappers give each modality a focused,
642
+ * self-documenting surface so app code reads cleanly:
643
+ *
644
+ * ```tsx
645
+ * import { useText, useVision, useEmbedding } from "@tryhamster/gerbil/gpu/hooks";
646
+ *
647
+ * const { complete } = useText(); // text generation
648
+ * const { describeImage } = useVision(); // image → text
649
+ * const { embed, similarity } = useEmbedding(); // text → vector
650
+ * ```
651
+ *
652
+ * They share the same engine registry as `useEngine`, so requesting the same
653
+ * model from several places loads it once.
654
+ */
655
/**
 * Text-generation hook: a narrowed view of {@link useEngine} that exposes
 * only the fields relevant to generating text.
 *
 * @param {object} [options] Passed to `useEngine` unchanged.
 * @returns {object} The text-related subset of the engine's fields.
 */
function useText(options = {}) {
  const {
    complete,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  } = useEngine(options);
  return {
    complete,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  };
}
673
/**
 * Structured-output hook. `generate()` delegates to the engine's
 * `generateObject` (generate, parse JSON, validate, retry until valid) and
 * keeps the most recent result in `object`.
 *
 * ```tsx
 * const { object, generate, isGenerating } = useObject<{ name: string; age: number }>();
 * await generate('Extract {name, age} from: "I am Sarah, 28"', {
 *   schema: { required: ["name", "age"] },
 * });
 * // object === { name: "Sarah", age: 28 }
 * ```
 *
 * @param {object} [options] Passed to `useEngine` unchanged.
 */
function useObject(options = {}) {
  const engine = useEngine(options);
  // Last generated object; null until the first successful `generate()`.
  const [object, setObject] = useState(null);
  const generate = useCallback(async (prompt, opts) => {
    // Load the model on demand so callers don't need to call `load()` first.
    if (!engine.isReady) {
      await engine.load();
    }
    const { object: parsed } = await engine.generateObject(prompt, opts);
    setObject(parsed);
    return parsed;
  }, [engine]);
  return {
    object,
    generate,
    attempts: engine.attempts,
    isLoading: engine.isLoading,
    loadingProgress: engine.loadingProgress,
    isGenerating: engine.isGenerating,
    error: engine.error,
    errorKind: engine.errorKind,
    isReady: engine.isReady,
    load: engine.load,
    stop: engine.stop,
    dispose: engine.dispose
  };
}
708
/**
 * Image-understanding hook (image in, text out). Calls {@link useEngine}
 * with `enableVision: true` forced on and exposes the vision-related fields.
 *
 * @param {object} [options] Engine options; `enableVision` is always overridden to `true`.
 */
function useVision(options = {}) {
  const visionOptions = { ...options, enableVision: true };
  const {
    describeImage,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  } = useEngine(visionOptions);
  return {
    describeImage,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  };
}
729
/**
 * Text-embedding hook. Calls {@link useEngine} with `embedding: true` forced
 * on and exposes `embed` / `similarity` plus loading state.
 *
 * @param {object} [options] Engine options; `embedding` is always overridden to `true`.
 */
function useEmbedding(options = {}) {
  const embeddingOptions = { ...options, embedding: true };
  const {
    embed,
    similarity,
    isLoading,
    loadingProgress,
    error,
    errorKind,
    isReady,
    load,
    dispose
  } = useEngine(embeddingOptions);
  return {
    embed,
    similarity,
    isLoading,
    loadingProgress,
    error,
    errorKind,
    isReady,
    load,
    dispose
  };
}
747
/**
 * Conversational chat hook — manages the message list and streams replies.
 * Multi-turn context is handled for you (the full history is sent each turn).
 *
 * ```tsx
 * const { messages, send, isGenerating } = useChat();
 * <button onClick={() => send("Hello!")}>Send</button>
 * ```
 *
 * @param {object} [options] `system` is an optional hook-level system prompt;
 *   every other key is forwarded to `useEngine`.
 * @returns {object} `messages`, `send`/`sendMessage`, `regenerate`,
 *   `setMessages`, `clear`, a derived `status`, and engine state/controls.
 */
function useChat(options = {}) {
  const { system, ...engineOptions } = options;
  const e = useEngine(engineOptions);
  const [messages, setMessages] = useState([]);
  // Mirror of `messages` so callbacks can read the latest list without
  // having it in their dependency arrays.
  const messagesRef = useRef([]);
  messagesRef.current = messages;
  // While generating, copy the engine's streamed text into the trailing
  // assistant message so the UI updates as the reply streams.
  useEffect(() => {
    if (!e.isGenerating) return;
    setMessages((prev) => {
      if (prev.length === 0 || prev[prev.length - 1].role !== "assistant") return prev;
      const copy = prev.slice();
      copy[copy.length - 1] = {
        role: "assistant",
        content: e.completion
      };
      return copy;
    });
  }, [e.completion, e.isGenerating]);
  /**
   * Run one turn: show `history` plus an empty assistant placeholder, generate
   * the reply, then write the final text into the placeholder.
   * @param {Array<{role: string, content: string}>} history
   * @param {object} [opts] Per-call options; `opts.system` overrides the hook-level prompt.
   * @returns {Promise<string>} The full reply text.
   */
  const run = useCallback(async (history, opts = {}) => {
    setMessages([...history, {
      role: "assistant",
      content: ""
    }]);
    if (!e.isReady) await e.load();
    // Resolve the system prompt once so the prepended system turn and the
    // `system` option can never disagree when a per-call override is given.
    const effectiveSystem = opts.system ?? system;
    const turns = effectiveSystem ? [{
      role: "system",
      content: effectiveSystem
    }, ...history] : history;
    const full = await e.complete(turns, {
      ...opts,
      system: effectiveSystem
    });
    setMessages((prev) => {
      if (prev.length === 0) return prev;
      const copy = prev.slice();
      copy[copy.length - 1] = {
        role: "assistant",
        content: full
      };
      return copy;
    });
    return full;
  }, [e, system]);
  // Append a user message and generate a reply. Blank input, or a call made
  // while a reply is still generating, is a no-op that resolves to "".
  const send = useCallback(async (text, opts = {}) => {
    if (!text.trim() || e.isGenerating) return "";
    return run([...messagesRef.current, {
      role: "user",
      content: text
    }], opts);
  }, [e.isGenerating, run]);
  return {
    messages,
    send,
    sendMessage: send,
    // Drop trailing assistant message(s) and re-run the remaining history.
    regenerate: useCallback(async (opts = {}) => {
      if (e.isGenerating) return "";
      const msgs = messagesRef.current.slice();
      while (msgs.length > 0 && msgs[msgs.length - 1].role === "assistant") msgs.pop();
      if (msgs.length === 0) return "";
      return run(msgs, opts);
    }, [e.isGenerating, run]),
    setMessages: useCallback((next) => setMessages(next), []),
    clear: useCallback(() => setMessages([]), []),
    // "submitted" = generating but no text yet; "streaming" = text arriving.
    status: e.error ? "error" : e.isGenerating ? e.completion.length === 0 ? "submitted" : "streaming" : "ready",
    isGenerating: e.isGenerating,
    isLoading: e.isLoading,
    loadingProgress: e.loadingProgress,
    isReady: e.isReady,
    tps: e.tps,
    error: e.error,
    errorKind: e.errorKind,
    stop: e.stop,
    load: e.load
  };
}
831
/**
 * Single-prompt streaming completion with built-in input state, modelled on
 * the Vercel AI SDK's `useCompletion` but running on-device via `useText`.
 *
 * Note that the returned `isLoading` mirrors the text hook's `isGenerating`,
 * not its model-loading flag.
 *
 * @param {object} [options] Passed to `useText` unchanged.
 */
function useCompletion(options = {}) {
  const text = useText(options);
  const [input, setInput] = useState("");
  // Load the model on demand, then delegate to the text hook.
  const complete = useCallback(async (prompt, opts) => {
    if (!text.isReady) {
      await text.load();
    }
    return text.complete(prompt, opts);
  }, [text]);
  // Change handler for a controlled <input>/<textarea>.
  const handleInputChange = useCallback((event) => setInput(event.target.value), []);
  // Form submit handler: ignore blank input, otherwise clear the field and
  // start a completion with what was typed.
  const handleSubmit = useCallback((event) => {
    event?.preventDefault?.();
    if (input.trim() === "") return;
    setInput("");
    complete(input);
  }, [input, complete]);
  return {
    completion: text.completion,
    complete,
    input,
    setInput,
    handleInputChange,
    handleSubmit,
    isLoading: text.isGenerating,
    isReady: text.isReady,
    loadingProgress: text.loadingProgress,
    stop: text.stop,
    error: text.error,
    load: text.load
  };
}
865
+
866
+ //#endregion
867
+ //#region src/browser/use-stt.ts
868
+ /**
869
+ * React hook for native speech-to-text in the browser.
870
+ *
871
+ * Wraps `MoonshineSTT` — raw 16 kHz mono PCM in, transcript out (encoder-decoder
872
+ * ASR, no streaming/partial API). This hook captures mic audio between
873
+ * start/stop, resamples it to 16 kHz mono, and runs a single transcribe() on the
874
+ * finalized utterance. The GPU engine is dynamically imported so it stays out of
875
+ * the main bundle until STT is actually used.
876
+ *
877
+ * @example
878
+ * ```tsx
879
+ * import { useSTT } from "@tryhamster/gerbil/gpu/hooks";
880
+ *
881
+ * const { startRecording, stopRecording, transcript, isRecording } = useSTT();
882
+ * ```
883
+ */
884
/** Sample rate (Hz) that captured audio is converted to before transcription. */
const MOONSHINE_SAMPLE_RATE = 16000;
/**
 * Average the given channels into one mono signal, then linearly resample it
 * to {@link MOONSHINE_SAMPLE_RATE}.
 *
 * @param {Float32Array[]} channels Per-channel samples; the first channel's length is used for all.
 * @param {number} inputRate Sample rate of `channels`, in Hz.
 * @returns {Float32Array} Mono samples at 16 kHz (the mono mix itself when no resampling is needed).
 */
function toMono16k(channels, inputRate) {
  const frameCount = channels[0]?.length ?? 0;
  const channelCount = channels.length;
  const mixed = new Float32Array(frameCount);
  for (const channel of channels) {
    for (let n = 0; n < frameCount; n++) {
      mixed[n] += channel[n] / channelCount;
    }
  }
  if (inputRate === MOONSHINE_SAMPLE_RATE) {
    return mixed;
  }
  const ratio = MOONSHINE_SAMPLE_RATE / inputRate;
  const resampledLength = Math.max(0, Math.floor(frameCount * ratio));
  const lastFrame = frameCount - 1;
  // Each output sample is a linear blend of the two input samples that
  // surround its position in the source signal.
  return new Float32Array(resampledLength).map((_, n) => {
    const position = n / ratio;
    const lower = Math.floor(position);
    const upper = Math.min(lower + 1, lastFrame);
    const weight = position - lower;
    return mixed[lower] * (1 - weight) + mixed[upper] * weight;
  });
}
903
/**
 * Speech-to-text hook. Records microphone audio between `startRecording()`
 * and `stopRecording()`, converts it to 16 kHz mono, and runs a single
 * `transcribe()` on the finished utterance.
 *
 * @param {object} [options]
 * @param {string} [options.repo] Model repo; defaults to `DEFAULT_MODELS.stt`.
 * @param {boolean} [options.autoLoad=false] Load the model on mount.
 * @param {() => void} [options.onReady] Called after the model has loaded.
 * @param {(err: Error) => void} [options.onError] Called on load, capture, or transcription errors.
 * @param {() => void} [options.onNoSpeech] Called when a transcription result has `noSpeech` set.
 * @returns {object} `load`, `startRecording`, `stopRecording`, `dispose` and
 *   the state flags/values (`isLoading`, `loadingProgress`, `isReady`,
 *   `isRecording`, `isTranscribing`, `transcript`, `audioSeconds`, `noSpeech`, `error`).
 */
function useSTT(options = {}) {
  const { repo = DEFAULT_MODELS.stt, autoLoad = false, onReady, onError, onNoSpeech } = options;
  const sttRef = useRef(null);
  // Guards against starting a second load while one is in flight.
  const loadingRef = useRef(false);
  // Capture graph: mic stream -> source node -> script processor.
  const mediaStreamRef = useRef(null);
  const audioCtxRef = useRef(null);
  const sourceRef = useRef(null);
  const processorRef = useRef(null);
  // Raw Float32 chunks collected while recording, at the context's sample rate.
  const chunksRef = useRef([]);
  const sampleRateRef = useRef(MOONSHINE_SAMPLE_RATE);
  const [isLoading, setIsLoading] = useState(false);
  const [loadingProgress, setLoadingProgress] = useState(null);
  const [isReady, setIsReady] = useState(false);
  const [isRecording, setIsRecording] = useState(false);
  const [isTranscribing, setIsTranscribing] = useState(false);
  const [transcript, setTranscript] = useState("");
  const [audioSeconds, setAudioSeconds] = useState(null);
  const [noSpeech, setNoSpeech] = useState(false);
  const [error, setError] = useState(null);
  // Load the STT model once; a no-op when already loaded or loading.
  const load = useCallback(async () => {
    if (sttRef.current || loadingRef.current) return;
    loadingRef.current = true;
    if (typeof navigator === "undefined" || !("gpu" in navigator)) {
      loadingRef.current = false;
      const err = /* @__PURE__ */ new Error("WebGPU is not available in this browser. Native speech-to-text requires Chrome/Edge 113+, Firefox 141+, or Safari 26+.");
      setError(err.message);
      onError?.(err);
      return;
    }
    setIsLoading(true);
    setError(null);
    setLoadingProgress({ status: "Initializing speech-to-text..." });
    try {
      // Dynamic import: the engine module is only fetched when STT is used.
      const { MoonshineSTT } = await import("./index.mjs");
      sttRef.current = await MoonshineSTT.create({
        repo,
        onProgress: (loaded, total, message) => {
          setLoadingProgress({
            status: message,
            progress: total > 0 ? Math.round(loaded / total * 100) : void 0
          });
        }
      });
      setIsReady(true);
      setIsLoading(false);
      setLoadingProgress(null);
      onReady?.();
    } catch (e) {
      // Allow a later retry after a failed load.
      loadingRef.current = false;
      const err = e instanceof Error ? e : new Error(String(e));
      setError(err.message);
      setIsLoading(false);
      setLoadingProgress(null);
      onError?.(err);
    }
  }, [
    repo,
    onReady,
    onError
  ]);
  // Disconnect the audio graph, close the context and stop every mic track.
  const teardownCapture = useCallback(() => {
    processorRef.current?.disconnect();
    sourceRef.current?.disconnect();
    if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
    for (const t of mediaStreamRef.current?.getTracks() ?? []) t.stop();
    processorRef.current = null;
    sourceRef.current = null;
    audioCtxRef.current = null;
    mediaStreamRef.current = null;
  }, []);
  // Load the model if needed, request the microphone and start buffering audio.
  const startRecording = useCallback(async () => {
    if (isRecording) return;
    if (!sttRef.current) await load();
    if (!sttRef.current) return;
    setTranscript("");
    setAudioSeconds(null);
    setNoSpeech(false);
    setError(null);
    chunksRef.current = [];
    let stream;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (e) {
      const err = e instanceof Error ? e : new Error(String(e));
      const name = err.name;
      if (name === "NotAllowedError" || name === "SecurityError") setError("Microphone access denied. Allow mic access for this site and try again.");
      else if (name === "NotFoundError" || name === "DevicesNotFoundError") setError("No microphone found. Connect a mic and try again.");
      else setError(err.message);
      onError?.(err);
      return;
    }
    mediaStreamRef.current = stream;
    const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
    if (!AudioCtx) {
      // The mic stream is already live here; release it instead of leaving
      // the microphone open with nothing consuming it.
      teardownCapture();
      const err = new Error("Web Audio API is not available in this browser.");
      setError(err.message);
      onError?.(err);
      return;
    }
    try {
      const ctx = new AudioCtx();
      audioCtxRef.current = ctx;
      // Remember the capture rate so stopRecording can resample correctly.
      sampleRateRef.current = ctx.sampleRate;
      const source = ctx.createMediaStreamSource(stream);
      sourceRef.current = source;
      const processor = ctx.createScriptProcessor(4096, 1, 1);
      processorRef.current = processor;
      processor.onaudioprocess = (ev) => {
        const input = ev.inputBuffer.getChannelData(0);
        // Copy: the chunk is kept past this callback.
        chunksRef.current.push(new Float32Array(input));
      };
      source.connect(processor);
      processor.connect(ctx.destination);
    } catch (e) {
      // Graph setup failed part-way: free the stream/context acquired so far.
      teardownCapture();
      const err = e instanceof Error ? e : new Error(String(e));
      setError(err.message);
      onError?.(err);
      return;
    }
    setIsRecording(true);
  }, [
    isRecording,
    load,
    teardownCapture,
    onError
  ]);
  // Stop capture, join the buffered chunks, resample and transcribe.
  const stopRecording = useCallback(async () => {
    if (!isRecording) return;
    setIsRecording(false);
    const inputRate = sampleRateRef.current;
    const captured = chunksRef.current;
    chunksRef.current = [];
    teardownCapture();
    const total = captured.reduce((n, c) => n + c.length, 0);
    const joined = new Float32Array(total);
    let off = 0;
    for (const c of captured) {
      joined.set(c, off);
      off += c.length;
    }
    const pcm = toMono16k([joined], inputRate);
    // Clips shorter than 127 samples are rejected as too short to transcribe.
    if (pcm.length < 127) {
      setError("Recording was too short. Hold the mic a moment longer.");
      return;
    }
    setIsTranscribing(true);
    setError(null);
    try {
      const result = await sttRef.current.transcribe(pcm);
      setAudioSeconds(result.audioSeconds);
      setNoSpeech(result.noSpeech);
      setTranscript(result.text);
      if (result.noSpeech) onNoSpeech?.();
    } catch (e) {
      const err = e instanceof Error ? e : new Error(String(e));
      setError(err.message);
      onError?.(err);
    } finally {
      setIsTranscribing(false);
    }
  }, [
    isRecording,
    teardownCapture,
    onError,
    onNoSpeech
  ]);
  // Release capture resources and destroy the loaded model, if any.
  const dispose = useCallback(() => {
    teardownCapture();
    if (sttRef.current) {
      sttRef.current.destroy?.();
      sttRef.current = null;
      loadingRef.current = false;
      setIsReady(false);
    }
  }, [teardownCapture]);
  // Mount: optionally auto-load. Unmount: release the mic and the model.
  useEffect(() => {
    if (autoLoad) load();
    return () => {
      teardownCapture();
      if (sttRef.current) {
        sttRef.current.destroy?.();
        sttRef.current = null;
      }
    };
  }, []);
  return {
    load,
    startRecording,
    stopRecording,
    dispose,
    isLoading,
    loadingProgress,
    isReady,
    isRecording,
    isTranscribing,
    transcript,
    audioSeconds,
    noSpeech,
    error
  };
}
1091
+
1092
+ //#endregion
1093
+ //#region src/browser/use-tts.ts
1094
+ /**
1095
+ * React hook for native text-to-speech in the browser.
1096
+ *
1097
+ * Wraps the engine's `speak()` (Kani-TTS-2) — the codec-LM backbone emits
1098
+ * NanoCodec audio tokens, the NanoCodec decoder turns them into 22.05 kHz mono
1099
+ * PCM, and this hook plays it through the Web Audio API (and keeps the clip for
1100
+ * instant replay). The GPU engine is dynamically imported so it stays out of the
1101
+ * main bundle until TTS is actually used.
1102
+ *
1103
+ * @example
1104
+ * ```tsx
1105
+ * import { useTTS } from "@tryhamster/gerbil/gpu/hooks";
1106
+ *
1107
+ * const { speak, isSynthesizing, isPlaying } = useTTS();
1108
+ * <button onClick={() => speak("Hello from on-device TTS.")}>Speak</button>
1109
+ * ```
1110
+ */
1111
/** Fallback sample rate (Hz) used when the engine's result omits one. */
const KANI_SAMPLE_RATE = 22050;
/**
 * Built-in voices. Each `value` is the language tag handed to the engine as
 * `languageTag`; only US English is listed.
 */
const KANI_VOICES = [
  { value: "en_us", label: "English (US)" }
];
/**
 * Wrap mono Float32 PCM in a single-channel AudioBuffer.
 *
 * @param {BaseAudioContext} ctx Context used to allocate the buffer.
 * @param {Float32Array} pcm Mono samples to copy in.
 * @param {number} sampleRate Sample rate of `pcm`, in Hz.
 * @returns {AudioBuffer} A one-channel buffer holding a copy of `pcm`.
 */
function pcmToAudioBuffer(ctx, pcm, sampleRate) {
  const audioBuffer = ctx.createBuffer(1, pcm.length, sampleRate);
  const monoChannel = audioBuffer.getChannelData(0);
  monoChannel.set(pcm);
  return audioBuffer;
}
1126
/**
 * Text-to-speech hook. `speak()` synthesises PCM with the engine, plays it
 * through the Web Audio API, and keeps the clip so `replay()` can play it
 * again without re-synthesising.
 *
 * @param {object} [options]
 * @param {string} [options.repo] Model repo; defaults to `DEFAULT_MODELS.tts`.
 * @param {boolean} [options.autoLoad=false] Load the model on mount.
 * @param {() => void} [options.onReady] Called after the model has loaded.
 * @param {(err: Error) => void} [options.onError] Called on load or synthesis errors.
 * @returns {object} `load`, `speak`, `replay`, `stop`, `dispose` and the state
 *   values (`isLoading`, `loadingProgress`, `isReady`, `isSynthesizing`,
 *   `isPlaying`, `hasAudio`, `audioSeconds`, `rtf`, `error`).
 */
function useTTS(options = {}) {
  const { repo = DEFAULT_MODELS.tts, autoLoad = false, onReady, onError } = options;
  const engineRef = useRef(null);
  // Guards against starting a second load while one is in flight.
  const loadingRef = useRef(false);
  const audioCtxRef = useRef(null);
  // Currently playing source node (if any) and the last synthesised clip.
  const sourceRef = useRef(null);
  const bufferRef = useRef(null);
  const [isLoading, setIsLoading] = useState(false);
  const [loadingProgress, setLoadingProgress] = useState(null);
  const [isReady, setIsReady] = useState(false);
  const [isSynthesizing, setIsSynthesizing] = useState(false);
  const [isPlaying, setIsPlaying] = useState(false);
  const [hasAudio, setHasAudio] = useState(false);
  const [audioSeconds, setAudioSeconds] = useState(null);
  const [rtf, setRtf] = useState(null);
  const [error, setError] = useState(null);
  // Load the TTS engine once; a no-op when already loaded or loading.
  const load = useCallback(async () => {
    if (engineRef.current || loadingRef.current) return;
    loadingRef.current = true;
    if (typeof navigator === "undefined" || !("gpu" in navigator)) {
      loadingRef.current = false;
      const err = /* @__PURE__ */ new Error("WebGPU is not available in this browser. Native text-to-speech requires Chrome/Edge 113+, Firefox 141+, or Safari 26+.");
      setError(err.message);
      onError?.(err);
      return;
    }
    setIsLoading(true);
    setError(null);
    setLoadingProgress({ status: "Initializing TTS..." });
    try {
      // Dynamic import: the engine module is only fetched when TTS is used.
      const { WebGPUEngine } = await import("./index.mjs");
      engineRef.current = await WebGPUEngine.create({
        repo,
        onProgress: (loaded, total, message) => {
          setLoadingProgress({
            status: message,
            progress: total > 0 ? Math.round(loaded / total * 100) : void 0
          });
        }
      });
      setIsReady(true);
      setIsLoading(false);
      setLoadingProgress(null);
      onReady?.();
    } catch (e) {
      // Allow a later retry after a failed load.
      loadingRef.current = false;
      const err = e instanceof Error ? e : new Error(String(e));
      setError(err.message);
      setIsLoading(false);
      setLoadingProgress(null);
      onError?.(err);
    }
  }, [
    repo,
    onReady,
    onError
  ]);
  // Stop playback. `onended` is detached first so the stale handler cannot
  // clobber state belonging to a newer source.
  const stop = useCallback(() => {
    if (sourceRef.current) {
      try {
        sourceRef.current.onended = null;
        sourceRef.current.stop();
      } catch {}
      sourceRef.current = null;
    }
    setIsPlaying(false);
  }, []);
  // Play the stored clip from the start, replacing any current playback.
  const playBuffer = useCallback(async () => {
    const buffer = bufferRef.current;
    if (!buffer) return;
    const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
    if (!AudioCtx) return;
    if (!audioCtxRef.current || audioCtxRef.current.state === "closed") audioCtxRef.current = new AudioCtx();
    const ctx = audioCtxRef.current;
    if (!ctx) return;
    if (ctx.state === "suspended") await ctx.resume();
    stop();
    const source = ctx.createBufferSource();
    source.buffer = buffer;
    source.connect(ctx.destination);
    source.onended = () => {
      setIsPlaying(false);
      sourceRef.current = null;
    };
    sourceRef.current = source;
    setIsPlaying(true);
    source.start();
  }, [stop]);
  /**
   * Synthesise `text` and play it.
   * @param {string} text Text to speak; blank text is ignored.
   * @param {object} [opts] `voice`, `temperature`, `topP`, `repetitionPenalty` overrides.
   */
  const speak = useCallback(async (text, opts = {}) => {
    if (!text.trim()) return;
    {
      // Create/resume the context and start a 1-sample silent buffer right
      // away, before any await. NOTE(review): presumably this unlocks audio
      // while still inside the user gesture — confirm.
      const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
      if (AudioCtx) {
        if (!audioCtxRef.current || audioCtxRef.current.state === "closed") audioCtxRef.current = new AudioCtx();
        const ctx = audioCtxRef.current;
        if (ctx.state === "suspended") ctx.resume();
        try {
          const warm = ctx.createBufferSource();
          warm.buffer = ctx.createBuffer(1, 1, ctx.sampleRate);
          warm.connect(ctx.destination);
          warm.start(0);
        } catch {}
      }
    }
    if (!engineRef.current) await load();
    const engine = engineRef.current;
    if (!engine) return;
    setIsSynthesizing(true);
    setError(null);
    try {
      const t0 = performance.now();
      const { pcm, sampleRate, audioSeconds: secs } = await engine.speak(text, {
        languageTag: opts.voice ?? "en_us",
        temperature: opts.temperature ?? 1,
        topP: opts.topP ?? .95,
        repetitionPenalty: opts.repetitionPenalty ?? 1.1
      });
      const wall = (performance.now() - t0) / 1e3;
      const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
      if (AudioCtx && (!audioCtxRef.current || audioCtxRef.current.state === "closed")) audioCtxRef.current = new AudioCtx();
      // Only keep, and advertise through `hasAudio`, a clip that was actually
      // built; with no AudioContext there is nothing replay() could play.
      bufferRef.current = audioCtxRef.current ? pcmToAudioBuffer(audioCtxRef.current, pcm, sampleRate ?? KANI_SAMPLE_RATE) : null;
      setHasAudio(bufferRef.current !== null);
      setAudioSeconds(secs);
      // Seconds of audio produced per second of wall-clock synthesis time.
      setRtf(wall > 0 ? secs / wall : null);
      setIsSynthesizing(false);
      await playBuffer();
    } catch (e) {
      const err = e instanceof Error ? e : new Error(String(e));
      setError(err.message);
      setIsSynthesizing(false);
      onError?.(err);
    }
  }, [
    load,
    playBuffer,
    onError
  ]);
  // Play the last synthesised clip again, if there is one.
  const replay = useCallback(async () => {
    if (!bufferRef.current) return;
    await playBuffer();
  }, [playBuffer]);
  // Stop playback, drop the clip and context, and destroy the engine.
  const dispose = useCallback(() => {
    stop();
    if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
    audioCtxRef.current = null;
    bufferRef.current = null;
    // The clip is gone, so `hasAudio` must not keep reporting one.
    setHasAudio(false);
    if (engineRef.current) {
      engineRef.current.destroy?.();
      engineRef.current = null;
      loadingRef.current = false;
      setIsReady(false);
    }
  }, [stop]);
  // Mount: optionally auto-load. Unmount: stop audio and free the engine
  // (no state setters are called here, unlike in `dispose`).
  useEffect(() => {
    if (autoLoad) load();
    return () => {
      if (sourceRef.current) {
        try {
          sourceRef.current.onended = null;
          sourceRef.current.stop();
        } catch {}
        sourceRef.current = null;
      }
      if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
      if (engineRef.current) {
        engineRef.current.destroy?.();
        engineRef.current = null;
      }
    };
  }, []);
  return {
    load,
    speak,
    replay,
    stop,
    dispose,
    isLoading,
    loadingProgress,
    isReady,
    isSynthesizing,
    isPlaying,
    hasAudio,
    audioSeconds,
    rtf,
    error
  };
}
1313
+
1314
+ //#endregion
1315
+ //#region src/browser/use-voice-chat.ts
1316
/**
 * React hook for a fully on-device voice assistant: speak to it, it transcribes,
 * thinks, and speaks back — no cloud, no API keys. Composes {@link useSTT},
 * {@link useChat}, and {@link useTTS} into one flow.
 *
 * ```tsx
 * import { useVoiceChat } from "@tryhamster/gerbil/hooks";
 *
 * const vc = useVoiceChat();
 * <button onMouseDown={vc.start} onMouseUp={vc.stop}>
 *   {vc.isListening ? "Listening…" : "Hold to talk"}
 * </button>
 * // vc.messages renders the conversation; replies are spoken automatically.
 * ```
 *
 * @param {object} [options] `sttModel` / `ttsModel` select the speech models,
 *   `voice` is passed to `tts.speak`, `speak` (default `true`) toggles spoken
 *   replies; every other key is forwarded to `useChat`.
 */
function useVoiceChat(options = {}) {
  const { sttModel, ttsModel, voice, speak = true, ...chatOptions } = options;
  const stt = useSTT({ repo: sttModel });
  const chat = useChat(chatOptions);
  const tts = useTTS({ repo: ttsModel });
  // Last transcript already sent to the chat, so re-renders with an unchanged
  // transcript do not send it twice.
  const processedRef = useRef("");
  useEffect(() => {
    const text = stt.transcript.trim();
    if (!text) {
      // The transcript is cleared when a new recording starts. Forget the
      // previous utterance here so that repeating the same phrase on the
      // next recording is still treated as a new message.
      processedRef.current = "";
      return;
    }
    if (stt.isTranscribing || text === processedRef.current) return;
    processedRef.current = text;
    (async () => {
      const reply = await chat.send(text);
      if (speak && reply.trim()) await tts.speak(reply, voice ? { voice } : void 0);
    })();
  }, [stt.transcript, stt.isTranscribing]);
  const start = useCallback(() => stt.startRecording(), [stt]);
  const stop = useCallback(() => stt.stopRecording(), [stt]);
  return {
    messages: chat.messages,
    start,
    stop,
    stopSpeaking: tts.stop,
    clear: chat.clear,
    isListening: stt.isRecording,
    isTranscribing: stt.isTranscribing,
    isThinking: chat.isGenerating,
    isSpeaking: tts.isSynthesizing || tts.isPlaying,
    transcript: stt.transcript,
    isLoading: stt.isLoading || chat.isLoading || tts.isLoading,
    isReady: chat.isReady,
    error: stt.error ?? chat.error ?? tts.error
  };
}
1366
+
1367
+ //#endregion
1368
+ export { KANI_VOICES, useAgent, useAutocomplete, useChat, useCompletion, useEmbedding, useEngine, useMemory, useObject, useSTT, useTTS, useText, useVision, useVoiceChat };
1369
+ //# sourceMappingURL=hooks.mjs.map