@tryhamster/gerbil 1.0.0-rc.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +247 -84
  3. package/dist/architectures-C1I5V3Dt.mjs +6070 -0
  4. package/dist/architectures-C1I5V3Dt.mjs.map +1 -0
  5. package/dist/browser/index.d.ts +264 -588
  6. package/dist/browser/index.d.ts.map +1 -1
  7. package/dist/browser/index.js +585 -2334
  8. package/dist/browser/index.js.map +1 -1
  9. package/dist/cli.mjs +625 -1098
  10. package/dist/cli.mjs.map +1 -1
  11. package/dist/defaults-9komdrbY.mjs +24 -0
  12. package/dist/defaults-9komdrbY.mjs.map +1 -0
  13. package/dist/frameworks/express.d.mts +1 -3
  14. package/dist/frameworks/express.d.mts.map +1 -1
  15. package/dist/frameworks/express.mjs +7 -7
  16. package/dist/frameworks/express.mjs.map +1 -1
  17. package/dist/frameworks/fastify.d.mts +1 -1
  18. package/dist/frameworks/fastify.d.mts.map +1 -1
  19. package/dist/frameworks/fastify.mjs +3 -3
  20. package/dist/frameworks/fastify.mjs.map +1 -1
  21. package/dist/frameworks/hono.d.mts +1 -1
  22. package/dist/frameworks/hono.d.mts.map +1 -1
  23. package/dist/frameworks/hono.mjs +4 -4
  24. package/dist/frameworks/hono.mjs.map +1 -1
  25. package/dist/frameworks/next.d.mts +3 -2
  26. package/dist/frameworks/next.d.mts.map +1 -1
  27. package/dist/frameworks/next.mjs +4 -4
  28. package/dist/frameworks/next.mjs.map +1 -1
  29. package/dist/frameworks/react.d.mts +1 -1
  30. package/dist/frameworks/trpc.d.mts +1 -1
  31. package/dist/frameworks/trpc.d.mts.map +1 -1
  32. package/dist/frameworks/trpc.mjs +4 -4
  33. package/dist/frameworks/trpc.mjs.map +1 -1
  34. package/dist/gerbil-BHrJJIa4.mjs +1656 -0
  35. package/dist/gerbil-BHrJJIa4.mjs.map +1 -0
  36. package/dist/gerbil-BT9fCydo.d.mts +488 -0
  37. package/dist/gerbil-BT9fCydo.d.mts.map +1 -0
  38. package/dist/gerbil-DomNfIr1.mjs +4 -0
  39. package/dist/gpu/hooks.d.mts +520 -0
  40. package/dist/gpu/hooks.d.mts.map +1 -0
  41. package/dist/gpu/hooks.mjs +1188 -0
  42. package/dist/gpu/hooks.mjs.map +1 -0
  43. package/dist/gpu/index.d.mts +2 -0
  44. package/dist/gpu/index.mjs +6 -0
  45. package/dist/gpu-33qCAtHW.mjs +3615 -0
  46. package/dist/gpu-33qCAtHW.mjs.map +1 -0
  47. package/dist/index-Dgmb2kE3.d.mts +245 -0
  48. package/dist/index-Dgmb2kE3.d.mts.map +1 -0
  49. package/dist/index-jEAL2s-A.d.mts +2022 -0
  50. package/dist/index-jEAL2s-A.d.mts.map +1 -0
  51. package/dist/index.d.mts +22 -487
  52. package/dist/index.d.mts.map +1 -1
  53. package/dist/index.mjs +13 -8
  54. package/dist/index.mjs.map +1 -1
  55. package/dist/indexeddb-store-BWIMtxxH.mjs +103 -0
  56. package/dist/indexeddb-store-BWIMtxxH.mjs.map +1 -0
  57. package/dist/indexeddb-store-ClH12Xnl.mjs +4 -0
  58. package/dist/integrations/ai-sdk.d.mts +75 -6
  59. package/dist/integrations/ai-sdk.d.mts.map +1 -1
  60. package/dist/integrations/ai-sdk.mjs +131 -15
  61. package/dist/integrations/ai-sdk.mjs.map +1 -1
  62. package/dist/integrations/langchain.d.mts +1 -1
  63. package/dist/integrations/langchain.d.mts.map +1 -1
  64. package/dist/integrations/langchain.mjs +5 -5
  65. package/dist/integrations/langchain.mjs.map +1 -1
  66. package/dist/integrations/llamaindex.d.mts +1 -1
  67. package/dist/integrations/llamaindex.d.mts.map +1 -1
  68. package/dist/integrations/llamaindex.mjs +5 -5
  69. package/dist/integrations/llamaindex.mjs.map +1 -1
  70. package/dist/integrations/mcp-client.mjs +3 -3
  71. package/dist/integrations/mcp-client.mjs.map +1 -1
  72. package/dist/integrations/mcp.d.mts +3 -2
  73. package/dist/integrations/mcp.d.mts.map +1 -1
  74. package/dist/integrations/mcp.mjs +5 -5
  75. package/dist/{mcp-BvbriaBy.mjs → mcp-1DaMsaBc.mjs} +4 -4
  76. package/dist/mcp-1DaMsaBc.mjs.map +1 -0
  77. package/dist/memory/index.d.mts +3 -0
  78. package/dist/memory/index.mjs +6 -0
  79. package/dist/memory-D1P7Tmda.mjs +4 -0
  80. package/dist/memory-DVN0MnIG.mjs +132 -0
  81. package/dist/memory-DVN0MnIG.mjs.map +1 -0
  82. package/dist/memory-Dj0J1v88.mjs +294 -0
  83. package/dist/memory-Dj0J1v88.mjs.map +1 -0
  84. package/dist/moonshine-stt-BLyVoRpB.mjs +4 -0
  85. package/dist/moonshine-stt-v_P_Ci_m.mjs +11936 -0
  86. package/dist/moonshine-stt-v_P_Ci_m.mjs.map +1 -0
  87. package/dist/{one-liner-s-lD8rCC.mjs → one-liner-DnQn7HJK.mjs} +14 -16
  88. package/dist/one-liner-DnQn7HJK.mjs.map +1 -0
  89. package/dist/repl-jV5gcJFA.mjs +9 -0
  90. package/dist/skills/index.d.mts +270 -320
  91. package/dist/skills/index.d.mts.map +1 -1
  92. package/dist/skills/index.mjs +5 -5
  93. package/dist/{skills-CD3Orlex.mjs → skills-DX8D59UH.mjs} +187 -32
  94. package/dist/skills-DX8D59UH.mjs.map +1 -0
  95. package/dist/{tools-Bi1P7Xoy.mjs → tools-DQ1mPUw5.mjs} +34 -22
  96. package/dist/tools-DQ1mPUw5.mjs.map +1 -0
  97. package/dist/{types-CiTc7ez3.d.mts → types-D6FiR_oh.d.mts} +106 -12
  98. package/dist/types-D6FiR_oh.d.mts.map +1 -0
  99. package/dist/types-DQBe2lFo.d.mts +165 -0
  100. package/dist/types-DQBe2lFo.d.mts.map +1 -0
  101. package/dist/{utils-CZBZ8dgR.mjs → utils-DKO55ZmZ.mjs} +1 -1
  102. package/dist/{utils-CZBZ8dgR.mjs.map → utils-DKO55ZmZ.mjs.map} +1 -1
  103. package/dist/vector-B0panuy6.mjs +95 -0
  104. package/dist/vector-B0panuy6.mjs.map +1 -0
  105. package/docs/PROJECT-STATE.md +321 -0
  106. package/docs/adding-a-model-family.md +280 -0
  107. package/docs/ai-sdk.md +70 -61
  108. package/docs/architecture/overview.md +17 -7
  109. package/docs/browser.md +203 -8
  110. package/docs/embeddings.md +156 -0
  111. package/docs/gerbil-site-native-migration.md +217 -0
  112. package/docs/gpu-engine/architectures.md +398 -0
  113. package/docs/gpu-engine/ir.md +372 -0
  114. package/docs/gpu-engine/kernels.md +718 -0
  115. package/docs/gpu-engine/paper.html +1759 -0
  116. package/docs/gpu-engine/paper.md +2109 -0
  117. package/docs/gpu-engine/safetensors.md +312 -0
  118. package/docs/gpu-engine/tokenizer.md +302 -0
  119. package/docs/memory-rag.md +91 -0
  120. package/docs/metal-safari-intel.md +190 -0
  121. package/docs/mobile-failure-diagnosis.md +124 -0
  122. package/docs/mobile.md +99 -0
  123. package/docs/observability.md +230 -0
  124. package/docs/onnx-removal-plan.md +339 -0
  125. package/docs/research/autoresearch-portable.md +904 -0
  126. package/docs/research/dispatch-reduction-hivemind.md +84 -0
  127. package/docs/research/ios-safari-model-caching.md +117 -0
  128. package/docs/research/mobile-webgpu-speed-fusion.md +135 -0
  129. package/docs/research/native-stt-model-selection.md +49 -0
  130. package/docs/research/native-tts-model-selection.md +90 -0
  131. package/docs/research/native-vs-chromium-decision.md +152 -0
  132. package/docs/research/nemotron-mamba2-inference.md +910 -0
  133. package/docs/research/qwen35-multimodal.md +293 -0
  134. package/docs/research/qwen36-gemma4-targets.md +337 -0
  135. package/docs/research/sota-embedding-models.md +179 -0
  136. package/docs/research/sota-mobile-models-2026.md +263 -0
  137. package/docs/research/sota-modality-models.md +202 -0
  138. package/docs/research/tps-baselines.md +71 -0
  139. package/docs/research/webgpu-m4-reference.md +104 -0
  140. package/docs/site-update-plan.md +155 -0
  141. package/docs/structured-output.md +123 -0
  142. package/docs/stt.md +63 -446
  143. package/docs/tts.md +77 -499
  144. package/docs/vision.md +100 -338
  145. package/package.json +22 -7
  146. package/dist/chrome-backend-CORwaIyC.mjs +0 -1212
  147. package/dist/chrome-backend-CORwaIyC.mjs.map +0 -1
  148. package/dist/chrome-backend-DIKYoWj-.mjs +0 -3
  149. package/dist/gerbil-CJ3ifloF.mjs +0 -4
  150. package/dist/gerbil-Dw4Qj77e.mjs +0 -1631
  151. package/dist/gerbil-Dw4Qj77e.mjs.map +0 -1
  152. package/dist/gerbil-qOTe1nl2.d.mts +0 -431
  153. package/dist/gerbil-qOTe1nl2.d.mts.map +0 -1
  154. package/dist/kokoro-BNTb6egA.mjs +0 -20210
  155. package/dist/kokoro-BNTb6egA.mjs.map +0 -1
  156. package/dist/kokoro-DFRQ1OeM.js +0 -20212
  157. package/dist/kokoro-DFRQ1OeM.js.map +0 -1
  158. package/dist/mcp-BvbriaBy.mjs.map +0 -1
  159. package/dist/one-liner-s-lD8rCC.mjs.map +0 -1
  160. package/dist/repl-DveXw36T.mjs +0 -9
  161. package/dist/skills-CD3Orlex.mjs.map +0 -1
  162. package/dist/stt-CpLYbGFd.mjs +0 -433
  163. package/dist/stt-CpLYbGFd.mjs.map +0 -1
  164. package/dist/stt-DRPLEEHB.mjs +0 -3
  165. package/dist/stt-Te8Qz-Ay.js +0 -433
  166. package/dist/stt-Te8Qz-Ay.js.map +0 -1
  167. package/dist/tools-Bi1P7Xoy.mjs.map +0 -1
  168. package/dist/transformers.web-DokyH3rP.js +0 -3
  169. package/dist/transformers.web-M6mCnEYJ.js +0 -30382
  170. package/dist/transformers.web-M6mCnEYJ.js.map +0 -1
  171. package/dist/tts-C0xx3CtE.js +0 -724
  172. package/dist/tts-C0xx3CtE.js.map +0 -1
  173. package/dist/tts-DXgsKGCe.mjs +0 -3
  174. package/dist/tts-DeGANMNV.mjs +0 -730
  175. package/dist/tts-DeGANMNV.mjs.map +0 -1
  176. package/dist/types-CiTc7ez3.d.mts.map +0 -1
  177. /package/dist/{auto-update-S9s5-g0C.mjs → auto-update-BVaLXcDE.mjs} +0 -0
  178. /package/dist/{chunk-CkXuGtQK.mjs → chunk-B9cbKln6.mjs} +0 -0
  179. /package/dist/{microphone-DaMZFRuR.mjs → microphone-Bqmoz9_K.mjs} +0 -0
@@ -0,0 +1,1188 @@
1
+ import { n as resolveDefaultRepo, t as DEFAULT_MODELS } from "../defaults-9komdrbY.mjs";
2
+ import { useCallback, useEffect, useRef, useState } from "react";
3
+
4
+ //#region src/browser/use-engine.ts
5
+ /**
6
+ * React hook for native WebGPU inference in the browser.
7
+ *
8
+ * Uses gerbil's WebGPUEngine directly on the main thread — no web worker,
9
+ * no ONNX Runtime, no transformers.js. Pure WGSL compute shaders.
10
+ *
11
+ * Handles the full engine lifecycle for you:
12
+ * - loads the model (lazily or on mount),
13
+ * - hot-swaps when you change `model`/`dtype`/`enableVision`/`embedding`,
14
+ * - SHARES one engine across every component that asks for the same config
15
+ * (reference-counted) so you never upload the same weights to the GPU twice,
16
+ * - disposes when the last consumer unmounts.
17
+ *
18
+ * @example
19
+ * ```tsx
20
+ * import { useEngine } from "@tryhamster/gerbil/browser";
21
+ *
22
+ * function App() {
23
+ * const { complete, completion, isLoading, isGenerating, tps } = useEngine({
24
+ * model: "mlx-community/Qwen3.5-0.8B-4bit",
25
+ * autoLoad: true,
26
+ * });
27
+ *
28
+ * if (isLoading) return <div>Loading model...</div>;
29
+ * return (
30
+ * <div>
31
+ * <button onClick={() => complete("What is 2+2?")}>Generate</button>
32
+ * <p>{completion}</p>
33
+ * {isGenerating && <span>{tps?.toFixed(1)} tok/s</span>}
34
+ * </div>
35
+ * );
36
+ * }
37
+ * ```
38
+ */
39
/**
 * Map a failure onto a coarse error category by scanning its message for
 * known substrings (case-insensitive). Rules are evaluated in the order
 * listed below and the first matching rule wins.
 *
 * Accepts any thrown value: an Error (or any object with a `message`), a bare
 * string, or `null`/`undefined`. Values without a usable message classify as
 * "unknown" instead of throwing.
 *
 * @param {unknown} err - The thrown value to classify.
 * @returns {"no-webgpu"|"no-adapter"|"device-lost"|"oom"|"network"|"timeout"|"unknown"}
 */
function classifyError(err) {
  const raw = typeof err === "string" ? err : err?.message;
  const msg = String(raw ?? "").toLowerCase();
  // Order matters: an earlier rule shadows later ones when several needles match.
  const rules = [
    ["no-webgpu", ["webgpu is not available", "not supported"]],
    ["no-adapter", ["no webgpu adapter", "no gpu adapter"]],
    ["device-lost", ["device lost", "device was destroyed"]],
    ["oom", ["out of memory", "allocation failed", "buffer size"]],
    ["network", ["fetch", "network", "cors"]],
    ["timeout", ["timeout", "timed out"]]
  ];
  for (const [kind, needles] of rules) {
    if (needles.some((needle) => msg.includes(needle))) return kind;
  }
  return "unknown";
}
49
/**
 * Look up the user-facing hint that accompanies an error category.
 *
 * @param {string} kind - Category as produced by `classifyError`.
 * @returns {string} The hint text, or "" when the category has no hint.
 */
function getErrorGuidance(kind) {
  const hints = new Map([
    ["no-webgpu", "WebGPU requires Safari 26+ (iOS 26+), Chrome 113+, or Firefox 141+."],
    ["no-adapter", "No GPU found. Try closing other browser tabs that might be using the GPU."],
    ["device-lost", "GPU device was lost (tab may have been backgrounded). Please reload."],
    ["oom", "Not enough GPU memory. Try a smaller model or close other tabs."],
    ["network", "Failed to download model. Check your internet connection."],
    ["timeout", "Model loading timed out. Check your connection or try a smaller model."]
  ]);
  const hint = hints.get(kind);
  return hint === undefined ? "" : hint;
}
60
/**
 * Pick the default maximum sequence length for the current environment.
 *
 * @returns {number} 2048 when `navigator.userAgent` matches
 *   iPhone/iPad/iPod/Android (case-insensitive); 4096 otherwise, including
 *   when no `navigator` global exists.
 */
function getDefaultMaxSeqLen() {
  const hasNavigator = typeof navigator !== "undefined";
  if (hasNavigator && /iPhone|iPad|iPod|Android/i.test(navigator.userAgent)) {
    return 2048;
  }
  return 4096;
}
64
/**
 * Registry of engines shared between hook instances, keyed by a config key.
 * Each value is `{ promise, engine, refs, disposeTimer }`.
 */
const SHARED_ENGINES = /* @__PURE__ */ new Map();
/** Delay (ms) between the last release of an engine and its destruction. */
const ENGINE_DISPOSE_GRACE_MS = 3e4;
/** Minimum delay (ms) before a failed load of the same key is retried. */
const RETRY_COOLDOWN_MS = 3e3;

/**
 * Take a reference on the shared engine for `key`, creating it with `factory`
 * if no entry exists. Cancels a pending disposal of that entry.
 *
 * @param {string} key - Registry key identifying the engine configuration.
 * @param {() => Promise<object>} factory - Creates the engine; called at most
 *   once per registry entry.
 * @returns {Promise<object>} The entry's engine promise (same promise object
 *   for every caller sharing the entry).
 */
function acquireSharedEngine(key, factory) {
  let entry = SHARED_ENGINES.get(key);
  if (!entry) {
    const created = {
      promise: factory(),
      engine: null,
      refs: 0,
      disposeTimer: null
    };
    created.promise.then((eng) => {
      created.engine = eng;
    }).catch(() => {
      if (created.disposeTimer) {
        clearTimeout(created.disposeTimer);
        created.disposeTimer = null;
      }
      // Only evict our own entry: by the time this rejection is observed the
      // key may already map to a newer entry that must stay registered.
      if (SHARED_ENGINES.get(key) === created) SHARED_ENGINES.delete(key);
    });
    SHARED_ENGINES.set(key, created);
    entry = created;
  }
  if (entry.disposeTimer) {
    clearTimeout(entry.disposeTimer);
    entry.disposeTimer = null;
  }
  entry.refs += 1;
  return entry.promise;
}

/**
 * Drop one reference on the shared engine for `key`. When the count reaches
 * zero the engine is destroyed after ENGINE_DISPOSE_GRACE_MS, unless it is
 * acquired again in the meantime. Unknown keys are ignored.
 *
 * @param {string} key - Registry key previously passed to `acquireSharedEngine`.
 * @returns {void}
 */
function releaseSharedEngine(key) {
  const entry = SHARED_ENGINES.get(key);
  if (!entry) return;
  // Clamp at zero so an accidental extra release cannot make a later acquire
  // look like "nobody holds this engine" while a consumer is still using it.
  entry.refs = Math.max(0, entry.refs - 1);
  if (entry.refs > 0 || entry.disposeTimer) return;
  entry.disposeTimer = setTimeout(() => {
    entry.disposeTimer = null;
    if (entry.refs > 0) return;
    // Identity check: never unregister a different entry that reused the key.
    if (SHARED_ENGINES.get(key) === entry) SHARED_ENGINES.delete(key);
    // Best-effort teardown: a failed load or a failing destroy() is ignored.
    entry.promise.then((eng) => eng.destroy()).catch(() => {});
  }, ENGINE_DISPOSE_GRACE_MS);
}
103
+ /** Decode an image URL / data URL into RGB pixels via an offscreen canvas. */
104
/**
 * Load an image from a URL / data URL and return its pixels as packed RGB.
 *
 * The image is drawn onto a canvas, scaled down (never up) so that its longer
 * side is at most 448 pixels, and the alpha channel is dropped.
 *
 * @param {string} src - Value assigned to the image element's `src`.
 * @returns {Promise<{pixels: Uint8ClampedArray, width: number, height: number}>}
 *   Three bytes per pixel, in the order returned by `getImageData`.
 * @throws {Error} When the image fails to load or no 2D context is available.
 */
async function decodeImage(src) {
  const MAX_SIDE = 448;
  const picture = new Image();
  picture.crossOrigin = "anonymous";
  await new Promise((loaded, failed) => {
    // Handlers are attached before `src` is set so the events cannot be missed.
    picture.onload = () => loaded();
    picture.onerror = () => failed(/* @__PURE__ */ new Error("Failed to load image."));
    picture.src = src;
  });

  const longestSide = Math.max(picture.naturalWidth, picture.naturalHeight);
  const ratio = Math.min(1, MAX_SIDE / longestSide);
  const surface = document.createElement("canvas");
  surface.width = Math.max(1, Math.round(picture.naturalWidth * ratio));
  surface.height = Math.max(1, Math.round(picture.naturalHeight * ratio));

  const painter = surface.getContext("2d");
  if (!painter) throw new Error("Could not get 2D canvas context for image decode.");
  painter.drawImage(picture, 0, 0, surface.width, surface.height);

  const { width, height } = surface;
  const rgba = painter.getImageData(0, 0, width, height).data;
  const pixels = new Uint8ClampedArray(width * height * 3);
  let out = 0;
  for (let at = 0; at < rgba.length; at += 4) {
    pixels[out] = rgba[at];
    pixels[out + 1] = rgba[at + 1];
    pixels[out + 2] = rgba[at + 2];
    out += 3;
  }
  return { pixels, width, height };
}
132
/**
 * React hook that loads a WebGPU engine through the shared engine registry
 * and exposes generation helpers plus the state derived from them.
 *
 * @param {object} [options]
 * @param {string} [options.model] - Repo id; resolved through `resolveDefaultRepo`.
 * @param {number} [options.maxSeqLen] - Defaults to `getDefaultMaxSeqLen()`.
 * @param {string} [options.dtype="auto"]
 * @param {boolean} [options.autoLoad=false] - Call `load()` on mount.
 * @param {boolean} [options.enableVision=false]
 * @param {boolean} [options.embedding=false]
 * @param {number} [options.loadingTimeout=300000] - Load timeout in ms.
 * @param {() => void} [options.onReady] - Called after a successful load.
 * @param {(err: Error, kind: string) => void} [options.onError]
 * @returns {object} `{ complete, generateObject, describeImage, embed,
 *   similarity, completion, isLoading, loadingProgress, isGenerating, tps,
 *   attempts, error, errorKind, isReady, load, stop, dispose }`.
 */
function useEngine(options = {}) {
  const {
    model: modelOption,
    maxSeqLen,
    dtype = "auto",
    autoLoad = false,
    enableVision = false,
    embedding = false,
    loadingTimeout = 3e5,
    onReady,
    onError
  } = options;
  const model = resolveDefaultRepo({
    repo: modelOption,
    embedding,
    enableVision
  });
  const engineRef = useRef(null);
  const stoppedRef = useRef(false);
  // Timer id of the load timeout that is currently armed (null = none).
  const timeoutRef = useRef(null);
  // Registry key this hook instance holds a reference on (null = none).
  const heldKeyRef = useRef(null);
  // `{ key, at }` of the most recent failed load; drives the retry cooldown.
  const failedKeyRef = useRef(null);
  const [isLoading, setIsLoading] = useState(false);
  const [loadingProgress, setLoadingProgress] = useState(null);
  const [isGenerating, setIsGenerating] = useState(false);
  const [isReady, setIsReady] = useState(false);
  const [completion, setCompletion] = useState("");
  const [tps, setTps] = useState(null);
  const [attempts, setAttempts] = useState(0);
  const [error, setError] = useState(null);
  const [errorKind, setErrorKind] = useState(null);
  // Identity of the requested engine configuration; also the registry key.
  const modelKey = `${model}|${dtype}|${enableVision}|${embedding}|${maxSeqLen ?? "auto"}`;

  /** Record a failure in state (message + guidance) and notify `onError`. */
  const fail = useCallback((e) => {
    const err = e instanceof Error ? e : new Error(String(e));
    const kind = classifyError(err);
    const guidance = getErrorGuidance(kind);
    setError(guidance ? `${err.message} ${guidance}` : err.message);
    setErrorKind(kind);
    setIsLoading(false);
    setLoadingProgress(null);
    onError?.(err, kind);
  }, [onError]);

  /**
   * Acquire the shared engine for the current config. No-op when an engine is
   * already attached, a load for this key is in flight, or the key failed less
   * than RETRY_COOLDOWN_MS ago. Failures are reported through `fail`, not thrown.
   */
  const load = useCallback(async () => {
    const failed = failedKeyRef.current;
    const inCooldown = failed?.key === modelKey && Date.now() - failed.at < RETRY_COOLDOWN_MS;
    if (engineRef.current || heldKeyRef.current === modelKey || inCooldown) return;
    if (typeof navigator === "undefined" || !("gpu" in navigator)) {
      failedKeyRef.current = {
        key: modelKey,
        at: Date.now()
      };
      fail(/* @__PURE__ */ new Error("WebGPU is not available in this browser."));
      return;
    }
    setIsLoading(true);
    setError(null);
    setErrorKind(null);
    setLoadingProgress({ status: "Initializing WebGPU engine..." });
    if (timeoutRef.current) clearTimeout(timeoutRef.current);
    // Keep this load's timer in a local so that a stale load finishing later
    // can only ever cancel its OWN timeout, never a newer load's.
    let timer = null;
    const timeoutPromise = new Promise((_, reject) => {
      timer = setTimeout(() => reject(/* @__PURE__ */ new Error("Model loading timed out. The download may be too slow.")), loadingTimeout);
    });
    timeoutRef.current = timer;
    const clearOwnTimeout = () => {
      clearTimeout(timer);
      if (timeoutRef.current === timer) timeoutRef.current = null;
    };
    const key = modelKey;
    // A different key can still be held here by an in-flight load that this
    // call supersedes. Give that reference back now; the superseded load sees
    // `heldKeyRef.current !== key` when it settles and must not release again.
    if (heldKeyRef.current !== null) releaseSharedEngine(heldKeyRef.current);
    heldKeyRef.current = key;
    const factory = async () => {
      const { WebGPUEngine } = await import("./index.mjs");
      return WebGPUEngine.create({
        repo: model,
        maxSeqLen: maxSeqLen ?? getDefaultMaxSeqLen(),
        dtype,
        enableVision,
        embedding,
        onProgress: (loaded, total, message) => {
          setLoadingProgress({
            status: message,
            progress: total > 0 ? Math.round(loaded / total * 100) : void 0
          });
        }
      });
    };
    try {
      const engine = await Promise.race([acquireSharedEngine(key, factory), timeoutPromise]);
      clearOwnTimeout();
      // Superseded while loading (dispose, unmount, config change, or a newer
      // load): whoever changed the held key already released our reference.
      if (heldKeyRef.current !== key) return;
      engineRef.current = engine;
      failedKeyRef.current = null;
      // Exposes the engine on `window` (presumably for debugging — TODO confirm).
      if (typeof window !== "undefined") window.__gerbilEngine = engine;
      setIsReady(true);
      setIsLoading(false);
      setLoadingProgress(null);
      onReady?.();
    } catch (e) {
      clearOwnTimeout();
      failedKeyRef.current = {
        key,
        at: Date.now()
      };
      // Release only while we still own the reference; a superseded load has
      // already had it released on its behalf.
      if (heldKeyRef.current === key) {
        heldKeyRef.current = null;
        releaseSharedEngine(key);
      }
      fail(e);
    }
  }, [
    modelKey,
    model,
    maxSeqLen,
    dtype,
    enableVision,
    embedding,
    loadingTimeout,
    onReady,
    fail
  ]);

  /**
   * Stop forwarding streamed tokens to `completion`. This only sets a flag
   * read by the `onToken` callbacks; nothing here aborts the engine call.
   */
  const stop = useCallback(() => {
    stoppedRef.current = true;
  }, []);

  /** Detach from the engine and release this hook's registry reference. */
  const dispose = useCallback(() => {
    if (timeoutRef.current) {
      clearTimeout(timeoutRef.current);
      timeoutRef.current = null;
    }
    engineRef.current = null;
    failedKeyRef.current = null;
    setIsReady(false);
    // A load cancelled by dispose() returns early without touching state, so
    // the loading indicators have to be reset here.
    setIsLoading(false);
    setLoadingProgress(null);
    if (heldKeyRef.current) {
      releaseSharedEngine(heldKeyRef.current);
      heldKeyRef.current = null;
    }
  }, []);

  /**
   * Generate text for `prompt`, streaming tokens into `completion`.
   * Resolves with `result.text`; on a generation error the error is reported
   * through `fail` and the text streamed so far is returned instead.
   * Throws if no engine is loaded.
   */
  const complete = useCallback(async (prompt, opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    setIsGenerating(true);
    setCompletion("");
    setTps(null);
    stoppedRef.current = false;
    let fullText = "";
    try {
      const result = await engine.generate(prompt, {
        maxTokens: opts.maxTokens ?? 256,
        sampling: { temperature: opts.temperature ?? .7 },
        systemPrompt: opts.system,
        stopSequences: opts.stopSequences,
        onToken: (token) => {
          if (stoppedRef.current) return;
          fullText += token;
          setCompletion(fullText);
        }
      });
      setTps(result.tokensPerSecond);
      setIsGenerating(false);
      return result.text;
    } catch (e) {
      setIsGenerating(false);
      fail(e);
      return fullText;
    }
  }, [fail]);

  /**
   * Generate a structured object via `engine.generateObject`.
   * Resolves with `{ object, attempts }`; errors are reported through `fail`
   * AND rethrown (always as an Error instance).
   */
  const generateObject = useCallback(async (prompt, opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    setIsGenerating(true);
    setCompletion("");
    setTps(null);
    setAttempts(0);
    stoppedRef.current = false;
    try {
      const result = await engine.generateObject(prompt, {
        schema: opts.schema,
        maxRetries: opts.maxRetries,
        maxTokens: opts.maxTokens ?? 256,
        sampling: { temperature: opts.temperature ?? .7 },
        systemPrompt: opts.system,
        stopSequences: opts.stopSequences
      });
      setCompletion(result.text);
      setAttempts(result.attempts);
      setIsGenerating(false);
      return {
        object: result.object,
        attempts: result.attempts
      };
    } catch (e) {
      setIsGenerating(false);
      fail(e);
      throw e instanceof Error ? e : new Error(String(e));
    }
  }, [fail]);

  /**
   * Describe an image. `image` is either a URL / data URL string (decoded with
   * `decodeImage`) or an already-decoded value passed to the engine as-is.
   * Throws if no engine is loaded, the engine lacks vision, or decoding fails.
   */
  const describeImage = useCallback(async (image, prompt = "Describe this image.", opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    if (!engine.hasVision) throw new Error("Engine was not created with enableVision: true.");
    setIsGenerating(true);
    setCompletion("");
    setTps(null);
    stoppedRef.current = false;
    let decoded;
    try {
      decoded = typeof image === "string" ? await decodeImage(image) : image;
    } catch (e) {
      // Decoding failed before generation started: clear the busy flag so the
      // UI is not stuck in "generating", then surface the error to the caller.
      setIsGenerating(false);
      throw e;
    }
    let fullText = "";
    try {
      const result = await engine.describeImage(decoded, prompt, {
        maxTokens: opts.maxTokens ?? 150,
        sampling: { temperature: opts.temperature ?? .7 },
        systemPrompt: opts.system,
        stopSequences: opts.stopSequences,
        onToken: (token) => {
          if (stoppedRef.current) return;
          fullText += token;
          setCompletion(fullText);
        }
      });
      setTps(result.tokensPerSecond);
      setIsGenerating(false);
      return result.text;
    } catch (e) {
      setIsGenerating(false);
      fail(e);
      return fullText;
    }
  }, [fail]);

  /** Embed `text` (task type defaults to "query"). Requires an embedding engine. */
  const embed = useCallback(async (text, opts = {}) => {
    const engine = engineRef.current;
    if (!engine) throw new Error("Engine not loaded. Call load() first.");
    if (!engine.isEmbedding) throw new Error("Engine was not created with embedding: true.");
    return engine.embed(text, { taskType: opts.taskType ?? "query" });
  }, []);

  /**
   * Dot product of the embeddings of `a` (as "query") and `b` (as "document"),
   * taken over the shorter of the two vectors.
   * NOTE(review): this equals cosine similarity only if the engine returns
   * unit-length vectors — not verifiable from this file.
   */
  const similarity = useCallback(async (a, b) => {
    const [va, vb] = await Promise.all([embed(a, { taskType: "query" }), embed(b, { taskType: "document" })]);
    let dot = 0;
    const n = Math.min(va.length, vb.length);
    for (let i = 0; i < n; i++) dot += va[i] * vb[i];
    return dot;
  }, [embed]);

  // Mount: optionally start loading. Unmount: drop the engine reference.
  useEffect(() => {
    if (autoLoad) load();
    return () => {
      if (timeoutRef.current) clearTimeout(timeoutRef.current);
      engineRef.current = null;
      if (heldKeyRef.current) {
        releaseSharedEngine(heldKeyRef.current);
        heldKeyRef.current = null;
      }
    };
  }, []);

  // Config change: if an engine (or in-flight load) for another key is held,
  // release it, reset derived state, and load the new configuration.
  useEffect(() => {
    if (heldKeyRef.current === modelKey) return;
    failedKeyRef.current = null;
    if (heldKeyRef.current === null) return;
    engineRef.current = null;
    releaseSharedEngine(heldKeyRef.current);
    heldKeyRef.current = null;
    setIsReady(false);
    setCompletion("");
    setTps(null);
    setError(null);
    setErrorKind(null);
    load();
  }, [modelKey]);

  return {
    complete,
    generateObject,
    describeImage,
    embed,
    similarity,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    attempts,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  };
}
398
+
399
+ //#endregion
400
+ //#region src/browser/use-memory.ts
401
+ /**
402
+ * React hook for on-device memory / RAG.
403
+ *
404
+ * Wraps the `@tryhamster/gerbil/memory` module with a native embedder (running
405
+ * on the WebGPU engine) and a persistent IndexedDB store, so an agent can
406
+ * remember things across turns AND across sessions — with zero server.
407
+ *
408
+ * ```tsx
409
+ * import { useMemory } from "@tryhamster/gerbil/hooks";
410
+ *
411
+ * const memory = useMemory();
412
+ * await memory.add("The user prefers TypeScript.");
413
+ * const { context } = await memory.recall("what does the user like?", { tokenBudget: 256 });
414
+ * ```
415
+ *
416
+ * The embedding model and the memory module are both imported lazily, so this
417
+ * hook adds nothing to your bundle until it's used.
418
+ */
419
/**
 * React hook exposing a memory store backed by an embedding engine and an
 * IndexedDB store. The memory instance is created lazily on first use.
 *
 * @param {object} [options]
 * @param {string} [options.model] - Embedding model passed to `useEngine`.
 * @param {string} [options.namespace="gerbil-memory"] - IndexedDB database name.
 * @returns {object} `{ add, recall, search, get, remove, clear, size,
 *   isLoading, loadingProgress, isReady, error }`. Every method awaits
 *   initialisation first and forwards to the memory instance
 *   (`remove` forwards to its `delete`).
 */
function useMemory(options = {}) {
  const { model, namespace = "gerbil-memory" } = options;
  const embedder = useEngine({
    model,
    embedding: true,
    autoLoad: false
  });
  const memRef = useRef(null);
  // In-flight initialisation, shared by concurrent callers.
  const initRef = useRef(null);
  const [isReady, setIsReady] = useState(false);

  /** Resolve the memory instance, initialising it at most once at a time. */
  const ensure = useCallback(() => {
    if (memRef.current) return Promise.resolve(memRef.current);
    if (initRef.current) return initRef.current;
    const pending = (async () => {
      if (!embedder.isReady) await embedder.load();
      const [{ createMemory }, { createIndexedDBStore }] = await Promise.all([import("../memory-D1P7Tmda.mjs"), import("../indexeddb-store-ClH12Xnl.mjs")]);
      const mem = createMemory({
        embed: async (texts) => Promise.all(texts.map((t) => embedder.embed(t))),
        store: createIndexedDBStore({ dbName: namespace })
      });
      memRef.current = mem;
      setIsReady(true);
      return mem;
    })();
    initRef.current = pending;
    // Do not cache a failed initialisation: clear the slot so the next call
    // retries instead of receiving the same rejection forever. Callers of this
    // attempt still observe the rejection through `pending` itself.
    pending.catch(() => {
      if (initRef.current === pending) initRef.current = null;
    });
    return pending;
  }, [embedder, namespace]);

  return {
    add: useCallback(async (text, opts) => (await ensure()).add(text, opts), [ensure]),
    recall: useCallback(async (query, opts) => (await ensure()).recall(query, opts), [ensure]),
    search: useCallback(async (query, opts) => (await ensure()).search(query, opts), [ensure]),
    get: useCallback(async (id) => (await ensure()).get(id), [ensure]),
    remove: useCallback(async (id) => (await ensure()).delete(id), [ensure]),
    clear: useCallback(async () => (await ensure()).clear(), [ensure]),
    size: useCallback(async () => (await ensure()).size(), [ensure]),
    isLoading: embedder.isLoading,
    loadingProgress: embedder.loadingProgress,
    isReady,
    error: embedder.error
  };
}
459
+
460
+ //#endregion
461
+ //#region src/browser/use-modalities.ts
462
+ /**
463
+ * Per-modality convenience hooks built on {@link useEngine}.
464
+ *
465
+ * `useEngine` is the general/advanced hook (it can do text, vision, and
466
+ * embeddings via options). These wrappers give each modality a focused,
467
+ * self-documenting surface so app code reads cleanly:
468
+ *
469
+ * ```tsx
470
+ * import { useText, useVision, useEmbedding } from "@tryhamster/gerbil/gpu/hooks";
471
+ *
472
+ * const { complete } = useText(); // text generation
473
+ * const { describeImage } = useVision(); // image → text
474
+ * const { embed, similarity } = useEmbedding(); // text → vector
475
+ * ```
476
+ *
477
+ * They share the same engine registry as `useEngine`, so requesting the same
478
+ * model from several places loads it once.
479
+ */
480
+ /** Text generation. */
481
/**
 * Text-generation view over `useEngine`: forwards `options` unchanged and
 * returns only the text-related members of the engine hook.
 *
 * @param {object} [options] - Passed straight to `useEngine`.
 * @returns {object} `{ complete, completion, isLoading, loadingProgress,
 *   isGenerating, tps, error, errorKind, isReady, load, stop, dispose }`.
 */
function useText(options = {}) {
  const {
    complete,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  } = useEngine(options);
  return {
    complete,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  };
}
498
+ /**
499
+ * Structured-output generation — generate, parse JSON, validate, and RETRY
500
+ * until valid. On-device tokens are free, so re-rolling malformed JSON is cheap.
501
+ *
502
+ * ```tsx
503
+ * const { object, generate, isGenerating } = useObject<{ name: string; age: number }>();
504
+ * await generate('Extract {name, age} from: "I am Sarah, 28"', {
505
+ * schema: { required: ["name", "age"] },
506
+ * });
507
+ * // object === { name: "Sarah", age: 28 }
508
+ * ```
509
+ */
510
/**
 * Structured-output view over `useEngine`. Keeps the most recently generated
 * object in state.
 *
 * @param {object} [options] - Passed straight to `useEngine`.
 * @returns {object} `{ object, generate, attempts, isLoading, loadingProgress,
 *   isGenerating, error, errorKind, isReady, load, stop, dispose }`.
 *   `generate(prompt, opts)` loads the engine first when it is not ready,
 *   stores the resulting object in `object`, and resolves with it.
 */
function useObject(options = {}) {
  const engine = useEngine(options);
  const [object, setObject] = useState(null);

  const generate = useCallback(async (prompt, opts) => {
    if (!engine.isReady) {
      await engine.load();
    }
    const { object: produced } = await engine.generateObject(prompt, opts);
    setObject(produced);
    return produced;
  }, [engine]);

  const {
    attempts,
    isLoading,
    loadingProgress,
    isGenerating,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  } = engine;
  return {
    object,
    generate,
    attempts,
    isLoading,
    loadingProgress,
    isGenerating,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  };
}
533
+ /** Image understanding (image in → text out). Builds the vision tower. */
534
/**
 * Image-understanding view over `useEngine`. Always requests a vision-enabled
 * engine: `enableVision: true` overrides whatever `options` specifies.
 *
 * @param {object} [options] - Remaining options are passed to `useEngine`.
 * @returns {object} `{ describeImage, completion, isLoading, loadingProgress,
 *   isGenerating, tps, error, errorKind, isReady, load, stop, dispose }`.
 */
function useVision(options = {}) {
  const visionOptions = { ...options, enableVision: true };
  const {
    describeImage,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  } = useEngine(visionOptions);
  return {
    describeImage,
    completion,
    isLoading,
    loadingProgress,
    isGenerating,
    tps,
    error,
    errorKind,
    isReady,
    load,
    stop,
    dispose
  };
}
554
+ /** Text embeddings + similarity. */
555
/**
 * Embedding view over `useEngine`. Always requests an embedding engine:
 * `embedding: true` overrides whatever `options` specifies.
 *
 * @param {object} [options] - Remaining options are passed to `useEngine`.
 * @returns {object} `{ embed, similarity, isLoading, loadingProgress, error,
 *   errorKind, isReady, load, dispose }`.
 */
function useEmbedding(options = {}) {
  const embeddingOptions = { ...options, embedding: true };
  const {
    embed,
    similarity,
    isLoading,
    loadingProgress,
    error,
    errorKind,
    isReady,
    load,
    dispose
  } = useEngine(embeddingOptions);
  return {
    embed,
    similarity,
    isLoading,
    loadingProgress,
    error,
    errorKind,
    isReady,
    load,
    dispose
  };
}
572
+ /**
573
+ * Conversational chat hook — manages the message list and streams replies.
574
+ * Multi-turn context is handled for you (the full history is sent each turn).
575
+ *
576
+ * ```tsx
577
+ * const { messages, send, isGenerating } = useChat();
578
+ * <button onClick={() => send("Hello!")}>Send</button>
579
+ * ```
580
+ */
581
/**
 * Chat view over `useEngine`: owns the message list, sends the whole history
 * on every turn, and mirrors the streamed completion into the trailing
 * assistant message.
 *
 * @param {object} [options] - `system` is the hook-level system prompt; every
 *   other key is passed to `useEngine`.
 * @returns {object} `{ messages, send, sendMessage, regenerate, setMessages,
 *   clear, status, isGenerating, isLoading, loadingProgress, isReady, tps,
 *   error, errorKind, stop, load }`. `status` is "error", "submitted" (busy,
 *   nothing streamed yet), "streaming", or "ready".
 */
function useChat(options = {}) {
  const { system, ...engineOptions } = options;
  const engine = useEngine(engineOptions);
  const [messages, updateMessages] = useState([]);
  // Mirror of `messages` so callbacks can read the latest list without
  // listing it as a dependency.
  const latestMessages = useRef([]);
  latestMessages.current = messages;

  /** Copy of `list` whose last element is an assistant message with `content`. */
  const withLastAssistant = (list, content) => [
    ...list.slice(0, -1),
    { role: "assistant", content }
  ];

  // While generating, keep the trailing assistant message in sync with the
  // streamed completion text.
  useEffect(() => {
    if (!engine.isGenerating) return;
    updateMessages((prev) => {
      const hasAssistantTail = prev.length > 0 && prev[prev.length - 1].role === "assistant";
      return hasAssistantTail ? withLastAssistant(prev, engine.completion) : prev;
    });
  }, [engine.completion, engine.isGenerating]);

  /**
   * Run one turn: show `history` plus an empty assistant placeholder, make
   * sure the engine is loaded, generate, then store the final text.
   * NOTE(review): the hook-level system prompt is sent both as a leading
   * "system" turn and as the `system` option — confirm the engine does not
   * apply it twice.
   */
  const run = useCallback(async (history, opts) => {
    const placeholder = { role: "assistant", content: "" };
    updateMessages([...history, placeholder]);
    if (!engine.isReady) {
      await engine.load();
    }
    let turns = history;
    if (system) {
      turns = [{ role: "system", content: system }, ...history];
    }
    const full = await engine.complete(turns, {
      ...opts,
      system: opts.system ?? system
    });
    updateMessages((prev) => (prev.length === 0 ? prev : withLastAssistant(prev, full)));
    return full;
  }, [engine, system]);

  /** Append a user message and run a turn; resolves "" for blank input or while busy. */
  const send = useCallback(async (text, opts = {}) => {
    if (!text.trim() || engine.isGenerating) return "";
    const userTurn = { role: "user", content: text };
    return run([...latestMessages.current, userTurn], opts);
  }, [engine.isGenerating, run]);

  /** Drop trailing assistant messages and re-run the turn; "" if busy or nothing is left. */
  const regenerate = useCallback(async (opts = {}) => {
    if (engine.isGenerating) return "";
    const history = latestMessages.current.slice();
    while (history.length > 0 && history[history.length - 1].role === "assistant") {
      history.pop();
    }
    if (history.length === 0) return "";
    return run(history, opts);
  }, [engine.isGenerating, run]);

  const setMessages = useCallback((next) => updateMessages(next), []);
  const clear = useCallback(() => updateMessages([]), []);

  let status = "ready";
  if (engine.error) {
    status = "error";
  } else if (engine.isGenerating) {
    status = engine.completion.length === 0 ? "submitted" : "streaming";
  }

  return {
    messages,
    send,
    sendMessage: send,
    regenerate,
    setMessages,
    clear,
    status,
    isGenerating: engine.isGenerating,
    isLoading: engine.isLoading,
    loadingProgress: engine.loadingProgress,
    isReady: engine.isReady,
    tps: engine.tps,
    error: engine.error,
    errorKind: engine.errorKind,
    // NOTE(review): `stop` only halts streaming into `completion`; the final
    // text stored by `run` is whatever `complete` resolves with.
    stop: engine.stop,
    load: engine.load
  };
}
656
/**
 * One-shot streaming completion with a managed text input. The returned
 * shape follows the Vercel AI SDK's `useCompletion` closely enough to act as
 * a near drop-in replacement, while running on-device.
 *
 * @param {object} [options] - Forwarded unchanged to `useText`.
 * @returns {object} `completion`, `complete`, the `input` state with its
 *   setters/handlers, and the underlying hook's `isReady`,
 *   `loadingProgress`, `stop`, `error` and `load`. `isLoading` reports
 *   whether a generation is running (the underlying `isGenerating`).
 */
function useCompletion(options = {}) {
	const textHook = useText(options);
	const [input, setInput] = useState("");

	// Loads the model on first use, then delegates to the underlying hook.
	const complete = useCallback(async (prompt, opts) => {
		if (!textHook.isReady) {
			await textHook.load();
		}
		return textHook.complete(prompt, opts);
	}, [textHook]);

	const handleInputChange = useCallback((event) => {
		setInput(event.target.value);
	}, []);

	// Form handler: submits the current input (if not blank) and clears it.
	const handleSubmit = useCallback((event) => {
		event?.preventDefault?.();
		const pending = input;
		if (pending.trim()) {
			setInput("");
			complete(pending);
		}
	}, [input, complete]);

	const { completion, isGenerating, isReady, loadingProgress, stop, error, load } = textHook;
	return {
		completion,
		complete,
		input,
		setInput,
		handleInputChange,
		handleSubmit,
		isLoading: isGenerating,
		isReady,
		loadingProgress,
		stop,
		error,
		load
	};
}
+
691
+ //#endregion
692
+ //#region src/browser/use-stt.ts
693
+ /**
694
+ * React hook for native speech-to-text in the browser.
695
+ *
696
+ * Wraps `MoonshineSTT` — raw 16 kHz mono PCM in, transcript out (encoder-decoder
697
+ * ASR, no streaming/partial API). This hook captures mic audio between
698
+ * start/stop, resamples it to 16 kHz mono, and runs a single transcribe() on the
699
+ * finalized utterance. The GPU engine is dynamically imported so it stays out of
700
+ * the main bundle until STT is actually used.
701
+ *
702
+ * @example
703
+ * ```tsx
704
+ * import { useSTT } from "@tryhamster/gerbil/gpu/hooks";
705
+ *
706
+ * const { startRecording, stopRecording, transcript, isRecording } = useSTT();
707
+ * ```
708
+ */
709
/** Sample rate (Hz) that audio is resampled to before transcription. */
const MOONSHINE_SAMPLE_RATE = 16e3;
/**
 * Downmix to mono and linearly resample Float32 PCM to 16 kHz.
 *
 * The output length is derived from the first channel. A channel shorter than
 * the first contributes silence for its missing samples (instead of turning
 * the mix into NaN); samples beyond the first channel's length are ignored.
 *
 * @param {ArrayLike<number>[]} channels - One sample array per input channel.
 * @param {number} inputRate - Sample rate of `channels` in Hz.
 * @returns {Float32Array} Mono samples at 16 kHz (the mixed buffer itself when
 *   `inputRate` is already 16 kHz).
 * @throws {RangeError} If `inputRate` is not a positive finite number.
 */
function toMono16k(channels, inputRate) {
	// Fail loudly on a bad rate: 0 used to surface as an opaque
	// "Invalid typed array length", NaN/negative as silently empty audio.
	if (!Number.isFinite(inputRate) || inputRate <= 0) {
		throw new RangeError(`toMono16k: inputRate must be a positive finite number, got ${inputRate}`);
	}
	const inLen = channels[0]?.length ?? 0;
	const mono = new Float32Array(inLen);
	for (const ch of channels) {
		// Clamp to this channel's own length so a ragged channel never reads
		// `undefined` (which would poison the sum with NaN).
		const usable = Math.min(inLen, ch.length);
		for (let i = 0; i < usable; i++) mono[i] += ch[i] / channels.length;
	}
	if (inputRate === MOONSHINE_SAMPLE_RATE) return mono;
	const ratio = MOONSHINE_SAMPLE_RATE / inputRate;
	const outLen = Math.max(0, Math.floor(inLen * ratio));
	const out = new Float32Array(outLen);
	for (let i = 0; i < outLen; i++) {
		// Position of output sample i on the input timeline.
		const srcPos = i / ratio;
		const i0 = Math.floor(srcPos);
		// Clamp the right-hand neighbour so the last sample is held at the edge.
		const i1 = Math.min(i0 + 1, inLen - 1);
		const frac = srcPos - i0;
		out[i] = mono[i0] * (1 - frac) + mono[i1] * frac;
	}
	return out;
}
/**
 * React hook for on-device speech-to-text.
 *
 * Captures microphone audio between `startRecording()` and `stopRecording()`,
 * converts it with `toMono16k`, and runs a single `transcribe()` call on the
 * finished utterance. The engine module is imported lazily on first load.
 *
 * @param {object} [options]
 * @param {string} [options.repo] - Model repo; defaults to `DEFAULT_MODELS.stt`.
 * @param {boolean} [options.autoLoad=false] - Load the model on mount.
 * @param {() => void} [options.onReady] - Called once the model has loaded.
 * @param {(err: Error) => void} [options.onError] - Called on load, capture or
 *   transcription failures.
 * @returns {object} `load`, `startRecording`, `stopRecording`, `dispose` and
 *   the state flags `isLoading`, `loadingProgress`, `isReady`, `isRecording`,
 *   `isTranscribing`, `transcript`, `audioSeconds`, `error` (message string).
 */
function useSTT(options = {}) {
	const { repo = DEFAULT_MODELS.stt, autoLoad = false, onReady, onError } = options;
	const sttRef = useRef(null);
	const loadingRef = useRef(false);
	// Synchronous guard for startRecording(): `isRecording` is React state and
	// only becomes true after the awaits below, so it cannot stop overlapping
	// calls from each opening their own microphone stream.
	const startingRef = useRef(false);
	const mediaStreamRef = useRef(null);
	const audioCtxRef = useRef(null);
	const sourceRef = useRef(null);
	const processorRef = useRef(null);
	const chunksRef = useRef([]);
	const sampleRateRef = useRef(MOONSHINE_SAMPLE_RATE);
	const [isLoading, setIsLoading] = useState(false);
	const [loadingProgress, setLoadingProgress] = useState(null);
	const [isReady, setIsReady] = useState(false);
	const [isRecording, setIsRecording] = useState(false);
	const [isTranscribing, setIsTranscribing] = useState(false);
	const [transcript, setTranscript] = useState("");
	const [audioSeconds, setAudioSeconds] = useState(null);
	const [error, setError] = useState(null);

	// Loads the STT engine once; a no-op when already loaded or loading.
	const load = useCallback(async () => {
		if (sttRef.current || loadingRef.current) return;
		loadingRef.current = true;
		if (typeof navigator === "undefined" || !("gpu" in navigator)) {
			loadingRef.current = false;
			const err = /* @__PURE__ */ new Error("WebGPU is not available in this browser. Native speech-to-text requires Chrome/Edge 113+, Firefox 141+, or Safari 26+.");
			setError(err.message);
			onError?.(err);
			return;
		}
		setIsLoading(true);
		setError(null);
		setLoadingProgress({ status: "Initializing speech-to-text..." });
		try {
			const { MoonshineSTT } = await import("./index.mjs");
			sttRef.current = await MoonshineSTT.create({
				repo,
				onProgress: (loaded, total, message) => {
					setLoadingProgress({
						status: message,
						progress: total > 0 ? Math.round(loaded / total * 100) : void 0
					});
				}
			});
			setIsReady(true);
			setIsLoading(false);
			setLoadingProgress(null);
			onReady?.();
		} catch (e) {
			// Clear the flag so a later load() can retry.
			loadingRef.current = false;
			const err = e instanceof Error ? e : new Error(String(e));
			setError(err.message);
			setIsLoading(false);
			setLoadingProgress(null);
			onError?.(err);
		}
	}, [
		repo,
		onReady,
		onError
	]);

	// Releases the audio graph, the AudioContext and the microphone tracks.
	const teardownCapture = useCallback(() => {
		if (processorRef.current) {
			// Detach the handler so no chunk is pushed after teardown.
			processorRef.current.onaudioprocess = null;
			processorRef.current.disconnect();
		}
		sourceRef.current?.disconnect();
		if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
		for (const t of mediaStreamRef.current?.getTracks() ?? []) t.stop();
		processorRef.current = null;
		sourceRef.current = null;
		audioCtxRef.current = null;
		mediaStreamRef.current = null;
	}, []);

	// Loads the model if needed, asks for the microphone and starts capturing.
	const startRecording = useCallback(async () => {
		if (isRecording || startingRef.current) return;
		startingRef.current = true;
		try {
			if (!sttRef.current) await load();
			if (!sttRef.current) return;
			setTranscript("");
			setAudioSeconds(null);
			setError(null);
			chunksRef.current = [];
			let stream;
			try {
				stream = await navigator.mediaDevices.getUserMedia({ audio: true });
			} catch (e) {
				const err = e instanceof Error ? e : new Error(String(e));
				const name = err.name;
				if (name === "NotAllowedError" || name === "SecurityError") setError("Microphone access denied. Allow mic access for this site and try again.");
				else if (name === "NotFoundError" || name === "DevicesNotFoundError") setError("No microphone found. Connect a mic and try again.");
				else setError(err.message);
				onError?.(err);
				return;
			}
			mediaStreamRef.current = stream;
			const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
			if (!AudioCtx) {
				// The stream is already open: release it rather than leaving
				// the microphone live, and report why recording did not start.
				teardownCapture();
				const err = new Error("Web Audio API is not available in this browser.");
				setError(err.message);
				onError?.(err);
				return;
			}
			try {
				const ctx = new AudioCtx();
				audioCtxRef.current = ctx;
				sampleRateRef.current = ctx.sampleRate;
				// The context is created after awaits, so the browser may start
				// it suspended; without a resume no audio callbacks would fire.
				if (ctx.state === "suspended") await ctx.resume();
				const source = ctx.createMediaStreamSource(stream);
				sourceRef.current = source;
				const processor = ctx.createScriptProcessor(4096, 1, 1);
				processorRef.current = processor;
				processor.onaudioprocess = (ev) => {
					const input = ev.inputBuffer.getChannelData(0);
					// Copy: the event's buffer may be reused by the browser.
					chunksRef.current.push(new Float32Array(input));
				};
				source.connect(processor);
				processor.connect(ctx.destination);
			} catch (e) {
				// Building the graph failed part-way: free the mic and context.
				teardownCapture();
				const err = e instanceof Error ? e : new Error(String(e));
				setError(err.message);
				onError?.(err);
				return;
			}
			setIsRecording(true);
		} finally {
			startingRef.current = false;
		}
	}, [
		isRecording,
		load,
		teardownCapture,
		onError
	]);

	// Stops capture, joins the recorded chunks and transcribes the utterance.
	const stopRecording = useCallback(async () => {
		if (!isRecording) return;
		setIsRecording(false);
		const inputRate = sampleRateRef.current;
		const captured = chunksRef.current;
		chunksRef.current = [];
		teardownCapture();
		const total = captured.reduce((n, c) => n + c.length, 0);
		const joined = new Float32Array(total);
		let off = 0;
		for (const c of captured) {
			joined.set(c, off);
			off += c.length;
		}
		const pcm = toMono16k([joined], inputRate);
		// NOTE(review): 127 samples looks like the model's minimum input
		// length — confirm against the engine.
		if (pcm.length < 127) {
			setError("Recording was too short. Hold the mic a moment longer.");
			return;
		}
		setIsTranscribing(true);
		setError(null);
		try {
			const result = await sttRef.current.transcribe(pcm);
			setTranscript(result.text);
			setAudioSeconds(result.audioSeconds);
		} catch (e) {
			const err = e instanceof Error ? e : new Error(String(e));
			setError(err.message);
			onError?.(err);
		} finally {
			setIsTranscribing(false);
		}
	}, [
		isRecording,
		teardownCapture,
		onError
	]);

	// Stops any capture and destroys the engine so it can be loaded again.
	const dispose = useCallback(() => {
		teardownCapture();
		if (sttRef.current) {
			sttRef.current.destroy?.();
			sttRef.current = null;
			loadingRef.current = false;
			setIsReady(false);
		}
	}, [teardownCapture]);

	// Mount-only effect: optional eager load, plus cleanup on unmount.
	useEffect(() => {
		if (autoLoad) load();
		return () => {
			teardownCapture();
			if (sttRef.current) {
				sttRef.current.destroy?.();
				sttRef.current = null;
			}
		};
	}, []);

	return {
		load,
		startRecording,
		stopRecording,
		dispose,
		isLoading,
		loadingProgress,
		isReady,
		isRecording,
		isTranscribing,
		transcript,
		audioSeconds,
		error
	};
}
+
911
+ //#endregion
912
+ //#region src/browser/use-tts.ts
913
+ /**
914
+ * React hook for native text-to-speech in the browser.
915
+ *
916
+ * Wraps the engine's `speak()` (Kani-TTS-2) — the codec-LM backbone emits
917
+ * NanoCodec audio tokens, the NanoCodec decoder turns them into 22.05 kHz mono
918
+ * PCM, and this hook plays it through the Web Audio API (and keeps the clip for
919
+ * instant replay). The GPU engine is dynamically imported so it stays out of the
920
+ * main bundle until TTS is actually used.
921
+ *
922
+ * @example
923
+ * ```tsx
924
+ * import { useTTS } from "@tryhamster/gerbil/gpu/hooks";
925
+ *
926
+ * const { speak, isSynthesizing, isPlaying } = useTTS();
927
+ * <button onClick={() => speak("Hello from on-device TTS.")}>Speak</button>
928
+ * ```
929
+ */
930
/** Fallback sample rate in Hz (22.05 kHz) used when a synthesis result reports none. */
const KANI_SAMPLE_RATE = 22050;
/**
 * Voices offered out of the box. Kani-TTS-2-en selects a voice through an
 * `en_us`-style language tag prepended to the text; the English checkpoint
 * ships the US-English voice only.
 */
const KANI_VOICES = [
	{ value: "en_us", label: "English (US)" }
];
/**
 * Wraps mono Float32 PCM in a single-channel AudioBuffer.
 *
 * @param {BaseAudioContext} ctx - Context used to allocate the buffer.
 * @param {Float32Array} pcm - Mono samples.
 * @param {number} sampleRate - Sample rate of `pcm` in Hz.
 * @returns {AudioBuffer} A one-channel buffer holding a copy of `pcm`.
 */
function pcmToAudioBuffer(ctx, pcm, sampleRate) {
	const audioBuffer = ctx.createBuffer(1, pcm.length, sampleRate);
	const channel = audioBuffer.getChannelData(0);
	channel.set(pcm);
	return audioBuffer;
}
/**
 * React hook for on-device text-to-speech.
 *
 * `speak()` synthesizes PCM with the engine, wraps it in an AudioBuffer, plays
 * it through the Web Audio API and keeps the clip so `replay()` is instant.
 * The engine module is imported lazily on first load.
 *
 * @param {object} [options]
 * @param {string} [options.repo] - Model repo; defaults to `DEFAULT_MODELS.tts`.
 * @param {boolean} [options.autoLoad=false] - Load the model on mount.
 * @param {() => void} [options.onReady] - Called once the model has loaded.
 * @param {(err: Error) => void} [options.onError] - Called on load or
 *   synthesis failures.
 * @returns {object} `load`, `speak`, `replay`, `stop`, `dispose` and the state
 *   flags `isLoading`, `loadingProgress`, `isReady`, `isSynthesizing`,
 *   `isPlaying`, `hasAudio`, `audioSeconds`, `rtf`, `error` (message string).
 */
function useTTS(options = {}) {
	const { repo = DEFAULT_MODELS.tts, autoLoad = false, onReady, onError } = options;
	const engineRef = useRef(null);
	const loadingRef = useRef(false);
	const audioCtxRef = useRef(null);
	const sourceRef = useRef(null);
	const bufferRef = useRef(null);
	const [isLoading, setIsLoading] = useState(false);
	const [loadingProgress, setLoadingProgress] = useState(null);
	const [isReady, setIsReady] = useState(false);
	const [isSynthesizing, setIsSynthesizing] = useState(false);
	const [isPlaying, setIsPlaying] = useState(false);
	const [hasAudio, setHasAudio] = useState(false);
	const [audioSeconds, setAudioSeconds] = useState(null);
	const [rtf, setRtf] = useState(null);
	const [error, setError] = useState(null);

	// Loads the TTS engine once; a no-op when already loaded or loading.
	const load = useCallback(async () => {
		if (engineRef.current || loadingRef.current) return;
		loadingRef.current = true;
		if (typeof navigator === "undefined" || !("gpu" in navigator)) {
			loadingRef.current = false;
			const err = /* @__PURE__ */ new Error("WebGPU is not available in this browser. Native text-to-speech requires Chrome/Edge 113+, Firefox 141+, or Safari 26+.");
			setError(err.message);
			onError?.(err);
			return;
		}
		setIsLoading(true);
		setError(null);
		setLoadingProgress({ status: "Initializing TTS..." });
		try {
			const { WebGPUEngine } = await import("./index.mjs");
			engineRef.current = await WebGPUEngine.create({
				repo,
				onProgress: (loaded, total, message) => {
					setLoadingProgress({
						status: message,
						progress: total > 0 ? Math.round(loaded / total * 100) : void 0
					});
				}
			});
			setIsReady(true);
			setIsLoading(false);
			setLoadingProgress(null);
			onReady?.();
		} catch (e) {
			// Clear the flag so a later load() can retry.
			loadingRef.current = false;
			const err = e instanceof Error ? e : new Error(String(e));
			setError(err.message);
			setIsLoading(false);
			setLoadingProgress(null);
			onError?.(err);
		}
	}, [
		repo,
		onReady,
		onError
	]);

	// Halts the current playback, if any.
	const stop = useCallback(() => {
		if (sourceRef.current) {
			try {
				// Detach first so stopping does not fire the ended handler.
				sourceRef.current.onended = null;
				sourceRef.current.stop();
			} catch {}
			sourceRef.current = null;
		}
		setIsPlaying(false);
	}, []);

	// Plays the retained clip from the start, replacing any current playback.
	const playBuffer = useCallback(async () => {
		const buffer = bufferRef.current;
		if (!buffer) return;
		const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
		if (!AudioCtx) return;
		if (!audioCtxRef.current || audioCtxRef.current.state === "closed") audioCtxRef.current = new AudioCtx();
		const ctx = audioCtxRef.current;
		if (!ctx) return;
		if (ctx.state === "suspended") await ctx.resume();
		stop();
		const source = ctx.createBufferSource();
		source.buffer = buffer;
		source.connect(ctx.destination);
		source.onended = () => {
			setIsPlaying(false);
			sourceRef.current = null;
		};
		sourceRef.current = source;
		setIsPlaying(true);
		source.start();
	}, [stop]);

	// Synthesizes `text` and plays the result. `opts` may carry `voice`,
	// `temperature`, `topP` and `repetitionPenalty`.
	const speak = useCallback(async (text, opts = {}) => {
		if (!text.trim()) return;
		const AudioCtx = window.AudioContext ?? window.webkitAudioContext;
		if (!AudioCtx) {
			// Without Web Audio nothing could ever be played, so report that
			// up front instead of synthesizing a clip that is then dropped.
			const err = new Error("Web Audio API is not available in this browser.");
			setError(err.message);
			onError?.(err);
			return;
		}
		// Create/resume the context and play one silent sample before any
		// await. NOTE(review): presumably this keeps the context tied to the
		// triggering user gesture for autoplay policies — confirm.
		if (!audioCtxRef.current || audioCtxRef.current.state === "closed") audioCtxRef.current = new AudioCtx();
		const warmCtx = audioCtxRef.current;
		// Best-effort: a rejected resume() must not become an unhandled
		// rejection; playBuffer() resumes again before real playback.
		if (warmCtx.state === "suspended") warmCtx.resume().catch(() => {});
		try {
			const warm = warmCtx.createBufferSource();
			warm.buffer = warmCtx.createBuffer(1, 1, warmCtx.sampleRate);
			warm.connect(warmCtx.destination);
			warm.start(0);
		} catch {
			// Warm-up is optional; playback below still goes ahead.
		}
		if (!engineRef.current) await load();
		const engine = engineRef.current;
		if (!engine) return;
		setIsSynthesizing(true);
		setError(null);
		try {
			const t0 = performance.now();
			const { pcm, sampleRate, audioSeconds: secs } = await engine.speak(text, {
				languageTag: opts.voice ?? "en_us",
				temperature: opts.temperature ?? 1,
				topP: opts.topP ?? .95,
				repetitionPenalty: opts.repetitionPenalty ?? 1.1
			});
			const wall = (performance.now() - t0) / 1e3;
			// dispose() may have closed the context while synthesis ran.
			if (!audioCtxRef.current || audioCtxRef.current.state === "closed") audioCtxRef.current = new AudioCtx();
			bufferRef.current = pcmToAudioBuffer(audioCtxRef.current, pcm, sampleRate ?? KANI_SAMPLE_RATE);
			// Only flag audio as available once a buffer really exists.
			setHasAudio(true);
			setAudioSeconds(secs);
			// Real-time factor: seconds of audio produced per wall-clock second.
			setRtf(wall > 0 ? secs / wall : null);
			setIsSynthesizing(false);
			await playBuffer();
		} catch (e) {
			const err = e instanceof Error ? e : new Error(String(e));
			setError(err.message);
			setIsSynthesizing(false);
			onError?.(err);
		}
	}, [
		load,
		playBuffer,
		onError
	]);

	// Replays the most recent clip without re-synthesizing.
	const replay = useCallback(async () => {
		if (!bufferRef.current) return;
		await playBuffer();
	}, [playBuffer]);

	// Stops playback, drops the clip and context, and destroys the engine.
	const dispose = useCallback(() => {
		stop();
		if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
		audioCtxRef.current = null;
		bufferRef.current = null;
		// The clip is gone, so the state describing it must not linger.
		setHasAudio(false);
		setAudioSeconds(null);
		setRtf(null);
		if (engineRef.current) {
			engineRef.current.destroy?.();
			engineRef.current = null;
			loadingRef.current = false;
			setIsReady(false);
		}
	}, [stop]);

	// Mount-only effect: optional eager load, plus cleanup on unmount. The
	// cleanup repeats stop()'s steps without touching React state.
	useEffect(() => {
		if (autoLoad) load();
		return () => {
			if (sourceRef.current) {
				try {
					sourceRef.current.onended = null;
					sourceRef.current.stop();
				} catch {}
				sourceRef.current = null;
			}
			if (audioCtxRef.current && audioCtxRef.current.state !== "closed") audioCtxRef.current.close();
			if (engineRef.current) {
				engineRef.current.destroy?.();
				engineRef.current = null;
			}
		};
	}, []);

	return {
		load,
		speak,
		replay,
		stop,
		dispose,
		isLoading,
		loadingProgress,
		isReady,
		isSynthesizing,
		isPlaying,
		hasAudio,
		audioSeconds,
		rtf,
		error
	};
}
+
1133
+ //#endregion
1134
+ //#region src/browser/use-voice-chat.ts
1135
/**
 * React hook for a fully on-device voice assistant: speak to it, it transcribes,
 * thinks, and speaks back — no cloud, no API keys. Composes {@link useSTT},
 * {@link useChat}, and {@link useTTS} into one flow.
 *
 * ```tsx
 * import { useVoiceChat } from "@tryhamster/gerbil/hooks";
 *
 * const vc = useVoiceChat();
 * <button onMouseDown={vc.start} onMouseUp={vc.stop}>
 *   {vc.isListening ? "Listening…" : "Hold to talk"}
 * </button>
 * // vc.messages renders the conversation; replies are spoken automatically.
 * ```
 *
 * This is Gerbil-unique — a private, offline voice loop the cloud SDKs can't do.
 *
 * @param {object} [options] - `sttModel` / `ttsModel` (model repos), `voice`
 *   (passed to the TTS call), `speak` (default `true`; set `false` to skip
 *   spoken replies); every other key is forwarded to `useChat`.
 * @returns {object} `messages`, `start`, `stop`, `stopSpeaking`, `clear`, the
 *   phase flags `isListening`, `isTranscribing`, `isThinking`, `isSpeaking`,
 *   plus `transcript`, `isLoading`, `isReady` and the first available `error`.
 */
function useVoiceChat(options = {}) {
	const { sttModel, ttsModel, voice, speak = true, ...chatOptions } = options;
	const stt = useSTT({ repo: sttModel });
	const chat = useChat(chatOptions);
	const tts = useTTS({ repo: ttsModel });
	// Last transcript already handed to the chat, so one utterance is not
	// answered twice when the effect re-runs.
	const processedRef = useRef("");
	// Failure of the chat → speech pipeline that the sub-hooks did not report.
	const [turnError, setTurnError] = useState(null);

	useEffect(() => {
		const text = stt.transcript.trim();
		if (!text) {
			// The transcript is cleared when a new recording starts; forget
			// the previous utterance so saying the same phrase again is
			// treated as a new turn rather than a duplicate.
			processedRef.current = "";
			return;
		}
		if (stt.isTranscribing || text === processedRef.current) return;
		processedRef.current = text;
		setTurnError(null);
		(async () => {
			try {
				const reply = await chat.send(text);
				if (speak && reply.trim()) await tts.speak(reply, voice ? { voice } : void 0);
			} catch (e) {
				// Fire-and-forget pipeline: surface the failure instead of
				// leaving an unhandled promise rejection.
				setTurnError(e instanceof Error ? e.message : String(e));
			}
		})();
	}, [stt.transcript, stt.isTranscribing]);

	const start = useCallback(() => stt.startRecording(), [stt]);
	const stop = useCallback(() => stt.stopRecording(), [stt]);

	return {
		messages: chat.messages,
		start,
		stop,
		stopSpeaking: tts.stop,
		clear: chat.clear,
		isListening: stt.isRecording,
		isTranscribing: stt.isTranscribing,
		isThinking: chat.isGenerating,
		isSpeaking: tts.isSynthesizing || tts.isPlaying,
		transcript: stt.transcript,
		isLoading: stt.isLoading || chat.isLoading || tts.isLoading,
		isReady: chat.isReady,
		error: stt.error ?? chat.error ?? tts.error ?? turnError
	};
}
+
1186
+ //#endregion
1187
+ export { KANI_VOICES, useChat, useCompletion, useEmbedding, useEngine, useMemory, useObject, useSTT, useTTS, useText, useVision, useVoiceChat };
1188
+ //# sourceMappingURL=hooks.mjs.map