@kpritam/grimoire-output-docusaurus 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +25 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/index.d.ts +1 -0
  5. package/dist/index.js +1 -0
  6. package/dist/internal/assets.d.ts +9 -0
  7. package/dist/internal/assets.js +50 -0
  8. package/dist/internal/docusaurusConfig.d.ts +9 -0
  9. package/dist/internal/docusaurusConfig.js +259 -0
  10. package/dist/internal/spellbookAssets.d.ts +39 -0
  11. package/dist/internal/spellbookAssets.js +68 -0
  12. package/dist/layer.d.ts +3 -0
  13. package/dist/layer.js +6 -0
  14. package/dist/shared.d.ts +10 -0
  15. package/dist/shared.js +36 -0
  16. package/dist/upstream.d.ts +6 -0
  17. package/dist/upstream.js +84 -0
  18. package/package.json +59 -0
  19. package/src/index.ts +1 -0
  20. package/src/internal/assets.ts +66 -0
  21. package/src/internal/docusaurusConfig.ts +281 -0
  22. package/src/internal/spellbookAssets.ts +80 -0
  23. package/src/layer.ts +12 -0
  24. package/src/shared.ts +43 -0
  25. package/src/upstream.ts +119 -0
  26. package/templates/spellbook/spellbookPlugin.ts +156 -0
  27. package/templates/spellbook/src/components/SpellbookChat/ChatEngine.ts +79 -0
  28. package/templates/spellbook/src/components/SpellbookChat/ChatErrorBoundary.tsx +65 -0
  29. package/templates/spellbook/src/components/SpellbookChat/Markdown.tsx +259 -0
  30. package/templates/spellbook/src/components/SpellbookChat/README.md +111 -0
  31. package/templates/spellbook/src/components/SpellbookChat/SettingsPanel.tsx +376 -0
  32. package/templates/spellbook/src/components/SpellbookChat/VoiceMode.tsx +867 -0
  33. package/templates/spellbook/src/components/SpellbookChat/index.tsx +744 -0
  34. package/templates/spellbook/src/components/SpellbookChat/markdown.module.css +343 -0
  35. package/templates/spellbook/src/components/SpellbookChat/secretStore.ts +106 -0
  36. package/templates/spellbook/src/components/SpellbookChat/streamProviders/anthropic.ts +36 -0
  37. package/templates/spellbook/src/components/SpellbookChat/streamProviders/createCloudProvider.ts +112 -0
  38. package/templates/spellbook/src/components/SpellbookChat/streamProviders/google.ts +33 -0
  39. package/templates/spellbook/src/components/SpellbookChat/streamProviders/index.ts +32 -0
  40. package/templates/spellbook/src/components/SpellbookChat/streamProviders/mapFinishReason.ts +23 -0
  41. package/templates/spellbook/src/components/SpellbookChat/streamProviders/ollama.ts +44 -0
  42. package/templates/spellbook/src/components/SpellbookChat/streamProviders/openai.ts +34 -0
  43. package/templates/spellbook/src/components/SpellbookChat/streamProviders/openaiRealtime.ts +320 -0
  44. package/templates/spellbook/src/components/SpellbookChat/streamProviders/types.ts +172 -0
  45. package/templates/spellbook/src/components/SpellbookChat/streamProviders/webllm.ts +214 -0
  46. package/templates/spellbook/src/components/SpellbookChat/styles.module.css +852 -0
  47. package/templates/spellbook/src/components/SpellbookChat/systemPrompt.ts +107 -0
  48. package/templates/spellbook/src/components/SpellbookChat/transformers-ssr-stub.ts +16 -0
  49. package/templates/spellbook/src/components/SpellbookChat/types.ts +52 -0
  50. package/templates/spellbook/src/components/SpellbookChat/useBundleLoader.ts +46 -0
  51. package/templates/spellbook/src/components/SpellbookChat/useChatEngine.ts +524 -0
  52. package/templates/spellbook/src/components/SpellbookChat/useEmbeddings.ts +147 -0
  53. package/templates/spellbook/src/components/SpellbookChat/useRetrieval.ts +377 -0
  54. package/templates/spellbook/src/components/SpellbookChat/useSileroVAD.ts +236 -0
  55. package/templates/spellbook/src/components/SpellbookChat/useSpeechRecognition.ts +271 -0
  56. package/templates/spellbook/src/components/SpellbookChat/useSpeechSynthesis.ts +229 -0
  57. package/templates/spellbook/src/components/SpellbookChat/useUnifiedSTT.ts +134 -0
  58. package/templates/spellbook/src/components/SpellbookChat/useWhisperSTT.ts +411 -0
  59. package/templates/spellbook/src/components/SpellbookChat/vad-ssr-stub.ts +25 -0
  60. package/templates/spellbook/src/components/SpellbookChat/voiceDebug.ts +60 -0
  61. package/templates/spellbook/src/components/SpellbookChat/voiceFsm.ts +196 -0
  62. package/templates/spellbook/src/components/SpellbookChat/voiceStyles.module.css +334 -0
  63. package/templates/spellbook/src/components/SpellbookChat/webllm-ssr-stub.ts +8 -0
  64. package/templates/spellbook/src/components/SpellbookChatDisabled.tsx +20 -0
  65. package/templates/spellbook/src/theme/Root.tsx +29 -0
@@ -0,0 +1,377 @@
1
+ import { useCallback, useMemo } from "react";
2
+
3
+ import type { ChunkRecord, RetrievedChunk } from "./types";
4
+
5
+ /**
6
+ * How many candidates to consider in the dense (vector) sweep before MMR
7
+ * re-ranks them. Larger than the final `k` so MMR has room to swap in
8
+ * diverse candidates. Hand-rolled brute-force scan stays linear in the
9
+ * total chunk count, so this is essentially free for our scale.
10
+ */
11
+ const DENSE_CANDIDATE_POOL = 30;
12
+
13
+ /**
14
+ * Same idea for the lexical (BM25) sweep. Candidate sets from the two
15
+ * sides are merged via Reciprocal Rank Fusion before MMR runs.
16
+ */
17
+ const LEXICAL_CANDIDATE_POOL = 30;
18
+
19
+ /**
20
+ * RRF (Reciprocal Rank Fusion) constant. Lower values give more weight to
21
+ * top-ranked candidates; 60 is the value Cormack et al. use in the
22
+ * canonical RRF paper and is the standard production pick for hybrid
23
+ * search blending.
24
+ */
25
+ const RRF_K = 60;
26
+
27
+ /**
28
+ * MMR diversity weight (1 = pure relevance, 0 = pure diversity). 0.7
29
+ * keeps the top hit honest while still penalising near-duplicate chunks
30
+ * that come from the same section/file. Tunable per-deployment if needed.
31
+ */
32
+ const MMR_LAMBDA = 0.7;
33
+
34
+ /**
35
+ * Approximate context-token budget for the retrieved passages. We don't
36
+ * have a true tokenizer in the browser; chunks store a `tokens` heuristic
37
+ * (`ceil(chars / 4)`) computed at index time, and we sum that against
38
+ * this budget. Sized for cloud providers with 4-8k context windows; the
39
+ * caller can override per-request via `tokenBudget` in the options (third) argument of the returned retrieve function.
40
+ */
41
+ const DEFAULT_CONTEXT_TOKEN_BUDGET = 1500;
42
+
43
+ // ─── Vector similarity (dot product on unit vectors == cosine) ─────────
44
+
45
/**
 * Dot product of `query` with row `index` of the flat, row-major `vectors`
 * buffer (`dim` floats per row).
 *
 * @param query   Query vector; only its first `dim` entries are read.
 * @param vectors Flat buffer holding one `dim`-sized row per chunk.
 * @param index   Row to score.
 * @param dim     Number of floats per row.
 * @returns The dot product. NaN if either side is shorter than the range
 *          being read — callers are expected to stay in bounds.
 */
function dotProduct(
  query: Float32Array,
  vectors: Float32Array,
  index: number,
  dim: number,
): number {
  const base = index * dim;
  let sum = 0;
  for (let d = 0; d < dim; d += 1) {
    sum += query[d]! * vectors[base + d]!;
  }
  return sum;
}

/**
 * Brute-force dense retrieval: score every stored row against `query` and
 * return the `k` best, highest score first.
 *
 * `count` is clamped to the number of complete rows actually present in
 * `vectors`. Without the clamp, a stale or over-reported `count` reads past
 * the buffer, produces NaN scores, and makes the sort comparator
 * inconsistent (NaN compares as neither greater nor smaller).
 *
 * @param query   Query embedding.
 * @param vectors Flat row-major embedding buffer.
 * @param dim     Floats per row.
 * @param count   Number of rows the caller believes are stored.
 * @param k       Maximum number of hits to return.
 * @returns Up to `k` `{ idx, score }` pairs; empty when `dim` or `k` is not
 *          positive.
 */
function denseTopK(
  query: Float32Array,
  vectors: Float32Array,
  dim: number,
  count: number,
  k: number,
): readonly { readonly idx: number; readonly score: number }[] {
  // A negative `k` would make `slice(0, k)` drop from the END of the list.
  if (dim <= 0 || k <= 0) return [];

  const storedRows = Math.floor(vectors.length / dim);
  const rows = Math.max(0, Math.min(Math.floor(count), storedRows));

  const scored: { idx: number; score: number }[] = [];
  for (let i = 0; i < rows; i += 1) {
    scored.push({ idx: i, score: dotProduct(query, vectors, i, dim) });
  }
  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, k);
}
73
+
74
+ // ─── BM25 (lexical) ────────────────────────────────────────────────────
75
+
76
/**
 * Token pattern: a letter followed by one or more letters, digits, or
 * underscores — or a run of digits.
 */
const TOKEN_RE = /[A-Za-z][A-Za-z0-9_]+|\d+/g;

/**
 * Minimal, dependency-free tokenizer aimed at English markdown.
 *
 * The input is lowercased, every `TOKEN_RE` match is collected, and matches
 * shorter than two characters (in practice: single digits) are discarded.
 * No stop-word list is applied on purpose — short common words such as
 * "use" or "tag" still carry meaning in code-heavy documentation.
 *
 * @param text Raw text to tokenize.
 * @returns Lowercased tokens in order of appearance.
 */
function tokenize(text: string): string[] {
  const matches = text.toLowerCase().match(TOKEN_RE) ?? [];
  return matches.filter((token) => token.length >= 2);
}
92
+
93
/** Precomputed statistics needed to BM25-score any chunk against a query. */
interface BM25Index {
  /** Per chunk: token → number of occurrences in that chunk. */
  readonly termFreqs: readonly ReadonlyMap<string, number>[];
  /** Per chunk: length in tokens. */
  readonly docLengths: readonly number[];
  /** Mean of `docLengths` (0 for an empty corpus). */
  readonly avgDocLength: number;
  /** Token → inverse document frequency. */
  readonly idf: ReadonlyMap<string, number>;
  /** Number of chunks indexed. */
  readonly chunkCount: number;
}

/**
 * Tokenize every chunk (headings joined with spaces, then the body text)
 * and collect the term/document statistics BM25 needs.
 *
 * @param chunks Corpus to index; position in this array is the chunk index.
 * @returns The populated index.
 */
function buildBM25Index(chunks: readonly ChunkRecord[]): BM25Index {
  const chunkCount = chunks.length;
  const termFreqs: Map<string, number>[] = [];
  const docLengths: number[] = [];
  const docFreq = new Map<string, number>();
  let totalLength = 0;

  chunks.forEach((chunk) => {
    const tokens = tokenize(`${chunk.headings.join(" ")} ${chunk.text}`);
    docLengths.push(tokens.length);
    totalLength += tokens.length;

    const counts = new Map<string, number>();
    tokens.forEach((token) => {
      counts.set(token, (counts.get(token) ?? 0) + 1);
    });
    termFreqs.push(counts);

    // Document frequency counts each chunk at most once per token.
    counts.forEach((_occurrences, token) => {
      docFreq.set(token, (docFreq.get(token) ?? 0) + 1);
    });
  });

  const avgDocLength = chunkCount > 0 ? totalLength / chunkCount : 0;

  const idf = new Map<string, number>();
  docFreq.forEach((df, term) => {
    // Smoothed Robertson-Sparck-Jones IDF. The `1 +` inside the log keeps
    // the value positive for every term, since df never exceeds chunkCount.
    idf.set(term, Math.log(1 + (chunkCount - df + 0.5) / (df + 0.5)));
  });

  return { termFreqs, docLengths, avgDocLength, idf, chunkCount };
}
137
+
138
/** BM25 term-frequency saturation parameter. */
const BM25_K1 = 1.5;
/** BM25 document-length normalisation strength. */
const BM25_B = 0.75;

/**
 * BM25 score of one indexed chunk against a tokenized query.
 *
 * @param index       Index built by `buildBM25Index`.
 * @param docIdx      Chunk position inside the index.
 * @param queryTokens Query tokens; a token repeated in the query contributes
 *                    once per repetition.
 * @returns The summed score, or 0 when `docIdx` is not in the index or no
 *          query token occurs in the chunk.
 */
function bm25Score(
  index: BM25Index,
  docIdx: number,
  queryTokens: readonly string[],
): number {
  const termFreq = index.termFreqs[docIdx];
  const docLength = index.docLengths[docIdx];
  if (!termFreq || docLength === undefined) return 0;

  // Length normalisation depends only on the document, not on the term.
  const lengthNorm =
    1 - BM25_B + BM25_B * (docLength / Math.max(1, index.avgDocLength));

  return queryTokens.reduce((total, term) => {
    const occurrences = termFreq.get(term);
    if (!occurrences) return total;
    const weight = index.idf.get(term) ?? 0;
    if (weight <= 0) return total;
    return (
      total +
      weight *
        ((occurrences * (BM25_K1 + 1)) / (occurrences + BM25_K1 * lengthNorm))
    );
  }, 0);
}
160
+
161
/**
 * Lexical retrieval: BM25-score every indexed chunk against `rawQuery` and
 * return the `k` best, highest score first.
 *
 * Chunks scoring zero share no token with the query; they are left out so
 * they cannot pull the RRF blend toward arbitrary documents.
 *
 * @param index    Index built by `buildBM25Index`.
 * @param rawQuery Untokenized user query.
 * @param k        Maximum number of hits to return.
 * @returns Up to `k` `{ idx, score }` pairs with strictly positive scores.
 */
function bm25TopK(
  index: BM25Index,
  rawQuery: string,
  k: number,
): readonly { readonly idx: number; readonly score: number }[] {
  const queryTokens = tokenize(rawQuery);
  if (queryTokens.length === 0 || index.chunkCount === 0) return [];

  const matches: { idx: number; score: number }[] = [];
  for (let docIdx = 0; docIdx < index.chunkCount; docIdx += 1) {
    const score = bm25Score(index, docIdx, queryTokens);
    if (score > 0) matches.push({ idx: docIdx, score });
  }

  // Array.prototype.sort is stable, so equal scores keep ascending idx order.
  matches.sort((a, b) => b.score - a.score);
  return matches.slice(0, k);
}
177
+
178
+ // ─── Reciprocal Rank Fusion ────────────────────────────────────────────
179
+
180
/** A chunk that appeared in at least one of the ranked lists being fused. */
interface RRFCandidate {
  /** Chunk index. */
  readonly idx: number;
  /** Accumulated reciprocal-rank score across the lists it appeared in. */
  readonly rrf: number;
  /** Dense similarity, present only if the chunk was in the dense list. */
  readonly denseScore?: number;
}

/**
 * Merge the dense and lexical rankings with Reciprocal Rank Fusion: every
 * appearance at zero-based `rank` adds `1 / (RRF_K + rank + 1)` to the
 * chunk's fused score.
 *
 * @param dense   Dense hits, best first.
 * @param lexical Lexical hits, best first.
 * @returns One candidate per distinct chunk, highest fused score first.
 */
function reciprocalRankFusion(
  dense: readonly { readonly idx: number; readonly score: number }[],
  lexical: readonly { readonly idx: number; readonly score: number }[],
): readonly RRFCandidate[] {
  const fused = new Map<number, { rrf: number; denseScore?: number }>();

  // Fetch the accumulator for `idx`, registering a fresh one on first sight.
  const entryFor = (idx: number): { rrf: number; denseScore?: number } => {
    let entry = fused.get(idx);
    if (entry === undefined) {
      entry = { rrf: 0 };
      fused.set(idx, entry);
    }
    return entry;
  };

  for (let rank = 0; rank < dense.length; rank += 1) {
    const hit = dense[rank]!;
    const entry = entryFor(hit.idx);
    entry.rrf += 1 / (RRF_K + rank + 1);
    entry.denseScore = hit.score;
  }

  for (let rank = 0; rank < lexical.length; rank += 1) {
    const entry = entryFor(lexical[rank]!.idx);
    entry.rrf += 1 / (RRF_K + rank + 1);
  }

  // NOTE: drain the Map with `forEach` (or `Array.from`), never with
  // `[...fused.entries()]`. Babel's docusaurus preset transpiles
  // `[...iterable]` into `[].concat(iterable)`, which only spreads true
  // Array instances — a Map Iterator ends up wrapped as a single element,
  // and destructuring it downstream explodes with
  // "Cannot read properties of undefined (reading 'rrf')".
  const ranked: RRFCandidate[] = [];
  fused.forEach((entry, idx) => {
    ranked.push({ idx, rrf: entry.rrf, denseScore: entry.denseScore });
  });
  ranked.sort((a, b) => b.rrf - a.rrf);
  return ranked;
}
219
+
220
+ // ─── Maximal Marginal Relevance ────────────────────────────────────────
221
+
222
/**
 * Maximal Marginal Relevance re-ranking: repeatedly pick the candidate that
 * maximises `lambda * relevance - (1 - lambda) * redundancy`.
 *
 * - relevance is the candidate's cosine similarity to the query. Candidates
 *   from the dense sweep already carry it as `denseScore`. For lexical-only
 *   candidates it is computed here from `query` and the stored vector, so
 *   every candidate is compared on the same scale. (Using the raw RRF value
 *   — at most ~0.03 — as a stand-in made lexical-only hits effectively
 *   unpickable and surfaced a score on a different scale.) RRF is used only
 *   as a last resort when the similarity is not a finite number.
 * - redundancy is the highest cosine similarity between the candidate and
 *   any chunk picked so far, floored at 0.
 *
 * @param candidates Fused candidates to choose from.
 * @param vectors    Flat row-major buffer of stored chunk vectors.
 * @param dim        Floats per row.
 * @param query      Query embedding.
 * @param k          Maximum number of picks.
 * @param lambda     Relevance/diversity trade-off (1 = pure relevance,
 *                   0 = pure diversity). Defaults to `MMR_LAMBDA`.
 * @returns Picks in selection order; `score` is the relevance term.
 */
function mmrReRank(
  candidates: readonly RRFCandidate[],
  vectors: Float32Array,
  dim: number,
  query: Float32Array,
  k: number,
  lambda: number = MMR_LAMBDA,
): readonly { readonly idx: number; readonly score: number }[] {
  if (candidates.length === 0 || k <= 0) return [];

  const dot = (a: Float32Array, b: Float32Array): number => {
    let s = 0;
    for (let d = 0; d < dim; d += 1) s += a[d]! * b[d]!;
    return s;
  };

  // Relevance never changes between passes, so resolve it (and the subarray
  // view of each candidate's vector) exactly once.
  const pool = candidates.map((c) => {
    const vec = vectors.subarray(c.idx * dim, c.idx * dim + dim);
    let relevance = c.denseScore;
    if (relevance === undefined) {
      const similarity = dot(query, vec);
      relevance = Number.isFinite(similarity) ? similarity : c.rrf;
    }
    return { idx: c.idx, vec, relevance, redundancy: 0 };
  });

  const picked: { idx: number; score: number }[] = [];

  while (picked.length < k && pool.length > 0) {
    let bestPos = 0;
    let bestScore = -Infinity;
    for (let i = 0; i < pool.length; i += 1) {
      const c = pool[i]!;
      const mmr = lambda * c.relevance - (1 - lambda) * c.redundancy;
      if (mmr > bestScore) {
        bestScore = mmr;
        bestPos = i;
      }
    }

    const winner = pool.splice(bestPos, 1)[0]!;
    picked.push({ idx: winner.idx, score: winner.relevance });

    // Fold the new pick into each remaining candidate's running maximum
    // instead of re-scanning every picked vector on every pass.
    for (let i = 0; i < pool.length; i += 1) {
      const c = pool[i]!;
      const similarity = dot(c.vec, winner.vec);
      if (similarity > c.redundancy) c.redundancy = similarity;
    }
  }

  return picked;
}
288
+
289
+ // ─── Token-budget enforcement ──────────────────────────────────────────
290
+
291
/**
 * Keep the longest prefix of `hits` whose combined token cost fits in
 * `budget`.
 *
 * - A chunk costs its stored `tokens` value when that is positive, otherwise
 *   `ceil(text.length / 4)`.
 * - The first usable hit is always kept, even if it alone exceeds the
 *   budget, so a non-empty candidate list never collapses to nothing.
 * - Scanning stops at the first hit that would overflow; lower-ranked hits
 *   are not considered as fillers.
 * - Hits whose `idx` has no entry in `chunks` are dropped. This now also
 *   holds when the budget is disabled (`budget <= 0`); previously that path
 *   returned `hits` untouched, so dangling indices could reach the caller.
 *
 * @param hits   Ranked hits, best first.
 * @param chunks Chunk records addressed by `hit.idx`.
 * @param budget Approximate token budget; `<= 0` disables the limit.
 * @returns A new array holding the retained hits in their original order.
 */
function enforceTokenBudget(
  hits: readonly { readonly idx: number; readonly score: number }[],
  chunks: readonly ChunkRecord[],
  budget: number,
): readonly { readonly idx: number; readonly score: number }[] {
  const limited = budget > 0;
  const kept: { readonly idx: number; readonly score: number }[] = [];
  let used = 0;

  for (const hit of hits) {
    const chunk = chunks[hit.idx];
    if (!chunk) continue;

    if (limited) {
      const cost =
        chunk.tokens > 0 ? chunk.tokens : Math.ceil(chunk.text.length / 4);
      if (kept.length > 0 && used + cost > budget) break;
      used += cost;
    }
    kept.push(hit);
  }

  return kept;
}
309
+
310
+ // ─── Public hook ───────────────────────────────────────────────────────
311
+
312
/** Per-call options accepted by the function `useRetrieval` returns. */
export interface RetrieveOptions {
  /**
   * Approximate token budget for the combined `[Context]` block. Pass
   * `0` to disable. Defaults to `DEFAULT_CONTEXT_TOKEN_BUDGET`.
   */
  readonly tokenBudget?: number;
  /**
   * The original natural-language question, used for BM25 lexical
   * scoring. If omitted, only dense (vector) retrieval runs.
   */
  readonly query?: string;
}

/**
 * React hook returning a memoised hybrid-retrieval function.
 *
 * The returned function runs, in order: a dense top-k sweep, an optional
 * BM25 sweep (only when `options.query` is supplied), Reciprocal Rank
 * Fusion of the two lists, MMR re-ranking down to `k`, and token-budget
 * trimming.
 *
 * Only rows that have both a chunk record and a complete stored vector are
 * searched: the effective row count is the smallest of `count`,
 * `chunks.length`, and `floor(vectors.length / dim)`. This keeps a stale or
 * mismatched `count` from producing NaN similarities or results whose
 * `chunk` is `undefined`.
 *
 * @param chunks  Chunk records, or `null` while they are not loaded.
 * @param vectors Flat row-major embedding buffer, or `null` while not loaded.
 * @param dim     Floats per embedding row.
 * @param count   Number of rows the caller expects in `vectors`.
 * @returns `(query, k, options?) => RetrievedChunk[]`; yields `[]` whenever
 *          the inputs are not usable or `k` is not positive.
 */
export function useRetrieval(
  chunks: ChunkRecord[] | null,
  vectors: Float32Array | null,
  dim: number,
  count: number,
): (
  query: Float32Array,
  k: number,
  options?: RetrieveOptions,
) => RetrievedChunk[] {
  // The BM25 index depends only on `chunks`, so it is rebuilt only when that
  // array's identity changes. Tokenisation cost scales with the corpus, but
  // for our scale (~hundreds of chunks) this completes in a few ms.
  const bm25 = useMemo(
    () => (chunks && chunks.length > 0 ? buildBM25Index(chunks) : null),
    [chunks],
  );

  return useCallback(
    (query: Float32Array, k: number, options?: RetrieveOptions) => {
      if (!chunks?.length || !vectors || count <= 0 || dim <= 0 || k <= 0) {
        return [];
      }

      const searchable = Math.min(
        Math.floor(count),
        chunks.length,
        Math.floor(vectors.length / dim),
      );
      if (searchable <= 0) return [];

      const dense = denseTopK(
        query,
        vectors,
        dim,
        searchable,
        Math.max(k, DENSE_CANDIDATE_POOL),
      );
      // The BM25 index spans every chunk; discard hits that have no stored
      // vector, because MMR needs one for the redundancy term.
      const lexical =
        bm25 && options?.query
          ? bm25TopK(bm25, options.query, LEXICAL_CANDIDATE_POOL).filter(
              (hit) => hit.idx < searchable,
            )
          : [];

      const fused = reciprocalRankFusion(dense, lexical);
      const reranked = mmrReRank(fused, vectors, dim, query, k);

      const budget = options?.tokenBudget ?? DEFAULT_CONTEXT_TOKEN_BUDGET;
      const trimmed = enforceTokenBudget(reranked, chunks, budget);

      const results: RetrievedChunk[] = [];
      for (const { idx, score } of trimmed) {
        const chunk = chunks[idx];
        // Defensive: never hand the consumer a result without a chunk.
        if (chunk) results.push({ chunk, score });
      }
      return results;
    },
    [chunks, vectors, dim, count, bm25],
  );
}
@@ -0,0 +1,236 @@
1
+ import { useCallback, useEffect, useRef, useState } from "react";
2
+
3
+ import { voiceLog } from "./voiceDebug";
4
+
5
/**
 * Hand-written structural types for the parts of `@ricky0123/vad-web` this
 * file touches. The package itself is pulled in with a dynamic `import()`
 * and treated as untyped, so Docusaurus' SSR build never evaluates the
 * AudioWorklet/onnxruntime-web dependency tree (it is already aliased to a
 * stub for the server target — the async import keeps it out of the
 * synchronous client bundle as well).
 */

/** A live microphone VAD as returned by `MicVAD.new`. */
type MicVADInstance = {
  readonly start: () => Promise<void>;
  readonly pause: () => Promise<void>;
  readonly destroy: () => Promise<void>;
  listening: boolean;
  errored: string | null;
};

/** Every option `MicVAD.new` is typed to accept here; callers pass a subset. */
interface MicVADOptions {
  onSpeechStart: () => void | Promise<void>;
  onSpeechEnd: (audio: Float32Array) => void | Promise<void>;
  onVADMisfire: () => void | Promise<void>;
  model: "v5" | "legacy";
  // Silero v5 thresholds — tuned for "stop talking immediately when
  // they pause" rather than "wait for an obvious silence".
  positiveSpeechThreshold: number;
  negativeSpeechThreshold: number;
  redemptionFrames: number;
  preSpeechPadFrames: number;
  minSpeechFrames: number;
}

/** Static side of `MicVAD`: the async factory that yields an instance. */
interface MicVADStatic {
  new: (options: Partial<MicVADOptions>) => Promise<MicVADInstance>;
}
36
+
37
/** Module-level cache so the VAD package is only resolved once per page. */
let cachedModule: { MicVAD: MicVADStatic } | null = null;

/**
 * Dynamically import `@ricky0123/vad-web`, remembering the result.
 *
 * @returns The module namespace, viewed through the local `MicVADStatic` type.
 */
async function loadVadModule(): Promise<{ MicVAD: MicVADStatic }> {
  if (cachedModule !== null) {
    return cachedModule;
  }
  // The package is consumed untyped on purpose; see the local MicVAD types.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const imported = (await import("@ricky0123/vad-web")) as any;
  const loaded = imported as { MicVAD: MicVADStatic };
  cachedModule = loaded;
  return loaded;
}
45
+
46
/**
 * Load lifecycle reported by `useSileroVAD`: starts at `"idle"`, becomes
 * `"loading"` when a load begins, `"ready"` once a `MicVAD` instance has been
 * created, and `"error"` when `start()` fails.
 */
export type SileroLoadStatus = "idle" | "loading" | "ready" | "error";

/** Value returned by `useSileroVAD`. */
export interface UseSileroVADResult {
  /**
   * Shorthand for `loadStatus === "ready"`, i.e. the VAD module + model
   * have been loaded and a `MicVAD` was created. Note that `destroy()` does
   * not reset `loadStatus`, so this can remain `true` after teardown.
   */
  readonly ready: boolean;
  /** Current load lifecycle state. */
  readonly loadStatus: SileroLoadStatus;
  /** `true` after `start()` succeeded; cleared by `stop()`, `destroy()`, or a failed start. */
  readonly listening: boolean;
  /**
   * Message of the most recent `start()` failure, or `null`. Stays `null`
   * for the soft-disabled "AudioContext unavailable" case, which only sets
   * `loadStatus` to `"error"`.
   */
  readonly error: string | null;
  /** Idempotent. Lazy-loads the VAD module + Silero model on first call. */
  readonly start: () => Promise<void>;
  /** Pause the underlying mic stream; safe to call when not started. */
  readonly stop: () => Promise<void>;
  /** Tear down VAD and release the mic. Use on unmount. */
  readonly destroy: () => Promise<void>;
}
61
+
62
/**
 * Options for `useSileroVAD`. The three callbacks are re-read on every
 * render, so the most recently supplied function is the one that fires;
 * they do not need to be referentially stable.
 */
export interface UseSileroVADOptions {
  /** Fired the moment Silero detects voiced audio above the threshold. */
  readonly onSpeechStart?: () => void;
  /**
   * Fired when Silero detects the speaker has stopped (post-redemption).
   * The `audio` argument is the captured speech segment (16 kHz mono
   * Float32 in [-1, 1]); we don't currently use it because native STT
   * and Whisper own their own audio pipelines, but it's there if a
   * future revision wants to feed it directly into Whisper to drop
   * push-to-talk entirely.
   */
  readonly onSpeechEnd?: (audio: Float32Array) => void;
  /** Detected speech start but segment was below `minSpeechFrames`. */
  readonly onMisfire?: () => void;
  /**
   * Defaults to `true`. When `false`, `start()` returns immediately without
   * loading or starting anything (e.g. while the engine is loading or the
   * user is in a non-voice mode); `stop()` and `destroy()` are not gated by
   * this flag. Keeps the React tree shape stable so the consuming component
   * doesn't have to conditionally call hooks.
   */
  readonly enabled?: boolean;
}
83
+
84
/**
 * Message of the error thrown when the runtime cannot host a VAD at all
 * (SSR, jsdom). `start()` recognises it and fails quietly.
 */
const VAD_UNAVAILABLE_MESSAGE = "AudioContext unavailable; VAD disabled";

/**
 * Voice Activity Detection backed by Silero v5 via `@ricky0123/vad-web`.
 *
 * Silero gives us a hard speech-end signal in roughly the time it takes
 * a human to draw a breath (~80–250 ms) instead of the legacy 1.2-second
 * trailing-silence timer the chat used to fall back on. The win is most
 * dramatic for follow-up questions where the user pauses naturally
 * mid-sentence — the old timer would treat that as the end of the turn,
 * the model would speak too early, and the user would talk over the
 * assistant. With VAD redemption frames the model only fires on real
 * silence.
 *
 * The hook owns its own `MicVAD` instance and mic stream; it doesn't
 * compete with native STT or the Whisper recorder, which manage their
 * own audio pipelines. Browsers grant mic permission once, so the user
 * still sees only one prompt.
 *
 * Lifecycle guarantees:
 * - Overlapping `start()` calls share one in-flight load, so at most one
 *   `MicVAD` is ever created per load.
 * - If `destroy()` (or unmount) happens while the VAD is still loading,
 *   the instance is destroyed as soon as it materialises instead of being
 *   left running with the microphone open.
 * - If `stop()` lands while `instance.start()` is in flight, the VAD is
 *   paused again once the start settles.
 *
 * @param opts Callbacks and the `enabled` switch; see `UseSileroVADOptions`.
 * @returns Reactive status flags plus `start` / `stop` / `destroy` controls.
 */
export function useSileroVAD(opts: UseSileroVADOptions = {}): UseSileroVADResult {
  const { onSpeechStart, onSpeechEnd, onMisfire, enabled = true } = opts;

  const [loadStatus, setLoadStatus] = useState<SileroLoadStatus>("idle");
  const [listening, setListening] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const vadRef = useRef<MicVADInstance | null>(null);
  // In-flight load shared by concurrent start() calls.
  const pendingRef = useRef<Promise<MicVADInstance | null> | null>(null);
  // Bumped by destroy(); a load that began under an older value is stale.
  const generationRef = useRef(0);
  // Callbacks live in refs so the MicVAD (created once) always calls the
  // latest functions without having to be rebuilt.
  const onStartRef = useRef(onSpeechStart);
  onStartRef.current = onSpeechStart;
  const onEndRef = useRef(onSpeechEnd);
  onEndRef.current = onSpeechEnd;
  const onMisfireRef = useRef(onMisfire);
  onMisfireRef.current = onMisfire;
  // Latest intent: true after start(), false after stop()/destroy().
  const wantStartRef = useRef(false);

  /**
   * Resolve to the live MicVAD, creating it on first use. Resolves to
   * `null` when destroy() invalidated the load while it was in flight.
   */
  const ensureVad = useCallback((): Promise<MicVADInstance | null> => {
    if (vadRef.current) return Promise.resolve(vadRef.current);
    if (pendingRef.current) return pendingRef.current;

    const generation = generationRef.current;
    const isStale = (): boolean => generationRef.current !== generation;

    const pending = (async (): Promise<MicVADInstance | null> => {
      if (
        typeof window === "undefined" ||
        typeof window.AudioContext === "undefined" ||
        typeof navigator === "undefined" ||
        typeof navigator.mediaDevices?.getUserMedia !== "function"
      ) {
        // jsdom and SSR have neither AudioContext nor a real getUserMedia, so
        // there is nothing the VAD can do. Surface a soft-disabled state so
        // the consumer's `enabled` branch still works without firing a long
        // chain of model-fetch errors.
        throw new Error(VAD_UNAVAILABLE_MESSAGE);
      }
      setLoadStatus((s) => (s === "ready" ? s : "loading"));
      setError(null);
      const { MicVAD } = await loadVadModule();
      voiceLog("vad.module-loaded");
      // Torn down while the module was loading: don't even open the mic.
      if (isStale()) return null;
      const instance = await MicVAD.new({
        // Silero v5 is the current default in vad-web; it's a strict
        // upgrade over the legacy model in both accuracy and CPU cost
        // (smaller model, runs in the worklet not the main thread).
        model: "v5",
        // The defaults are fine for most cases; tuning these hurts more
        // than it helps. Documented here for future experimentation:
        //   positiveSpeechThreshold: 0.5
        //   negativeSpeechThreshold: 0.35
        //   redemptionFrames: 8 (≈260 ms at 32 ms frame size)
        //   preSpeechPadFrames: 1
        //   minSpeechFrames: 9 (≈300 ms; filters tongue clicks)
        onSpeechStart: () => {
          voiceLog("vad.speech-start");
          onStartRef.current?.();
        },
        onSpeechEnd: (audio: Float32Array) => {
          voiceLog("vad.speech-end", { samples: audio.length });
          onEndRef.current?.(audio);
        },
        onVADMisfire: () => {
          voiceLog("vad.misfire");
          onMisfireRef.current?.();
        },
      });
      if (isStale()) {
        // destroy() ran while the model was loading; nobody holds a
        // reference to this instance, so release it (and the mic) here.
        try {
          await instance.destroy();
        } catch (e) {
          voiceLog("vad.destroy.error", {
            message: e instanceof Error ? e.message : String(e),
          });
        }
        return null;
      }
      vadRef.current = instance;
      setLoadStatus("ready");
      return instance;
    })();

    pendingRef.current = pending;
    const release = (): void => {
      if (pendingRef.current === pending) pendingRef.current = null;
    };
    // Failures are reported to whoever awaits `pending`; this branch only
    // clears the slot so a later start() can retry.
    void pending.then(release, release);
    return pending;
  }, []);

  const start = useCallback(async (): Promise<void> => {
    if (!enabled) return;
    wantStartRef.current = true;
    try {
      const instance = await ensureVad();
      if (!instance || !wantStartRef.current) return;
      await instance.start();
      // destroy() ran while the mic was starting: the instance is gone.
      if (vadRef.current !== instance) return;
      // stop() ran while the mic was starting: honour the latest intent.
      if (!wantStartRef.current) {
        await instance.pause();
        return;
      }
      setListening(true);
      voiceLog("vad.started");
    } catch (e) {
      const msg = e instanceof Error ? e.message : "VAD start failed";
      setLoadStatus("error");
      // The "unavailable" error is the soft-disabled path used in
      // jsdom/SSR — silence those so we don't pollute the console of
      // every test run with VAD warnings.
      if (msg !== VAD_UNAVAILABLE_MESSAGE) {
        voiceLog("vad.start.error", { message: msg });
        setError(msg);
      }
      setListening(false);
    }
  }, [enabled, ensureVad]);

  const stop = useCallback(async (): Promise<void> => {
    wantStartRef.current = false;
    const instance = vadRef.current;
    if (!instance) return;
    try {
      await instance.pause();
    } catch (e) {
      voiceLog("vad.stop.error", {
        message: e instanceof Error ? e.message : String(e),
      });
    }
    setListening(false);
  }, []);

  const destroy = useCallback(async (): Promise<void> => {
    wantStartRef.current = false;
    // Invalidate any in-flight load and let a later start() begin afresh.
    generationRef.current += 1;
    pendingRef.current = null;
    const instance = vadRef.current;
    vadRef.current = null;
    setListening(false);
    if (!instance) return;
    try {
      await instance.destroy();
    } catch (e) {
      voiceLog("vad.destroy.error", {
        message: e instanceof Error ? e.message : String(e),
      });
    }
  }, []);

  // Tear down on unmount so the mic indicator clears even if the parent
  // forgot to call destroy().
  useEffect(() => {
    return () => {
      void destroy();
    };
  }, [destroy]);

  return {
    ready: loadStatus === "ready",
    loadStatus,
    listening,
    error,
    start,
    stop,
    destroy,
  };
}