@kpritam/grimoire-output-docusaurus 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +25 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/internal/assets.d.ts +9 -0
- package/dist/internal/assets.js +50 -0
- package/dist/internal/docusaurusConfig.d.ts +9 -0
- package/dist/internal/docusaurusConfig.js +259 -0
- package/dist/internal/spellbookAssets.d.ts +39 -0
- package/dist/internal/spellbookAssets.js +68 -0
- package/dist/layer.d.ts +3 -0
- package/dist/layer.js +6 -0
- package/dist/shared.d.ts +10 -0
- package/dist/shared.js +36 -0
- package/dist/upstream.d.ts +6 -0
- package/dist/upstream.js +84 -0
- package/package.json +59 -0
- package/src/index.ts +1 -0
- package/src/internal/assets.ts +66 -0
- package/src/internal/docusaurusConfig.ts +281 -0
- package/src/internal/spellbookAssets.ts +80 -0
- package/src/layer.ts +12 -0
- package/src/shared.ts +43 -0
- package/src/upstream.ts +119 -0
- package/templates/spellbook/spellbookPlugin.ts +156 -0
- package/templates/spellbook/src/components/SpellbookChat/ChatEngine.ts +79 -0
- package/templates/spellbook/src/components/SpellbookChat/ChatErrorBoundary.tsx +65 -0
- package/templates/spellbook/src/components/SpellbookChat/Markdown.tsx +259 -0
- package/templates/spellbook/src/components/SpellbookChat/README.md +111 -0
- package/templates/spellbook/src/components/SpellbookChat/SettingsPanel.tsx +376 -0
- package/templates/spellbook/src/components/SpellbookChat/VoiceMode.tsx +867 -0
- package/templates/spellbook/src/components/SpellbookChat/index.tsx +744 -0
- package/templates/spellbook/src/components/SpellbookChat/markdown.module.css +343 -0
- package/templates/spellbook/src/components/SpellbookChat/secretStore.ts +106 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/anthropic.ts +36 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/createCloudProvider.ts +112 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/google.ts +33 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/index.ts +32 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/mapFinishReason.ts +23 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/ollama.ts +44 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/openai.ts +34 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/openaiRealtime.ts +320 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/types.ts +172 -0
- package/templates/spellbook/src/components/SpellbookChat/streamProviders/webllm.ts +214 -0
- package/templates/spellbook/src/components/SpellbookChat/styles.module.css +852 -0
- package/templates/spellbook/src/components/SpellbookChat/systemPrompt.ts +107 -0
- package/templates/spellbook/src/components/SpellbookChat/transformers-ssr-stub.ts +16 -0
- package/templates/spellbook/src/components/SpellbookChat/types.ts +52 -0
- package/templates/spellbook/src/components/SpellbookChat/useBundleLoader.ts +46 -0
- package/templates/spellbook/src/components/SpellbookChat/useChatEngine.ts +524 -0
- package/templates/spellbook/src/components/SpellbookChat/useEmbeddings.ts +147 -0
- package/templates/spellbook/src/components/SpellbookChat/useRetrieval.ts +377 -0
- package/templates/spellbook/src/components/SpellbookChat/useSileroVAD.ts +236 -0
- package/templates/spellbook/src/components/SpellbookChat/useSpeechRecognition.ts +271 -0
- package/templates/spellbook/src/components/SpellbookChat/useSpeechSynthesis.ts +229 -0
- package/templates/spellbook/src/components/SpellbookChat/useUnifiedSTT.ts +134 -0
- package/templates/spellbook/src/components/SpellbookChat/useWhisperSTT.ts +411 -0
- package/templates/spellbook/src/components/SpellbookChat/vad-ssr-stub.ts +25 -0
- package/templates/spellbook/src/components/SpellbookChat/voiceDebug.ts +60 -0
- package/templates/spellbook/src/components/SpellbookChat/voiceFsm.ts +196 -0
- package/templates/spellbook/src/components/SpellbookChat/voiceStyles.module.css +334 -0
- package/templates/spellbook/src/components/SpellbookChat/webllm-ssr-stub.ts +8 -0
- package/templates/spellbook/src/components/SpellbookChatDisabled.tsx +20 -0
- package/templates/spellbook/src/theme/Root.tsx +29 -0
|
@@ -0,0 +1,524 @@
|
|
|
1
|
+
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
|
2
|
+
|
|
3
|
+
import type {
|
|
4
|
+
AskFinishReason,
|
|
5
|
+
AskOptions,
|
|
6
|
+
AskResult,
|
|
7
|
+
ChatEngine,
|
|
8
|
+
EngineLoadingState,
|
|
9
|
+
UseChatEngine,
|
|
10
|
+
} from "./ChatEngine";
|
|
11
|
+
import type { BundleLoadResult } from "./useBundleLoader";
|
|
12
|
+
import { useBundleLoader } from "./useBundleLoader";
|
|
13
|
+
import type { Citation, ChunkRecord, RetrievedChunk } from "./types";
|
|
14
|
+
import { useEmbeddings } from "./useEmbeddings";
|
|
15
|
+
import { useRetrieval } from "./useRetrieval";
|
|
16
|
+
import {
|
|
17
|
+
getSecret,
|
|
18
|
+
hasSecret,
|
|
19
|
+
onSecretChange,
|
|
20
|
+
purgeLegacyKeyStorage,
|
|
21
|
+
} from "./secretStore";
|
|
22
|
+
import { loadProvider } from "./streamProviders/index";
|
|
23
|
+
import type {
|
|
24
|
+
ProviderConfig,
|
|
25
|
+
ProviderId,
|
|
26
|
+
StreamProvider,
|
|
27
|
+
} from "./streamProviders/types";
|
|
28
|
+
import { STORAGE_KEYS } from "./streamProviders/types";
|
|
29
|
+
import { buildPrompts } from "./systemPrompt";
|
|
30
|
+
|
|
31
|
+
// Name of the `window` event used in this module to signal that chat settings changed.
const SETTINGS_EVENT = "grimoire-chat-settings";
|
|
32
|
+
|
|
33
|
+
/**
 * Safe accessor for the global `localStorage`.
 *
 * @returns `localStorage` when the global exists and exposes a callable
 *   `getItem`; otherwise `null`. Any exception raised while probing the
 *   global is swallowed and also yields `null`.
 */
function ls(): Storage | null {
  try {
    const usable =
      typeof localStorage !== "undefined" &&
      typeof localStorage.getItem === "function";
    return usable ? localStorage : null;
  } catch {
    return null;
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Resolve which provider is currently selected.
 *
 * Reads the value stored under `STORAGE_KEYS.activeProvider` and returns it
 * when it is one of the recognised provider ids. Anything else — storage
 * unavailable, no entry, or an unrecognised value — yields `"anthropic"`.
 */
export function readActiveProviderId(): ProviderId {
  const stored = ls()?.getItem(STORAGE_KEYS.activeProvider) ?? null;
  switch (stored) {
    case "anthropic":
    case "openai":
    case "openai-realtime":
    case "google":
    case "ollama":
    case "webllm":
      return stored;
    default:
      return "anthropic";
  }
}
|
|
62
|
+
|
|
63
|
+
/**
 * Assemble the runtime configuration for a provider.
 *
 * @param id - Provider whose persisted fields and secret are read.
 * @param provider - Loaded provider; its first listed model is the default
 *   when no model is stored.
 * @returns The config with:
 *   - `model`: trimmed stored model, else the provider's first model id,
 *     else `""`;
 *   - `baseUrl` / `tokenEndpoint`: trimmed stored value, or `undefined`
 *     when missing or blank;
 *   - `apiKey`: whatever `getSecret(id)` returns.
 */
function readProviderConfig(
  id: ProviderId,
  provider: StreamProvider,
): ProviderConfig {
  const store = ls();
  const model =
    store?.getItem(STORAGE_KEYS.field(id, "model"))?.trim() ||
    provider.models[0]?.id ||
    "";
  const storedBaseUrl = store
    ?.getItem(STORAGE_KEYS.field(id, "baseUrl"))
    ?.trim();
  const storedTokenEndpoint = store
    ?.getItem(STORAGE_KEYS.field(id, "tokenEndpoint"))
    ?.trim();
  const apiKey = getSecret(id);
  return {
    id,
    model,
    apiKey,
    // Blank strings are treated the same as "not set".
    baseUrl: storedBaseUrl ? storedBaseUrl : undefined,
    tokenEndpoint: storedTokenEndpoint ? storedTokenEndpoint : undefined,
  };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Cheap check for whether a provider has enough configuration to be used.
 *
 * - `anthropic`, `openai`, `google`: `hasSecret(id)` must be true.
 * - `openai-realtime`: either `hasSecret(id)` is true or a non-blank
 *   `tokenEndpoint` is persisted.
 * - every other id (the local providers): a non-blank `model` must be
 *   persisted.
 *
 * NOTE(review): `readProviderConfig` falls back to the provider's first
 * model when none is stored, but this check does not — confirm that a local
 * provider with no stored model is meant to report "not configured".
 */
function providerLooksConfigured(id: ProviderId): boolean {
  switch (id) {
    case "anthropic":
    case "openai":
    case "google":
      return hasSecret(id);
    case "openai-realtime": {
      if (hasSecret(id)) return true;
      const endpoint = ls()
        ?.getItem(STORAGE_KEYS.field(id, "tokenEndpoint"))
        ?.trim();
      return Boolean(endpoint);
    }
    default: {
      const storedModel = ls()
        ?.getItem(STORAGE_KEYS.field(id, "model"))
        ?.trim();
      return Boolean(storedModel);
    }
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Collapse retrieved chunks into one citation per source file.
 *
 * The first chunk seen for a file supplies that citation's headings, source
 * link and anchor; later chunks from the same file are ignored. Output order
 * follows first appearance in `retrieved`.
 *
 * The result array is filled directly rather than by spreading a Map/Set
 * iterator: per the original note, Docusaurus' Babel preset compiles such a
 * spread to `[].concat(iter)`, which wraps the iterator instead of
 * expanding it.
 */
function uniqCitations(retrieved: readonly RetrievedChunk[]): Citation[] {
  const seenFiles = new Set<string>();
  const citations: Citation[] = [];
  for (const { chunk } of retrieved) {
    if (seenFiles.has(chunk.file)) continue;
    seenFiles.add(chunk.file);
    citations.push({
      file: chunk.file,
      headings: chunk.headings,
      sourceLink: chunk.sourceLink,
      anchor: chunk.anchor,
    });
  }
  return citations;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Turn a load failure into text suitable for showing to the user.
 *
 * Recognised sentinel messages (`BUNDLE_MISSING`, anything starting with
 * `VECTOR_DIM_MISMATCH`, `NO_WASM`) map to fixed explanations. Any other
 * `Error` passes its message through, and non-`Error` values are
 * stringified.
 */
function mapBundleError(e: unknown): string {
  if (!(e instanceof Error)) {
    return String(e);
  }
  const { message } = e;
  if (message === "BUNDLE_MISSING") {
    return "Documentation index not found. Run `grimoire cast` to build it.";
  }
  if (message.startsWith("VECTOR_DIM_MISMATCH")) {
    return "The documentation index looks corrupted (vector size mismatch). Rebuild it.";
  }
  if (message === "NO_WASM") {
    return "Your browser doesn't support local embeddings. Please use a modern browser.";
  }
  return message;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Project details copied out of the loaded bundle's manifest and kept in
 * hook state. All fields are optional; the state starts as `{}` and is
 * reset to `{}` at the beginning of every load.
 */
interface BundleMeta {
  /** `manifest.repo`; also exposed on the engine as `repo`. */
  readonly repo?: string;
  /** `manifest.siteName`; passed to the prompt builder as the project name. */
  readonly siteName?: string;
  /** `manifest.siteTagline`; passed to the prompt builder as the tagline. */
  readonly siteTagline?: string;
}
|
|
146
|
+
|
|
147
|
+
/**
 * React hook that drives the documentation chat engine.
 *
 * Loading (triggered by `preload`, or whenever the settings event fires)
 * runs these steps in order: load the active stream provider, validate its
 * stored configuration, run the provider's optional `preload`, fetch the
 * documentation bundle, then load the embedding model named in the bundle
 * manifest. Every run takes a generation number; a run that has been
 * superseded by a newer one stops touching state.
 *
 * `ask` embeds the question, retrieves chunks, builds the prompts and
 * streams the provider's answer, forwarding text deltas to `opts.onToken`.
 *
 * @returns Engine state, status and error text, chunk count, repo, whether
 *   the active provider looks configured, and the `ask` / `preload` actions.
 */
export const useChatEngine: UseChatEngine = (): ChatEngine => {
  const loadBundle = useBundleLoader();
  const { loadEmbedder, embedQuery } = useEmbeddings();

  const [state, setState] = useState<EngineLoadingState>("idle");
  const [statusMessage, setStatusMessage] = useState("");
  const [error, setError] = useState<string | undefined>();
  const [chunkCount, setChunkCount] = useState(0);
  const [bundleMeta, setBundleMeta] = useState<BundleMeta>({});
  const [settingsTick, setSettingsTick] = useState(0);

  const [chunks, setChunks] = useState<ChunkRecord[] | null>(null);
  const [vectors, setVectors] = useState<Float32Array | null>(null);
  const [dim, setDim] = useState(0);
  const [count, setCount] = useState(0);

  // Generation counter: bumped by every load so older runs can detect they
  // were superseded.
  const loadGenRef = useRef(0);
  // Mirror of `state` readable from callbacks without re-creating them.
  const stateRef = useRef<EngineLoadingState>("idle");
  stateRef.current = state;

  // One-shot: wipe any legacy plaintext API keys a previous build persisted.
  useEffect(() => {
    purgeLegacyKeyStorage();
  }, []);

  const hasApiKey = useMemo(() => {
    // `settingsTick` is only a recompute trigger; the value comes from storage.
    void settingsTick;
    return providerLooksConfigured(readActiveProviderId());
  }, [settingsTick]);

  // Re-check config when the settings form saves or the in-memory secret
  // for the active provider changes.
  useEffect(() => {
    const bump = (): void => setSettingsTick((t) => t + 1);
    window.addEventListener(SETTINGS_EVENT, bump);
    const offSecret = onSecretChange(bump);
    return () => {
      window.removeEventListener(SETTINGS_EVENT, bump);
      offSecret();
    };
  }, []);

  const retrieve = useRetrieval(chunks, vectors, dim, count);

  const finishLoad = useCallback((bundle: BundleLoadResult) => {
    setBundleMeta({
      repo: bundle.manifest.repo,
      siteName: bundle.manifest.siteName,
      siteTagline: bundle.manifest.siteTagline,
    });
    setChunkCount(bundle.manifest.count);
    setState("ready");
    setStatusMessage("Ready.");
  }, []);

  const runLoad = useCallback(async () => {
    const gen = ++loadGenRef.current;
    const alive = (): boolean => gen === loadGenRef.current;

    // Drop the retrieval index (chunks + vectors and their dimensions).
    const clearIndex = (): void => {
      setChunks(null);
      setVectors(null);
      setDim(0);
      setCount(0);
    };
    // Move to the error state unless this run was superseded; returns
    // whether the state was actually updated.
    const fail = (message: string): boolean => {
      if (!alive()) return false;
      setError(message);
      setState("error");
      setStatusMessage("");
      return true;
    };
    const messageOf = (e: unknown): string =>
      e instanceof Error ? e.message : String(e);

    setError(undefined);
    clearIndex();
    setChunkCount(0);
    setBundleMeta({});

    const providerId = readActiveProviderId();

    if (typeof WebAssembly === "undefined") {
      fail(mapBundleError(new Error("NO_WASM")));
      return;
    }

    if (!alive()) return;
    setState("loading-bundle");
    setStatusMessage("Preparing provider…");

    let provider: StreamProvider;
    try {
      provider = await loadProvider(providerId);
    } catch (e) {
      fail(messageOf(e));
      return;
    }

    if (!alive()) return;

    const config = readProviderConfig(providerId, provider);
    const validationError = provider.validateConfig(config);
    if (validationError) {
      if (!alive()) return;
      setState("missing-key");
      setStatusMessage(validationError);
      return;
    }

    try {
      await provider.preload?.(config, (info) => {
        if (!alive()) return;
        const pct =
          info.fraction !== undefined
            ? ` ${Math.round(info.fraction * 100)}%`
            : "";
        setStatusMessage(`${info.message}${pct}`);
      });
    } catch (e) {
      fail(messageOf(e));
      return;
    }

    if (!alive()) return;
    setStatusMessage("Fetching documentation index…");

    let bundle: BundleLoadResult;
    try {
      bundle = await loadBundle();
    } catch (e) {
      fail(mapBundleError(e));
      return;
    }

    if (!alive()) return;

    setChunks(bundle.chunks);
    setVectors(bundle.vectors);
    setDim(bundle.manifest.dim);
    setCount(bundle.manifest.count);

    setState("loading-model");
    setStatusMessage("Loading search model…");

    try {
      // Use whatever model the bundle manifest names — the build-time
      // pipeline (`grimoire cast`) writes this id, and queries MUST be
      // embedded with the same model the chunks were embedded with for
      // dot-product = cosine to hold.
      await loadEmbedder(bundle.manifest.model, (fraction) => {
        if (!alive()) return;
        setStatusMessage(`Loading search model… ${Math.round(fraction * 100)}%`);
      });
    } catch (e) {
      // Without an embedder the index is unusable, so drop it again.
      if (fail(mapBundleError(e))) {
        clearIndex();
      }
      return;
    }

    if (!alive()) return;
    finishLoad(bundle);
  }, [loadBundle, loadEmbedder, finishLoad]);

  const preload = useCallback(() => {
    if (stateRef.current === "ready") return;
    void runLoad();
  }, [runLoad]);

  // Reload from scratch whenever settings change.
  useEffect(() => {
    const onSettings = (): void => {
      void runLoad();
    };
    if (typeof window === "undefined") return;
    window.addEventListener(SETTINGS_EVENT, onSettings);
    return () => window.removeEventListener(SETTINGS_EVENT, onSettings);
  }, [runLoad]);

  const ask = useCallback(
    async (question: string, opts?: AskOptions): Promise<AskResult> => {
      if (state !== "ready") {
        throw new Error(
          state === "missing-key"
            ? "Add your AI provider key in Settings."
            : "The assistant is not ready yet.",
        );
      }

      const trimmed = question.trim();
      if (!trimmed) {
        throw new Error("Ask something first.");
      }

      const providerId = readActiveProviderId();
      const provider = await loadProvider(providerId);
      const config = readProviderConfig(providerId, provider);
      const bad = provider.validateConfig(config);
      if (bad) {
        throw new Error(bad);
      }

      const t0 = performance.now();
      const queryVec = await embedQuery(trimmed);
      // Hybrid retrieval: dense (vector) + lexical (BM25) candidates fused
      // with RRF, then MMR re-ranks for diversity, then a token budget
      // trims the tail. `query` lets the BM25 side actually run; without
      // it retrieval falls back to pure vector search.
      const retrieved = retrieve(queryVec, 6, { query: trimmed });
      const { system, user } = buildPrompts({
        project: {
          name: bundleMeta.siteName,
          tagline: bundleMeta.siteTagline,
          repo: bundleMeta.repo,
        },
        question: trimmed,
        retrieved,
      });
      const citations = uniqCitations(retrieved);

      // Stitch prior turns onto the front of the messages array. Filter out
      // empty/whitespace-only entries (defensive — voice mode can briefly
      // produce empty assistant bubbles before the first token arrives).
      const priorTurns = (opts?.history ?? [])
        .filter(
          (t) =>
            (t.role === "user" || t.role === "assistant") &&
            typeof t.content === "string" &&
            t.content.trim().length > 0,
        )
        .map((t) => ({ role: t.role, content: t.content }));

      // Rough 4-characters-per-token estimate over everything that is sent:
      // system prompt, prior turns, and the new user message.
      const promptChars = priorTurns.reduce(
        (total, turn) => total + turn.content.length,
        system.length + user.length,
      );

      let answer = "";
      let inputTokensApprox = Math.ceil(promptChars / 4);
      let outputTokensApprox = 0;
      let finishReason: AskFinishReason = "stop";
      let sawFinish = false;

      try {
        for await (const ev of provider.stream(
          {
            system,
            messages: [...priorTurns, { role: "user", content: user }],
            maxTokens: 1024,
            temperature: 0.4,
            signal: opts?.signal,
          },
          config,
        )) {
          if (!ev || typeof ev.type !== "string") continue;
          if (opts?.signal?.aborted) {
            finishReason = "abort";
            break;
          }
          if (ev.type === "text-delta") {
            const piece = typeof ev.text === "string" ? ev.text : "";
            if (piece.length === 0) continue;
            answer += piece;
            if (opts?.onToken) {
              // A throwing UI callback must not kill the stream.
              try {
                opts.onToken({ text: piece });
              } catch (cbErr) {
                if (typeof console !== "undefined") {
                  console.error("[chat] onToken handler threw", cbErr);
                }
              }
            }
          } else if (ev.type === "finish") {
            sawFinish = true;
            if (
              typeof ev.inputTokens === "number" &&
              typeof ev.outputTokens === "number"
            ) {
              inputTokensApprox = ev.inputTokens;
              outputTokensApprox = ev.outputTokens;
            } else {
              outputTokensApprox = Math.ceil(answer.length / 4);
            }
            if (typeof ev.finishReason === "string") {
              finishReason = ev.finishReason as AskFinishReason;
            }
          }
        }
      } catch (err) {
        if (opts?.signal?.aborted || (err as Error)?.name === "AbortError") {
          finishReason = "abort";
        } else {
          throw err;
        }
      }

      // An aborted stream (or one that never emits `finish`) has no usage
      // report, so estimate the output size from the text received so far
      // instead of reporting 0.
      if (!sawFinish) {
        outputTokensApprox = Math.ceil(answer.length / 4);
      }

      const durationMs = Math.round(performance.now() - t0);

      return {
        answer,
        citations,
        inputTokensApprox,
        outputTokensApprox,
        durationMs,
        finishReason,
      };
    },
    [state, bundleMeta, embedQuery, retrieve],
  );

  return useMemo(
    (): ChatEngine => ({
      state,
      statusMessage,
      error,
      chunkCount,
      repo: bundleMeta.repo,
      hasApiKey,
      ask,
      preload,
    }),
    [state, statusMessage, error, chunkCount, bundleMeta, hasApiKey, ask, preload],
  );
};
|
|
472
|
+
|
|
473
|
+
/**
 * Broadcast that chat settings changed by dispatching the settings event on
 * `window`. Does nothing when `window` is not defined.
 */
export function notifySettingsChanged(): void {
  if (typeof window === "undefined") return;
  window.dispatchEvent(new Event(SETTINGS_EVENT));
}
|
|
478
|
+
|
|
479
|
+
/** Display details for the currently selected provider, as returned by `useActiveProvider`. */
export interface ActiveProviderInfo {
  /** The loaded provider's `displayName`; `"…"` until the provider has loaded. */
  readonly displayName: string;
  /**
   * Label of the configured model from the provider's model list, or the raw
   * model id when it is not listed; `"…"` until the provider has loaded.
   */
  readonly modelLabel: string;
  /** Id of the provider the other fields describe. */
  readonly providerId: ProviderId;
}
|
|
484
|
+
|
|
485
|
+
/**
 * React hook exposing display details for the active provider.
 *
 * Starts with placeholder labels (`"…"`) and the provider id read from
 * storage, then loads the provider and fills in its display name and model
 * label. The lookup re-runs whenever the settings event fires or a secret
 * changes.
 *
 * If the provider fails to load, the failure is logged and the info falls
 * back to the placeholders for the current provider id, rather than leaving
 * an unhandled rejection and the previous provider's labels on screen.
 */
export function useActiveProvider(): ActiveProviderInfo {
  const [info, setInfo] = useState<ActiveProviderInfo>(() => ({
    displayName: "…",
    modelLabel: "…",
    providerId: readActiveProviderId(),
  }));

  // Revision counter; bumping it re-runs the provider lookup below.
  const [rev, setRev] = useState(0);

  useEffect(() => {
    const bump = (): void => setRev((r) => r + 1);
    window.addEventListener(SETTINGS_EVENT, bump);
    const offSecret = onSecretChange(bump);
    return () => {
      window.removeEventListener(SETTINGS_EVENT, bump);
      offSecret();
    };
  }, []);

  useEffect(() => {
    const id = readActiveProviderId();
    // Set by the cleanup so a late result from an outdated lookup is ignored.
    let cancelled = false;
    void loadProvider(id)
      .then((p) => {
        if (cancelled) return;
        const cfg = readProviderConfig(id, p);
        const label =
          p.models.find((m) => m.id === cfg.model)?.label ?? cfg.model;
        setInfo({
          displayName: p.displayName,
          modelLabel: label,
          providerId: id,
        });
      })
      .catch((err: unknown) => {
        if (cancelled) return;
        if (typeof console !== "undefined") {
          console.error("[chat] failed to load provider", err);
        }
        setInfo({ displayName: "…", modelLabel: "…", providerId: id });
      });
    return () => {
      cancelled = true;
    };
  }, [rev]);

  return info;
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { useCallback, useRef } from "react";
|
|
2
|
+
|
|
3
|
+
/**
 * Length every query embedding must have. `embedQuery` throws
 * `EMBED_DIM_MISMATCH` when the pipeline output has any other length, so a
 * wrong model (or an un-pooled tensor) fails loudly instead of silently
 * corrupting retrieval scores. Per the original note, both the legacy
 * MiniLM-L6 model and the current Snowflake Arctic-Embed-XS default emit
 * 384-d vectors, which is what the `vectors.bin` format expects.
 */
const EXPECTED_DIM = 384;

/**
 * Model id used when `loadEmbedder` is called without one, so a manifest
 * that lacks a model id does not crash the chat.
 *
 * NOTE(review): the original comment said older bundles
 * (`manifest.version === 1`, no `model` field) were embedded with MiniLM-L6,
 * yet the fallback here is Arctic-Embed-XS — confirm that querying such
 * bundles with this model is intended. (The original wording "browsers built
 * before…" presumably meant "bundles built before…".)
 */
const FALLBACK_MODEL_ID = "Snowflake/snowflake-arctic-embed-xs";

/**
 * Callable feature-extraction pipeline: takes the text plus pooling and
 * normalisation options and resolves to the raw embedder output.
 */
type FeaturePipeline = (
  text: string,
  options: { pooling: string; normalize: boolean },
) => Promise<unknown>;

/** Module-level cache of in-flight or loaded pipelines, keyed by model id. */
const sharedPipelineCache = new Map<string, Promise<FeaturePipeline>>();
|
|
26
|
+
|
|
27
|
+
/**
 * Coerce whatever the embedding pipeline returned into a flat `Float32Array`.
 *
 * Accepted shapes, checked in this order:
 *  1. a `Float32Array` (returned as-is, not copied);
 *  2. a plain, possibly nested, array of numbers;
 *  3. an object whose `data` is a `Float32Array` (returned as-is) or a
 *     possibly nested array;
 *  4. an object with a `tolist()` method returning a possibly nested array.
 *
 * Nested arrays are flattened to full depth in every branch. A rank-3
 * (per-token) output therefore becomes a long flat vector that the caller's
 * dimension check can reject, instead of an array of arrays that would
 * convert to NaN.
 *
 * @throws Error("Unexpected embedder output shape") when none of the shapes
 *   match, including a `tolist()` that does not return an array.
 */
function toFloat32(raw: unknown): Float32Array {
  if (raw instanceof Float32Array) {
    return raw;
  }
  if (Array.isArray(raw)) {
    return Float32Array.from(raw.flat(Infinity) as number[]);
  }

  const tensorLike = raw as
    | { data?: unknown; dims?: readonly number[]; tolist?: () => unknown }
    | null
    | undefined;

  const data = tensorLike?.data;
  if (data instanceof Float32Array) {
    return data;
  }
  if (Array.isArray(data)) {
    return Float32Array.from(data.flat(Infinity) as number[]);
  }

  if (typeof tensorLike?.tolist === "function") {
    const nested = tensorLike.tolist();
    if (Array.isArray(nested)) {
      return Float32Array.from(nested.flat(Infinity) as number[]);
    }
  }

  throw new Error("Unexpected embedder output shape");
}
|
|
50
|
+
|
|
51
|
+
/**
 * Return a new `Float32Array` holding `vec` scaled to unit L2 length.
 *
 * The input is never modified. A vector whose squared norm is not positive
 * (for example all zeros) is copied unscaled. This is a safety net in case
 * the upstream pipeline ignores `normalize: true`: dot-product scores only
 * equal cosine similarity for unit-length vectors.
 */
function l2Normalize(vec: Float32Array): Float32Array {
  let squaredNorm = 0;
  for (const component of vec) {
    squaredNorm += component * component;
  }
  const scale = squaredNorm > 0 ? 1 / Math.sqrt(squaredNorm) : 1;
  return Float32Array.from(vec, (component) => component * scale);
}
|
|
69
|
+
|
|
70
|
+
/** Progress callback for embedder loading; `fraction` is reported in the range 0..1. */
export type EmbeddingProgress = (fraction: number) => void;
|
|
71
|
+
|
|
72
|
+
/** Functions returned by `useEmbeddings`. */
export interface UseEmbeddingsApi {
  /**
   * Download / instantiate an embedding model and make it the active one
   * for `embedQuery`.
   *
   * `modelId` is normally the id from the bundle manifest; when omitted a
   * built-in fallback id is used. Calling again with a different id swaps
   * the active model — useful after a fresh `grimoire cast` that changed
   * the embedding model. `onProgress` receives download progress as a
   * fraction.
   *
   * Resolves to the callable pipeline.
   */
  readonly loadEmbedder: (
    modelId?: string,
    onProgress?: EmbeddingProgress,
  ) => Promise<FeaturePipeline>;

  /**
   * Embed a query string with the active model and return its unit-length
   * vector. Rejects when no model has been loaded yet or when the output
   * does not have the expected dimension.
   */
  readonly embedQuery: (query: string) => Promise<Float32Array>;
}
|
|
86
|
+
|
|
87
|
+
/**
 * React hook providing query embedding through a lazily imported
 * `@huggingface/transformers` feature-extraction pipeline.
 *
 * Pipelines are shared across hook instances through the module-level
 * cache, keyed by model id. A pipeline whose load fails is evicted from
 * that cache, so a later `loadEmbedder` call retries the load instead of
 * replaying the cached rejection forever.
 */
export function useEmbeddings(): UseEmbeddingsApi {
  // Pipeline most recently loaded through this hook instance.
  const pipelineRef = useRef<FeaturePipeline | null>(null);

  const loadEmbedder = useCallback(
    async (modelId?: string, onProgress?: EmbeddingProgress) => {
      if (typeof WebAssembly === "undefined") {
        throw new Error("NO_WASM");
      }

      const id = modelId ?? FALLBACK_MODEL_ID;
      const { pipeline, env } = await import("@huggingface/transformers");
      env.allowLocalModels = false;
      env.useBrowserCache = true;

      let entry = sharedPipelineCache.get(id);
      if (!entry) {
        const pending = pipeline("feature-extraction", id, {
          dtype: "fp32",
          progress_callback: (report: { status?: string; progress?: number }) => {
            if (
              (report.status === "progress" || report.status === "progress_total") &&
              typeof report.progress === "number" &&
              onProgress
            ) {
              // `report.progress` is divided by 100 and clamped to 0..1.
              onProgress(Math.min(1, Math.max(0, report.progress / 100)));
            }
          },
        }) as Promise<FeaturePipeline>;
        sharedPipelineCache.set(id, pending);
        // Evict a failed load so the next call can retry. The identity check
        // avoids removing a newer entry stored under the same id. Callers
        // still observe the rejection through `await entry` below.
        void pending.catch(() => {
          if (sharedPipelineCache.get(id) === pending) {
            sharedPipelineCache.delete(id);
          }
        });
        entry = pending;
      } else if (onProgress) {
        onProgress(1);
      }

      const pipe = await entry;
      pipelineRef.current = pipe;
      return pipe;
    },
    [],
  );

  const embedQuery = useCallback(async (query: string) => {
    const pipe = pipelineRef.current;
    if (!pipe) {
      throw new Error("EMBEDDER_NOT_READY");
    }
    const out = await pipe(query, { pooling: "mean", normalize: true });
    const vec = toFloat32(out);
    if (vec.length !== EXPECTED_DIM) {
      throw new Error(
        `EMBED_DIM_MISMATCH: expected ${EXPECTED_DIM}, got ${vec.length}. ` +
          "The model named in the bundle manifest produced an unexpected " +
          "vector size — either the wrong model is being downloaded or " +
          "the pipeline returned a per-token tensor instead of a pooled " +
          "vector.",
      );
    }
    // Normalise again in case the pipeline ignored `normalize: true`.
    return l2Normalize(vec);
  }, []);

  return { loadEmbedder, embedQuery };
}
|