sagedesk 2.1.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -6
- package/dist/next/{SageDeskWidget-SJVE6QK3.js → SageDeskWidget-TNVTYDDX.js} +20 -4
- package/dist/next/SageDeskWidget-TNVTYDDX.js.map +1 -0
- package/dist/next/index.cjs +19 -3
- package/dist/next/index.cjs.map +1 -1
- package/dist/next/index.js +1 -1
- package/dist/react/index.cjs +19 -3
- package/dist/react/index.cjs.map +1 -1
- package/dist/react/index.js +19 -3
- package/dist/react/index.js.map +1 -1
- package/dist/server/index.cjs +23 -120
- package/dist/server/index.cjs.map +1 -1
- package/dist/server/index.d.cts +13 -7
- package/dist/server/index.d.ts +13 -7
- package/dist/server/index.js +23 -110
- package/dist/server/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/next/SageDeskWidget-SJVE6QK3.js.map +0 -1
package/dist/server/index.cjs
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __create = Object.create;
|
|
3
2
|
var __defProp = Object.defineProperty;
|
|
4
3
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
4
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
7
5
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
6
|
var __export = (target, all) => {
|
|
9
7
|
for (var name in all)
|
|
@@ -17,14 +15,6 @@ var __copyProps = (to, from, except, desc) => {
|
|
|
17
15
|
}
|
|
18
16
|
return to;
|
|
19
17
|
};
|
|
20
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
21
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
22
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
23
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
24
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
25
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
26
|
-
mod
|
|
27
|
-
));
|
|
28
18
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
19
|
|
|
30
20
|
// src/server/index.ts
|
|
@@ -36,96 +26,6 @@ __export(server_exports, {
|
|
|
36
26
|
module.exports = __toCommonJS(server_exports);
|
|
37
27
|
var import_fs = require("fs");
|
|
38
28
|
|
|
39
|
-
// src/core/server-embedder.ts
|
|
40
|
-
var HF_MODEL_IDS = {
|
|
41
|
-
"all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
|
|
42
|
-
"bge-small-en-v1-5": "BAAI/bge-small-en-v1.5",
|
|
43
|
-
"paraphrase-multilingual-MiniLM-L12-v2": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
|
44
|
-
"all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2"
|
|
45
|
-
};
|
|
46
|
-
var ServerEmbedder = class _ServerEmbedder {
|
|
47
|
-
constructor() {
|
|
48
|
-
this._ready = false;
|
|
49
|
-
this._failed = false;
|
|
50
|
-
this._model = "all-MiniLM-L6-v2";
|
|
51
|
-
}
|
|
52
|
-
static {
|
|
53
|
-
// Module-level singleton cache — survives across Lambda/Vercel warm invocations
|
|
54
|
-
this._pipelineCache = /* @__PURE__ */ new Map();
|
|
55
|
-
}
|
|
56
|
-
static {
|
|
57
|
-
this._loadingPromises = /* @__PURE__ */ new Map();
|
|
58
|
-
}
|
|
59
|
-
async load(model = "all-MiniLM-L6-v2") {
|
|
60
|
-
if (this._ready) return;
|
|
61
|
-
if (this._failed) throw new Error("ServerEmbedder previously failed to load");
|
|
62
|
-
this._model = model;
|
|
63
|
-
if (_ServerEmbedder._pipelineCache.has(model)) {
|
|
64
|
-
this._ready = true;
|
|
65
|
-
return;
|
|
66
|
-
}
|
|
67
|
-
if (_ServerEmbedder._loadingPromises.has(model)) {
|
|
68
|
-
await _ServerEmbedder._loadingPromises.get(model);
|
|
69
|
-
this._ready = true;
|
|
70
|
-
return;
|
|
71
|
-
}
|
|
72
|
-
const modelId = HF_MODEL_IDS[model];
|
|
73
|
-
const loadPromise = this._loadModel(model, modelId);
|
|
74
|
-
_ServerEmbedder._loadingPromises.set(model, loadPromise);
|
|
75
|
-
try {
|
|
76
|
-
await loadPromise;
|
|
77
|
-
this._ready = true;
|
|
78
|
-
} catch (err) {
|
|
79
|
-
this._failed = true;
|
|
80
|
-
_ServerEmbedder._loadingPromises.delete(model);
|
|
81
|
-
throw err;
|
|
82
|
-
} finally {
|
|
83
|
-
_ServerEmbedder._loadingPromises.delete(model);
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
async _loadModel(model, modelId) {
|
|
87
|
-
try {
|
|
88
|
-
const { pipeline } = await import("@huggingface/transformers");
|
|
89
|
-
const pipelineInstance = await pipeline("feature-extraction", modelId, {
|
|
90
|
-
dtype: "q8",
|
|
91
|
-
device: "wasm"
|
|
92
|
-
});
|
|
93
|
-
_ServerEmbedder._pipelineCache.set(model, pipelineInstance);
|
|
94
|
-
} catch (err) {
|
|
95
|
-
throw new Error(`Failed to load embedding model ${modelId}: ${String(err)}`);
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
async embed(text) {
|
|
99
|
-
if (!this._ready) {
|
|
100
|
-
await this.load(this._model);
|
|
101
|
-
}
|
|
102
|
-
const pipelineInstance = _ServerEmbedder._pipelineCache.get(this._model);
|
|
103
|
-
if (!pipelineInstance) {
|
|
104
|
-
throw new Error(`Embedding model ${this._model} not loaded`);
|
|
105
|
-
}
|
|
106
|
-
try {
|
|
107
|
-
const output = await pipelineInstance(text, {
|
|
108
|
-
pooling: "mean",
|
|
109
|
-
normalize: true
|
|
110
|
-
});
|
|
111
|
-
return output.data;
|
|
112
|
-
} catch (err) {
|
|
113
|
-
throw new Error(`Embedding failed: ${String(err)}`);
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
get isReady() {
|
|
117
|
-
return this._ready;
|
|
118
|
-
}
|
|
119
|
-
get hasFailed() {
|
|
120
|
-
return this._failed;
|
|
121
|
-
}
|
|
122
|
-
/** @internal - Reset for testing */
|
|
123
|
-
static _reset() {
|
|
124
|
-
_ServerEmbedder._pipelineCache.clear();
|
|
125
|
-
_ServerEmbedder._loadingPromises.clear();
|
|
126
|
-
}
|
|
127
|
-
};
|
|
128
|
-
|
|
129
29
|
// src/core/search.ts
|
|
130
30
|
function dotProduct(a, b) {
|
|
131
31
|
if (a.length !== b.length) {
|
|
@@ -191,7 +91,6 @@ var PROVIDER_URLS = {
|
|
|
191
91
|
};
|
|
192
92
|
var DEFAULT_SYSTEM_PROMPT = "You are a helpful support assistant. Answer the user's question based ONLY on the provided context. If the context does not contain a confident answer, respond with a friendly message saying you don't have that information right now. Do not make up information or draw from outside knowledge. Be concise, warm, and helpful.";
|
|
193
93
|
var indexCache = /* @__PURE__ */ new Map();
|
|
194
|
-
var embedderCache = /* @__PURE__ */ new Map();
|
|
195
94
|
function loadIndexFile(indexPath) {
|
|
196
95
|
if (indexCache.has(indexPath)) return indexCache.get(indexPath);
|
|
197
96
|
const raw = (0, import_fs.readFileSync)(indexPath, "utf-8");
|
|
@@ -206,13 +105,6 @@ function loadIndexFile(indexPath) {
|
|
|
206
105
|
indexCache.set(indexPath, chunks);
|
|
207
106
|
return chunks;
|
|
208
107
|
}
|
|
209
|
-
async function getEmbedder(model = "all-MiniLM-L6-v2") {
|
|
210
|
-
if (embedderCache.has(model)) return embedderCache.get(model);
|
|
211
|
-
const embedder = new ServerEmbedder();
|
|
212
|
-
await embedder.load(model);
|
|
213
|
-
embedderCache.set(model, embedder);
|
|
214
|
-
return embedder;
|
|
215
|
-
}
|
|
216
108
|
function classifyError(error) {
|
|
217
109
|
const msg = String(error).toLowerCase();
|
|
218
110
|
if (msg.includes("401") || msg.includes("403") || msg.includes("unauthorized") || msg.includes("invalid api key")) {
|
|
@@ -298,21 +190,18 @@ Question: ${query}`
|
|
|
298
190
|
clearTimeout(timeoutHandle);
|
|
299
191
|
}
|
|
300
192
|
}
|
|
301
|
-
async function handleQuery(query, config) {
|
|
193
|
+
async function handleQuery(query, queryVector, config) {
|
|
302
194
|
const {
|
|
303
195
|
indexPath,
|
|
304
196
|
provider,
|
|
305
197
|
apiKey,
|
|
306
198
|
model,
|
|
307
|
-
embeddingModel,
|
|
308
199
|
topK = 5,
|
|
309
200
|
minScore = 0.42,
|
|
310
201
|
systemPrompt = DEFAULT_SYSTEM_PROMPT,
|
|
311
202
|
llmTimeoutMs = 5e3
|
|
312
203
|
} = config;
|
|
313
204
|
const index = loadIndexFile(indexPath);
|
|
314
|
-
const embedder = await getEmbedder(embeddingModel);
|
|
315
|
-
const queryVector = await embedder.embed(query);
|
|
316
205
|
const results = search(queryVector, index, topK, minScore);
|
|
317
206
|
if (results.length === 0) {
|
|
318
207
|
return { answer: "", isFallback: true };
|
|
@@ -336,15 +225,29 @@ async function handleQuery(query, config) {
|
|
|
336
225
|
}
|
|
337
226
|
return { answer: llmResult.answer, isFallback: false };
|
|
338
227
|
}
|
|
228
|
+
function parseBody(body) {
|
|
229
|
+
const query = body.query?.trim();
|
|
230
|
+
if (!query) return { error: "Missing query" };
|
|
231
|
+
const raw = body.queryVector;
|
|
232
|
+
if (!Array.isArray(raw) || raw.length === 0) {
|
|
233
|
+
return { error: "Missing queryVector" };
|
|
234
|
+
}
|
|
235
|
+
for (let i = 0; i < raw.length; i++) {
|
|
236
|
+
if (typeof raw[i] !== "number" || !Number.isFinite(raw[i])) {
|
|
237
|
+
return { error: "Invalid queryVector" };
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
return { query, vector: new Float32Array(raw) };
|
|
241
|
+
}
|
|
339
242
|
function createSageDeskHandler(config) {
|
|
340
243
|
return async function POST(request) {
|
|
341
244
|
try {
|
|
342
245
|
const body = await request.json();
|
|
343
|
-
const
|
|
344
|
-
if (
|
|
345
|
-
return Response.json({ error:
|
|
246
|
+
const parsed = parseBody(body);
|
|
247
|
+
if ("error" in parsed) {
|
|
248
|
+
return Response.json({ error: parsed.error }, { status: 400 });
|
|
346
249
|
}
|
|
347
|
-
const result = await handleQuery(query, config);
|
|
250
|
+
const result = await handleQuery(parsed.query, parsed.vector, config);
|
|
348
251
|
return Response.json(result);
|
|
349
252
|
} catch (err) {
|
|
350
253
|
console.error("[sagedesk/server] Handler error:", err);
|
|
@@ -355,12 +258,12 @@ function createSageDeskHandler(config) {
|
|
|
355
258
|
function createSageDeskMiddleware(config) {
|
|
356
259
|
return async function sageDeskMiddleware(req, res, next) {
|
|
357
260
|
try {
|
|
358
|
-
const
|
|
359
|
-
if (
|
|
360
|
-
res.status(400).json({ error:
|
|
261
|
+
const parsed = parseBody(req.body ?? {});
|
|
262
|
+
if ("error" in parsed) {
|
|
263
|
+
res.status(400).json({ error: parsed.error });
|
|
361
264
|
return;
|
|
362
265
|
}
|
|
363
|
-
const result = await handleQuery(query, config);
|
|
266
|
+
const result = await handleQuery(parsed.query, parsed.vector, config);
|
|
364
267
|
res.json(result);
|
|
365
268
|
} catch (err) {
|
|
366
269
|
console.error("[sagedesk/server] Middleware error:", err);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/server/index.ts","../../src/core/server-embedder.ts","../../src/core/search.ts","../../src/core/renderer.ts"],"sourcesContent":["import { readFileSync } from 'fs';\nimport { ServerEmbedder } from '../core/server-embedder.js';\nimport { search } from '../core/search.js';\nimport { buildAnswer } from '../core/renderer.js';\nimport type { IndexChunk, IndexFile, SageDeskModel, FallbackReason } from '../core/types.js';\n\n// ─── Types ────────────────────────────────────────────────────────────────────\n\nexport interface SageDeskHandlerConfig {\n /** Filesystem path to the pre-built vector index (e.g. \"./public/support-index.json\"). */\n indexPath: string;\n /** LLM provider: 'openai', 'deepseek', 'groq', 'gemini', 'anthropic', or any OpenAI-compatible base URL. */\n provider: string;\n /** API key for the LLM provider. Never sent to the browser. */\n apiKey: string;\n /** LLM model name (e.g. 'deepseek-chat', 'gpt-4o-mini', 'llama3-8b-8192'). */\n model: string;\n /** Embedding model - must match the model used at build time. Defaults to all-MiniLM-L6-v2. */\n embeddingModel?: SageDeskModel;\n /** Number of chunks to retrieve for context. Defaults to 5. */\n topK?: number;\n /** Minimum similarity score for a chunk to be included. Defaults to 0.42. */\n minScore?: number;\n /** Override the system prompt sent to the LLM. */\n systemPrompt?: string;\n /** Timeout for LLM API calls in milliseconds. Defaults to 5000 (5 seconds). */\n llmTimeoutMs?: number;\n}\n\n// ─── Provider URL map ─────────────────────────────────────────────────────────\n\nconst PROVIDER_URLS: Record<string, string> = {\n openai: 'https://api.openai.com/v1/chat/completions',\n deepseek: 'https://api.deepseek.com/chat/completions',\n groq: 'https://api.groq.com/openai/v1/chat/completions',\n gemini: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',\n anthropic: 'https://api.anthropic.com/v1/messages',\n};\n\n// ─── Default system prompt ────────────────────────────────────────────────────\n\nconst DEFAULT_SYSTEM_PROMPT =\n 'You are a helpful support assistant. Answer the user\\'s question based ONLY on the ' +\n 'provided context. If the context does not contain a confident answer, respond with a ' +\n 'friendly message saying you don\\'t have that information right now. Do not make up ' +\n 'information or draw from outside knowledge. Be concise, warm, and helpful.';\n\n// ─── Server-side caches (module-level singletons) ─────────────────────────────\n\nconst indexCache = new Map<string, IndexChunk[]>();\nconst embedderCache = new Map<string, ServerEmbedder>();\n\nfunction loadIndexFile(indexPath: string): IndexChunk[] {\n if (indexCache.has(indexPath)) return indexCache.get(indexPath)!;\n\n const raw = readFileSync(indexPath, 'utf-8');\n const data = JSON.parse(raw) as IndexFile | IndexChunk[];\n const chunks: IndexChunk[] = Array.isArray(data) ? data : data.chunks;\n\n for (const chunk of chunks) {\n chunk.textLower = chunk.text.toLowerCase();\n if (Array.isArray(chunk.vector384)) {\n chunk.vector384 = new Float32Array(chunk.vector384);\n }\n }\n\n indexCache.set(indexPath, chunks);\n return chunks;\n}\n\nasync function getEmbedder(model: SageDeskModel = 'all-MiniLM-L6-v2'): Promise<ServerEmbedder> {\n if (embedderCache.has(model)) return embedderCache.get(model)!;\n\n const embedder = new ServerEmbedder();\n await embedder.load(model);\n embedderCache.set(model, embedder);\n return embedder;\n}\n\n// ─── Helper: Classify error for client-side logging ───────────────────────────\n\nfunction classifyError(error: unknown): FallbackReason {\n const msg = String(error).toLowerCase();\n\n if (msg.includes('401') || msg.includes('403') || msg.includes('unauthorized') || msg.includes('invalid api key')) {\n return 'auth-error';\n }\n if (msg.includes('429') || msg.includes('quota') || msg.includes('rate limit')) {\n return 'quota-exceeded';\n }\n if (msg.includes('timeout') || msg.includes('aborted')) {\n return 'timeout';\n }\n if (msg.includes('malformed') || msg.includes('json')) {\n return 'malformed-response';\n }\n\n return 'api-error';\n}\n\n// ─── LLM call ─────────────────────────────────────────────────────────────────\n\nasync function callLLM(\n provider: string,\n apiKey: string,\n model: string,\n systemPrompt: string,\n query: string,\n context: string,\n timeoutMs: number = 5000\n): Promise<{ answer: string; error?: FallbackReason }> {\n const url = PROVIDER_URLS[provider] ?? provider;\n const controller = new AbortController();\n const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);\n\n try {\n if (provider === 'anthropic') {\n const res = await fetch(url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'x-api-key': apiKey,\n 'anthropic-version': '2023-06-01',\n },\n body: JSON.stringify({\n model,\n max_tokens: 512,\n system: systemPrompt,\n messages: [{ role: 'user', content: `Context:\\n${context}\\n\\nQuestion: ${query}` }],\n }),\n signal: controller.signal,\n });\n\n if (!res.ok) {\n const error = classifyError(`${res.status}`);\n return { answer: '', error };\n }\n\n const data = (await res.json()) as { content: Array<{ type: string; text: string }> };\n const answer = data.content?.[0]?.text?.trim() ?? '';\n return { answer };\n }\n\n const res = await fetch(url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${apiKey}`,\n },\n body: JSON.stringify({\n model,\n messages: [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: `Context:\\n${context}\\n\\nQuestion: ${query}`,\n },\n ],\n temperature: 0.3,\n max_tokens: 512,\n }),\n signal: controller.signal,\n });\n\n if (!res.ok) {\n const error = classifyError(`${res.status}`);\n return { answer: '', error };\n }\n\n const data = (await res.json()) as {\n choices: Array<{ message: { content: string } }>;\n };\n const answer = data.choices?.[0]?.message?.content?.trim() ?? '';\n return { answer };\n } catch (err) {\n const error = classifyError(err);\n return { answer: '', error };\n } finally {\n clearTimeout(timeoutHandle);\n }\n}\n\n// ─── Core handler logic ───────────────────────────────────────────────────────\n\nasync function handleQuery(\n query: string,\n config: SageDeskHandlerConfig\n): Promise<{ answer: string; isFallback: boolean; fallbackReason?: FallbackReason }> {\n const {\n indexPath,\n provider,\n apiKey,\n model,\n embeddingModel,\n topK = 5,\n minScore = 0.42,\n systemPrompt = DEFAULT_SYSTEM_PROMPT,\n llmTimeoutMs = 5000,\n } = config;\n\n const index = loadIndexFile(indexPath);\n const embedder = await getEmbedder(embeddingModel);\n\n const queryVector = await embedder.embed(query);\n const results = search(queryVector, index, topK, minScore);\n\n if (results.length === 0) {\n return { answer: '', isFallback: true };\n }\n\n const context = buildAnswer(results);\n const llmResult = await callLLM(\n provider,\n apiKey,\n model,\n systemPrompt,\n query,\n context,\n llmTimeoutMs\n );\n\n if (!llmResult.answer) {\n return {\n answer: '',\n isFallback: true,\n fallbackReason: llmResult.error,\n };\n }\n\n return { answer: llmResult.answer, isFallback: false };\n}\n\n// ─── Next.js App Router handler ───────────────────────────────────────────────\n\n/**\n * Returns a Next.js App Router POST handler.\n *\n * @example\n * // app/api/sagedesk/route.ts\n * import { createSageDeskHandler } from 'sagedesk/server'\n * export const POST = createSageDeskHandler({\n * indexPath: './public/support-index.json',\n * provider: 'deepseek',\n * apiKey: process.env.SAGEDESK_LLM_API_KEY!,\n * model: 'deepseek-chat',\n * })\n */\nexport function createSageDeskHandler(config: SageDeskHandlerConfig) {\n return async function POST(request: Request): Promise<Response> {\n try {\n const body = (await request.json()) as { query?: string };\n const query = body.query?.trim();\n\n if (!query) {\n return Response.json({ error: 'Missing query' }, { status: 400 });\n }\n\n const result = await handleQuery(query, config);\n return Response.json(result);\n } catch (err) {\n console.error('[sagedesk/server] Handler error:', err);\n return Response.json({ answer: '', isFallback: true }, { status: 500 });\n }\n };\n}\n\n// ─── Express / Connect middleware ─────────────────────────────────────────────\n\ntype ExpressRequest = {\n body: { query?: string };\n};\ntype ExpressResponse = {\n status: (code: number) => ExpressResponse;\n json: (data: unknown) => void;\n};\ntype NextFunction = (err?: unknown) => void;\n\n/**\n * Returns an Express (or any Connect-compatible) middleware.\n *\n * @example\n * // server.ts / index.ts\n * import { createSageDeskMiddleware } from 'sagedesk/server'\n * app.use('/api/sagedesk', express.json(), createSageDeskMiddleware({\n * indexPath: './public/support-index.json',\n * provider: 'openai',\n * apiKey: process.env.SAGEDESK_LLM_API_KEY!,\n * model: 'gpt-4o-mini',\n * }))\n */\nexport function createSageDeskMiddleware(config: SageDeskHandlerConfig) {\n return async function sageDeskMiddleware(\n req: ExpressRequest,\n res: ExpressResponse,\n next: NextFunction\n ): Promise<void> {\n try {\n const query = req.body?.query?.trim();\n\n if (!query) {\n res.status(400).json({ error: 'Missing query' });\n return;\n }\n\n const result = await handleQuery(query, config);\n res.json(result);\n } catch (err) {\n console.error('[sagedesk/server] Middleware error:', err);\n next(err);\n }\n };\n}\n","import type { SageDeskModel } from './types';\n\ntype PipelineFn = (\n text: string,\n options: { pooling: string; normalize: boolean }\n) => Promise<{ data: Float32Array }>;\n\n// Maps each supported model alias to its canonical HuggingFace model ID.\nconst HF_MODEL_IDS: Record<SageDeskModel, string> = {\n 'all-MiniLM-L6-v2': 'sentence-transformers/all-MiniLM-L6-v2',\n 'bge-small-en-v1-5': 'BAAI/bge-small-en-v1.5',\n 'paraphrase-multilingual-MiniLM-L12-v2': 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',\n 'all-mpnet-base-v2': 'sentence-transformers/all-mpnet-base-v2',\n};\n\n/**\n * Server-side embedder optimized for serverless environments (Vercel, Lambda, etc).\n * Uses pure WASM with no native ONNX Runtime dependency.\n * Models are cached at the module level to survive across serverless invocations.\n */\nexport class ServerEmbedder {\n private _ready = false;\n private _failed = false;\n private _model: SageDeskModel = 'all-MiniLM-L6-v2';\n\n // Module-level singleton cache — survives across Lambda/Vercel warm invocations\n private static _pipelineCache = new Map<SageDeskModel, PipelineFn>();\n private static _loadingPromises = new Map<SageDeskModel, Promise<void>>();\n\n async load(model: SageDeskModel = 'all-MiniLM-L6-v2'): Promise<void> {\n if (this._ready) return;\n if (this._failed) throw new Error('ServerEmbedder previously failed to load');\n\n this._model = model;\n\n // Return cached instance if already loaded\n if (ServerEmbedder._pipelineCache.has(model)) {\n this._ready = true;\n return;\n }\n\n // Return existing loading promise if currently loading\n if (ServerEmbedder._loadingPromises.has(model)) {\n await ServerEmbedder._loadingPromises.get(model)!;\n this._ready = true;\n return;\n }\n\n const modelId = HF_MODEL_IDS[model];\n const loadPromise = this._loadModel(model, modelId);\n ServerEmbedder._loadingPromises.set(model, loadPromise);\n\n try {\n await loadPromise;\n this._ready = true;\n } catch (err) {\n this._failed = true;\n ServerEmbedder._loadingPromises.delete(model);\n throw err;\n } finally {\n ServerEmbedder._loadingPromises.delete(model);\n }\n }\n\n private async _loadModel(model: SageDeskModel, modelId: string): Promise<void> {\n try {\n // device: 'wasm' forces pure WebAssembly backend — skips native ONNX Runtime\n // which is unavailable on serverless platforms (Vercel, Lambda, etc.)\n const { pipeline } = await import('@huggingface/transformers');\n const pipelineInstance = (await pipeline('feature-extraction', modelId, {\n dtype: 'q8',\n device: 'wasm',\n })) as unknown as PipelineFn;\n\n ServerEmbedder._pipelineCache.set(model, pipelineInstance);\n } catch (err) {\n throw new Error(`Failed to load embedding model ${modelId}: ${String(err)}`);\n }\n }\n\n async embed(text: string): Promise<Float32Array> {\n if (!this._ready) {\n await this.load(this._model);\n }\n\n const pipelineInstance = ServerEmbedder._pipelineCache.get(this._model);\n if (!pipelineInstance) {\n throw new Error(`Embedding model ${this._model} not loaded`);\n }\n\n try {\n const output = await pipelineInstance(text, {\n pooling: 'mean',\n normalize: true,\n });\n return output.data;\n } catch (err) {\n throw new Error(`Embedding failed: ${String(err)}`);\n }\n }\n\n get isReady(): boolean {\n return this._ready;\n }\n\n get hasFailed(): boolean {\n return this._failed;\n }\n\n /** @internal - Reset for testing */\n static _reset(): void {\n ServerEmbedder._pipelineCache.clear();\n ServerEmbedder._loadingPromises.clear();\n }\n}\n","import type { IndexChunk, SearchResult } from './types';\n\n// Both the query vector (embedder.ts, normalize:true) and stored vectors\n// (builder-embedder.ts, normalize:true) are guaranteed unit-length, so\n// cosine similarity reduces to a plain dot product - no norms needed.\nfunction dotProduct(a: Float32Array, b: Float32Array): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: query(${a.length}) vs index(${b.length})`);\n }\n let dot = 0;\n for (let i = 0; i < a.length; i++) dot += a[i] * b[i];\n return dot;\n}\n\n// Inserts item at the correct descending-score position, then trims to maxLen.\n// Avoids Array.sort overhead on every insertion for small topK arrays.\nfunction insertSorted(arr: SearchResult[], item: SearchResult, maxLen: number): void {\n arr.push(item);\n let i = arr.length - 1;\n while (i > 0 && arr[i - 1].score < arr[i].score) {\n const tmp = arr[i - 1]; arr[i - 1] = arr[i]; arr[i] = tmp;\n i--;\n }\n if (arr.length > maxLen) arr.pop();\n}\n\nexport function search(\n queryVector: Float32Array,\n index: IndexChunk[],\n topK = 3,\n minScore = 0.42\n): SearchResult[] {\n const results: SearchResult[] = [];\n\n for (const chunk of index) {\n const score = dotProduct(queryVector, chunk.vector384 as Float32Array);\n if (score < minScore) continue;\n\n if (results.length < topK) {\n insertSorted(results, { chunk, score }, topK);\n } else if (score > results[topK - 1].score) {\n results[topK - 1] = { chunk, score };\n let i = topK - 1;\n while (i > 0 && results[i - 1].score < results[i].score) {\n const tmp = results[i - 1]; results[i - 1] = results[i]; results[i] = tmp;\n i--;\n }\n }\n }\n\n return results;\n}\n\nexport function keywordSearch(\n query: string,\n index: IndexChunk[],\n topK = 3\n): SearchResult[] {\n const terms = query\n .toLowerCase()\n .split(/\\s+/)\n .filter((w) => w.length > 2)\n .map((w) => w.replace(/[^a-z0-9]/g, ''));\n\n if (terms.length === 0) return [];\n\n const results: SearchResult[] = [];\n\n for (const chunk of index) {\n const chunkLower = chunk.textLower || chunk.text.toLowerCase();\n let matchCount = 0;\n for (const t of terms) {\n if (chunkLower.includes(t)) matchCount++;\n }\n const score = matchCount / terms.length;\n\n if (score <= 0) continue;\n\n if (results.length < topK) {\n insertSorted(results, { chunk, score }, topK);\n } else if (score > results[topK - 1].score) {\n results[topK - 1] = { chunk, score };\n let i = topK - 1;\n while (i > 0 && results[i - 1].score < results[i].score) {\n const tmp = results[i - 1]; results[i - 1] = results[i]; results[i] = tmp;\n i--;\n }\n }\n }\n\n return results;\n}\n\nexport async function loadIndex(url: string): Promise<IndexChunk[]> {\n const res = await fetch(url);\n if (!res.ok) {\n throw new Error(`Failed to fetch index (HTTP ${res.status}): ${url}`);\n }\n const data = await res.json();\n // Support both the new { meta, chunks } format and the legacy bare-array format.\n const chunks: IndexChunk[] = Array.isArray(data)\n ? data\n : (data as { chunks: IndexChunk[] }).chunks;\n\n // Materialize lowercase versions and convert vectors to Float32Array once at load time.\n for (const chunk of chunks) {\n chunk.textLower = chunk.text.toLowerCase();\n if (Array.isArray(chunk.vector384)) {\n chunk.vector384 = new Float32Array(chunk.vector384);\n }\n if (Array.isArray(chunk.vector768)) {\n chunk.vector768 = new Float32Array(chunk.vector768);\n }\n }\n\n return chunks;\n}\n","import type { SearchResult } from './types';\n\nexport function buildAnswer(results: SearchResult[]): string {\n if (results.length === 0) return '';\n // Deduplicate by sourceId: query expansion produces multiple chunks per\n // source entry (same answer, different query phrasings) - show each source once.\n const seen = new Set<string>();\n const parts: string[] = [];\n for (const r of results) {\n if (!seen.has(r.chunk.sourceId)) {\n seen.add(r.chunk.sourceId);\n parts.push(r.chunk.text);\n }\n }\n return parts.join('\\n\\n');\n}\n\nexport function extractChips(\n index: { text: string; question?: string; sourceId?: string }[],\n override?: string[]\n): string[] {\n if (override && override.length > 0) return override.slice(0, 5);\n\n const chips: string[] = [];\n const seenText = new Set<string>();\n const seenSource = new Set<string>();\n\n for (const chunk of index) {\n if (chips.length >= 5) break;\n\n // Deduplicate by sourceId if available to ensure variety of answers.\n if (chunk.sourceId) {\n if (seenSource.has(chunk.sourceId)) continue;\n seenSource.add(chunk.sourceId);\n }\n\n const candidate = chunk.question ?? extractFirstSentence(chunk.text);\n if (candidate && !seenText.has(candidate)) {\n seenText.add(candidate);\n chips.push(candidate);\n }\n }\n\n return chips;\n}\n\nfunction extractFirstSentence(text: string): string {\n const match = text.match(/^[^\\n.!?]{10,80}[.!?\\n]?/);\n if (!match) return text.slice(0, 60);\n return match[0].trim();\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,gBAA6B;;;ACQ7B,IAAM,eAA8C;AAAA,EAClD,oBAAoB;AAAA,EACpB,qBAAqB;AAAA,EACrB,yCAAyC;AAAA,EACzC,qBAAqB;AACvB;AAOO,IAAM,iBAAN,MAAM,gBAAe;AAAA,EAArB;AACL,SAAQ,SAAS;AACjB,SAAQ,UAAU;AAClB,SAAQ,SAAwB;AAAA;AAAA,EAGhC;AAAA;AAAA,SAAe,iBAAiB,oBAAI,IAA+B;AAAA;AAAA,EACnE;AAAA,SAAe,mBAAmB,oBAAI,IAAkC;AAAA;AAAA,EAExE,MAAM,KAAK,QAAuB,oBAAmC;AACnE,QAAI,KAAK,OAAQ;AACjB,QAAI,KAAK,QAAS,OAAM,IAAI,MAAM,0CAA0C;AAE5E,SAAK,SAAS;AAGd,QAAI,gBAAe,eAAe,IAAI,KAAK,GAAG;AAC5C,WAAK,SAAS;AACd;AAAA,IACF;AAGA,QAAI,gBAAe,iBAAiB,IAAI,KAAK,GAAG;AAC9C,YAAM,gBAAe,iBAAiB,IAAI,KAAK;AAC/C,WAAK,SAAS;AACd;AAAA,IACF;AAEA,UAAM,UAAU,aAAa,KAAK;AAClC,UAAM,cAAc,KAAK,WAAW,OAAO,OAAO;AAClD,oBAAe,iBAAiB,IAAI,OAAO,WAAW;AAEtD,QAAI;AACF,YAAM;AACN,WAAK,SAAS;AAAA,IAChB,SAAS,KAAK;AACZ,WAAK,UAAU;AACf,sBAAe,iBAAiB,OAAO,KAAK;AAC5C,YAAM;AAAA,IACR,UAAE;AACA,sBAAe,iBAAiB,OAAO,KAAK;AAAA,IAC9C;AAAA,EACF;AAAA,EAEA,MAAc,WAAW,OAAsB,SAAgC;AAC7E,QAAI;AAGF,YAAM,EAAE,SAAS,IAAI,MAAM,OAAO,2BAA2B;AAC7D,YAAM,mBAAoB,MAAM,SAAS,sBAAsB,SAAS;AAAA,QACtE,OAAO;AAAA,QACP,QAAQ;AAAA,MACV,CAAC;AAED,sBAAe,eAAe,IAAI,OAAO,gBAAgB;AAAA,IAC3D,SAAS,KAAK;AACZ,YAAM,IAAI,MAAM,kCAAkC,OAAO,KAAK,OAAO,GAAG,CAAC,EAAE;AAAA,IAC7E;AAAA,EACF;AAAA,EAEA,MAAM,MAAM,MAAqC;AAC/C,QAAI,CAAC,KAAK,QAAQ;AAChB,YAAM,KAAK,KAAK,KAAK,MAAM;AAAA,IAC7B;AAEA,UAAM,mBAAmB,gBAAe,eAAe,IAAI,KAAK,MAAM;AACtE,QAAI,CAAC,kBAAkB;AACrB,YAAM,IAAI,MAAM,mBAAmB,KAAK,MAAM,aAAa;AAAA,IAC7D;AAEA,QAAI;AACF,YAAM,SAAS,MAAM,iBAAiB,MAAM;AAAA,QAC1C,SAAS;AAAA,QACT,WAAW;AAAA,MACb,CAAC;AACD,aAAO,OAAO;AAAA,IAChB,SAAS,KAAK;AACZ,YAAM,IAAI,MAAM,qBAAqB,OAAO,GAAG,CAAC,EAAE;AAAA,IACpD;AAAA,EACF;AAAA,EAEA,IAAI,UAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,YAAqB;AACvB,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,OAAO,SAAe;AACpB,oBAAe,eAAe,MAAM;AACpC,oBAAe,iBAAiB,MAAM;AAAA,EACxC;AACF;;;AC7GA,SAAS,WAAW,GAAiB,GAAyB;AAC5D,MAAI,EAAE,WAAW,EAAE,QAAQ;AACzB,UAAM,IAAI,MAAM,oCAAoC,EAAE,MAAM,cAAc,EAAE,MAAM,GAAG;AAAA,EACvF;AACA,MAAI,MAAM;AACV,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,IAAK,QAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AACpD,SAAO;AACT;AAIA,SAAS,aAAa,KAAqB,MAAoB,QAAsB;AACnF,MAAI,KAAK,IAAI;AACb,MAAI,IAAI,IAAI,SAAS;AACrB,SAAO,IAAI,KAAK,IAAI,IAAI,CAAC,EAAE,QAAQ,IAAI,CAAC,EAAE,OAAO;AAC/C,UAAM,MAAM,IAAI,IAAI,CAAC;AAAG,QAAI,IAAI,CAAC,IAAI,IAAI,CAAC;AAAG,QAAI,CAAC,IAAI;AACtD;AAAA,EACF;AACA,MAAI,IAAI,SAAS,OAAQ,KAAI,IAAI;AACnC;AAEO,SAAS,OACd,aACA,OACA,OAAO,GACP,WAAW,MACK;AAChB,QAAM,UAA0B,CAAC;AAEjC,aAAW,SAAS,OAAO;AACzB,UAAM,QAAQ,WAAW,aAAa,MAAM,SAAyB;AACrE,QAAI,QAAQ,SAAU;AAEtB,QAAI,QAAQ,SAAS,MAAM;AACzB,mBAAa,SAAS,EAAE,OAAO,MAAM,GAAG,IAAI;AAAA,IAC9C,WAAW,QAAQ,QAAQ,OAAO,CAAC,EAAE,OAAO;AAC1C,cAAQ,OAAO,CAAC,IAAI,EAAE,OAAO,MAAM;AACnC,UAAI,IAAI,OAAO;AACf,aAAO,IAAI,KAAK,QAAQ,IAAI,CAAC,EAAE,QAAQ,QAAQ,CAAC,EAAE,OAAO;AACvD,cAAM,MAAM,QAAQ,IAAI,CAAC;AAAG,gBAAQ,IAAI,CAAC,IAAI,QAAQ,CAAC;AAAG,gBAAQ,CAAC,IAAI;AACtE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;;ACjDO,SAAS,YAAY,SAAiC;AAC3D,MAAI,QAAQ,WAAW,EAAG,QAAO;AAGjC,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,QAAkB,CAAC;AACzB,aAAW,KAAK,SAAS;AACvB,QAAI,CAAC,KAAK,IAAI,EAAE,MAAM,QAAQ,GAAG;AAC/B,WAAK,IAAI,EAAE,MAAM,QAAQ;AACzB,YAAM,KAAK,EAAE,MAAM,IAAI;AAAA,IACzB;AAAA,EACF;AACA,SAAO,MAAM,KAAK,MAAM;AAC1B;;;AHgBA,IAAM,gBAAwC;AAAA,EAC5C,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,WAAW;AACb;AAIA,IAAM,wBACJ;AAOF,IAAM,aAAa,oBAAI,IAA0B;AACjD,IAAM,gBAAgB,oBAAI,IAA4B;AAEtD,SAAS,cAAc,WAAiC;AACtD,MAAI,WAAW,IAAI,SAAS,EAAG,QAAO,WAAW,IAAI,SAAS;AAE9D,QAAM,UAAM,wBAAa,WAAW,OAAO;AAC3C,QAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,QAAM,SAAuB,MAAM,QAAQ,IAAI,IAAI,OAAO,KAAK;AAE/D,aAAW,SAAS,QAAQ;AAC1B,UAAM,YAAY,MAAM,KAAK,YAAY;AACzC,QAAI,MAAM,QAAQ,MAAM,SAAS,GAAG;AAClC,YAAM,YAAY,IAAI,aAAa,MAAM,SAAS;AAAA,IACpD;AAAA,EACF;AAEA,aAAW,IAAI,WAAW,MAAM;AAChC,SAAO;AACT;AAEA,eAAe,YAAY,QAAuB,oBAA6C;AAC7F,MAAI,cAAc,IAAI,KAAK,EAAG,QAAO,cAAc,IAAI,KAAK;AAE5D,QAAM,WAAW,IAAI,eAAe;AACpC,QAAM,SAAS,KAAK,KAAK;AACzB,gBAAc,IAAI,OAAO,QAAQ;AACjC,SAAO;AACT;AAIA,SAAS,cAAc,OAAgC;AACrD,QAAM,MAAM,OAAO,KAAK,EAAE,YAAY;AAEtC,MAAI,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,cAAc,KAAK,IAAI,SAAS,iBAAiB,GAAG;AACjH,WAAO;AAAA,EACT;AACA,MAAI,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,OAAO,KAAK,IAAI,SAAS,YAAY,GAAG;AAC9E,WAAO;AAAA,EACT;AACA,MAAI,IAAI,SAAS,SAAS,KAAK,IAAI,SAAS,SAAS,GAAG;AACtD,WAAO;AAAA,EACT;AACA,MAAI,IAAI,SAAS,WAAW,KAAK,IAAI,SAAS,MAAM,GAAG;AACrD,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAIA,eAAe,QACb,UACA,QACA,OACA,cACA,OACA,SACA,YAAoB,KACiC;AACrD,QAAM,MAAM,cAAc,QAAQ,KAAK;AACvC,QAAM,aAAa,IAAI,gBAAgB;AACvC,QAAM,gBAAgB,WAAW,MAAM,WAAW,MAAM,GAAG,SAAS;AAEpE,MAAI;AACF,QAAI,aAAa,aAAa;AAC5B,YAAMA,OAAM,MAAM,MAAM,KAAK;AAAA,QAC3B,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,gBAAgB;AAAA,UAChB,aAAa;AAAA,UACb,qBAAqB;AAAA,QACvB;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB;AAAA,UACA,YAAY;AAAA,UACZ,QAAQ;AAAA,UACR,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS;AAAA,EAAa,OAAO;AAAA;AAAA,YAAiB,KAAK,GAAG,CAAC;AAAA,QACpF,CAAC;AAAA,QACD,QAAQ,WAAW;AAAA,MACrB,CAAC;AAED,UAAI,CAACA,KAAI,IAAI;AACX,cAAM,QAAQ,cAAc,GAAGA,KAAI,MAAM,EAAE;AAC3C,eAAO,EAAE,QAAQ,IAAI,MAAM;AAAA,MAC7B;AAEA,YAAMC,QAAQ,MAAMD,KAAI,KAAK;AAC7B,YAAME,UAASD,MAAK,UAAU,CAAC,GAAG,MAAM,KAAK,KAAK;AAClD,aAAO,EAAE,QAAAC,QAAO;AAAA,IAClB;AAEA,UAAM,MAAM,MAAM,MAAM,KAAK;AAAA,MAC3B,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,eAAe,UAAU,MAAM;AAAA,MACjC;AAAA,MACA,MAAM,KAAK,UAAU;AAAA,QACnB;AAAA,QACA,UAAU;AAAA,UACR,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,UACxC;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,EAAa,OAAO;AAAA;AAAA,YAAiB,KAAK;AAAA,UACrD;AAAA,QACF;AAAA,QACA,aAAa;AAAA,QACb,YAAY;AAAA,MACd,CAAC;AAAA,MACD,QAAQ,WAAW;AAAA,IACrB,CAAC;AAED,QAAI,CAAC,IAAI,IAAI;AACX,YAAM,QAAQ,cAAc,GAAG,IAAI,MAAM,EAAE;AAC3C,aAAO,EAAE,QAAQ,IAAI,MAAM;AAAA,IAC7B;AAEA,UAAM,OAAQ,MAAM,IAAI,KAAK;AAG7B,UAAM,SAAS,KAAK,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK,KAAK;AAC9D,WAAO,EAAE,OAAO;AAAA,EAClB,SAAS,KAAK;AACZ,UAAM,QAAQ,cAAc,GAAG;AAC/B,WAAO,EAAE,QAAQ,IAAI,MAAM;AAAA,EAC7B,UAAE;AACA,iBAAa,aAAa;AAAA,EAC5B;AACF;AAIA,eAAe,YACb,OACA,QACmF;AACnF,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,OAAO;AAAA,IACP,WAAW;AAAA,IACX,eAAe;AAAA,IACf,eAAe;AAAA,EACjB,IAAI;AAEJ,QAAM,QAAQ,cAAc,SAAS;AACrC,QAAM,WAAW,MAAM,YAAY,cAAc;AAEjD,QAAM,cAAc,MAAM,SAAS,MAAM,KAAK;AAC9C,QAAM,UAAU,OAAO,aAAa,OAAO,MAAM,QAAQ;AAEzD,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO,EAAE,QAAQ,IAAI,YAAY,KAAK;AAAA,EACxC;AAEA,QAAM,UAAU,YAAY,OAAO;AACnC,QAAM,YAAY,MAAM;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,MAAI,CAAC,UAAU,QAAQ;AACrB,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,gBAAgB,UAAU;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,QAAQ,UAAU,QAAQ,YAAY,MAAM;AACvD;AAiBO,SAAS,sBAAsB,QAA+B;AACnE,SAAO,eAAe,KAAK,SAAqC;AAC9D,QAAI;AACF,YAAM,OAAQ,MAAM,QAAQ,KAAK;AACjC,YAAM,QAAQ,KAAK,OAAO,KAAK;AAE/B,UAAI,CAAC,OAAO;AACV,eAAO,SAAS,KAAK,EAAE,OAAO,gBAAgB,GAAG,EAAE,QAAQ,IAAI,CAAC;AAAA,MAClE;AAEA,YAAM,SAAS,MAAM,YAAY,OAAO,MAAM;AAC9C,aAAO,SAAS,KAAK,MAAM;AAAA,IAC7B,SAAS,KAAK;AACZ,cAAQ,MAAM,oCAAoC,GAAG;AACrD,aAAO,SAAS,KAAK,EAAE,QAAQ,IAAI,YAAY,KAAK,GAAG,EAAE,QAAQ,IAAI,CAAC;AAAA,IACxE;AAAA,EACF;AACF;AA0BO,SAAS,yBAAyB,QAA+B;AACtE,SAAO,eAAe,mBACpB,KACA,KACA,MACe;AACf,QAAI;AACF,YAAM,QAAQ,IAAI,MAAM,OAAO,KAAK;AAEpC,UAAI,CAAC,OAAO;AACV,YAAI,OAAO,GAAG,EAAE,KAAK,EAAE,OAAO,gBAAgB,CAAC;AAC/C;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,YAAY,OAAO,MAAM;AAC9C,UAAI,KAAK,MAAM;AAAA,IACjB,SAAS,KAAK;AACZ,cAAQ,MAAM,uCAAuC,GAAG;AACxD,WAAK,GAAG;AAAA,IACV;AAAA,EACF;AACF;","names":["res","data","answer"]}
|
|
1
|
+
{"version":3,"sources":["../../src/server/index.ts","../../src/core/search.ts","../../src/core/renderer.ts"],"sourcesContent":["import { readFileSync } from 'fs';\nimport { search } from '../core/search.js';\nimport { buildAnswer } from '../core/renderer.js';\nimport type { IndexChunk, IndexFile, FallbackReason } from '../core/types.js';\n\n// ─── Types ────────────────────────────────────────────────────────────────────\n\nexport interface SageDeskHandlerConfig {\n /** Filesystem path to the pre-built vector index (e.g. \"./public/support-index.json\"). */\n indexPath: string;\n /** LLM provider: 'openai', 'deepseek', 'groq', 'gemini', 'anthropic', or any OpenAI-compatible base URL. */\n provider: string;\n /** API key for the LLM provider. Never sent to the browser. */\n apiKey: string;\n /** LLM model name (e.g. 'deepseek-chat', 'gpt-4o-mini', 'llama3-8b-8192'). */\n model: string;\n /** Number of chunks to retrieve for context. Defaults to 5. */\n topK?: number;\n /** Minimum similarity score for a chunk to be included. Defaults to 0.42. */\n minScore?: number;\n /** Override the system prompt sent to the LLM. */\n systemPrompt?: string;\n /** Timeout for LLM API calls in milliseconds. Defaults to 5000 (5 seconds). */\n llmTimeoutMs?: number;\n}\n\ninterface QueryRequestBody {\n query?: string;\n queryVector?: number[];\n}\n\n// ─── Provider URL map ─────────────────────────────────────────────────────────\n\nconst PROVIDER_URLS: Record<string, string> = {\n openai: 'https://api.openai.com/v1/chat/completions',\n deepseek: 'https://api.deepseek.com/chat/completions',\n groq: 'https://api.groq.com/openai/v1/chat/completions',\n gemini: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',\n anthropic: 'https://api.anthropic.com/v1/messages',\n};\n\n// ─── Default system prompt ────────────────────────────────────────────────────\n\nconst DEFAULT_SYSTEM_PROMPT =\n 'You are a helpful support assistant. Answer the user\\'s question based ONLY on the ' +\n 'provided context. If the context does not contain a confident answer, respond with a ' +\n 'friendly message saying you don\\'t have that information right now. Do not make up ' +\n 'information or draw from outside knowledge. Be concise, warm, and helpful.';\n\n// ─── Server-side cache (module-level singleton) ───────────────────────────────\n\nconst indexCache = new Map<string, IndexChunk[]>();\n\nfunction loadIndexFile(indexPath: string): IndexChunk[] {\n if (indexCache.has(indexPath)) return indexCache.get(indexPath)!;\n\n const raw = readFileSync(indexPath, 'utf-8');\n const data = JSON.parse(raw) as IndexFile | IndexChunk[];\n const chunks: IndexChunk[] = Array.isArray(data) ? data : data.chunks;\n\n for (const chunk of chunks) {\n chunk.textLower = chunk.text.toLowerCase();\n if (Array.isArray(chunk.vector384)) {\n chunk.vector384 = new Float32Array(chunk.vector384);\n }\n }\n\n indexCache.set(indexPath, chunks);\n return chunks;\n}\n\n// ─── Helper: Classify error for client-side logging ───────────────────────────\n\nfunction classifyError(error: unknown): FallbackReason {\n const msg = String(error).toLowerCase();\n\n if (msg.includes('401') || msg.includes('403') || msg.includes('unauthorized') || msg.includes('invalid api key')) {\n return 'auth-error';\n }\n if (msg.includes('429') || msg.includes('quota') || msg.includes('rate limit')) {\n return 'quota-exceeded';\n }\n if (msg.includes('timeout') || msg.includes('aborted')) {\n return 'timeout';\n }\n if (msg.includes('malformed') || msg.includes('json')) {\n return 'malformed-response';\n }\n\n return 'api-error';\n}\n\n// ─── LLM call ─────────────────────────────────────────────────────────────────\n\nasync function callLLM(\n provider: string,\n apiKey: string,\n model: string,\n systemPrompt: string,\n query: string,\n context: string,\n timeoutMs: number = 5000\n): Promise<{ answer: string; error?: FallbackReason }> {\n const url = PROVIDER_URLS[provider] ?? provider;\n const controller = new AbortController();\n const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);\n\n try {\n if (provider === 'anthropic') {\n const res = await fetch(url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'x-api-key': apiKey,\n 'anthropic-version': '2023-06-01',\n },\n body: JSON.stringify({\n model,\n max_tokens: 512,\n system: systemPrompt,\n messages: [{ role: 'user', content: `Context:\\n${context}\\n\\nQuestion: ${query}` }],\n }),\n signal: controller.signal,\n });\n\n if (!res.ok) {\n const error = classifyError(`${res.status}`);\n return { answer: '', error };\n }\n\n const data = (await res.json()) as { content: Array<{ type: string; text: string }> };\n const answer = data.content?.[0]?.text?.trim() ?? '';\n return { answer };\n }\n\n const res = await fetch(url, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${apiKey}`,\n },\n body: JSON.stringify({\n model,\n messages: [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: `Context:\\n${context}\\n\\nQuestion: ${query}`,\n },\n ],\n temperature: 0.3,\n max_tokens: 512,\n }),\n signal: controller.signal,\n });\n\n if (!res.ok) {\n const error = classifyError(`${res.status}`);\n return { answer: '', error };\n }\n\n const data = (await res.json()) as {\n choices: Array<{ message: { content: string } }>;\n };\n const answer = data.choices?.[0]?.message?.content?.trim() ?? '';\n return { answer };\n } catch (err) {\n const error = classifyError(err);\n return { answer: '', error };\n } finally {\n clearTimeout(timeoutHandle);\n }\n}\n\n// ─── Core handler logic ───────────────────────────────────────────────────────\n\nasync function handleQuery(\n query: string,\n queryVector: Float32Array,\n config: SageDeskHandlerConfig\n): Promise<{ answer: string; isFallback: boolean; fallbackReason?: FallbackReason }> {\n const {\n indexPath,\n provider,\n apiKey,\n model,\n topK = 5,\n minScore = 0.42,\n systemPrompt = DEFAULT_SYSTEM_PROMPT,\n llmTimeoutMs = 5000,\n } = config;\n\n const index = loadIndexFile(indexPath);\n const results = search(queryVector, index, topK, minScore);\n\n if (results.length === 0) {\n return { answer: '', isFallback: true };\n }\n\n const context = buildAnswer(results);\n const llmResult = await callLLM(\n provider,\n apiKey,\n model,\n systemPrompt,\n query,\n context,\n llmTimeoutMs\n );\n\n if (!llmResult.answer) {\n return {\n answer: '',\n isFallback: true,\n fallbackReason: llmResult.error,\n };\n }\n\n return { answer: llmResult.answer, isFallback: false };\n}\n\n// ─── Request parsing ──────────────────────────────────────────────────────────\n\n/**\n * Parse and validate {query, queryVector} from a request body. Returns either a\n * usable Float32Array (and the query string) or a string error code suitable\n * for a 400 response.\n */\nfunction parseBody(body: QueryRequestBody): { query: string; vector: Float32Array } | { error: string } {\n const query = body.query?.trim();\n if (!query) return { error: 'Missing query' };\n\n const raw = body.queryVector;\n if (!Array.isArray(raw) || raw.length === 0) {\n return { error: 'Missing queryVector' };\n }\n for (let i = 0; i < raw.length; i++) {\n if (typeof raw[i] !== 'number' || !Number.isFinite(raw[i])) {\n return { error: 'Invalid queryVector' };\n }\n }\n return { query, vector: new Float32Array(raw) };\n}\n\n// ─── Next.js App Router handler ───────────────────────────────────────────────\n\n/**\n * Returns a Next.js App Router POST handler.\n *\n * Expects request body: `{ query: string, queryVector: number[] }`. The widget\n * embeds the query in the browser (same WASM model as local mode) and sends\n * both. This keeps the server function tiny and free of native ONNX binaries,\n * so it deploys cleanly on Vercel / Lambda / any serverless runtime.\n *\n * @example\n * // app/api/sagedesk/route.ts\n * import { createSageDeskHandler } from 'sagedesk/server'\n * export const POST = createSageDeskHandler({\n * indexPath: './public/support-index.json',\n * provider: 'deepseek',\n * apiKey: process.env.SAGEDESK_LLM_API_KEY!,\n * model: 'deepseek-chat',\n * })\n */\nexport function createSageDeskHandler(config: SageDeskHandlerConfig) {\n return async function POST(request: Request): Promise<Response> {\n try {\n const body = (await request.json()) as QueryRequestBody;\n const parsed = parseBody(body);\n\n if ('error' in parsed) {\n return Response.json({ error: parsed.error }, { status: 400 });\n }\n\n const result = await handleQuery(parsed.query, parsed.vector, config);\n return Response.json(result);\n } catch (err) {\n console.error('[sagedesk/server] Handler error:', err);\n return Response.json({ answer: '', isFallback: true }, { status: 500 });\n }\n };\n}\n\n// ─── Express / Connect middleware ─────────────────────────────────────────────\n\ntype ExpressRequest = {\n body: QueryRequestBody;\n};\ntype ExpressResponse = {\n status: (code: number) => ExpressResponse;\n json: (data: unknown) => void;\n};\ntype NextFunction = (err?: unknown) => void;\n\n/**\n * Returns an Express (or any Connect-compatible) middleware.\n *\n * Expects `req.body` to be `{ query: string, queryVector: number[] }`. See\n * `createSageDeskHandler` for the rationale.\n *\n * @example\n * // server.ts / index.ts\n * import { createSageDeskMiddleware } from 'sagedesk/server'\n * app.use('/api/sagedesk', express.json(), createSageDeskMiddleware({\n * indexPath: './public/support-index.json',\n * provider: 'openai',\n * apiKey: process.env.SAGEDESK_LLM_API_KEY!,\n * model: 'gpt-4o-mini',\n * }))\n */\nexport function createSageDeskMiddleware(config: SageDeskHandlerConfig) {\n return async function sageDeskMiddleware(\n req: ExpressRequest,\n res: ExpressResponse,\n next: NextFunction\n ): Promise<void> {\n try {\n const parsed = parseBody(req.body ?? {});\n\n if ('error' in parsed) {\n res.status(400).json({ error: parsed.error });\n return;\n }\n\n const result = await handleQuery(parsed.query, parsed.vector, config);\n res.json(result);\n } catch (err) {\n console.error('[sagedesk/server] Middleware error:', err);\n next(err);\n }\n };\n}\n","import type { IndexChunk, SearchResult } from './types';\n\n// Both the query vector (embedder.ts, normalize:true) and stored vectors\n// (builder-embedder.ts, normalize:true) are guaranteed unit-length, so\n// cosine similarity reduces to a plain dot product - no norms needed.\nfunction dotProduct(a: Float32Array, b: Float32Array): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: query(${a.length}) vs index(${b.length})`);\n }\n let dot = 0;\n for (let i = 0; i < a.length; i++) dot += a[i] * b[i];\n return dot;\n}\n\n// Inserts item at the correct descending-score position, then trims to maxLen.\n// Avoids Array.sort overhead on every insertion for small topK arrays.\nfunction insertSorted(arr: SearchResult[], item: SearchResult, maxLen: number): void {\n arr.push(item);\n let i = arr.length - 1;\n while (i > 0 && arr[i - 1].score < arr[i].score) {\n const tmp = arr[i - 1]; arr[i - 1] = arr[i]; arr[i] = tmp;\n i--;\n }\n if (arr.length > maxLen) arr.pop();\n}\n\nexport function search(\n queryVector: Float32Array,\n index: IndexChunk[],\n topK = 3,\n minScore = 0.42\n): SearchResult[] {\n const results: SearchResult[] = [];\n\n for (const chunk of index) {\n const score = dotProduct(queryVector, chunk.vector384 as Float32Array);\n if (score < minScore) continue;\n\n if (results.length < topK) {\n insertSorted(results, { chunk, score }, topK);\n } else if (score > results[topK - 1].score) {\n results[topK - 1] = { chunk, score };\n let i = topK - 1;\n while (i > 0 && results[i - 1].score < results[i].score) {\n const tmp = results[i - 1]; results[i - 1] = results[i]; results[i] = tmp;\n i--;\n }\n }\n }\n\n return results;\n}\n\nexport function keywordSearch(\n query: string,\n index: IndexChunk[],\n topK = 3\n): SearchResult[] {\n const terms = query\n .toLowerCase()\n .split(/\\s+/)\n .filter((w) => w.length > 2)\n .map((w) => w.replace(/[^a-z0-9]/g, ''));\n\n if (terms.length === 0) return [];\n\n const results: SearchResult[] = [];\n\n for (const chunk of index) {\n const chunkLower = chunk.textLower || chunk.text.toLowerCase();\n let matchCount = 0;\n for (const t of terms) {\n if (chunkLower.includes(t)) matchCount++;\n }\n const score = matchCount / terms.length;\n\n if (score <= 0) continue;\n\n if (results.length < topK) {\n insertSorted(results, { chunk, score }, topK);\n } else if (score > results[topK - 1].score) {\n results[topK - 1] = { chunk, score };\n let i = topK - 1;\n while (i > 0 && results[i - 1].score < results[i].score) {\n const tmp = results[i - 1]; results[i - 1] = results[i]; results[i] = tmp;\n i--;\n }\n }\n }\n\n return results;\n}\n\nexport async function loadIndex(url: string): Promise<IndexChunk[]> {\n const res = await fetch(url);\n if (!res.ok) {\n throw new Error(`Failed to fetch index (HTTP ${res.status}): ${url}`);\n }\n const data = await res.json();\n // Support both the new { meta, chunks } format and the legacy bare-array format.\n const chunks: IndexChunk[] = Array.isArray(data)\n ? data\n : (data as { chunks: IndexChunk[] }).chunks;\n\n // Materialize lowercase versions and convert vectors to Float32Array once at load time.\n for (const chunk of chunks) {\n chunk.textLower = chunk.text.toLowerCase();\n if (Array.isArray(chunk.vector384)) {\n chunk.vector384 = new Float32Array(chunk.vector384);\n }\n if (Array.isArray(chunk.vector768)) {\n chunk.vector768 = new Float32Array(chunk.vector768);\n }\n }\n\n return chunks;\n}\n","import type { SearchResult } from './types';\n\nexport function buildAnswer(results: SearchResult[]): string {\n if (results.length === 0) return '';\n // Deduplicate by sourceId: query expansion produces multiple chunks per\n // source entry (same answer, different query phrasings) - show each source once.\n const seen = new Set<string>();\n const parts: string[] = [];\n for (const r of results) {\n if (!seen.has(r.chunk.sourceId)) {\n seen.add(r.chunk.sourceId);\n parts.push(r.chunk.text);\n }\n }\n return parts.join('\\n\\n');\n}\n\nexport function extractChips(\n index: { text: string; question?: string; sourceId?: string }[],\n override?: string[]\n): string[] {\n if (override && override.length > 0) return override.slice(0, 5);\n\n const chips: string[] = [];\n const seenText = new Set<string>();\n const seenSource = new Set<string>();\n\n for (const chunk of index) {\n if (chips.length >= 5) break;\n\n // Deduplicate by sourceId if available to ensure variety of answers.\n if (chunk.sourceId) {\n if (seenSource.has(chunk.sourceId)) continue;\n seenSource.add(chunk.sourceId);\n }\n\n const candidate = chunk.question ?? extractFirstSentence(chunk.text);\n if (candidate && !seenText.has(candidate)) {\n seenText.add(candidate);\n chips.push(candidate);\n }\n }\n\n return chips;\n}\n\nfunction extractFirstSentence(text: string): string {\n const match = text.match(/^[^\\n.!?]{10,80}[.!?\\n]?/);\n if (!match) return text.slice(0, 60);\n return match[0].trim();\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,gBAA6B;;;ACK7B,SAAS,WAAW,GAAiB,GAAyB;AAC5D,MAAI,EAAE,WAAW,EAAE,QAAQ;AACzB,UAAM,IAAI,MAAM,oCAAoC,EAAE,MAAM,cAAc,EAAE,MAAM,GAAG;AAAA,EACvF;AACA,MAAI,MAAM;AACV,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,IAAK,QAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AACpD,SAAO;AACT;AAIA,SAAS,aAAa,KAAqB,MAAoB,QAAsB;AACnF,MAAI,KAAK,IAAI;AACb,MAAI,IAAI,IAAI,SAAS;AACrB,SAAO,IAAI,KAAK,IAAI,IAAI,CAAC,EAAE,QAAQ,IAAI,CAAC,EAAE,OAAO;AAC/C,UAAM,MAAM,IAAI,IAAI,CAAC;AAAG,QAAI,IAAI,CAAC,IAAI,IAAI,CAAC;AAAG,QAAI,CAAC,IAAI;AACtD;AAAA,EACF;AACA,MAAI,IAAI,SAAS,OAAQ,KAAI,IAAI;AACnC;AAEO,SAAS,OACd,aACA,OACA,OAAO,GACP,WAAW,MACK;AAChB,QAAM,UAA0B,CAAC;AAEjC,aAAW,SAAS,OAAO;AACzB,UAAM,QAAQ,WAAW,aAAa,MAAM,SAAyB;AACrE,QAAI,QAAQ,SAAU;AAEtB,QAAI,QAAQ,SAAS,MAAM;AACzB,mBAAa,SAAS,EAAE,OAAO,MAAM,GAAG,IAAI;AAAA,IAC9C,WAAW,QAAQ,QAAQ,OAAO,CAAC,EAAE,OAAO;AAC1C,cAAQ,OAAO,CAAC,IAAI,EAAE,OAAO,MAAM;AACnC,UAAI,IAAI,OAAO;AACf,aAAO,IAAI,KAAK,QAAQ,IAAI,CAAC,EAAE,QAAQ,QAAQ,CAAC,EAAE,OAAO;AACvD,cAAM,MAAM,QAAQ,IAAI,CAAC;AAAG,gBAAQ,IAAI,CAAC,IAAI,QAAQ,CAAC;AAAG,gBAAQ,CAAC,IAAI;AACtE;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,SAAO;AACT;;;ACjDO,SAAS,YAAY,SAAiC;AAC3D,MAAI,QAAQ,WAAW,EAAG,QAAO;AAGjC,QAAM,OAAO,oBAAI,IAAY;AAC7B,QAAM,QAAkB,CAAC;AACzB,aAAW,KAAK,SAAS;AACvB,QAAI,CAAC,KAAK,IAAI,EAAE,MAAM,QAAQ,GAAG;AAC/B,WAAK,IAAI,EAAE,MAAM,QAAQ;AACzB,YAAM,KAAK,EAAE,MAAM,IAAI;AAAA,IACzB;AAAA,EACF;AACA,SAAO,MAAM,KAAK,MAAM;AAC1B;;;AFkBA,IAAM,gBAAwC;AAAA,EAC5C,QAAQ;AAAA,EACR,UAAU;AAAA,EACV,MAAM;AAAA,EACN,QAAQ;AAAA,EACR,WAAW;AACb;AAIA,IAAM,wBACJ;AAOF,IAAM,aAAa,oBAAI,IAA0B;AAEjD,SAAS,cAAc,WAAiC;AACtD,MAAI,WAAW,IAAI,SAAS,EAAG,QAAO,WAAW,IAAI,SAAS;AAE9D,QAAM,UAAM,wBAAa,WAAW,OAAO;AAC3C,QAAM,OAAO,KAAK,MAAM,GAAG;AAC3B,QAAM,SAAuB,MAAM,QAAQ,IAAI,IAAI,OAAO,KAAK;AAE/D,aAAW,SAAS,QAAQ;AAC1B,UAAM,YAAY,MAAM,KAAK,YAAY;AACzC,QAAI,MAAM,QAAQ,MAAM,SAAS,GAAG;AAClC,YAAM,YAAY,IAAI,aAAa,MAAM,SAAS;AAAA,IACpD;AAAA,EACF;AAEA,aAAW,IAAI,WAAW,MAAM;AAChC,SAAO;AACT;AAIA,SAAS,cAAc,OAAgC;AACrD,QAAM,MAAM,OAAO,KAAK,EAAE,YAAY;AAEtC,MAAI,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,cAAc,KAAK,IAAI,SAAS,iBAAiB,GAAG;AACjH,WAAO;AAAA,EACT;AACA,MAAI,IAAI,SAAS,KAAK,KAAK,IAAI,SAAS,OAAO,KAAK,IAAI,SAAS,YAAY,GAAG;AAC9E,WAAO;AAAA,EACT;AACA,MAAI,IAAI,SAAS,SAAS,KAAK,IAAI,SAAS,SAAS,GAAG;AACtD,WAAO;AAAA,EACT;AACA,MAAI,IAAI,SAAS,WAAW,KAAK,IAAI,SAAS,MAAM,GAAG;AACrD,WAAO;AAAA,EACT;AAEA,SAAO;AACT;AAIA,eAAe,QACb,UACA,QACA,OACA,cACA,OACA,SACA,YAAoB,KACiC;AACrD,QAAM,MAAM,cAAc,QAAQ,KAAK;AACvC,QAAM,aAAa,IAAI,gBAAgB;AACvC,QAAM,gBAAgB,WAAW,MAAM,WAAW,MAAM,GAAG,SAAS;AAEpE,MAAI;AACF,QAAI,aAAa,aAAa;AAC5B,YAAMA,OAAM,MAAM,MAAM,KAAK;AAAA,QAC3B,QAAQ;AAAA,QACR,SAAS;AAAA,UACP,gBAAgB;AAAA,UAChB,aAAa;AAAA,UACb,qBAAqB;AAAA,QACvB;AAAA,QACA,MAAM,KAAK,UAAU;AAAA,UACnB;AAAA,UACA,YAAY;AAAA,UACZ,QAAQ;AAAA,UACR,UAAU,CAAC,EAAE,MAAM,QAAQ,SAAS;AAAA,EAAa,OAAO;AAAA;AAAA,YAAiB,KAAK,GAAG,CAAC;AAAA,QACpF,CAAC;AAAA,QACD,QAAQ,WAAW;AAAA,MACrB,CAAC;AAED,UAAI,CAACA,KAAI,IAAI;AACX,cAAM,QAAQ,cAAc,GAAGA,KAAI,MAAM,EAAE;AAC3C,eAAO,EAAE,QAAQ,IAAI,MAAM;AAAA,MAC7B;AAEA,YAAMC,QAAQ,MAAMD,KAAI,KAAK;AAC7B,YAAME,UAASD,MAAK,UAAU,CAAC,GAAG,MAAM,KAAK,KAAK;AAClD,aAAO,EAAE,QAAAC,QAAO;AAAA,IAClB;AAEA,UAAM,MAAM,MAAM,MAAM,KAAK;AAAA,MAC3B,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,gBAAgB;AAAA,QAChB,eAAe,UAAU,MAAM;AAAA,MACjC;AAAA,MACA,MAAM,KAAK,UAAU;AAAA,QACnB;AAAA,QACA,UAAU;AAAA,UACR,EAAE,MAAM,UAAU,SAAS,aAAa;AAAA,UACxC;AAAA,YACE,MAAM;AAAA,YACN,SAAS;AAAA,EAAa,OAAO;AAAA;AAAA,YAAiB,KAAK;AAAA,UACrD;AAAA,QACF;AAAA,QACA,aAAa;AAAA,QACb,YAAY;AAAA,MACd,CAAC;AAAA,MACD,QAAQ,WAAW;AAAA,IACrB,CAAC;AAED,QAAI,CAAC,IAAI,IAAI;AACX,YAAM,QAAQ,cAAc,GAAG,IAAI,MAAM,EAAE;AAC3C,aAAO,EAAE,QAAQ,IAAI,MAAM;AAAA,IAC7B;AAEA,UAAM,OAAQ,MAAM,IAAI,KAAK;AAG7B,UAAM,SAAS,KAAK,UAAU,CAAC,GAAG,SAAS,SAAS,KAAK,KAAK;AAC9D,WAAO,EAAE,OAAO;AAAA,EAClB,SAAS,KAAK;AACZ,UAAM,QAAQ,cAAc,GAAG;AAC/B,WAAO,EAAE,QAAQ,IAAI,MAAM;AAAA,EAC7B,UAAE;AACA,iBAAa,aAAa;AAAA,EAC5B;AACF;AAIA,eAAe,YACb,OACA,aACA,QACmF;AACnF,QAAM;AAAA,IACJ;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,OAAO;AAAA,IACP,WAAW;AAAA,IACX,eAAe;AAAA,IACf,eAAe;AAAA,EACjB,IAAI;AAEJ,QAAM,QAAQ,cAAc,SAAS;AACrC,QAAM,UAAU,OAAO,aAAa,OAAO,MAAM,QAAQ;AAEzD,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO,EAAE,QAAQ,IAAI,YAAY,KAAK;AAAA,EACxC;AAEA,QAAM,UAAU,YAAY,OAAO;AACnC,QAAM,YAAY,MAAM;AAAA,IACtB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF;AAEA,MAAI,CAAC,UAAU,QAAQ;AACrB,WAAO;AAAA,MACL,QAAQ;AAAA,MACR,YAAY;AAAA,MACZ,gBAAgB,UAAU;AAAA,IAC5B;AAAA,EACF;AAEA,SAAO,EAAE,QAAQ,UAAU,QAAQ,YAAY,MAAM;AACvD;AASA,SAAS,UAAU,MAAqF;AACtG,QAAM,QAAQ,KAAK,OAAO,KAAK;AAC/B,MAAI,CAAC,MAAO,QAAO,EAAE,OAAO,gBAAgB;AAE5C,QAAM,MAAM,KAAK;AACjB,MAAI,CAAC,MAAM,QAAQ,GAAG,KAAK,IAAI,WAAW,GAAG;AAC3C,WAAO,EAAE,OAAO,sBAAsB;AAAA,EACxC;AACA,WAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;AACnC,QAAI,OAAO,IAAI,CAAC,MAAM,YAAY,CAAC,OAAO,SAAS,IAAI,CAAC,CAAC,GAAG;AAC1D,aAAO,EAAE,OAAO,sBAAsB;AAAA,IACxC;AAAA,EACF;AACA,SAAO,EAAE,OAAO,QAAQ,IAAI,aAAa,GAAG,EAAE;AAChD;AAsBO,SAAS,sBAAsB,QAA+B;AACnE,SAAO,eAAe,KAAK,SAAqC;AAC9D,QAAI;AACF,YAAM,OAAQ,MAAM,QAAQ,KAAK;AACjC,YAAM,SAAS,UAAU,IAAI;AAE7B,UAAI,WAAW,QAAQ;AACrB,eAAO,SAAS,KAAK,EAAE,OAAO,OAAO,MAAM,GAAG,EAAE,QAAQ,IAAI,CAAC;AAAA,MAC/D;AAEA,YAAM,SAAS,MAAM,YAAY,OAAO,OAAO,OAAO,QAAQ,MAAM;AACpE,aAAO,SAAS,KAAK,MAAM;AAAA,IAC7B,SAAS,KAAK;AACZ,cAAQ,MAAM,oCAAoC,GAAG;AACrD,aAAO,SAAS,KAAK,EAAE,QAAQ,IAAI,YAAY,KAAK,GAAG,EAAE,QAAQ,IAAI,CAAC;AAAA,IACxE;AAAA,EACF;AACF;AA6BO,SAAS,yBAAyB,QAA+B;AACtE,SAAO,eAAe,mBACpB,KACA,KACA,MACe;AACf,QAAI;AACF,YAAM,SAAS,UAAU,IAAI,QAAQ,CAAC,CAAC;AAEvC,UAAI,WAAW,QAAQ;AACrB,YAAI,OAAO,GAAG,EAAE,KAAK,EAAE,OAAO,OAAO,MAAM,CAAC;AAC5C;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,YAAY,OAAO,OAAO,OAAO,QAAQ,MAAM;AACpE,UAAI,KAAK,MAAM;AAAA,IACjB,SAAS,KAAK;AACZ,cAAQ,MAAM,uCAAuC,GAAG;AACxD,WAAK,GAAG;AAAA,IACV;AAAA,EACF;AACF;","names":["res","data","answer"]}
|
package/dist/server/index.d.cts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
type SageDeskModel = 'all-MiniLM-L6-v2' | 'bge-small-en-v1-5' | 'paraphrase-multilingual-MiniLM-L12-v2' | 'all-mpnet-base-v2';
|
|
2
|
-
|
|
3
1
|
interface SageDeskHandlerConfig {
|
|
4
2
|
/** Filesystem path to the pre-built vector index (e.g. "./public/support-index.json"). */
|
|
5
3
|
indexPath: string;
|
|
@@ -9,8 +7,6 @@ interface SageDeskHandlerConfig {
|
|
|
9
7
|
apiKey: string;
|
|
10
8
|
/** LLM model name (e.g. 'deepseek-chat', 'gpt-4o-mini', 'llama3-8b-8192'). */
|
|
11
9
|
model: string;
|
|
12
|
-
/** Embedding model - must match the model used at build time. Defaults to all-MiniLM-L6-v2. */
|
|
13
|
-
embeddingModel?: SageDeskModel;
|
|
14
10
|
/** Number of chunks to retrieve for context. Defaults to 5. */
|
|
15
11
|
topK?: number;
|
|
16
12
|
/** Minimum similarity score for a chunk to be included. Defaults to 0.42. */
|
|
@@ -20,9 +16,18 @@ interface SageDeskHandlerConfig {
|
|
|
20
16
|
/** Timeout for LLM API calls in milliseconds. Defaults to 5000 (5 seconds). */
|
|
21
17
|
llmTimeoutMs?: number;
|
|
22
18
|
}
|
|
19
|
+
interface QueryRequestBody {
|
|
20
|
+
query?: string;
|
|
21
|
+
queryVector?: number[];
|
|
22
|
+
}
|
|
23
23
|
/**
|
|
24
24
|
* Returns a Next.js App Router POST handler.
|
|
25
25
|
*
|
|
26
|
+
* Expects request body: `{ query: string, queryVector: number[] }`. The widget
|
|
27
|
+
* embeds the query in the browser (same WASM model as local mode) and sends
|
|
28
|
+
* both. This keeps the server function tiny and free of native ONNX binaries,
|
|
29
|
+
* so it deploys cleanly on Vercel / Lambda / any serverless runtime.
|
|
30
|
+
*
|
|
26
31
|
* @example
|
|
27
32
|
* // app/api/sagedesk/route.ts
|
|
28
33
|
* import { createSageDeskHandler } from 'sagedesk/server'
|
|
@@ -35,9 +40,7 @@ interface SageDeskHandlerConfig {
|
|
|
35
40
|
*/
|
|
36
41
|
declare function createSageDeskHandler(config: SageDeskHandlerConfig): (request: Request) => Promise<Response>;
|
|
37
42
|
type ExpressRequest = {
|
|
38
|
-
body:
|
|
39
|
-
query?: string;
|
|
40
|
-
};
|
|
43
|
+
body: QueryRequestBody;
|
|
41
44
|
};
|
|
42
45
|
type ExpressResponse = {
|
|
43
46
|
status: (code: number) => ExpressResponse;
|
|
@@ -47,6 +50,9 @@ type NextFunction = (err?: unknown) => void;
|
|
|
47
50
|
/**
|
|
48
51
|
* Returns an Express (or any Connect-compatible) middleware.
|
|
49
52
|
*
|
|
53
|
+
* Expects `req.body` to be `{ query: string, queryVector: number[] }`. See
|
|
54
|
+
* `createSageDeskHandler` for the rationale.
|
|
55
|
+
*
|
|
50
56
|
* @example
|
|
51
57
|
* // server.ts / index.ts
|
|
52
58
|
* import { createSageDeskMiddleware } from 'sagedesk/server'
|
package/dist/server/index.d.ts
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
type SageDeskModel = 'all-MiniLM-L6-v2' | 'bge-small-en-v1-5' | 'paraphrase-multilingual-MiniLM-L12-v2' | 'all-mpnet-base-v2';
|
|
2
|
-
|
|
3
1
|
interface SageDeskHandlerConfig {
|
|
4
2
|
/** Filesystem path to the pre-built vector index (e.g. "./public/support-index.json"). */
|
|
5
3
|
indexPath: string;
|
|
@@ -9,8 +7,6 @@ interface SageDeskHandlerConfig {
|
|
|
9
7
|
apiKey: string;
|
|
10
8
|
/** LLM model name (e.g. 'deepseek-chat', 'gpt-4o-mini', 'llama3-8b-8192'). */
|
|
11
9
|
model: string;
|
|
12
|
-
/** Embedding model - must match the model used at build time. Defaults to all-MiniLM-L6-v2. */
|
|
13
|
-
embeddingModel?: SageDeskModel;
|
|
14
10
|
/** Number of chunks to retrieve for context. Defaults to 5. */
|
|
15
11
|
topK?: number;
|
|
16
12
|
/** Minimum similarity score for a chunk to be included. Defaults to 0.42. */
|
|
@@ -20,9 +16,18 @@ interface SageDeskHandlerConfig {
|
|
|
20
16
|
/** Timeout for LLM API calls in milliseconds. Defaults to 5000 (5 seconds). */
|
|
21
17
|
llmTimeoutMs?: number;
|
|
22
18
|
}
|
|
19
|
+
interface QueryRequestBody {
|
|
20
|
+
query?: string;
|
|
21
|
+
queryVector?: number[];
|
|
22
|
+
}
|
|
23
23
|
/**
|
|
24
24
|
* Returns a Next.js App Router POST handler.
|
|
25
25
|
*
|
|
26
|
+
* Expects request body: `{ query: string, queryVector: number[] }`. The widget
|
|
27
|
+
* embeds the query in the browser (same WASM model as local mode) and sends
|
|
28
|
+
* both. This keeps the server function tiny and free of native ONNX binaries,
|
|
29
|
+
* so it deploys cleanly on Vercel / Lambda / any serverless runtime.
|
|
30
|
+
*
|
|
26
31
|
* @example
|
|
27
32
|
* // app/api/sagedesk/route.ts
|
|
28
33
|
* import { createSageDeskHandler } from 'sagedesk/server'
|
|
@@ -35,9 +40,7 @@ interface SageDeskHandlerConfig {
|
|
|
35
40
|
*/
|
|
36
41
|
declare function createSageDeskHandler(config: SageDeskHandlerConfig): (request: Request) => Promise<Response>;
|
|
37
42
|
type ExpressRequest = {
|
|
38
|
-
body:
|
|
39
|
-
query?: string;
|
|
40
|
-
};
|
|
43
|
+
body: QueryRequestBody;
|
|
41
44
|
};
|
|
42
45
|
type ExpressResponse = {
|
|
43
46
|
status: (code: number) => ExpressResponse;
|
|
@@ -47,6 +50,9 @@ type NextFunction = (err?: unknown) => void;
|
|
|
47
50
|
/**
|
|
48
51
|
* Returns an Express (or any Connect-compatible) middleware.
|
|
49
52
|
*
|
|
53
|
+
* Expects `req.body` to be `{ query: string, queryVector: number[] }`. See
|
|
54
|
+
* `createSageDeskHandler` for the rationale.
|
|
55
|
+
*
|
|
50
56
|
* @example
|
|
51
57
|
* // server.ts / index.ts
|
|
52
58
|
* import { createSageDeskMiddleware } from 'sagedesk/server'
|
package/dist/server/index.js
CHANGED
|
@@ -1,96 +1,6 @@
|
|
|
1
1
|
// src/server/index.ts
|
|
2
2
|
import { readFileSync } from "fs";
|
|
3
3
|
|
|
4
|
-
// src/core/server-embedder.ts
|
|
5
|
-
var HF_MODEL_IDS = {
|
|
6
|
-
"all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
|
|
7
|
-
"bge-small-en-v1-5": "BAAI/bge-small-en-v1.5",
|
|
8
|
-
"paraphrase-multilingual-MiniLM-L12-v2": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
|
9
|
-
"all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2"
|
|
10
|
-
};
|
|
11
|
-
var ServerEmbedder = class _ServerEmbedder {
|
|
12
|
-
constructor() {
|
|
13
|
-
this._ready = false;
|
|
14
|
-
this._failed = false;
|
|
15
|
-
this._model = "all-MiniLM-L6-v2";
|
|
16
|
-
}
|
|
17
|
-
static {
|
|
18
|
-
// Module-level singleton cache — survives across Lambda/Vercel warm invocations
|
|
19
|
-
this._pipelineCache = /* @__PURE__ */ new Map();
|
|
20
|
-
}
|
|
21
|
-
static {
|
|
22
|
-
this._loadingPromises = /* @__PURE__ */ new Map();
|
|
23
|
-
}
|
|
24
|
-
async load(model = "all-MiniLM-L6-v2") {
|
|
25
|
-
if (this._ready) return;
|
|
26
|
-
if (this._failed) throw new Error("ServerEmbedder previously failed to load");
|
|
27
|
-
this._model = model;
|
|
28
|
-
if (_ServerEmbedder._pipelineCache.has(model)) {
|
|
29
|
-
this._ready = true;
|
|
30
|
-
return;
|
|
31
|
-
}
|
|
32
|
-
if (_ServerEmbedder._loadingPromises.has(model)) {
|
|
33
|
-
await _ServerEmbedder._loadingPromises.get(model);
|
|
34
|
-
this._ready = true;
|
|
35
|
-
return;
|
|
36
|
-
}
|
|
37
|
-
const modelId = HF_MODEL_IDS[model];
|
|
38
|
-
const loadPromise = this._loadModel(model, modelId);
|
|
39
|
-
_ServerEmbedder._loadingPromises.set(model, loadPromise);
|
|
40
|
-
try {
|
|
41
|
-
await loadPromise;
|
|
42
|
-
this._ready = true;
|
|
43
|
-
} catch (err) {
|
|
44
|
-
this._failed = true;
|
|
45
|
-
_ServerEmbedder._loadingPromises.delete(model);
|
|
46
|
-
throw err;
|
|
47
|
-
} finally {
|
|
48
|
-
_ServerEmbedder._loadingPromises.delete(model);
|
|
49
|
-
}
|
|
50
|
-
}
|
|
51
|
-
async _loadModel(model, modelId) {
|
|
52
|
-
try {
|
|
53
|
-
const { pipeline } = await import("@huggingface/transformers");
|
|
54
|
-
const pipelineInstance = await pipeline("feature-extraction", modelId, {
|
|
55
|
-
dtype: "q8",
|
|
56
|
-
device: "wasm"
|
|
57
|
-
});
|
|
58
|
-
_ServerEmbedder._pipelineCache.set(model, pipelineInstance);
|
|
59
|
-
} catch (err) {
|
|
60
|
-
throw new Error(`Failed to load embedding model ${modelId}: ${String(err)}`);
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
async embed(text) {
|
|
64
|
-
if (!this._ready) {
|
|
65
|
-
await this.load(this._model);
|
|
66
|
-
}
|
|
67
|
-
const pipelineInstance = _ServerEmbedder._pipelineCache.get(this._model);
|
|
68
|
-
if (!pipelineInstance) {
|
|
69
|
-
throw new Error(`Embedding model ${this._model} not loaded`);
|
|
70
|
-
}
|
|
71
|
-
try {
|
|
72
|
-
const output = await pipelineInstance(text, {
|
|
73
|
-
pooling: "mean",
|
|
74
|
-
normalize: true
|
|
75
|
-
});
|
|
76
|
-
return output.data;
|
|
77
|
-
} catch (err) {
|
|
78
|
-
throw new Error(`Embedding failed: ${String(err)}`);
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
get isReady() {
|
|
82
|
-
return this._ready;
|
|
83
|
-
}
|
|
84
|
-
get hasFailed() {
|
|
85
|
-
return this._failed;
|
|
86
|
-
}
|
|
87
|
-
/** @internal - Reset for testing */
|
|
88
|
-
static _reset() {
|
|
89
|
-
_ServerEmbedder._pipelineCache.clear();
|
|
90
|
-
_ServerEmbedder._loadingPromises.clear();
|
|
91
|
-
}
|
|
92
|
-
};
|
|
93
|
-
|
|
94
4
|
// src/core/search.ts
|
|
95
5
|
function dotProduct(a, b) {
|
|
96
6
|
if (a.length !== b.length) {
|
|
@@ -156,7 +66,6 @@ var PROVIDER_URLS = {
|
|
|
156
66
|
};
|
|
157
67
|
var DEFAULT_SYSTEM_PROMPT = "You are a helpful support assistant. Answer the user's question based ONLY on the provided context. If the context does not contain a confident answer, respond with a friendly message saying you don't have that information right now. Do not make up information or draw from outside knowledge. Be concise, warm, and helpful.";
|
|
158
68
|
var indexCache = /* @__PURE__ */ new Map();
|
|
159
|
-
var embedderCache = /* @__PURE__ */ new Map();
|
|
160
69
|
function loadIndexFile(indexPath) {
|
|
161
70
|
if (indexCache.has(indexPath)) return indexCache.get(indexPath);
|
|
162
71
|
const raw = readFileSync(indexPath, "utf-8");
|
|
@@ -171,13 +80,6 @@ function loadIndexFile(indexPath) {
|
|
|
171
80
|
indexCache.set(indexPath, chunks);
|
|
172
81
|
return chunks;
|
|
173
82
|
}
|
|
174
|
-
async function getEmbedder(model = "all-MiniLM-L6-v2") {
|
|
175
|
-
if (embedderCache.has(model)) return embedderCache.get(model);
|
|
176
|
-
const embedder = new ServerEmbedder();
|
|
177
|
-
await embedder.load(model);
|
|
178
|
-
embedderCache.set(model, embedder);
|
|
179
|
-
return embedder;
|
|
180
|
-
}
|
|
181
83
|
function classifyError(error) {
|
|
182
84
|
const msg = String(error).toLowerCase();
|
|
183
85
|
if (msg.includes("401") || msg.includes("403") || msg.includes("unauthorized") || msg.includes("invalid api key")) {
|
|
@@ -263,21 +165,18 @@ Question: ${query}`
|
|
|
263
165
|
clearTimeout(timeoutHandle);
|
|
264
166
|
}
|
|
265
167
|
}
|
|
266
|
-
async function handleQuery(query, config) {
|
|
168
|
+
async function handleQuery(query, queryVector, config) {
|
|
267
169
|
const {
|
|
268
170
|
indexPath,
|
|
269
171
|
provider,
|
|
270
172
|
apiKey,
|
|
271
173
|
model,
|
|
272
|
-
embeddingModel,
|
|
273
174
|
topK = 5,
|
|
274
175
|
minScore = 0.42,
|
|
275
176
|
systemPrompt = DEFAULT_SYSTEM_PROMPT,
|
|
276
177
|
llmTimeoutMs = 5e3
|
|
277
178
|
} = config;
|
|
278
179
|
const index = loadIndexFile(indexPath);
|
|
279
|
-
const embedder = await getEmbedder(embeddingModel);
|
|
280
|
-
const queryVector = await embedder.embed(query);
|
|
281
180
|
const results = search(queryVector, index, topK, minScore);
|
|
282
181
|
if (results.length === 0) {
|
|
283
182
|
return { answer: "", isFallback: true };
|
|
@@ -301,15 +200,29 @@ async function handleQuery(query, config) {
|
|
|
301
200
|
}
|
|
302
201
|
return { answer: llmResult.answer, isFallback: false };
|
|
303
202
|
}
|
|
203
|
+
function parseBody(body) {
|
|
204
|
+
const query = body.query?.trim();
|
|
205
|
+
if (!query) return { error: "Missing query" };
|
|
206
|
+
const raw = body.queryVector;
|
|
207
|
+
if (!Array.isArray(raw) || raw.length === 0) {
|
|
208
|
+
return { error: "Missing queryVector" };
|
|
209
|
+
}
|
|
210
|
+
for (let i = 0; i < raw.length; i++) {
|
|
211
|
+
if (typeof raw[i] !== "number" || !Number.isFinite(raw[i])) {
|
|
212
|
+
return { error: "Invalid queryVector" };
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
return { query, vector: new Float32Array(raw) };
|
|
216
|
+
}
|
|
304
217
|
function createSageDeskHandler(config) {
|
|
305
218
|
return async function POST(request) {
|
|
306
219
|
try {
|
|
307
220
|
const body = await request.json();
|
|
308
|
-
const
|
|
309
|
-
if (
|
|
310
|
-
return Response.json({ error:
|
|
221
|
+
const parsed = parseBody(body);
|
|
222
|
+
if ("error" in parsed) {
|
|
223
|
+
return Response.json({ error: parsed.error }, { status: 400 });
|
|
311
224
|
}
|
|
312
|
-
const result = await handleQuery(query, config);
|
|
225
|
+
const result = await handleQuery(parsed.query, parsed.vector, config);
|
|
313
226
|
return Response.json(result);
|
|
314
227
|
} catch (err) {
|
|
315
228
|
console.error("[sagedesk/server] Handler error:", err);
|
|
@@ -320,12 +233,12 @@ function createSageDeskHandler(config) {
|
|
|
320
233
|
function createSageDeskMiddleware(config) {
|
|
321
234
|
return async function sageDeskMiddleware(req, res, next) {
|
|
322
235
|
try {
|
|
323
|
-
const
|
|
324
|
-
if (
|
|
325
|
-
res.status(400).json({ error:
|
|
236
|
+
const parsed = parseBody(req.body ?? {});
|
|
237
|
+
if ("error" in parsed) {
|
|
238
|
+
res.status(400).json({ error: parsed.error });
|
|
326
239
|
return;
|
|
327
240
|
}
|
|
328
|
-
const result = await handleQuery(query, config);
|
|
241
|
+
const result = await handleQuery(parsed.query, parsed.vector, config);
|
|
329
242
|
res.json(result);
|
|
330
243
|
} catch (err) {
|
|
331
244
|
console.error("[sagedesk/server] Middleware error:", err);
|