@pixygon/chatbot-server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +107 -0
- package/dist/index.d.ts +186 -0
- package/dist/index.js +1169 -0
- package/package.json +37 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,1169 @@
|
|
|
1
|
+
// src/models.ts
|
|
2
|
+
/**
 * Build (or reuse) the three Mongoose models used by the chatbot:
 * `KnowledgeDocument`, `KnowledgeChunk` and `ChatConversation`.
 *
 * Models are looked up by name on `mongoose.models` first. When all three are
 * already registered they are returned as-is and `opts` is not read. Otherwise
 * the schemas are built and every model that is still missing is registered,
 * while models that already exist are reused. Reusing per model avoids calling
 * `mongoose.model(name, schema)` for a name that is already taken.
 *
 * @param {object} mongoose - Mongoose instance (needs `Schema`, `models`, `model`).
 * @param {object} opts
 * @param {string} opts.tenantField - Name of the tenant reference field added to every schema.
 * @param {string} opts.tenantRefName - Model name the tenant field refers to.
 * @param {Function[]} [opts.plugins=[]] - Called as `plugin(schema, modelName)` for each top-level schema.
 * @returns {{KnowledgeDocument: object, KnowledgeChunk: object, ChatConversation: object}}
 */
function createChatbotModels(mongoose, opts) {
  const existing = {
    KnowledgeDocument: mongoose.models.KnowledgeDocument,
    KnowledgeChunk: mongoose.models.KnowledgeChunk,
    ChatConversation: mongoose.models.ChatConversation
  };
  // Fast path: everything is registered already (e.g. hot reload, repeated init).
  if (existing.KnowledgeDocument && existing.KnowledgeChunk && existing.ChatConversation) {
    return existing;
  }
  const { Schema } = mongoose;
  const { tenantField, tenantRefName, plugins = [] } = opts;
  // Every top-level schema carries the same required, indexed tenant reference.
  const tenantRef = () => ({ type: Schema.Types.ObjectId, ref: tenantRefName, required: true, index: true });
  const applyPlugins = (schema, modelName) => {
    for (const plugin of plugins) plugin(schema, modelName);
  };

  const docSchema = new Schema(
    {
      [tenantField]: tenantRef(),
      title: { type: String, required: true, trim: true },
      source: { type: String, default: "" },
      sourceType: { type: String, enum: ["text", "url", "file"], default: "text" },
      content: { type: String, required: true },
      status: { type: String, enum: ["pending", "processing", "ready", "failed"], default: "pending", index: true },
      chunkCount: { type: Number, default: 0 },
      processedAt: { type: Date },
      lastError: { type: String },
      tags: { type: [String], default: [] },
      createdBy: { type: Schema.Types.ObjectId, ref: "User" }
    },
    { timestamps: true }
  );
  docSchema.index({ [tenantField]: 1, status: 1, updatedAt: -1 });
  applyPlugins(docSchema, "KnowledgeDocument");

  const chunkSchema = new Schema(
    {
      [tenantField]: tenantRef(),
      documentId: { type: Schema.Types.ObjectId, ref: "KnowledgeDocument", required: true, index: true },
      text: { type: String, required: true },
      embedding: { type: [Number], required: true },
      position: { type: Number, required: true },
      tokens: { type: Number, default: 0 }
    },
    { timestamps: true }
  );
  chunkSchema.index({ [tenantField]: 1, documentId: 1, position: 1 });
  applyPlugins(chunkSchema, "KnowledgeChunk");

  // Embedded sub-document; plugins are not applied to it.
  const messageSchema = new Schema(
    {
      role: { type: String, enum: ["user", "assistant", "system"], required: true },
      content: { type: String, required: true },
      citedChunkIds: [{ type: Schema.Types.ObjectId, ref: "KnowledgeChunk" }],
      tokensInput: { type: Number, default: 0 },
      tokensOutput: { type: Number, default: 0 },
      costUsd: { type: Number, default: 0 },
      model: { type: String },
      rating: { type: Number, min: -1, max: 1 },
      retrievalSimilarity: { type: Number, min: 0, max: 1 },
      retrievedCount: { type: Number, default: 0 },
      // Cached user-turn embedding — powers semantic clustering without
      // re-embedding the whole corpus per analytics call. `select:false`
      // keeps the 1536-float array out of default queries.
      embedding: { type: [Number], select: false },
      createdAt: { type: Date, default: Date.now }
    },
    { _id: true }
  );

  const conversationSchema = new Schema(
    {
      [tenantField]: tenantRef(),
      sessionId: { type: String, required: true, index: true },
      userId: { type: Schema.Types.ObjectId, ref: "User", index: true, sparse: true },
      title: { type: String, default: "" },
      messages: { type: [messageSchema], default: () => [] },
      totalTokensInput: { type: Number, default: 0 },
      totalTokensOutput: { type: Number, default: 0 },
      totalCostUsd: { type: Number, default: 0 },
      lastMessageAt: { type: Date, default: Date.now, index: true }
    },
    { timestamps: true }
  );
  conversationSchema.index({ [tenantField]: 1, sessionId: 1, lastMessageAt: -1 });
  conversationSchema.index({ [tenantField]: 1, lastMessageAt: -1 });
  applyPlugins(conversationSchema, "ChatConversation");

  // Register only what is missing: re-registering an existing name would throw
  // when just one or two of the three models were created earlier.
  return {
    KnowledgeDocument: existing.KnowledgeDocument || mongoose.model("KnowledgeDocument", docSchema),
    KnowledgeChunk: existing.KnowledgeChunk || mongoose.model("KnowledgeChunk", chunkSchema),
    ChatConversation: existing.ChatConversation || mongoose.model("ChatConversation", conversationSchema)
  };
}
|
|
87
|
+
|
|
88
|
+
// src/aiGateway.ts
|
|
89
|
+
/** Rough token estimate: one token per four characters, rounded up. */
var approxTokens = (text) => {
  const value = text || "";
  return Math.ceil(value.length / 4);
};
/**
 * Create the AI client used for chat completions (Pixygon gateway) and
 * embeddings (OpenAI-compatible endpoint).
 *
 * @param {object} cfg
 * @param {string} [cfg.pixygonApiUrl] - Gateway base URL.
 * @param {string} [cfg.openaiApiUrl] - Embeddings base URL.
 * @param {string} [cfg.pixygonApiKey] - Required by `chat`.
 * @param {string} [cfg.openaiApiKey] - Required by `embed`.
 * @param {number} [cfg.chatInputUsdPer1K] - Price per 1K estimated input tokens.
 * @param {number} [cfg.chatOutputUsdPer1K] - Price per 1K estimated output tokens.
 * @param {number} [cfg.embedUsdPer1K] - Price per 1K embedding tokens.
 * @returns {{chat: Function, embed: Function}} Both methods reject with an
 *   `Error` carrying a `code` (and `status` for HTTP failures).
 */
function createAiClient(cfg) {
  const pixygonBase = cfg.pixygonApiUrl || "https://api.pixygon.com/v1";
  const openaiBase = cfg.openaiApiUrl || "https://api.openai.com/v1";
  const chatEndpoint = `${pixygonBase}/ai/api`;
  const chatInRate = cfg.chatInputUsdPer1K ?? 25e-4;
  const chatOutRate = cfg.chatOutputUsdPer1K ?? 0.01;
  const embedRate = cfg.embedUsdPer1K ?? 2e-5;

  // Build an Error tagged with a machine-readable code.
  const coded = (message, code) => {
    const failure = new Error(message);
    failure.code = code;
    return failure;
  };
  // Same, for a non-2xx HTTP response; includes up to 300 chars of the body.
  const httpFailure = async (label, code, res) => {
    const detail = await res.text().catch(() => "");
    const failure = coded(`${label}: ${res.status} \u2014 ${detail.slice(0, 300)}`, code);
    failure.status = res.status;
    return failure;
  };

  /**
   * Send the conversation to the gateway as a single flattened prompt.
   * Token counts and cost are estimates derived from text length.
   */
  async function chat({ messages, system, model = "openai", version = "gpt-4o" }) {
    if (!cfg.pixygonApiKey) {
      throw coded("PIXYGON_API_KEY not set", "PIXYGON_AI_UNCONFIGURED");
    }
    const prompt = messages.map((turn) => `${turn.role.toUpperCase()}: ${turn.content}`).join("\n\n");
    const form = new FormData();
    const fields = [
      ["type", "text"],
      ["model", model],
      ["version", version],
      ["prompt", prompt]
    ];
    if (system) fields.push(["instructions", system]);
    for (const [key, value] of fields) form.append(key, value);

    const res = await fetch(chatEndpoint, {
      method: "POST",
      headers: { "x-api-key": cfg.pixygonApiKey },
      body: form
    });
    if (!res.ok) {
      throw await httpFailure("Pixygon AI chat failed", "PIXYGON_AI_CHAT_FAILED", res);
    }
    // The gateway may answer with JSON or with plain text.
    const raw = await res.text();
    let payload;
    try {
      payload = JSON.parse(raw);
    } catch {
      payload = { text: raw };
    }
    const answer = payload?.text || payload?.response || payload?.message || raw;
    const content = String(answer).trim();
    const tokensInput = approxTokens(prompt + (system || ""));
    const tokensOutput = approxTokens(content);
    const costUsd = tokensInput / 1e3 * chatInRate + tokensOutput / 1e3 * chatOutRate;
    return { content, model: `${model}/${version}`, tokensInput, tokensOutput, costUsd };
  }

  /**
   * Embed one text. Uses the reported `usage.total_tokens` when present,
   * otherwise the length-based estimate.
   */
  async function embed(text, opts = {}) {
    if (!cfg.openaiApiKey) {
      throw coded("OPENAI_API_KEY not set", "OPENAI_UNCONFIGURED");
    }
    const model = opts.model || "text-embedding-3-small";
    const res = await fetch(`${openaiBase}/embeddings`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${cfg.openaiApiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ input: text, model })
    });
    if (!res.ok) {
      throw await httpFailure("OpenAI embed failed", "OPENAI_EMBED_FAILED", res);
    }
    const payload = await res.json();
    const embedding = payload?.data?.[0]?.embedding;
    const hasVector = Array.isArray(embedding) && embedding.length > 0;
    if (!hasVector) {
      throw coded("OpenAI embed response had no vector", "OPENAI_EMBED_EMPTY");
    }
    const tokens = Number(payload?.usage?.total_tokens ?? approxTokens(text));
    const costUsd = tokens / 1e3 * embedRate;
    return { embedding, tokens, costUsd, dimensions: embedding.length };
  }

  return { chat, embed };
}
|
|
171
|
+
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} 0 when the lengths differ, the vectors are empty, or
 *   either vector has zero magnitude; otherwise dot(a, b) / (|a| * |b|).
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size !== b.length || size === 0) {
    return 0;
  }
  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;
  for (let k = 0; k < size; k += 1) {
    const x = a[k];
    const y = b[k];
    dotProduct += x * y;
    squaredNormA += x * x;
    squaredNormB += y * y;
  }
  const degenerate = squaredNormA === 0 || squaredNormB === 0;
  if (degenerate) {
    return 0;
  }
  return dotProduct / (Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB));
}
|
|
182
|
+
|
|
183
|
+
// src/textChunking.ts
|
|
184
|
+
// Upper bound for a sliding-window chunk, and the approximate budget used
// when packing paragraphs together.
var TARGET_CHARS = 2e3;
// Characters carried over from the end of one chunk into the next.
var OVERLAP_CHARS = 400;
/**
 * Split text into overlapping chunks for embedding.
 *
 * CRLF line endings are normalized and the text is split into paragraphs on
 * blank lines. Paragraphs are packed into a buffer that is flushed as a chunk
 * when the next paragraph would push it past `TARGET_CHARS`; the last
 * `OVERLAP_CHARS` characters of a flushed chunk seed the next one. A paragraph
 * longer than `TARGET_CHARS` is cut by a sliding window of `TARGET_CHARS`
 * characters advancing `TARGET_CHARS - OVERLAP_CHARS` at a time.
 *
 * @param {string} input - Raw document text; falsy input yields no chunks.
 * @returns {{text: string, position: number}[]} Chunks with 0-based positions.
 */
function chunkText(input) {
  const normalized = (input || "").replace(/\r\n/g, "\n").trim();
  if (!normalized) return [];
  const paragraphs = normalized.split(/\n\s*\n/).map((p) => p.trim()).filter(Boolean);
  const chunks = [];
  let buffer = "";
  let position = 0;
  // Emit the buffered paragraphs as a chunk and keep its tail as overlap.
  const flush = () => {
    if (!buffer.trim()) return;
    chunks.push({ text: buffer.trim(), position: position++ });
    buffer = buffer.slice(-OVERLAP_CHARS);
  };
  const windowStep = TARGET_CHARS - OVERLAP_CHARS;
  for (const p of paragraphs) {
    if (p.length > TARGET_CHARS) {
      flush();
      for (let cursor = 0; cursor < p.length; cursor += windowStep) {
        chunks.push({ text: p.slice(cursor, cursor + TARGET_CHARS), position: position++ });
        // This window already reached the end of the paragraph. Advancing
        // again would emit a tail that is fully contained in this chunk.
        if (cursor + TARGET_CHARS >= p.length) break;
      }
      // The oversized paragraph overlaps itself; do not carry it forward.
      buffer = "";
      continue;
    }
    if (buffer.length + p.length + 2 > TARGET_CHARS) flush();
    buffer += (buffer ? "\n\n" : "") + p;
  }
  flush();
  return chunks;
}
|
|
215
|
+
|
|
216
|
+
// src/rag.ts
|
|
217
|
+
// Maximum number of chunks passed to the model as context.
var TOP_K = 5;
// Chunks scoring below this cosine similarity are discarded.
var MIN_SIMILARITY = 0.3;
// Number of most recent stored messages replayed to the model.
var MAX_HISTORY = 10;
// Fallback for the optional tenant-scope hook: just run the callback.
var defaultIdentity = (_id, fn) => fn();
/**
 * Create the retrieval-augmented chat service.
 *
 * @param {object} chatbot
 * @param {object} chatbot.models - `KnowledgeChunk`, `KnowledgeDocument`, `ChatConversation` models.
 * @param {object} chatbot.ai - Client exposing `embed(text)` and `chat({ system, messages })`.
 * @param {object} chatbot.config - Needs `tenantField` and `hooks`
 *   (`getTenantName`, `systemPromptBuilder`, optional `getCostCap`, optional `withTenantScope`).
 * @returns {{respond: Function, processDocument: Function, currentMonthCost: Function}}
 */
function createRag(chatbot) {
  const { models, ai, config } = chatbot;
  const { KnowledgeChunk, KnowledgeDocument, ChatConversation } = models;
  const tenantField = config.tenantField;
  // NOTE(review): resolved here but never invoked anywhere in this block —
  // confirm whether queries were meant to run inside the tenant-scope hook.
  const withScope = config.hooks.withTenantScope || defaultIdentity;

  /**
   * Answer one user message: enforce the monthly cost cap (when configured),
   * retrieve the best-matching chunks, call the model with recent history,
   * and persist both turns on the conversation.
   *
   * @returns {Promise<{conversationId: *, assistantContent: string, citations: object[], costUsd: number}>}
   * @throws {Error} `code: "CHAT_BUDGET_EXCEEDED"`, `status: 503` when the cap is reached.
   */
  async function respond({ tenantId, sessionId, userId, message }) {
    if (config.hooks.getCostCap) {
      const cap = await config.hooks.getCostCap(tenantId);
      if (cap != null && cap > 0) {
        const used = await currentMonthCost(tenantId);
        if (used >= cap) {
          const err = new Error(`Monthly chat budget reached ($${used.toFixed(4)} of $${cap.toFixed(2)}).`);
          err.status = 503;
          err.code = "CHAT_BUDGET_EXCEEDED";
          throw err;
        }
      }
    }

    // Score every chunk of the tenant against the query embedding.
    const queryEmbed = await ai.embed(message);
    const chunks = await KnowledgeChunk.find({ [tenantField]: tenantId }).select("documentId text embedding").lean();
    const scored = chunks
      .map((c) => ({ chunk: c, score: cosineSimilarity(queryEmbed.embedding, c.embedding) }))
      .filter(({ score }) => score >= MIN_SIMILARITY)
      .sort((a, b) => b.score - a.score)
      .slice(0, TOP_K);

    const docIds = Array.from(new Set(scored.map((s) => String(s.chunk.documentId))));
    const docs = await KnowledgeDocument.find({ _id: { $in: docIds } }).select("title").lean();
    const titleById = new Map(docs.map((d) => [String(d._id), d.title]));
    const citations = scored.map(({ chunk, score }) => ({
      chunkId: chunk._id,
      documentId: chunk.documentId,
      documentTitle: titleById.get(String(chunk.documentId)) || "Untitled",
      snippet: chunk.text.slice(0, 200) + (chunk.text.length > 200 ? "\u2026" : ""),
      similarity: score
    }));

    let conversation = await ChatConversation.findOne({ [tenantField]: tenantId, sessionId });
    if (!conversation) {
      conversation = await ChatConversation.create({ [tenantField]: tenantId, sessionId, userId, messages: [] });
    }
    const historyMessages = conversation.messages.slice(-MAX_HISTORY).map((m) => ({ role: m.role, content: m.content }));
    const tenantName = await config.hooks.getTenantName(tenantId) || "the assistant";
    const system = config.hooks.systemPromptBuilder(tenantName, scored.map(({ chunk }) => chunk.text));
    const completion = await ai.chat({
      system,
      messages: [...historyMessages, { role: "user", content: message }]
    });

    conversation.messages.push({
      role: "user",
      content: message,
      tokensInput: queryEmbed.tokens,
      tokensOutput: 0,
      costUsd: queryEmbed.costUsd,
      embedding: queryEmbed.embedding
    });
    conversation.messages.push({
      role: "assistant",
      content: completion.content,
      citedChunkIds: citations.map((c) => c.chunkId),
      tokensInput: completion.tokensInput,
      tokensOutput: completion.tokensOutput,
      costUsd: completion.costUsd,
      model: completion.model,
      // Floating-point rounding can push the cosine of near-identical vectors
      // marginally above 1, which the message schema's `max: 1` validator
      // would reject on save — after the completion was already paid for.
      retrievalSimilarity: Math.min(1, scored[0]?.score || 0),
      retrievedCount: scored.length
    });
    conversation.totalTokensInput += completion.tokensInput + queryEmbed.tokens;
    conversation.totalTokensOutput += completion.tokensOutput;
    conversation.totalCostUsd += completion.costUsd + queryEmbed.costUsd;
    conversation.lastMessageAt = /* @__PURE__ */ new Date();
    if (!conversation.title && message) conversation.title = message.slice(0, 80);
    await conversation.save();
    return {
      conversationId: conversation._id,
      assistantContent: completion.content,
      citations,
      costUsd: completion.costUsd + queryEmbed.costUsd
    };
  }

  /**
   * (Re)build the chunks of one document: mark it `processing`, delete its old
   * chunks, embed each new chunk sequentially, then mark it `ready`. On any
   * error the document is marked `failed` with the message and the error is
   * rethrown. Does nothing when the document does not exist.
   */
  async function processDocument(documentId) {
    const doc = await KnowledgeDocument.findById(documentId);
    if (!doc) return;
    doc.status = "processing";
    await doc.save();
    try {
      await KnowledgeChunk.deleteMany({ documentId: doc._id, [tenantField]: doc[tenantField] });
      const chunks = chunkText(doc.content);
      for (const c of chunks) {
        const { embedding, tokens } = await ai.embed(c.text);
        await KnowledgeChunk.create({
          [tenantField]: doc[tenantField],
          documentId: doc._id,
          text: c.text,
          embedding,
          position: c.position,
          tokens
        });
      }
      doc.chunkCount = chunks.length;
      doc.status = "ready";
      doc.processedAt = /* @__PURE__ */ new Date();
      doc.lastError = void 0;
      await doc.save();
    } catch (err) {
      doc.status = "failed";
      doc.lastError = err?.message || String(err);
      await doc.save();
      throw err;
    }
  }

  /**
   * Sum `costUsd` over the tenant's messages created since the first day of
   * the month of `now` (local time). Only conversations active since then are loaded.
   *
   * @param {*} tenantId
   * @param {Date} [now] - Reference date; defaults to the current time.
   * @returns {Promise<number>}
   */
  async function currentMonthCost(tenantId, now = /* @__PURE__ */ new Date()) {
    const monthStart = new Date(now.getFullYear(), now.getMonth(), 1);
    const conversations = await ChatConversation.find({
      [tenantField]: tenantId,
      lastMessageAt: { $gte: monthStart }
    }).select("messages").lean();
    let total = 0;
    for (const c of conversations) {
      for (const m of c.messages || []) {
        const at = m.createdAt instanceof Date ? m.createdAt : new Date(m.createdAt);
        if (at >= monthStart) total += m.costUsd || 0;
      }
    }
    return total;
  }

  return { respond, processDocument, currentMonthCost };
}
|
|
343
|
+
|
|
344
|
+
// src/analytics.ts
|
|
345
|
+
// Words ignored by keyword analytics, grouped by language. Words listed in
// more than one group collapse into a single Set entry.
var STOPWORDS = /* @__PURE__ */ new Set([
  // English
  ..."the a an is are was were be been being".split(" "),
  ..."i you he she it we they me him her us them".split(" "),
  ..."this that these those what which who when where why how".split(" "),
  ..."and but or if then else for to of in on at by with from as".split(" "),
  ..."do does did have has had can could would should will shall may might must".split(" "),
  ..."not no yes my your his their our its".split(" "),
  ..."so than too very just any some all also more most".split(" "),
  // Norwegian
  ..."og i jeg det at en et den til er som p\xE5 de med han av ikke der s\xE5 var".split(" "),
  ..."meg seg men har om vi ha hadde hun n\xE5 over da ved fra du ut sin dem oss opp".split(" "),
  ..."man kan hans hvor eller hva skal selv her alle vil bli ble kunne inn n\xE5r v\xE6re".split(" "),
  ..."kom noen noe ville dere kun ja etter ned skulle denne deg si sine sitt mot \xE5".split(" "),
  ..."hvordan ingen din ditt blir samme b\xE5de bare enn fordi f\xF8r mange ogs\xE5 slik".split(" "),
  // Swedish
  ..."och \xE4r som p\xE5 den med var sig f\xF6r s\xE5 till har de att ett om".split(" "),
  ..."hade mot under n\xE5got annat s\xE4ga ska sitt hennes".split(" ")
]);
// Average retrieval similarity below this marks a question as poorly covered.
var POOR_SIMILARITY = 0.4;
// Minimum cosine similarity to a cluster centroid for a turn to join it.
var CLUSTER_THRESHOLD = 0.78;
// Maximum number of user turns considered when clustering.
var CLUSTER_SAMPLE_LIMIT = 5e3;
|
|
550
|
+
/**
 * Canonical form of a question used as a grouping key: lower-cased, every
 * character that is not a Unicode letter, number or whitespace replaced by a
 * space, whitespace runs collapsed, and the ends trimmed.
 *
 * @param {string} text
 * @returns {string}
 */
function normalize(text) {
  const lowered = text.toLowerCase();
  const withoutSymbols = lowered.replace(/[^\p{L}\p{N}\s]/gu, " ");
  const singleSpaced = withoutSymbols.replace(/\s+/g, " ");
  return singleSpaced.trim();
}
|
|
553
|
+
/**
 * Split text into keyword candidates: normalized words of at least three
 * characters that are not stopwords.
 *
 * @param {string} text
 * @returns {string[]}
 */
function tokenize(text) {
  const keywords = [];
  for (const word of normalize(text).split(" ")) {
    if (word.length < 3) continue;
    if (STOPWORDS.has(word)) continue;
    keywords.push(word);
  }
  return keywords;
}
|
|
556
|
+
/**
 * Cosine similarity between two equally sized numeric vectors.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} 0 for mismatched lengths, empty vectors, or a
 *   zero-magnitude vector; otherwise the cosine of the angle between them.
 */
function cosine(a, b) {
  if (a.length !== b.length || a.length === 0) return 0;
  const acc = { dot: 0, magA: 0, magB: 0 };
  for (const [idx, left] of a.entries()) {
    const right = b[idx];
    acc.dot += left * right;
    acc.magA += left * left;
    acc.magB += right * right;
  }
  if (acc.magA === 0 || acc.magB === 0) return 0;
  const denominator = Math.sqrt(acc.magA) * Math.sqrt(acc.magB);
  return acc.dot / denominator;
}
|
|
567
|
+
/**
 * Create the read-only analytics API over stored conversations.
 *
 * Every method loads the tenant's conversations (lean) and aggregates in
 * memory. Message timestamps that are missing or unparseable never abort an
 * aggregation: they are ignored for "last asked" tracking and time bucketing,
 * and reported as `null` where a single timestamp is returned.
 *
 * @param {object} chatbot
 * @param {object} chatbot.models - `ChatConversation`, `KnowledgeChunk`, `KnowledgeDocument` models.
 * @param {object} chatbot.config - Needs `tenantField`.
 * @returns {object} `overview`, `topQuestions`, `keywordFrequency`, `costTimeseries`,
 *   `knowledgeGaps`, `documentUsage`, `conversationsForQuestion`, `semanticClusters`.
 */
function createAnalytics(chatbot) {
  const { ChatConversation, KnowledgeChunk, KnowledgeDocument } = chatbot.models;
  const tenantField = chatbot.config.tenantField;

  // Coerce a stored timestamp to a Date; null when it cannot be parsed.
  const toValidDate = (value) => {
    const date = value instanceof Date ? value : new Date(value);
    return Number.isNaN(date.getTime()) ? null : date;
  };
  // Later of two nullable dates (null means "unknown").
  const latestOf = (current, candidate) => {
    if (candidate === null) return current;
    return current === null || candidate > current ? candidate : current;
  };
  const toIso = (date) => (date === null ? null : date.toISOString());
  // All conversations of a tenant, restricted to the given fields.
  const findForTenant = (tenantId, fields) => ChatConversation.find({ [tenantField]: tenantId }).select(fields).lean();

  return {
    /** Totals across all conversations plus rating counts of assistant messages. */
    async overview(tenantId) {
      const conversations = await findForTenant(tenantId, "messages totalCostUsd totalTokensInput totalTokensOutput");
      let messages = 0, helpful = 0, unhelpful = 0, unrated = 0;
      let totalCost = 0, totalIn = 0, totalOut = 0;
      for (const c of conversations) {
        messages += c.messages?.length || 0;
        totalCost += c.totalCostUsd || 0;
        totalIn += c.totalTokensInput || 0;
        totalOut += c.totalTokensOutput || 0;
        for (const m of c.messages || []) {
          if (m.role !== "assistant") continue;
          if (m.rating === 1) helpful++;
          else if (m.rating === -1) unhelpful++;
          else unrated++;
        }
      }
      return {
        conversations: conversations.length,
        messages,
        totalCostUsd: totalCost,
        totalTokensInput: totalIn,
        totalTokensOutput: totalOut,
        ratingsHelpful: helpful,
        ratingsUnhelpful: unhelpful,
        ratingsUnrated: unrated
      };
    },

    /**
     * Most frequent user questions, grouped by normalized text.
     * `lastAskedAt` is an ISO string, or null when no occurrence has a usable timestamp.
     */
    async topQuestions(tenantId, limit = 20) {
      const conversations = await findForTenant(tenantId, "messages");
      const groups = /* @__PURE__ */ new Map();
      for (const c of conversations) {
        for (const m of c.messages || []) {
          if (m.role !== "user") continue;
          const norm = normalize(m.content);
          if (norm.length < 3) continue;
          const at = toValidDate(m.createdAt);
          const group = groups.get(norm);
          if (group) {
            group.count++;
            group.lastAt = latestOf(group.lastAt, at);
          } else {
            // The first spelling seen is kept as the display text.
            groups.set(norm, { question: m.content, count: 1, lastAt: at });
          }
        }
      }
      return Array.from(groups.entries()).map(([normalized, v]) => ({
        question: v.question,
        normalized,
        count: v.count,
        lastAskedAt: toIso(v.lastAt)
      })).sort((a, b) => b.count - a.count).slice(0, limit);
    },

    /** Most frequent non-stopword terms across user messages. */
    async keywordFrequency(tenantId, limit = 30) {
      const conversations = await findForTenant(tenantId, "messages");
      const counts = /* @__PURE__ */ new Map();
      for (const c of conversations) {
        for (const m of c.messages || []) {
          if (m.role !== "user") continue;
          for (const t of tokenize(m.content)) counts.set(t, (counts.get(t) || 0) + 1);
        }
      }
      return Array.from(counts.entries()).map(([term, count]) => ({ term, count })).sort((a, b) => b.count - a.count).slice(0, limit);
    },

    /**
     * Per-day (UTC date) cost, token and message totals for the last `days` days.
     * Messages without a usable timestamp cannot be bucketed and are skipped.
     */
    async costTimeseries(tenantId, days = 30) {
      const since = new Date(Date.now() - days * 864e5);
      const conversations = await ChatConversation.find({
        [tenantField]: tenantId,
        lastMessageAt: { $gte: since }
      }).select("messages").lean();
      const byDay = /* @__PURE__ */ new Map();
      for (const c of conversations) {
        for (const m of c.messages || []) {
          const at = toValidDate(m.createdAt);
          if (at === null || at < since) continue;
          const date = at.toISOString().slice(0, 10);
          let bucket = byDay.get(date);
          if (!bucket) {
            bucket = { date, costUsd: 0, tokensInput: 0, tokensOutput: 0, messages: 0 };
            byDay.set(date, bucket);
          }
          bucket.costUsd += m.costUsd || 0;
          bucket.tokensInput += m.tokensInput || 0;
          bucket.tokensOutput += m.tokensOutput || 0;
          bucket.messages++;
        }
      }
      return Array.from(byDay.values()).sort((a, b) => a.date.localeCompare(b.date));
    },

    /**
     * Questions the knowledge base answers poorly: any negative rating, an
     * average retrieval similarity below POOR_SIMILARITY, or at least one
     * reply with zero retrieved chunks. Ranked by
     * `count * (1 - avgSimilarity) + 2 * negatives`.
     */
    async knowledgeGaps(tenantId, limit = 15) {
      const conversations = await findForTenant(tenantId, "messages");
      const buckets2 = /* @__PURE__ */ new Map();
      for (const c of conversations) {
        const msgs = c.messages || [];
        for (let i = 0; i < msgs.length; i++) {
          const m = msgs[i];
          if (m.role !== "user") continue;
          const norm = normalize(m.content);
          if (norm.length < 3) continue;
          let b = buckets2.get(norm);
          if (!b) {
            b = { question: m.content, count: 0, similaritySum: 0, similarityCount: 0, negatives: 0, zeroRetrieval: 0, lastAt: null };
            buckets2.set(norm, b);
          }
          b.count++;
          b.lastAt = latestOf(b.lastAt, toValidDate(m.createdAt));
          // The reply is the message immediately following the question.
          const reply = msgs[i + 1];
          if (reply?.role === "assistant") {
            if (typeof reply.retrievalSimilarity === "number") {
              b.similaritySum += reply.retrievalSimilarity;
              b.similarityCount++;
            }
            if (reply.retrievedCount === 0) b.zeroRetrieval++;
            if (reply.rating === -1) b.negatives++;
          }
        }
      }
      const ranked = [];
      for (const [normalized, b] of buckets2) {
        // A question with no scored reply counts as similarity 0.
        const avgSimilarity = b.similarityCount > 0 ? b.similaritySum / b.similarityCount : 0;
        const gapScore = b.count * (1 - avgSimilarity) + 2 * b.negatives;
        const isGap = b.negatives > 0 || avgSimilarity < POOR_SIMILARITY || b.zeroRetrieval > 0;
        if (!isGap) continue;
        ranked.push({
          question: b.question,
          normalized,
          count: b.count,
          avgSimilarity,
          negativeRatings: b.negatives,
          noRetrievalCount: b.zeroRetrieval,
          gapScore,
          lastAskedAt: toIso(b.lastAt)
        });
      }
      return ranked.sort((a, b) => b.gapScore - a.gapScore).slice(0, limit);
    },

    /**
     * Citation statistics per knowledge document. Status is "unused" with no
     * citations, "underused" with fewer than three, otherwise "active".
     */
    async documentUsage(tenantId) {
      const [docs, chunks, conversations] = await Promise.all([
        KnowledgeDocument.find({ [tenantField]: tenantId }).select("_id title chunkCount").lean(),
        KnowledgeChunk.find({ [tenantField]: tenantId }).select("_id documentId").lean(),
        ChatConversation.find({ [tenantField]: tenantId }).select("messages").lean()
      ]);
      const chunkToDoc = /* @__PURE__ */ new Map();
      for (const c of chunks) chunkToDoc.set(String(c._id), String(c.documentId));
      const stats = /* @__PURE__ */ new Map();
      for (const d of docs) {
        stats.set(String(d._id), { citations: 0, conversationsWith: /* @__PURE__ */ new Set(), citedChunks: /* @__PURE__ */ new Set() });
      }
      for (const conv of conversations) {
        for (const m of conv.messages || []) {
          if (m.role !== "assistant" || !m.citedChunkIds?.length) continue;
          const docsHit = /* @__PURE__ */ new Set();
          for (const chunkId of m.citedChunkIds) {
            // Citations of chunks or documents that no longer exist are ignored.
            const docId = chunkToDoc.get(String(chunkId));
            if (!docId) continue;
            const s = stats.get(docId);
            if (!s) continue;
            s.citations++;
            s.citedChunks.add(String(chunkId));
            docsHit.add(docId);
          }
          for (const docId of docsHit) stats.get(docId).conversationsWith.add(String(conv._id));
        }
      }
      return docs.map((d) => {
        const s = stats.get(String(d._id));
        const status = s.citations === 0 ? "unused" : s.citations < 3 ? "underused" : "active";
        return {
          documentId: String(d._id),
          title: d.title,
          citationCount: s.citations,
          uniqueConversations: s.conversationsWith.size,
          chunkCount: d.chunkCount || 0,
          citedChunkCount: s.citedChunks.size,
          status
        };
      }).sort((a, b) => b.citationCount - a.citationCount);
    },

    /**
     * Conversations (most recently active first) containing the given
     * question; at most one match per conversation.
     *
     * @param {*} tenantId
     * @param {string} normalized - Question key. Raw question text is accepted
     *   too: a string argument is normalized before matching, which leaves an
     *   already-normalized key unchanged.
     * @param {number} [limit=50]
     */
    async conversationsForQuestion(tenantId, normalized, limit = 50) {
      const target = typeof normalized === "string" ? normalize(normalized) : normalized;
      const conversations = await ChatConversation.find({ [tenantField]: tenantId }).select("sessionId title messages createdAt").sort({ lastMessageAt: -1 }).lean();
      const matches = [];
      for (const c of conversations) {
        const msgs = c.messages || [];
        for (let i = 0; i < msgs.length; i++) {
          const m = msgs[i];
          if (m.role !== "user") continue;
          if (normalize(m.content) !== target) continue;
          const reply = msgs[i + 1];
          matches.push({
            _id: String(c._id),
            sessionId: c.sessionId,
            title: c.title || m.content.slice(0, 80),
            messageCount: msgs.length,
            question: m.content,
            assistantReply: reply?.role === "assistant" ? reply.content : "",
            rating: reply?.rating,
            retrievalSimilarity: reply?.retrievalSimilarity,
            createdAt: toIso(toValidDate(m.createdAt))
          });
          if (matches.length >= limit) return matches;
          break;
        }
      }
      return matches;
    },

    /**
     * Greedy single-pass clustering of user turns by their cached embedding.
     * A turn joins the most similar cluster whose centroid similarity exceeds
     * CLUSTER_THRESHOLD, otherwise it starts a new cluster. At most
     * CLUSTER_SAMPLE_LIMIT turns are considered.
     */
    async semanticClusters(tenantId, limit = 15) {
      // NOTE(review): relies on `messages.embedding` being present when only
      // "messages" is selected — confirm against the schema's `select: false`.
      const conversations = await findForTenant(tenantId, "messages");
      const turns = [];
      for (const c of conversations) {
        for (const m of c.messages || []) {
          if (m.role !== "user") continue;
          if (!Array.isArray(m.embedding) || m.embedding.length === 0) continue;
          turns.push({ content: m.content, embedding: m.embedding });
          if (turns.length >= CLUSTER_SAMPLE_LIMIT) break;
        }
        if (turns.length >= CLUSTER_SAMPLE_LIMIT) break;
      }
      if (turns.length === 0) return [];
      const clusters = [];
      for (const t of turns) {
        let bestIdx = -1, bestSim = CLUSTER_THRESHOLD;
        for (let i = 0; i < clusters.length; i++) {
          const sim = cosine(t.embedding, clusters[i].centroid);
          if (sim > bestSim) {
            bestSim = sim;
            bestIdx = i;
          }
        }
        if (bestIdx === -1) {
          clusters.push({ centroid: t.embedding.slice(), members: [t], similaritySum: 1 });
        } else {
          const c = clusters[bestIdx];
          const n = c.members.length;
          // Incremental mean: fold the new vector into the centroid.
          for (let i = 0; i < c.centroid.length; i++) {
            c.centroid[i] = (c.centroid[i] * n + t.embedding[i]) / (n + 1);
          }
          c.members.push(t);
          c.similaritySum += bestSim;
        }
      }
      return clusters.map((c) => {
        // The representative is the member closest to the final centroid.
        let best = c.members[0], bestSim = -2;
        for (const m of c.members) {
          const s = cosine(m.embedding, c.centroid);
          if (s > bestSim) {
            bestSim = s;
            best = m;
          }
        }
        return {
          representative: best.content,
          count: c.members.length,
          members: c.members.slice(0, 5).map((m) => m.content),
          avgSimilarity: c.similaritySum / c.members.length
        };
      }).sort((a, b) => b.count - a.count).slice(0, limit);
    }
  };
}
|
|
830
|
+
|
|
831
|
+
// src/routes.ts
|
|
832
|
+
import { Router } from "express";
|
|
833
|
+
import crypto from "crypto";
|
|
834
|
+
import { z } from "zod";
|
|
835
|
+
|
|
836
|
+
// src/rateLimit.ts
|
|
837
|
+
// Fixed-window counters shared by every limiter created in this module,
// keyed by `${scope}:${ip}`.
const buckets = /* @__PURE__ */ new Map();
// Expired counters are swept at most this often so the map cannot grow without bound.
const RATE_LIMIT_SWEEP_INTERVAL_MS = 6e4;
let nextBucketSweepAt = 0;

/** Drop every bucket whose window has already ended at `now`. */
function pruneExpiredBuckets(now) {
  for (const [key, bucket] of buckets) {
    if (bucket.resetAt < now) buckets.delete(key);
  }
}

/**
 * Create an in-memory, fixed-window rate-limiting middleware.
 *
 * Requests are counted per client IP within `scope`. The IP is the first entry
 * of the `x-forwarded-for` header when present, otherwise the socket's remote
 * address, otherwise "unknown".
 * NOTE(review): `x-forwarded-for` is client-controlled unless a trusted proxy
 * overwrites it — confirm the deployment sits behind one.
 *
 * Every response gets `X-RateLimit-Limit`, `X-RateLimit-Remaining` and
 * `X-RateLimit-Reset` (epoch seconds). Requests over the limit receive a 429
 * JSON body plus a `Retry-After` header and `next` is not called.
 *
 * @param {object} options
 * @param {number} options.windowMs - Window length in milliseconds.
 * @param {number} options.max - Requests allowed per window.
 * @param {string} options.scope - Namespace for the counters.
 * @returns {(req: object, res: object, next: Function) => void} Middleware with the Express `(req, res, next)` shape.
 */
function rateLimit({
  windowMs,
  max,
  scope
}) {
  return (req, res, next) => {
    const forwarded = req.headers["x-forwarded-for"];
    // Tolerate an array-valued header instead of throwing on `.split`.
    const forwardedValue = Array.isArray(forwarded) ? forwarded[0] : forwarded;
    const ip = forwardedValue?.split(",")[0]?.trim() || req.socket.remoteAddress || "unknown";
    const key = `${scope}:${ip}`;
    const now = Date.now();
    if (now >= nextBucketSweepAt) {
      pruneExpiredBuckets(now);
      nextBucketSweepAt = now + RATE_LIMIT_SWEEP_INTERVAL_MS;
    }
    let bucket = buckets.get(key);
    if (!bucket || bucket.resetAt < now) {
      bucket = { count: 0, resetAt: now + windowMs };
      buckets.set(key, bucket);
    }
    bucket.count++;
    res.setHeader("X-RateLimit-Limit", String(max));
    res.setHeader("X-RateLimit-Remaining", String(Math.max(0, max - bucket.count)));
    res.setHeader("X-RateLimit-Reset", String(Math.ceil(bucket.resetAt / 1e3)));
    if (bucket.count > max) {
      const retryAfterSeconds = Math.ceil((bucket.resetAt - now) / 1e3);
      res.setHeader("Retry-After", String(retryAfterSeconds));
      res.status(429).json({
        error: "Too many requests, please slow down",
        code: "RATE_LIMITED",
        retryAfterSeconds
      });
      return;
    }
    next();
  };
}
|
|
867
|
+
|
|
868
|
+
// src/routes.ts
|
|
869
|
+
/**
 * Build the tenant-scoped ("private") Express router: knowledge-document CRUD,
 * chat, conversation listing, message rating and chat analytics.
 *
 * The tenant id is read from `req.params[chatbot.config.tenantParamName]`. The
 * router is created with `mergeParams: true`, so a param declared on the parent
 * mount path is visible here. `req.userId` is passed through as-is — presumably
 * set by the host application's auth middleware (TODO confirm). This router
 * performs no authentication itself.
 *
 * Every document and conversation lookup is filtered by the tenant field so a
 * caller cannot reach another tenant's data by guessing an id.
 *
 * @param {object} chatbot - Object exposing `models` and `config` (as assembled by `createChatbot`).
 * @returns {object} The configured Express router.
 */
function createPrivateRouter(chatbot) {
  const rag = createRag(chatbot);
  const analytics = createAnalytics(chatbot);
  const { KnowledgeDocument, KnowledgeChunk, ChatConversation } = chatbot.models;
  const tenantField = chatbot.config.tenantField;
  const tenantParam = chatbot.config.tenantParamName;
  const r = Router({ mergeParams: true });
  const getTenantId = (req) => req.params[tenantParam];

  // Forward rejections of async handlers to Express' error pipeline.
  const ap = (fn) => async (req, res, next) => {
    try {
      await fn(req, res, next);
    } catch (e) {
      next(e);
    }
  };

  // Parse an integer query parameter, using `fallback` when it is absent or not
  // numeric, then clamp to [min, max]. (`String(undefined)` is the truthy string
  // "undefined", so a plain `String(x) || "50"` default never applies.)
  const clampInt = (raw, fallback, min, max) => {
    const parsed = Number.parseInt(String(raw ?? ""), 10);
    return Math.max(min, Math.min(max, Number.isNaN(parsed) ? fallback : parsed));
  };

  // Filter matching one knowledge document *within the caller's tenant*.
  const ownDocument = (req) => ({ _id: req.params.documentId, [tenantField]: getTenantId(req) });

  const createSchema = z.object({
    title: z.string().min(1),
    content: z.string().min(10),
    source: z.string().optional(),
    sourceType: z.enum(["text", "url", "file"]).optional(),
    tags: z.array(z.string()).optional()
  });
  const sendSchema = z.object({
    sessionId: z.string().min(1).optional(),
    message: z.string().min(1).max(2e3)
  });
  const rateSchema = z.object({
    conversationId: z.string().min(1),
    messageId: z.string().min(1),
    rating: z.number().int().min(-1).max(1)
  });

  // ---- Knowledge documents ----

  // List documents (without their `content`), most recently updated first.
  r.get("/knowledge", ap(async (req, res) => {
    const docs = await KnowledgeDocument.find({ [tenantField]: getTenantId(req) })
      .select("title source sourceType status chunkCount processedAt lastError tags createdAt updatedAt")
      .sort({ updatedAt: -1 })
      .lean();
    res.json(docs);
  }));

  r.get("/knowledge/:documentId", ap(async (req, res) => {
    const doc = await KnowledgeDocument.findOne(ownDocument(req)).lean();
    if (!doc) return res.status(404).json({ error: "Not found" });
    res.json(doc);
  }));

  r.post("/knowledge", ap(async (req, res) => {
    const data = createSchema.parse(req.body);
    const doc = await KnowledgeDocument.create({
      ...data,
      [tenantField]: getTenantId(req),
      createdBy: req.userId
    });
    // Deliberately not awaited: processing continues after the response is sent.
    rag.processDocument(doc._id).catch((e) => console.error(`[chatbot] processDocument ${doc._id} failed:`, e.message));
    res.status(201).json(doc);
  }));

  r.put("/knowledge/:documentId", ap(async (req, res) => {
    const doc = await KnowledgeDocument.findOne(ownDocument(req));
    if (!doc) return res.status(404).json({ error: "Not found" });
    const patch = createSchema.partial().parse(req.body);
    Object.assign(doc, patch);
    await doc.save();
    // Only a content change triggers re-processing.
    if (patch.content !== void 0) {
      rag.processDocument(doc._id).catch((e) => console.error(`[chatbot] re-processDocument failed:`, e.message));
    }
    res.json(doc);
  }));

  r.delete("/knowledge/:documentId", ap(async (req, res) => {
    const doc = await KnowledgeDocument.findOne(ownDocument(req));
    if (!doc) return res.status(404).json({ error: "Not found" });
    // Remove the document's chunks first, then the document itself.
    await KnowledgeChunk.deleteMany({ documentId: doc._id, [tenantField]: doc[tenantField] });
    await doc.deleteOne();
    res.json({ deleted: true });
  }));

  // ---- Chat ----

  r.post("/chat", ap(async (req, res) => {
    const { sessionId: raw, message } = sendSchema.parse(req.body);
    // A new session id is minted when the client does not supply one.
    const sessionId = raw || crypto.randomUUID();
    const result = await rag.respond({
      tenantId: getTenantId(req),
      sessionId,
      message,
      userId: req.userId
    });
    res.json({
      sessionId,
      conversationId: result.conversationId,
      content: result.assistantContent,
      citations: result.citations,
      costUsd: result.costUsd
    });
  }));

  r.get("/chat/:sessionId", ap(async (req, res) => {
    const c = await ChatConversation.findOne({
      [tenantField]: getTenantId(req),
      sessionId: req.params.sessionId
    }).lean();
    if (!c) return res.status(404).json({ error: "Not found" });
    res.json(c);
  }));

  // List conversations; messages are replaced by a `messageCount`.
  r.get("/conversations", ap(async (req, res) => {
    const limit = clampInt(req.query.limit, 50, 1, 200);
    const list = await ChatConversation.find({ [tenantField]: getTenantId(req) })
      .select("sessionId userId title messages totalCostUsd totalTokensInput totalTokensOutput lastMessageAt createdAt")
      .sort({ lastMessageAt: -1 })
      .limit(limit)
      .lean();
    res.json(list.map((c) => ({ ...c, messageCount: c.messages?.length || 0, messages: void 0 })));
  }));

  r.post("/chat/rate", ap(async (req, res) => {
    const { conversationId, messageId, rating } = rateSchema.parse(req.body);
    const conv = await ChatConversation.findOne({
      _id: conversationId,
      [tenantField]: getTenantId(req)
    });
    if (!conv) return res.status(404).json({ error: "Conversation not found" });
    const msg = conv.messages.find((m) => String(m._id) === messageId);
    if (!msg) return res.status(404).json({ error: "Message not found" });
    msg.rating = rating;
    await conv.save();
    res.json({ rated: true });
  }));

  // ---- Analytics ----

  r.get("/chat-analytics/overview", ap(async (req, res) => res.json(await analytics.overview(getTenantId(req)))));
  r.get("/chat-analytics/top-questions", ap(async (req, res) => {
    const lim = clampInt(req.query.limit, 20, 1, 100);
    res.json(await analytics.topQuestions(getTenantId(req), lim));
  }));
  r.get("/chat-analytics/keywords", ap(async (req, res) => {
    const lim = clampInt(req.query.limit, 30, 1, 100);
    res.json(await analytics.keywordFrequency(getTenantId(req), lim));
  }));
  r.get("/chat-analytics/cost-timeseries", ap(async (req, res) => {
    const days = clampInt(req.query.days, 30, 1, 180);
    res.json(await analytics.costTimeseries(getTenantId(req), days));
  }));
  r.get("/chat-analytics/knowledge-gaps", ap(async (req, res) => {
    const lim = clampInt(req.query.limit, 15, 1, 100);
    res.json(await analytics.knowledgeGaps(getTenantId(req), lim));
  }));
  r.get("/chat-analytics/document-usage", ap(async (req, res) => res.json(await analytics.documentUsage(getTenantId(req)))));
  r.get("/chat-analytics/conversations", ap(async (req, res) => {
    const normalized = String(req.query.normalized || "").trim();
    if (!normalized) return res.json([]);
    const lim = clampInt(req.query.limit, 50, 1, 200);
    res.json(await analytics.conversationsForQuestion(getTenantId(req), normalized, lim));
  }));
  r.get("/chat-analytics/semantic-clusters", ap(async (req, res) => {
    const lim = clampInt(req.query.limit, 15, 1, 50);
    res.json(await analytics.semanticClusters(getTenantId(req), lim));
  }));

  return r;
}
|
|
1046
|
+
/**
 * Build the public (unauthenticated) chat router. Tenants are addressed by the
 * `:tenantSlug` path segment, resolved through `chatbot.config.hooks.getTenantBySlug`.
 * Every route passes through one shared rate limiter (scope "public-chat").
 *
 * Routes:
 *   POST /:tenantSlug            — send a message, get the assistant reply
 *   GET  /:tenantSlug/:sessionId — fetch a trimmed view of a conversation
 *   POST /:tenantSlug/rate       — rate a message of a conversation
 *
 * @param {object} chatbot - Object exposing `models` and `config`.
 * @param {object} [opts]
 * @param {{windowMs?: number, max?: number}} [opts.rateLimitConfig] - Limiter
 *   overrides; defaults are a 60000 ms window and 20 requests.
 * @returns {object} The configured Express router.
 */
function createPublicRouter(chatbot, opts = {}) {
  const rag = createRag(chatbot);
  const tenantField = chatbot.config.tenantField;
  const { ChatConversation } = chatbot.models;
  const r = Router();
  const limiter = rateLimit({
    windowMs: opts.rateLimitConfig?.windowMs ?? 6e4,
    max: opts.rateLimitConfig?.max ?? 20,
    scope: "public-chat"
  });
  const sendSchema = z.object({
    sessionId: z.string().min(1).optional(),
    message: z.string().min(1).max(2e3)
  });
  const rateSchema = z.object({
    sessionId: z.string().min(1),
    messageId: z.string().min(1),
    rating: z.number().int().min(-1).max(1)
  });

  // Route any thrown error or rejection to Express' error handling.
  const guarded = (handler) => async (req, res, next) => {
    try {
      await handler(req, res, next);
    } catch (err) {
      next(err);
    }
  };

  // Resolve the tenant addressed by the (lower-cased) slug in the path.
  const findTenant = (req) => {
    const slug = String(req.params.tenantSlug || "").toLowerCase();
    return chatbot.config.hooks.getTenantBySlug(slug);
  };

  r.post("/:tenantSlug", limiter, guarded(async (req, res) => {
    const tenant = await findTenant(req);
    if (!tenant) {
      return res.status(404).json({ error: "Tenant not found or inactive" });
    }
    const payload = sendSchema.parse(req.body);
    const sessionId = payload.sessionId || crypto.randomUUID();
    // Without a `withTenantScope` hook the callback is simply invoked directly.
    const runScoped = chatbot.config.hooks.withTenantScope || (async (_id, fn) => fn());
    const result = await runScoped(
      tenant._id,
      () => rag.respond({ tenantId: tenant._id, sessionId, message: payload.message })
    );
    // Public callers only receive the title and snippet of each citation.
    const citations = result.citations.map((citation) => ({
      documentTitle: citation.documentTitle,
      snippet: citation.snippet
    }));
    res.json({
      sessionId,
      conversationId: result.conversationId,
      content: result.assistantContent,
      citations
    });
  }));

  r.get("/:tenantSlug/:sessionId", limiter, guarded(async (req, res) => {
    const tenant = await findTenant(req);
    if (!tenant) {
      return res.status(404).json({ error: "Tenant not found" });
    }
    const conversation = await ChatConversation.findOne({
      [tenantField]: tenant._id,
      sessionId: req.params.sessionId
    }).lean();
    if (!conversation) {
      return res.status(404).json({ error: "Conversation not found" });
    }
    // Expose a fixed subset of each message's fields.
    const messages = (conversation.messages || []).map(({ _id, role, content, createdAt, rating }) => ({
      _id,
      role,
      content,
      createdAt,
      rating
    }));
    res.json({
      sessionId: conversation.sessionId,
      title: conversation.title,
      messages
    });
  }));

  r.post("/:tenantSlug/rate", limiter, guarded(async (req, res) => {
    const tenant = await findTenant(req);
    if (!tenant) {
      return res.status(404).json({ error: "Tenant not found" });
    }
    const { sessionId, messageId, rating } = rateSchema.parse(req.body);
    const conv = await ChatConversation.findOne({
      [tenantField]: tenant._id,
      sessionId
    });
    if (!conv) {
      return res.status(404).json({ error: "Conversation not found" });
    }
    const target = conv.messages.find((candidate) => String(candidate._id) === messageId);
    if (!target) {
      return res.status(404).json({ error: "Message not found" });
    }
    target.rating = rating;
    await conv.save();
    res.json({ rated: true });
  }));

  return r;
}
|
|
1138
|
+
|
|
1139
|
+
// src/index.ts
|
|
1140
|
+
/**
 * Assemble a chatbot instance: Mongoose models, AI client, RAG service,
 * analytics service and the private/public Express routers.
 *
 * @param {object} cfg - Host configuration. Fields read here:
 *   `mongoose`, `tenantField` (falls back to `tenantParamName`),
 *   `tenantParamName`, `tenantRefName`, `plugins`, `ai`, and the optional
 *   `rateLimitConfig` (`{ windowMs?, max? }`) forwarded to the public router.
 *   The whole object is also copied into the returned `config`.
 * @returns {object} `{ models, ai, config, rag, analytics, routes: { private, public } }`.
 */
function createChatbot(cfg) {
  const tenantField = cfg.tenantField || cfg.tenantParamName;
  const models = createChatbotModels(cfg.mongoose, {
    tenantField,
    tenantRefName: cfg.tenantRefName,
    plugins: cfg.plugins
  });
  const ai = createAiClient(cfg.ai);
  const chatbot = {
    models,
    ai,
    config: { ...cfg, tenantField, tenantParamName: cfg.tenantParamName, tenantRefName: cfg.tenantRefName }
  };
  const rag = createRag(chatbot);
  const analytics = createAnalytics(chatbot);
  const privateRouter = createPrivateRouter(chatbot);
  // The public router accepts limiter overrides, but it is only constructed
  // here, so without forwarding them the defaults could never be changed.
  // When `cfg.rateLimitConfig` is undefined the defaults apply as before.
  // NOTE(review): confirm the `rateLimitConfig` key against the package typings.
  const publicRouter = createPublicRouter(chatbot, { rateLimitConfig: cfg.rateLimitConfig });
  return {
    ...chatbot,
    rag,
    analytics,
    routes: { private: privateRouter, public: publicRouter }
  };
}
|
|
1164
|
+
export {
|
|
1165
|
+
chunkText,
|
|
1166
|
+
cosineSimilarity,
|
|
1167
|
+
createChatbot,
|
|
1168
|
+
rateLimit
|
|
1169
|
+
};
|