vectra-js 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/core.js ADDED
@@ -0,0 +1,591 @@
+ const fs = require('fs');
+ const path = require('path');
+ const { RAGConfigSchema, ProviderType, ChunkingStrategy, RetrievalStrategy } = require('./config');
+ const crypto = require('crypto');
+ const { DocumentProcessor } = require('./processor');
+ const { OpenAIBackend } = require('./backends/openai');
+ const { GeminiBackend } = require('./backends/gemini');
+ const { AnthropicBackend } = require('./backends/anthropic');
+ const { OpenRouterBackend } = require('./backends/openrouter');
+ const { HuggingFaceBackend } = require('./backends/huggingface');
+ const { PrismaVectorStore } = require('./backends/prisma_store');
+ const { ChromaVectorStore } = require('./backends/chroma_store');
+ const { QdrantVectorStore } = require('./backends/qdrant_store');
+ const { MilvusVectorStore } = require('./backends/milvus_store');
+ const { LLMReranker } = require('./reranker');
+ const { InMemoryHistory, RedisHistory, PostgresHistory } = require('./memory');
+ const { OllamaBackend } = require('./backends/ollama');
+ const { v5: uuidv5 } = require('uuid');
+
+ class VectraClient {
+   constructor(config) {
+     const parsed = RAGConfigSchema.parse(config);
+     this.config = parsed;
+     this.callbacks = config.callbacks || [];
+     // Initialize processor
+     const agenticLlm = (this.config.chunking && this.config.chunking.agenticLlm)
+       ? this.createLLM(this.config.chunking.agenticLlm)
+       : null;
+     this.processor = new DocumentProcessor(this.config.chunking, agenticLlm);
+
+     // Initialize embedding backend
+     this.embedder = this.createLLM(this.config.embedding);
+
+     // Initialize generation LLM
+     this.llm = this.createLLM(this.config.llm);
+     this.retrievalLlm = (this.config.retrieval && this.config.retrieval.llmConfig)
+       ? this.createLLM(this.config.retrieval.llmConfig)
+       : this.llm;
+
+     // Initialize vector store
+     this.vectorStore = this.createVectorStore(this.config.database);
+     this._embeddingCache = new Map();
+     this._metadataEnrichmentEnabled = !!(this.config.metadata && this.config.metadata.enrichment);
+     // Initialize conversation memory
+     const mm = this.config.memory?.maxMessages || 20;
+     if (this.config.memory && this.config.memory.enabled) {
+       if (this.config.memory.type === 'in-memory') {
+         this.history = new InMemoryHistory(mm);
+       } else if (this.config.memory.type === 'redis') {
+         const rc = this.config.memory.redis || {};
+         this.history = new RedisHistory(rc.clientInstance, rc.keyPrefix || 'vectra:chat:', mm);
+       } else if (this.config.memory.type === 'postgres') {
+         const pc = this.config.memory.postgres || {};
+         this.history = new PostgresHistory(pc.clientInstance, pc.tableName || 'ChatMessage', pc.columnMap || { sessionId: 'sessionId', role: 'role', content: 'content', createdAt: 'createdAt' }, mm);
+       } else {
+         this.history = null;
+       }
+     } else {
+       this.history = null;
+     }
+     // Skip editor lock files, temp files, partial downloads, and hidden files during directory ingestion
+     this._isTemporaryFile = (p) => {
+       const name = path.basename(p);
+       if (name.startsWith('~$')) return true;
+       if (name.endsWith('.tmp') || name.endsWith('.temp')) return true;
+       if (name.endsWith('.crdownload') || name.endsWith('.part')) return true;
+       if (name.startsWith('.')) return true;
+       return false;
+     };
+
+     if (this.config.reranking && this.config.reranking.enabled) {
+       const rerankLlm = this.config.reranking.llmConfig
+         ? this.createLLM(this.config.reranking.llmConfig)
+         : this.llm;
+       this.reranker = new LLMReranker(rerankLlm, this.config.reranking);
+     }
+   }
+
+   createLLM(llmConfig) {
+     if (!llmConfig || !llmConfig.provider) throw new Error('LLM config missing provider');
+     const p = llmConfig.provider;
+     if (p === ProviderType.OPENAI) return new OpenAIBackend(llmConfig);
+     if (p === ProviderType.GEMINI) return new GeminiBackend(llmConfig);
+     if (p === ProviderType.ANTHROPIC) return new AnthropicBackend(llmConfig);
+     if (p === ProviderType.OPENROUTER) return new OpenRouterBackend(llmConfig);
+     if (p === ProviderType.HUGGINGFACE) return new HuggingFaceBackend(llmConfig);
+     if (p === ProviderType.OLLAMA) return new OllamaBackend(llmConfig);
+     throw new Error(`Unsupported provider: ${p}`);
+   }
+
+   createVectorStore(dbConfig) {
+     if (!dbConfig || !dbConfig.type) throw new Error('Database config missing type');
+     const t = dbConfig.type.toLowerCase();
+     if (t === 'prisma') return new PrismaVectorStore(dbConfig);
+     if (t === 'chroma') return new ChromaVectorStore(dbConfig);
+     if (t === 'qdrant') return new QdrantVectorStore(dbConfig);
+     if (t === 'milvus') return new MilvusVectorStore(dbConfig);
+     throw new Error(`Unsupported vector store type: ${t}`);
+   }
+
+   trigger(event, ...args) {
+     const cbs = this.callbacks || [];
+     cbs.forEach(cb => {
+       if (cb[event] && typeof cb[event] === 'function') cb[event](...args);
+     });
+   }
+
+   async _enrichChunkMetadata(chunks) {
+     const enriched = [];
+     for (const c of chunks) {
+       try {
+         const prompt = `Summarize and extract keywords and questions from the following text. Return STRICT JSON with keys: summary (string), keywords (array of strings), hypothetical_questions (array of strings).\nText:\n${c}`;
+         const out = await this.llm.generate(prompt, 'You are a helpful assistant that returns valid JSON only.');
+         const clean = String(out).replace(/```json/g, '').replace(/```/g, '').trim();
+         const parsed = JSON.parse(clean);
+         const summary = typeof parsed.summary === 'string' ? parsed.summary : '';
+         const keywords = Array.isArray(parsed.keywords) ? parsed.keywords : [];
+         const hqs = Array.isArray(parsed.hypothetical_questions) ? parsed.hypothetical_questions : [];
+         enriched.push({ summary, keywords, hypothetical_questions: hqs });
+       } catch (_) {
+         // Fallback: cheap keyword-frequency metadata when the LLM response is not valid JSON
+         const words = c.toLowerCase().replace(/[^a-z0-9\s]/g, ' ').split(/\s+/).filter(w => w.length > 3);
+         const freq = {};
+         for (const w of words) freq[w] = (freq[w] || 0) + 1;
+         const top = Object.entries(freq).sort((a, b) => b[1] - a[1]).slice(0, 10).map(([w]) => w);
+         const summary = c.slice(0, 300);
+         enriched.push({ summary, keywords: top, hypothetical_questions: [] });
+       }
+     }
+     return enriched;
+   }
+
+   async ingestDocuments(filePath) {
+     try {
+       const stats = await fs.promises.stat(filePath);
+
+       if (stats.isDirectory()) {
+         // Recurse into the directory, skipping temporary files and collecting a per-file summary
+         const files = await fs.promises.readdir(filePath);
+         const summary = { processed: 0, succeeded: 0, failed: 0, errors: [] };
+         for (const file of files) {
+           const full = path.join(filePath, file);
+           if (this._isTemporaryFile(full)) continue;
+           summary.processed++;
+           try {
+             await this.ingestDocuments(full);
+             summary.succeeded++;
+           } catch (err) {
+             summary.failed++;
+             summary.errors.push({ file: full, message: err?.message || String(err) });
+             this.trigger('onError', err);
+           }
+         }
+         this.trigger('onIngestSummary', summary);
+         return;
+       }
+
+       const t0 = Date.now();
+       this.trigger('onIngestStart', filePath);
+       const absPath = path.resolve(filePath);
+       const size = stats.size || 0;
+       const mtime = Math.floor(stats.mtimeMs || Date.now());
+       // Hash the file (MD5 and SHA-256) in a single streaming pass
+       const md5 = crypto.createHash('md5');
+       const sha = crypto.createHash('sha256');
+       await new Promise((resolve, reject) => {
+         const s = fs.createReadStream(filePath);
+         s.on('data', (chunk) => { md5.update(chunk); sha.update(chunk); });
+         s.on('error', reject);
+         s.on('end', resolve);
+       });
+       const fileMD5 = md5.digest('hex');
+       const fileSHA256 = sha.digest('hex');
+       const validation = { absolutePath: absPath, fileMD5, fileSHA256, fileSize: size, lastModified: mtime, timestamp: Date.now() };
+       this.trigger('onPreIngestionValidation', validation);
+       const mode = (this.config.ingestion && this.config.ingestion.mode) ? this.config.ingestion.mode : 'skip';
+       let exists = false;
+       if (this.vectorStore && typeof this.vectorStore.fileExists === 'function') {
+         try { exists = await this.vectorStore.fileExists(fileSHA256, size, mtime); } catch { exists = false; }
+       }
+       if (mode === 'skip' && exists) {
+         this.trigger('onIngestSkipped', validation);
+         return;
+       }
+       const rawText = await this.processor.loadDocument(filePath);
+
+       this.trigger('onChunkingStart', this.config.chunking?.strategy);
+       const chunks = await this.processor.process(rawText);
+
+       this.trigger('onEmbeddingStart', chunks.length);
+       // Compute hashes and use cache for known chunks
+       const hashes = chunks.map(c => crypto.createHash('sha256').update(c).digest('hex'));
+       const toEmbed = [];
+       const mapIndex = [];
+       hashes.forEach((h, i) => {
+         if (this._embeddingCache.has(h)) return;
+         toEmbed.push(chunks[i]);
+         mapIndex.push(i);
+       });
+       const newEmbeds = [];
+       if (toEmbed.length > 0) {
+         const enabled = !!(this.config.ingestion && this.config.ingestion.rateLimitEnabled);
+         const defaultLimit = (this.config.ingestion && typeof this.config.ingestion.concurrencyLimit === 'number') ? this.config.ingestion.concurrencyLimit : 5;
+         const limit = enabled ? defaultLimit : toEmbed.length;
+         const batches = [];
+         for (let i = 0; i < toEmbed.length; i += limit) batches.push(toEmbed.slice(i, i + limit));
+         // Embed each batch with exponential backoff (up to 3 attempts)
+         for (const batch of batches) {
+           let attempt = 0; let delay = 500;
+           while (true) {
+             try {
+               const out = await this.embedder.embedDocuments(batch);
+               newEmbeds.push(...out);
+               break;
+             } catch (err) {
+               attempt++;
+               if (attempt >= 3) throw err;
+               await new Promise(r => setTimeout(r, delay));
+               delay = Math.min(4000, delay * 2);
+             }
+           }
+         }
+         newEmbeds.forEach((vec, j) => {
+           const h = hashes[mapIndex[j]];
+           this._embeddingCache.set(h, vec);
+         });
+       }
+       const embeddings = hashes.map((h) => this._embeddingCache.get(h));
+
+       const metas = this.processor.computeChunkMetadata(filePath, rawText, chunks);
+       // Deterministic ids: UUIDv5 over fileSHA256 + chunk index, so re-ingesting a file yields the same ids
+       const idNamespace = uuidv5('vectra-js', uuidv5.DNS);
+       let documents = chunks.map((content, i) => ({
+         id: uuidv5(`${fileSHA256}:${i}`, idNamespace),
+         content,
+         embedding: embeddings[i],
+         metadata: {
+           docId: uuidv5(`${fileSHA256}:${i}`, idNamespace),
+           source: filePath,
+           absolutePath: absPath,
+           fileMD5,
+           fileSHA256,
+           fileSize: size,
+           lastModified: mtime,
+           chunkIndex: i,
+           sha256: hashes[i],
+           fileType: metas[i]?.fileType,
+           docTitle: metas[i]?.docTitle,
+           pageFrom: metas[i]?.pageFrom,
+           pageTo: metas[i]?.pageTo,
+           section: metas[i]?.section
+         }
+       }));
+
+       if (this._metadataEnrichmentEnabled) {
+         const extra = await this._enrichChunkMetadata(chunks);
+         documents = documents.map((d, i) => ({
+           ...d,
+           metadata: {
+             ...d.metadata,
+             summary: extra[i]?.summary,
+             keywords: extra[i]?.keywords,
+             hypothetical_questions: extra[i]?.hypothetical_questions,
+           }
+         }));
+       }
+
+       if (this.vectorStore && typeof this.vectorStore.ensureIndexes === 'function') {
+         try { await this.vectorStore.ensureIndexes(); } catch (_) {}
+       }
+       // Re-check existence after the (possibly slow) embedding step, in case another process ingested the file
+       let existsServer = false;
+       if (this.vectorStore && typeof this.vectorStore.fileExists === 'function') {
+         try { existsServer = await this.vectorStore.fileExists(fileSHA256, size, mtime); } catch { existsServer = false; }
+       }
+       if (mode === 'skip' && existsServer) {
+         this.trigger('onIngestSkipped', validation);
+         return;
+       }
+       if (mode === 'replace' && this.vectorStore && typeof this.vectorStore.deleteDocuments === 'function') {
+         try {
+           await this.vectorStore.deleteDocuments({ filter: { absolutePath: absPath } });
+         } catch (_) {}
+       }
+       // Write to the store with exponential backoff (up to 3 attempts)
+       let attempt = 0; let delay = 500;
+       while (true) {
+         try {
+           if (mode === 'replace' && this.vectorStore && typeof this.vectorStore.upsertDocuments === 'function') {
+             await this.vectorStore.upsertDocuments(documents);
+           } else {
+             await this.vectorStore.addDocuments(documents);
+           }
+           break;
+         } catch (err) {
+           attempt++;
+           if (attempt >= 3) throw err;
+           await new Promise(r => setTimeout(r, delay));
+           delay = Math.min(4000, delay * 2);
+         }
+       }
+       const durationMs = Date.now() - t0;
+       this.trigger('onIngestEnd', filePath, chunks.length, durationMs);
+     } catch (e) {
+       this.trigger('onError', e);
+       throw e;
+     }
+   }
+
+   async listDocuments({ filter = null, limit = 100, offset = 0 } = {}) {
+     if (!this.vectorStore || typeof this.vectorStore.listDocuments !== 'function') {
+       throw new Error('Vector store does not support listDocuments');
+     }
+     return this.vectorStore.listDocuments({ filter, limit, offset });
+   }
+
+   async deleteDocuments({ ids = null, filter = null } = {}) {
+     if (!this.vectorStore || typeof this.vectorStore.deleteDocuments !== 'function') {
+       throw new Error('Vector store does not support deleteDocuments');
+     }
+     return this.vectorStore.deleteDocuments({ ids, filter });
+   }
+
+   async updateDocuments(documents) {
+     if (!Array.isArray(documents) || documents.length === 0) return;
+     const texts = documents.map(d => d.content);
+     const embeddings = await this.embedder.embedDocuments(texts);
+     const docs = documents.map((d, i) => ({
+       id: d.id,
+       content: d.content,
+       embedding: embeddings[i],
+       metadata: d.metadata || {}
+     }));
+     if (!this.vectorStore || typeof this.vectorStore.upsertDocuments !== 'function') {
+       throw new Error('Vector store does not support updateDocuments');
+     }
+     return this.vectorStore.upsertDocuments(docs);
+   }
+
+   async generateHydeQuery(query) {
+     const prompt = `Please write a plausible passage that answers the question: "${query}".`;
+     return await this.retrievalLlm.generate(prompt);
+   }
+
+   async generateMultiQueries(query) {
+     const prompt = `Generate 3 different versions of the user question to retrieve relevant documents. Return them separated by newlines.\nOriginal: ${query}`;
+     const response = await this.retrievalLlm.generate(prompt);
+     return response.split('\n').filter(line => line.trim().length > 0).slice(0, 3);
+   }
+
+   async generateHypotheticalQuestions(query) {
+     const prompt = `Generate 3 hypothetical questions related to the query. Return a VALID JSON array of strings.\nQuery: ${query}`;
+     const out = await this.retrievalLlm.generate(prompt);
+     const clean = String(out).replace(/```json/g, '').replace(/```/g, '').trim();
+     try { const arr = JSON.parse(clean); return Array.isArray(arr) ? arr.slice(0, 3) : []; } catch { return []; }
+   }
+
+   // Rough heuristic: ~4 characters per token
+   tokenEstimate(text) {
+     const len = text ? text.length : 0;
+     return Math.ceil(len / 4);
+   }
+
+   buildContextParts(docs, query) {
+     const budget = (this.config.queryPlanning && this.config.queryPlanning.tokenBudget) ? this.config.queryPlanning.tokenBudget : 2048;
+     const preferSumm = (this.config.queryPlanning && this.config.queryPlanning.preferSummariesBelow) ? this.config.queryPlanning.preferSummariesBelow : 1024;
+     const parts = [];
+     let used = 0;
+     for (const d of docs) {
+       const t = d.metadata?.docTitle || '';
+       const sec = d.metadata?.section || '';
+       const pages = (d.metadata?.pageFrom && d.metadata?.pageTo) ? `pages ${d.metadata.pageFrom}-${d.metadata.pageTo}` : '';
+       const sum = d.metadata?.summary ? d.metadata.summary : d.content.slice(0, 800);
+       const chosen = (this.tokenEstimate(sum) <= preferSumm) ? sum : d.content.slice(0, 1200);
+       const part = `${t} ${sec} ${pages}\n${chosen}`;
+       const est = this.tokenEstimate(part);
+       if (used + est > budget) break;
+       parts.push(part);
+       used += est;
+     }
+     return parts;
+   }
+
+   extractSnippets(docs, query, maxSnippets) {
+     const terms = query.toLowerCase().split(/\W+/).filter(t => t.length > 2);
+     const out = [];
+     for (const d of docs) {
+       const sents = d.content.split(/(?<=[.!?])\s+/);
+       for (const s of sents) {
+         const l = s.toLowerCase();
+         const score = terms.reduce((acc, t) => acc + (l.includes(t) ? 1 : 0), 0);
+         if (score > 0) {
+           const pages = (d.metadata?.pageFrom && d.metadata?.pageTo) ? `pages ${d.metadata.pageFrom}-${d.metadata.pageTo}` : '';
+           out.push(`${d.metadata?.docTitle || ''} ${d.metadata?.section || ''} ${pages}\n${s}`);
+           if (out.length >= maxSnippets) return out;
+         }
+       }
+     }
+     return out;
+   }
+
+   // Reciprocal Rank Fusion: each result list contributes 1 / (k + rank + 1) per document, keyed by content
+   reciprocalRankFusion(docLists, k = 60) {
+     const scores = {};
+     const contentMap = {};
+     docLists.forEach(list => {
+       list.forEach((doc, rank) => {
+         if (!contentMap[doc.content]) contentMap[doc.content] = doc;
+         if (!scores[doc.content]) scores[doc.content] = 0;
+         scores[doc.content] += 1 / (k + rank + 1);
+       });
+     });
+     return Object.keys(scores)
+       .sort((a, b) => scores[b] - scores[a])
+       .map(content => contentMap[content]);
+   }
+
+   // Greedy Maximal Marginal Relevance: trade relevance against diversity,
+   // approximating pairwise similarity with Jaccard overlap of token sets.
+   mmrSelect(candidates, k, mmrLambda) {
+     if (!Array.isArray(candidates) || candidates.length === 0) return [];
+     const kInt = Math.max(1, Number(k) || 1);
+     const lam = Math.max(0, Math.min(1, Number(mmrLambda) || 0.5));
+
+     const tokens = (text) => {
+       const t = String(text || '').toLowerCase().match(/[a-z0-9]+/g) || [];
+       return new Set(t.filter(x => x.length > 2));
+     };
+
+     const jaccard = (a, b) => {
+       if (!a || !b || a.size === 0 || b.size === 0) return 0;
+       let inter = 0;
+       for (const x of a) if (b.has(x)) inter++;
+       if (inter === 0) return 0;
+       const union = a.size + b.size - inter;
+       return union ? inter / union : 0;
+     };
+
+     const pool = candidates.map((d) => ({
+       ...d,
+       _tokens: tokens(d.content),
+       _rel: typeof d.score === 'number' ? d.score : Number(d.score) || 0,
+     })).sort((a, b) => (b._rel || 0) - (a._rel || 0));
+
+     const selected = [];
+     const selectedTokens = [];
+
+     // Seed with the most relevant candidate, then greedily add the best lambda-weighted tradeoff
+     const first = pool.shift();
+     selected.push(first);
+     selectedTokens.push(first._tokens);
+
+     while (pool.length > 0 && selected.length < kInt) {
+       let bestIdx = -1;
+       let bestScore = null;
+       for (let i = 0; i < pool.length; i++) {
+         const d = pool[i];
+         let div = 0;
+         for (const st of selectedTokens) div = Math.max(div, jaccard(d._tokens, st));
+         const score = lam * d._rel - (1 - lam) * div;
+         if (bestScore === null || score > bestScore) {
+           bestScore = score;
+           bestIdx = i;
+         }
+       }
+       if (bestIdx < 0) break;
+       const picked = pool.splice(bestIdx, 1)[0];
+       selected.push(picked);
+       selectedTokens.push(picked._tokens);
+     }
+
+     return selected.slice(0, kInt).map(({ _tokens, _rel, ...rest }) => rest);
+   }
+
+   async queryRAG(query, filter = null, stream = false, sessionId = null) {
+     try {
+       const tRetrieval = Date.now();
+       this.trigger('onRetrievalStart', query);
+
+       const strategy = this.config.retrieval?.strategy;
+       let docs = [];
+       const k = (this.config.reranking && this.config.reranking.enabled)
+         ? this.config.reranking.windowSize : 5;
+
+       const queryVector = await this.embedder.embedQuery(query);
+
+       if (strategy === RetrievalStrategy.HYDE) {
+         const hypotheticalDoc = await this.generateHydeQuery(query);
+         const hydeVector = await this.embedder.embedQuery(hypotheticalDoc);
+         docs = await this.vectorStore.similaritySearch(hydeVector, k, filter);
+       } else if (strategy === RetrievalStrategy.MULTI_QUERY) {
+         const queries = await this.generateMultiQueries(query);
+         if (this.config.queryPlanning) {
+           const hyps = await this.generateHypotheticalQuestions(query);
+           queries.push(...hyps);
+         }
+         queries.push(query);
+         const results = await Promise.all(queries.map(async (q) => {
+           const vec = await this.embedder.embedQuery(q);
+           return await this.vectorStore.similaritySearch(vec, k, filter);
+         }));
+         docs = this.reciprocalRankFusion(results); // use the method's default RRF constant (k = 60)
+       } else if (strategy === RetrievalStrategy.HYBRID) {
+         docs = await this.vectorStore.hybridSearch(query, queryVector, k, filter);
+       } else if (strategy === RetrievalStrategy.MMR) {
+         const fetchK = Math.max(Number(this.config.retrieval?.mmrFetchK) || 20, k);
+         const lam = Number(this.config.retrieval?.mmrLambda) || 0.5;
+         const candidates = await this.vectorStore.similaritySearch(queryVector, fetchK, filter);
+         docs = this.mmrSelect(candidates, k, lam);
+       } else {
+         docs = await this.vectorStore.similaritySearch(queryVector, k, filter);
+       }
+
+       if (this.config.reranking && this.config.reranking.enabled && this.reranker) {
+         this.trigger('onRerankingStart', docs.length);
+         docs = await this.reranker.rerank(query, docs);
+         this.trigger('onRerankingEnd', docs.length);
+       }
+
+       const retrievalMs = Date.now() - tRetrieval;
+       this.trigger('onRetrievalEnd', docs.length, retrievalMs);
+       // Boost documents whose enriched keywords overlap the query terms (stable sort keeps prior order on ties)
+       const terms = query.toLowerCase().split(/\W+/).filter(t => t.length > 2);
+       docs = docs.map(d => {
+         const kws = Array.isArray(d.metadata?.keywords) ? d.metadata.keywords.map(k => String(k).toLowerCase()) : [];
+         const match = terms.reduce((acc, t) => acc + (kws.includes(t) ? 1 : 0), 0);
+         return { ...d, _boost: match };
+       }).sort((a, b) => (b._boost || 0) - (a._boost || 0));
+
+       const contextParts = this.buildContextParts(docs, query);
+       if (this.config.grounding && this.config.grounding.enabled) {
+         const maxSnippets = this.config.grounding.maxSnippets || 3;
+         const snippets = this.extractSnippets(docs, query, maxSnippets);
+         if (this.config.grounding.strict) {
+           contextParts.splice(0, contextParts.length, ...snippets);
+         } else {
+           contextParts.push(...snippets);
+         }
+       }
+       const context = contextParts.join('\n---\n');
+       let historyText = '';
+       if (this.history && sessionId) {
+         const fn = this.history.getRecent?.bind(this.history);
+         if (typeof fn === 'function') {
+           const out = fn.length >= 2 ? fn(sessionId, this.config.memory?.maxMessages || 10) : fn(sessionId);
+           const recent = out && typeof out.then === 'function' ? await out : out;
+           historyText = Array.isArray(recent) ? recent.map(m => `${String(m.role).toUpperCase()}: ${m.content}`).join('\n') : '';
+         }
+       }
+       let prompt;
+       if (this.config.prompts && this.config.prompts.query) {
+         prompt = this.config.prompts.query.replace(/\{\{context\}\}/g, context).replace(/\{\{question\}\}/g, query);
+         if (historyText) prompt = `Conversation:\n${historyText}\n\n${prompt}`;
+       } else {
+         prompt = `Answer the question using the provided summaries and cite titles/sections/pages where relevant.\nContext:\n${context}\n\n${historyText ? `Conversation:\n${historyText}\n\n` : ''}Question: ${query}`;
+       }
+
+       const tGen = Date.now();
+       this.trigger('onGenerationStart', prompt);
+       const systemInst = "You are a helpful RAG assistant.";
+
+       if (stream) {
+         // Streaming return
+         if (!this.llm.generateStream) throw new Error("Streaming not implemented for this provider");
+         return this.llm.generateStream(prompt, systemInst);
+       } else {
+         const answer = await this.llm.generate(prompt, systemInst);
+         if (this.history && sessionId) {
+           const add = this.history.addMessage?.bind(this.history);
+           if (typeof add === 'function') {
+             const r1 = add(sessionId, 'user', query);
+             if (r1 && typeof r1.then === 'function') await r1;
+             const r2 = add(sessionId, 'assistant', String(answer));
+             if (r2 && typeof r2.then === 'function') await r2;
+           }
+         }
+         const genMs = Date.now() - tGen;
+         this.trigger('onGenerationEnd', answer, genMs);
+         if (this.config.generation && this.config.generation.outputFormat === 'json') {
+           try { const parsed = JSON.parse(String(answer)); return { answer: parsed, sources: docs.map(d => d.metadata) }; } catch { return { answer, sources: docs.map(d => d.metadata) }; }
+         }
+         return { answer, sources: docs.map(d => d.metadata) };
+       }
+     } catch (e) {
+       this.trigger('onError', e);
+       throw e;
+     }
+   }
+
+   async evaluate(testSet) {
+     const report = [];
+     for (const item of testSet) {
+       const res = await this.queryRAG(item.question);
+       const context = Array.isArray(res.sources) ? res.sources.map(s => s.summary || '').join('\n') : '';
+       const faithPrompt = `Rate 0-1: Is the following Answer derived only from the Context?\nContext:\n${context}\n\nAnswer:\n${typeof res.answer === 'string' ? res.answer : JSON.stringify(res.answer)}`;
+       const relevancePrompt = `Rate 0-1: Does the Answer correctly answer the Question?\nQuestion:\n${item.question}\n\nAnswer:\n${typeof res.answer === 'string' ? res.answer : JSON.stringify(res.answer)}`;
+       let faith = 0; let rel = 0;
+       // Guard against non-numeric LLM output: parseFloat returns NaN, which try/catch would not intercept
+       try { const f = parseFloat(String(await this.llm.generate(faithPrompt, 'You return a single number between 0 and 1.'))); faith = Number.isFinite(f) ? Math.max(0, Math.min(1, f)) : 0; } catch {}
+       try { const r = parseFloat(String(await this.llm.generate(relevancePrompt, 'You return a single number between 0 and 1.'))); rel = Number.isFinite(r) ? Math.max(0, Math.min(1, r)) : 0; } catch {}
+       report.push({ question: item.question, expectedGroundTruth: item.expectedGroundTruth, faithfulness: faith, relevance: rel });
+     }
+     return report;
+   }
+ }
+
+ module.exports = { VectraClient };
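
For orientation, a minimal usage sketch of the class above. This is not part of the package; the top-level require path, provider names, model ids, and config values are assumptions inferred from the config keys that core.js reads:

// Hypothetical usage sketch — config shapes inferred from core.js, not from package docs.
const { VectraClient } = require('vectra-js'); // assumes the main entry re-exports VectraClient

const client = new VectraClient({
  embedding: { provider: 'openai', model: 'text-embedding-3-small' },  // assumed shape
  llm: { provider: 'openai', model: 'gpt-4o-mini' },                   // assumed shape
  database: { type: 'chroma', url: 'http://localhost:8000' },          // assumed shape
  memory: { enabled: true, type: 'in-memory', maxMessages: 20 },
  callbacks: [{ onIngestEnd: (file, n, ms) => console.log(`${file}: ${n} chunks in ${ms}ms`) }],
});

(async () => {
  await client.ingestDocuments('./docs');  // accepts a single file or a directory
  const res = await client.queryRAG('How do I configure retries?', null, false, 'session-1');
  console.log(res.answer, res.sources.length);
})();
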
@@ -0,0 +1,15 @@
+ async function evaluateTestSet(client, testSet) {
+   const report = [];
+   for (const item of testSet) {
+     const res = await client.queryRAG(item.question);
+     const context = Array.isArray(res.sources) ? res.sources.map(s => s.summary || '').join('\n') : '';
+     const faithPrompt = `Rate 0-1: Is the following Answer derived only from the Context?\nContext:\n${context}\n\nAnswer:\n${typeof res.answer === 'string' ? res.answer : JSON.stringify(res.answer)}`;
+     const relevancePrompt = `Rate 0-1: Does the Answer correctly answer the Question?\nQuestion:\n${item.question}\n\nAnswer:\n${typeof res.answer === 'string' ? res.answer : JSON.stringify(res.answer)}`;
+     let faith = 0; let rel = 0;
+     // Same NaN guard as VectraClient.evaluate: parseFloat may return NaN, which try/catch would not intercept
+     try { const f = parseFloat(String(await client.llm.generate(faithPrompt, 'You return a single number between 0 and 1.'))); faith = Number.isFinite(f) ? Math.max(0, Math.min(1, f)) : 0; } catch {}
+     try { const r = parseFloat(String(await client.llm.generate(relevancePrompt, 'You return a single number between 0 and 1.'))); rel = Number.isFinite(r) ? Math.max(0, Math.min(1, r)) : 0; } catch {}
+     report.push({ question: item.question, expectedGroundTruth: item.expectedGroundTruth, faithfulness: faith, relevance: rel });
+   }
+   return report;
+ }
+ module.exports = { evaluateTestSet };
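
A short driving sketch for the standalone evaluator above. The test-set shape (question, expectedGroundTruth) is taken from the fields the loop reads; the client variable is assumed to be an already-constructed VectraClient:

// Sketch: score a VectraClient against a tiny hand-written test set.
const testSet = [
  { question: 'What is the default ingestion mode?', expectedGroundTruth: 'skip' },
  { question: 'Which vector stores are supported?', expectedGroundTruth: 'prisma, chroma, qdrant, milvus' },
];

evaluateTestSet(client, testSet).then((report) => {
  for (const row of report) {
    console.log(`${row.question} -> faithfulness=${row.faithfulness.toFixed(2)}, relevance=${row.relevance.toFixed(2)}`);
  }
});
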
@@ -0,0 +1,21 @@
+ class VectorStore {
+   async addDocuments(documents) { throw new Error("Method 'addDocuments' must be implemented."); }
+   async upsertDocuments(documents) {
+     throw new Error("Method 'upsertDocuments' must be implemented.");
+   }
+   async similaritySearch(vector, limit = 5, filter = null) { throw new Error("Method 'similaritySearch' must be implemented."); }
+   async hybridSearch(text, vector, limit = 5, filter = null) {
+     // Default: fall back to pure vector similarity for stores without a lexical index
+     return this.similaritySearch(vector, limit, filter);
+   }
+   async listDocuments({ filter = null, limit = 100, offset = 0 } = {}) {
+     throw new Error("Method 'listDocuments' must be implemented.");
+   }
+   async deleteDocuments({ ids = null, filter = null } = {}) {
+     throw new Error("Method 'deleteDocuments' must be implemented.");
+   }
+   async fileExists(sha256, size, lastModified) {
+     // Stores that don't track file hashes report "not ingested", so ingestion always proceeds
+     return false;
+   }
+ }
+ module.exports = { VectorStore };
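
To illustrate the contract, a minimal conforming subclass; this brute-force, in-memory cosine-similarity store is a sketch for tests, not something shipped by the package:

// Sketch: smallest useful VectorStore implementation (in-memory, brute-force cosine similarity).
class ArrayVectorStore extends VectorStore {
  constructor() { super(); this.docs = []; }
  async addDocuments(documents) { this.docs.push(...documents); }
  async upsertDocuments(documents) {
    for (const d of documents) {
      const i = this.docs.findIndex(x => x.id === d.id);
      if (i >= 0) this.docs[i] = d; else this.docs.push(d);
    }
  }
  async similaritySearch(vector, limit = 5, filter = null) {
    const dot = (a, b) => a.reduce((s, x, i) => s + x * b[i], 0);
    const norm = (a) => Math.sqrt(dot(a, a)) || 1;
    return this.docs
      .filter(d => !filter || Object.entries(filter).every(([k, v]) => d.metadata?.[k] === v))
      .map(d => ({ ...d, score: dot(vector, d.embedding) / (norm(vector) * norm(d.embedding)) }))
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }
  async listDocuments({ filter = null, limit = 100, offset = 0 } = {}) {
    const pool = filter ? this.docs.filter(d => Object.entries(filter).every(([k, v]) => d.metadata?.[k] === v)) : this.docs;
    return pool.slice(offset, offset + limit);
  }
  async deleteDocuments({ ids = null, filter = null } = {}) {
    if (ids) this.docs = this.docs.filter(d => !ids.includes(d.id));
    if (filter) this.docs = this.docs.filter(d => !Object.entries(filter).every(([k, v]) => d.metadata?.[k] === v));
  }
}

hybridSearch and fileExists are deliberately not overridden here, so they fall through to the base-class defaults: hybrid queries degrade to vector search, and ingestion always re-processes files.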