vectra-js 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/memory.js ADDED
@@ -0,0 +1,96 @@
1
/**
 * Process-local chat history, kept in a Map keyed by session id.
 *
 * Each session holds at most `maxMessages` entries; older entries are dropped
 * as new ones arrive. Nothing is persisted.
 */
class InMemoryHistory {
  /**
   * @param {number} [maxMessages=20] Upper bound on stored messages per session.
   */
  constructor(maxMessages = 20) {
    this.sessions = new Map();
    this.maxMessages = maxMessages;
  }

  /**
   * Append a message to a session and drop whatever exceeds the cap.
   * Calls without a session id are ignored.
   *
   * @param {string} sessionId
   * @param {string} role
   * @param {*} content
   * @returns {void}
   */
  addMessage(sessionId, role, content) {
    if (!sessionId) {
      return;
    }
    const history = this.sessions.get(sessionId) || [];
    history.push({ role, content, ts: Date.now() });
    // Number of leading entries that no longer fit under the cap.
    const overflow = history.length - this.maxMessages;
    this.sessions.set(sessionId, history.slice(overflow > 0 ? overflow : 0));
  }

  /**
   * Return a copy of the last `n` messages of a session, oldest first.
   *
   * @param {string} sessionId
   * @param {number} [n=10]
   * @returns {Array<{role: string, content: *, ts: number}>}
   */
  getRecent(sessionId, n = 10) {
    const history = this.sessions.get(sessionId) || [];
    const skipped = history.length - n;
    return history.slice(skipped > 0 ? skipped : 0);
  }
}
19
/**
 * Chat history stored in one Redis list per session (`keyPrefix + sessionId`).
 *
 * Messages are appended to the tail of the list as JSON strings and the list is
 * trimmed to the newest `maxMessages` entries. Both lower-case command names
 * (`rpush`/`ltrim`/`lrange`) and camelCase ones (`rPush`/`lTrim`/`lRange`) are
 * supported, so either client naming style works.
 *
 * All Redis failures are swallowed on purpose: history is best-effort and must
 * not break the caller.
 */
class RedisHistory {
  /**
   * @param {object} client Redis client exposing list commands.
   * @param {string} [keyPrefix='vectra:chat:'] Prefix prepended to the session id.
   * @param {number} [maxMessages=20] Number of newest messages kept per session.
   */
  constructor(client, keyPrefix = 'vectra:chat:', maxMessages = 20) {
    this.client = client;
    this.keyPrefix = keyPrefix;
    this.maxMessages = maxMessages;
  }

  /**
   * Resolve the first command the client implements, bound to the client.
   *
   * @param {...string} names Candidate method names, in order of preference.
   * @returns {Function|null}
   */
  _command(...names) {
    for (const name of names) {
      const fn = this.client[name];
      if (typeof fn === 'function') return fn.bind(this.client);
    }
    return null;
  }

  /**
   * Append a message to the session list and trim the list to `maxMessages`.
   *
   * @param {string} sessionId
   * @param {string} role
   * @param {*} content
   * @returns {Promise<void>}
   */
  async addMessage(sessionId, role, content) {
    if (!sessionId || !this.client) return;
    const key = `${this.keyPrefix}${sessionId}`;
    const payload = JSON.stringify({ role, content, ts: Date.now() });
    try {
      // Always push to the tail: getRecent() reads the newest entries from the
      // tail, so a head push (LPUSH) would reverse the order.
      const push = this._command('rpush', 'rPush');
      if (push) await push(key, payload);
      const trim = this._command('ltrim', 'lTrim');
      if (trim) await trim(key, -this.maxMessages, -1);
    } catch (_) {
      // Best-effort: a Redis failure must not break the caller.
    }
  }

  /**
   * Read the newest `n` messages of a session, oldest first.
   *
   * Entries that are not valid JSON are returned as assistant messages holding
   * the raw string.
   *
   * @param {string} sessionId
   * @param {number} [n=10]
   * @returns {Promise<Array<object>>} Empty when `n` is not positive, the
   *   client is missing, or Redis fails.
   */
  async getRecent(sessionId, n = 10) {
    if (!sessionId || !this.client) return [];
    const count = Math.trunc(Number(n));
    // A start index of -0 would address the whole list, so reject n <= 0 here.
    if (!(count > 0)) return [];
    const key = `${this.keyPrefix}${sessionId}`;
    try {
      const range = this._command('lrange', 'lRange');
      const raw = range ? await range(key, -count, -1) : [];
      return raw.map(entry => {
        try {
          return JSON.parse(entry);
        } catch (_) {
          return { role: 'assistant', content: String(entry) };
        }
      });
    } catch (_) {
      return [];
    }
  }
}
58
/**
 * Chat history stored in a SQL table, accessed through a client exposing
 * either `$executeRawUnsafe`/`$queryRawUnsafe` or `execute_raw`/`query_raw`.
 *
 * Table and column names come from the constructor and are emitted as quoted
 * identifiers; values are always passed as bound parameters. Database errors
 * are swallowed on purpose: history is best-effort.
 *
 * NOTE: `maxMessages` is stored but no query in this class applies it; rows
 * are never pruned here.
 */
class PostgresHistory {
  /**
   * @param {object} client Database client.
   * @param {string} [tableName='ChatMessage']
   * @param {{sessionId?: string, role?: string, content?: string, createdAt?: string}} [columnMap]
   *   Logical-to-physical column names; missing keys fall back to the defaults.
   * @param {number} [maxMessages=20]
   */
  constructor(client, tableName = 'ChatMessage', columnMap = { sessionId: 'sessionId', role: 'role', content: 'content', createdAt: 'createdAt' }, maxMessages = 20) {
    this.client = client;
    this.tableName = tableName;
    this.columnMap = columnMap;
    this.maxMessages = maxMessages;
  }

  /**
   * Quote a SQL identifier, doubling any embedded double quote so the name
   * cannot terminate the quoted identifier early.
   *
   * @param {string} name
   * @returns {string}
   */
  _ident(name) {
    return `"${String(name).replace(/"/g, '""')}"`;
  }

  /**
   * Quoted table and column identifiers, with defaults filled in for any
   * column the caller's map leaves out.
   *
   * @returns {{table: string, sessionId: string, role: string, content: string, createdAt: string}}
   */
  _identifiers() {
    const cols = {
      sessionId: 'sessionId',
      role: 'role',
      content: 'content',
      createdAt: 'createdAt',
      ...this.columnMap,
    };
    return {
      table: this._ident(this.tableName),
      sessionId: this._ident(cols.sessionId),
      role: this._ident(cols.role),
      content: this._ident(cols.content),
      createdAt: this._ident(cols.createdAt),
    };
  }

  /**
   * Insert one message row stamped with the database's NOW().
   *
   * @param {string} sessionId
   * @param {string} role
   * @param {string} content
   * @returns {Promise<void>}
   */
  async addMessage(sessionId, role, content) {
    if (!sessionId || !this.client) return;
    const id = this._identifiers();
    const q = `INSERT INTO ${id.table} (${id.sessionId},${id.role},${id.content},${id.createdAt}) VALUES ($1,$2,$3,NOW())`;
    try {
      if (typeof this.client.$executeRawUnsafe === 'function') {
        await this.client.$executeRawUnsafe(q, sessionId, role, content);
      } else if (typeof this.client.execute_raw === 'function') {
        await this.client.execute_raw(q, sessionId, role, content);
      }
    } catch (_) {
      // Best-effort: a database failure must not break the caller.
    }
  }

  /**
   * Read the newest `n` messages of a session, oldest first.
   *
   * @param {string} sessionId
   * @param {number} [n=10] Truncated to an integer and raised to at least 1.
   * @returns {Promise<Array<{role: string, content: string}>>} Empty when `n`
   *   is not a finite number, the client is missing, or the query fails.
   */
  async getRecent(sessionId, n = 10) {
    if (!sessionId || !this.client) return [];
    const requested = Math.trunc(Number(n));
    // LIMIT is interpolated into the SQL text, so only a plain integer may reach it.
    if (!Number.isFinite(requested)) return [];
    const limit = Math.max(1, requested);
    const id = this._identifiers();
    const q = `SELECT ${id.role} as role, ${id.content} as content FROM ${id.table} WHERE ${id.sessionId} = $1 ORDER BY ${id.createdAt} DESC LIMIT ${limit}`;
    try {
      let rows = [];
      if (typeof this.client.$queryRawUnsafe === 'function') {
        rows = await this.client.$queryRawUnsafe(q, sessionId);
      } else if (typeof this.client.query_raw === 'function') {
        rows = await this.client.query_raw(q, sessionId);
      }
      if (!Array.isArray(rows)) return [];
      // Rows arrive newest-first; reverse a copy so the driver's array is untouched.
      return [...rows].reverse().map(r => ({ role: r.role, content: r.content }));
    } catch (_) {
      return [];
    }
  }
}
96
+ module.exports = { InMemoryHistory, RedisHistory, PostgresHistory };
@@ -0,0 +1,155 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const pdf = require('pdf-parse');
4
+ const mammoth = require('mammoth');
5
+ const xlsx = require('xlsx');
6
+ const { ChunkingStrategy } = require('./config');
7
+
8
+ class DocumentProcessor {
9
+ constructor(config, agenticLlm) {
10
+ this.config = config;
11
+ this.agenticLlm = agenticLlm;
12
+ this._lastPages = null;
13
+ }
14
+
15
+ async loadDocument(filePath) {
16
+ const ext = path.extname(filePath).toLowerCase();
17
+ const buffer = await fs.promises.readFile(filePath);
18
+ if (ext === '.pdf') {
19
+ const pages = [];
20
+ const res = await pdf(buffer, {
21
+ pagerender: pageData => pageData.getTextContent().then(tc => {
22
+ const s = tc.items.map(it => it.str).join(' ');
23
+ pages.push(s);
24
+ return s;
25
+ })
26
+ });
27
+ this._lastPages = pages;
28
+ return res.text;
29
+ }
30
+ if (ext === '.docx') return (await mammoth.extractRawText({ buffer })).value;
31
+ if (['.txt','.md'].includes(ext)) return buffer.toString('utf-8');
32
+ if (['.xlsx','.xls'].includes(ext)) {
33
+ const wb = xlsx.read(buffer, { type: 'buffer' });
34
+ return xlsx.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]);
35
+ }
36
+ throw new Error(`Unsupported file: ${ext}`);
37
+ }
38
+
39
+ async process(text) {
40
+ return this.config.strategy === ChunkingStrategy.AGENTIC
41
+ ? this.agenticSplit(text)
42
+ : this.recursiveSplit(text);
43
+ }
44
+
45
+ recursiveSplit(text) {
46
+ const chunks = [];
47
+ const sizeChars = Math.max(500, this.config.chunkSize || 1000);
48
+ const baseOverlap = Math.max(0, this.config.chunkOverlap || 200);
49
+ const sentences = text.split(/(?<=[.!?])\s+/);
50
+ let current = '';
51
+ for (const s of sentences) {
52
+ const candidate = current.length ? current + ' ' + s : s;
53
+ if (candidate.length >= sizeChars) {
54
+ const entropy = this._entropy(candidate);
55
+ const overlap = Math.min(baseOverlap + Math.floor(entropy * 50), Math.floor(sizeChars / 3));
56
+ chunks.push(candidate);
57
+ // create overlap window from end of candidate
58
+ current = candidate.slice(Math.max(0, candidate.length - overlap));
59
+ } else {
60
+ current = candidate;
61
+ }
62
+ }
63
+ if (current) chunks.push(current);
64
+ return chunks;
65
+ }
66
+
67
+ _entropy(str) {
68
+ const freq = {};
69
+ for (const ch of str) freq[ch] = (freq[ch] || 0) + 1;
70
+ const len = str.length;
71
+ let H = 0;
72
+ Object.values(freq).forEach(c => { const p = c / len; H += -p * Math.log2(p); });
73
+ return H;
74
+ }
75
+
76
+ async agenticSplit(text) {
77
+ if (!this.agenticLlm) throw new Error("Agentic LLM not configured.");
78
+ const windows = this.recursiveSplit(text);
79
+ const finalChunks = [];
80
+ for (const window of windows) {
81
+ const prompt = `Split this text into semantically complete propositions. Return a VALID JSON list of strings. Do not include Markdown formatting.\nText: "${window}"`;
82
+ try {
83
+ const response = await this.agenticLlm.generate(prompt);
84
+ // Attempt to clean markdown
85
+ const cleanJson = response.replace(/```json/g, '').replace(/```/g, '').trim();
86
+ const parsed = JSON.parse(cleanJson);
87
+ if (Array.isArray(parsed)) {
88
+ const dedup = new Set();
89
+ for (const item of parsed) {
90
+ if (typeof item === 'string') {
91
+ const norm = item.trim().replace(/\s+/g, ' ');
92
+ if (norm.length > 1 && !dedup.has(norm)) { dedup.add(norm); finalChunks.push(norm); }
93
+ }
94
+ }
95
+ } else {
96
+ finalChunks.push(window);
97
+ }
98
+ } catch (e) {
99
+ // Fallback to window if parsing fails
100
+ finalChunks.push(window);
101
+ }
102
+ }
103
+ return finalChunks;
104
+ }
105
+
106
+ computeChunkMetadata(filePath, rawText, chunks) {
107
+ const ext = path.extname(filePath).toLowerCase();
108
+ const title = path.basename(filePath);
109
+ const positions = [];
110
+ let cursor = 0;
111
+ for (const c of chunks) {
112
+ const idx = rawText.indexOf(c, cursor);
113
+ const start = idx >= 0 ? idx : 0;
114
+ const end = start + c.length;
115
+ positions.push({ start, end });
116
+ cursor = end;
117
+ }
118
+ let pagesMeta = null;
119
+ if (ext === '.pdf' && Array.isArray(this._lastPages)) {
120
+ const lens = this._lastPages.map(p => p.length);
121
+ const cum = [];
122
+ let acc = 0;
123
+ for (const l of lens) { acc += l; cum.push(acc); }
124
+ pagesMeta = positions.map(pos => {
125
+ const pf = cum.findIndex(x => x >= pos.start) + 1;
126
+ const pt = cum.findIndex(x => x >= pos.end) + 1;
127
+ return { pageFrom: pf || 1, pageTo: pt || pf || 1 };
128
+ });
129
+ }
130
+ let sections = null;
131
+ if (ext === '.md' || ext === '.txt') {
132
+ const lines = rawText.split(/\n/);
133
+ let offset = 0;
134
+ const heads = [];
135
+ for (const ln of lines) {
136
+ if (/^#{1,6}\s+/.test(ln)) heads.push({ pos: offset, text: ln.replace(/^#{1,6}\s+/, '') });
137
+ offset += ln.length + 1;
138
+ }
139
+ sections = positions.map(pos => {
140
+ const candidates = heads.filter(h => h.pos <= pos.start);
141
+ const h = candidates.length ? candidates[candidates.length - 1] : null;
142
+ return h ? h.text : null;
143
+ });
144
+ }
145
+ return positions.map((pos, i) => ({
146
+ fileType: ext,
147
+ docTitle: title,
148
+ chunkIndex: i,
149
+ pageFrom: pagesMeta ? pagesMeta[i].pageFrom : undefined,
150
+ pageTo: pagesMeta ? pagesMeta[i].pageTo : undefined,
151
+ section: sections ? sections[i] : undefined
152
+ }));
153
+ }
154
+ }
155
+ module.exports = { DocumentProcessor };
@@ -0,0 +1,26 @@
1
/**
 * Re-orders retrieved documents by an LLM-assigned relevance score.
 */
class LLMReranker {
  /**
   * @param {{generate: function(string): Promise<string>}} llm
   * @param {{topN?: number}} [config] `topN` limits how many documents are returned.
   */
  constructor(llm, config) {
    this.llm = llm;
    this.config = config;
  }

  /**
   * Score every document against the query (all LLM calls run concurrently)
   * and return the highest-scoring ones, best first.
   *
   * @param {string} query
   * @param {Array<object>} documents Each document's `content` is scored.
   * @returns {Promise<Array<object>>} Copies of the documents with a `score`
   *   field added, cut to `config.topN` when that is set.
   */
  async rerank(query, documents) {
    if (!documents || documents.length === 0) return [];
    const scored = await Promise.all(documents.map(async (doc) => {
      const score = await this.scoreDocument(query, doc ? doc.content : undefined);
      return { ...doc, score };
    }));
    scored.sort((a, b) => b.score - a.score);
    const topN = this.config ? this.config.topN : undefined;
    return topN == null ? scored : scored.slice(0, topN);
  }

  /**
   * Ask the LLM for a 0-10 relevance score of one document.
   *
   * Only the first 1000 characters of the content are sent. The first integer
   * in the reply is used and capped at 10.
   *
   * @param {string} query
   * @param {*} content Missing content is scored as an empty document.
   * @returns {Promise<number>} 0 when the call fails or the reply has no integer.
   */
  async scoreDocument(query, content) {
    // Coerce before building the prompt so a document without string content
    // cannot throw here and reject the whole rerank() batch.
    const text = content == null ? '' : String(content);
    const prompt = `Analyze relevance (0-10) of document to query. Return ONLY integer.\nQuery: "${query}"\nDoc: "${text.substring(0, 1000)}..."`;
    try {
      const res = await this.llm.generate(prompt);
      const match = String(res).match(/\d+/);
      if (!match) return 0;
      // The prompt defines a 0-10 scale; cap replies that overshoot it.
      return Math.min(10, parseInt(match[0], 10));
    } catch (err) {
      return 0;
    }
  }
}
26
+ module.exports = { LLMReranker };