vectra-js 0.9.12 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +2 -0
- package/package.json +9 -7
- package/src/backends/chroma_store.js +22 -9
- package/src/backends/huggingface.js +36 -9
- package/src/backends/milvus_store.js +7 -5
- package/src/backends/postgres_store.js +82 -121
- package/src/backends/prisma_store.js +13 -7
- package/src/backends/qdrant_store.js +17 -19
- package/src/config.js +26 -5
- package/src/core.js +447 -191
- package/src/interfaces.js +8 -2
- package/src/memory.js +35 -13
- package/src/observability.js +73 -75
- package/src/processor.js +44 -15
- package/src/reranker.js +70 -13
package/index.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
const config = require('./src/config');
|
|
2
2
|
const callbacks = require('./src/callbacks');
|
|
3
3
|
const core = require('./src/core');
|
|
4
|
+
const interfaces = require('./src/interfaces');
|
|
4
5
|
const reranker = require('./src/reranker');
|
|
5
6
|
|
|
6
7
|
module.exports = {
|
|
7
8
|
...config,
|
|
8
9
|
...callbacks,
|
|
9
10
|
...core,
|
|
11
|
+
...interfaces,
|
|
10
12
|
...reranker
|
|
11
13
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vectra-js",
|
|
3
|
-
"version": "0.9.12",
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -36,27 +36,29 @@
|
|
|
36
36
|
"dependencies": {
|
|
37
37
|
"@anthropic-ai/sdk": "^0.71.2",
|
|
38
38
|
"@google/genai": "^1.34.0",
|
|
39
|
-
"dotenv": "^16.6.1",
|
|
40
39
|
"mammoth": "^1.11.0",
|
|
41
40
|
"openai": "^6.15.0",
|
|
42
41
|
"pdf-parse": "^2.4.5",
|
|
43
42
|
"pg": "^8.16.3",
|
|
44
43
|
"sqlite3": "^5.1.7",
|
|
45
|
-
"
|
|
46
|
-
"xlsx": "^0.18.5",
|
|
47
|
-
"zod": "^3.25.76"
|
|
44
|
+
"xlsx": "^0.18.5"
|
|
48
45
|
},
|
|
49
46
|
"peerDependencies": {
|
|
50
47
|
"@prisma/client": "^5.0.0"
|
|
51
48
|
},
|
|
52
49
|
"devDependencies": {
|
|
50
|
+
"@chroma-core/default-embed": "^0.1.9",
|
|
51
|
+
"chromadb": "^3.4.0",
|
|
52
|
+
"dotenv": "^17.3.1",
|
|
53
53
|
"eslint": "^9.39.2",
|
|
54
54
|
"globals": "^16.5.0",
|
|
55
|
-
"prisma": "^7.2.0"
|
|
55
|
+
"prisma": "^7.2.0",
|
|
56
|
+
"uuid": "^13.0.0",
|
|
57
|
+
"zod": "^4.3.6"
|
|
56
58
|
},
|
|
57
59
|
"pnpm": {
|
|
58
60
|
"onlyBuiltDependencies": [
|
|
59
61
|
"sqlite3"
|
|
60
62
|
]
|
|
61
63
|
}
|
|
62
|
-
}
|
|
64
|
+
}
|
|
@@ -10,6 +10,10 @@ class ChromaVectorStore extends VectorStore {
|
|
|
10
10
|
}
|
|
11
11
|
|
|
12
12
|
async _init() {
|
|
13
|
+
if (!this.client) {
|
|
14
|
+
const { ChromaClient } = require('chromadb');
|
|
15
|
+
this.client = new ChromaClient();
|
|
16
|
+
}
|
|
13
17
|
if (!this.collection) {
|
|
14
18
|
this.collection = await this.client.getOrCreateCollection({ name: this.collectionName });
|
|
15
19
|
}
|
|
@@ -33,12 +37,19 @@ class ChromaVectorStore extends VectorStore {
|
|
|
33
37
|
const metadatas = docs.map(d => this._cleanMetadata(d.metadata));
|
|
34
38
|
const documents = docs.map(d => d.content);
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
console.log(`Adding ${docs.length} docs to Chroma collection: ${this.collectionName}`);
|
|
41
|
+
try {
|
|
42
|
+
await this.collection.add({
|
|
43
|
+
ids,
|
|
44
|
+
embeddings,
|
|
45
|
+
metadatas,
|
|
46
|
+
documents
|
|
47
|
+
});
|
|
48
|
+
console.log("Success adding docs to Chroma.");
|
|
49
|
+
} catch (e) {
|
|
50
|
+
console.error("Error in collection.add:", e);
|
|
51
|
+
throw e;
|
|
52
|
+
}
|
|
42
53
|
}
|
|
43
54
|
|
|
44
55
|
async upsertDocuments(docs) {
|
|
@@ -89,10 +100,10 @@ class ChromaVectorStore extends VectorStore {
|
|
|
89
100
|
return out;
|
|
90
101
|
}
|
|
91
102
|
|
|
92
|
-
async listDocuments({ filter = null, limit = 100,
|
|
103
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
93
104
|
await this._init();
|
|
94
105
|
const lim = Math.max(1, Math.min(1000, Number(limit) || 100));
|
|
95
|
-
const off =
|
|
106
|
+
const off = cursor ? Number(cursor) : 0;
|
|
96
107
|
const res = await this.collection.get({
|
|
97
108
|
where: filter || undefined,
|
|
98
109
|
limit: lim,
|
|
@@ -102,7 +113,9 @@ class ChromaVectorStore extends VectorStore {
|
|
|
102
113
|
const ids = Array.isArray(res?.ids) ? res.ids : [];
|
|
103
114
|
const documents = Array.isArray(res?.documents) ? res.documents : [];
|
|
104
115
|
const metadatas = Array.isArray(res?.metadatas) ? res.metadatas : [];
|
|
105
|
-
|
|
116
|
+
const docs = ids.map((id, i) => ({ id, content: documents[i], metadata: metadatas[i] }));
|
|
117
|
+
const nextCursor = docs.length === lim ? String(off + docs.length) : null;
|
|
118
|
+
return { documents: docs, nextCursor };
|
|
106
119
|
}
|
|
107
120
|
|
|
108
121
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
const https = require('https');
|
|
2
|
+
|
|
1
3
|
class HuggingFaceBackend {
|
|
2
4
|
constructor(config) {
|
|
3
5
|
this.config = config;
|
|
@@ -7,16 +9,41 @@ class HuggingFaceBackend {
|
|
|
7
9
|
}
|
|
8
10
|
|
|
9
11
|
async _post(model, payload) {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
return new Promise((resolve, reject) => {
|
|
13
|
+
const data = JSON.stringify(payload);
|
|
14
|
+
const url = new URL(`${this.baseUrl}/${encodeURIComponent(model)}`);
|
|
15
|
+
|
|
16
|
+
const options = {
|
|
17
|
+
hostname: url.hostname,
|
|
18
|
+
path: url.pathname + url.search,
|
|
19
|
+
method: 'POST',
|
|
20
|
+
headers: {
|
|
21
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
22
|
+
'Content-Type': 'application/json',
|
|
23
|
+
'Content-Length': Buffer.byteLength(data)
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
const req = https.request(options, (res) => {
|
|
28
|
+
let body = '';
|
|
29
|
+
res.on('data', (chunk) => body += chunk);
|
|
30
|
+
res.on('end', () => {
|
|
31
|
+
if (res.statusCode < 200 || res.statusCode >= 300) {
|
|
32
|
+
reject(new Error(`HF error ${res.statusCode}: ${body}`));
|
|
33
|
+
} else {
|
|
34
|
+
try {
|
|
35
|
+
resolve(JSON.parse(body));
|
|
36
|
+
} catch(e) {
|
|
37
|
+
resolve(body);
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
});
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
req.on('error', reject);
|
|
44
|
+
req.write(data);
|
|
45
|
+
req.end();
|
|
14
46
|
});
|
|
15
|
-
if (!res.ok) {
|
|
16
|
-
const t = await res.text();
|
|
17
|
-
throw new Error(`HF error ${res.status}: ${t}`);
|
|
18
|
-
}
|
|
19
|
-
return await res.json();
|
|
20
47
|
}
|
|
21
48
|
|
|
22
49
|
async embedDocuments(texts) {
|
|
@@ -30,19 +30,21 @@ class MilvusVectorStore extends VectorStore {
|
|
|
30
30
|
}
|
|
31
31
|
async hybridSearch(text, vector, limit = 5, filter = null) { return this.similaritySearch(vector, limit, filter); }
|
|
32
32
|
|
|
33
|
-
async listDocuments({ filter = null, limit = 100,
|
|
33
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
34
34
|
if (typeof this.client.query !== 'function') throw new Error('listDocuments is not supported for this Milvus client');
|
|
35
|
-
const lim = Math.max(1,
|
|
36
|
-
const off =
|
|
35
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
36
|
+
const off = cursor ? Number(cursor) : 0;
|
|
37
37
|
const res = await this.client.query({
|
|
38
38
|
collection_name: this.collection,
|
|
39
39
|
expr: filter || '',
|
|
40
|
-
output_fields: ['content', 'metadata'],
|
|
40
|
+
output_fields: ['id', 'content', 'metadata'],
|
|
41
41
|
limit: lim,
|
|
42
42
|
offset: off,
|
|
43
43
|
});
|
|
44
44
|
const rows = Array.isArray(res) ? res : (res?.data || res?.results || []);
|
|
45
|
-
|
|
45
|
+
const docs = rows.map((r) => ({ id: r.id, content: r.content || '', metadata: r.metadata ? JSON.parse(r.metadata) : {} }));
|
|
46
|
+
const nextCursor = docs.length === lim ? String(off + docs.length) : null;
|
|
47
|
+
return { documents: docs, nextCursor };
|
|
46
48
|
}
|
|
47
49
|
|
|
48
50
|
async fileExists(sha256, size, lastModified) {
|
|
@@ -37,6 +37,14 @@ class PostgresVectorStore extends VectorStore {
|
|
|
37
37
|
this.client = this.config.clientInstance;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
|
+
async _withConn(fn) {
|
|
41
|
+
if (typeof this.client.connect === 'function') {
|
|
42
|
+
const client = await this.client.connect();
|
|
43
|
+
try { return await fn(client); } finally { client.release(); }
|
|
44
|
+
}
|
|
45
|
+
return fn(this.client);
|
|
46
|
+
}
|
|
47
|
+
|
|
40
48
|
normalizeVector(v) {
|
|
41
49
|
const m = Math.sqrt(v.reduce((s, x) => s + x * x, 0));
|
|
42
50
|
return m === 0 ? v : v.map(x => x / m);
|
|
@@ -44,129 +52,57 @@ class PostgresVectorStore extends VectorStore {
|
|
|
44
52
|
|
|
45
53
|
// Helper to ensure table and extension exist
|
|
46
54
|
async ensureIndexes() {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
FROM information_schema.columns
|
|
55
|
-
WHERE table_name = $1 AND column_name = $2`,
|
|
56
|
-
[this._tableBase, this._cVec.replace(/"/g, '')]
|
|
57
|
-
);
|
|
58
|
-
const row = typeCheck.rows[0];
|
|
59
|
-
if (row) {
|
|
60
|
-
const isPgVector = row.udt_name === 'vector';
|
|
61
|
-
const isArray = row.data_type && row.data_type.toLowerCase().includes('array');
|
|
62
|
-
if (isArray && !isPgVector) {
|
|
63
|
-
throw new Error(
|
|
64
|
-
'Postgres schema mismatch: vector column is double precision[] (array). ' +
|
|
65
|
-
'Use pgvector type: vector(<dimensions>). ' +
|
|
66
|
-
'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(1536);'
|
|
67
|
-
);
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
} catch (e) {
|
|
71
|
-
// Only throw if we explicitly detected array type; otherwise continue
|
|
72
|
-
if (String(e.message || e).includes('schema mismatch')) {
|
|
73
|
-
throw e;
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
// Create table if not exists (best-effort)
|
|
78
|
-
// Note: We need to know vector dimensions. We'll try to guess or use default 1536
|
|
79
|
-
// If embedding dimensions are provided in config, use them
|
|
80
|
-
// But store config usually doesn't have embedding config directly unless passed down
|
|
81
|
-
// For now we will assume the user creates the table or we default to 1536 (OpenAI)
|
|
82
|
-
// A better approach is to rely on user schema, but for convenience:
|
|
83
|
-
const dim = 1536; // Default to OpenAI dimension if unknown.
|
|
84
|
-
// However, if the table exists, we don't change it.
|
|
85
|
-
|
|
86
|
-
const createTableQuery = `
|
|
87
|
-
CREATE TABLE IF NOT EXISTS ${this._table} (
|
|
88
|
-
"id" TEXT PRIMARY KEY,
|
|
89
|
-
${this._cContent} TEXT,
|
|
90
|
-
${this._cMeta} JSONB,
|
|
91
|
-
${this._cVec} vector(${dim}),
|
|
92
|
-
"createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()
|
|
93
|
-
)
|
|
94
|
-
`;
|
|
95
|
-
await this.client.query(createTableQuery);
|
|
96
|
-
|
|
97
|
-
// Ensure required columns exist (non-destructive)
|
|
98
|
-
try {
|
|
99
|
-
const res = await this.client.query(
|
|
100
|
-
`SELECT column_name, data_type, udt_name
|
|
101
|
-
FROM information_schema.columns
|
|
102
|
-
WHERE table_name = $1`,
|
|
103
|
-
[this._tableBase]
|
|
104
|
-
);
|
|
105
|
-
const cols = new Map(res.rows.map(r => [r.column_name, r]));
|
|
106
|
-
const contentCol = this._cContent.replace(/"/g, '');
|
|
107
|
-
const metaCol = this._cMeta.replace(/"/g, '');
|
|
108
|
-
const vecCol = this._cVec.replace(/"/g, '');
|
|
109
|
-
const createdAtCol = this._cCreatedAt.replace(/"/g, '');
|
|
110
|
-
|
|
111
|
-
if (!cols.has(contentCol)) {
|
|
112
|
-
await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
|
|
113
|
-
}
|
|
114
|
-
if (!cols.has(metaCol)) {
|
|
115
|
-
await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
|
|
116
|
-
}
|
|
117
|
-
if (!cols.has(vecCol)) {
|
|
118
|
-
await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
|
|
119
|
-
} else {
|
|
120
|
-
const vinfo = cols.get(vecCol);
|
|
121
|
-
const isPgVector = vinfo && vinfo.udt_name === 'vector';
|
|
122
|
-
const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
|
|
123
|
-
if (isArray && !isPgVector) {
|
|
124
|
-
throw new Error(
|
|
125
|
-
'Postgres schema mismatch: vector column is double precision[] (array). ' +
|
|
126
|
-
'Use pgvector type: vector(' + dim + '). ' +
|
|
127
|
-
'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
|
|
55
|
+
await this._withConn(async (client) => {
|
|
56
|
+
await client.query('CREATE EXTENSION IF NOT EXISTS vector');
|
|
57
|
+
|
|
58
|
+
try {
|
|
59
|
+
const typeCheck = await client.query(
|
|
60
|
+
`SELECT data_type, udt_name FROM information_schema.columns WHERE table_name = $1 AND column_name = $2`,
|
|
61
|
+
[this._tableBase, this._cVec.replace(/"/g, '')]
|
|
128
62
|
);
|
|
63
|
+
const row = typeCheck.rows[0];
|
|
64
|
+
if (row && row.data_type && row.data_type.toLowerCase().includes('array') && row.udt_name !== 'vector') {
|
|
65
|
+
throw new Error('Postgres schema mismatch: vector column is array. Use vector(<dimensions>).');
|
|
66
|
+
}
|
|
67
|
+
} catch (e) {
|
|
68
|
+
if (String(e.message || e).includes('schema mismatch')) throw e;
|
|
129
69
|
}
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
await
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING hnsw (${this._cVec} vector_cosine_ops)`);
|
|
144
|
-
} catch (e) {
|
|
145
|
-
// Fallback to ivfflat when hnsw not supported
|
|
70
|
+
|
|
71
|
+
const dim = 1536;
|
|
72
|
+
await client.query(`CREATE TABLE IF NOT EXISTS ${this._table} ("id" TEXT PRIMARY KEY, ${this._cContent} TEXT, ${this._cMeta} JSONB, ${this._cVec} vector(${dim}), "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW())`);
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
const res = await client.query(`SELECT column_name FROM information_schema.columns WHERE table_name = $1`, [this._tableBase]);
|
|
76
|
+
const cols = new Set(res.rows.map(r => r.column_name));
|
|
77
|
+
if (!cols.has(this._cContent.replace(/"/g, ''))) await client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
|
|
78
|
+
if (!cols.has(this._cMeta.replace(/"/g, ''))) await client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
|
|
79
|
+
if (!cols.has(this._cVec.replace(/"/g, ''))) await client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
|
|
80
|
+
if (!cols.has('createdAt')) await client.query(`ALTER TABLE ${this._table} ADD COLUMN "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
|
|
81
|
+
} catch (_) {}
|
|
82
|
+
|
|
146
83
|
try {
|
|
147
|
-
|
|
148
|
-
} catch (
|
|
149
|
-
|
|
84
|
+
await client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING hnsw (${this._cVec} vector_cosine_ops)`);
|
|
85
|
+
} catch (e) {
|
|
86
|
+
try { await client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops)`); } catch (_) {}
|
|
150
87
|
}
|
|
151
|
-
}
|
|
88
|
+
});
|
|
152
89
|
}
|
|
153
90
|
|
|
154
91
|
async addDocuments(docs) {
|
|
155
92
|
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW()) ON CONFLICT ("id") DO NOTHING`;
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
93
|
+
await this._withConn(async (client) => {
|
|
94
|
+
for (const doc of docs) {
|
|
95
|
+
const id = doc.id || uuidv4();
|
|
96
|
+
const vec = `[${this.normalizeVector(doc.embedding).join(',')}]`;
|
|
97
|
+
try {
|
|
98
|
+
await client.query(q, [id, doc.content, doc.metadata, vec]);
|
|
99
|
+
} catch (e) {
|
|
100
|
+
const msg = e?.message || String(e);
|
|
101
|
+
if (msg.includes('vector') && msg.includes('dimension')) throw new Error('DimensionMismatchError');
|
|
102
|
+
throw e;
|
|
166
103
|
}
|
|
167
|
-
throw e;
|
|
168
104
|
}
|
|
169
|
-
}
|
|
105
|
+
});
|
|
170
106
|
}
|
|
171
107
|
|
|
172
108
|
async upsertDocuments(docs) {
|
|
@@ -180,11 +116,13 @@ class PostgresVectorStore extends VectorStore {
|
|
|
180
116
|
${this._cVec} = EXCLUDED.${this._cVec}
|
|
181
117
|
`;
|
|
182
118
|
|
|
183
|
-
|
|
184
|
-
const
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
119
|
+
await this._withConn(async (client) => {
|
|
120
|
+
for (const doc of docs) {
|
|
121
|
+
const id = doc.id || uuidv4();
|
|
122
|
+
const vec = `[${this.normalizeVector(doc.embedding).join(',')}]`;
|
|
123
|
+
await client.query(q, [id, doc.content, doc.metadata, vec]);
|
|
124
|
+
}
|
|
125
|
+
});
|
|
188
126
|
}
|
|
189
127
|
|
|
190
128
|
async similaritySearch(vector, limit = 5, filter = null) {
|
|
@@ -208,7 +146,7 @@ class PostgresVectorStore extends VectorStore {
|
|
|
208
146
|
`;
|
|
209
147
|
params.push(Math.max(1, Number(limit) || 5));
|
|
210
148
|
|
|
211
|
-
const res = await this.
|
|
149
|
+
const res = await this._withConn(c => c.query(q, params));
|
|
212
150
|
return res.rows.map(r => ({ content: r.content, metadata: r.metadata, score: r.score }));
|
|
213
151
|
}
|
|
214
152
|
|
|
@@ -237,7 +175,7 @@ class PostgresVectorStore extends VectorStore {
|
|
|
237
175
|
|
|
238
176
|
let lexical = [];
|
|
239
177
|
try {
|
|
240
|
-
const res = await this.
|
|
178
|
+
const res = await this._withConn(c => c.query(q, params));
|
|
241
179
|
lexical = res.rows.map(r => ({ content: r.content, metadata: r.metadata, score: 1.0 }));
|
|
242
180
|
} catch (e) {
|
|
243
181
|
console.warn("Keyword search failed (maybe missing indexes):", e.message);
|
|
@@ -262,6 +200,29 @@ class PostgresVectorStore extends VectorStore {
|
|
|
262
200
|
return Object.values(combined).sort((a, b) => b.score - a.score).slice(0, limit);
|
|
263
201
|
}
|
|
264
202
|
|
|
203
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
204
|
+
return this._withConn(async (client) => {
|
|
205
|
+
const params = [];
|
|
206
|
+
const whereParts = [];
|
|
207
|
+
if (filter) {
|
|
208
|
+
whereParts.push(`${this._cMeta} @> $${params.length + 1}`);
|
|
209
|
+
params.push(filter);
|
|
210
|
+
}
|
|
211
|
+
if (cursor) {
|
|
212
|
+
whereParts.push(`"id" > $${params.length + 1}`);
|
|
213
|
+
params.push(cursor);
|
|
214
|
+
}
|
|
215
|
+
const where = whereParts.length ? `WHERE ${whereParts.join(' AND ')}` : '';
|
|
216
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
217
|
+
const q = `SELECT "id", ${this._cContent} as content, ${this._cMeta} as metadata FROM ${this._table} ${where} ORDER BY "id" ASC LIMIT $${params.length + 1}`;
|
|
218
|
+
params.push(lim);
|
|
219
|
+
const res = await client.query(q, params);
|
|
220
|
+
const docs = res.rows.map(r => ({ id: r.id, content: r.content, metadata: r.metadata }));
|
|
221
|
+
const nextCursor = docs.length === lim ? docs[docs.length - 1].id : null;
|
|
222
|
+
return { documents: docs, nextCursor };
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
|
|
265
226
|
async fileExists(sha256, size, lastModified) {
|
|
266
227
|
try {
|
|
267
228
|
const q = `
|
|
@@ -271,7 +232,7 @@ class PostgresVectorStore extends VectorStore {
|
|
|
271
232
|
LIMIT 1
|
|
272
233
|
`;
|
|
273
234
|
const metaFilter = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
|
|
274
|
-
const res = await this.
|
|
235
|
+
const res = await this._withConn(c => c.query(q, [metaFilter]));
|
|
275
236
|
return res.rowCount > 0;
|
|
276
237
|
} catch (_) {
|
|
277
238
|
return false;
|
|
@@ -176,19 +176,25 @@ class PrismaVectorStore extends VectorStore {
|
|
|
176
176
|
}
|
|
177
177
|
}
|
|
178
178
|
|
|
179
|
-
async listDocuments({ filter = null, limit = 100,
|
|
179
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
180
180
|
const { clientInstance } = this.config;
|
|
181
181
|
const params = [];
|
|
182
|
-
|
|
182
|
+
const whereParts = [];
|
|
183
183
|
if (filter) {
|
|
184
|
-
|
|
184
|
+
whereParts.push(`${this._cMeta} @> $${params.length + 1}::jsonb`);
|
|
185
185
|
params.push(JSON.stringify(filter));
|
|
186
186
|
}
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
187
|
+
if (cursor) {
|
|
188
|
+
whereParts.push(`"id" > $${params.length + 1}`);
|
|
189
|
+
params.push(cursor);
|
|
190
|
+
}
|
|
191
|
+
const where = whereParts.length ? `WHERE ${whereParts.join(' AND ')}` : '';
|
|
192
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
193
|
+
const q = `SELECT "id" as id, ${this._cContent} as content, ${this._cMeta} as metadata FROM ${this._table} ${where} ORDER BY "id" ASC LIMIT ${lim}`;
|
|
190
194
|
const res = await clientInstance.$queryRawUnsafe(q, ...params);
|
|
191
|
-
|
|
195
|
+
const docs = res.map(r => ({ id: r.id, content: r.content, metadata: r.metadata }));
|
|
196
|
+
const nextCursor = docs.length === lim ? docs[docs.length - 1].id : null;
|
|
197
|
+
return { documents: docs, nextCursor };
|
|
192
198
|
}
|
|
193
199
|
|
|
194
200
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
@@ -30,27 +30,25 @@ class QdrantVectorStore extends VectorStore {
|
|
|
30
30
|
return res.map(r => ({ content: r.payload.content, metadata: r.payload.metadata, score: r.score }));
|
|
31
31
|
}
|
|
32
32
|
async hybridSearch(text, vector, limit = 5, filter = null) { return this.similaritySearch(vector, limit, filter); }
|
|
33
|
-
async listDocuments({ filter = null, limit = 100,
|
|
33
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
34
34
|
if (typeof this.client.scroll !== 'function') throw new Error('listDocuments is not supported for this Qdrant client');
|
|
35
35
|
const qFilter = this.normalizeFilter(filter);
|
|
36
|
-
const lim = Math.max(1,
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
}
|
|
53
|
-
return out;
|
|
36
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
37
|
+
const res = await this.client.scroll(this.collection, {
|
|
38
|
+
limit: lim,
|
|
39
|
+
filter: qFilter,
|
|
40
|
+
offset: cursor || undefined,
|
|
41
|
+
with_payload: true,
|
|
42
|
+
with_vector: false
|
|
43
|
+
});
|
|
44
|
+
const points = res?.points || res?.result?.points || [];
|
|
45
|
+
const nextCursor = res?.next_page_offset || res?.result?.next_page_offset;
|
|
46
|
+
const docs = points.map(p => ({
|
|
47
|
+
id: p.id,
|
|
48
|
+
content: p.payload?.content,
|
|
49
|
+
metadata: p.payload?.metadata
|
|
50
|
+
}));
|
|
51
|
+
return { documents: docs, nextCursor };
|
|
54
52
|
}
|
|
55
53
|
async fileExists(sha256, size, lastModified) {
|
|
56
54
|
const filter = this.normalizeFilter({ fileSHA256: sha256, fileSize: size, lastModified });
|
package/src/config.js
CHANGED
|
@@ -22,6 +22,13 @@ const RetrievalStrategy = {
|
|
|
22
22
|
MMR: 'mmr'
|
|
23
23
|
};
|
|
24
24
|
|
|
25
|
+
const RerankingProvider = {
|
|
26
|
+
LLM: 'llm',
|
|
27
|
+
CROSS_ENCODER: 'cross-encoder',
|
|
28
|
+
COHERE: 'cohere',
|
|
29
|
+
JINA: 'jina'
|
|
30
|
+
};
|
|
31
|
+
|
|
25
32
|
const EmbeddingConfigSchema = z.object({
|
|
26
33
|
provider: z.nativeEnum(ProviderType),
|
|
27
34
|
apiKey: z.string().optional(),
|
|
@@ -52,8 +59,10 @@ const ChunkingConfigSchema = z.object({
|
|
|
52
59
|
|
|
53
60
|
const RerankingConfigSchema = z.object({
|
|
54
61
|
enabled: z.boolean().default(false),
|
|
55
|
-
provider: z.
|
|
62
|
+
provider: z.nativeEnum(RerankingProvider).default(RerankingProvider.LLM),
|
|
56
63
|
llmConfig: LLMConfigSchema.optional(),
|
|
64
|
+
modelName: z.string().optional(),
|
|
65
|
+
apiKey: z.string().optional(),
|
|
57
66
|
topN: z.number().default(5),
|
|
58
67
|
windowSize: z.number().default(20)
|
|
59
68
|
});
|
|
@@ -69,6 +78,14 @@ const RetrievalConfigSchema = z.object({
|
|
|
69
78
|
return true;
|
|
70
79
|
}, { message: "llmConfig required for advanced retrieval", path: ["llmConfig"] });
|
|
71
80
|
|
|
81
|
+
const GuardrailConfigSchema = z.object({
|
|
82
|
+
blockPii: z.boolean().default(false),
|
|
83
|
+
blockOffTopic: z.boolean().default(false),
|
|
84
|
+
maxQueryLength: z.number().default(2000),
|
|
85
|
+
contentFilter: z.boolean().default(false),
|
|
86
|
+
hallucinationCheck: z.boolean().default(false)
|
|
87
|
+
});
|
|
88
|
+
|
|
72
89
|
const DatabaseConfigSchema = z.object({
|
|
73
90
|
type: z.string(), // 'prisma', 'chroma', etc.
|
|
74
91
|
tableName: z.string().optional(),
|
|
@@ -117,7 +134,9 @@ const RAGConfigSchema = z.object({
|
|
|
117
134
|
generation: z.object({ structuredOutput: z.enum(['none','citations']).default('none'), outputFormat: z.enum(['text','json']).default('text') }).optional(),
|
|
118
135
|
prompts: z.object({ query: z.string().optional(), reranking: z.string().optional() }).optional(),
|
|
119
136
|
tracing: z.object({ enable: z.boolean().default(false) }).optional(),
|
|
120
|
-
|
|
137
|
+
maxCacheSize: z.number().default(10000),
|
|
138
|
+
callbacks: z.array(z.custom((_) => true)).optional(),
|
|
139
|
+
middlewares: z.array(z.custom((_) => true)).default([]),
|
|
121
140
|
observability: z.object({
|
|
122
141
|
enabled: z.boolean().default(false),
|
|
123
142
|
sqlitePath: z.string().default('vectra-observability.db'),
|
|
@@ -126,11 +145,13 @@ const RAGConfigSchema = z.object({
|
|
|
126
145
|
trackTraces: z.boolean().default(true),
|
|
127
146
|
trackLogs: z.boolean().default(true),
|
|
128
147
|
sessionTracking: z.boolean().default(true)
|
|
129
|
-
}).default({})
|
|
148
|
+
}).default({}),
|
|
149
|
+
guardrails: GuardrailConfigSchema.default({})
|
|
130
150
|
});
|
|
131
151
|
|
|
132
152
|
module.exports = {
|
|
133
|
-
ProviderType, ChunkingStrategy, RetrievalStrategy,
|
|
153
|
+
ProviderType, ChunkingStrategy, RetrievalStrategy, RerankingProvider,
|
|
134
154
|
EmbeddingConfigSchema, LLMConfigSchema, ChunkingConfigSchema,
|
|
135
|
-
RetrievalConfigSchema, RerankingConfigSchema, DatabaseConfigSchema, RAGConfigSchema
|
|
155
|
+
RetrievalConfigSchema, RerankingConfigSchema, DatabaseConfigSchema, RAGConfigSchema,
|
|
156
|
+
GuardrailConfigSchema
|
|
136
157
|
};
|