vectra-js 0.9.11 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +2 -0
- package/package.json +10 -8
- package/src/backends/chroma_store.js +22 -9
- package/src/backends/huggingface.js +36 -9
- package/src/backends/milvus_store.js +29 -5
- package/src/backends/postgres_store.js +104 -52
- package/src/backends/prisma_store.js +63 -8
- package/src/backends/qdrant_store.js +25 -17
- package/src/config.js +26 -5
- package/src/core.js +447 -191
- package/src/interfaces.js +8 -2
- package/src/memory.js +35 -13
- package/src/observability.js +73 -75
- package/src/processor.js +44 -15
- package/src/reranker.js +70 -13
- package/src/telemetry.js +1 -1
package/index.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
const config = require('./src/config');
|
|
2
2
|
const callbacks = require('./src/callbacks');
|
|
3
3
|
const core = require('./src/core');
|
|
4
|
+
const interfaces = require('./src/interfaces');
|
|
4
5
|
const reranker = require('./src/reranker');
|
|
5
6
|
|
|
6
7
|
module.exports = {
|
|
7
8
|
...config,
|
|
8
9
|
...callbacks,
|
|
9
10
|
...core,
|
|
11
|
+
...interfaces,
|
|
10
12
|
...reranker
|
|
11
13
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vectra-js",
|
|
3
|
-
"version": "0.9.11",
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -34,29 +34,31 @@
|
|
|
34
34
|
"author": "Abhishek N",
|
|
35
35
|
"license": "GPL-3.0",
|
|
36
36
|
"dependencies": {
|
|
37
|
-
"@anthropic-ai/sdk": "^0.
|
|
37
|
+
"@anthropic-ai/sdk": "^0.71.2",
|
|
38
38
|
"@google/genai": "^1.34.0",
|
|
39
|
-
"dotenv": "^16.6.1",
|
|
40
39
|
"mammoth": "^1.11.0",
|
|
41
40
|
"openai": "^6.15.0",
|
|
42
41
|
"pdf-parse": "^2.4.5",
|
|
43
42
|
"pg": "^8.16.3",
|
|
44
43
|
"sqlite3": "^5.1.7",
|
|
45
|
-
"
|
|
46
|
-
"xlsx": "^0.18.5",
|
|
47
|
-
"zod": "^3.25.76"
|
|
44
|
+
"xlsx": "^0.18.5"
|
|
48
45
|
},
|
|
49
46
|
"peerDependencies": {
|
|
50
47
|
"@prisma/client": "^5.0.0"
|
|
51
48
|
},
|
|
52
49
|
"devDependencies": {
|
|
50
|
+
"@chroma-core/default-embed": "^0.1.9",
|
|
51
|
+
"chromadb": "^3.4.0",
|
|
52
|
+
"dotenv": "^17.3.1",
|
|
53
53
|
"eslint": "^9.39.2",
|
|
54
54
|
"globals": "^16.5.0",
|
|
55
|
-
"prisma": "^7.2.0"
|
|
55
|
+
"prisma": "^7.2.0",
|
|
56
|
+
"uuid": "^13.0.0",
|
|
57
|
+
"zod": "^4.3.6"
|
|
56
58
|
},
|
|
57
59
|
"pnpm": {
|
|
58
60
|
"onlyBuiltDependencies": [
|
|
59
61
|
"sqlite3"
|
|
60
62
|
]
|
|
61
63
|
}
|
|
62
|
-
}
|
|
64
|
+
}
|
|
@@ -10,6 +10,10 @@ class ChromaVectorStore extends VectorStore {
|
|
|
10
10
|
}
|
|
11
11
|
|
|
12
12
|
async _init() {
|
|
13
|
+
if (!this.client) {
|
|
14
|
+
const { ChromaClient } = require('chromadb');
|
|
15
|
+
this.client = new ChromaClient();
|
|
16
|
+
}
|
|
13
17
|
if (!this.collection) {
|
|
14
18
|
this.collection = await this.client.getOrCreateCollection({ name: this.collectionName });
|
|
15
19
|
}
|
|
@@ -33,12 +37,19 @@ class ChromaVectorStore extends VectorStore {
|
|
|
33
37
|
const metadatas = docs.map(d => this._cleanMetadata(d.metadata));
|
|
34
38
|
const documents = docs.map(d => d.content);
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
console.log(`Adding ${docs.length} docs to Chroma collection: ${this.collectionName}`);
|
|
41
|
+
try {
|
|
42
|
+
await this.collection.add({
|
|
43
|
+
ids,
|
|
44
|
+
embeddings,
|
|
45
|
+
metadatas,
|
|
46
|
+
documents
|
|
47
|
+
});
|
|
48
|
+
console.log("Success adding docs to Chroma.");
|
|
49
|
+
} catch (e) {
|
|
50
|
+
console.error("Error in collection.add:", e);
|
|
51
|
+
throw e;
|
|
52
|
+
}
|
|
42
53
|
}
|
|
43
54
|
|
|
44
55
|
async upsertDocuments(docs) {
|
|
@@ -89,10 +100,10 @@ class ChromaVectorStore extends VectorStore {
|
|
|
89
100
|
return out;
|
|
90
101
|
}
|
|
91
102
|
|
|
92
|
-
async listDocuments({ filter = null, limit = 100,
|
|
103
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
93
104
|
await this._init();
|
|
94
105
|
const lim = Math.max(1, Math.min(1000, Number(limit) || 100));
|
|
95
|
-
const off =
|
|
106
|
+
const off = cursor ? Number(cursor) : 0;
|
|
96
107
|
const res = await this.collection.get({
|
|
97
108
|
where: filter || undefined,
|
|
98
109
|
limit: lim,
|
|
@@ -102,7 +113,9 @@ class ChromaVectorStore extends VectorStore {
|
|
|
102
113
|
const ids = Array.isArray(res?.ids) ? res.ids : [];
|
|
103
114
|
const documents = Array.isArray(res?.documents) ? res.documents : [];
|
|
104
115
|
const metadatas = Array.isArray(res?.metadatas) ? res.metadatas : [];
|
|
105
|
-
|
|
116
|
+
const docs = ids.map((id, i) => ({ id, content: documents[i], metadata: metadatas[i] }));
|
|
117
|
+
const nextCursor = docs.length === lim ? String(off + docs.length) : null;
|
|
118
|
+
return { documents: docs, nextCursor };
|
|
106
119
|
}
|
|
107
120
|
|
|
108
121
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
const https = require('https');
|
|
2
|
+
|
|
1
3
|
class HuggingFaceBackend {
|
|
2
4
|
constructor(config) {
|
|
3
5
|
this.config = config;
|
|
@@ -7,16 +9,41 @@ class HuggingFaceBackend {
|
|
|
7
9
|
}
|
|
8
10
|
|
|
9
11
|
async _post(model, payload) {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
return new Promise((resolve, reject) => {
|
|
13
|
+
const data = JSON.stringify(payload);
|
|
14
|
+
const url = new URL(`${this.baseUrl}/${encodeURIComponent(model)}`);
|
|
15
|
+
|
|
16
|
+
const options = {
|
|
17
|
+
hostname: url.hostname,
|
|
18
|
+
path: url.pathname + url.search,
|
|
19
|
+
method: 'POST',
|
|
20
|
+
headers: {
|
|
21
|
+
'Authorization': `Bearer ${this.apiKey}`,
|
|
22
|
+
'Content-Type': 'application/json',
|
|
23
|
+
'Content-Length': Buffer.byteLength(data)
|
|
24
|
+
}
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
const req = https.request(options, (res) => {
|
|
28
|
+
let body = '';
|
|
29
|
+
res.on('data', (chunk) => body += chunk);
|
|
30
|
+
res.on('end', () => {
|
|
31
|
+
if (res.statusCode < 200 || res.statusCode >= 300) {
|
|
32
|
+
reject(new Error(`HF error ${res.statusCode}: ${body}`));
|
|
33
|
+
} else {
|
|
34
|
+
try {
|
|
35
|
+
resolve(JSON.parse(body));
|
|
36
|
+
} catch(e) {
|
|
37
|
+
resolve(body);
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
});
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
req.on('error', reject);
|
|
44
|
+
req.write(data);
|
|
45
|
+
req.end();
|
|
14
46
|
});
|
|
15
|
-
if (!res.ok) {
|
|
16
|
-
const t = await res.text();
|
|
17
|
-
throw new Error(`HF error ${res.status}: ${t}`);
|
|
18
|
-
}
|
|
19
|
-
return await res.json();
|
|
20
47
|
}
|
|
21
48
|
|
|
22
49
|
async embedDocuments(texts) {
|
|
@@ -30,19 +30,43 @@ class MilvusVectorStore extends VectorStore {
|
|
|
30
30
|
}
|
|
31
31
|
async hybridSearch(text, vector, limit = 5, filter = null) { return this.similaritySearch(vector, limit, filter); }
|
|
32
32
|
|
|
33
|
-
async listDocuments({ filter = null, limit = 100,
|
|
33
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
34
34
|
if (typeof this.client.query !== 'function') throw new Error('listDocuments is not supported for this Milvus client');
|
|
35
|
-
const lim = Math.max(1,
|
|
36
|
-
const off =
|
|
35
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
36
|
+
const off = cursor ? Number(cursor) : 0;
|
|
37
37
|
const res = await this.client.query({
|
|
38
38
|
collection_name: this.collection,
|
|
39
39
|
expr: filter || '',
|
|
40
|
-
output_fields: ['content', 'metadata'],
|
|
40
|
+
output_fields: ['id', 'content', 'metadata'],
|
|
41
41
|
limit: lim,
|
|
42
42
|
offset: off,
|
|
43
43
|
});
|
|
44
44
|
const rows = Array.isArray(res) ? res : (res?.data || res?.results || []);
|
|
45
|
-
|
|
45
|
+
const docs = rows.map((r) => ({ id: r.id, content: r.content || '', metadata: r.metadata ? JSON.parse(r.metadata) : {} }));
|
|
46
|
+
const nextCursor = docs.length === lim ? String(off + docs.length) : null;
|
|
47
|
+
return { documents: docs, nextCursor };
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
async fileExists(sha256, size, lastModified) {
|
|
51
|
+
if (typeof this.client.query !== 'function') return false;
|
|
52
|
+
try {
|
|
53
|
+
const expr = '';
|
|
54
|
+
const res = await this.client.query({
|
|
55
|
+
collection_name: this.collection,
|
|
56
|
+
expr,
|
|
57
|
+
output_fields: ['content', 'metadata'],
|
|
58
|
+
limit: 1
|
|
59
|
+
});
|
|
60
|
+
const rows = Array.isArray(res) ? res : (res?.data || res?.results || []);
|
|
61
|
+
return rows.some((r) => {
|
|
62
|
+
try {
|
|
63
|
+
const m = r.metadata ? JSON.parse(r.metadata) : {};
|
|
64
|
+
return m.fileSHA256 === sha256 && m.fileSize === size && m.lastModified === lastModified;
|
|
65
|
+
} catch (_) { return false; }
|
|
66
|
+
});
|
|
67
|
+
} catch (_) {
|
|
68
|
+
return false;
|
|
69
|
+
}
|
|
46
70
|
}
|
|
47
71
|
|
|
48
72
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
@@ -24,9 +24,11 @@ class PostgresVectorStore extends VectorStore {
|
|
|
24
24
|
const tableName = config.tableName || 'document';
|
|
25
25
|
const columnMap = config.columnMap || {};
|
|
26
26
|
this._table = quoteTableName(tableName, 'tableName');
|
|
27
|
+
this._tableBase = tableName.split('.').pop();
|
|
27
28
|
this._cContent = quoteIdentifier(columnMap.content || 'content', 'columnMap.content');
|
|
28
29
|
this._cMeta = quoteIdentifier(columnMap.metadata || 'metadata', 'columnMap.metadata');
|
|
29
30
|
this._cVec = quoteIdentifier(columnMap.vector || 'vector', 'columnMap.vector');
|
|
31
|
+
this._cCreatedAt = '"createdAt"';
|
|
30
32
|
|
|
31
33
|
// We expect config.clientInstance to be a pg.Pool or pg.Client
|
|
32
34
|
if (!this.config.clientInstance) {
|
|
@@ -35,6 +37,14 @@ class PostgresVectorStore extends VectorStore {
|
|
|
35
37
|
this.client = this.config.clientInstance;
|
|
36
38
|
}
|
|
37
39
|
|
|
40
|
+
async _withConn(fn) {
|
|
41
|
+
if (typeof this.client.connect === 'function') {
|
|
42
|
+
const client = await this.client.connect();
|
|
43
|
+
try { return await fn(client); } finally { client.release(); }
|
|
44
|
+
}
|
|
45
|
+
return fn(this.client);
|
|
46
|
+
}
|
|
47
|
+
|
|
38
48
|
normalizeVector(v) {
|
|
39
49
|
const m = Math.sqrt(v.reduce((s, x) => s + x * x, 0));
|
|
40
50
|
return m === 0 ? v : v.map(x => x / m);
|
|
@@ -42,56 +52,57 @@ class PostgresVectorStore extends VectorStore {
|
|
|
42
52
|
|
|
43
53
|
// Helper to ensure table and extension exist
|
|
44
54
|
async ensureIndexes() {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
${this._cVec} vector(${dim}),
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
55
|
+
await this._withConn(async (client) => {
|
|
56
|
+
await client.query('CREATE EXTENSION IF NOT EXISTS vector');
|
|
57
|
+
|
|
58
|
+
try {
|
|
59
|
+
const typeCheck = await client.query(
|
|
60
|
+
`SELECT data_type, udt_name FROM information_schema.columns WHERE table_name = $1 AND column_name = $2`,
|
|
61
|
+
[this._tableBase, this._cVec.replace(/"/g, '')]
|
|
62
|
+
);
|
|
63
|
+
const row = typeCheck.rows[0];
|
|
64
|
+
if (row && row.data_type && row.data_type.toLowerCase().includes('array') && row.udt_name !== 'vector') {
|
|
65
|
+
throw new Error('Postgres schema mismatch: vector column is array. Use vector(<dimensions>).');
|
|
66
|
+
}
|
|
67
|
+
} catch (e) {
|
|
68
|
+
if (String(e.message || e).includes('schema mismatch')) throw e;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const dim = 1536;
|
|
72
|
+
await client.query(`CREATE TABLE IF NOT EXISTS ${this._table} ("id" TEXT PRIMARY KEY, ${this._cContent} TEXT, ${this._cMeta} JSONB, ${this._cVec} vector(${dim}), "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW())`);
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
const res = await client.query(`SELECT column_name FROM information_schema.columns WHERE table_name = $1`, [this._tableBase]);
|
|
76
|
+
const cols = new Set(res.rows.map(r => r.column_name));
|
|
77
|
+
if (!cols.has(this._cContent.replace(/"/g, ''))) await client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
|
|
78
|
+
if (!cols.has(this._cMeta.replace(/"/g, ''))) await client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
|
|
79
|
+
if (!cols.has(this._cVec.replace(/"/g, ''))) await client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
|
|
80
|
+
if (!cols.has('createdAt')) await client.query(`ALTER TABLE ${this._table} ADD COLUMN "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
|
|
81
|
+
} catch (_) {}
|
|
82
|
+
|
|
83
|
+
try {
|
|
84
|
+
await client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING hnsw (${this._cVec} vector_cosine_ops)`);
|
|
85
|
+
} catch (e) {
|
|
86
|
+
try { await client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops)`); } catch (_) {}
|
|
87
|
+
}
|
|
88
|
+
});
|
|
77
89
|
}
|
|
78
90
|
|
|
79
91
|
async addDocuments(docs) {
|
|
80
|
-
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW())`;
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
92
|
+
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW()) ON CONFLICT ("id") DO NOTHING`;
|
|
93
|
+
await this._withConn(async (client) => {
|
|
94
|
+
for (const doc of docs) {
|
|
95
|
+
const id = doc.id || uuidv4();
|
|
96
|
+
const vec = `[${this.normalizeVector(doc.embedding).join(',')}]`;
|
|
97
|
+
try {
|
|
98
|
+
await client.query(q, [id, doc.content, doc.metadata, vec]);
|
|
99
|
+
} catch (e) {
|
|
100
|
+
const msg = e?.message || String(e);
|
|
101
|
+
if (msg.includes('vector') && msg.includes('dimension')) throw new Error('DimensionMismatchError');
|
|
102
|
+
throw e;
|
|
91
103
|
}
|
|
92
|
-
throw e;
|
|
93
104
|
}
|
|
94
|
-
}
|
|
105
|
+
});
|
|
95
106
|
}
|
|
96
107
|
|
|
97
108
|
async upsertDocuments(docs) {
|
|
@@ -105,11 +116,13 @@ class PostgresVectorStore extends VectorStore {
|
|
|
105
116
|
${this._cVec} = EXCLUDED.${this._cVec}
|
|
106
117
|
`;
|
|
107
118
|
|
|
108
|
-
|
|
109
|
-
const
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
119
|
+
await this._withConn(async (client) => {
|
|
120
|
+
for (const doc of docs) {
|
|
121
|
+
const id = doc.id || uuidv4();
|
|
122
|
+
const vec = `[${this.normalizeVector(doc.embedding).join(',')}]`;
|
|
123
|
+
await client.query(q, [id, doc.content, doc.metadata, vec]);
|
|
124
|
+
}
|
|
125
|
+
});
|
|
113
126
|
}
|
|
114
127
|
|
|
115
128
|
async similaritySearch(vector, limit = 5, filter = null) {
|
|
@@ -133,7 +146,7 @@ class PostgresVectorStore extends VectorStore {
|
|
|
133
146
|
`;
|
|
134
147
|
params.push(Math.max(1, Number(limit) || 5));
|
|
135
148
|
|
|
136
|
-
const res = await this.
|
|
149
|
+
const res = await this._withConn(c => c.query(q, params));
|
|
137
150
|
return res.rows.map(r => ({ content: r.content, metadata: r.metadata, score: r.score }));
|
|
138
151
|
}
|
|
139
152
|
|
|
@@ -162,7 +175,7 @@ class PostgresVectorStore extends VectorStore {
|
|
|
162
175
|
|
|
163
176
|
let lexical = [];
|
|
164
177
|
try {
|
|
165
|
-
const res = await this.
|
|
178
|
+
const res = await this._withConn(c => c.query(q, params));
|
|
166
179
|
lexical = res.rows.map(r => ({ content: r.content, metadata: r.metadata, score: 1.0 }));
|
|
167
180
|
} catch (e) {
|
|
168
181
|
console.warn("Keyword search failed (maybe missing indexes):", e.message);
|
|
@@ -186,6 +199,45 @@ class PostgresVectorStore extends VectorStore {
|
|
|
186
199
|
|
|
187
200
|
return Object.values(combined).sort((a, b) => b.score - a.score).slice(0, limit);
|
|
188
201
|
}
|
|
202
|
+
|
|
203
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
204
|
+
return this._withConn(async (client) => {
|
|
205
|
+
const params = [];
|
|
206
|
+
const whereParts = [];
|
|
207
|
+
if (filter) {
|
|
208
|
+
whereParts.push(`${this._cMeta} @> $${params.length + 1}`);
|
|
209
|
+
params.push(filter);
|
|
210
|
+
}
|
|
211
|
+
if (cursor) {
|
|
212
|
+
whereParts.push(`"id" > $${params.length + 1}`);
|
|
213
|
+
params.push(cursor);
|
|
214
|
+
}
|
|
215
|
+
const where = whereParts.length ? `WHERE ${whereParts.join(' AND ')}` : '';
|
|
216
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
217
|
+
const q = `SELECT "id", ${this._cContent} as content, ${this._cMeta} as metadata FROM ${this._table} ${where} ORDER BY "id" ASC LIMIT $${params.length + 1}`;
|
|
218
|
+
params.push(lim);
|
|
219
|
+
const res = await client.query(q, params);
|
|
220
|
+
const docs = res.rows.map(r => ({ id: r.id, content: r.content, metadata: r.metadata }));
|
|
221
|
+
const nextCursor = docs.length === lim ? docs[docs.length - 1].id : null;
|
|
222
|
+
return { documents: docs, nextCursor };
|
|
223
|
+
});
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
async fileExists(sha256, size, lastModified) {
|
|
227
|
+
try {
|
|
228
|
+
const q = `
|
|
229
|
+
SELECT 1
|
|
230
|
+
FROM ${this._table}
|
|
231
|
+
WHERE ${this._cMeta} @> $1
|
|
232
|
+
LIMIT 1
|
|
233
|
+
`;
|
|
234
|
+
const metaFilter = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
|
|
235
|
+
const res = await this._withConn(c => c.query(q, [metaFilter]));
|
|
236
|
+
return res.rowCount > 0;
|
|
237
|
+
} catch (_) {
|
|
238
|
+
return false;
|
|
239
|
+
}
|
|
240
|
+
}
|
|
189
241
|
}
|
|
190
242
|
|
|
191
243
|
module.exports = { PostgresVectorStore };
|
|
@@ -35,7 +35,7 @@ class PrismaVectorStore extends VectorStore {
|
|
|
35
35
|
}
|
|
36
36
|
async addDocuments(docs) {
|
|
37
37
|
const { clientInstance } = this.config;
|
|
38
|
-
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW())`;
|
|
38
|
+
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW()) ON CONFLICT ("id") DO NOTHING`;
|
|
39
39
|
for (const doc of docs) {
|
|
40
40
|
const id = doc.id || uuidv4();
|
|
41
41
|
const vec = JSON.stringify(this.normalizeVector(doc.embedding));
|
|
@@ -108,6 +108,7 @@ class PrismaVectorStore extends VectorStore {
|
|
|
108
108
|
const idxFts = `"${base}_content_fts_gin"`;
|
|
109
109
|
try {
|
|
110
110
|
await clientInstance.$executeRawUnsafe('CREATE EXTENSION IF NOT EXISTS vector');
|
|
111
|
+
await this._ensureColumns();
|
|
111
112
|
await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxVec} ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops) WITH (lists = 100);`);
|
|
112
113
|
await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxFts} ON ${this._table} USING GIN (to_tsvector('english', ${this._cContent}));`);
|
|
113
114
|
} catch (e) {
|
|
@@ -115,6 +116,54 @@ class PrismaVectorStore extends VectorStore {
|
|
|
115
116
|
}
|
|
116
117
|
}
|
|
117
118
|
|
|
119
|
+
async _ensureColumns() {
|
|
120
|
+
const { clientInstance } = this.config;
|
|
121
|
+
const dim = 1536;
|
|
122
|
+
const createTableQuery = `
|
|
123
|
+
CREATE TABLE IF NOT EXISTS ${this._table} (
|
|
124
|
+
"id" TEXT PRIMARY KEY,
|
|
125
|
+
${this._cContent} TEXT,
|
|
126
|
+
${this._cMeta} JSONB,
|
|
127
|
+
${this._cVec} vector(${dim}),
|
|
128
|
+
"createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()
|
|
129
|
+
)
|
|
130
|
+
`;
|
|
131
|
+
await clientInstance.$executeRawUnsafe(createTableQuery);
|
|
132
|
+
try {
|
|
133
|
+
const res = await clientInstance.$queryRawUnsafe(
|
|
134
|
+
`SELECT column_name, data_type, udt_name FROM information_schema.columns WHERE table_name = $1`,
|
|
135
|
+
this._tableBase
|
|
136
|
+
);
|
|
137
|
+
const cols = new Map(res.map(r => [r.column_name, r]));
|
|
138
|
+
const contentCol = this._cContent.replace(/"/g, '');
|
|
139
|
+
const metaCol = this._cMeta.replace(/"/g, '');
|
|
140
|
+
const vecCol = this._cVec.replace(/"/g, '');
|
|
141
|
+
const createdAtCol = 'createdAt';
|
|
142
|
+
if (!cols.has(contentCol)) {
|
|
143
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
|
|
144
|
+
}
|
|
145
|
+
if (!cols.has(metaCol)) {
|
|
146
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
|
|
147
|
+
}
|
|
148
|
+
if (!cols.has(vecCol)) {
|
|
149
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
|
|
150
|
+
} else {
|
|
151
|
+
const vinfo = cols.get(vecCol);
|
|
152
|
+
const isPgVector = vinfo && vinfo.udt_name === 'vector';
|
|
153
|
+
const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
|
|
154
|
+
if (isArray && !isPgVector) {
|
|
155
|
+
throw new Error(
|
|
156
|
+
'Postgres schema mismatch: vector column is double precision[] (array). Use pgvector type: vector(' + dim + '). Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
|
|
157
|
+
);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
if (!cols.has(createdAtCol)) {
|
|
161
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
|
|
162
|
+
}
|
|
163
|
+
} catch (_) {
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
118
167
|
async fileExists(sha256, size, lastModified) {
|
|
119
168
|
const { clientInstance } = this.config;
|
|
120
169
|
const payload = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
|
|
@@ -127,19 +176,25 @@ class PrismaVectorStore extends VectorStore {
|
|
|
127
176
|
}
|
|
128
177
|
}
|
|
129
178
|
|
|
130
|
-
async listDocuments({ filter = null, limit = 100,
|
|
179
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
131
180
|
const { clientInstance } = this.config;
|
|
132
181
|
const params = [];
|
|
133
|
-
|
|
182
|
+
const whereParts = [];
|
|
134
183
|
if (filter) {
|
|
135
|
-
|
|
184
|
+
whereParts.push(`${this._cMeta} @> $${params.length + 1}::jsonb`);
|
|
136
185
|
params.push(JSON.stringify(filter));
|
|
137
186
|
}
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
187
|
+
if (cursor) {
|
|
188
|
+
whereParts.push(`"id" > $${params.length + 1}`);
|
|
189
|
+
params.push(cursor);
|
|
190
|
+
}
|
|
191
|
+
const where = whereParts.length ? `WHERE ${whereParts.join(' AND ')}` : '';
|
|
192
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
193
|
+
const q = `SELECT "id" as id, ${this._cContent} as content, ${this._cMeta} as metadata FROM ${this._table} ${where} ORDER BY "id" ASC LIMIT ${lim}`;
|
|
141
194
|
const res = await clientInstance.$queryRawUnsafe(q, ...params);
|
|
142
|
-
|
|
195
|
+
const docs = res.map(r => ({ id: r.id, content: r.content, metadata: r.metadata }));
|
|
196
|
+
const nextCursor = docs.length === lim ? docs[docs.length - 1].id : null;
|
|
197
|
+
return { documents: docs, nextCursor };
|
|
143
198
|
}
|
|
144
199
|
|
|
145
200
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
@@ -30,27 +30,35 @@ class QdrantVectorStore extends VectorStore {
|
|
|
30
30
|
return res.map(r => ({ content: r.payload.content, metadata: r.payload.metadata, score: r.score }));
|
|
31
31
|
}
|
|
32
32
|
async hybridSearch(text, vector, limit = 5, filter = null) { return this.similaritySearch(vector, limit, filter); }
|
|
33
|
-
async listDocuments({ filter = null, limit = 100,
|
|
33
|
+
async listDocuments({ filter = null, limit = 100, cursor = null } = {}) {
|
|
34
34
|
if (typeof this.client.scroll !== 'function') throw new Error('listDocuments is not supported for this Qdrant client');
|
|
35
35
|
const qFilter = this.normalizeFilter(filter);
|
|
36
|
-
const lim = Math.max(1,
|
|
37
|
-
const
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
const lim = Math.max(1, Number(limit) || 100);
|
|
37
|
+
const res = await this.client.scroll(this.collection, {
|
|
38
|
+
limit: lim,
|
|
39
|
+
filter: qFilter,
|
|
40
|
+
offset: cursor || undefined,
|
|
41
|
+
with_payload: true,
|
|
42
|
+
with_vector: false
|
|
43
|
+
});
|
|
44
|
+
const points = res?.points || res?.result?.points || [];
|
|
45
|
+
const nextCursor = res?.next_page_offset || res?.result?.next_page_offset;
|
|
46
|
+
const docs = points.map(p => ({
|
|
47
|
+
id: p.id,
|
|
48
|
+
content: p.payload?.content,
|
|
49
|
+
metadata: p.payload?.metadata
|
|
50
|
+
}));
|
|
51
|
+
return { documents: docs, nextCursor };
|
|
52
|
+
}
|
|
53
|
+
async fileExists(sha256, size, lastModified) {
|
|
54
|
+
const filter = this.normalizeFilter({ fileSHA256: sha256, fileSize: size, lastModified });
|
|
55
|
+
try {
|
|
56
|
+
const res = await this.client.scroll(this.collection, { limit: 1, filter });
|
|
43
57
|
const points = res?.points || res?.result?.points || [];
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
if (skipped < off) { skipped++; continue; }
|
|
48
|
-
out.push({ id: p.id, content: p.payload?.content, metadata: p.payload?.metadata });
|
|
49
|
-
if (out.length >= lim) break;
|
|
50
|
-
}
|
|
51
|
-
if (!nextOffset) break;
|
|
58
|
+
return points.length > 0;
|
|
59
|
+
} catch (_) {
|
|
60
|
+
return false;
|
|
52
61
|
}
|
|
53
|
-
return out;
|
|
54
62
|
}
|
|
55
63
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
56
64
|
if (typeof this.client.delete !== 'function') throw new Error('deleteDocuments is not supported for this Qdrant client');
|