vectra-js 0.9.11 → 0.9.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vectra-js",
|
|
3
|
-
"version": "0.9.11",
|
|
3
|
+
"version": "0.9.12",
|
|
4
4
|
"description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
"author": "Abhishek N",
|
|
35
35
|
"license": "GPL-3.0",
|
|
36
36
|
"dependencies": {
|
|
37
|
-
"@anthropic-ai/sdk": "^0.
|
|
37
|
+
"@anthropic-ai/sdk": "^0.71.2",
|
|
38
38
|
"@google/genai": "^1.34.0",
|
|
39
39
|
"dotenv": "^16.6.1",
|
|
40
40
|
"mammoth": "^1.11.0",
|
|
@@ -45,6 +45,28 @@ class MilvusVectorStore extends VectorStore {
|
|
|
45
45
|
return rows.map((r) => ({ id: r.id, content: r.content || '', metadata: r.metadata ? JSON.parse(r.metadata) : {} }));
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
async fileExists(sha256, size, lastModified) {
|
|
49
|
+
if (typeof this.client.query !== 'function') return false;
|
|
50
|
+
try {
|
|
51
|
+
const expr = '';
|
|
52
|
+
const res = await this.client.query({
|
|
53
|
+
collection_name: this.collection,
|
|
54
|
+
expr,
|
|
55
|
+
output_fields: ['content', 'metadata'],
|
|
56
|
+
limit: 1
|
|
57
|
+
});
|
|
58
|
+
const rows = Array.isArray(res) ? res : (res?.data || res?.results || []);
|
|
59
|
+
return rows.some((r) => {
|
|
60
|
+
try {
|
|
61
|
+
const m = r.metadata ? JSON.parse(r.metadata) : {};
|
|
62
|
+
return m.fileSHA256 === sha256 && m.fileSize === size && m.lastModified === lastModified;
|
|
63
|
+
} catch (_) { return false; }
|
|
64
|
+
});
|
|
65
|
+
} catch (_) {
|
|
66
|
+
return false;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
48
70
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
49
71
|
if (typeof this.client.delete !== 'function') throw new Error('deleteDocuments is not supported for this Milvus client');
|
|
50
72
|
if (Array.isArray(ids) && ids.length > 0) {
|
|
@@ -24,9 +24,11 @@ class PostgresVectorStore extends VectorStore {
|
|
|
24
24
|
const tableName = config.tableName || 'document';
|
|
25
25
|
const columnMap = config.columnMap || {};
|
|
26
26
|
this._table = quoteTableName(tableName, 'tableName');
|
|
27
|
+
this._tableBase = tableName.split('.').pop();
|
|
27
28
|
this._cContent = quoteIdentifier(columnMap.content || 'content', 'columnMap.content');
|
|
28
29
|
this._cMeta = quoteIdentifier(columnMap.metadata || 'metadata', 'columnMap.metadata');
|
|
29
30
|
this._cVec = quoteIdentifier(columnMap.vector || 'vector', 'columnMap.vector');
|
|
31
|
+
this._cCreatedAt = '"createdAt"';
|
|
30
32
|
|
|
31
33
|
// We expect config.clientInstance to be a pg.Pool or pg.Client
|
|
32
34
|
if (!this.config.clientInstance) {
|
|
@@ -45,7 +47,34 @@ class PostgresVectorStore extends VectorStore {
|
|
|
45
47
|
// Enable pgvector extension
|
|
46
48
|
await this.client.query('CREATE EXTENSION IF NOT EXISTS vector');
|
|
47
49
|
|
|
48
|
-
//
|
|
50
|
+
// Detect existing column type to avoid malformed array issues
|
|
51
|
+
try {
|
|
52
|
+
const typeCheck = await this.client.query(
|
|
53
|
+
`SELECT data_type, udt_name
|
|
54
|
+
FROM information_schema.columns
|
|
55
|
+
WHERE table_name = $1 AND column_name = $2`,
|
|
56
|
+
[this._tableBase, this._cVec.replace(/"/g, '')]
|
|
57
|
+
);
|
|
58
|
+
const row = typeCheck.rows[0];
|
|
59
|
+
if (row) {
|
|
60
|
+
const isPgVector = row.udt_name === 'vector';
|
|
61
|
+
const isArray = row.data_type && row.data_type.toLowerCase().includes('array');
|
|
62
|
+
if (isArray && !isPgVector) {
|
|
63
|
+
throw new Error(
|
|
64
|
+
'Postgres schema mismatch: vector column is double precision[] (array). ' +
|
|
65
|
+
'Use pgvector type: vector(<dimensions>). ' +
|
|
66
|
+
'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(1536);'
|
|
67
|
+
);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
} catch (e) {
|
|
71
|
+
// Only throw if we explicitly detected array type; otherwise continue
|
|
72
|
+
if (String(e.message || e).includes('schema mismatch')) {
|
|
73
|
+
throw e;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Create table if not exists (best-effort)
|
|
49
78
|
// Note: We need to know vector dimensions. We'll try to guess or use default 1536
|
|
50
79
|
// If embedding dimensions are provided in config, use them
|
|
51
80
|
// But store config usually doesn't have embedding config directly unless passed down
|
|
@@ -65,6 +94,47 @@ class PostgresVectorStore extends VectorStore {
|
|
|
65
94
|
`;
|
|
66
95
|
await this.client.query(createTableQuery);
|
|
67
96
|
|
|
97
|
+
// Ensure required columns exist (non-destructive)
|
|
98
|
+
try {
|
|
99
|
+
const res = await this.client.query(
|
|
100
|
+
`SELECT column_name, data_type, udt_name
|
|
101
|
+
FROM information_schema.columns
|
|
102
|
+
WHERE table_name = $1`,
|
|
103
|
+
[this._tableBase]
|
|
104
|
+
);
|
|
105
|
+
const cols = new Map(res.rows.map(r => [r.column_name, r]));
|
|
106
|
+
const contentCol = this._cContent.replace(/"/g, '');
|
|
107
|
+
const metaCol = this._cMeta.replace(/"/g, '');
|
|
108
|
+
const vecCol = this._cVec.replace(/"/g, '');
|
|
109
|
+
const createdAtCol = this._cCreatedAt.replace(/"/g, '');
|
|
110
|
+
|
|
111
|
+
if (!cols.has(contentCol)) {
|
|
112
|
+
await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
|
|
113
|
+
}
|
|
114
|
+
if (!cols.has(metaCol)) {
|
|
115
|
+
await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
|
|
116
|
+
}
|
|
117
|
+
if (!cols.has(vecCol)) {
|
|
118
|
+
await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
|
|
119
|
+
} else {
|
|
120
|
+
const vinfo = cols.get(vecCol);
|
|
121
|
+
const isPgVector = vinfo && vinfo.udt_name === 'vector';
|
|
122
|
+
const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
|
|
123
|
+
if (isArray && !isPgVector) {
|
|
124
|
+
throw new Error(
|
|
125
|
+
'Postgres schema mismatch: vector column is double precision[] (array). ' +
|
|
126
|
+
'Use pgvector type: vector(' + dim + '). ' +
|
|
127
|
+
'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
|
|
128
|
+
);
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
if (!cols.has(createdAtCol)) {
|
|
132
|
+
await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cCreatedAt} TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
|
|
133
|
+
}
|
|
134
|
+
} catch (_) {
|
|
135
|
+
// best-effort; ignore
|
|
136
|
+
}
|
|
137
|
+
|
|
68
138
|
// Create HNSW index for faster search
|
|
69
139
|
// checking if index exists is hard in raw sql cross-version,
|
|
70
140
|
// simpler to CREATE INDEX IF NOT EXISTS which pg supports in recent versions
|
|
@@ -72,12 +142,17 @@ class PostgresVectorStore extends VectorStore {
|
|
|
72
142
|
try {
|
|
73
143
|
await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING hnsw (${this._cVec} vector_cosine_ops)`);
|
|
74
144
|
} catch (e) {
|
|
75
|
-
|
|
145
|
+
// Fallback to ivfflat when hnsw not supported
|
|
146
|
+
try {
|
|
147
|
+
await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops)`);
|
|
148
|
+
} catch (e2) {
|
|
149
|
+
console.warn('Could not create vector index (might be fine if not supported):', e.message);
|
|
150
|
+
}
|
|
76
151
|
}
|
|
77
152
|
}
|
|
78
153
|
|
|
79
154
|
async addDocuments(docs) {
|
|
80
|
-
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW())`;
|
|
155
|
+
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW()) ON CONFLICT ("id") DO NOTHING`;
|
|
81
156
|
|
|
82
157
|
for (const doc of docs) {
|
|
83
158
|
const id = doc.id || uuidv4();
|
|
@@ -186,6 +261,22 @@ class PostgresVectorStore extends VectorStore {
|
|
|
186
261
|
|
|
187
262
|
return Object.values(combined).sort((a, b) => b.score - a.score).slice(0, limit);
|
|
188
263
|
}
|
|
264
|
+
|
|
265
|
+
async fileExists(sha256, size, lastModified) {
|
|
266
|
+
try {
|
|
267
|
+
const q = `
|
|
268
|
+
SELECT 1
|
|
269
|
+
FROM ${this._table}
|
|
270
|
+
WHERE ${this._cMeta} @> $1
|
|
271
|
+
LIMIT 1
|
|
272
|
+
`;
|
|
273
|
+
const metaFilter = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
|
|
274
|
+
const res = await this.client.query(q, [metaFilter]);
|
|
275
|
+
return res.rowCount > 0;
|
|
276
|
+
} catch (_) {
|
|
277
|
+
return false;
|
|
278
|
+
}
|
|
279
|
+
}
|
|
189
280
|
}
|
|
190
281
|
|
|
191
282
|
module.exports = { PostgresVectorStore };
|
|
@@ -35,7 +35,7 @@ class PrismaVectorStore extends VectorStore {
|
|
|
35
35
|
}
|
|
36
36
|
async addDocuments(docs) {
|
|
37
37
|
const { clientInstance } = this.config;
|
|
38
|
-
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW())`;
|
|
38
|
+
const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW()) ON CONFLICT ("id") DO NOTHING`;
|
|
39
39
|
for (const doc of docs) {
|
|
40
40
|
const id = doc.id || uuidv4();
|
|
41
41
|
const vec = JSON.stringify(this.normalizeVector(doc.embedding));
|
|
@@ -108,6 +108,7 @@ class PrismaVectorStore extends VectorStore {
|
|
|
108
108
|
const idxFts = `"${base}_content_fts_gin"`;
|
|
109
109
|
try {
|
|
110
110
|
await clientInstance.$executeRawUnsafe('CREATE EXTENSION IF NOT EXISTS vector');
|
|
111
|
+
await this._ensureColumns();
|
|
111
112
|
await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxVec} ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops) WITH (lists = 100);`);
|
|
112
113
|
await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxFts} ON ${this._table} USING GIN (to_tsvector('english', ${this._cContent}));`);
|
|
113
114
|
} catch (e) {
|
|
@@ -115,6 +116,54 @@ class PrismaVectorStore extends VectorStore {
|
|
|
115
116
|
}
|
|
116
117
|
}
|
|
117
118
|
|
|
119
|
+
async _ensureColumns() {
|
|
120
|
+
const { clientInstance } = this.config;
|
|
121
|
+
const dim = 1536;
|
|
122
|
+
const createTableQuery = `
|
|
123
|
+
CREATE TABLE IF NOT EXISTS ${this._table} (
|
|
124
|
+
"id" TEXT PRIMARY KEY,
|
|
125
|
+
${this._cContent} TEXT,
|
|
126
|
+
${this._cMeta} JSONB,
|
|
127
|
+
${this._cVec} vector(${dim}),
|
|
128
|
+
"createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()
|
|
129
|
+
)
|
|
130
|
+
`;
|
|
131
|
+
await clientInstance.$executeRawUnsafe(createTableQuery);
|
|
132
|
+
try {
|
|
133
|
+
const res = await clientInstance.$queryRawUnsafe(
|
|
134
|
+
`SELECT column_name, data_type, udt_name FROM information_schema.columns WHERE table_name = $1`,
|
|
135
|
+
this._tableBase
|
|
136
|
+
);
|
|
137
|
+
const cols = new Map(res.map(r => [r.column_name, r]));
|
|
138
|
+
const contentCol = this._cContent.replace(/"/g, '');
|
|
139
|
+
const metaCol = this._cMeta.replace(/"/g, '');
|
|
140
|
+
const vecCol = this._cVec.replace(/"/g, '');
|
|
141
|
+
const createdAtCol = 'createdAt';
|
|
142
|
+
if (!cols.has(contentCol)) {
|
|
143
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
|
|
144
|
+
}
|
|
145
|
+
if (!cols.has(metaCol)) {
|
|
146
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
|
|
147
|
+
}
|
|
148
|
+
if (!cols.has(vecCol)) {
|
|
149
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
|
|
150
|
+
} else {
|
|
151
|
+
const vinfo = cols.get(vecCol);
|
|
152
|
+
const isPgVector = vinfo && vinfo.udt_name === 'vector';
|
|
153
|
+
const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
|
|
154
|
+
if (isArray && !isPgVector) {
|
|
155
|
+
throw new Error(
|
|
156
|
+
'Postgres schema mismatch: vector column is double precision[] (array). Use pgvector type: vector(' + dim + '). Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
|
|
157
|
+
);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
if (!cols.has(createdAtCol)) {
|
|
161
|
+
await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
|
|
162
|
+
}
|
|
163
|
+
} catch (_) {
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
118
167
|
async fileExists(sha256, size, lastModified) {
|
|
119
168
|
const { clientInstance } = this.config;
|
|
120
169
|
const payload = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
|
|
@@ -52,6 +52,16 @@ class QdrantVectorStore extends VectorStore {
|
|
|
52
52
|
}
|
|
53
53
|
return out;
|
|
54
54
|
}
|
|
55
|
+
async fileExists(sha256, size, lastModified) {
|
|
56
|
+
const filter = this.normalizeFilter({ fileSHA256: sha256, fileSize: size, lastModified });
|
|
57
|
+
try {
|
|
58
|
+
const res = await this.client.scroll(this.collection, { limit: 1, filter });
|
|
59
|
+
const points = res?.points || res?.result?.points || [];
|
|
60
|
+
return points.length > 0;
|
|
61
|
+
} catch (_) {
|
|
62
|
+
return false;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
55
65
|
async deleteDocuments({ ids = null, filter = null } = {}) {
|
|
56
66
|
if (typeof this.client.delete !== 'function') throw new Error('deleteDocuments is not supported for this Qdrant client');
|
|
57
67
|
if (Array.isArray(ids) && ids.length > 0) {
|
package/src/telemetry.js
CHANGED
|
@@ -103,7 +103,7 @@ class TelemetryManager {
|
|
|
103
103
|
|
|
104
104
|
const batch = this.queue.splice(0, this.queue.length);
|
|
105
105
|
|
|
106
|
-
if (!global.fetch
|
|
106
|
+
if (!global.fetch) {
|
|
107
107
|
if (process.env.VECTRA_TELEMETRY_DEBUG) {
|
|
108
108
|
console.log('Telemetry batch (debug):', batch);
|
|
109
109
|
}
|