vectra-js 0.9.5 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,42 +1,25 @@
1
- name: Publish to npm
1
+ name: Publish Package
2
2
 
3
3
  on:
4
4
  push:
5
5
  branches:
6
6
  - master
7
7
 
8
+ permissions:
9
+ id-token: write
10
+ contents: read
11
+
8
12
  jobs:
9
13
  publish:
10
14
  runs-on: ubuntu-latest
11
15
  environment: Build
12
-
13
- permissions:
14
- contents: read
15
-
16
16
  steps:
17
17
  - uses: actions/checkout@v4
18
18
 
19
19
  - uses: actions/setup-node@v4
20
20
  with:
21
- node-version: 20
22
- registry-url: https://registry.npmjs.org
23
-
24
- - uses: pnpm/action-setup@v3
25
- with:
26
- version: 9
27
-
28
- - name: Install dependencies
29
- run: pnpm install --frozen-lockfile
30
-
31
- - name: Configure npm auth
32
- run: |
33
- printf "//registry.npmjs.org/:_authToken=%s" "$NODE_AUTH_TOKEN" > "$NPM_CONFIG_USERCONFIG"
34
- env:
35
- NODE_AUTH_TOKEN: ${{ secrets.NPM_KEY }}
36
-
37
-
38
- - name: Verify auth (CI check)
39
- run: npm whoami
40
-
41
- - name: Publish to npm
42
- run: pnpm publish --no-git-checks --access public
21
+ node-version: '24'
22
+ registry-url: 'https://registry.npmjs.org'
23
+ - run: npm install
24
+ - run: npm run build --if-present
25
+ - run: npm publish
package/README.md CHANGED
@@ -6,7 +6,8 @@ A production-ready, provider-agnostic Node.js SDK for End-to-End RAG (Retrieval-
6
6
 
7
7
  * **Multi-Provider Support**: First-class support for **OpenAI**, **Gemini**, **Anthropic**, **OpenRouter**, and **Hugging Face**.
8
8
  * **Modular Vector Store**:
9
- * **Prisma**: Use your existing PostgreSQL database with `pgvector`.
9
+ * **Prisma**: Use your existing PostgreSQL database with `pgvector` (via Prisma).
10
+ * **Native PostgreSQL**: Direct connection to PostgreSQL using the `pg` driver (no ORM required).
10
11
  * **ChromaDB**: Native support for the open-source vector database.
11
12
  * **Qdrant & Milvus**: Additional backends for portability.
12
13
  * **Extensible**: Easily add others by extending the `VectorStore` class.
package/bin/vectra.js CHANGED
File without changes
@@ -0,0 +1,96 @@
1
+ const path = require('path');
2
+ require('dotenv').config({ path: path.join(__dirname, '.env') });
3
+ const { ChromaClient } = require('chromadb');
4
+ const { VectraClient, ProviderType, ChunkingStrategy, RetrievalStrategy } = require('../vectra-js');
5
+ const { LoggingCallbackHandler } = require('../vectra-js/src/callbacks');
6
+
7
/**
 * Smoke-test run of the Vectra SDK against a ChromaDB server on localhost:8000.
 *
 * Builds the client configuration (Gemini for embedding, generation and
 * reranking; Chroma as the vector store), ingests a PDF, then issues one
 * regular and one streaming RAG query and prints the output.
 *
 * Failures of the two queries are logged and do not abort the run. Failures
 * while constructing the client or ingesting the document reject the returned
 * promise.
 *
 * @returns {Promise<void>}
 */
async function runSimulation() {
  console.log('=== Starting Vectra SDK Simulation (Node.js) ===\n');

  const chroma = new ChromaClient({
    ssl: false,
    host: "localhost",
    port: 8000,
    headers: {},
  });

  const config = {
    embedding: {
      provider: ProviderType.GEMINI,
      apiKey: process.env.GEMINI_KEY,
      modelName: 'gemini-embedding-001',
      dimensions: 1536,
    },
    llm: {
      provider: ProviderType.GEMINI,
      apiKey: process.env.GEMINI_KEY,
      modelName: 'gemini-2.5-flash-lite',
    },
    chunking: {
      strategy: ChunkingStrategy.RECURSIVE,
      chunkSize: 500,
      chunkOverlap: 200,
    },
    database: {
      type: 'chroma',
      tableName: 'rag_collection',
      clientInstance: chroma,
      columnMap: { content: 'content', vector: 'embedding', metadata: 'metadata' },
    },
    retrieval: {
      strategy: RetrievalStrategy.HYBRID,
    },
    reranking: {
      enabled: true,
      topN: 5,
      windowSize: 20,
      llmConfig: {
        provider: ProviderType.GEMINI,
        apiKey: process.env.GEMINI_KEY,
        modelName: 'gemini-2.5-flash-lite',
      },
    },
    observability: {
      enabled: true,
      projectId: "node-test-project",
      sqlitePath: path.resolve(__dirname, "db/node-observability.db"),
    },
    callbacks: [
      new LoggingCallbackHandler(),
      { onEmbeddingStart: (c) => console.info(`[RAG] Embedding ${c} chunks...`) },
    ],
  };

  console.log('Initializing Client...');
  const client = new VectraClient(config);
  // Only relevant when the database type above is switched to 'prisma'.
  if (config.database.type === 'prisma' && client.vectorStore.ensureIndexes) {
    await client.vectorStore.ensureIndexes();
  }
  // NOTE(review): this path is resolved against the current working directory,
  // unlike .env and the sqlite path, which are resolved against __dirname.
  await client.ingestDocuments('data/llm-ebook-part1-1.pdf');

  console.log('\n--- Step 1: Standard Query (Hybrid) ---\n');
  try {
    const result = await client.queryRAG('What is LLM?');
    console.log('Answer:', result.answer);
  } catch (error) {
    console.error('Query failed:', error);
  }

  console.log('\n--- Step 2: Streaming Query ---\n');
  try {
    const stream = await client.queryRAG('Tell me more about LLM...', null, true);
    process.stdout.write('Stream Output: ');
    for await (const chunk of stream) {
      // The stream may yield plain strings or objects carrying a `delta` field.
      if (typeof chunk === 'string') {
        process.stdout.write(chunk);
      } else if (chunk && chunk.delta) {
        process.stdout.write(chunk.delta);
      }
    }
    process.stdout.write('\n');
  } catch (error) {
    console.error('Streaming failed:', error);
  }
}

// Do not leave the promise floating: report setup/ingestion failures and
// signal them through the process exit code.
runSimulation().catch((error) => {
  console.error('Simulation failed:', error);
  process.exitCode = 1;
});
@@ -0,0 +1,119 @@
1
+ const path = require('path');
2
+ require('dotenv').config({ path: path.join(__dirname, '.env') });
3
+ const { PrismaClient } = require('@prisma/client');
4
+ const { PrismaPg } = require('@prisma/adapter-pg');
5
+ const { Pool } = require('pg');
6
+ const { VectraClient, ProviderType, ChunkingStrategy, RetrievalStrategy } = require('../vectra-js');
7
+ const { LoggingCallbackHandler } = require('../vectra-js/src/callbacks');
8
+
9
/**
 * Smoke-test run of the Vectra SDK using the Prisma-backed vector store.
 *
 * Opens a `pg` pool from DATABASE_URL, wraps it in a Prisma client through the
 * pg adapter, ensures indexes, empties the "Document" table, ingests a sample
 * text file, then issues one regular and one streaming RAG query.
 *
 * Index creation, table clean-up and the two queries are best-effort: their
 * failures are logged and the run continues. Failures while constructing the
 * client or ingesting reject the returned promise. The Prisma client and the
 * pool are released in every case.
 *
 * @returns {Promise<void>}
 */
async function runSimulation() {
  console.log('=== Starting Vectra SDK Simulation (Node.js + Prisma) ===\n');

  const connectionString = process.env.DATABASE_URL;
  const pool = new Pool({ connectionString });
  const adapter = new PrismaPg(pool);
  const prisma = new PrismaClient({ adapter });

  try {
    const config = {
      embedding: {
        provider: ProviderType.GEMINI,
        apiKey: process.env.GEMINI_KEY,
        modelName: 'gemini-embedding-001',
        dimensions: 1536,
      },
      llm: {
        provider: ProviderType.GEMINI,
        apiKey: process.env.GEMINI_KEY,
        modelName: 'gemini-2.5-flash-lite',
      },
      chunking: {
        strategy: ChunkingStrategy.RECURSIVE,
        chunkSize: 1000,
        chunkOverlap: 200,
      },
      database: {
        type: 'prisma',
        tableName: 'Document',
        clientInstance: prisma,
        columnMap: { content: 'content', vector: 'embedding', metadata: 'metadata' },
      },
      retrieval: {
        strategy: RetrievalStrategy.HYBRID,
      },
      reranking: {
        enabled: true,
        topN: 5,
        windowSize: 20,
        llmConfig: {
          provider: ProviderType.GEMINI,
          apiKey: process.env.GEMINI_KEY,
          modelName: 'gemini-2.5-flash-lite',
        },
      },
      observability: {
        enabled: true,
        projectId: "node-test-project",
        sqlitePath: path.resolve(__dirname, "db/node-observability.db"),
      },
      callbacks: [
        new LoggingCallbackHandler(),
        { onEmbeddingStart: (c) => console.info(`[RAG] Embedding ${c} chunks...`) },
      ],
    };

    console.log('Initializing Client...');
    const client = new VectraClient(config);

    if (config.database.type === 'prisma' && client.vectorStore.ensureIndexes) {
      try {
        await client.vectorStore.ensureIndexes();
        console.log('Database indexes ensured.');
      } catch (e) {
        console.warn('Index creation warning (may already exist):', e.message);
      }
    }

    // Start from an empty table. A raw statement is used because the Document
    // model has a field of an unsupported type (vector), so the generated
    // deleteMany may not be usable in every Prisma version. The statement is a
    // constant string, so no user input reaches it.
    try {
      await prisma.$executeRawUnsafe(`DELETE FROM "Document"`);
      console.log('Cleared existing documents from table.');
    } catch (e) {
      console.warn('Could not clear table:', e.message);
    }

    await client.ingestDocuments('data/sample.txt');

    console.log('\n--- Step 1: Standard Query (Hybrid) ---\n');
    try {
      const result = await client.queryRAG('What is RAG?');
      console.log('Answer:', result.answer);
    } catch (error) {
      console.error('Query failed:', error);
    }

    console.log('\n--- Step 2: Streaming Query ---\n');
    try {
      const stream = await client.queryRAG('Tell me more...', null, true);
      process.stdout.write('Stream Output: ');
      for await (const chunk of stream) {
        // The stream may yield plain strings or objects carrying a `delta` field.
        if (typeof chunk === 'string') {
          process.stdout.write(chunk);
        } else if (chunk && chunk.delta) {
          process.stdout.write(chunk.delta);
        }
      }
      process.stdout.write('\n');
    } catch (error) {
      console.error('Streaming failed:', error);
    }
  } finally {
    // Release database resources even when construction or ingestion throws.
    await prisma.$disconnect();
    await pool.end();
  }
}

// Do not leave the promise floating: report failures and signal them through
// the process exit code.
runSimulation().catch((error) => {
  console.error('Simulation failed:', error);
  process.exitCode = 1;
});
@@ -0,0 +1,115 @@
1
+ const path = require('path');
2
+ require('dotenv').config({ path: path.join(__dirname, '.env') });
3
+ const { PrismaClient } = require('@prisma/client');
4
+ const { PrismaPg } = require('@prisma/adapter-pg');
5
+ const { Pool } = require('pg');
6
+ const { VectraClient, ProviderType, ChunkingStrategy, RetrievalStrategy } = require('../vectra-js');
7
+ const { LoggingCallbackHandler } = require('../vectra-js/src/callbacks');
8
+
9
/**
 * Smoke-test run of the Vectra SDK using the native PostgreSQL vector store
 * (database type 'postgres', a `pg` pool passed as the client instance).
 *
 * Opens a pool from DATABASE_URL, ensures indexes, empties the configured
 * table, ingests a sample text file, then issues one regular and one
 * streaming RAG query.
 *
 * Index creation, table clean-up and the two queries are best-effort: their
 * failures are logged and the run continues. Failures while constructing the
 * client or ingesting reject the returned promise. The pool is closed in
 * every case.
 *
 * @returns {Promise<void>}
 */
async function runSimulation() {
  console.log('=== Starting Vectra SDK Simulation (Node.js + Prisma) ===\n');

  const connectionString = process.env.DATABASE_URL;
  const pool = new Pool({ connectionString });

  try {
    const config = {
      embedding: {
        provider: ProviderType.GEMINI,
        apiKey: process.env.GEMINI_KEY,
        modelName: 'gemini-embedding-001',
        dimensions: 1536,
      },
      llm: {
        provider: ProviderType.GEMINI,
        apiKey: process.env.GEMINI_KEY,
        modelName: 'gemini-2.5-flash-lite',
      },
      chunking: {
        strategy: ChunkingStrategy.RECURSIVE,
        chunkSize: 1000,
        chunkOverlap: 200,
      },
      database: {
        type: 'postgres',
        tableName: 'Document',
        clientInstance: pool,
        columnMap: { content: 'content', vector: 'embedding', metadata: 'metadata' },
      },
      retrieval: {
        strategy: RetrievalStrategy.HYBRID,
      },
      reranking: {
        enabled: true,
        topN: 5,
        windowSize: 20,
        llmConfig: {
          provider: ProviderType.GEMINI,
          apiKey: process.env.GEMINI_KEY,
          modelName: 'gemini-2.5-flash-lite',
        },
      },
      observability: {
        enabled: true,
        projectId: "node-test-project",
        sqlitePath: path.resolve(__dirname, "db/node-observability.db"),
      },
      callbacks: [
        new LoggingCallbackHandler(),
        { onEmbeddingStart: (c) => console.info(`[RAG] Embedding ${c} chunks...`) },
      ],
    };

    console.log('Initializing Client...');
    const client = new VectraClient(config);

    if ((config.database.type === 'prisma' || config.database.type === 'postgres') && client.vectorStore.ensureIndexes) {
      try {
        await client.vectorStore.ensureIndexes();
        console.log('Database indexes ensured.');
      } catch (e) {
        console.warn('Index creation warning (may already exist):', e.message);
      }
    }

    // Start from an empty table. The table name comes from the constant config
    // above and is double-quoted so its mixed case is preserved.
    if (config.database.type === 'postgres') {
      try {
        await pool.query(`DELETE FROM "${config.database.tableName}"`);
        console.log('Cleared existing documents from table.');
      } catch (e) {
        console.warn('Could not clear table:', e.message);
      }
    }

    await client.ingestDocuments('data/sample.txt');

    console.log('\n--- Step 1: Standard Query (Hybrid) ---\n');
    try {
      const result = await client.queryRAG('What is RAG?');
      console.log('Answer:', result.answer);
    } catch (error) {
      console.error('Query failed:', error);
    }

    console.log('\n--- Step 2: Streaming Query ---\n');
    try {
      const stream = await client.queryRAG('Tell me more...', null, true);
      process.stdout.write('Stream Output: ');
      for await (const chunk of stream) {
        // The stream may yield plain strings or objects carrying a `delta` field.
        if (typeof chunk === 'string') {
          process.stdout.write(chunk);
        } else if (chunk && chunk.delta) {
          process.stdout.write(chunk.delta);
        }
      }
      process.stdout.write('\n');
    } catch (error) {
      console.error('Streaming failed:', error);
    }
  } finally {
    // Close the pool even when construction or ingestion throws.
    await pool.end();
  }
}

// Do not leave the promise floating: report failures and signal them through
// the process exit code.
runSimulation().catch((error) => {
  console.error('Simulation failed:', error);
  process.exitCode = 1;
});
package/package.json CHANGED
@@ -1,8 +1,14 @@
1
1
  {
2
2
  "name": "vectra-js",
3
- "version": "0.9.5",
3
+ "version": "0.9.7",
4
4
  "description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
5
5
  "main": "index.js",
6
+ "scripts": {
7
+ "test": "echo \"Error: no test specified\" && exit 1",
8
+ "prisma:generate": "prisma generate",
9
+ "lint": "eslint . --ext .js,.cjs,.mjs",
10
+ "lint:fix": "eslint . --ext .js,.cjs,.mjs --fix"
11
+ },
6
12
  "bin": {
7
13
  "vectra": "bin/vectra.js"
8
14
  },
@@ -34,6 +40,7 @@
34
40
  "mammoth": "^1.11.0",
35
41
  "openai": "^6.15.0",
36
42
  "pdf-parse": "^2.4.5",
43
+ "pg": "^8.16.3",
37
44
  "sqlite3": "^5.1.7",
38
45
  "uuid": "^9.0.1",
39
46
  "xlsx": "^0.18.5",
@@ -47,10 +54,9 @@
47
54
  "globals": "^16.5.0",
48
55
  "prisma": "^7.2.0"
49
56
  },
50
- "scripts": {
51
- "test": "echo \"Error: no test specified\" && exit 1",
52
- "prisma:generate": "prisma generate",
53
- "lint": "eslint . --ext .js,.cjs,.mjs",
54
- "lint:fix": "eslint . --ext .js,.cjs,.mjs --fix"
57
+ "pnpm": {
58
+ "onlyBuiltDependencies": [
59
+ "sqlite3"
60
+ ]
55
61
  }
56
- }
62
+ }
@@ -19,14 +19,21 @@ class GeminiBackend {
19
19
  }
20
20
 
21
21
  async embedDocuments(texts) {
22
- const res = await this._retry(() => this.client.models.embedContent({
23
- model: this.config.modelName,
24
- contents: texts,
25
- config: { outputDimensionality: this.config.dimensions }
26
- }));
27
- const out = res?.embeddings || res?.data?.embeddings;
28
- if (!out || !Array.isArray(out)) throw new Error('Gemini embedding response missing embeddings');
29
- return out.map(e => e.values || e.embedding?.values || e);
22
+ const BATCH_SIZE = 100;
23
+ const allEmbeddings = [];
24
+
25
+ for (let i = 0; i < texts.length; i += BATCH_SIZE) {
26
+ const batch = texts.slice(i, i + BATCH_SIZE);
27
+ const res = await this._retry(() => this.client.models.embedContent({
28
+ model: this.config.modelName,
29
+ contents: batch,
30
+ config: { outputDimensionality: this.config.dimensions }
31
+ }));
32
+ const out = res?.embeddings || res?.data?.embeddings;
33
+ if (!out || !Array.isArray(out)) throw new Error('Gemini embedding response missing embeddings');
34
+ allEmbeddings.push(...out.map(e => e.values || e.embedding?.values || e));
35
+ }
36
+ return allEmbeddings;
30
37
  }
31
38
  async embedQuery(text) {
32
39
  const res = await this._retry(() => this.client.models.embedContent({
@@ -0,0 +1,191 @@
1
+ const { v4: uuidv4 } = require('uuid');
2
+ const { VectorStore } = require('../interfaces');
3
+
4
/**
 * Tells whether `value` is a string made only of ASCII letters, digits and
 * underscores that does not start with a digit.
 *
 * @param {*} value - Candidate identifier.
 * @returns {boolean}
 */
function isSafeIdentifier(value) {
  if (typeof value !== 'string') return false;
  return /^[A-Za-z_][A-Za-z0-9_]*$/.test(value);
}

/**
 * Throws unless `value` passes `isSafeIdentifier`.
 *
 * @param {*} value - Candidate identifier.
 * @param {string} label - Name of the setting, used in the error message.
 * @throws {Error} When the identifier is not safe.
 */
function assertSafeIdentifier(value, label) {
  if (isSafeIdentifier(value)) return;
  throw new Error(`Unsafe SQL identifier for ${label}`);
}

/**
 * Validates a single identifier and wraps it in double quotes.
 *
 * @param {*} value - Candidate identifier.
 * @param {string} label - Name of the setting, used in the error message.
 * @returns {string} The double-quoted identifier.
 * @throws {Error} When the identifier is not safe.
 */
function quoteIdentifier(value, label) {
  assertSafeIdentifier(value, label);
  return `"${value}"`;
}

/**
 * Validates and quotes a table name that may carry one schema qualifier
 * (`table` or `schema.table`).
 *
 * Segments are trimmed and empty segments are discarded before validation, so
 * at most two non-empty segments are accepted.
 *
 * @param {*} value - Table name, optionally schema-qualified.
 * @param {string} label - Name of the setting, used in the error message.
 * @returns {string} Each segment double-quoted, joined with a dot.
 * @throws {Error} When the name is not a non-blank string, has no usable
 *   segment, has more than two segments, or a segment is not safe.
 */
function quoteTableName(value, label) {
  const unsafe = () => new Error(`Unsafe SQL identifier for ${label}`);

  if (typeof value !== 'string' || value.trim().length === 0) throw unsafe();

  const segments = [];
  for (const raw of value.split('.')) {
    const segment = raw.trim();
    if (segment) segments.push(segment);
  }
  if (segments.length < 1 || segments.length > 2) throw unsafe();

  const isQualified = segments.length === 2;
  segments.forEach((segment, position) => {
    const role = isQualified && position === 0 ? 'schema' : 'table';
    assertSafeIdentifier(segment, `${label} ${role}`);
  });

  return segments.map((segment) => `"${segment}"`).join('.');
}
19
+
20
/**
 * Vector store backed by PostgreSQL with the pgvector extension, driven
 * through a caller-supplied `pg` Pool or Client (no ORM).
 *
 * Table and column names come from `config.tableName` / `config.columnMap`.
 * They are validated and double-quoted once in the constructor, so the SQL
 * built below interpolates only those pre-validated identifiers; all values
 * are sent as bind parameters.
 */
class PostgresVectorStore extends VectorStore {
  /**
   * @param {object} config
   * @param {object} config.clientInstance - pg.Pool or pg.Client; required.
   * @param {string} [config.tableName='document'] - Table, optionally `schema.table`.
   * @param {object} [config.columnMap] - Optional `content`, `metadata`, `vector`
   *   column names (defaults: 'content', 'metadata', 'vector').
   * @param {number} [config.dimensions=1536] - Vector column size used by
   *   `ensureIndexes` when it creates the table.
   * @throws {Error} When an identifier is unsafe or `clientInstance` is missing.
   */
  constructor(config) {
    super();
    this.config = config;
    const tableName = config.tableName || 'document';
    const columnMap = config.columnMap || {};
    this._table = quoteTableName(tableName, 'tableName');
    this._cContent = quoteIdentifier(columnMap.content || 'content', 'columnMap.content');
    this._cMeta = quoteIdentifier(columnMap.metadata || 'metadata', 'columnMap.metadata');
    this._cVec = quoteIdentifier(columnMap.vector || 'vector', 'columnMap.vector');

    // We expect config.clientInstance to be a pg.Pool or pg.Client
    if (!this.config.clientInstance) {
      throw new Error('PostgresVectorStore requires a clientInstance (pg.Pool or pg.Client)');
    }
    this.client = this.config.clientInstance;
  }

  /**
   * Scales a vector to unit length; an all-zero vector is returned unchanged.
   *
   * @param {number[]} v
   * @returns {number[]}
   */
  normalizeVector(v) {
    const m = Math.sqrt(v.reduce((s, x) => s + x * x, 0));
    return m === 0 ? v : v.map(x => x / m);
  }

  /**
   * Renders a vector in pgvector's text input format (`[x,y,...]`) after
   * normalising it.
   *
   * @param {number[]} v
   * @returns {string}
   */
  _toVectorLiteral(v) {
    return `[${this.normalizeVector(v).join(',')}]`;
  }

  /**
   * Resolves the dimension used when creating the vector column: a positive
   * integer `config.dimensions` when provided, otherwise 1536.
   *
   * The value is interpolated into DDL, so anything that is not a positive
   * integer is ignored in favour of the default.
   *
   * @returns {number}
   */
  _vectorDimensions() {
    const requested = Number(this.config.dimensions);
    return Number.isInteger(requested) && requested > 0 ? requested : 1536;
  }

  /**
   * Enables the pgvector extension, creates the table if it does not exist
   * and tries to create an HNSW cosine index on the vector column.
   *
   * An existing table is left untouched. Failure to create the index is
   * logged as a warning and does not reject.
   *
   * @returns {Promise<void>}
   */
  async ensureIndexes() {
    await this.client.query('CREATE EXTENSION IF NOT EXISTS vector');

    const dim = this._vectorDimensions();
    const createTableQuery = `
      CREATE TABLE IF NOT EXISTS ${this._table} (
        "id" TEXT PRIMARY KEY,
        ${this._cContent} TEXT,
        ${this._cMeta} JSONB,
        ${this._cVec} vector(${dim}),
        "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()
      )
    `;
    await this.client.query(createTableQuery);

    // Index creation is best-effort: the access method may be unavailable.
    try {
      await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING hnsw (${this._cVec} vector_cosine_ops)`);
    } catch (e) {
      console.warn('Could not create vector index (might be fine if not supported):', e.message);
    }
  }

  /**
   * Runs `sql` once per document with parameters (id, content, metadata,
   * vector). Documents without an id get a generated UUID.
   *
   * @param {string} sql - Statement with placeholders $1..$4.
   * @param {Array<object>} docs - Items with `content`, `metadata`,
   *   `embedding` and optional `id`.
   * @throws {Error} 'DimensionMismatchError: ...' when the driver error
   *   message mentions both 'vector' and 'dimension'; other errors are
   *   rethrown unchanged.
   */
  async _writeDocuments(sql, docs) {
    for (const doc of docs) {
      const id = doc.id || uuidv4();
      const vec = this._toVectorLiteral(doc.embedding);
      try {
        await this.client.query(sql, [id, doc.content, doc.metadata, vec]);
      } catch (e) {
        const msg = e?.message || String(e);
        if (msg.includes('vector') && msg.includes('dimension')) {
          const mismatch = new Error('DimensionMismatchError: Embedding dimension does not match pgvector column.');
          mismatch.cause = e;
          throw mismatch;
        }
        throw e;
      }
    }
  }

  /**
   * Inserts documents one by one.
   *
   * @param {Array<object>} docs
   * @returns {Promise<void>}
   */
  async addDocuments(docs) {
    const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW())`;
    await this._writeDocuments(q, docs);
  }

  /**
   * Inserts documents one by one, overwriting content, metadata and vector
   * of rows whose id already exists.
   *
   * @param {Array<object>} docs
   * @returns {Promise<void>}
   */
  async upsertDocuments(docs) {
    const q = `
      INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt")
      VALUES ($1, $2, $3, $4, NOW())
      ON CONFLICT ("id")
      DO UPDATE SET
        ${this._cContent} = EXCLUDED.${this._cContent},
        ${this._cMeta} = EXCLUDED.${this._cMeta},
        ${this._cVec} = EXCLUDED.${this._cVec}
    `;
    await this._writeDocuments(q, docs);
  }

  /**
   * Returns the rows closest to `vector` by cosine distance.
   *
   * @param {number[]} vector - Query embedding (normalised before use).
   * @param {number} [limit=5] - Row count; values that are not a positive
   *   number fall back to 5, and the minimum is 1.
   * @param {object|null} [filter=null] - When set, rows must have metadata
   *   containing this value (`@>`).
   * @returns {Promise<Array<{content: *, metadata: *, score: *}>>} `score`
   *   is `1 - cosine distance`.
   */
  async similaritySearch(vector, limit = 5, filter = null) {
    const vec = this._toVectorLiteral(vector);
    const params = [vec];
    let where = '';

    if (filter) {
      where = `WHERE ${this._cMeta} @> $2`;
      params.push(filter);
    }

    const limitIdx = params.length + 1;
    // <=> is pgvector's cosine distance operator.
    const q = `
      SELECT ${this._cContent} as content, ${this._cMeta} as metadata, 1 - (${this._cVec} <=> $1) as score
      FROM ${this._table}
      ${where}
      ORDER BY ${this._cVec} <=> $1 ASC
      LIMIT $${limitIdx}
    `;
    params.push(Math.max(1, Number(limit) || 5));

    const res = await this.client.query(q, params);
    return res.rows.map(r => ({ content: r.content, metadata: r.metadata, score: r.score }));
  }

  /**
   * Combines vector search and English full-text search with Reciprocal Rank
   * Fusion (constant 60, equal weights).
   *
   * Each source contributes up to twice the requested number of rows. If the
   * keyword query fails, a warning is logged and only the semantic results
   * are fused. Rows are de-duplicated by their content.
   *
   * @param {string} text - Keyword query.
   * @param {number[]} vector - Query embedding.
   * @param {number} [limit=5] - Result count, normalised the same way as in
   *   `similaritySearch`.
   * @param {object|null} [filter=null] - Optional metadata containment filter.
   * @returns {Promise<Array<object>>} Fused rows, highest score first.
   */
  async hybridSearch(text, vector, limit = 5, filter = null) {
    // Normalise once so both queries and the final cut agree on the count.
    const k = Math.max(1, Number(limit) || 5);

    // 1. Semantic search
    const semantic = await this.similaritySearch(vector, k * 2, filter);

    // 2. Keyword search
    const params = [text];
    let metaClause = '';
    if (filter) {
      metaClause = `AND ${this._cMeta} @> $2`;
      params.push(filter);
    }
    const limitIdx = params.length + 1;
    const tsVector = `to_tsvector('english', ${this._cContent})`;
    const tsQuery = `plainto_tsquery('english', $1)`;

    // Ordered by relevance: rank fusion needs a meaningful rank per row.
    const q = `
      SELECT ${this._cContent} as content, ${this._cMeta} as metadata
      FROM ${this._table}
      WHERE ${tsVector} @@ ${tsQuery}
      ${metaClause}
      ORDER BY ts_rank(${tsVector}, ${tsQuery}) DESC
      LIMIT $${limitIdx}
    `;
    params.push(k * 2);

    let lexical = [];
    try {
      const res = await this.client.query(q, params);
      lexical = res.rows.map(r => ({ content: r.content, metadata: r.metadata, score: 1.0 }));
    } catch (e) {
      console.warn("Keyword search failed (maybe missing indexes):", e.message);
      lexical = [];
    }

    // 3. Reciprocal Rank Fusion.
    // A Map is used because the key is arbitrary document text: with a plain
    // object, content such as "constructor" or "__proto__" would resolve to
    // inherited properties instead of a fresh entry.
    const fused = new Map();
    const accumulate = (list, weight = 1) => {
      list.forEach((doc, idx) => {
        const key = doc.content; // rows carry no id, so content identifies them
        const entry = fused.get(key) ?? { ...doc, score: 0 };
        entry.score += 1 / (60 + idx + 1) * weight;
        fused.set(key, entry);
      });
    };
    accumulate(semantic, 1);
    accumulate(lexical, 1);

    return [...fused.values()].sort((a, b) => b.score - a.score).slice(0, k);
  }
}
190
+
191
+ module.exports = { PostgresVectorStore };
package/src/core.js CHANGED
@@ -10,6 +10,7 @@ const { OpenRouterBackend } = require('./backends/openrouter');
10
10
  const { HuggingFaceBackend } = require('./backends/huggingface');
11
11
  const { PrismaVectorStore } = require('./backends/prisma_store');
12
12
  const { ChromaVectorStore } = require('./backends/chroma_store');
13
+ const { PostgresVectorStore } = require('./backends/postgres_store');
13
14
  const { QdrantVectorStore } = require('./backends/qdrant_store');
14
15
  const { MilvusVectorStore } = require('./backends/milvus_store');
15
16
  const { LLMReranker } = require('./reranker');
@@ -98,6 +99,7 @@ class VectraClient {
98
99
  if (!dbConfig || !dbConfig.type) throw new Error('Database config missing type');
99
100
  const t = dbConfig.type.toLowerCase();
100
101
  if (t === 'prisma') return new PrismaVectorStore(dbConfig);
102
+ if (t === 'postgres') return new PostgresVectorStore(dbConfig);
101
103
  if (t === 'chroma') return new ChromaVectorStore(dbConfig);
102
104
  if (t === 'qdrant') return new QdrantVectorStore(dbConfig);
103
105
  if (t === 'milvus') return new MilvusVectorStore(dbConfig);
package/src/processor.js CHANGED
@@ -16,8 +16,38 @@ class DocumentProcessor {
16
16
  const ext = path.extname(filePath).toLowerCase();
17
17
  const buffer = await fs.promises.readFile(filePath);
18
18
  if (ext === '.pdf') {
19
+ let PDFParse = pdf.PDFParse;
20
+ if (!PDFParse && pdf.default && pdf.default.PDFParse) {
21
+ PDFParse = pdf.default.PDFParse;
22
+ }
23
+
24
+ if (PDFParse) {
25
+ // Handle pdf-parse v2
26
+ const parser = new PDFParse({ data: buffer });
27
+ const info = await parser.getInfo();
28
+ const total = info.total;
29
+ const pages = [];
30
+ let fullText = '';
31
+
32
+ for (let i = 1; i <= total; i++) {
33
+ const pageRes = await parser.getText({ partial: [i] });
34
+ const pageText = pageRes.text || '';
35
+ pages.push(pageText);
36
+ fullText += pageText + '\n';
37
+ }
38
+ await parser.destroy();
39
+ this._lastPages = pages;
40
+ return fullText;
41
+ }
42
+
43
+ // Fallback for v1 (or if PDFParse class not found)
44
+ let pdfFunc = pdf;
45
+ if (typeof pdfFunc !== 'function' && pdfFunc.default) {
46
+ pdfFunc = pdfFunc.default;
47
+ }
48
+
19
49
  const pages = [];
20
- const res = await pdf(buffer, {
50
+ const res = await pdfFunc(buffer, {
21
51
  pagerender: pageData => pageData.getTextContent().then(tc => {
22
52
  const s = tc.items.map(it => it.str).join(' ');
23
53
  pages.push(s);