vectra-js 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +625 -0
- package/bin/vectra.js +76 -0
- package/documentation.md +288 -0
- package/index.js +11 -0
- package/package.json +53 -0
- package/src/backends/anthropic.js +37 -0
- package/src/backends/chroma_store.js +110 -0
- package/src/backends/gemini.js +68 -0
- package/src/backends/huggingface.js +52 -0
- package/src/backends/milvus_store.js +61 -0
- package/src/backends/ollama.js +63 -0
- package/src/backends/openai.js +46 -0
- package/src/backends/openrouter.js +51 -0
- package/src/backends/prisma_store.js +160 -0
- package/src/backends/qdrant_store.js +68 -0
- package/src/callbacks.js +31 -0
- package/src/config.js +123 -0
- package/src/core.js +591 -0
- package/src/evaluation/index.js +15 -0
- package/src/interfaces.js +21 -0
- package/src/memory.js +96 -0
- package/src/processor.js +155 -0
- package/src/reranker.js +26 -0
- package/src/ui/index.html +665 -0
- package/src/ui/script.js +785 -0
- package/src/ui/style.css +281 -0
- package/src/webconfig_server.js +175 -0
package/bin/vectra.js
ADDED
@@ -0,0 +1,76 @@
#!/usr/bin/env node
const { start: startWebConfig } = require('../src/webconfig_server');
const fs = require('fs');
const path = require('path');

async function run() {
  const args = process.argv.slice(2);
  const cmd = args[0];

  // Parse args manually to support --config=path and --config path
  let configPath = null;
  let stream = false;
  let target = null;

  for (let i = 1; i < args.length; i++) {
    const arg = args[i];
    if (arg.startsWith('--config=')) {
      configPath = arg.split('=')[1];
    } else if (arg === '--config') {
      configPath = args[i + 1];
      i++; // Skip next arg
    } else if (arg === '--stream') {
      stream = true;
    } else if (!target && !arg.startsWith('--')) {
      target = arg;
    }
  }

  if (cmd === 'webconfig') {
    const cfgPath = configPath || path.join(process.cwd(), 'vectra-config.json');
    startWebConfig(cfgPath);
    return;
  }

  if (!cmd || (!target && cmd !== 'webconfig')) {
    console.error('Usage: vectra <ingest|query|webconfig> <path|text> [--config=path] [--stream]');
    process.exit(1);
  }

  // Lazy load VectraClient to avoid overhead when just running help or webconfig
  const { VectraClient } = require('..');

  let cfg = null;
  if (configPath) {
    cfg = JSON.parse(fs.readFileSync(path.resolve(configPath), 'utf-8'));
  } else {
    // Fallback to test config if exists, or null
    try {
      cfg = require(path.resolve(process.cwd(), 'nodejs-test/index.js')).config;
    } catch (e) {
      cfg = null;
    }
  }

  const client = new VectraClient(cfg);
  if (cmd === 'ingest') {
    await client.ingestDocuments(path.resolve(process.cwd(), target));
    console.log('Ingestion complete');
  } else if (cmd === 'query') {
    const res = await client.queryRAG(target, null, stream);
    if (stream) {
      for await (const chunk of res) {
        const t = chunk.delta || chunk;
        process.stdout.write(String(t));
      }
      process.stdout.write('\n');
    } else {
      console.log(JSON.stringify(res, null, 2));
    }
  } else {
    console.error('Unknown command');
    process.exit(1);
  }
}

run().catch(e => { console.error(e && e.message ? e.message : String(e)); process.exit(1); });
package/documentation.md
ADDED
@@ -0,0 +1,288 @@
# Vectra JS Documentation

## 1. Getting Started
- Introduction
  - Vectra is a provider-agnostic RAG SDK for Node.js that orchestrates the pipeline: load files, chunk, embed, store, retrieve, rerank, and generate answers with streaming support.
- Key Features
  - Multi-Provider (OpenAI, Gemini, Anthropic, OpenRouter, HuggingFace)
  - HyDE and Multi-Query retrieval strategies
  - Hybrid Search with RRF fusion (vector + keyword)
  - Agentic Chunking using an LLM to find semantic breaks
  - Streaming responses and metadata enrichment
- Architecture
```mermaid
graph LR
  A[Files] --> B(Chunking)
  B --> C{Embedding API}
  C --> D[(Vector Store)]
  E[User Query] --> F(Retrieval)
  D --> F
  F --> G(Reranking)
  G --> H[LLM Generation]
  H --> I[Stream Output]
```
- Installation
  - Prerequisites: Node.js 18+, optional Postgres (for Prisma + pgvector)
  - Commands:
    - `npm install vectra-js @prisma/client`
    - `npm install chromadb` (optional for ChromaDB backend)
    - `pnpm add vectra-js @prisma/client` and `pnpm add chromadb` (optional)
- Quickstart
  - Minimal setup with ChromaDB to avoid Postgres on the first run:
```javascript
const { VectraClient, ProviderType, RetrievalStrategy } = require('vectra-js');
const { ChromaClient } = require('chromadb');
const chroma = new ChromaClient();

const config = {
  embedding: { provider: ProviderType.OPENAI, apiKey: process.env.OPENAI_API_KEY, modelName: 'text-embedding-3-small' },
  llm: { provider: ProviderType.GEMINI, apiKey: process.env.GOOGLE_API_KEY, modelName: 'gemini-1.5-pro-latest' },
  database: { type: 'chroma', clientInstance: chroma, tableName: 'rag_collection' },
  retrieval: { strategy: RetrievalStrategy.NAIVE }
};
const client = new VectraClient(config);
await client.ingestDocuments('./docs/hello.txt');
const res = await client.queryRAG('Hello');
console.log(res.answer);
```
- Environment Variables
  - `OPENAI_API_KEY`, `GOOGLE_API_KEY`, `ANTHROPIC_API_KEY`, `OPENROUTER_API_KEY`, `HUGGINGFACE_API_KEY`
- First Query
  - `await client.queryRAG("Hello")` returns `{ answer, sources }`.

## 2. Fundamentals
- Configuration
  - Centralized config object validates providers, database, and pipeline options.
  - Copy-paste template:
```javascript
const { ProviderType, ChunkingStrategy, RetrievalStrategy } = require('vectra-js');
const config = {
  // Embedding
  embedding: {
    provider: ProviderType.OPENAI,
    apiKey: process.env.OPENAI_API_KEY,
    modelName: 'text-embedding-3-small',
    // dimensions: 1536
  },
  // LLM (generation)
  llm: {
    provider: ProviderType.GEMINI,
    apiKey: process.env.GOOGLE_API_KEY,
    modelName: 'gemini-1.5-pro-latest',
    // temperature: 0.3,
    // maxTokens: 1024,
    // defaultHeaders: {} // OpenRouter only
  },
  // Memory (toggleable, defaults off)
  memory: {
    enabled: false,
    type: 'in-memory', // or 'redis' | 'postgres'
    maxMessages: 20,
    // Redis options
    redis: {
      clientInstance: null, // your redis client
      keyPrefix: 'vectra:chat:'
    },
    // Postgres options
    postgres: {
      clientInstance: null, // your Prisma client
      tableName: 'ChatMessage',
      columnMap: { sessionId: 'sessionId', role: 'role', content: 'content', createdAt: 'createdAt' }
    }
  },
  // Ingestion (rate limit is toggleable, defaults off)
  ingestion: { rateLimitEnabled: false, concurrencyLimit: 5 },
  // Database
  database: {
    type: 'chroma', // 'prisma' | 'qdrant' | 'milvus'
    clientInstance: null, // your DB client
    tableName: 'Document',
    columnMap: { content: 'content', vector: 'embedding', metadata: 'metadata' } // Prisma only
  },
  // Chunking
  chunking: {
    strategy: ChunkingStrategy.RECURSIVE, // or ChunkingStrategy.AGENTIC
    chunkSize: 1000,
    chunkOverlap: 200,
    // separators: ['\n\n', '\n', ' ', '']
    // agenticLlm: { provider: ProviderType.OPENAI, apiKey: process.env.OPENAI_API_KEY, modelName: 'gpt-4o-mini' } // required for AGENTIC
  },
  // Retrieval
  retrieval: {
    strategy: RetrievalStrategy.HYBRID, // NAIVE | HYDE | MULTI_QUERY | HYBRID | MMR
    // llmConfig: { provider: ProviderType.OPENAI, apiKey: process.env.OPENAI_API_KEY, modelName: 'gpt-4o-mini' }, // HYDE/MULTI_QUERY
    // hybridAlpha: 0.5 // tuning
    // mmrLambda: 0.5,
    // mmrFetchK: 20
  },
  // Reranking
  reranking: {
    enabled: false,
    // topN: 5,
    // windowSize: 20,
    // llmConfig: { provider: ProviderType.ANTHROPIC, apiKey: process.env.ANTHROPIC_API_KEY, modelName: 'claude-3-haiku' }
  },
  // Metadata
  metadata: { enrichment: false }, // summary, keywords, hypothetical_questions
  // Query Planning
  queryPlanning: { tokenBudget: 2048, preferSummariesBelow: 1024, includeCitations: true },
  // Grounding
  grounding: { enabled: false, strict: false, maxSnippets: 4 },
  // Generation
  generation: { outputFormat: 'text', structuredOutput: 'none' }, // 'json' and 'citations' supported
  // Prompts
  prompts: { query: 'Use only the following context.\nContext:\n{{context}}\n\nQ: {{question}}' },
  // Callbacks
  callbacks: []
};
```
- Ingestion
  - File Loading: PDF, DOCX, TXT, MD, XLSX
  - Directory Walking: `await client.ingestDocuments('./folder')` recursively processes supported files
  - Index Management (Postgres/Prisma): `await client.vectorStore.ensureIndexes()` after ingestion
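  - A minimal end-to-end sketch (assumes a `config` object like the template above, with a Prisma/Postgres database for the index step):
```javascript
// Sketch: ingest a folder, then build the Postgres indexes (Prisma backend only).
const { VectraClient } = require('vectra-js');
const client = new VectraClient(config);

await client.ingestDocuments('./docs');    // walks the directory and processes supported files
await client.vectorStore.ensureIndexes();  // ivfflat (vector) + GIN (FTS) indexes
```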
- Querying
  - Standard:
```javascript
const { answer } = await client.queryRAG("Question");
```
  - Stateful Chat (Memory):
```javascript
const sessionId = "user-123";
const { answer } = await client.queryRAG("Does this apply to contractors?", null, false, sessionId);
```
  - Streaming + Filtering:
```javascript
const stream = await client.queryRAG(
  "Draft a welcome memo...",
  { docTitle: "Handbook" },
  true
);
for await (const chunk of stream) process.stdout.write(chunk.delta || '');
```

## 3. Database & Vector Stores
- Supported Backends
  - Prisma (Postgres + pgvector): rich SQL, hybrid search and indexes
  - ChromaDB: simple local collections, easy first-run
  - Qdrant: high-performance vector search
  - Milvus: scalable vector database
- Prisma (Postgres + pgvector)
  - Prerequisite: enable `vector` extension
    - `CREATE EXTENSION IF NOT EXISTS vector;`
  - Schema (`schema.prisma`)
```prisma
model Document {
  id        String   @id @default(uuid())
  content   String
  metadata  Json
  vector    Unsupported("vector")? // pgvector type
  createdAt DateTime @default(now())
}
```
  - Column Mapping: `columnMap` maps SDK fields to DB columns, e.g. `{ content: 'content', vector: 'embedding', metadata: 'metadata' }`
  - Index Management: ivfflat for vector cosine ops and GIN for FTS
    - `await client.vectorStore.ensureIndexes()`
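  - A hedged wiring sketch (assumes a generated `@prisma/client` and the `Document` model above; the embedding/llm blocks mirror the Quickstart):
```javascript
// Sketch: hand an existing PrismaClient to the Prisma vector store backend.
// columnMap follows the schema above; adjust names if your columns differ.
const { ProviderType } = require('vectra-js');
const { PrismaClient } = require('@prisma/client');
const prisma = new PrismaClient();

const config = {
  embedding: { provider: ProviderType.OPENAI, apiKey: process.env.OPENAI_API_KEY, modelName: 'text-embedding-3-small' },
  llm: { provider: ProviderType.GEMINI, apiKey: process.env.GOOGLE_API_KEY, modelName: 'gemini-1.5-pro-latest' },
  database: {
    type: 'prisma',
    clientInstance: prisma,
    tableName: 'Document',
    columnMap: { content: 'content', vector: 'vector', metadata: 'metadata' }
  }
};
```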
- ChromaDB / Qdrant / Milvus
  - Chroma: `const { ChromaClient } = require('chromadb'); const chroma = new ChromaClient();`
  - Qdrant: `const qdrant = new QdrantClient({ url, apiKey });`
  - Milvus: `const milvus = new MilvusClient({ address });`
  - Pass `clientInstance` and `tableName` to the `database` config.

## 4. Providers (LLM & Embeddings)
- Provider Setup
  - OpenAI:
    - Embeddings: `text-embedding-3-small`, `text-embedding-3-large`
    - Generation: `gpt-4o`, `gpt-4o-mini`
  - Gemini:
    - Generation: `gemini-1.5-pro-latest`
  - Anthropic:
    - Generation only (`claude-3-haiku`, `claude-3-opus`) — use a different embedding provider
  - Ollama:
    - Local development; set `provider = ProviderType.OLLAMA`
    - Defaults to `http://localhost:11434` (override with `baseUrl`)
  - OpenRouter:
    - Unified gateway; set `llm.provider = ProviderType.OPENROUTER` and `modelName` to e.g. `openai/gpt-4o`
  - HuggingFace:
    - Use the Inference API for embeddings and generation with open-source models
- Customizing Models
  - `temperature`, `maxTokens`, and embedding `dimensions` (must match the pgvector column for Prisma)
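  - A hedged local-LLM example (the model name below is illustrative, not taken from this package):
```javascript
// Sketch: route generation to a local Ollama server.
const { ProviderType } = require('vectra-js');

const localLlm = {
  provider: ProviderType.OLLAMA,
  modelName: 'llama3',               // any model you have pulled into Ollama
  baseUrl: 'http://localhost:11434', // default; override if Ollama runs elsewhere
  temperature: 0.2,
  maxTokens: 1024
};
// Use this object as the `llm` block of your VectraConfig.
```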

## 5. Advanced Concepts
- Chunking Strategies
  - Recursive: control `chunkSize`, `chunkOverlap`, and optional `separators`
  - Agentic: configure `chunking.agenticLlm`; uses an LLM to place semantic boundaries
- Retrieval Strategies
  - Naive: cosine similarity on vectors
  - HyDE: generate a hypothetical answer and search on its embedding
  - Hybrid Search (RRF): combine vector search and keyword FTS using reciprocal rank fusion (see the sketch after this list)
  - Multi-Query: produce query variations via LLM to improve recall
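  - A minimal sketch of reciprocal rank fusion (illustrative only; the SDK's internal fusion may differ, and `k = 60` is a common default rather than a value from this package):
```javascript
// Illustrative RRF: score(doc) = sum over ranked lists of 1 / (k + rank), rank starting at 1.
function reciprocalRankFusion(rankedLists, k = 60) {
  const scores = new Map();
  for (const list of rankedLists) {
    list.forEach((doc, idx) => {
      scores.set(doc.id, (scores.get(doc.id) || 0) + 1 / (k + idx + 1));
    });
  }
  return [...scores.entries()]
    .sort((a, b) => b[1] - a[1])
    .map(([id, score]) => ({ id, score }));
}

// e.g. fuse vector-search hits with keyword (FTS) hits:
// const fused = reciprocalRankFusion([vectorHits, keywordHits]);
```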
- Reranking
  - Enable with `reranking.enabled`; tune `topN` and `windowSize`
- Metadata Enrichment
  - Set `metadata.enrichment = true` to generate summaries, keywords, and hypothetical questions during ingestion
- Conversation Memory
  - Enable stateful chat by setting `memory` config and passing `sessionId` to `queryRAG`.
  - Automatically appends history to prompts and saves interactions.
- Production Evaluation
  - Use `client.evaluate(testSet)` to measure Faithfulness (answer derived from context) and Relevance (answer addresses question).
  - Returns per-test scores (0-1) for each question.
```javascript
// Example Test Set structure
const testSet = [
  {
    question: "What is the remote work policy?",
    expectedGroundTruth: "Employees can work remotely up to 3 days a week."
  }
];
const report = await client.evaluate(testSet);
const averageFaithfulness = report.length
  ? report.reduce((s, r) => s + (r.faithfulness || 0), 0) / report.length
  : 0;
const averageRelevance = report.length
  ? report.reduce((s, r) => s + (r.relevance || 0), 0) / report.length
  : 0;
console.log({ averageFaithfulness, averageRelevance, report });
```

## 6. Production Guide
- Query Planning & Grounding
  - Token Budgets: `queryPlanning.tokenBudget`
  - Grounding: `grounding.enabled` and `grounding.strict` to restrict answers to grounded snippets
  - Citations: include titles/sections/pages via `queryPlanning.includeCitations`; parse when using `generation.structuredOutput = 'citations'`
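  - A hedged config fragment combining these knobs (values are illustrative):
```javascript
// Sketch: restrict answers to grounded snippets and emit parseable citations.
const productionOptions = {
  queryPlanning: { tokenBudget: 2048, preferSummariesBelow: 1024, includeCitations: true },
  grounding: { enabled: true, strict: true, maxSnippets: 4 },
  generation: { outputFormat: 'text', structuredOutput: 'citations' }
};
// Merge these keys into your VectraConfig alongside embedding/llm/database.
```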
- Observability & Debugging
  - Logging: use `StructuredLoggingCallbackHandler` for JSON events
  - Tracing: hook into pipeline events like `onRetrievalStart`, `onGenerationEnd`
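  - A hedged sketch of a custom handler (the exact handler base class is not shown in this diff; a plain object exposing the named hooks is assumed):
```javascript
// Sketch: log retrieval latency via the documented pipeline hooks.
const timingCallback = {
  onRetrievalStart(payload) {
    this._t0 = Date.now();
    console.log(JSON.stringify({ event: 'retrieval_start', payload }));
  },
  onGenerationEnd(payload) {
    console.log(JSON.stringify({ event: 'generation_end', ms: Date.now() - this._t0, payload }));
  }
};
// Register it via the config: callbacks: [timingCallback]
```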
- CLI Tools
  - Global or local `vectra` binary for ingestion and queries without writing code
    - `vectra ingest ./docs --config=./config.json`
    - `vectra query "What is our leave policy?" --config=./config.json --stream`

## 7. API Reference
- VectraClient
  - Constructor: `new VectraClient(config)`
  - Methods:
    - `ingestDocuments(path: string): Promise<void>`
    - `queryRAG(query: string, filter?: object | null, stream?: boolean, sessionId?: string | null): Promise<QueryResponse | AsyncGenerator>`
    - `listDocuments({ filter?: object | null, limit?: number, offset?: number }): Promise<Array<{ id, content, metadata }>>`
    - `deleteDocuments({ ids?: string[] | null, filter?: object | null }): Promise<void>`
    - `updateDocuments(docs: Array<{ id, content, metadata? }>): Promise<void>`
- VectorStore Interface
  - Extend and implement:
    - `addDocuments(docs)`
    - `upsertDocuments(docs)`
    - `similaritySearch(vector, limit = 5, filter = null)`
    - Optional: `hybridSearch(text, vector, limit = 5, filter = null)`
    - `listDocuments({ filter, limit, offset })`
    - `deleteDocuments({ ids, filter })`
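  - A hedged skeleton of a custom store (the deep require path and the in-memory cosine search are illustrative assumptions, not part of the documented API):
```javascript
// Sketch: a toy in-memory store implementing the interface above.
// The require path mirrors how the bundled backends import VectorStore (src/interfaces.js).
const { VectorStore } = require('vectra-js/src/interfaces');

class InMemoryVectorStore extends VectorStore {
  constructor() {
    super();
    this.rows = new Map(); // id -> { id, content, embedding, metadata }
  }

  async addDocuments(docs) {
    for (const d of docs) this.rows.set(d.id, d);
  }

  async upsertDocuments(docs) {
    return this.addDocuments(docs);
  }

  async similaritySearch(vector, limit = 5, filter = null) {
    const cosine = (a, b) => {
      let dot = 0, na = 0, nb = 0;
      for (let i = 0; i < a.length; i++) { dot += a[i] * b[i]; na += a[i] ** 2; nb += b[i] ** 2; }
      return dot / (Math.sqrt(na) * Math.sqrt(nb) || 1);
    };
    const matches = (r) => !filter || Object.entries(filter).every(([k, v]) => r.metadata?.[k] === v);
    return [...this.rows.values()]
      .filter(matches)
      .map(r => ({ content: r.content, metadata: r.metadata, score: cosine(vector, r.embedding) }))
      .sort((a, b) => b.score - a.score)
      .slice(0, limit);
  }

  async listDocuments({ filter = null, limit = 100, offset = 0 } = {}) {
    const matches = (r) => !filter || Object.entries(filter).every(([k, v]) => r.metadata?.[k] === v);
    return [...this.rows.values()].filter(matches).slice(offset, offset + limit)
      .map(r => ({ id: r.id, content: r.content, metadata: r.metadata }));
  }

  async deleteDocuments({ ids = null, filter = null } = {}) {
    for (const id of ids || []) this.rows.delete(id);
    if (filter) {
      for (const [id, r] of this.rows) {
        if (Object.entries(filter).every(([k, v]) => r.metadata?.[k] === v)) this.rows.delete(id);
      }
    }
  }
}
```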
- Type Definitions (shape)
  - `VectraConfig`: `{ embedding, llm, database, chunking?, retrieval?, reranking?, metadata?, queryPlanning?, grounding?, generation?, prompts?, callbacks? }`
  - `QueryResponse`: `{ answer: string | object, sources: object[] }` or streaming `AsyncGenerator<{ delta, finish_reason, usage }>`

## 8. Recipes / FAQ
- How do I use a local LLM?
  - Use **Ollama** (`ProviderType.OLLAMA`) for the easiest local setup.
  - Alternatively, use the HuggingFace Inference API or a custom provider.
- How do I extract JSON from the answer?
  - Set `generation.outputFormat = 'json'`, parse `answer`, and fall back to the raw string on parse errors.
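  - A small hedged helper for that fallback:
```javascript
// Sketch: with generation.outputFormat = 'json', try to parse; keep the raw string on failure.
const { answer } = await client.queryRAG('List the leave types as JSON');
let parsed;
try {
  parsed = typeof answer === 'string' ? JSON.parse(answer) : answer;
} catch (_) {
  parsed = answer; // fall back to the raw string
}
```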
- Why is my retrieval slow?
  - Ensure Prisma indexes are created (`ensureIndexes()`); confirm embedding `dimensions` match the pgvector column; consider Hybrid Search and metadata filters.
package/index.js
ADDED
package/package.json
ADDED
@@ -0,0 +1,53 @@
{
  "name": "vectra-js",
  "version": "0.9.0",
  "description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1",
    "prisma:generate": "prisma generate",
    "lint": "eslint . --ext .js,.cjs,.mjs",
    "lint:fix": "eslint . --ext .js,.cjs,.mjs --fix",
    "prepublishOnly": "pnpm run lint"
  },
  "bin": {
    "vectra": "bin/vectra.js"
  },
  "keywords": [
    "rag",
    "llm",
    "openai",
    "anthropic",
    "gemini",
    "vector-database"
  ],
  "repository": {
    "type": "git",
    "url": "git+https://github.com/iamabhishek-n/vectra-js.git"
  },
  "bugs": {
    "url": "https://github.com/iamabhishek-n/vectra-js/issues"
  },
  "homepage": "https://github.com/iamabhishek-n/vectra-js#readme",
  "author": "Abhishek N",
  "license": "MIT",
  "dependencies": {
    "@anthropic-ai/sdk": "^0.20.0",
    "@google/genai": "^1.30.0",
    "openai": "^4.0.0",
    "zod": "^3.22.0",
    "pdf-parse": "^1.1.1",
    "mammoth": "^1.7.0",
    "xlsx": "^0.18.5",
    "uuid": "^9.0.0",
    "dotenv": "^16.0.0"
  },
  "peerDependencies": {
    "@prisma/client": "^5.0.0"
  },
  "devDependencies": {
    "prisma": "^5.22.0",
    "eslint": "^9.13.0",
    "globals": "^13.24.0"
  }
}
package/src/backends/anthropic.js
ADDED
@@ -0,0 +1,37 @@
const Anthropic = require('@anthropic-ai/sdk');

class AnthropicBackend {
  constructor(config) {
    this.config = config;
    this.client = new Anthropic({ apiKey: config.apiKey || process.env.ANTHROPIC_API_KEY });
  }

  async generate(prompt, sys) {
    const res = await this.client.messages.create({
      model: this.config.modelName,
      max_tokens: this.config.maxTokens,
      temperature: this.config.temperature,
      system: sys,
      messages: [{ role: 'user', content: prompt }]
    });
    return res.content[0].text;
  }

  async *generateStream(prompt, sys) {
    const stream = await this.client.messages.create({
      model: this.config.modelName,
      max_tokens: this.config.maxTokens,
      temperature: this.config.temperature,
      system: sys,
      messages: [{ role: 'user', content: prompt }],
      stream: true
    });

    for await (const chunk of stream) {
      if (chunk.type === 'content_block_delta') {
        yield { delta: chunk.delta.text, finish_reason: null, usage: null };
      }
    }
  }
}
module.exports = { AnthropicBackend };
package/src/backends/chroma_store.js
ADDED
@@ -0,0 +1,110 @@
const { v4: uuidv4 } = require('uuid');
const { VectorStore } = require('../interfaces');

class ChromaVectorStore extends VectorStore {
  constructor(config) {
    super();
    this.client = config.clientInstance;
    this.collectionName = config.tableName || "rag_collection";
    this.collection = null;
  }

  async _init() {
    if (!this.collection) {
      this.collection = await this.client.getOrCreateCollection({ name: this.collectionName });
    }
  }

  async addDocuments(docs) {
    await this._init();
    const ids = docs.map((d) => d.id || uuidv4());
    const embeddings = docs.map(d => d.embedding);
    const metadatas = docs.map(d => d.metadata);
    const documents = docs.map(d => d.content);

    await this.collection.add({
      ids,
      embeddings,
      metadatas,
      documents
    });
  }

  async upsertDocuments(docs) {
    await this._init();
    const ids = docs.map((d) => d.id || uuidv4());
    const embeddings = docs.map(d => d.embedding);
    const metadatas = docs.map(d => d.metadata);
    const documents = docs.map(d => d.content);
    if (typeof this.collection.upsert === 'function') {
      await this.collection.upsert({ ids, embeddings, metadatas, documents });
      return;
    }
    if (typeof this.collection.delete === 'function') {
      try { await this.collection.delete({ ids }); } catch (_) {}
    }
    await this.collection.add({ ids, embeddings, metadatas, documents });
  }

  async fileExists(sha256, size, lastModified) {
    await this._init();
    try {
      const res = await this.collection.get({ where: { fileSHA256: sha256, fileSize: size, lastModified } });
      return !!(res && Array.isArray(res.ids) && res.ids.length > 0);
    } catch (_) {
      return false;
    }
  }

  async similaritySearch(vector, limit = 5, filter = null) {
    await this._init();
    const results = await this.collection.query({
      queryEmbeddings: [vector],
      nResults: limit,
      where: filter || undefined
    });

    if (!results.documents || results.documents.length === 0) return [];

    const out = [];
    // Chroma returns array of arrays for batch queries
    for (let i = 0; i < results.documents[0].length; i++) {
      out.push({
        content: results.documents[0][i],
        metadata: results.metadatas[0][i],
        score: 1.0 - (results.distances ? results.distances[0][i] : 0)
      });
    }
    return out;
  }

  async listDocuments({ filter = null, limit = 100, offset = 0 } = {}) {
    await this._init();
    const lim = Math.max(1, Math.min(1000, Number(limit) || 100));
    const off = Math.max(0, Number(offset) || 0);
    const res = await this.collection.get({
      where: filter || undefined,
      limit: lim,
      offset: off,
      include: ['documents', 'metadatas']
    });
    const ids = Array.isArray(res?.ids) ? res.ids : [];
    const documents = Array.isArray(res?.documents) ? res.documents : [];
    const metadatas = Array.isArray(res?.metadatas) ? res.metadatas : [];
    return ids.map((id, i) => ({ id, content: documents[i], metadata: metadatas[i] }));
  }

  async deleteDocuments({ ids = null, filter = null } = {}) {
    await this._init();
    if (Array.isArray(ids) && ids.length > 0) {
      await this.collection.delete({ ids });
      return;
    }
    if (filter) {
      await this.collection.delete({ where: filter });
      return;
    }
    throw new Error('deleteDocuments requires ids or filter');
  }
}
module.exports = { ChromaVectorStore };
package/src/backends/gemini.js
ADDED
@@ -0,0 +1,68 @@
const { GoogleGenAI } = require('@google/genai');

class GeminiBackend {
  constructor(config) {
    this.config = config;
    const key = config.apiKey || process.env.GOOGLE_API_KEY || process.env.API_KEY;
    if (!key) throw new Error("Gemini API Key missing.");
    this.client = new GoogleGenAI({ apiKey: key });
  }

  async _retry(fn, retries = 3) {
    for (let i = 0; i < retries; i++) {
      try { return await fn(); }
      catch (e) {
        if (i === retries - 1) throw e;
        await new Promise(r => setTimeout(r, 1000 * Math.pow(2, i)));
      }
    }
  }

  async embedDocuments(texts) {
    const res = await this._retry(() => this.client.models.embedContent({
      model: this.config.modelName,
      contents: texts,
      config: { outputDimensionality: this.config.dimensions }
    }));
    const out = res?.embeddings || res?.data?.embeddings;
    if (!out || !Array.isArray(out)) throw new Error('Gemini embedding response missing embeddings');
    return out.map(e => e.values || e.embedding?.values || e);
  }
  async embedQuery(text) {
    const res = await this._retry(() => this.client.models.embedContent({
      model: this.config.modelName,
      contents: text,
      config: { outputDimensionality: this.config.dimensions }
    }));
    const out = res?.embeddings || res?.data?.embeddings;
    const values = Array.isArray(out) ? out[0]?.values : undefined;
    if (!values) throw new Error('Gemini embedding response missing values');
    return values;
  }

  async generate(prompt, sys) {
    const res = await this._retry(() => this.client.models.generateContent({
      model: this.config.modelName,
      contents: prompt,
      config: { systemInstruction: sys, temperature: this.config.temperature, maxOutputTokens: this.config.maxTokens }
    }));
    return res.text || "";
  }

  async *generateStream(prompt, sys) {
    const result = await this.client.models.generateContentStream({
      model: this.config.modelName,
      contents: prompt,
      config: {
        systemInstruction: sys,
        temperature: this.config.temperature,
        maxOutputTokens: this.config.maxTokens
      }
    });
    for await (const chunk of result) {
      const text = typeof chunk.text === 'function' ? chunk.text() : chunk.text;
      if (text) yield { delta: text, finish_reason: null, usage: null };
    }
  }
}
module.exports = { GeminiBackend };