vectra-js 0.9.0
- package/LICENSE +21 -0
- package/README.md +625 -0
- package/bin/vectra.js +76 -0
- package/documentation.md +288 -0
- package/index.js +11 -0
- package/package.json +53 -0
- package/src/backends/anthropic.js +37 -0
- package/src/backends/chroma_store.js +110 -0
- package/src/backends/gemini.js +68 -0
- package/src/backends/huggingface.js +52 -0
- package/src/backends/milvus_store.js +61 -0
- package/src/backends/ollama.js +63 -0
- package/src/backends/openai.js +46 -0
- package/src/backends/openrouter.js +51 -0
- package/src/backends/prisma_store.js +160 -0
- package/src/backends/qdrant_store.js +68 -0
- package/src/callbacks.js +31 -0
- package/src/config.js +123 -0
- package/src/core.js +591 -0
- package/src/evaluation/index.js +15 -0
- package/src/interfaces.js +21 -0
- package/src/memory.js +96 -0
- package/src/processor.js +155 -0
- package/src/reranker.js +26 -0
- package/src/ui/index.html +665 -0
- package/src/ui/script.js +785 -0
- package/src/ui/style.css +281 -0
- package/src/webconfig_server.js +175 -0
package/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Abhishek N

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
package/README.md
ADDED
@@ -0,0 +1,625 @@
# Vectra (Node.js)

A production-ready, provider-agnostic Node.js SDK for end-to-end RAG (Retrieval-Augmented Generation) pipelines.

## Features

* **Multi-Provider Support**: First-class support for **OpenAI**, **Gemini**, and **Anthropic**.
* **Modular Vector Store**:
  * **Prisma**: Use your existing PostgreSQL database with `pgvector`.
  * **ChromaDB**: Native support for the open-source vector database.
  * **Qdrant & Milvus**: Additional backends for portability.
  * **Extensible**: Easily add others by extending the `VectorStore` class.
* **Advanced Chunking**:
  * **Recursive**: Smart splitting based on characters and separators.
  * **Token-Aware**: Sentence/paragraph fallback and adaptive overlap based on local entropy.
  * **Agentic**: Uses an LLM to split text into semantically complete propositions, with JSON validation and deduplication.
* **Advanced Retrieval Strategies**:
  * **Naive**: Standard cosine-similarity search.
  * **HyDE (Hypothetical Document Embeddings)**: Generates a hypothetical answer to the query and searches for that instead.
  * **Multi-Query**: Generates multiple variations of the query to catch different phrasings.
  * **Hybrid Search**: Combines semantic (pgvector) and lexical (FTS) results using **Reciprocal Rank Fusion (RRF)**; see the scoring sketch after this list.
  * **MMR**: Diversifies results to reduce redundancy.
* **Streaming**: Full support for token-by-token streaming responses.
* **Reranking**: LLM-based reranking to re-order retrieved documents for maximum relevance.
* **File Support**: Native parsing for PDF, DOCX, XLSX, TXT, and Markdown.
* **Index Helpers**: ivfflat for pgvector, GIN FTS index, optional tsvector trigger.
* **Embedding Cache**: SHA-256 content-based cache to skip re-embedding.
* **Batch Embeddings**: Gemini and OpenAI adapters support array inputs and dimension control.
* **Metadata Enrichment**: Per-chunk summary, keywords, and hypothetical questions; page and section mapping for PDFs/Markdown. Retrieval boosts matching keywords and uses summaries in prompts.
* **Conversation Memory**: Built-in chat history management for context-aware multi-turn conversations.
* **Production Evaluation**: Integrated evaluation module to measure RAG quality (faithfulness, relevance).
* **Local LLMs**: First-class support for **Ollama** for local/offline development.
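
For intuition, here is a minimal, illustrative sketch of RRF scoring (not the SDK's internal implementation; the constant `K = 60` and the `id` field on results are assumptions for the example):

```javascript
// Illustrative RRF: fuse semantic and lexical result lists by rank.
// Each document scores the sum of 1 / (K + rank) across the lists it appears in.
function reciprocalRankFusion(resultLists, K = 60) {
  const scores = new Map();
  for (const list of resultLists) {
    list.forEach((doc, index) => {
      const rank = index + 1; // ranks are 1-based
      scores.set(doc.id, (scores.get(doc.id) || 0) + 1 / (K + rank));
    });
  }
  // Highest fused score first.
  return [...scores.entries()].sort((a, b) => b[1] - a[1]).map(([id]) => id);
}
```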
---

## Installation

```bash
# Library (npm)
npm install vectra-js @prisma/client
npm install chromadb # optional: ChromaDB backend

# Library (pnpm)
pnpm add vectra-js @prisma/client
pnpm add chromadb # optional

# CLI (global install)
npm i -g vectra-js # or: pnpm add -g vectra-js

# CLI (no global install)
# Uses the local project bin if vectra-js is installed
npx vectra ingest ./docs --config=./config.json

# CLI (one-off run with pnpm dlx)
pnpm dlx vectra-js vectra query "What is our leave policy?" --config=./config.json --stream
```

---

## Usage Guide

### 1. Configuration

The SDK uses a strictly typed configuration object (validated with Zod).

```javascript
const { ProviderType, ChunkingStrategy, RetrievalStrategy } = require('vectra-js');

const config = {
  // 1. Embedding Provider
  embedding: {
    provider: ProviderType.OPENAI,
    apiKey: process.env.OPENAI_API_KEY,
    modelName: 'text-embedding-3-small',
    dimensions: 1536 // Optional
  },

  // 2. LLM Provider (for Generation)
  llm: {
    provider: ProviderType.GEMINI,
    apiKey: process.env.GOOGLE_API_KEY,
    modelName: 'gemini-1.5-pro-latest'
  },

  // 3. Database (Modular)
  database: {
    type: 'prisma', // or 'chroma'
    clientInstance: prismaClient, // Your instantiated DB client
    tableName: 'Document', // Table or collection name
    columnMap: { // Map SDK fields to your DB columns
      content: 'text',
      vector: 'embedding',
      metadata: 'meta'
    }
  },

  // 4. Chunking (Optional)
  chunking: {
    strategy: ChunkingStrategy.RECURSIVE,
    chunkSize: 1000,
    chunkOverlap: 200
  },

  // 5. Retrieval (Optional)
  retrieval: {
    strategy: RetrievalStrategy.HYBRID, // Uses RRF
    llmConfig: { /* Config for the query-rewriting LLM */ }
  }
};
```

### Configuration Reference

- Embedding
  - `provider`: one of `ProviderType.OPENAI`, `ProviderType.GEMINI`
  - `apiKey`: provider API key string
  - `modelName`: embedding model identifier
  - `dimensions`: number; ensures the vector size matches the DB `pgvector(n)`
- LLM
  - `provider`: `ProviderType.OPENAI` | `ProviderType.GEMINI` | `ProviderType.ANTHROPIC` | `ProviderType.OLLAMA`
  - `apiKey`: provider API key string (optional for Ollama)
  - `modelName`: generation model identifier
  - `baseUrl`: optional custom URL (e.g., for Ollama)
  - `temperature`: number; optional sampling temperature
  - `maxTokens`: number; optional max output tokens
- Memory
  - `enabled`: boolean; toggle memory on/off (default: false)
  - `type`: `'in-memory' | 'redis' | 'postgres'`
  - `maxMessages`: number; number of recent messages to retain (default: 20)
  - `redis`: `{ clientInstance, keyPrefix }` where `keyPrefix` defaults to `'vectra:chat:'`
  - `postgres`: `{ clientInstance, tableName, columnMap }` where `tableName` defaults to `'ChatMessage'` and `columnMap` maps `{ sessionId, role, content, createdAt }`
- Ingestion
  - `rateLimitEnabled`: boolean; toggle rate limiting on/off (default: false)
  - `concurrencyLimit`: number; max concurrent embedding requests when enabled (default: 5)
  - `mode`: `'skip' | 'append' | 'replace'`; idempotency behavior (default: `'skip'`)
- Database
  - `type`: `prisma` | `chroma` | `qdrant` | `milvus`
  - `clientInstance`: instantiated client for the chosen backend
  - `tableName`: table/collection name (Postgres/Qdrant/Milvus)
  - `columnMap`: maps SDK fields to DB columns
    - `content`: text column name
    - `vector`: embedding vector column name (for Postgres pgvector)
    - `metadata`: JSON column name for per-chunk metadata
- Chunking
  - `strategy`: `ChunkingStrategy.RECURSIVE` | `ChunkingStrategy.AGENTIC`
  - `chunkSize`: number; preferred chunk size (characters)
  - `chunkOverlap`: number; overlap between adjacent chunks (characters)
  - `separators`: array of string separators to split on (optional)
- Retrieval
  - `strategy`: `RetrievalStrategy.NAIVE` | `HYDE` | `MULTI_QUERY` | `HYBRID` | `MMR`
  - `llmConfig`: optional LLM config for query rewriting (HyDE/Multi-Query)
  - `mmrLambda`: number in 0..1; tradeoff between relevance and diversity (default: 0.5)
  - `mmrFetchK`: candidate pool size for MMR (default: 20)
- Reranking
  - `enabled`: boolean; enable LLM-based reranking
  - `topN`: number; final number of docs to keep (optional)
  - `windowSize`: number; number of docs considered before reranking
  - `llmConfig`: optional LLM config for the reranker
- Metadata
  - `enrichment`: boolean; generate `summary`, `keywords`, `hypothetical_questions`
- Callbacks
  - `callbacks`: array of handlers; use `LoggingCallbackHandler` or `StructuredLoggingCallbackHandler`
- Index Helpers (Postgres + Prisma)
  - `ensureIndexes()`: creates ivfflat and GIN FTS indexes and an optional `tsvector` trigger

### 2. Initialization & Ingestion

```javascript
const { VectraClient } = require('vectra-js');
const client = new VectraClient(config);

// Ingest a file (supports .pdf, .docx, .txt, .md, .xlsx)
// This will: Load -> Chunk -> Embed -> Store
await client.ingestDocuments('./documents/employee_handbook.pdf');

// Ensure indexes (Postgres + Prisma)
if (config.database.type === 'prisma' && client.vectorStore.ensureIndexes) {
  await client.vectorStore.ensureIndexes();
}

// Enable metadata enrichment via config:
// metadata: { enrichment: true }
```
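
Re-ingestion behavior is governed by the `ingestion.mode` option from the Configuration Reference. A minimal sketch (the mode values come from the reference; the file path is illustrative):

```javascript
// 'skip' (default) leaves previously ingested content alone,
// 'replace' re-embeds and overwrites it, 'append' adds new chunks alongside.
const config = {
  // ...
  ingestion: { mode: 'replace' }
};
await client.ingestDocuments('./documents/employee_handbook.pdf');
```
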
### Document Management

```javascript
// List recent documents (by metadata filter)
const docs = await client.listDocuments({ filter: { docTitle: 'Employee Handbook' }, limit: 50 });

// Delete by ids or metadata filter
await client.deleteDocuments({ ids: docs.map(d => d.id) });
// or:
await client.deleteDocuments({ filter: { absolutePath: '/abs/path/to/file.pdf' } });

// Update existing docs (requires backend upsert support)
await client.updateDocuments([
  { id: docs[0].id, content: 'Updated content', metadata: { docTitle: 'Employee Handbook' } }
]);
```

### 3. Querying (Standard)

```javascript
const response = await client.queryRAG("What is the vacation policy?");

console.log("Answer:", response.answer);
console.log("Sources:", response.sources); // Metadata of retrieved chunks
```

### 4. Querying (Streaming)

Ideal for chat UIs. Returns an async generator of unified chunks.

```javascript
const stream = await client.queryRAG("Draft a welcome email...", null, true);

for await (const chunk of stream) {
  process.stdout.write(chunk.delta || "");
}
```

### 5. Conversation Memory

Enable multi-turn conversations by configuring memory and passing a `sessionId`.

```javascript
// In config (enable memory; default is off)
const config = {
  // ...
  memory: { enabled: true, type: 'in-memory', maxMessages: 10 }
};

// Redis-backed memory
const redis = /* your Redis client instance */;
const configRedis = {
  // ...
  memory: {
    enabled: true,
    type: 'redis',
    redis: { clientInstance: redis, keyPrefix: 'vectra:chat:' },
    maxMessages: 20
  }
};

// Postgres-backed memory
const prisma = /* your Prisma client instance */;
const configPostgres = {
  // ...
  memory: {
    enabled: true,
    type: 'postgres',
    postgres: {
      clientInstance: prisma,
      tableName: 'ChatMessage',
      columnMap: { sessionId: 'sessionId', role: 'role', content: 'content', createdAt: 'createdAt' }
    },
    maxMessages: 20
  }
};

// In your app:
const sessionId = 'user-123-session-abc';
const response = await client.queryRAG("What is the refund policy?", null, false, sessionId);
const followUp = await client.queryRAG("Does it apply to sale items?", null, false, sessionId);
```

### 6. Production Evaluation

Measure the quality of your RAG pipeline using the built-in evaluation module.

```javascript
const testSet = [
  {
    question: "What is the capital of France?",
    expectedGroundTruth: "Paris is the capital of France."
  }
];

const results = await client.evaluate(testSet);

console.log(`Faithfulness: ${results.averageFaithfulness}`);
console.log(`Relevance: ${results.averageRelevance}`);
```

---

## Supported Providers & Backends

| Feature | OpenAI | Gemini | Anthropic | Ollama | OpenRouter | HuggingFace |
| :--- | :---: | :---: | :---: | :---: | :---: | :---: |
| **Embeddings** | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ |
| **Generation** | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| **Streaming** | ✅ | ✅ | ✅ | ✅ | ✅ | ⚠️ |

### Ollama (Local)
- Use Ollama for local, offline development.
- Set `provider = ProviderType.OLLAMA`.
- The default `baseUrl` is `http://localhost:11434`.

```javascript
const config = {
  embedding: { provider: ProviderType.OLLAMA, modelName: 'nomic-embed-text' },
  llm: { provider: ProviderType.OLLAMA, modelName: 'llama3' }
};
```

### OpenRouter (Generation)
- Use OpenRouter as a unified generation provider.
- Set `llm.provider = ProviderType.OPENROUTER` and `llm.modelName` to a supported model (e.g., `openai/gpt-4o`).
- Provide `OPENROUTER_API_KEY`; optional attribution via `OPENROUTER_REFERER` and `OPENROUTER_TITLE`.

```javascript
const config = {
  llm: {
    provider: ProviderType.OPENROUTER,
    apiKey: process.env.OPENROUTER_API_KEY,
    modelName: 'openai/gpt-4o',
    defaultHeaders: {
      'HTTP-Referer': 'https://your.app',
      'X-Title': 'Your App'
    }
  }
};
```

### Database Schemas

**Prisma (PostgreSQL)**
```prisma
model Document {
  id        String   @id @default(uuid())
  content   String
  metadata  Json
  vector    Unsupported("vector")? // pgvector type
  createdAt DateTime @default(now())
}
```

**ChromaDB**
No schema required; collections are created automatically.
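
A minimal sketch of pointing the SDK at Chroma, assuming the `chromadb` JS client against a local server, and that `tableName` is used as the collection name as in the configuration example above:

```javascript
const { ChromaClient } = require('chromadb');

const config = {
  // ...embedding and llm config as above...
  database: {
    type: 'chroma',
    clientInstance: new ChromaClient(), // defaults to a local Chroma server
    tableName: 'documents' // used as the collection name
  }
};
```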
---

## API Reference

### `new VectraClient(config)`
Creates a new client instance. Throws an error if the config is invalid.

### `client.ingestDocuments(path: string): Promise<void>`
Reads a file **or recursively iterates a directory**, chunks the content, embeds it, and saves it to the configured DB.
- If `path` is a file: ingests that single file.
- If `path` is a directory: recursively finds all supported files and ingests them.

### `client.queryRAG(query: string, filter?: object, stream?: boolean, sessionId?: string)`
Performs the RAG pipeline:
1. **Retrieval**: Fetches relevant docs using `config.retrieval.strategy`.
2. **Reranking**: (Optional) Re-orders docs using `config.reranking`.
3. **Generation**: Sends context + query to the LLM.

**Returns**:
* If `stream=false` (default): `{ answer: string | object, sources: object[] }`
* If `stream=true`: `AsyncGenerator<{ delta: string, finish_reason: string | null, usage: any | null }>`

### Advanced Configuration

- Query Planning
  - `queryPlanning.tokenBudget`: number; total token budget for context
  - `queryPlanning.preferSummariesBelow`: number; prefer metadata summaries under this budget
  - `queryPlanning.includeCitations`: boolean; include titles/sections/pages in context
- Grounding
  - `grounding.enabled`: boolean; enable extractive snippet grounding
  - `grounding.strict`: boolean; use only grounded snippets when true
  - `grounding.maxSnippets`: number; max snippets to include
- Generation
  - `generation.structuredOutput`: `'none' | 'citations'`; enable inline citations
  - `generation.outputFormat`: `'text' | 'json'`; return JSON when set to `json`
- Prompts
  - `prompts.query`: string template using `{{context}}` and `{{question}}`
  - `prompts.reranking`: optional template for the reranker prompt
- Tracing
  - `tracing.enable`: boolean; enable provider/DB/pipeline span hooks
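
A combined sketch of the advanced options above (the values are illustrative, not recommendations):

```javascript
const config = {
  // ...
  queryPlanning: { tokenBudget: 2048, preferSummariesBelow: 1024, includeCitations: true },
  grounding: { enabled: true, strict: false, maxSnippets: 4 },
  generation: { structuredOutput: 'citations', outputFormat: 'json' },
  prompts: { query: 'Context:\n{{context}}\n\nQ: {{question}}' },
  tracing: { enable: true }
};
```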

### CLI

Quickly ingest and query to validate configurations.

```bash
vectra ingest ./docs --config=./nodejs-test/config.json
vectra query "What is our leave policy?" --config=./nodejs-test/config.json --stream
```

### Ingestion Rate Limiting
- Toggle ingestion rate limiting via `config.ingestion`.

```javascript
const config = {
  // ...
  ingestion: { rateLimitEnabled: true, concurrencyLimit: 5 }
};
```

---

## Extending

### Custom Vector Store
Inherit from the `VectorStore` class and implement `addDocuments` and `similaritySearch`.

```javascript
const { VectorStore } = require('vectra-js/interfaces');

class MyCustomDB extends VectorStore {
  async addDocuments(docs) {
    // Persist each { content, vector, metadata } record in your store.
  }
  async similaritySearch(vector, k) {
    // Return the k most similar documents for the query vector.
    return [];
  }
}
```

---

## Developer Guide

### Setup
- Use `pnpm` for package management.
- Node.js 18+ recommended.
- Install with `pnpm install`.
- Lint with `pnpm run lint`.

### Environment
- `OPENAI_API_KEY`, `GOOGLE_API_KEY`, `ANTHROPIC_API_KEY` for providers.
- Database client instance configured under `config.database.clientInstance`.

### Architecture
- Pipeline: Load → Chunk → Embed → Store → Retrieve → Rerank → Plan → Ground → Generate → Stream.
- Core client: `VectraClient` (library export).
- Configuration: `VectraConfig` (validated schema).
- Vector store interface: `VectorStore` (extend to add custom stores).
- Callbacks: `StructuredLoggingCallbackHandler` and custom handler support.

### Retrieval Strategies
- Supports NAIVE, HYDE, MULTI_QUERY, HYBRID (RRF fusion built-in), and MMR.

### Query Planning & Grounding
- Context assembly respects `queryPlanning` (token budget, summary preference, citations).
- Snippet extraction is controlled by `grounding` (strict mode and max snippets).

### Streaming Interface
- Unified streaming shape `{ delta, finish_reason, usage }` across OpenAI, Gemini, and Anthropic.

### Adding a Provider
- Implement `embedDocuments`, `embedQuery`, `generate`, and `generateStream`.
- Ensure streaming yields `{ delta, finish_reason, usage }`.
- Wire it in via `llm.provider` in config (skeleton sketch below).
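
A minimal provider skeleton for the methods listed above; the constructor shape and return values are assumptions for illustration, not the SDK's internal contract:

```javascript
class MyProvider {
  constructor({ apiKey, modelName, baseUrl }) {
    this.apiKey = apiKey;
    this.modelName = modelName;
    this.baseUrl = baseUrl;
  }
  async embedDocuments(texts) { /* return number[][] (one vector per text) */ }
  async embedQuery(text) { /* return number[] */ }
  async generate(prompt) { /* return the full completion string */ }
  async *generateStream(prompt) {
    // Yield unified chunks as they arrive from the model API.
    yield { delta: '', finish_reason: null, usage: null };
  }
}
```
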
### Adding a Vector Store
- Extend `VectorStore`; implement `addDocuments`, `similaritySearch`, and optionally `hybridSearch`.
- Select it via `database.type` in config.

### Callbacks & Observability
- Available events: `onIngestStart`, `onIngestEnd`, `onIngestSummary`, `onChunkingStart`, `onEmbeddingStart`, `onRetrievalStart`, `onRetrievalEnd`, `onRerankingStart`, `onRerankingEnd`, `onGenerationStart`, `onGenerationEnd`, `onError`.
- Extend `StructuredLoggingCallbackHandler` to add error codes and payload sizes, as in the sketch below.
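
A sketch of a custom handler; the event names come from the list above, while the payload fields are assumptions to adapt to what your pipeline emits:

```javascript
const { StructuredLoggingCallbackHandler } = require('vectra-js/src/callbacks');

class MetricsCallbackHandler extends StructuredLoggingCallbackHandler {
  onRetrievalEnd(payload) {
    // Hypothetical payload shape; log whatever fields your pipeline provides.
    console.log(JSON.stringify({ event: 'retrieval_end', docs: payload?.documents?.length }));
  }
  onError(err) {
    console.error(JSON.stringify({ event: 'error', message: err?.message }));
  }
}
```
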
### CLI
- The `vectra` binary is included with the package.
- Ingest: `vectra ingest <path> --config=./config.json`.
- Query: `vectra query "<text>" --config=./config.json --stream`.

### Coding Conventions
- CommonJS modules; flat ESLint config.
- Follow existing naming (e.g., `chunkIndex` in JS); use consistent casing.

---

## Feature Guide

### Embeddings
- Providers: `OPENAI`, `GEMINI`.
- Configure dimensions to match the DB `pgvector(n)` when applicable.
- Example:
```javascript
const config = {
  embedding: {
    provider: ProviderType.OPENAI,
    apiKey: process.env.OPENAI_API_KEY,
    modelName: 'text-embedding-3-small',
    dimensions: 1536
  },
  // ...
};
```

### Generation
- Providers: `OPENAI`, `GEMINI`, `ANTHROPIC`.
- Options: `temperature`, `maxTokens`.
- Structured output: set `generation.outputFormat = 'json'` and parse `answer`.
```javascript
const config = {
  llm: { provider: ProviderType.GEMINI, apiKey: process.env.GOOGLE_API_KEY, modelName: 'gemini-1.5-pro-latest', temperature: 0.3 },
  generation: { outputFormat: 'json', structuredOutput: 'citations' }
};
const client = new VectraClient(config);
const res = await client.queryRAG('Summarize our policy with citations.');
console.log(res.answer); // JSON object, or a string on fallback
```

- OpenRouter usage:
```javascript
const config = {
  llm: {
    provider: ProviderType.OPENROUTER,
    apiKey: process.env.OPENROUTER_API_KEY,
    modelName: 'openai/gpt-4o',
    defaultHeaders: { 'HTTP-Referer': 'https://your.app', 'X-Title': 'Your App' }
  }
};
```

### Chunking
- Strategies: `RECURSIVE`, `AGENTIC`.
- Agentic chunking requires a `chunking.agenticLlm` config.
```javascript
const config = {
  chunking: {
    strategy: ChunkingStrategy.AGENTIC,
    agenticLlm: { provider: ProviderType.OPENAI, apiKey: process.env.OPENAI_API_KEY, modelName: 'gpt-4o-mini' },
    chunkSize: 1200,
    chunkOverlap: 200
  }
};
```

### Retrieval
- Strategies: `NAIVE`, `HYDE`, `MULTI_QUERY`, `HYBRID`, `MMR`.
- HYDE/MULTI_QUERY require `retrieval.llmConfig`.
- Example (an MMR variant follows the code block):
```javascript
const config = {
  retrieval: {
    strategy: RetrievalStrategy.MULTI_QUERY,
    llmConfig: { provider: ProviderType.OPENAI, apiKey: process.env.OPENAI_API_KEY, modelName: 'gpt-4o-mini' }
  }
};
```
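
An MMR variant using the `mmrLambda`/`mmrFetchK` options from the Configuration Reference (the values shown are the documented defaults):

```javascript
const config = {
  retrieval: {
    strategy: RetrievalStrategy.MMR,
    mmrLambda: 0.5, // tradeoff: higher favors relevance, lower favors diversity
    mmrFetchK: 20   // candidate pool fetched before diversification
  }
};
```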

### Reranking
- Enable LLM-based reranking to reorder results.
```javascript
const config = {
  reranking: {
    enabled: true,
    topN: 5,
    windowSize: 20,
    llmConfig: { provider: ProviderType.ANTHROPIC, apiKey: process.env.ANTHROPIC_API_KEY, modelName: 'claude-3-haiku' }
  }
};
```

### Metadata Enrichment
- Add summaries, keywords, and hypothetical questions during ingestion.
```javascript
const config = { metadata: { enrichment: true } };
await client.ingestDocuments('./docs/handbook.pdf');
```

### Query Planning
- Control context assembly with a token budget and summary preference.
```javascript
const config = {
  queryPlanning: { tokenBudget: 2048, preferSummariesBelow: 1024, includeCitations: true }
};
```

### Answer Grounding
- Inject extractive snippets; use `strict` to allow only grounded quotes.
```javascript
const config = { grounding: { enabled: true, strict: false, maxSnippets: 4 } };
```

### Prompts
- Provide a custom query template using `{{context}}` and `{{question}}`.
```javascript
const config = {
  prompts: { query: 'Use only the following context to answer.\nContext:\n{{context}}\n\nQ: {{question}}' }
};
```

### Streaming
- Unified async generator with chunks `{ delta, finish_reason, usage }`.
```javascript
const stream = await client.queryRAG('Draft a welcome email', null, true);
for await (const chunk of stream) process.stdout.write(chunk.delta || '');
```

### Filters
- Limit retrieval to matching metadata fields.
```javascript
const res = await client.queryRAG('Vacation policy', { docTitle: 'Employee Handbook' });
```

### Callbacks
- Hook into pipeline stages for logging/metrics.
```javascript
const { StructuredLoggingCallbackHandler } = require('vectra-js/src/callbacks');
const config = { callbacks: [ new StructuredLoggingCallbackHandler() ] };
```

### Vector Stores
- Prisma (Postgres + pgvector), Chroma, Qdrant, Milvus.
- Configure `database.type`, `tableName`, `columnMap`, `clientInstance`.
```javascript
const config = {
  database: {
    type: 'prisma',
    clientInstance: prismaClient,
    tableName: 'Document',
    columnMap: { content: 'content', vector: 'embedding', metadata: 'metadata' }
  }
};
```

### HuggingFace (Embeddings & Generation)
- Use the HuggingFace Inference API for embeddings and generation.
- Set `provider = ProviderType.HUGGINGFACE` and `modelName` to a supported model (e.g., `sentence-transformers/all-MiniLM-L6-v2` for embeddings, `tiiuae/falcon-7b-instruct` for generation).
- Provide `HUGGINGFACE_API_KEY`.
```javascript
const config = {
  embedding: { provider: ProviderType.HUGGINGFACE, apiKey: process.env.HUGGINGFACE_API_KEY, modelName: 'sentence-transformers/all-MiniLM-L6-v2' },
  llm: { provider: ProviderType.HUGGINGFACE, apiKey: process.env.HUGGINGFACE_API_KEY, modelName: 'tiiuae/falcon-7b-instruct' }
};
```