@dcyfr/ai-rag 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +588 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +10 -0
- package/dist/index.js.map +1 -0
- package/dist/loaders/html/index.d.ts +26 -0
- package/dist/loaders/html/index.d.ts.map +1 -0
- package/dist/loaders/html/index.js +106 -0
- package/dist/loaders/html/index.js.map +1 -0
- package/dist/loaders/index.d.ts +8 -0
- package/dist/loaders/index.d.ts.map +1 -0
- package/dist/loaders/index.js +7 -0
- package/dist/loaders/index.js.map +1 -0
- package/dist/loaders/markdown/index.d.ts +33 -0
- package/dist/loaders/markdown/index.d.ts.map +1 -0
- package/dist/loaders/markdown/index.js +150 -0
- package/dist/loaders/markdown/index.js.map +1 -0
- package/dist/loaders/text/index.d.ts +21 -0
- package/dist/loaders/text/index.d.ts.map +1 -0
- package/dist/loaders/text/index.js +78 -0
- package/dist/loaders/text/index.js.map +1 -0
- package/dist/pipeline/embedding/generator.d.ts +24 -0
- package/dist/pipeline/embedding/generator.d.ts.map +1 -0
- package/dist/pipeline/embedding/generator.js +42 -0
- package/dist/pipeline/embedding/generator.js.map +1 -0
- package/dist/pipeline/embedding/index.d.ts +8 -0
- package/dist/pipeline/embedding/index.d.ts.map +1 -0
- package/dist/pipeline/embedding/index.js +6 -0
- package/dist/pipeline/embedding/index.js.map +1 -0
- package/dist/pipeline/embedding/pipeline.d.ts +26 -0
- package/dist/pipeline/embedding/pipeline.d.ts.map +1 -0
- package/dist/pipeline/embedding/pipeline.js +59 -0
- package/dist/pipeline/embedding/pipeline.js.map +1 -0
- package/dist/pipeline/index.d.ts +7 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +7 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/ingestion/index.d.ts +5 -0
- package/dist/pipeline/ingestion/index.d.ts.map +1 -0
- package/dist/pipeline/ingestion/index.js +5 -0
- package/dist/pipeline/ingestion/index.js.map +1 -0
- package/dist/pipeline/ingestion/pipeline.d.ts +27 -0
- package/dist/pipeline/ingestion/pipeline.d.ts.map +1 -0
- package/dist/pipeline/ingestion/pipeline.js +118 -0
- package/dist/pipeline/ingestion/pipeline.js.map +1 -0
- package/dist/pipeline/retrieval/index.d.ts +5 -0
- package/dist/pipeline/retrieval/index.d.ts.map +1 -0
- package/dist/pipeline/retrieval/index.js +5 -0
- package/dist/pipeline/retrieval/index.js.map +1 -0
- package/dist/pipeline/retrieval/pipeline.d.ts +29 -0
- package/dist/pipeline/retrieval/pipeline.d.ts.map +1 -0
- package/dist/pipeline/retrieval/pipeline.js +109 -0
- package/dist/pipeline/retrieval/pipeline.js.map +1 -0
- package/dist/stores/index.d.ts +5 -0
- package/dist/stores/index.d.ts.map +1 -0
- package/dist/stores/index.js +5 -0
- package/dist/stores/index.js.map +1 -0
- package/dist/stores/vector/in-memory.d.ts +52 -0
- package/dist/stores/vector/in-memory.d.ts.map +1 -0
- package/dist/stores/vector/in-memory.js +172 -0
- package/dist/stores/vector/in-memory.js.map +1 -0
- package/dist/stores/vector/index.d.ts +6 -0
- package/dist/stores/vector/index.d.ts.map +1 -0
- package/dist/stores/vector/index.js +5 -0
- package/dist/stores/vector/index.js.map +1 -0
- package/dist/types/index.d.ts +259 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/types/index.js.map +1 -0
- package/docs/DOCUMENT_LOADERS.md +621 -0
- package/docs/EMBEDDINGS.md +733 -0
- package/docs/PIPELINES.md +771 -0
- package/docs/VECTOR_STORES.md +754 -0
- package/package.json +100 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 DCYFR
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,588 @@
|
|
|
1
|
+
# @dcyfr/ai-rag
|
|
2
|
+
|
|
3
|
+
> **RAG (Retrieval-Augmented Generation) framework for Node.js and TypeScript**
|
|
4
|
+
|
|
5
|
+
Build production-ready RAG systems with document loading, embedding, vector stores, and semantic search.
|
|
6
|
+
|
|
7
|
+
[](https://www.npmjs.com/package/@dcyfr/ai-rag)
|
|
8
|
+
[](https://www.typescriptlang.org/)
|
|
9
|
+
[](https://opensource.org/licenses/MIT)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## ✨ Features
|
|
14
|
+
|
|
15
|
+
- **📄 Document Loaders** - Load text, markdown, and HTML documents with intelligent chunking
|
|
16
|
+
- **🔢 Embeddings** - Pluggable providers (OpenAI, Cohere, Anthropic, Ollama local)
|
|
17
|
+
- **🗄️ Vector Stores** - In-memory + persistent (Chroma, Pinecone, Weaviate)
|
|
18
|
+
- **🔍 Semantic Retrieval** - Find relevant documents by meaning, not just keywords
|
|
19
|
+
- **🎯 Metadata Filtering** - Complex filters (AND/OR, nested, temporal queries)
|
|
20
|
+
- **⚡ Batch Processing** - Efficient ingestion with progress tracking and error handling
|
|
21
|
+
- **🔄 Hybrid Search** - Combine keyword (BM25) + semantic search for best results
|
|
22
|
+
- **📊 Multiple Distance Metrics** - Cosine similarity, dot product, euclidean
|
|
23
|
+
- **🚀 Production Ready** - Retry logic, monitoring hooks, comprehensive error handling
|
|
24
|
+
- **📚 Complete Documentation** - 4 comprehensive guides + advanced examples
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## 📦 Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
npm install @dcyfr/ai-rag
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Optional Dependencies
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# For production embeddings (recommended)
|
|
38
|
+
npm install openai # or anthropic
|
|
39
|
+
|
|
40
|
+
# For persistent vector storage
|
|
41
|
+
npm install chromadb # or pinecone-client or weaviate-client
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## 🚀 Quick Start
|
|
47
|
+
|
|
48
|
+
```typescript
|
|
49
|
+
import {
|
|
50
|
+
TextLoader,
|
|
51
|
+
SimpleEmbeddingGenerator,
|
|
52
|
+
InMemoryVectorStore,
|
|
53
|
+
IngestionPipeline,
|
|
54
|
+
RetrievalPipeline,
|
|
55
|
+
} from '@dcyfr/ai-rag';
|
|
56
|
+
|
|
57
|
+
// 1. Setup components
|
|
58
|
+
const loader = new TextLoader();
|
|
59
|
+
const embedder = new SimpleEmbeddingGenerator({ dimensions: 384 });
|
|
60
|
+
const store = new InMemoryVectorStore({
|
|
61
|
+
collectionName: 'my-docs',
|
|
62
|
+
embeddingDimensions: 384,
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
// 2. Ingest documents
|
|
66
|
+
const ingestion = new IngestionPipeline(loader, embedder, store);
|
|
67
|
+
await ingestion.ingest(['./docs/file1.txt', './docs/file2.txt']);
|
|
68
|
+
|
|
69
|
+
// 3. Query for relevant context
|
|
70
|
+
const retrieval = new RetrievalPipeline(store, embedder);
|
|
71
|
+
const result = await retrieval.query('What is machine learning?', {
|
|
72
|
+
limit: 5,
|
|
73
|
+
threshold: 0.7,
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
console.log(result.context); // Assembled context from top results
|
|
77
|
+
console.log(result.results); // Ranked document chunks with scores
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## 📚 Documentation
|
|
83
|
+
|
|
84
|
+
### Comprehensive Guides
|
|
85
|
+
|
|
86
|
+
Explore our detailed documentation covering all aspects of RAG development:
|
|
87
|
+
|
|
88
|
+
- **[Document Loaders Guide](docs/DOCUMENT_LOADERS.md)** - Complete guide to loading and chunking documents
|
|
89
|
+
- TextLoader, MarkdownLoader, HTMLLoader
|
|
90
|
+
- Chunking strategies (fixed-size, sentence-aware, paragraph-based, semantic)
|
|
91
|
+
- Custom loaders and streaming
|
|
92
|
+
|
|
93
|
+
- **[Embeddings Guide](docs/EMBEDDINGS.md)** - Vector embedding providers and techniques
|
|
94
|
+
- OpenAI, Cohere, Anthropic, Ollama (local)
|
|
95
|
+
- Batch processing and caching
|
|
96
|
+
- Similarity metrics explained
|
|
97
|
+
|
|
98
|
+
- **[Vector Stores Guide](docs/VECTOR_STORES.md)** - Storage and retrieval optimization
|
|
99
|
+
- InMemoryVectorStore, ChromaVectorStore, PineconeVectorStore, WeaviateVectorStore
|
|
100
|
+
- Metadata filtering (AND/OR, nested queries)
|
|
101
|
+
- Performance optimization (batching, ANN search)
|
|
102
|
+
|
|
103
|
+
- **[Pipelines Guide](docs/PIPELINES.md)** - End-to-end RAG workflows
|
|
104
|
+
- Ingestion pipeline (load → chunk → embed → store)
|
|
105
|
+
- Retrieval pipeline (query → search → assemble context)
|
|
106
|
+
- Production patterns (hybrid search, re-ranking, error handling)
|
|
107
|
+
|
|
108
|
+
### Quick Reference
|
|
109
|
+
|
|
110
|
+
**Document Loaders** - Load and chunk documents
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
import { TextLoader } from '@dcyfr/ai-rag';
|
|
114
|
+
|
|
115
|
+
const loader = new TextLoader();
|
|
116
|
+
const docs = await loader.load('./document.txt', {
|
|
117
|
+
chunkSize: 1000,
|
|
118
|
+
chunkOverlap: 200,
|
|
119
|
+
});
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**MarkdownLoader** - Load markdown files (`.md`)
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
import { MarkdownLoader } from '@dcyfr/ai-rag';
|
|
126
|
+
|
|
127
|
+
const loader = new MarkdownLoader();
|
|
128
|
+
const docs = await loader.load('./README.md', {
|
|
129
|
+
chunkSize: 800,
|
|
130
|
+
chunkOverlap: 150,
|
|
131
|
+
});
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**HTMLLoader** - Load HTML files (`.html`)
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
import { HTMLLoader } from '@dcyfr/ai-rag';
|
|
138
|
+
|
|
139
|
+
const loader = new HTMLLoader();
|
|
140
|
+
const docs = await loader.load('./page.html', {
|
|
141
|
+
chunkSize: 600,
|
|
142
|
+
chunkOverlap: 100,
|
|
143
|
+
});
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Embedding Generators
|
|
147
|
+
|
|
148
|
+
**SimpleEmbeddingGenerator** - Placeholder embeddings (for development/testing)
|
|
149
|
+
|
|
150
|
+
```typescript
|
|
151
|
+
import { SimpleEmbeddingGenerator } from '@dcyfr/ai-rag';
|
|
152
|
+
|
|
153
|
+
const embedder = new SimpleEmbeddingGenerator({ dimensions: 384 });
|
|
154
|
+
const embeddings = await embedder.embed(['text 1', 'text 2']);
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
⚠️ **Production Note:** Use real embedding models in production:
|
|
158
|
+
- OpenAI `text-embedding-3-small` (1536 dimensions)
|
|
159
|
+
- Cohere `embed-english-v3.0`
|
|
160
|
+
- Local models via Ollama
|
|
161
|
+
|
|
162
|
+
### Vector Stores
|
|
163
|
+
|
|
164
|
+
**InMemoryVectorStore** - Fast in-memory storage
|
|
165
|
+
|
|
166
|
+
```typescript
|
|
167
|
+
import { InMemoryVectorStore } from '@dcyfr/ai-rag';
|
|
168
|
+
|
|
169
|
+
const store = new InMemoryVectorStore({
|
|
170
|
+
collectionName: 'docs',
|
|
171
|
+
embeddingDimensions: 384,
|
|
172
|
+
distanceMetric: 'cosine', // 'cosine' | 'dot' | 'euclidean'
|
|
173
|
+
});
|
|
174
|
+
|
|
175
|
+
// Add documents
|
|
176
|
+
await store.addDocuments(chunks);
|
|
177
|
+
|
|
178
|
+
// Search
|
|
179
|
+
const results = await store.search(queryEmbedding, 10);
|
|
180
|
+
|
|
181
|
+
// Filter by metadata
|
|
182
|
+
const filtered = await store.search(queryEmbedding, 10, {
|
|
183
|
+
field: 'category',
|
|
184
|
+
operator: 'eq',
|
|
185
|
+
value: 'documentation',
|
|
186
|
+
});
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Ingestion Pipeline
|
|
190
|
+
|
|
191
|
+
```typescript
|
|
192
|
+
import { IngestionPipeline } from '@dcyfr/ai-rag';
|
|
193
|
+
|
|
194
|
+
const pipeline = new IngestionPipeline(loader, embedder, store);
|
|
195
|
+
|
|
196
|
+
const result = await pipeline.ingest(['./docs/'], {
|
|
197
|
+
batchSize: 32,
|
|
198
|
+
onProgress: (current, total, details) => {
|
|
199
|
+
console.log(`Processing ${current}/${total}`);
|
|
200
|
+
},
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
console.log(`Processed ${result.documentsProcessed} documents`);
|
|
204
|
+
console.log(`Generated ${result.chunksGenerated} chunks`);
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Retrieval Pipeline
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
import { RetrievalPipeline } from '@dcyfr/ai-rag';
|
|
211
|
+
|
|
212
|
+
const pipeline = new RetrievalPipeline(store, embedder);
|
|
213
|
+
|
|
214
|
+
// Semantic search
|
|
215
|
+
const result = await pipeline.query('your question here', {
|
|
216
|
+
limit: 5,
|
|
217
|
+
threshold: 0.7,
|
|
218
|
+
includeMetadata: true,
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
console.log(result.context); // Assembled context
|
|
222
|
+
console.log(result.results); // Ranked results
|
|
223
|
+
console.log(result.metadata); // Query metadata
|
|
224
|
+
|
|
225
|
+
// Find similar documents
|
|
226
|
+
const similar = await pipeline.findSimilar('doc-id-123', { limit: 10 });
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## 💡 Examples
|
|
232
|
+
|
|
233
|
+
### Basic Examples
|
|
234
|
+
|
|
235
|
+
- **[Basic RAG](examples/basic-rag/)** - Simple document ingestion and retrieval workflow
|
|
236
|
+
- **[Semantic Search](examples/semantic-search/)** - Advanced search with metadata filtering
|
|
237
|
+
- **[Q&A System](examples/qa-system/)** - Question answering with context assembly
|
|
238
|
+
|
|
239
|
+
### Advanced Examples
|
|
240
|
+
|
|
241
|
+
- **[Advanced RAG](examples/advanced-rag/)** - Production-ready workflow with:
|
|
242
|
+
- OpenAI embeddings for semantic search
|
|
243
|
+
- Chroma persistent vector store
|
|
244
|
+
- Metadata filtering with multiple criteria
|
|
245
|
+
- Progress tracking and error handling
|
|
246
|
+
- Question answering with context
|
|
247
|
+
|
|
248
|
+
- **[Metadata Filtering](examples/metadata-filtering/)** - Complex query scenarios:
|
|
249
|
+
- AND/OR filter combinations
|
|
250
|
+
- Nested complex filters
|
|
251
|
+
- Temporal queries (date ranges)
|
|
252
|
+
- Tag-based search with arrays
|
|
253
|
+
- Multi-field filtering
|
|
254
|
+
|
|
255
|
+
- **[Hybrid Search](examples/hybrid-search/)** - Combine keyword + semantic:
|
|
256
|
+
- BM25 keyword search implementation
|
|
257
|
+
- Weighted score fusion
|
|
258
|
+
- Reciprocal rank fusion (RRF)
|
|
259
|
+
- Performance comparisons
|
|
260
|
+
|
|
261
|
+
### Running Examples
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
# Basic examples
|
|
265
|
+
npm run example:basic-rag
|
|
266
|
+
npm run example:semantic-search
|
|
267
|
+
npm run example:qa-system
|
|
268
|
+
|
|
269
|
+
# Advanced examples
|
|
270
|
+
npm run example:advanced-rag
|
|
271
|
+
npm run example:metadata-filtering
|
|
272
|
+
npm run example:hybrid-search
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
---
|
|
276
|
+
|
|
277
|
+
## 🏗️ Architecture
|
|
278
|
+
|
|
279
|
+
```
|
|
280
|
+
┌─────────────┐
|
|
281
|
+
│ Documents │
|
|
282
|
+
└──────┬──────┘
|
|
283
|
+
│
|
|
284
|
+
▼
|
|
285
|
+
┌─────────────┐
|
|
286
|
+
│ Loaders │ (Text, Markdown, HTML)
|
|
287
|
+
└──────┬──────┘
|
|
288
|
+
│
|
|
289
|
+
▼
|
|
290
|
+
┌─────────────┐
|
|
291
|
+
│ Chunking │ (Size + overlap)
|
|
292
|
+
└──────┬──────┘
|
|
293
|
+
│
|
|
294
|
+
▼
|
|
295
|
+
┌─────────────┐
|
|
296
|
+
│ Embeddings │ (Vector generation)
|
|
297
|
+
└──────┬──────┘
|
|
298
|
+
│
|
|
299
|
+
▼
|
|
300
|
+
┌─────────────┐
|
|
301
|
+
│ Vector Store│ (In-memory or persistent)
|
|
302
|
+
└──────┬──────┘
|
|
303
|
+
│
|
|
304
|
+
▼
|
|
305
|
+
┌─────────────┐
|
|
306
|
+
│ Retrieval │ (Semantic search)
|
|
307
|
+
└──────┬──────┘
|
|
308
|
+
│
|
|
309
|
+
▼
|
|
310
|
+
┌─────────────┐
|
|
311
|
+
│ Context │ (Assembled results)
|
|
312
|
+
└─────────────┘
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## 💡 Best Practices
|
|
318
|
+
|
|
319
|
+
### Chunking Strategy
|
|
320
|
+
|
|
321
|
+
**Choose appropriate chunk sizes:**
|
|
322
|
+
- Technical documentation: 800-1200 characters
|
|
323
|
+
- Blog posts/articles: 1000-1500 characters
|
|
324
|
+
- Code documentation: 600-1000 characters
|
|
325
|
+
- Q&A pairs: 400-800 characters
|
|
326
|
+
|
|
327
|
+
**Use 15-20% overlap:**
|
|
328
|
+
```typescript
|
|
329
|
+
const loader = new TextLoader();
|
|
330
|
+
const docs = await loader.load('./document.txt', {
|
|
331
|
+
chunkSize: 1000,
|
|
332
|
+
chunkOverlap: 200, // 20% overlap prevents context loss at boundaries
|
|
333
|
+
});
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
**Preserve document structure:**
|
|
337
|
+
- Use MarkdownLoader for `.md` files (preserves headings, code blocks)
|
|
338
|
+
- Use HTMLLoader for web pages (extracts main content, excludes nav/footer)
|
|
339
|
+
- Add rich metadata (source, category, tags, dates, author)
|
|
340
|
+
|
|
341
|
+
### Embedding Selection
|
|
342
|
+
|
|
343
|
+
**Development/Testing:**
|
|
344
|
+
- SimpleEmbeddingGenerator (fast, no API costs, not for production)
|
|
345
|
+
|
|
346
|
+
**Production (Recommended):**
|
|
347
|
+
- OpenAI `text-embedding-3-small` (1536 dim, $0.02/1M tokens, fast, good quality)
|
|
348
|
+
- OpenAI `text-embedding-3-large` (3072 dim, best quality, higher cost)
|
|
349
|
+
- Cohere `embed-english-v3.0` (1024 dim, multilingual support)
|
|
350
|
+
- Ollama local models (no API costs, data privacy, requires GPU)
|
|
351
|
+
|
|
352
|
+
**Critical:** Use the same embedder for both documents and queries!
|
|
353
|
+
|
|
354
|
+
### Search Optimization
|
|
355
|
+
|
|
356
|
+
**Set appropriate similarity thresholds:**
|
|
357
|
+
```typescript
|
|
358
|
+
const result = await pipeline.query('search query', {
|
|
359
|
+
limit: 10,
|
|
360
|
+
threshold: 0.7, // Filter results with score < 0.7 (adjust 0.6-0.8 based on needs)
|
|
361
|
+
});
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
**Use metadata filtering to narrow search space:**
|
|
365
|
+
```typescript
|
|
366
|
+
const result = await pipeline.query('search query', {
|
|
367
|
+
limit: 5,
|
|
368
|
+
filter: {
|
|
369
|
+
operator: 'and',
|
|
370
|
+
filters: [
|
|
371
|
+
{ field: 'category', operator: 'eq', value: 'technical' },
|
|
372
|
+
{ field: 'published', operator: 'gte', value: '2024-01-01' },
|
|
373
|
+
],
|
|
374
|
+
},
|
|
375
|
+
});
|
|
376
|
+
```
|
|
377
|
+
|
|
378
|
+
**For large collections (>100k documents):**
|
|
379
|
+
- Use persistent vector stores (Chroma, Pinecone, Weaviate)
|
|
380
|
+
- Enable Approximate Nearest Neighbor (ANN) search
|
|
381
|
+
- Implement caching for frequent queries
|
|
382
|
+
|
|
383
|
+
---
|
|
384
|
+
|
|
385
|
+
## 🔧 Troubleshooting
|
|
386
|
+
|
|
387
|
+
### Poor Search Results
|
|
388
|
+
|
|
389
|
+
**Problem:** Retrieved context not relevant to query
|
|
390
|
+
|
|
391
|
+
**Solutions:**
|
|
392
|
+
1. Verify using same embedder for docs and queries
|
|
393
|
+
2. Increase similarity threshold (0.75-0.8 for higher quality)
|
|
394
|
+
3. Test embedding quality:
|
|
395
|
+
```typescript
|
|
396
|
+
const [ml, ai, pizza] = await embedder.embed(['machine learning', 'artificial intelligence', 'pizza']);
|
|
397
|
+
const similarity = cosineSimilarity(ml, ai); // Should be >0.7
|
|
398
|
+
const unrelated = cosineSimilarity(ml, pizza); // Should be <0.3
|
|
399
|
+
```
|
|
400
|
+
4. Adjust chunk size (smaller chunks = more precise, larger = more context)
|
|
401
|
+
5. Add metadata filters to narrow search space
|
|
402
|
+
|
|
403
|
+
### High API Costs
|
|
404
|
+
|
|
405
|
+
**Problem:** Embedding API costs too high
|
|
406
|
+
|
|
407
|
+
**Solutions:**
|
|
408
|
+
1. Implement caching for frequent queries:
|
|
409
|
+
```typescript
|
|
410
|
+
const cache = new LRUCache<string, number[]>({ max: 10000, ttl: 1000 * 60 * 60 });
|
|
411
|
+
|
|
412
|
+
async function embedWithCache(text: string): Promise<number[]> {
|
|
413
|
+
const cached = cache.get(text);
|
|
414
|
+
if (cached) return cached;
|
|
415
|
+
|
|
416
|
+
const [embedding] = await embedder.embed([text]);
|
|
417
|
+
cache.set(text, embedding);
|
|
418
|
+
return embedding;
|
|
419
|
+
}
|
|
420
|
+
```
|
|
421
|
+
2. Use smaller embedding dimensions (OpenAI supports 512, 1024, 1536)
|
|
422
|
+
3. Switch to local models (Ollama) for development/testing
|
|
423
|
+
4. Batch process documents (100+ at a time) to reduce API calls
|
|
424
|
+
|
|
425
|
+
### Slow Performance
|
|
426
|
+
|
|
427
|
+
**Problem:** Search or ingestion too slow
|
|
428
|
+
|
|
429
|
+
**Solutions:**
|
|
430
|
+
1. **For ingestion:**
|
|
431
|
+
- Increase batch size: `{ batchSize: 100 }`
|
|
432
|
+
- Process files in parallel (use Promise.all with batches)
|
|
433
|
+
- Use streaming loader for huge files
|
|
434
|
+
|
|
435
|
+
2. **For search:**
|
|
436
|
+
- Reduce result limit: `{ limit: 5 }` instead of 50
|
|
437
|
+
- Use metadata filters to narrow search space
|
|
438
|
+
- Enable ANN search for collections >100k:
|
|
439
|
+
```typescript
|
|
440
|
+
const store = new InMemoryVectorStore({
|
|
441
|
+
useApproximateSearch: true,
|
|
442
|
+
approximationParams: { nprobe: 10, nlist: 100 },
|
|
443
|
+
});
|
|
444
|
+
```
|
|
445
|
+
- Use persistent vector stores with indexing (Pinecone, Weaviate)
|
|
446
|
+
|
|
447
|
+
### Memory Issues
|
|
448
|
+
|
|
449
|
+
**Problem:** Application crashes with large document collections
|
|
450
|
+
|
|
451
|
+
**Solutions:**
|
|
452
|
+
1. Use persistent vector stores instead of in-memory
|
|
453
|
+
2. Set maxDocuments limit with LRU eviction:
|
|
454
|
+
```typescript
|
|
455
|
+
const store = new InMemoryVectorStore({
|
|
456
|
+
maxDocuments: 100000,
|
|
457
|
+
evictionPolicy: 'lru',
|
|
458
|
+
});
|
|
459
|
+
```
|
|
460
|
+
3. Process documents in smaller batches
|
|
461
|
+
4. Use streaming loader for large files
|
|
462
|
+
|
|
463
|
+
---
|
|
464
|
+
|
|
465
|
+
## 🧪 Development
|
|
466
|
+
|
|
467
|
+
```bash
|
|
468
|
+
# Install dependencies
|
|
469
|
+
npm install
|
|
470
|
+
|
|
471
|
+
# Build
|
|
472
|
+
npm run build
|
|
473
|
+
|
|
474
|
+
# Test
|
|
475
|
+
npm run test:run
|
|
476
|
+
|
|
477
|
+
# Watch mode
|
|
478
|
+
npm run test:watch
|
|
479
|
+
|
|
480
|
+
# Coverage
|
|
481
|
+
npm run test:coverage
|
|
482
|
+
|
|
483
|
+
# Lint
|
|
484
|
+
npm run lint
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
---
|
|
488
|
+
|
|
489
|
+
## 🔧 Production Setup
|
|
490
|
+
|
|
491
|
+
### 1. Use Real Embedding Models
|
|
492
|
+
|
|
493
|
+
```typescript
|
|
494
|
+
import OpenAI from 'openai';
|
|
495
|
+
|
|
496
|
+
class OpenAIEmbeddingGenerator implements EmbeddingGenerator {
|
|
497
|
+
private client: OpenAI;
|
|
498
|
+
|
|
499
|
+
constructor(apiKey: string) {
|
|
500
|
+
this.client = new OpenAI({ apiKey });
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
async embed(texts: string[]): Promise<number[][]> {
|
|
504
|
+
const response = await this.client.embeddings.create({
|
|
505
|
+
model: 'text-embedding-3-small',
|
|
506
|
+
input: texts,
|
|
507
|
+
});
|
|
508
|
+
return response.data.map((d) => d.embedding);
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
getDimensions(): number {
|
|
512
|
+
return 1536;
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
### 2. Use Persistent Vector Stores
|
|
518
|
+
|
|
519
|
+
```typescript
|
|
520
|
+
import { ChromaClient } from 'chromadb';
|
|
521
|
+
|
|
522
|
+
// Initialize Chroma for persistent storage
|
|
523
|
+
const client = new ChromaClient({ path: './chroma-data' });
|
|
524
|
+
```
|
|
525
|
+
|
|
526
|
+
### 3. Add Production Monitoring
|
|
527
|
+
|
|
528
|
+
```typescript
|
|
529
|
+
const result = await ingestion.ingest(files, {
|
|
530
|
+
onProgress: (current, total, details) => {
|
|
531
|
+
// Send metrics to monitoring service
|
|
532
|
+
metrics.gauge('rag.ingestion.progress', current / total);
|
|
533
|
+
logger.info({ current, total, details }, 'Ingestion progress');
|
|
534
|
+
},
|
|
535
|
+
});
|
|
536
|
+
```
|
|
537
|
+
|
|
538
|
+
---
|
|
539
|
+
|
|
540
|
+
## 🗺️ Roadmap
|
|
541
|
+
|
|
542
|
+
### v1.1 (Planned)
|
|
543
|
+
- [ ] Additional vector stores (Qdrant, Milvus)
|
|
544
|
+
- [ ] Streaming ingestion pipeline
|
|
545
|
+
- [ ] Built-in caching layer
|
|
546
|
+
- [ ] Query expansion and synonyms
|
|
547
|
+
- [ ] Document versioning and updates
|
|
548
|
+
|
|
549
|
+
### v1.2 (Planned)
|
|
550
|
+
- [ ] Hybrid search (keyword + semantic) built-in
|
|
551
|
+
- [ ] Re-ranking strategies (cross-encoder models)
|
|
552
|
+
- [ ] Multi-query retrieval
|
|
553
|
+
- [ ] Sparse + dense vector support
|
|
554
|
+
- [ ] Advanced chunking (recursive, semantic)
|
|
555
|
+
|
|
556
|
+
### v2.0 (Future)
|
|
557
|
+
- [ ] Distributed vector search
|
|
558
|
+
- [ ] Graph RAG (knowledge graphs + vectors)
|
|
559
|
+
- [ ] Multi-modal embeddings (text + images)
|
|
560
|
+
- [ ] Real-time indexing
|
|
561
|
+
- [ ] Auto-tuning (chunk size, thresholds)
|
|
562
|
+
|
|
563
|
+
See our [GitHub Issues](https://github.com/dcyfr/ai-rag/issues) for feature requests and progress.
|
|
564
|
+
|
|
565
|
+
---
|
|
566
|
+
|
|
567
|
+
## 📄 License
|
|
568
|
+
|
|
569
|
+
MIT © [DCYFR](https://www.dcyfr.ai)
|
|
570
|
+
|
|
571
|
+
---
|
|
572
|
+
|
|
573
|
+
## 🤝 Contributing
|
|
574
|
+
|
|
575
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution guidelines.
|
|
576
|
+
|
|
577
|
+
---
|
|
578
|
+
|
|
579
|
+
## 🔗 Links
|
|
580
|
+
|
|
581
|
+
- [Website](https://www.dcyfr.ai)
|
|
582
|
+
- [Documentation](https://www.dcyfr.ai/docs/ai-rag)
|
|
583
|
+
- [GitHub](https://github.com/dcyfr/ai-rag)
|
|
584
|
+
- [npm](https://www.npmjs.com/package/@dcyfr/ai-rag)
|
|
585
|
+
|
|
586
|
+
---
|
|
587
|
+
|
|
588
|
+
Built with ❤️ by the DCYFR team
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAGH,mBAAmB,kBAAkB,CAAC;AAGtC,cAAc,oBAAoB,CAAC;AAGnC,cAAc,mBAAmB,CAAC;AAGlC,cAAc,qBAAqB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,UAAU;AACV,cAAc,oBAAoB,CAAC;AAEnC,SAAS;AACT,cAAc,mBAAmB,CAAC;AAElC,YAAY;AACZ,cAAc,qBAAqB,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML document loader
|
|
3
|
+
* Handles HTML files (.html, .htm)
|
|
4
|
+
*/
|
|
5
|
+
import type { Document, DocumentLoader, LoaderConfig } from '../../types/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Load HTML documents
|
|
8
|
+
*/
|
|
9
|
+
export declare class HTMLLoader implements DocumentLoader {
|
|
10
|
+
supportedExtensions: string[];
|
|
11
|
+
load(source: string, config?: LoaderConfig): Promise<Document[]>;
|
|
12
|
+
/**
|
|
13
|
+
* Extract text content from HTML
|
|
14
|
+
* This is a simple implementation - for production use a proper HTML parser
|
|
15
|
+
*/
|
|
16
|
+
private extractText;
|
|
17
|
+
/**
|
|
18
|
+
* Split document into chunks
|
|
19
|
+
*/
|
|
20
|
+
private chunkDocument;
|
|
21
|
+
/**
|
|
22
|
+
* Generate document ID from source
|
|
23
|
+
*/
|
|
24
|
+
private generateId;
|
|
25
|
+
}
|
|
26
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/loaders/html/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAInF;;GAEG;AACH,qBAAa,UAAW,YAAW,cAAc;IAC/C,mBAAmB,WAAqB;IAElC,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAsCtE;;;OAGG;IACH,OAAO,CAAC,WAAW;IAsBnB;;OAEG;IACH,OAAO,CAAC,aAAa;IAgCrB;;OAEG;IACH,OAAO,CAAC,UAAU;CAGnB"}
|