@hazeljs/rag 0.2.0-beta.8 → 0.2.0-beta.81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +192 -21
- package/README.md +348 -223
- package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
- package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
- package/dist/__tests__/graph/community-detector.test.js +87 -0
- package/dist/__tests__/graph/community-detector.test.js.map +1 -0
- package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
- package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
- package/dist/__tests__/graph/community-summarizer.test.js +131 -0
- package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
- package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
- package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
- package/dist/__tests__/graph/entity-extractor.test.js +129 -0
- package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
- package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
- package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
- package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
- package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
- package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
- package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/base.loader.test.js +114 -0
- package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
- package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/directory.loader.test.js +154 -0
- package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
- package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
- package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
- package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
- package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
- package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
- package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
- package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
- package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
- package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
- package/dist/__tests__/rag-pipeline.test.js +210 -0
- package/dist/__tests__/rag-pipeline.test.js.map +1 -0
- package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
- package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/bm25.test.js +86 -0
- package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
- package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
- package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
- package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
- package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
- package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
- package/dist/__tests__/retrieval/multi-query.test.js +90 -0
- package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
- package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
- package/dist/__tests__/utils/similarity.test.d.ts +2 -0
- package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
- package/dist/__tests__/utils/similarity.test.js +47 -0
- package/dist/__tests__/utils/similarity.test.js.map +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +1 -0
- package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -1
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js +4 -15
- package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -1
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts +1 -0
- package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -1
- package/dist/agentic/decorators/corrective-rag.decorator.js +7 -11
- package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -1
- package/dist/agentic/decorators/hyde.decorator.d.ts +1 -0
- package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -1
- package/dist/agentic/decorators/hyde.decorator.js +7 -6
- package/dist/agentic/decorators/hyde.decorator.js.map +1 -1
- package/dist/agentic/decorators/multi-hop.decorator.d.ts +1 -0
- package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -1
- package/dist/agentic/decorators/multi-hop.decorator.js +8 -19
- package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -1
- package/dist/agentic/decorators/query-planner.decorator.d.ts +1 -0
- package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -1
- package/dist/agentic/decorators/query-planner.decorator.js +4 -18
- package/dist/agentic/decorators/query-planner.decorator.js.map +1 -1
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts +1 -0
- package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -1
- package/dist/agentic/decorators/query-rewriter.decorator.js +8 -5
- package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -1
- package/dist/agentic/decorators/self-reflective.decorator.d.ts +2 -0
- package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -1
- package/dist/agentic/decorators/self-reflective.decorator.js +11 -32
- package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -1
- package/dist/agentic/index.d.ts +1 -1
- package/dist/agentic/index.d.ts.map +1 -1
- package/dist/agentic/types.d.ts +3 -2
- package/dist/agentic/types.d.ts.map +1 -1
- package/dist/graph/community-detector.d.ts +45 -0
- package/dist/graph/community-detector.d.ts.map +1 -0
- package/dist/graph/community-detector.js +153 -0
- package/dist/graph/community-detector.js.map +1 -0
- package/dist/graph/community-summarizer.d.ts +41 -0
- package/dist/graph/community-summarizer.d.ts.map +1 -0
- package/dist/graph/community-summarizer.js +119 -0
- package/dist/graph/community-summarizer.js.map +1 -0
- package/dist/graph/entity-extractor.d.ts +47 -0
- package/dist/graph/entity-extractor.d.ts.map +1 -0
- package/dist/graph/entity-extractor.js +224 -0
- package/dist/graph/entity-extractor.js.map +1 -0
- package/dist/graph/graph-rag-pipeline.d.ts +83 -0
- package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
- package/dist/graph/graph-rag-pipeline.js +390 -0
- package/dist/graph/graph-rag-pipeline.js.map +1 -0
- package/dist/graph/graph.types.d.ts +186 -0
- package/dist/graph/graph.types.d.ts.map +1 -0
- package/dist/graph/graph.types.js +20 -0
- package/dist/graph/graph.types.js.map +1 -0
- package/dist/graph/index.d.ts +15 -0
- package/dist/graph/index.d.ts.map +1 -0
- package/dist/graph/index.js +31 -0
- package/dist/graph/index.js.map +1 -0
- package/dist/graph/knowledge-graph.d.ts +57 -0
- package/dist/graph/knowledge-graph.d.ts.map +1 -0
- package/dist/graph/knowledge-graph.js +198 -0
- package/dist/graph/knowledge-graph.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/loaders/base.loader.d.ts +108 -0
- package/dist/loaders/base.loader.d.ts.map +1 -0
- package/dist/loaders/base.loader.js +123 -0
- package/dist/loaders/base.loader.js.map +1 -0
- package/dist/loaders/csv-file.loader.d.ts +61 -0
- package/dist/loaders/csv-file.loader.d.ts.map +1 -0
- package/dist/loaders/csv-file.loader.js +162 -0
- package/dist/loaders/csv-file.loader.js.map +1 -0
- package/dist/loaders/directory.loader.d.ts +67 -0
- package/dist/loaders/directory.loader.d.ts.map +1 -0
- package/dist/loaders/directory.loader.js +163 -0
- package/dist/loaders/directory.loader.js.map +1 -0
- package/dist/loaders/docx.loader.d.ts +52 -0
- package/dist/loaders/docx.loader.d.ts.map +1 -0
- package/dist/loaders/docx.loader.js +110 -0
- package/dist/loaders/docx.loader.js.map +1 -0
- package/dist/loaders/github.loader.d.ts +114 -0
- package/dist/loaders/github.loader.d.ts.map +1 -0
- package/dist/loaders/github.loader.js +217 -0
- package/dist/loaders/github.loader.js.map +1 -0
- package/dist/loaders/html-file.loader.d.ts +55 -0
- package/dist/loaders/html-file.loader.d.ts.map +1 -0
- package/dist/loaders/html-file.loader.js +170 -0
- package/dist/loaders/html-file.loader.js.map +1 -0
- package/dist/loaders/index.d.ts +52 -0
- package/dist/loaders/index.d.ts.map +1 -0
- package/dist/loaders/index.js +61 -0
- package/dist/loaders/index.js.map +1 -0
- package/dist/loaders/json-file.loader.d.ts +51 -0
- package/dist/loaders/json-file.loader.d.ts.map +1 -0
- package/dist/loaders/json-file.loader.js +100 -0
- package/dist/loaders/json-file.loader.js.map +1 -0
- package/dist/loaders/markdown-file.loader.d.ts +61 -0
- package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
- package/dist/loaders/markdown-file.loader.js +148 -0
- package/dist/loaders/markdown-file.loader.js.map +1 -0
- package/dist/loaders/pdf.loader.d.ts +64 -0
- package/dist/loaders/pdf.loader.d.ts.map +1 -0
- package/dist/loaders/pdf.loader.js +163 -0
- package/dist/loaders/pdf.loader.js.map +1 -0
- package/dist/loaders/text-file.loader.d.ts +39 -0
- package/dist/loaders/text-file.loader.d.ts.map +1 -0
- package/dist/loaders/text-file.loader.js +69 -0
- package/dist/loaders/text-file.loader.js.map +1 -0
- package/dist/loaders/web.loader.d.ts +87 -0
- package/dist/loaders/web.loader.d.ts.map +1 -0
- package/dist/loaders/web.loader.js +194 -0
- package/dist/loaders/web.loader.js.map +1 -0
- package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
- package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
- package/dist/loaders/youtube-transcript.loader.js +254 -0
- package/dist/loaders/youtube-transcript.loader.js.map +1 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts +8 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.js +27 -0
- package/dist/prompts/agentic/adaptive-retrieval.prompt.js.map +1 -0
- package/dist/prompts/agentic/corrective-rag.prompt.d.ts +9 -0
- package/dist/prompts/agentic/corrective-rag.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/corrective-rag.prompt.js +23 -0
- package/dist/prompts/agentic/corrective-rag.prompt.js.map +1 -0
- package/dist/prompts/agentic/hyde.prompt.d.ts +9 -0
- package/dist/prompts/agentic/hyde.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/hyde.prompt.js +18 -0
- package/dist/prompts/agentic/hyde.prompt.js.map +1 -0
- package/dist/prompts/agentic/multi-hop.prompt.d.ts +15 -0
- package/dist/prompts/agentic/multi-hop.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/multi-hop.prompt.js +38 -0
- package/dist/prompts/agentic/multi-hop.prompt.js.map +1 -0
- package/dist/prompts/agentic/query-planner.prompt.d.ts +8 -0
- package/dist/prompts/agentic/query-planner.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/query-planner.prompt.js +30 -0
- package/dist/prompts/agentic/query-planner.prompt.js.map +1 -0
- package/dist/prompts/agentic/query-rewriter.prompt.d.ts +10 -0
- package/dist/prompts/agentic/query-rewriter.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/query-rewriter.prompt.js +17 -0
- package/dist/prompts/agentic/query-rewriter.prompt.js.map +1 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts +10 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.js +24 -0
- package/dist/prompts/agentic/self-reflective-improve.prompt.js.map +1 -0
- package/dist/prompts/agentic/self-reflective.prompt.d.ts +9 -0
- package/dist/prompts/agentic/self-reflective.prompt.d.ts.map +1 -0
- package/dist/prompts/agentic/self-reflective.prompt.js +32 -0
- package/dist/prompts/agentic/self-reflective.prompt.js.map +1 -0
- package/dist/prompts/community-summary.prompt.d.ts +9 -0
- package/dist/prompts/community-summary.prompt.d.ts.map +1 -0
- package/dist/prompts/community-summary.prompt.js +30 -0
- package/dist/prompts/community-summary.prompt.js.map +1 -0
- package/dist/prompts/entity-extraction.prompt.d.ts +10 -0
- package/dist/prompts/entity-extraction.prompt.d.ts.map +1 -0
- package/dist/prompts/entity-extraction.prompt.js +39 -0
- package/dist/prompts/entity-extraction.prompt.js.map +1 -0
- package/dist/prompts/graph-search.prompt.d.ts +10 -0
- package/dist/prompts/graph-search.prompt.d.ts.map +1 -0
- package/dist/prompts/graph-search.prompt.js +23 -0
- package/dist/prompts/graph-search.prompt.js.map +1 -0
- package/dist/prompts/index.d.ts +13 -0
- package/dist/prompts/index.d.ts.map +1 -0
- package/dist/prompts/index.js +29 -0
- package/dist/prompts/index.js.map +1 -0
- package/dist/prompts/rag-answer.prompt.d.ts +9 -0
- package/dist/prompts/rag-answer.prompt.d.ts.map +1 -0
- package/dist/prompts/rag-answer.prompt.js +20 -0
- package/dist/prompts/rag-answer.prompt.js.map +1 -0
- package/dist/rag.service.d.ts +1 -0
- package/dist/rag.service.d.ts.map +1 -1
- package/dist/rag.service.js +8 -9
- package/dist/rag.service.js.map +1 -1
- package/dist/vector-stores/qdrant.store.d.ts +2 -0
- package/dist/vector-stores/qdrant.store.d.ts.map +1 -1
- package/dist/vector-stores/qdrant.store.js +1 -0
- package/dist/vector-stores/qdrant.store.js.map +1 -1
- package/package.json +64 -6
package/README.md
CHANGED
|
@@ -1,19 +1,27 @@
|
|
|
1
1
|
# @hazeljs/rag
|
|
2
2
|
|
|
3
|
-
**
|
|
3
|
+
**Your docs. Your data. AI that actually knows them.**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Load documents from any source, build a knowledge graph, embed into vector stores, and retrieve answers with semantic, hybrid, or graph-based search. Full RAG + GraphRAG pipeline — no PhD required.
|
|
6
|
+
|
|
7
|
+
[npm version](https://www.npmjs.com/package/@hazeljs/rag)
|
|
8
|
+
[npm downloads](https://www.npmjs.com/package/@hazeljs/rag)
|
|
9
|
+
[License: Apache-2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
|
6
10
|
|
|
7
11
|
## Features
|
|
8
12
|
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
13
|
+
- **11 Document Loaders** — TXT, Markdown, JSON, CSV, HTML, PDF, DOCX, web scraping, YouTube transcripts, GitHub repos, and inline text. All return the same `Document[]` interface.
|
|
14
|
+
- 🕸️ **GraphRAG** — Extract entities and relationships from documents, build a knowledge graph, detect communities, and answer questions with entity-centric (local), thematic (global), or hybrid search.
|
|
15
|
+
- **Vector Search** — Semantic similarity search with configurable embeddings and vector stores
|
|
16
|
+
- 🤖 **RAG Pipeline** — Complete load → split → embed → retrieve → augment workflow
|
|
17
|
+
- 🎯 **Multiple Strategies** — Similarity, Hybrid (vector + BM25), Multi-Query retrieval
|
|
18
|
+
- **5 Vector Stores** — Memory, Pinecone, Qdrant, Weaviate, ChromaDB (unified interface)
|
|
19
|
+
- **Embedding Providers** — OpenAI and Cohere, easily extensible
|
|
20
|
+
- ✂️ **Smart Text Splitting** — Recursive, character, and token splitters
|
|
21
|
+
- **Metadata Filtering** — Filter results by any metadata field
|
|
22
|
+
- 🧠 **Memory System** — Conversation history, entity memory, fact storage, working memory
|
|
23
|
+
|
|
24
|
+
---
|
|
17
25
|
|
|
18
26
|
## Installation
|
|
19
27
|
|
|
@@ -21,28 +29,34 @@ Build powerful AI applications with semantic search, document retrieval, and LLM
|
|
|
21
29
|
npm install @hazeljs/rag
|
|
22
30
|
```
|
|
23
31
|
|
|
24
|
-
### Optional
|
|
32
|
+
### Optional peer dependencies
|
|
25
33
|
|
|
26
|
-
Install
|
|
34
|
+
Install only what you need:
|
|
27
35
|
|
|
28
36
|
```bash
|
|
29
|
-
#
|
|
37
|
+
# LLM (required for GraphRAG and RAG query synthesis)
|
|
30
38
|
npm install openai
|
|
31
39
|
|
|
32
|
-
# Vector
|
|
33
|
-
npm install @pinecone-database/pinecone
|
|
34
|
-
npm install weaviate-ts-client # Weaviate
|
|
40
|
+
# Vector stores
|
|
41
|
+
npm install @pinecone-database/pinecone # Pinecone
|
|
35
42
|
npm install @qdrant/js-client-rest # Qdrant
|
|
43
|
+
npm install weaviate-ts-client # Weaviate
|
|
36
44
|
npm install chromadb # ChromaDB
|
|
37
45
|
|
|
38
|
-
#
|
|
39
|
-
npm install cohere-ai
|
|
40
|
-
|
|
46
|
+
# Alternative embedding providers
|
|
47
|
+
npm install cohere-ai
|
|
48
|
+
|
|
49
|
+
# Document loaders
|
|
50
|
+
npm install pdf-parse # PdfLoader
|
|
51
|
+
npm install mammoth # DocxLoader
|
|
52
|
+
npm install cheerio # HtmlFileLoader / WebLoader CSS selectors
|
|
41
53
|
```
|
|
42
54
|
|
|
55
|
+
---
|
|
56
|
+
|
|
43
57
|
## Quick Start
|
|
44
58
|
|
|
45
|
-
### Basic RAG
|
|
59
|
+
### Basic RAG pipeline
|
|
46
60
|
|
|
47
61
|
```typescript
|
|
48
62
|
import {
|
|
@@ -50,271 +64,400 @@ import {
|
|
|
50
64
|
MemoryVectorStore,
|
|
51
65
|
OpenAIEmbeddings,
|
|
52
66
|
RecursiveTextSplitter,
|
|
67
|
+
DirectoryLoader,
|
|
53
68
|
} from '@hazeljs/rag';
|
|
54
69
|
|
|
55
|
-
|
|
56
|
-
const embeddings = new OpenAIEmbeddings({
|
|
57
|
-
apiKey: process.env.OPENAI_API_KEY!,
|
|
58
|
-
model: 'text-embedding-3-small',
|
|
59
|
-
});
|
|
60
|
-
|
|
61
|
-
// 2. Setup vector store
|
|
70
|
+
const embeddings = new OpenAIEmbeddings({ apiKey: process.env.OPENAI_API_KEY });
|
|
62
71
|
const vectorStore = new MemoryVectorStore(embeddings);
|
|
63
72
|
|
|
64
|
-
// 3. Setup text splitter
|
|
65
|
-
const textSplitter = new RecursiveTextSplitter({
|
|
66
|
-
chunkSize: 1000,
|
|
67
|
-
chunkOverlap: 200,
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
// 4. Create RAG pipeline
|
|
71
73
|
const rag = new RAGPipeline({
|
|
72
74
|
vectorStore,
|
|
73
75
|
embeddingProvider: embeddings,
|
|
74
|
-
textSplitter,
|
|
76
|
+
textSplitter: new RecursiveTextSplitter({ chunkSize: 800, chunkOverlap: 150 }),
|
|
75
77
|
topK: 5,
|
|
76
78
|
});
|
|
77
|
-
|
|
78
|
-
// 5. Initialize
|
|
79
79
|
await rag.initialize();
|
|
80
80
|
|
|
81
|
-
//
|
|
82
|
-
await
|
|
83
|
-
|
|
84
|
-
content: 'HazelJS is a modern TypeScript framework for building scalable applications.',
|
|
85
|
-
metadata: { source: 'docs', category: 'intro' },
|
|
86
|
-
},
|
|
87
|
-
{
|
|
88
|
-
content: 'The framework includes built-in support for microservices, caching, and AI.',
|
|
89
|
-
metadata: { source: 'docs', category: 'features' },
|
|
90
|
-
},
|
|
91
|
-
]);
|
|
92
|
-
|
|
93
|
-
// 7. Query
|
|
94
|
-
const result = await rag.query('What is HazelJS?', {
|
|
95
|
-
topK: 3,
|
|
96
|
-
filter: { source: 'docs' },
|
|
97
|
-
});
|
|
81
|
+
// Load from disk — auto-detects file types
|
|
82
|
+
const docs = await new DirectoryLoader({ dirPath: './knowledge-base', recursive: true }).load();
|
|
83
|
+
await rag.addDocuments(docs);
|
|
98
84
|
|
|
85
|
+
const result = await rag.query('What is HazelJS?', { topK: 3 });
|
|
99
86
|
console.log(result.answer);
|
|
100
87
|
console.log(result.sources);
|
|
101
88
|
```
|
|
102
89
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
```typescript
|
|
106
|
-
import OpenAI from 'openai';
|
|
90
|
+
---
|
|
107
91
|
|
|
108
|
-
|
|
92
|
+
## Document Loaders
|
|
109
93
|
|
|
110
|
-
|
|
111
|
-
const llmFunction = async (prompt: string) => {
|
|
112
|
-
const response = await openai.chat.completions.create({
|
|
113
|
-
model: 'gpt-4',
|
|
114
|
-
messages: [{ role: 'user', content: prompt }],
|
|
115
|
-
});
|
|
116
|
-
return response.choices[0].message.content || '';
|
|
117
|
-
};
|
|
94
|
+
Every loader extends `BaseDocumentLoader` and returns `Document[]` ready for chunking and indexing.
|
|
118
95
|
|
|
119
|
-
|
|
120
|
-
const rag = new RAGPipeline(config, llmFunction);
|
|
96
|
+
### Built-in loaders
|
|
121
97
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
98
|
+
| Loader | Source | Extra install |
|
|
99
|
+
|--------|--------|:---:|
|
|
100
|
+
| `TextFileLoader` | `.txt` files | — |
|
|
101
|
+
| `MarkdownFileLoader` | `.md` / `.mdx` with heading splits and YAML front-matter | — |
|
|
102
|
+
| `JSONFileLoader` | `.json` with `textKey` / JSON Pointer extraction | — |
|
|
103
|
+
| `CSVFileLoader` | `.csv` rows mapped to documents | — |
|
|
104
|
+
| `HtmlFileLoader` | `.html` tag stripping; optional CSS selector via cheerio | opt. |
|
|
105
|
+
| `DirectoryLoader` | Recursive walk; auto-detects loader by extension | — |
|
|
106
|
+
| `PdfLoader` | PDFs; split by page or full document | `pdf-parse` |
|
|
107
|
+
| `DocxLoader` | Word documents; plain text or HTML output | `mammoth` |
|
|
108
|
+
| `WebLoader` | HTTP scraping with retry/timeout; optional CSS selector | opt. |
|
|
109
|
+
| `YouTubeTranscriptLoader` | YouTube transcripts; no API key; segment by duration | — |
|
|
110
|
+
| `GitHubLoader` | GitHub REST API; filter by path, extension, `maxFiles` | — |
|
|
125
111
|
|
|
126
|
-
|
|
127
|
-
{context}
|
|
112
|
+
### Examples
|
|
128
113
|
|
|
129
|
-
|
|
114
|
+
```typescript
|
|
115
|
+
import {
|
|
116
|
+
TextFileLoader,
|
|
117
|
+
MarkdownFileLoader,
|
|
118
|
+
JSONFileLoader,
|
|
119
|
+
CSVFileLoader,
|
|
120
|
+
PdfLoader,
|
|
121
|
+
DocxLoader,
|
|
122
|
+
WebLoader,
|
|
123
|
+
YouTubeTranscriptLoader,
|
|
124
|
+
GitHubLoader,
|
|
125
|
+
DirectoryLoader,
|
|
126
|
+
} from '@hazeljs/rag';
|
|
130
127
|
|
|
131
|
-
|
|
132
|
-
});
|
|
128
|
+
// Plain text
|
|
129
|
+
const textDocs = await new TextFileLoader({ filePath: './notes.txt' }).load();
|
|
130
|
+
|
|
131
|
+
// Markdown — one document per heading section
|
|
132
|
+
const mdDocs = await new MarkdownFileLoader({
|
|
133
|
+
filePath: './guide.md',
|
|
134
|
+
splitByHeading: true,
|
|
135
|
+
parseYamlFrontMatter: true,
|
|
136
|
+
}).load();
|
|
137
|
+
|
|
138
|
+
// JSON — extract the 'body' field from each element
|
|
139
|
+
const jsonDocs = await new JSONFileLoader({ filePath: './articles.json', textKey: 'body' }).load();
|
|
140
|
+
|
|
141
|
+
// CSV — map columns to content / metadata
|
|
142
|
+
const csvDocs = await new CSVFileLoader({
|
|
143
|
+
filePath: './faqs.csv',
|
|
144
|
+
contentColumns: ['question', 'answer'],
|
|
145
|
+
metadataColumns: ['category'],
|
|
146
|
+
}).load();
|
|
147
|
+
|
|
148
|
+
// PDF — one document per page
|
|
149
|
+
const pdfDocs = await new PdfLoader({ filePath: './report.pdf', splitByPage: true }).load();
|
|
150
|
+
|
|
151
|
+
// DOCX
|
|
152
|
+
const wordDocs = await new DocxLoader({ filePath: './agreement.docx' }).load();
|
|
153
|
+
|
|
154
|
+
// Web scraping
|
|
155
|
+
const webDocs = await new WebLoader({
|
|
156
|
+
urls: ['https://hazeljs.com/docs', 'https://hazeljs.com/blog'],
|
|
157
|
+
timeout: 10_000,
|
|
158
|
+
maxRetries: 3,
|
|
159
|
+
}).load();
|
|
160
|
+
|
|
161
|
+
// YouTube transcript (no API key needed)
|
|
162
|
+
const ytDocs = await new YouTubeTranscriptLoader({
|
|
163
|
+
videoUrl: 'https://www.youtube.com/watch?v=VIDEO_ID',
|
|
164
|
+
segmentDuration: 60, // group into 60-second chunks
|
|
165
|
+
}).load();
|
|
166
|
+
|
|
167
|
+
// GitHub repository
|
|
168
|
+
const githubDocs = await new GitHubLoader({
|
|
169
|
+
owner: 'hazeljs',
|
|
170
|
+
repo: 'hazel',
|
|
171
|
+
directory: 'docs',
|
|
172
|
+
extensions: ['.md'],
|
|
173
|
+
token: process.env.GITHUB_TOKEN,
|
|
174
|
+
}).load();
|
|
175
|
+
|
|
176
|
+
// Directory — auto-detects every file type
|
|
177
|
+
const allDocs = await new DirectoryLoader({
|
|
178
|
+
dirPath: './knowledge-base',
|
|
179
|
+
recursive: true,
|
|
180
|
+
extensions: ['.md', '.txt', '.pdf'],
|
|
181
|
+
}).load();
|
|
133
182
|
```
|
|
134
183
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
### Memory Vector Store (Development)
|
|
184
|
+
### Custom loaders
|
|
138
185
|
|
|
139
186
|
```typescript
|
|
140
|
-
import {
|
|
187
|
+
import { BaseDocumentLoader, Loader, DocumentLoaderRegistry } from '@hazeljs/rag';
|
|
141
188
|
|
|
142
|
-
|
|
143
|
-
|
|
189
|
+
@Loader({ name: 'NotionLoader', extensions: [] })
|
|
190
|
+
export class NotionLoader extends BaseDocumentLoader {
|
|
191
|
+
constructor(private readonly databaseId: string) { super(); }
|
|
192
|
+
|
|
193
|
+
async load() {
|
|
194
|
+
const pages = await fetchNotionPages(this.databaseId);
|
|
195
|
+
return pages.map(p =>
|
|
196
|
+
this.createDocument(p.content, { source: `notion:${p.id}`, title: p.title }),
|
|
197
|
+
);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
// Register so DirectoryLoader can auto-detect it
|
|
202
|
+
DocumentLoaderRegistry.register(NotionLoader, (id: string) => new NotionLoader(id));
|
|
144
203
|
```
|
|
145
204
|
|
|
146
|
-
|
|
205
|
+
---
|
|
147
206
|
|
|
148
|
-
|
|
149
|
-
import { Pinecone } from '@pinecone-database/pinecone';
|
|
150
|
-
import { PineconeVectorStore } from '@hazeljs/rag';
|
|
207
|
+
## GraphRAG
|
|
151
208
|
|
|
152
|
-
|
|
153
|
-
const index = pinecone.index('my-index');
|
|
209
|
+
GraphRAG builds a **knowledge graph** from your documents — entities, relationships, and community clusters — and enables three complementary search modes that go far beyond cosine similarity.
|
|
154
210
|
|
|
155
|
-
|
|
156
|
-
```
|
|
211
|
+
### Why GraphRAG?
|
|
157
212
|
|
|
158
|
-
|
|
213
|
+
| Question type | Traditional RAG | GraphRAG |
|
|
214
|
+
|---|---|---|
|
|
215
|
+
| "What does X do?" | ✅ Good | ✅ Excellent (entity traversal) |
|
|
216
|
+
| "How do X and Y relate?" | ❌ Poor | ✅ Excellent (relationships) |
|
|
217
|
+
| "What are the main architectural layers?" | ❌ Poor | ✅ Excellent (community reports) |
|
|
218
|
+
| Multi-document cross-referencing | ❌ Fragmented | ✅ Native |
|
|
159
219
|
|
|
160
|
-
|
|
161
|
-
import { QdrantClient } from '@qdrant/js-client-rest';
|
|
162
|
-
import { QdrantVectorStore } from '@hazeljs/rag';
|
|
220
|
+
### Build the graph
|
|
163
221
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
222
|
+
```typescript
|
|
223
|
+
import OpenAI from 'openai';
|
|
224
|
+
import { GraphRAGPipeline, DirectoryLoader } from '@hazeljs/rag';
|
|
225
|
+
|
|
226
|
+
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
|
|
227
|
+
|
|
228
|
+
const graphRag = new GraphRAGPipeline({
|
|
229
|
+
// Provider-agnostic: any LLM that accepts a string prompt
|
|
230
|
+
llm: async (prompt) => {
|
|
231
|
+
const res = await openai.chat.completions.create({
|
|
232
|
+
model: 'gpt-4o-mini',
|
|
233
|
+
temperature: 0,
|
|
234
|
+
messages: [{ role: 'user', content: prompt }],
|
|
235
|
+
});
|
|
236
|
+
return res.choices[0].message.content ?? '';
|
|
237
|
+
},
|
|
238
|
+
extractionChunkSize: 2000, // chars per LLM extraction call
|
|
239
|
+
generateCommunityReports: true, // LLM summaries per community cluster
|
|
240
|
+
maxCommunitySize: 15, // split clusters larger than this
|
|
241
|
+
localSearchDepth: 2, // BFS hops for local search
|
|
242
|
+
localSearchTopK: 5, // seed entities per query
|
|
243
|
+
globalSearchTopK: 5, // community reports for global search
|
|
167
244
|
});
|
|
168
|
-
```
|
|
169
245
|
|
|
170
|
-
|
|
246
|
+
const docs = await new DirectoryLoader({ dirPath: './knowledge-base', recursive: true }).load();
|
|
247
|
+
const stats = await graphRag.build(docs);
|
|
248
|
+
// { documentsProcessed, entitiesExtracted, relationshipsExtracted,
|
|
249
|
+
// communitiesDetected, communityReportsGenerated, duration }
|
|
250
|
+
```
|
|
171
251
|
|
|
172
|
-
###
|
|
252
|
+
### Search modes
|
|
173
253
|
|
|
174
254
|
```typescript
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
const
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
255
|
+
// LOCAL — entity-centric, BFS graph traversal
|
|
256
|
+
// Best for: specific questions about named concepts, classes, or technologies
|
|
257
|
+
const local = await graphRag.search(
|
|
258
|
+
'How does dependency injection work?',
|
|
259
|
+
{ mode: 'local' },
|
|
260
|
+
);
|
|
261
|
+
console.log(local.answer);
|
|
262
|
+
console.log(local.entities); // entities found and traversed
|
|
263
|
+
console.log(local.relationships); // evidence relationships
|
|
264
|
+
|
|
265
|
+
// GLOBAL — community report ranking
|
|
266
|
+
// Best for: broad thematic questions, architecture overviews
|
|
267
|
+
const global = await graphRag.search(
|
|
268
|
+
'What are the main architectural layers of this system?',
|
|
269
|
+
{ mode: 'global' },
|
|
270
|
+
);
|
|
271
|
+
console.log(global.communities); // ranked community reports used
|
|
272
|
+
|
|
273
|
+
// HYBRID — runs both in parallel, single synthesis call (recommended default)
|
|
274
|
+
const result = await graphRag.search('What vector stores does @hazeljs/rag support?');
|
|
275
|
+
// mode defaults to 'hybrid'
|
|
276
|
+
console.log(`${result.mode} search in ${result.duration}ms`);
|
|
182
277
|
```
|
|
183
278
|
|
|
184
|
-
###
|
|
279
|
+
### Incremental updates
|
|
185
280
|
|
|
186
281
|
```typescript
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
apiKey: process.env.COHERE_API_KEY!,
|
|
191
|
-
model: 'embed-english-v3.0',
|
|
192
|
-
});
|
|
282
|
+
const newDocs = await new WebLoader({ urls: ['https://hazeljs.com/blog/new'] }).load();
|
|
283
|
+
await graphRag.addDocuments(newDocs);
|
|
284
|
+
// Re-runs community detection and regenerates reports automatically
|
|
193
285
|
```
|
|
194
286
|
|
|
195
|
-
###
|
|
287
|
+
### Inspect the graph
|
|
196
288
|
|
|
197
289
|
```typescript
|
|
198
|
-
|
|
290
|
+
const graph = graphRag.getGraph();
|
|
199
291
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
292
|
+
// Entities, relationships, community reports
|
|
293
|
+
console.log([...graph.entities.values()].slice(0, 5));
|
|
294
|
+
console.log([...graph.relationships.values()].slice(0, 5));
|
|
295
|
+
console.log([...graph.communityReports.values()].map(r => r.title));
|
|
296
|
+
|
|
297
|
+
// Statistics
|
|
298
|
+
const stats = graphRag.getStats();
|
|
299
|
+
console.log(stats.entityTypeBreakdown); // { TECHNOLOGY: 14, CONCEPT: 12, ... }
|
|
300
|
+
console.log(stats.topEntities.slice(0, 5)); // most-connected entities
|
|
204
301
|
```
|
|
205
302
|
|
|
206
|
-
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## Vector Stores
|
|
207
306
|
|
|
208
|
-
|
|
307
|
+
All stores implement the same interface — swap them with a one-line change.
|
|
209
308
|
|
|
210
309
|
```typescript
|
|
211
|
-
import {
|
|
310
|
+
import { MemoryVectorStore, OpenAIEmbeddings } from '@hazeljs/rag';
|
|
212
311
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
chunkOverlap: 200,
|
|
216
|
-
separators: ['\n\n', '\n', '. ', ' ', ''],
|
|
217
|
-
});
|
|
312
|
+
// Development
|
|
313
|
+
const vectorStore = new MemoryVectorStore(embeddings);
|
|
218
314
|
|
|
219
|
-
|
|
220
|
-
|
|
315
|
+
// Pinecone (production, serverless)
|
|
316
|
+
import { PineconeVectorStore } from '@hazeljs/rag';
|
|
317
|
+
const vectorStore = new PineconeVectorStore(embeddings, {
|
|
318
|
+
apiKey: process.env.PINECONE_API_KEY,
|
|
319
|
+
indexName: 'my-knowledge-base',
|
|
320
|
+
});
|
|
221
321
|
|
|
222
|
-
|
|
322
|
+
// Qdrant (high-performance, self-hosted)
|
|
323
|
+
import { QdrantVectorStore } from '@hazeljs/rag';
|
|
324
|
+
const vectorStore = new QdrantVectorStore(embeddings, {
|
|
325
|
+
url: process.env.QDRANT_URL || 'http://localhost:6333',
|
|
326
|
+
collectionName: 'my-collection',
|
|
327
|
+
});
|
|
223
328
|
|
|
224
|
-
|
|
329
|
+
// Weaviate (GraphQL, flexible)
|
|
330
|
+
import { WeaviateVectorStore } from '@hazeljs/rag';
|
|
331
|
+
const vectorStore = new WeaviateVectorStore(embeddings, {
|
|
332
|
+
host: process.env.WEAVIATE_HOST || 'http://localhost:8080',
|
|
333
|
+
className: 'MyKnowledgeBase',
|
|
334
|
+
});
|
|
225
335
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
336
|
+
// ChromaDB (prototyping)
|
|
337
|
+
import { ChromaVectorStore } from '@hazeljs/rag';
|
|
338
|
+
const vectorStore = new ChromaVectorStore(embeddings, {
|
|
339
|
+
url: process.env.CHROMA_URL || 'http://localhost:8000',
|
|
340
|
+
collectionName: 'my-collection',
|
|
230
341
|
});
|
|
231
342
|
```
|
|
232
343
|
|
|
233
|
-
###
|
|
344
|
+
### Vector store comparison
|
|
234
345
|
|
|
235
|
-
|
|
346
|
+
| | Memory | Pinecone | Qdrant | Weaviate | ChromaDB |
|
|
347
|
+
|---|:---:|:---:|:---:|:---:|:---:|
|
|
348
|
+
| Setup | None | API Key | Docker | Docker | Docker |
|
|
349
|
+
| Persistence | ❌ | ✅ | ✅ | ✅ | ✅ |
|
|
350
|
+
| Best for | Dev/Test | Production | High-perf | GraphQL | Prototyping |
|
|
351
|
+
| Cost | Free | Paid | OSS | OSS | OSS |
|
|
352
|
+
|
|
353
|
+
---
|
|
354
|
+
|
|
355
|
+
## Embedding Providers
|
|
236
356
|
|
|
237
357
|
```typescript
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
358
|
+
import { OpenAIEmbeddings, CohereEmbeddings } from '@hazeljs/rag';
|
|
359
|
+
|
|
360
|
+
// OpenAI
|
|
361
|
+
const openaiEmbed = new OpenAIEmbeddings({
|
|
362
|
+
apiKey: process.env.OPENAI_API_KEY,
|
|
363
|
+
model: 'text-embedding-3-small', // 1536 dims
|
|
364
|
+
// model: 'text-embedding-3-large', // 3072 dims, highest quality
|
|
365
|
+
});
|
|
366
|
+
|
|
367
|
+
// Cohere (multilingual)
|
|
368
|
+
const cohereEmbed = new CohereEmbeddings({
|
|
369
|
+
apiKey: process.env.COHERE_API_KEY,
|
|
370
|
+
model: 'embed-multilingual-v3.0',
|
|
241
371
|
});
|
|
242
372
|
```
|
|
243
373
|
|
|
244
|
-
|
|
374
|
+
---
|
|
245
375
|
|
|
246
|
-
|
|
376
|
+
## Retrieval Strategies
|
|
247
377
|
|
|
248
378
|
```typescript
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
379
|
+
import { HybridSearchRetrieval, MultiQueryRetrieval } from '@hazeljs/rag';
|
|
380
|
+
|
|
381
|
+
// Hybrid — vector + BM25 keyword fusion
|
|
382
|
+
const hybrid = new HybridSearchRetrieval(vectorStore, {
|
|
383
|
+
vectorWeight: 0.7,
|
|
384
|
+
keywordWeight: 0.3,
|
|
385
|
+
topK: 10,
|
|
252
386
|
});
|
|
387
|
+
const results = await hybrid.search('machine learning algorithms', { topK: 5 });
|
|
388
|
+
|
|
389
|
+
// Multi-query — LLM generates N query variations, deduplicates results
|
|
390
|
+
const multiQuery = new MultiQueryRetrieval(vectorStore, {
|
|
391
|
+
llmApiKey: process.env.OPENAI_API_KEY,
|
|
392
|
+
numQueries: 3,
|
|
393
|
+
topK: 10,
|
|
394
|
+
});
|
|
395
|
+
const results2 = await multiQuery.search('How do I deploy my app?', { topK: 5 });
|
|
253
396
|
```
|
|
254
397
|
|
|
255
|
-
|
|
398
|
+
---
|
|
399
|
+
|
|
400
|
+
## Text Splitting
|
|
256
401
|
|
|
257
402
|
```typescript
|
|
258
|
-
|
|
259
|
-
{
|
|
260
|
-
content: 'Document 1',
|
|
261
|
-
metadata: { category: 'tech', year: 2024 },
|
|
262
|
-
},
|
|
263
|
-
{
|
|
264
|
-
content: 'Document 2',
|
|
265
|
-
metadata: { category: 'science', year: 2023 },
|
|
266
|
-
},
|
|
267
|
-
]);
|
|
403
|
+
import { RecursiveTextSplitter } from '@hazeljs/rag';
|
|
268
404
|
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
405
|
+
const splitter = new RecursiveTextSplitter({
|
|
406
|
+
chunkSize: 1000, // target chars per chunk
|
|
407
|
+
chunkOverlap: 200, // overlap for context continuity
|
|
408
|
+
separators: ['\n\n', '\n', '. ', ' '],
|
|
272
409
|
});
|
|
410
|
+
|
|
411
|
+
const chunks = splitter.split(longDocument);
|
|
273
412
|
```
|
|
274
413
|
|
|
275
|
-
|
|
414
|
+
---
|
|
276
415
|
|
|
277
|
-
|
|
416
|
+
## Memory System
|
|
278
417
|
|
|
279
418
|
```typescript
|
|
280
|
-
import {
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
419
|
+
import {
|
|
420
|
+
RAGPipelineWithMemory,
|
|
421
|
+
MemoryManager,
|
|
422
|
+
HybridMemory,
|
|
423
|
+
BufferMemory,
|
|
424
|
+
VectorMemory,
|
|
425
|
+
} from '@hazeljs/rag';
|
|
284
426
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
return [{ content: text, metadata: { source: this.filePath } }];
|
|
289
|
-
}
|
|
427
|
+
const buffer = new BufferMemory({ maxSize: 20 });
|
|
428
|
+
const vectorMemory = new VectorMemory(vectorStore, embeddings);
|
|
429
|
+
const memory = new MemoryManager(new HybridMemory(buffer, vectorMemory));
|
|
290
430
|
|
|
291
|
-
|
|
292
|
-
// PDF parsing logic
|
|
293
|
-
return '';
|
|
294
|
-
}
|
|
295
|
-
}
|
|
431
|
+
const rag = new RAGPipelineWithMemory(config, memory, llmFunction);
|
|
296
432
|
|
|
297
|
-
const
|
|
298
|
-
|
|
299
|
-
|
|
433
|
+
const response = await rag.queryWithMemory(
|
|
434
|
+
'What did we discuss about deployment?',
|
|
435
|
+
'session-123',
|
|
436
|
+
'user-456',
|
|
437
|
+
);
|
|
438
|
+
console.log(response.answer);
|
|
439
|
+
console.log(response.memories);
|
|
300
440
|
```
|
|
301
441
|
|
|
302
|
-
|
|
442
|
+
---
|
|
303
443
|
|
|
304
|
-
|
|
305
|
-
// Add multiple documents efficiently
|
|
306
|
-
const ids = await rag.addDocuments(documents);
|
|
444
|
+
## API Reference
|
|
307
445
|
|
|
308
|
-
|
|
309
|
-
await rag.deleteDocuments(ids);
|
|
446
|
+
### `GraphRAGPipeline`
|
|
310
447
|
|
|
311
|
-
|
|
312
|
-
|
|
448
|
+
```typescript
|
|
449
|
+
class GraphRAGPipeline {
|
|
450
|
+
constructor(config: GraphRAGConfig);
|
|
451
|
+
build(docs: Document[]): Promise<GraphBuildStats>;
|
|
452
|
+
addDocuments(docs: Document[]): Promise<GraphBuildStats>;
|
|
453
|
+
search(query: string, options?: GraphSearchOptions): Promise<GraphSearchResult>;
|
|
454
|
+
getGraph(): KnowledgeGraph;
|
|
455
|
+
getStats(): GraphStats;
|
|
456
|
+
clear(): void;
|
|
457
|
+
}
|
|
313
458
|
```
|
|
314
459
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
### RAGPipeline
|
|
460
|
+
### `RAGPipeline`
|
|
318
461
|
|
|
319
462
|
```typescript
|
|
320
463
|
class RAGPipeline {
|
|
@@ -322,58 +465,40 @@ class RAGPipeline {
|
|
|
322
465
|
initialize(): Promise<void>;
|
|
323
466
|
addDocuments(documents: Document[]): Promise<string[]>;
|
|
324
467
|
query(query: string, options?: RAGQueryOptions): Promise<RAGResponse>;
|
|
325
|
-
|
|
468
|
+
search(query: string, options?: QueryOptions): Promise<SearchResult[]>;
|
|
326
469
|
deleteDocuments(ids: string[]): Promise<void>;
|
|
327
470
|
clear(): Promise<void>;
|
|
328
471
|
}
|
|
329
472
|
```
|
|
330
473
|
|
|
331
|
-
###
|
|
474
|
+
### `Document`
|
|
332
475
|
|
|
333
476
|
```typescript
|
|
334
477
|
interface Document {
|
|
335
478
|
id?: string;
|
|
336
479
|
content: string;
|
|
337
|
-
metadata?: Record<string,
|
|
480
|
+
metadata?: Record<string, unknown>;
|
|
338
481
|
embedding?: number[];
|
|
339
482
|
}
|
|
340
|
-
|
|
341
|
-
interface SearchResult {
|
|
342
|
-
id: string;
|
|
343
|
-
content: string;
|
|
344
|
-
metadata?: Record<string, any>;
|
|
345
|
-
score: number;
|
|
346
|
-
embedding?: number[];
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
interface RAGResponse {
|
|
350
|
-
answer: string;
|
|
351
|
-
sources: SearchResult[];
|
|
352
|
-
context: string;
|
|
353
|
-
}
|
|
354
483
|
```
|
|
355
484
|
|
|
356
|
-
|
|
485
|
+
---
|
|
357
486
|
|
|
358
|
-
|
|
359
|
-
- 💬 **Chatbots** - Context-aware conversational AI
|
|
360
|
-
- π **Knowledge Base** - Internal knowledge management
|
|
361
|
-
- π **Content Recommendations** - Similar content discovery
|
|
362
|
-
- π **Educational Tools** - Q&A systems with source citations
|
|
363
|
-
- 🏢 **Enterprise Search** - Semantic search across company data
|
|
487
|
+
## Use Cases
|
|
364
488
|
|
|
365
|
-
|
|
489
|
+
- **Documentation Q&A** — Index all your docs and answer developer questions
|
|
490
|
+
- 🕸️ **Codebase Understanding** — GraphRAG over a repo to explain architecture and dependencies
|
|
491
|
+
- 💬 **Context-Aware Chatbots** — RAG + memory for multi-turn conversations
|
|
492
|
+
- **Enterprise Knowledge Base** — Combine web, GitHub, PDFs, and internal wikis
|
|
493
|
+
- **Research Assistants** — Multi-document reasoning with knowledge graph traversal
|
|
494
|
+
- **Content Intelligence** — Semantic search + relationship mapping across articles
|
|
366
495
|
|
|
367
|
-
|
|
368
|
-
2. **Chunk Size** - Balance between context and precision (500-1500 tokens)
|
|
369
|
-
3. **Overlap** - Use 10-20% overlap for better context continuity
|
|
370
|
-
4. **Caching** - Cache embeddings for frequently accessed documents
|
|
371
|
-
5. **Filtering** - Use metadata filters to reduce search space
|
|
496
|
+
---
|
|
372
497
|
|
|
373
498
|
## License
|
|
374
499
|
|
|
375
|
-
|
|
500
|
+
Apache 2.0
|
|
376
501
|
|
|
377
502
|
## Contributing
|
|
378
503
|
|
|
379
|
-
Contributions are welcome!
|
|
504
|
+
Contributions are welcome! See [CONTRIBUTING.md](../../CONTRIBUTING.md) for details.
|