@hazeljs/rag 0.2.0-beta.60 → 0.2.0-beta.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/README.md +342 -221
  2. package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
  3. package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
  4. package/dist/__tests__/graph/community-detector.test.js +87 -0
  5. package/dist/__tests__/graph/community-detector.test.js.map +1 -0
  6. package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
  7. package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
  8. package/dist/__tests__/graph/community-summarizer.test.js +131 -0
  9. package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
  10. package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
  11. package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
  12. package/dist/__tests__/graph/entity-extractor.test.js +129 -0
  13. package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
  14. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
  15. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
  16. package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
  17. package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
  18. package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
  19. package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
  20. package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
  21. package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
  22. package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
  23. package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
  24. package/dist/__tests__/loaders/base.loader.test.js +114 -0
  25. package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
  26. package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
  27. package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
  28. package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
  29. package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
  30. package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
  31. package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
  32. package/dist/__tests__/loaders/directory.loader.test.js +154 -0
  33. package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
  34. package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
  35. package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
  36. package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
  37. package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
  38. package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
  39. package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
  40. package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
  41. package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
  42. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
  43. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
  44. package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
  45. package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
  46. package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
  47. package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
  48. package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
  49. package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
  50. package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
  51. package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
  52. package/dist/__tests__/rag-pipeline.test.js +210 -0
  53. package/dist/__tests__/rag-pipeline.test.js.map +1 -0
  54. package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
  55. package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
  56. package/dist/__tests__/retrieval/bm25.test.js +86 -0
  57. package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
  58. package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
  59. package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
  60. package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
  61. package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
  62. package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
  63. package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
  64. package/dist/__tests__/retrieval/multi-query.test.js +90 -0
  65. package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
  66. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
  67. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
  68. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
  69. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
  70. package/dist/__tests__/utils/similarity.test.d.ts +2 -0
  71. package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
  72. package/dist/__tests__/utils/similarity.test.js +47 -0
  73. package/dist/__tests__/utils/similarity.test.js.map +1 -0
  74. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +1 -0
  75. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -1
  76. package/dist/agentic/decorators/adaptive-retrieval.decorator.js +4 -15
  77. package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -1
  78. package/dist/agentic/decorators/corrective-rag.decorator.d.ts +1 -0
  79. package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -1
  80. package/dist/agentic/decorators/corrective-rag.decorator.js +7 -11
  81. package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -1
  82. package/dist/agentic/decorators/hyde.decorator.d.ts +1 -0
  83. package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -1
  84. package/dist/agentic/decorators/hyde.decorator.js +7 -6
  85. package/dist/agentic/decorators/hyde.decorator.js.map +1 -1
  86. package/dist/agentic/decorators/multi-hop.decorator.d.ts +1 -0
  87. package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -1
  88. package/dist/agentic/decorators/multi-hop.decorator.js +8 -19
  89. package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -1
  90. package/dist/agentic/decorators/query-planner.decorator.d.ts +1 -0
  91. package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -1
  92. package/dist/agentic/decorators/query-planner.decorator.js +4 -18
  93. package/dist/agentic/decorators/query-planner.decorator.js.map +1 -1
  94. package/dist/agentic/decorators/query-rewriter.decorator.d.ts +1 -0
  95. package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -1
  96. package/dist/agentic/decorators/query-rewriter.decorator.js +8 -5
  97. package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -1
  98. package/dist/agentic/decorators/self-reflective.decorator.d.ts +2 -0
  99. package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -1
  100. package/dist/agentic/decorators/self-reflective.decorator.js +11 -32
  101. package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -1
  102. package/dist/agentic/index.d.ts +1 -1
  103. package/dist/agentic/index.d.ts.map +1 -1
  104. package/dist/agentic/types.d.ts +3 -2
  105. package/dist/agentic/types.d.ts.map +1 -1
  106. package/dist/graph/community-detector.d.ts +45 -0
  107. package/dist/graph/community-detector.d.ts.map +1 -0
  108. package/dist/graph/community-detector.js +153 -0
  109. package/dist/graph/community-detector.js.map +1 -0
  110. package/dist/graph/community-summarizer.d.ts +41 -0
  111. package/dist/graph/community-summarizer.d.ts.map +1 -0
  112. package/dist/graph/community-summarizer.js +119 -0
  113. package/dist/graph/community-summarizer.js.map +1 -0
  114. package/dist/graph/entity-extractor.d.ts +47 -0
  115. package/dist/graph/entity-extractor.d.ts.map +1 -0
  116. package/dist/graph/entity-extractor.js +224 -0
  117. package/dist/graph/entity-extractor.js.map +1 -0
  118. package/dist/graph/graph-rag-pipeline.d.ts +83 -0
  119. package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
  120. package/dist/graph/graph-rag-pipeline.js +390 -0
  121. package/dist/graph/graph-rag-pipeline.js.map +1 -0
  122. package/dist/graph/graph.types.d.ts +186 -0
  123. package/dist/graph/graph.types.d.ts.map +1 -0
  124. package/dist/graph/graph.types.js +20 -0
  125. package/dist/graph/graph.types.js.map +1 -0
  126. package/dist/graph/index.d.ts +15 -0
  127. package/dist/graph/index.d.ts.map +1 -0
  128. package/dist/graph/index.js +31 -0
  129. package/dist/graph/index.js.map +1 -0
  130. package/dist/graph/knowledge-graph.d.ts +57 -0
  131. package/dist/graph/knowledge-graph.d.ts.map +1 -0
  132. package/dist/graph/knowledge-graph.js +198 -0
  133. package/dist/graph/knowledge-graph.js.map +1 -0
  134. package/dist/index.d.ts +2 -0
  135. package/dist/index.d.ts.map +1 -1
  136. package/dist/index.js +4 -0
  137. package/dist/index.js.map +1 -1
  138. package/dist/loaders/base.loader.d.ts +108 -0
  139. package/dist/loaders/base.loader.d.ts.map +1 -0
  140. package/dist/loaders/base.loader.js +123 -0
  141. package/dist/loaders/base.loader.js.map +1 -0
  142. package/dist/loaders/csv-file.loader.d.ts +61 -0
  143. package/dist/loaders/csv-file.loader.d.ts.map +1 -0
  144. package/dist/loaders/csv-file.loader.js +162 -0
  145. package/dist/loaders/csv-file.loader.js.map +1 -0
  146. package/dist/loaders/directory.loader.d.ts +67 -0
  147. package/dist/loaders/directory.loader.d.ts.map +1 -0
  148. package/dist/loaders/directory.loader.js +163 -0
  149. package/dist/loaders/directory.loader.js.map +1 -0
  150. package/dist/loaders/docx.loader.d.ts +52 -0
  151. package/dist/loaders/docx.loader.d.ts.map +1 -0
  152. package/dist/loaders/docx.loader.js +110 -0
  153. package/dist/loaders/docx.loader.js.map +1 -0
  154. package/dist/loaders/github.loader.d.ts +114 -0
  155. package/dist/loaders/github.loader.d.ts.map +1 -0
  156. package/dist/loaders/github.loader.js +217 -0
  157. package/dist/loaders/github.loader.js.map +1 -0
  158. package/dist/loaders/html-file.loader.d.ts +55 -0
  159. package/dist/loaders/html-file.loader.d.ts.map +1 -0
  160. package/dist/loaders/html-file.loader.js +170 -0
  161. package/dist/loaders/html-file.loader.js.map +1 -0
  162. package/dist/loaders/index.d.ts +52 -0
  163. package/dist/loaders/index.d.ts.map +1 -0
  164. package/dist/loaders/index.js +61 -0
  165. package/dist/loaders/index.js.map +1 -0
  166. package/dist/loaders/json-file.loader.d.ts +51 -0
  167. package/dist/loaders/json-file.loader.d.ts.map +1 -0
  168. package/dist/loaders/json-file.loader.js +100 -0
  169. package/dist/loaders/json-file.loader.js.map +1 -0
  170. package/dist/loaders/markdown-file.loader.d.ts +61 -0
  171. package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
  172. package/dist/loaders/markdown-file.loader.js +148 -0
  173. package/dist/loaders/markdown-file.loader.js.map +1 -0
  174. package/dist/loaders/pdf.loader.d.ts +64 -0
  175. package/dist/loaders/pdf.loader.d.ts.map +1 -0
  176. package/dist/loaders/pdf.loader.js +163 -0
  177. package/dist/loaders/pdf.loader.js.map +1 -0
  178. package/dist/loaders/text-file.loader.d.ts +39 -0
  179. package/dist/loaders/text-file.loader.d.ts.map +1 -0
  180. package/dist/loaders/text-file.loader.js +69 -0
  181. package/dist/loaders/text-file.loader.js.map +1 -0
  182. package/dist/loaders/web.loader.d.ts +87 -0
  183. package/dist/loaders/web.loader.d.ts.map +1 -0
  184. package/dist/loaders/web.loader.js +194 -0
  185. package/dist/loaders/web.loader.js.map +1 -0
  186. package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
  187. package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
  188. package/dist/loaders/youtube-transcript.loader.js +254 -0
  189. package/dist/loaders/youtube-transcript.loader.js.map +1 -0
  190. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts +8 -0
  191. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts.map +1 -0
  192. package/dist/prompts/agentic/adaptive-retrieval.prompt.js +27 -0
  193. package/dist/prompts/agentic/adaptive-retrieval.prompt.js.map +1 -0
  194. package/dist/prompts/agentic/corrective-rag.prompt.d.ts +9 -0
  195. package/dist/prompts/agentic/corrective-rag.prompt.d.ts.map +1 -0
  196. package/dist/prompts/agentic/corrective-rag.prompt.js +23 -0
  197. package/dist/prompts/agentic/corrective-rag.prompt.js.map +1 -0
  198. package/dist/prompts/agentic/hyde.prompt.d.ts +9 -0
  199. package/dist/prompts/agentic/hyde.prompt.d.ts.map +1 -0
  200. package/dist/prompts/agentic/hyde.prompt.js +18 -0
  201. package/dist/prompts/agentic/hyde.prompt.js.map +1 -0
  202. package/dist/prompts/agentic/multi-hop.prompt.d.ts +15 -0
  203. package/dist/prompts/agentic/multi-hop.prompt.d.ts.map +1 -0
  204. package/dist/prompts/agentic/multi-hop.prompt.js +38 -0
  205. package/dist/prompts/agentic/multi-hop.prompt.js.map +1 -0
  206. package/dist/prompts/agentic/query-planner.prompt.d.ts +8 -0
  207. package/dist/prompts/agentic/query-planner.prompt.d.ts.map +1 -0
  208. package/dist/prompts/agentic/query-planner.prompt.js +30 -0
  209. package/dist/prompts/agentic/query-planner.prompt.js.map +1 -0
  210. package/dist/prompts/agentic/query-rewriter.prompt.d.ts +10 -0
  211. package/dist/prompts/agentic/query-rewriter.prompt.d.ts.map +1 -0
  212. package/dist/prompts/agentic/query-rewriter.prompt.js +17 -0
  213. package/dist/prompts/agentic/query-rewriter.prompt.js.map +1 -0
  214. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts +10 -0
  215. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts.map +1 -0
  216. package/dist/prompts/agentic/self-reflective-improve.prompt.js +24 -0
  217. package/dist/prompts/agentic/self-reflective-improve.prompt.js.map +1 -0
  218. package/dist/prompts/agentic/self-reflective.prompt.d.ts +9 -0
  219. package/dist/prompts/agentic/self-reflective.prompt.d.ts.map +1 -0
  220. package/dist/prompts/agentic/self-reflective.prompt.js +32 -0
  221. package/dist/prompts/agentic/self-reflective.prompt.js.map +1 -0
  222. package/dist/prompts/community-summary.prompt.d.ts +9 -0
  223. package/dist/prompts/community-summary.prompt.d.ts.map +1 -0
  224. package/dist/prompts/community-summary.prompt.js +30 -0
  225. package/dist/prompts/community-summary.prompt.js.map +1 -0
  226. package/dist/prompts/entity-extraction.prompt.d.ts +10 -0
  227. package/dist/prompts/entity-extraction.prompt.d.ts.map +1 -0
  228. package/dist/prompts/entity-extraction.prompt.js +39 -0
  229. package/dist/prompts/entity-extraction.prompt.js.map +1 -0
  230. package/dist/prompts/graph-search.prompt.d.ts +10 -0
  231. package/dist/prompts/graph-search.prompt.d.ts.map +1 -0
  232. package/dist/prompts/graph-search.prompt.js +23 -0
  233. package/dist/prompts/graph-search.prompt.js.map +1 -0
  234. package/dist/prompts/index.d.ts +13 -0
  235. package/dist/prompts/index.d.ts.map +1 -0
  236. package/dist/prompts/index.js +29 -0
  237. package/dist/prompts/index.js.map +1 -0
  238. package/dist/prompts/rag-answer.prompt.d.ts +9 -0
  239. package/dist/prompts/rag-answer.prompt.d.ts.map +1 -0
  240. package/dist/prompts/rag-answer.prompt.js +20 -0
  241. package/dist/prompts/rag-answer.prompt.js.map +1 -0
  242. package/dist/rag.service.d.ts +1 -0
  243. package/dist/rag.service.d.ts.map +1 -1
  244. package/dist/rag.service.js +7 -8
  245. package/dist/rag.service.js.map +1 -1
  246. package/package.json +55 -2
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Your docs. Your data. AI that actually knows them.**
4
4
 
5
- Semantic search, embeddings, document indexing. Pinecone, Weaviate, Qdrant, ChromaDB — or in-memory for dev. Full RAG pipeline: load, split, embed, retrieve, augment. No PhD required.
5
+ Load documents from any source, build a knowledge graph, embed into vector stores, and retrieve answers with semantic, hybrid, or graph-based search. Full RAG + GraphRAG pipeline — no PhD required.
6
6
 
7
7
  [![npm version](https://img.shields.io/npm/v/@hazeljs/rag.svg)](https://www.npmjs.com/package/@hazeljs/rag)
8
8
  [![npm downloads](https://img.shields.io/npm/dm/@hazeljs/rag)](https://www.npmjs.com/package/@hazeljs/rag)
@@ -10,14 +10,18 @@ Semantic search, embeddings, document indexing. Pinecone, Weaviate, Qdrant, Chro
10
10
 
11
11
  ## Features
12
12
 
13
- 🔍 **Vector Search** - Semantic similarity search using embeddings
14
- 📚 **Document Management** - Load, split, and index documents
15
- 🤖 **RAG Pipeline** - Complete retrieval-augmented generation workflow
16
- 🎯 **Multiple Strategies** - Similarity, MMR (Maximal Marginal Relevance), Hybrid search
17
- 🔌 **Pluggable Backends** - Support for Pinecone, Weaviate, Qdrant, ChromaDB, and in-memory
18
- 🌐 **Multiple Embedding Providers** - OpenAI, Cohere, HuggingFace
19
- ✂️ **Smart Text Splitting** - Recursive text splitter with overlap
20
- 📊 **Metadata Filtering** - Filter results by custom metadata
13
+ 📂 **11 Document Loaders** — TXT, Markdown, JSON, CSV, HTML, PDF, DOCX, web scraping, YouTube transcripts, GitHub repos, and inline text. All return the same `Document[]` interface.
14
+ 🕸️ **GraphRAG** — Extract entities and relationships from documents, build a knowledge graph, detect communities, and answer questions with entity-centric (local), thematic (global), or hybrid search.
15
+ 🔍 **Vector Search** — Semantic similarity search with configurable embeddings and vector stores
16
+ 🤖 **RAG Pipeline** — Complete load → split → embed → retrieve → augment workflow
17
+ 🎯 **Multiple Strategies** — Similarity, Hybrid (vector + BM25), Multi-Query retrieval
18
+ 🔌 **5 Vector Stores** — Memory, Pinecone, Qdrant, Weaviate, ChromaDB (unified interface)
19
+ 🌐 **Embedding Providers** — OpenAI and Cohere, easily extensible
20
+ ✂️ **Smart Text Splitting** — Recursive, character, and token splitters
21
+ 📊 **Metadata Filtering** — Filter results by any metadata field
22
+ 🧠 **Memory System** — Conversation history, entity memory, fact storage, working memory
23
+
24
+ ---
21
25
 
22
26
  ## Installation
23
27
 
@@ -25,28 +29,34 @@ Semantic search, embeddings, document indexing. Pinecone, Weaviate, Qdrant, Chro
25
29
  npm install @hazeljs/rag
26
30
  ```
27
31
 
28
- ### Optional Peer Dependencies
32
+ ### Optional peer dependencies
29
33
 
30
- Install the vector store and embedding provider you want to use:
34
+ Install only what you need:
31
35
 
32
36
  ```bash
33
- # OpenAI Embeddings
37
+ # LLM (required for GraphRAG and RAG query synthesis)
34
38
  npm install openai
35
39
 
36
- # Vector Stores (choose one or more)
37
- npm install @pinecone-database/pinecone # Pinecone
38
- npm install weaviate-ts-client # Weaviate
40
+ # Vector stores
41
+ npm install @pinecone-database/pinecone # Pinecone
39
42
  npm install @qdrant/js-client-rest # Qdrant
43
+ npm install weaviate-ts-client # Weaviate
40
44
  npm install chromadb # ChromaDB
41
45
 
42
- # Additional Embedding Providers
43
- npm install cohere-ai # Cohere
44
- npm install @huggingface/inference # HuggingFace
46
+ # Alternative embedding providers
47
+ npm install cohere-ai
48
+
49
+ # Document loaders
50
+ npm install pdf-parse # PdfLoader
51
+ npm install mammoth # DocxLoader
52
+ npm install cheerio # HtmlFileLoader / WebLoader CSS selectors
45
53
  ```
46
54
 
55
+ ---
56
+
47
57
  ## Quick Start
48
58
 
49
- ### Basic RAG Pipeline
59
+ ### Basic RAG pipeline
50
60
 
51
61
  ```typescript
52
62
  import {
@@ -54,271 +64,400 @@ import {
54
64
  MemoryVectorStore,
55
65
  OpenAIEmbeddings,
56
66
  RecursiveTextSplitter,
67
+ DirectoryLoader,
57
68
  } from '@hazeljs/rag';
58
69
 
59
- // 1. Setup embedding provider
60
- const embeddings = new OpenAIEmbeddings({
61
- apiKey: process.env.OPENAI_API_KEY!,
62
- model: 'text-embedding-3-small',
63
- });
64
-
65
- // 2. Setup vector store
70
+ const embeddings = new OpenAIEmbeddings({ apiKey: process.env.OPENAI_API_KEY });
66
71
  const vectorStore = new MemoryVectorStore(embeddings);
67
72
 
68
- // 3. Setup text splitter
69
- const textSplitter = new RecursiveTextSplitter({
70
- chunkSize: 1000,
71
- chunkOverlap: 200,
72
- });
73
-
74
- // 4. Create RAG pipeline
75
73
  const rag = new RAGPipeline({
76
74
  vectorStore,
77
75
  embeddingProvider: embeddings,
78
- textSplitter,
76
+ textSplitter: new RecursiveTextSplitter({ chunkSize: 800, chunkOverlap: 150 }),
79
77
  topK: 5,
80
78
  });
81
-
82
- // 5. Initialize
83
79
  await rag.initialize();
84
80
 
85
- // 6. Add documents
86
- await rag.addDocuments([
87
- {
88
- content: 'HazelJS is a modern TypeScript framework for building scalable applications.',
89
- metadata: { source: 'docs', category: 'intro' },
90
- },
91
- {
92
- content: 'The framework includes built-in support for microservices, caching, and AI.',
93
- metadata: { source: 'docs', category: 'features' },
94
- },
95
- ]);
96
-
97
- // 7. Query
98
- const result = await rag.query('What is HazelJS?', {
99
- topK: 3,
100
- filter: { source: 'docs' },
101
- });
81
+ // Load from disk — auto-detects file types
82
+ const docs = await new DirectoryLoader({ dirPath: './knowledge-base', recursive: true }).load();
83
+ await rag.addDocuments(docs);
102
84
 
85
+ const result = await rag.query('What is HazelJS?', { topK: 3 });
103
86
  console.log(result.answer);
104
87
  console.log(result.sources);
105
88
  ```
106
89
 
107
- ### With LLM Integration
90
+ ---
108
91
 
109
- ```typescript
110
- import OpenAI from 'openai';
92
+ ## Document Loaders
111
93
 
112
- const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY! });
94
+ Every loader extends `BaseDocumentLoader` and returns `Document[]` ready for chunking and indexing.
113
95
 
114
- // Create LLM function
115
- const llmFunction = async (prompt: string) => {
116
- const response = await openai.chat.completions.create({
117
- model: 'gpt-4',
118
- messages: [{ role: 'user', content: prompt }],
119
- });
120
- return response.choices[0].message.content || '';
121
- };
96
+ ### Built-in loaders
122
97
 
123
- // Create RAG pipeline with LLM
124
- const rag = new RAGPipeline(config, llmFunction);
98
+ | Loader | Source | Extra install |
99
+ |--------|--------|:---:|
100
+ | `TextFileLoader` | `.txt` files | — |
101
+ | `MarkdownFileLoader` | `.md` / `.mdx` with heading splits and YAML front-matter | — |
102
+ | `JSONFileLoader` | `.json` with `textKey` / JSON Pointer extraction | — |
103
+ | `CSVFileLoader` | `.csv` rows mapped to documents | — |
104
+ | `HtmlFileLoader` | `.html` tag stripping; optional CSS selector via cheerio | opt. |
105
+ | `DirectoryLoader` | Recursive walk; auto-detects loader by extension | — |
106
+ | `PdfLoader` | PDFs; split by page or full document | `pdf-parse` |
107
+ | `DocxLoader` | Word documents; plain text or HTML output | `mammoth` |
108
+ | `WebLoader` | HTTP scraping with retry/timeout; optional CSS selector | opt. |
109
+ | `YouTubeTranscriptLoader` | YouTube transcripts; no API key; segment by duration | — |
110
+ | `GitHubLoader` | GitHub REST API; filter by path, extension, `maxFiles` | — |
125
111
 
126
- // Query with custom prompt
127
- const result = await rag.query('What is HazelJS?', {
128
- llmPrompt: `Based on the following context, answer the question.
112
+ ### Examples
129
113
 
130
- Context:
131
- {context}
132
-
133
- Question: {query}
114
+ ```typescript
115
+ import {
116
+ TextFileLoader,
117
+ MarkdownFileLoader,
118
+ JSONFileLoader,
119
+ CSVFileLoader,
120
+ PdfLoader,
121
+ DocxLoader,
122
+ WebLoader,
123
+ YouTubeTranscriptLoader,
124
+ GitHubLoader,
125
+ DirectoryLoader,
126
+ } from '@hazeljs/rag';
134
127
 
135
- Answer:`,
136
- });
128
+ // Plain text
129
+ const textDocs = await new TextFileLoader({ filePath: './notes.txt' }).load();
130
+
131
+ // Markdown — one document per heading section
132
+ const mdDocs = await new MarkdownFileLoader({
133
+ filePath: './guide.md',
134
+ splitByHeading: true,
135
+ parseYamlFrontMatter: true,
136
+ }).load();
137
+
138
+ // JSON — extract the 'body' field from each element
139
+ const jsonDocs = await new JSONFileLoader({ filePath: './articles.json', textKey: 'body' }).load();
140
+
141
+ // CSV — map columns to content / metadata
142
+ const csvDocs = await new CSVFileLoader({
143
+ filePath: './faqs.csv',
144
+ contentColumns: ['question', 'answer'],
145
+ metadataColumns: ['category'],
146
+ }).load();
147
+
148
+ // PDF — one document per page
149
+ const pdfDocs = await new PdfLoader({ filePath: './report.pdf', splitByPage: true }).load();
150
+
151
+ // DOCX
152
+ const wordDocs = await new DocxLoader({ filePath: './agreement.docx' }).load();
153
+
154
+ // Web scraping
155
+ const webDocs = await new WebLoader({
156
+ urls: ['https://hazeljs.com/docs', 'https://hazeljs.com/blog'],
157
+ timeout: 10_000,
158
+ maxRetries: 3,
159
+ }).load();
160
+
161
+ // YouTube transcript (no API key needed)
162
+ const ytDocs = await new YouTubeTranscriptLoader({
163
+ videoUrl: 'https://www.youtube.com/watch?v=VIDEO_ID',
164
+ segmentDuration: 60, // group into 60-second chunks
165
+ }).load();
166
+
167
+ // GitHub repository
168
+ const githubDocs = await new GitHubLoader({
169
+ owner: 'hazeljs',
170
+ repo: 'hazel',
171
+ directory: 'docs',
172
+ extensions: ['.md'],
173
+ token: process.env.GITHUB_TOKEN,
174
+ }).load();
175
+
176
+ // Directory — auto-detects every file type
177
+ const allDocs = await new DirectoryLoader({
178
+ dirPath: './knowledge-base',
179
+ recursive: true,
180
+ extensions: ['.md', '.txt', '.pdf'],
181
+ }).load();
137
182
  ```
138
183
 
139
- ## Vector Stores
140
-
141
- ### Memory Vector Store (Development)
184
+ ### Custom loaders
142
185
 
143
186
  ```typescript
144
- import { MemoryVectorStore, OpenAIEmbeddings } from '@hazeljs/rag';
187
+ import { BaseDocumentLoader, Loader, DocumentLoaderRegistry } from '@hazeljs/rag';
145
188
 
146
- const embeddings = new OpenAIEmbeddings({ apiKey: process.env.OPENAI_API_KEY! });
147
- const vectorStore = new MemoryVectorStore(embeddings);
189
+ @Loader({ name: 'NotionLoader', extensions: [] })
190
+ export class NotionLoader extends BaseDocumentLoader {
191
+ constructor(private readonly databaseId: string) { super(); }
192
+
193
+ async load() {
194
+ const pages = await fetchNotionPages(this.databaseId);
195
+ return pages.map(p =>
196
+ this.createDocument(p.content, { source: `notion:${p.id}`, title: p.title }),
197
+ );
198
+ }
199
+ }
200
+
201
+ // Register so DirectoryLoader can auto-detect it
202
+ DocumentLoaderRegistry.register(NotionLoader, (id: string) => new NotionLoader(id));
148
203
  ```
149
204
 
150
- ### Pinecone (Production)
205
+ ---
151
206
 
152
- ```typescript
153
- import { Pinecone } from '@pinecone-database/pinecone';
154
- import { PineconeVectorStore } from '@hazeljs/rag';
207
+ ## GraphRAG
155
208
 
156
- const pinecone = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! });
157
- const index = pinecone.index('my-index');
209
+ GraphRAG builds a **knowledge graph** from your documents — entities, relationships, and community clusters — and enables three complementary search modes that go far beyond cosine similarity.
158
210
 
159
- const vectorStore = new PineconeVectorStore(index, embeddings);
160
- ```
211
+ ### Why GraphRAG?
161
212
 
162
- ### Qdrant
213
+ | Question type | Traditional RAG | GraphRAG |
214
+ |---|---|---|
215
+ | "What does X do?" | ✅ Good | ✅ Excellent (entity traversal) |
216
+ | "How do X and Y relate?" | ❌ Poor | ✅ Excellent (relationships) |
217
+ | "What are the main architectural layers?" | ❌ Poor | ✅ Excellent (community reports) |
218
+ | Multi-document cross-referencing | ❌ Fragmented | ✅ Native |
163
219
 
164
- ```typescript
165
- import { QdrantClient } from '@qdrant/js-client-rest';
166
- import { QdrantVectorStore } from '@hazeljs/rag';
220
+ ### Build the graph
167
221
 
168
- const client = new QdrantClient({ url: 'http://localhost:6333' });
169
- const vectorStore = new QdrantVectorStore(client, embeddings, {
170
- collectionName: 'my-collection',
222
+ ```typescript
223
+ import OpenAI from 'openai';
224
+ import { GraphRAGPipeline, DirectoryLoader } from '@hazeljs/rag';
225
+
226
+ const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
227
+
228
+ const graphRag = new GraphRAGPipeline({
229
+ // Provider-agnostic: any LLM that accepts a string prompt
230
+ llm: async (prompt) => {
231
+ const res = await openai.chat.completions.create({
232
+ model: 'gpt-4o-mini',
233
+ temperature: 0,
234
+ messages: [{ role: 'user', content: prompt }],
235
+ });
236
+ return res.choices[0].message.content ?? '';
237
+ },
238
+ extractionChunkSize: 2000, // chars per LLM extraction call
239
+ generateCommunityReports: true, // LLM summaries per community cluster
240
+ maxCommunitySize: 15, // split clusters larger than this
241
+ localSearchDepth: 2, // BFS hops for local search
242
+ localSearchTopK: 5, // seed entities per query
243
+ globalSearchTopK: 5, // community reports for global search
171
244
  });
172
- ```
173
245
 
174
- ## Embedding Providers
246
+ const docs = await new DirectoryLoader({ dirPath: './knowledge-base', recursive: true }).load();
247
+ const stats = await graphRag.build(docs);
248
+ // { documentsProcessed, entitiesExtracted, relationshipsExtracted,
249
+ // communitiesDetected, communityReportsGenerated, duration }
250
+ ```
175
251
 
176
- ### OpenAI
252
+ ### Search modes
177
253
 
178
254
  ```typescript
179
- import { OpenAIEmbeddings } from '@hazeljs/rag';
180
-
181
- const embeddings = new OpenAIEmbeddings({
182
- apiKey: process.env.OPENAI_API_KEY!,
183
- model: 'text-embedding-3-small', // or 'text-embedding-3-large'
184
- dimensions: 1536,
185
- });
255
+ // LOCAL — entity-centric, BFS graph traversal
256
+ // Best for: specific questions about named concepts, classes, or technologies
257
+ const local = await graphRag.search(
258
+ 'How does dependency injection work?',
259
+ { mode: 'local' },
260
+ );
261
+ console.log(local.answer);
262
+ console.log(local.entities); // entities found and traversed
263
+ console.log(local.relationships); // evidence relationships
264
+
265
+ // GLOBAL — community report ranking
266
+ // Best for: broad thematic questions, architecture overviews
267
+ const global = await graphRag.search(
268
+ 'What are the main architectural layers of this system?',
269
+ { mode: 'global' },
270
+ );
271
+ console.log(global.communities); // ranked community reports used
272
+
273
+ // HYBRID — runs both in parallel, single synthesis call (recommended default)
274
+ const result = await graphRag.search('What vector stores does @hazeljs/rag support?');
275
+ // mode defaults to 'hybrid'
276
+ console.log(`${result.mode} search in ${result.duration}ms`);
186
277
  ```
187
278
 
188
- ### Cohere
279
+ ### Incremental updates
189
280
 
190
281
  ```typescript
191
- import { CohereEmbeddings } from '@hazeljs/rag';
192
-
193
- const embeddings = new CohereEmbeddings({
194
- apiKey: process.env.COHERE_API_KEY!,
195
- model: 'embed-english-v3.0',
196
- });
282
+ const newDocs = await new WebLoader({ urls: ['https://hazeljs.com/blog/new'] }).load();
283
+ await graphRag.addDocuments(newDocs);
284
+ // Re-runs community detection and regenerates reports automatically
197
285
  ```
198
286
 
199
- ### HuggingFace
287
+ ### Inspect the graph
200
288
 
201
289
  ```typescript
202
- import { HuggingFaceEmbeddings } from '@hazeljs/rag';
290
+ const graph = graphRag.getGraph();
203
291
 
204
- const embeddings = new HuggingFaceEmbeddings({
205
- apiKey: process.env.HUGGINGFACE_API_KEY!,
206
- model: 'sentence-transformers/all-MiniLM-L6-v2',
207
- });
292
+ // Entities, relationships, community reports
293
+ console.log([...graph.entities.values()].slice(0, 5));
294
+ console.log([...graph.relationships.values()].slice(0, 5));
295
+ console.log([...graph.communityReports.values()].map(r => r.title));
296
+
297
+ // Statistics
298
+ const stats = graphRag.getStats();
299
+ console.log(stats.entityTypeBreakdown); // { TECHNOLOGY: 14, CONCEPT: 12, ... }
300
+ console.log(stats.topEntities.slice(0, 5)); // most-connected entities
208
301
  ```
209
302
 
210
- ## Text Splitting
303
+ ---
304
+
305
+ ## Vector Stores
211
306
 
212
- ### Recursive Text Splitter
307
+ All stores implement the same interface — swap them with a one-line change.
213
308
 
214
309
  ```typescript
215
- import { RecursiveTextSplitter } from '@hazeljs/rag';
310
+ import { MemoryVectorStore, OpenAIEmbeddings } from '@hazeljs/rag';
216
311
 
217
- const splitter = new RecursiveTextSplitter({
218
- chunkSize: 1000,
219
- chunkOverlap: 200,
220
- separators: ['\n\n', '\n', '. ', ' ', ''],
221
- });
312
+ // Development
313
+ const vectorStore = new MemoryVectorStore(embeddings);
222
314
 
223
- const chunks = splitter.split(longText);
224
- ```
315
+ // Pinecone (production, serverless)
316
+ import { PineconeVectorStore } from '@hazeljs/rag';
317
+ const vectorStore = new PineconeVectorStore(embeddings, {
318
+ apiKey: process.env.PINECONE_API_KEY,
319
+ indexName: 'my-knowledge-base',
320
+ });
225
321
 
226
- ## Retrieval Strategies
322
+ // Qdrant (high-performance, self-hosted)
323
+ import { QdrantVectorStore } from '@hazeljs/rag';
324
+ const vectorStore = new QdrantVectorStore(embeddings, {
325
+ url: process.env.QDRANT_URL || 'http://localhost:6333',
326
+ collectionName: 'my-collection',
327
+ });
227
328
 
228
- ### Similarity Search (Default)
329
+ // Weaviate (GraphQL, flexible)
330
+ import { WeaviateVectorStore } from '@hazeljs/rag';
331
+ const vectorStore = new WeaviateVectorStore(embeddings, {
332
+ host: process.env.WEAVIATE_HOST || 'http://localhost:8080',
333
+ className: 'MyKnowledgeBase',
334
+ });
229
335
 
230
- ```typescript
231
- const results = await rag.retrieve('query', {
232
- topK: 5,
233
- strategy: RetrievalStrategy.SIMILARITY,
336
+ // ChromaDB (prototyping)
337
+ import { ChromaVectorStore } from '@hazeljs/rag';
338
+ const vectorStore = new ChromaVectorStore(embeddings, {
339
+ url: process.env.CHROMA_URL || 'http://localhost:8000',
340
+ collectionName: 'my-collection',
234
341
  });
235
342
  ```
236
343
 
237
- ### MMR (Maximal Marginal Relevance)
344
+ ### Vector store comparison
238
345
 
239
- Balances relevance and diversity to avoid redundant results:
346
+ | | Memory | Pinecone | Qdrant | Weaviate | ChromaDB |
347
+ |---|:---:|:---:|:---:|:---:|:---:|
348
+ | Setup | None | API Key | Docker | Docker | Docker |
349
+ | Persistence | ❌ | ✅ | ✅ | ✅ | ✅ |
350
+ | Best for | Dev/Test | Production | High-perf | GraphQL | Prototyping |
351
+ | Cost | Free | Paid | OSS | OSS | OSS |
352
+
353
+ ---
354
+
355
+ ## Embedding Providers
240
356
 
241
357
  ```typescript
242
- const results = await rag.retrieve('query', {
243
- topK: 5,
244
- strategy: RetrievalStrategy.MMR,
358
+ import { OpenAIEmbeddings, CohereEmbeddings } from '@hazeljs/rag';
359
+
360
+ // OpenAI
361
+ const openaiEmbed = new OpenAIEmbeddings({
362
+ apiKey: process.env.OPENAI_API_KEY,
363
+ model: 'text-embedding-3-small', // 1536 dims
364
+ // model: 'text-embedding-3-large', // 3072 dims, highest quality
365
+ });
366
+
367
+ // Cohere (multilingual)
368
+ const cohereEmbed = new CohereEmbeddings({
369
+ apiKey: process.env.COHERE_API_KEY,
370
+ model: 'embed-multilingual-v3.0',
245
371
  });
246
372
  ```
247
373
 
248
- ### Hybrid Search
374
+ ---
249
375
 
250
- Combines keyword and semantic search:
376
+ ## Retrieval Strategies
251
377
 
252
378
  ```typescript
253
- const results = await rag.retrieve('query', {
254
- topK: 5,
255
- strategy: RetrievalStrategy.HYBRID,
379
+ import { HybridSearchRetrieval, MultiQueryRetrieval } from '@hazeljs/rag';
380
+
381
+ // Hybrid — vector + BM25 keyword fusion
382
+ const hybrid = new HybridSearchRetrieval(vectorStore, {
383
+ vectorWeight: 0.7,
384
+ keywordWeight: 0.3,
385
+ topK: 10,
256
386
  });
387
+ const results = await hybrid.search('machine learning algorithms', { topK: 5 });
388
+
389
+ // Multi-query — LLM generates N query variations, deduplicates results
390
+ const multiQuery = new MultiQueryRetrieval(vectorStore, {
391
+ llmApiKey: process.env.OPENAI_API_KEY,
392
+ numQueries: 3,
393
+ topK: 10,
394
+ });
395
+ const results2 = await multiQuery.search('How do I deploy my app?', { topK: 5 });
257
396
  ```
258
397
 
259
- ## Metadata Filtering
398
+ ---
399
+
400
+ ## Text Splitting
260
401
 
261
402
  ```typescript
262
- await rag.addDocuments([
263
- {
264
- content: 'Document 1',
265
- metadata: { category: 'tech', year: 2024 },
266
- },
267
- {
268
- content: 'Document 2',
269
- metadata: { category: 'science', year: 2023 },
270
- },
271
- ]);
403
+ import { RecursiveTextSplitter } from '@hazeljs/rag';
272
404
 
273
- // Filter by metadata
274
- const results = await rag.query('query', {
275
- filter: { category: 'tech', year: 2024 },
405
+ const splitter = new RecursiveTextSplitter({
406
+ chunkSize: 1000, // target chars per chunk
407
+ chunkOverlap: 200, // overlap for context continuity
408
+ separators: ['\n\n', '\n', '. ', ' '],
276
409
  });
410
+
411
+ const chunks = splitter.split(longDocument);
277
412
  ```
278
413
 
279
- ## Advanced Usage
414
+ ---
280
415
 
281
- ### Custom Document Loaders
416
+ ## Memory System
282
417
 
283
418
  ```typescript
284
- import { DocumentLoader, Document } from '@hazeljs/rag';
285
-
286
- class PDFLoader implements DocumentLoader {
287
- constructor(private filePath: string) {}
419
+ import {
420
+ RAGPipelineWithMemory,
421
+ MemoryManager,
422
+ HybridMemory,
423
+ BufferMemory,
424
+ VectorMemory,
425
+ } from '@hazeljs/rag';
288
426
 
289
- async load(): Promise<Document[]> {
290
- // Load and parse PDF
291
- const text = await this.parsePDF(this.filePath);
292
- return [{ content: text, metadata: { source: this.filePath } }];
293
- }
427
+ const buffer = new BufferMemory({ maxSize: 20 });
428
+ const vectorMemory = new VectorMemory(vectorStore, embeddings);
429
+ const memory = new MemoryManager(new HybridMemory(buffer, vectorMemory));
294
430
 
295
- private async parsePDF(path: string): Promise<string> {
296
- // PDF parsing logic
297
- return '';
298
- }
299
- }
431
+ const rag = new RAGPipelineWithMemory(config, memory, llmFunction);
300
432
 
301
- const loader = new PDFLoader('./document.pdf');
302
- const documents = await loader.load();
303
- await rag.addDocuments(documents);
433
+ const response = await rag.queryWithMemory(
434
+ 'What did we discuss about deployment?',
435
+ 'session-123',
436
+ 'user-456',
437
+ );
438
+ console.log(response.answer);
439
+ console.log(response.memories);
304
440
  ```
305
441
 
306
- ### Batch Operations
442
+ ---
307
443
 
308
- ```typescript
309
- // Add multiple documents efficiently
310
- const ids = await rag.addDocuments(documents);
444
+ ## API Reference
311
445
 
312
- // Delete multiple documents
313
- await rag.deleteDocuments(ids);
446
+ ### `GraphRAGPipeline`
314
447
 
315
- // Clear all documents
316
- await rag.clear();
448
+ ```typescript
449
+ class GraphRAGPipeline {
450
+ constructor(config: GraphRAGConfig);
451
+ build(docs: Document[]): Promise<GraphBuildStats>;
452
+ addDocuments(docs: Document[]): Promise<GraphBuildStats>;
453
+ search(query: string, options?: GraphSearchOptions): Promise<GraphSearchResult>;
454
+ getGraph(): KnowledgeGraph;
455
+ getStats(): GraphStats;
456
+ clear(): void;
457
+ }
317
458
  ```
318
459
 
319
- ## API Reference
320
-
321
- ### RAGPipeline
460
+ ### `RAGPipeline`
322
461
 
323
462
  ```typescript
324
463
  class RAGPipeline {
@@ -326,53 +465,35 @@ class RAGPipeline {
326
465
  initialize(): Promise<void>;
327
466
  addDocuments(documents: Document[]): Promise<string[]>;
328
467
  query(query: string, options?: RAGQueryOptions): Promise<RAGResponse>;
329
- retrieve(query: string, options?: QueryOptions, strategy?: RetrievalStrategy): Promise<SearchResult[]>;
468
+ search(query: string, options?: QueryOptions): Promise<SearchResult[]>;
330
469
  deleteDocuments(ids: string[]): Promise<void>;
331
470
  clear(): Promise<void>;
332
471
  }
333
472
  ```
334
473
 
335
- ### Types
474
+ ### `Document`
336
475
 
337
476
  ```typescript
338
477
  interface Document {
339
478
  id?: string;
340
479
  content: string;
341
- metadata?: Record<string, any>;
480
+ metadata?: Record<string, unknown>;
342
481
  embedding?: number[];
343
482
  }
344
-
345
- interface SearchResult {
346
- id: string;
347
- content: string;
348
- metadata?: Record<string, any>;
349
- score: number;
350
- embedding?: number[];
351
- }
352
-
353
- interface RAGResponse {
354
- answer: string;
355
- sources: SearchResult[];
356
- context: string;
357
- }
358
483
  ```
359
484
 
360
- ## Use Cases
485
+ ---
361
486
 
362
- 📖 **Documentation Search** - Semantic search across documentation
363
- 💬 **Chatbots** - Context-aware conversational AI
364
- 🔍 **Knowledge Base** - Internal knowledge management
365
- 📝 **Content Recommendations** - Similar content discovery
366
- 🎓 **Educational Tools** - Q&A systems with source citations
367
- - 🏒 **Enterprise Search** - Semantic search across company data
487
+ ## Use Cases
368
488
 
369
- ## Performance Tips
489
+ - 📖 **Documentation Q&A** — Index all your docs and answer developer questions
490
+ - 🕸️ **Codebase Understanding** — GraphRAG over a repo to explain architecture and dependencies
491
+ - 💬 **Context-Aware Chatbots** — RAG + memory for multi-turn conversations
492
+ - 🔍 **Enterprise Knowledge Base** — Combine web, GitHub, PDFs, and internal wikis
493
+ - 🎓 **Research Assistants** — Multi-document reasoning with knowledge graph traversal
494
+ - 📝 **Content Intelligence** — Semantic search + relationship mapping across articles
370
495
 
371
- 1. **Batch Operations** - Add documents in batches for better performance
372
- 2. **Chunk Size** - Balance between context and precision (500-1500 tokens)
373
- 3. **Overlap** - Use 10-20% overlap for better context continuity
374
- 4. **Caching** - Cache embeddings for frequently accessed documents
375
- 5. **Filtering** - Use metadata filters to reduce search space
496
+ ---
376
497
 
377
498
  ## License
378
499
 
@@ -380,4 +501,4 @@ Apache 2.0
380
501
 
381
502
  ## Contributing
382
503
 
383
- Contributions are welcome! Please see [CONTRIBUTING.md](../../CONTRIBUTING.md) for details.
504
+ Contributions are welcome! See [CONTRIBUTING.md](../../CONTRIBUTING.md) for details.