@hazeljs/rag 0.2.0-beta.8 → 0.2.0-beta.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/LICENSE +192 -21
  2. package/README.md +348 -223
  3. package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
  4. package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
  5. package/dist/__tests__/graph/community-detector.test.js +87 -0
  6. package/dist/__tests__/graph/community-detector.test.js.map +1 -0
  7. package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
  8. package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
  9. package/dist/__tests__/graph/community-summarizer.test.js +131 -0
  10. package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
  11. package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
  12. package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
  13. package/dist/__tests__/graph/entity-extractor.test.js +129 -0
  14. package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
  15. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
  16. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
  17. package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
  18. package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
  19. package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
  20. package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
  21. package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
  22. package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
  23. package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
  24. package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
  25. package/dist/__tests__/loaders/base.loader.test.js +114 -0
  26. package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
  27. package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
  28. package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
  29. package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
  30. package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
  31. package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
  32. package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
  33. package/dist/__tests__/loaders/directory.loader.test.js +154 -0
  34. package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
  35. package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
  36. package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
  37. package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
  38. package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
  39. package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
  40. package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
  41. package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
  42. package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
  43. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
  44. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
  45. package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
  46. package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
  47. package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
  48. package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
  49. package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
  50. package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
  51. package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
  52. package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
  53. package/dist/__tests__/rag-pipeline.test.js +210 -0
  54. package/dist/__tests__/rag-pipeline.test.js.map +1 -0
  55. package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
  56. package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
  57. package/dist/__tests__/retrieval/bm25.test.js +86 -0
  58. package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
  59. package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
  60. package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
  61. package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
  62. package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
  63. package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
  64. package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
  65. package/dist/__tests__/retrieval/multi-query.test.js +90 -0
  66. package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
  67. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
  68. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
  69. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
  70. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
  71. package/dist/__tests__/utils/similarity.test.d.ts +2 -0
  72. package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
  73. package/dist/__tests__/utils/similarity.test.js +47 -0
  74. package/dist/__tests__/utils/similarity.test.js.map +1 -0
  75. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts +1 -0
  76. package/dist/agentic/decorators/adaptive-retrieval.decorator.d.ts.map +1 -1
  77. package/dist/agentic/decorators/adaptive-retrieval.decorator.js +4 -15
  78. package/dist/agentic/decorators/adaptive-retrieval.decorator.js.map +1 -1
  79. package/dist/agentic/decorators/corrective-rag.decorator.d.ts +1 -0
  80. package/dist/agentic/decorators/corrective-rag.decorator.d.ts.map +1 -1
  81. package/dist/agentic/decorators/corrective-rag.decorator.js +7 -11
  82. package/dist/agentic/decorators/corrective-rag.decorator.js.map +1 -1
  83. package/dist/agentic/decorators/hyde.decorator.d.ts +1 -0
  84. package/dist/agentic/decorators/hyde.decorator.d.ts.map +1 -1
  85. package/dist/agentic/decorators/hyde.decorator.js +7 -6
  86. package/dist/agentic/decorators/hyde.decorator.js.map +1 -1
  87. package/dist/agentic/decorators/multi-hop.decorator.d.ts +1 -0
  88. package/dist/agentic/decorators/multi-hop.decorator.d.ts.map +1 -1
  89. package/dist/agentic/decorators/multi-hop.decorator.js +8 -19
  90. package/dist/agentic/decorators/multi-hop.decorator.js.map +1 -1
  91. package/dist/agentic/decorators/query-planner.decorator.d.ts +1 -0
  92. package/dist/agentic/decorators/query-planner.decorator.d.ts.map +1 -1
  93. package/dist/agentic/decorators/query-planner.decorator.js +4 -18
  94. package/dist/agentic/decorators/query-planner.decorator.js.map +1 -1
  95. package/dist/agentic/decorators/query-rewriter.decorator.d.ts +1 -0
  96. package/dist/agentic/decorators/query-rewriter.decorator.d.ts.map +1 -1
  97. package/dist/agentic/decorators/query-rewriter.decorator.js +8 -5
  98. package/dist/agentic/decorators/query-rewriter.decorator.js.map +1 -1
  99. package/dist/agentic/decorators/self-reflective.decorator.d.ts +2 -0
  100. package/dist/agentic/decorators/self-reflective.decorator.d.ts.map +1 -1
  101. package/dist/agentic/decorators/self-reflective.decorator.js +11 -32
  102. package/dist/agentic/decorators/self-reflective.decorator.js.map +1 -1
  103. package/dist/agentic/index.d.ts +1 -1
  104. package/dist/agentic/index.d.ts.map +1 -1
  105. package/dist/agentic/types.d.ts +3 -2
  106. package/dist/agentic/types.d.ts.map +1 -1
  107. package/dist/graph/community-detector.d.ts +45 -0
  108. package/dist/graph/community-detector.d.ts.map +1 -0
  109. package/dist/graph/community-detector.js +153 -0
  110. package/dist/graph/community-detector.js.map +1 -0
  111. package/dist/graph/community-summarizer.d.ts +41 -0
  112. package/dist/graph/community-summarizer.d.ts.map +1 -0
  113. package/dist/graph/community-summarizer.js +119 -0
  114. package/dist/graph/community-summarizer.js.map +1 -0
  115. package/dist/graph/entity-extractor.d.ts +47 -0
  116. package/dist/graph/entity-extractor.d.ts.map +1 -0
  117. package/dist/graph/entity-extractor.js +224 -0
  118. package/dist/graph/entity-extractor.js.map +1 -0
  119. package/dist/graph/graph-rag-pipeline.d.ts +83 -0
  120. package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
  121. package/dist/graph/graph-rag-pipeline.js +390 -0
  122. package/dist/graph/graph-rag-pipeline.js.map +1 -0
  123. package/dist/graph/graph.types.d.ts +186 -0
  124. package/dist/graph/graph.types.d.ts.map +1 -0
  125. package/dist/graph/graph.types.js +20 -0
  126. package/dist/graph/graph.types.js.map +1 -0
  127. package/dist/graph/index.d.ts +15 -0
  128. package/dist/graph/index.d.ts.map +1 -0
  129. package/dist/graph/index.js +31 -0
  130. package/dist/graph/index.js.map +1 -0
  131. package/dist/graph/knowledge-graph.d.ts +57 -0
  132. package/dist/graph/knowledge-graph.d.ts.map +1 -0
  133. package/dist/graph/knowledge-graph.js +198 -0
  134. package/dist/graph/knowledge-graph.js.map +1 -0
  135. package/dist/index.d.ts +2 -0
  136. package/dist/index.d.ts.map +1 -1
  137. package/dist/index.js +4 -0
  138. package/dist/index.js.map +1 -1
  139. package/dist/loaders/base.loader.d.ts +108 -0
  140. package/dist/loaders/base.loader.d.ts.map +1 -0
  141. package/dist/loaders/base.loader.js +123 -0
  142. package/dist/loaders/base.loader.js.map +1 -0
  143. package/dist/loaders/csv-file.loader.d.ts +61 -0
  144. package/dist/loaders/csv-file.loader.d.ts.map +1 -0
  145. package/dist/loaders/csv-file.loader.js +162 -0
  146. package/dist/loaders/csv-file.loader.js.map +1 -0
  147. package/dist/loaders/directory.loader.d.ts +67 -0
  148. package/dist/loaders/directory.loader.d.ts.map +1 -0
  149. package/dist/loaders/directory.loader.js +163 -0
  150. package/dist/loaders/directory.loader.js.map +1 -0
  151. package/dist/loaders/docx.loader.d.ts +52 -0
  152. package/dist/loaders/docx.loader.d.ts.map +1 -0
  153. package/dist/loaders/docx.loader.js +110 -0
  154. package/dist/loaders/docx.loader.js.map +1 -0
  155. package/dist/loaders/github.loader.d.ts +114 -0
  156. package/dist/loaders/github.loader.d.ts.map +1 -0
  157. package/dist/loaders/github.loader.js +217 -0
  158. package/dist/loaders/github.loader.js.map +1 -0
  159. package/dist/loaders/html-file.loader.d.ts +55 -0
  160. package/dist/loaders/html-file.loader.d.ts.map +1 -0
  161. package/dist/loaders/html-file.loader.js +170 -0
  162. package/dist/loaders/html-file.loader.js.map +1 -0
  163. package/dist/loaders/index.d.ts +52 -0
  164. package/dist/loaders/index.d.ts.map +1 -0
  165. package/dist/loaders/index.js +61 -0
  166. package/dist/loaders/index.js.map +1 -0
  167. package/dist/loaders/json-file.loader.d.ts +51 -0
  168. package/dist/loaders/json-file.loader.d.ts.map +1 -0
  169. package/dist/loaders/json-file.loader.js +100 -0
  170. package/dist/loaders/json-file.loader.js.map +1 -0
  171. package/dist/loaders/markdown-file.loader.d.ts +61 -0
  172. package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
  173. package/dist/loaders/markdown-file.loader.js +148 -0
  174. package/dist/loaders/markdown-file.loader.js.map +1 -0
  175. package/dist/loaders/pdf.loader.d.ts +64 -0
  176. package/dist/loaders/pdf.loader.d.ts.map +1 -0
  177. package/dist/loaders/pdf.loader.js +163 -0
  178. package/dist/loaders/pdf.loader.js.map +1 -0
  179. package/dist/loaders/text-file.loader.d.ts +39 -0
  180. package/dist/loaders/text-file.loader.d.ts.map +1 -0
  181. package/dist/loaders/text-file.loader.js +69 -0
  182. package/dist/loaders/text-file.loader.js.map +1 -0
  183. package/dist/loaders/web.loader.d.ts +87 -0
  184. package/dist/loaders/web.loader.d.ts.map +1 -0
  185. package/dist/loaders/web.loader.js +194 -0
  186. package/dist/loaders/web.loader.js.map +1 -0
  187. package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
  188. package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
  189. package/dist/loaders/youtube-transcript.loader.js +254 -0
  190. package/dist/loaders/youtube-transcript.loader.js.map +1 -0
  191. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts +8 -0
  192. package/dist/prompts/agentic/adaptive-retrieval.prompt.d.ts.map +1 -0
  193. package/dist/prompts/agentic/adaptive-retrieval.prompt.js +27 -0
  194. package/dist/prompts/agentic/adaptive-retrieval.prompt.js.map +1 -0
  195. package/dist/prompts/agentic/corrective-rag.prompt.d.ts +9 -0
  196. package/dist/prompts/agentic/corrective-rag.prompt.d.ts.map +1 -0
  197. package/dist/prompts/agentic/corrective-rag.prompt.js +23 -0
  198. package/dist/prompts/agentic/corrective-rag.prompt.js.map +1 -0
  199. package/dist/prompts/agentic/hyde.prompt.d.ts +9 -0
  200. package/dist/prompts/agentic/hyde.prompt.d.ts.map +1 -0
  201. package/dist/prompts/agentic/hyde.prompt.js +18 -0
  202. package/dist/prompts/agentic/hyde.prompt.js.map +1 -0
  203. package/dist/prompts/agentic/multi-hop.prompt.d.ts +15 -0
  204. package/dist/prompts/agentic/multi-hop.prompt.d.ts.map +1 -0
  205. package/dist/prompts/agentic/multi-hop.prompt.js +38 -0
  206. package/dist/prompts/agentic/multi-hop.prompt.js.map +1 -0
  207. package/dist/prompts/agentic/query-planner.prompt.d.ts +8 -0
  208. package/dist/prompts/agentic/query-planner.prompt.d.ts.map +1 -0
  209. package/dist/prompts/agentic/query-planner.prompt.js +30 -0
  210. package/dist/prompts/agentic/query-planner.prompt.js.map +1 -0
  211. package/dist/prompts/agentic/query-rewriter.prompt.d.ts +10 -0
  212. package/dist/prompts/agentic/query-rewriter.prompt.d.ts.map +1 -0
  213. package/dist/prompts/agentic/query-rewriter.prompt.js +17 -0
  214. package/dist/prompts/agentic/query-rewriter.prompt.js.map +1 -0
  215. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts +10 -0
  216. package/dist/prompts/agentic/self-reflective-improve.prompt.d.ts.map +1 -0
  217. package/dist/prompts/agentic/self-reflective-improve.prompt.js +24 -0
  218. package/dist/prompts/agentic/self-reflective-improve.prompt.js.map +1 -0
  219. package/dist/prompts/agentic/self-reflective.prompt.d.ts +9 -0
  220. package/dist/prompts/agentic/self-reflective.prompt.d.ts.map +1 -0
  221. package/dist/prompts/agentic/self-reflective.prompt.js +32 -0
  222. package/dist/prompts/agentic/self-reflective.prompt.js.map +1 -0
  223. package/dist/prompts/community-summary.prompt.d.ts +9 -0
  224. package/dist/prompts/community-summary.prompt.d.ts.map +1 -0
  225. package/dist/prompts/community-summary.prompt.js +30 -0
  226. package/dist/prompts/community-summary.prompt.js.map +1 -0
  227. package/dist/prompts/entity-extraction.prompt.d.ts +10 -0
  228. package/dist/prompts/entity-extraction.prompt.d.ts.map +1 -0
  229. package/dist/prompts/entity-extraction.prompt.js +39 -0
  230. package/dist/prompts/entity-extraction.prompt.js.map +1 -0
  231. package/dist/prompts/graph-search.prompt.d.ts +10 -0
  232. package/dist/prompts/graph-search.prompt.d.ts.map +1 -0
  233. package/dist/prompts/graph-search.prompt.js +23 -0
  234. package/dist/prompts/graph-search.prompt.js.map +1 -0
  235. package/dist/prompts/index.d.ts +13 -0
  236. package/dist/prompts/index.d.ts.map +1 -0
  237. package/dist/prompts/index.js +29 -0
  238. package/dist/prompts/index.js.map +1 -0
  239. package/dist/prompts/rag-answer.prompt.d.ts +9 -0
  240. package/dist/prompts/rag-answer.prompt.d.ts.map +1 -0
  241. package/dist/prompts/rag-answer.prompt.js +20 -0
  242. package/dist/prompts/rag-answer.prompt.js.map +1 -0
  243. package/dist/rag.service.d.ts +1 -0
  244. package/dist/rag.service.d.ts.map +1 -1
  245. package/dist/rag.service.js +8 -9
  246. package/dist/rag.service.js.map +1 -1
  247. package/dist/vector-stores/qdrant.store.d.ts +2 -0
  248. package/dist/vector-stores/qdrant.store.d.ts.map +1 -1
  249. package/dist/vector-stores/qdrant.store.js +1 -0
  250. package/dist/vector-stores/qdrant.store.js.map +1 -1
  251. package/package.json +64 -6
package/README.md CHANGED
@@ -1,19 +1,27 @@
1
1
  # @hazeljs/rag
2
2
 
3
- **Retrieval-Augmented Generation (RAG) and Vector Search for HazelJS**
3
+ **Your docs. Your data. AI that actually knows them.**
4
4
 
5
- Build powerful AI applications with semantic search, document retrieval, and LLM-augmented responses.
5
+ Load documents from any source, build a knowledge graph, embed into vector stores, and retrieve answers with semantic, hybrid, or graph-based search. Full RAG + GraphRAG pipeline — no PhD required.
6
+
7
+ [![npm version](https://img.shields.io/npm/v/@hazeljs/rag.svg)](https://www.npmjs.com/package/@hazeljs/rag)
8
+ [![npm downloads](https://img.shields.io/npm/dm/@hazeljs/rag)](https://www.npmjs.com/package/@hazeljs/rag)
9
+ [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0)
6
10
 
7
11
  ## Features
8
12
 
9
- 🔍 **Vector Search** - Semantic similarity search using embeddings
10
- 📚 **Document Management** - Load, split, and index documents
11
- 🤖 **RAG Pipeline** - Complete retrieval-augmented generation workflow
12
- 🎯 **Multiple Strategies** - Similarity, MMR (Maximal Marginal Relevance), Hybrid search
13
- 🔌 **Pluggable Backends** - Support for Pinecone, Weaviate, Qdrant, ChromaDB, and in-memory
14
- 🌐 **Multiple Embedding Providers** - OpenAI, Cohere, HuggingFace
15
- ✂️ **Smart Text Splitting** - Recursive text splitter with overlap
16
- 📊 **Metadata Filtering** - Filter results by custom metadata
13
+ - 📂 **11 Document Loaders** — TXT, Markdown, JSON, CSV, HTML, PDF, DOCX, web scraping, YouTube transcripts, GitHub repos, and inline text. All return the same `Document[]` interface.
14
+ - 🕸️ **GraphRAG** — Extract entities and relationships from documents, build a knowledge graph, detect communities, and answer questions with entity-centric (local), thematic (global), or hybrid search.
15
+ - 🔍 **Vector Search** — Semantic similarity search with configurable embeddings and vector stores
16
+ - 🤖 **RAG Pipeline** — Complete load → split → embed → retrieve → augment workflow
17
+ - 🎯 **Multiple Strategies** — Similarity, Hybrid (vector + BM25), Multi-Query retrieval
18
+ - 🔌 **5 Vector Stores** — Memory, Pinecone, Qdrant, Weaviate, ChromaDB (unified interface)
19
+ - 🌐 **Embedding Providers** — OpenAI and Cohere, easily extensible
20
+ - ✂️ **Smart Text Splitting** — Recursive, character, and token splitters
21
+ - 📊 **Metadata Filtering** — Filter results by any metadata field
22
+ - 🧠 **Memory System** — Conversation history, entity memory, fact storage, working memory
23
+
24
+ ---
17
25
 
18
26
  ## Installation
19
27
 
@@ -21,28 +29,34 @@ Build powerful AI applications with semantic search, document retrieval, and LLM
21
29
  npm install @hazeljs/rag
22
30
  ```
23
31
 
24
- ### Optional Peer Dependencies
32
+ ### Optional peer dependencies
25
33
 
26
- Install the vector store and embedding provider you want to use:
34
+ Install only what you need:
27
35
 
28
36
  ```bash
29
- # OpenAI Embeddings
37
+ # LLM (required for GraphRAG and RAG query synthesis)
30
38
  npm install openai
31
39
 
32
- # Vector Stores (choose one or more)
33
- npm install @pinecone-database/pinecone # Pinecone
34
- npm install weaviate-ts-client # Weaviate
40
+ # Vector stores
41
+ npm install @pinecone-database/pinecone # Pinecone
35
42
  npm install @qdrant/js-client-rest # Qdrant
43
+ npm install weaviate-ts-client # Weaviate
36
44
  npm install chromadb # ChromaDB
37
45
 
38
- # Additional Embedding Providers
39
- npm install cohere-ai # Cohere
40
- npm install @huggingface/inference # HuggingFace
46
+ # Alternative embedding providers
47
+ npm install cohere-ai
48
+
49
+ # Document loaders
50
+ npm install pdf-parse # PdfLoader
51
+ npm install mammoth # DocxLoader
52
+ npm install cheerio # HtmlFileLoader / WebLoader CSS selectors
41
53
  ```
42
54
 
55
+ ---
56
+
43
57
  ## Quick Start
44
58
 
45
- ### Basic RAG Pipeline
59
+ ### Basic RAG pipeline
46
60
 
47
61
  ```typescript
48
62
  import {
@@ -50,271 +64,400 @@ import {
50
64
  MemoryVectorStore,
51
65
  OpenAIEmbeddings,
52
66
  RecursiveTextSplitter,
67
+ DirectoryLoader,
53
68
  } from '@hazeljs/rag';
54
69
 
55
- // 1. Setup embedding provider
56
- const embeddings = new OpenAIEmbeddings({
57
- apiKey: process.env.OPENAI_API_KEY!,
58
- model: 'text-embedding-3-small',
59
- });
60
-
61
- // 2. Setup vector store
70
+ const embeddings = new OpenAIEmbeddings({ apiKey: process.env.OPENAI_API_KEY });
62
71
  const vectorStore = new MemoryVectorStore(embeddings);
63
72
 
64
- // 3. Setup text splitter
65
- const textSplitter = new RecursiveTextSplitter({
66
- chunkSize: 1000,
67
- chunkOverlap: 200,
68
- });
69
-
70
- // 4. Create RAG pipeline
71
73
  const rag = new RAGPipeline({
72
74
  vectorStore,
73
75
  embeddingProvider: embeddings,
74
- textSplitter,
76
+ textSplitter: new RecursiveTextSplitter({ chunkSize: 800, chunkOverlap: 150 }),
75
77
  topK: 5,
76
78
  });
77
-
78
- // 5. Initialize
79
79
  await rag.initialize();
80
80
 
81
- // 6. Add documents
82
- await rag.addDocuments([
83
- {
84
- content: 'HazelJS is a modern TypeScript framework for building scalable applications.',
85
- metadata: { source: 'docs', category: 'intro' },
86
- },
87
- {
88
- content: 'The framework includes built-in support for microservices, caching, and AI.',
89
- metadata: { source: 'docs', category: 'features' },
90
- },
91
- ]);
92
-
93
- // 7. Query
94
- const result = await rag.query('What is HazelJS?', {
95
- topK: 3,
96
- filter: { source: 'docs' },
97
- });
81
+ // Load from disk — auto-detects file types
82
+ const docs = await new DirectoryLoader({ dirPath: './knowledge-base', recursive: true }).load();
83
+ await rag.addDocuments(docs);
98
84
 
85
+ const result = await rag.query('What is HazelJS?', { topK: 3 });
99
86
  console.log(result.answer);
100
87
  console.log(result.sources);
101
88
  ```
102
89
 
103
- ### With LLM Integration
104
-
105
- ```typescript
106
- import OpenAI from 'openai';
90
+ ---
107
91
 
108
- const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY! });
92
+ ## Document Loaders
109
93
 
110
- // Create LLM function
111
- const llmFunction = async (prompt: string) => {
112
- const response = await openai.chat.completions.create({
113
- model: 'gpt-4',
114
- messages: [{ role: 'user', content: prompt }],
115
- });
116
- return response.choices[0].message.content || '';
117
- };
94
+ Every loader extends `BaseDocumentLoader` and returns `Document[]` ready for chunking and indexing.
118
95
 
119
- // Create RAG pipeline with LLM
120
- const rag = new RAGPipeline(config, llmFunction);
96
+ ### Built-in loaders
121
97
 
122
- // Query with custom prompt
123
- const result = await rag.query('What is HazelJS?', {
124
- llmPrompt: `Based on the following context, answer the question.
98
+ | Loader | Source | Extra install |
99
+ |--------|--------|:---:|
100
+ | `TextFileLoader` | `.txt` files | — |
101
+ | `MarkdownFileLoader` | `.md` / `.mdx` with heading splits and YAML front-matter | — |
102
+ | `JSONFileLoader` | `.json` with `textKey` / JSON Pointer extraction | — |
103
+ | `CSVFileLoader` | `.csv` rows mapped to documents | — |
104
+ | `HtmlFileLoader` | `.html` tag stripping; optional CSS selector via cheerio | opt. |
105
+ | `DirectoryLoader` | Recursive walk; auto-detects loader by extension | — |
106
+ | `PdfLoader` | PDFs; split by page or full document | `pdf-parse` |
107
+ | `DocxLoader` | Word documents; plain text or HTML output | `mammoth` |
108
+ | `WebLoader` | HTTP scraping with retry/timeout; optional CSS selector | opt. |
109
+ | `YouTubeTranscriptLoader` | YouTube transcripts; no API key; segment by duration | — |
110
+ | `GitHubLoader` | GitHub REST API; filter by path, extension, `maxFiles` | — |
125
111
 
126
- Context:
127
- {context}
112
+ ### Examples
128
113
 
129
- Question: {query}
114
+ ```typescript
115
+ import {
116
+ TextFileLoader,
117
+ MarkdownFileLoader,
118
+ JSONFileLoader,
119
+ CSVFileLoader,
120
+ PdfLoader,
121
+ DocxLoader,
122
+ WebLoader,
123
+ YouTubeTranscriptLoader,
124
+ GitHubLoader,
125
+ DirectoryLoader,
126
+ } from '@hazeljs/rag';
130
127
 
131
- Answer:`,
132
- });
128
+ // Plain text
129
+ const textDocs = await new TextFileLoader({ filePath: './notes.txt' }).load();
130
+
131
+ // Markdown — one document per heading section
132
+ const mdDocs = await new MarkdownFileLoader({
133
+ filePath: './guide.md',
134
+ splitByHeading: true,
135
+ parseYamlFrontMatter: true,
136
+ }).load();
137
+
138
+ // JSON — extract the 'body' field from each element
139
+ const jsonDocs = await new JSONFileLoader({ filePath: './articles.json', textKey: 'body' }).load();
140
+
141
+ // CSV — map columns to content / metadata
142
+ const csvDocs = await new CSVFileLoader({
143
+ filePath: './faqs.csv',
144
+ contentColumns: ['question', 'answer'],
145
+ metadataColumns: ['category'],
146
+ }).load();
147
+
148
+ // PDF — one document per page
149
+ const pdfDocs = await new PdfLoader({ filePath: './report.pdf', splitByPage: true }).load();
150
+
151
+ // DOCX
152
+ const wordDocs = await new DocxLoader({ filePath: './agreement.docx' }).load();
153
+
154
+ // Web scraping
155
+ const webDocs = await new WebLoader({
156
+ urls: ['https://hazeljs.com/docs', 'https://hazeljs.com/blog'],
157
+ timeout: 10_000,
158
+ maxRetries: 3,
159
+ }).load();
160
+
161
+ // YouTube transcript (no API key needed)
162
+ const ytDocs = await new YouTubeTranscriptLoader({
163
+ videoUrl: 'https://www.youtube.com/watch?v=VIDEO_ID',
164
+ segmentDuration: 60, // group into 60-second chunks
165
+ }).load();
166
+
167
+ // GitHub repository
168
+ const githubDocs = await new GitHubLoader({
169
+ owner: 'hazeljs',
170
+ repo: 'hazel',
171
+ directory: 'docs',
172
+ extensions: ['.md'],
173
+ token: process.env.GITHUB_TOKEN,
174
+ }).load();
175
+
176
+ // Directory — auto-detects every file type
177
+ const allDocs = await new DirectoryLoader({
178
+ dirPath: './knowledge-base',
179
+ recursive: true,
180
+ extensions: ['.md', '.txt', '.pdf'],
181
+ }).load();
133
182
  ```
134
183
 
135
- ## Vector Stores
136
-
137
- ### Memory Vector Store (Development)
184
+ ### Custom loaders
138
185
 
139
186
  ```typescript
140
- import { MemoryVectorStore, OpenAIEmbeddings } from '@hazeljs/rag';
187
+ import { BaseDocumentLoader, Loader, DocumentLoaderRegistry } from '@hazeljs/rag';
141
188
 
142
- const embeddings = new OpenAIEmbeddings({ apiKey: process.env.OPENAI_API_KEY! });
143
- const vectorStore = new MemoryVectorStore(embeddings);
189
+ @Loader({ name: 'NotionLoader', extensions: [] })
190
+ export class NotionLoader extends BaseDocumentLoader {
191
+ constructor(private readonly databaseId: string) { super(); }
192
+
193
+ async load() {
194
+ const pages = await fetchNotionPages(this.databaseId);
195
+ return pages.map(p =>
196
+ this.createDocument(p.content, { source: `notion:${p.id}`, title: p.title }),
197
+ );
198
+ }
199
+ }
200
+
201
+ // Register so DirectoryLoader can auto-detect it
202
+ DocumentLoaderRegistry.register(NotionLoader, (id: string) => new NotionLoader(id));
144
203
  ```
145
204
 
146
- ### Pinecone (Production)
205
+ ---
147
206
 
148
- ```typescript
149
- import { Pinecone } from '@pinecone-database/pinecone';
150
- import { PineconeVectorStore } from '@hazeljs/rag';
207
+ ## GraphRAG
151
208
 
152
- const pinecone = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! });
153
- const index = pinecone.index('my-index');
209
+ GraphRAG builds a **knowledge graph** from your documents — entities, relationships, and community clusters — and enables three complementary search modes that go far beyond cosine similarity.
154
210
 
155
- const vectorStore = new PineconeVectorStore(index, embeddings);
156
- ```
211
+ ### Why GraphRAG?
157
212
 
158
- ### Qdrant
213
+ | Question type | Traditional RAG | GraphRAG |
214
+ |---|---|---|
215
+ | "What does X do?" | ✅ Good | ✅ Excellent (entity traversal) |
216
+ | "How do X and Y relate?" | ❌ Poor | ✅ Excellent (relationships) |
217
+ | "What are the main architectural layers?" | ❌ Poor | ✅ Excellent (community reports) |
218
+ | Multi-document cross-referencing | ❌ Fragmented | ✅ Native |
159
219
 
160
- ```typescript
161
- import { QdrantClient } from '@qdrant/js-client-rest';
162
- import { QdrantVectorStore } from '@hazeljs/rag';
220
+ ### Build the graph
163
221
 
164
- const client = new QdrantClient({ url: 'http://localhost:6333' });
165
- const vectorStore = new QdrantVectorStore(client, embeddings, {
166
- collectionName: 'my-collection',
222
+ ```typescript
223
+ import OpenAI from 'openai';
224
+ import { GraphRAGPipeline, DirectoryLoader } from '@hazeljs/rag';
225
+
226
+ const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
227
+
228
+ const graphRag = new GraphRAGPipeline({
229
+ // Provider-agnostic: any LLM that accepts a string prompt
230
+ llm: async (prompt) => {
231
+ const res = await openai.chat.completions.create({
232
+ model: 'gpt-4o-mini',
233
+ temperature: 0,
234
+ messages: [{ role: 'user', content: prompt }],
235
+ });
236
+ return res.choices[0].message.content ?? '';
237
+ },
238
+ extractionChunkSize: 2000, // chars per LLM extraction call
239
+ generateCommunityReports: true, // LLM summaries per community cluster
240
+ maxCommunitySize: 15, // split clusters larger than this
241
+ localSearchDepth: 2, // BFS hops for local search
242
+ localSearchTopK: 5, // seed entities per query
243
+ globalSearchTopK: 5, // community reports for global search
167
244
  });
168
- ```
169
245
 
170
- ## Embedding Providers
246
+ const docs = await new DirectoryLoader({ dirPath: './knowledge-base', recursive: true }).load();
247
+ const stats = await graphRag.build(docs);
248
+ // { documentsProcessed, entitiesExtracted, relationshipsExtracted,
249
+ // communitiesDetected, communityReportsGenerated, duration }
250
+ ```
171
251
 
172
- ### OpenAI
252
+ ### Search modes
173
253
 
174
254
  ```typescript
175
- import { OpenAIEmbeddings } from '@hazeljs/rag';
176
-
177
- const embeddings = new OpenAIEmbeddings({
178
- apiKey: process.env.OPENAI_API_KEY!,
179
- model: 'text-embedding-3-small', // or 'text-embedding-3-large'
180
- dimensions: 1536,
181
- });
255
+ // LOCAL — entity-centric, BFS graph traversal
256
+ // Best for: specific questions about named concepts, classes, or technologies
257
+ const local = await graphRag.search(
258
+ 'How does dependency injection work?',
259
+ { mode: 'local' },
260
+ );
261
+ console.log(local.answer);
262
+ console.log(local.entities); // entities found and traversed
263
+ console.log(local.relationships); // evidence relationships
264
+
265
+ // GLOBAL — community report ranking
266
+ // Best for: broad thematic questions, architecture overviews
267
+ const global = await graphRag.search(
268
+ 'What are the main architectural layers of this system?',
269
+ { mode: 'global' },
270
+ );
271
+ console.log(global.communities); // ranked community reports used
272
+
273
+ // HYBRID — runs both in parallel, single synthesis call (recommended default)
274
+ const result = await graphRag.search('What vector stores does @hazeljs/rag support?');
275
+ // mode defaults to 'hybrid'
276
+ console.log(`${result.mode} search in ${result.duration}ms`);
182
277
  ```
183
278
 
184
- ### Cohere
279
+ ### Incremental updates
185
280
 
186
281
  ```typescript
187
- import { CohereEmbeddings } from '@hazeljs/rag';
188
-
189
- const embeddings = new CohereEmbeddings({
190
- apiKey: process.env.COHERE_API_KEY!,
191
- model: 'embed-english-v3.0',
192
- });
282
+ const newDocs = await new WebLoader({ urls: ['https://hazeljs.com/blog/new'] }).load();
283
+ await graphRag.addDocuments(newDocs);
284
+ // Re-runs community detection and regenerates reports automatically
193
285
  ```
194
286
 
195
- ### HuggingFace
287
+ ### Inspect the graph
196
288
 
197
289
  ```typescript
198
- import { HuggingFaceEmbeddings } from '@hazeljs/rag';
290
+ const graph = graphRag.getGraph();
199
291
 
200
- const embeddings = new HuggingFaceEmbeddings({
201
- apiKey: process.env.HUGGINGFACE_API_KEY!,
202
- model: 'sentence-transformers/all-MiniLM-L6-v2',
203
- });
292
+ // Entities, relationships, community reports
293
+ console.log([...graph.entities.values()].slice(0, 5));
294
+ console.log([...graph.relationships.values()].slice(0, 5));
295
+ console.log([...graph.communityReports.values()].map(r => r.title));
296
+
297
+ // Statistics
298
+ const stats = graphRag.getStats();
299
+ console.log(stats.entityTypeBreakdown); // { TECHNOLOGY: 14, CONCEPT: 12, ... }
300
+ console.log(stats.topEntities.slice(0, 5)); // most-connected entities
204
301
  ```
205
302
 
206
- ## Text Splitting
303
+ ---
304
+
305
+ ## Vector Stores
207
306
 
208
- ### Recursive Text Splitter
307
+ All stores implement the same interface — swap them with a one-line change.
209
308
 
210
309
  ```typescript
211
- import { RecursiveTextSplitter } from '@hazeljs/rag';
310
+ import { MemoryVectorStore, OpenAIEmbeddings } from '@hazeljs/rag';
212
311
 
213
- const splitter = new RecursiveTextSplitter({
214
- chunkSize: 1000,
215
- chunkOverlap: 200,
216
- separators: ['\n\n', '\n', '. ', ' ', ''],
217
- });
312
+ // Development
313
+ const vectorStore = new MemoryVectorStore(embeddings);
218
314
 
219
- const chunks = splitter.split(longText);
220
- ```
315
+ // Pinecone (production, serverless)
316
+ import { PineconeVectorStore } from '@hazeljs/rag';
317
+ const vectorStore = new PineconeVectorStore(embeddings, {
318
+ apiKey: process.env.PINECONE_API_KEY,
319
+ indexName: 'my-knowledge-base',
320
+ });
221
321
 
222
- ## Retrieval Strategies
322
+ // Qdrant (high-performance, self-hosted)
323
+ import { QdrantVectorStore } from '@hazeljs/rag';
324
+ const vectorStore = new QdrantVectorStore(embeddings, {
325
+ url: process.env.QDRANT_URL || 'http://localhost:6333',
326
+ collectionName: 'my-collection',
327
+ });
223
328
 
224
- ### Similarity Search (Default)
329
+ // Weaviate (GraphQL, flexible)
330
+ import { WeaviateVectorStore } from '@hazeljs/rag';
331
+ const vectorStore = new WeaviateVectorStore(embeddings, {
332
+ host: process.env.WEAVIATE_HOST || 'http://localhost:8080',
333
+ className: 'MyKnowledgeBase',
334
+ });
225
335
 
226
- ```typescript
227
- const results = await rag.retrieve('query', {
228
- topK: 5,
229
- strategy: RetrievalStrategy.SIMILARITY,
336
+ // ChromaDB (prototyping)
337
+ import { ChromaVectorStore } from '@hazeljs/rag';
338
+ const vectorStore = new ChromaVectorStore(embeddings, {
339
+ url: process.env.CHROMA_URL || 'http://localhost:8000',
340
+ collectionName: 'my-collection',
230
341
  });
231
342
  ```
232
343
 
233
- ### MMR (Maximal Marginal Relevance)
344
+ ### Vector store comparison
234
345
 
235
- Balances relevance and diversity to avoid redundant results:
346
+ | | Memory | Pinecone | Qdrant | Weaviate | ChromaDB |
347
+ |---|:---:|:---:|:---:|:---:|:---:|
348
+ | Setup | None | API Key | Docker | Docker | Docker |
349
+ | Persistence | ❌ | ✅ | ✅ | ✅ | ✅ |
350
+ | Best for | Dev/Test | Production | High-perf | GraphQL | Prototyping |
351
+ | Cost | Free | Paid | OSS | OSS | OSS |
352
+
353
+ ---
354
+
355
+ ## Embedding Providers
236
356
 
237
357
  ```typescript
238
- const results = await rag.retrieve('query', {
239
- topK: 5,
240
- strategy: RetrievalStrategy.MMR,
358
+ import { OpenAIEmbeddings, CohereEmbeddings } from '@hazeljs/rag';
359
+
360
+ // OpenAI
361
+ const openaiEmbed = new OpenAIEmbeddings({
362
+ apiKey: process.env.OPENAI_API_KEY,
363
+ model: 'text-embedding-3-small', // 1536 dims
364
+ // model: 'text-embedding-3-large', // 3072 dims, highest quality
365
+ });
366
+
367
+ // Cohere (multilingual)
368
+ const cohereEmbed = new CohereEmbeddings({
369
+ apiKey: process.env.COHERE_API_KEY,
370
+ model: 'embed-multilingual-v3.0',
241
371
  });
242
372
  ```
243
373
 
244
- ### Hybrid Search
374
+ ---
245
375
 
246
- Combines keyword and semantic search:
376
+ ## Retrieval Strategies
247
377
 
248
378
  ```typescript
249
- const results = await rag.retrieve('query', {
250
- topK: 5,
251
- strategy: RetrievalStrategy.HYBRID,
379
+ import { HybridSearchRetrieval, MultiQueryRetrieval } from '@hazeljs/rag';
380
+
381
+ // Hybrid — vector + BM25 keyword fusion
382
+ const hybrid = new HybridSearchRetrieval(vectorStore, {
383
+ vectorWeight: 0.7,
384
+ keywordWeight: 0.3,
385
+ topK: 10,
252
386
  });
387
+ const results = await hybrid.search('machine learning algorithms', { topK: 5 });
388
+
389
+ // Multi-query — LLM generates N query variations, deduplicates results
390
+ const multiQuery = new MultiQueryRetrieval(vectorStore, {
391
+ llmApiKey: process.env.OPENAI_API_KEY,
392
+ numQueries: 3,
393
+ topK: 10,
394
+ });
395
+ const results2 = await multiQuery.search('How do I deploy my app?', { topK: 5 });
253
396
  ```
254
397
 
255
- ## Metadata Filtering
398
+ ---
399
+
400
+ ## Text Splitting
256
401
 
257
402
  ```typescript
258
- await rag.addDocuments([
259
- {
260
- content: 'Document 1',
261
- metadata: { category: 'tech', year: 2024 },
262
- },
263
- {
264
- content: 'Document 2',
265
- metadata: { category: 'science', year: 2023 },
266
- },
267
- ]);
403
+ import { RecursiveTextSplitter } from '@hazeljs/rag';
268
404
 
269
- // Filter by metadata
270
- const results = await rag.query('query', {
271
- filter: { category: 'tech', year: 2024 },
405
+ const splitter = new RecursiveTextSplitter({
406
+ chunkSize: 1000, // target chars per chunk
407
+ chunkOverlap: 200, // overlap for context continuity
408
+ separators: ['\n\n', '\n', '. ', ' '],
272
409
  });
410
+
411
+ const chunks = splitter.split(longDocument);
273
412
  ```
274
413
 
275
- ## Advanced Usage
414
+ ---
276
415
 
277
- ### Custom Document Loaders
416
+ ## Memory System
278
417
 
279
418
  ```typescript
280
- import { DocumentLoader, Document } from '@hazeljs/rag';
281
-
282
- class PDFLoader implements DocumentLoader {
283
- constructor(private filePath: string) {}
419
+ import {
420
+ RAGPipelineWithMemory,
421
+ MemoryManager,
422
+ HybridMemory,
423
+ BufferMemory,
424
+ VectorMemory,
425
+ } from '@hazeljs/rag';
284
426
 
285
- async load(): Promise<Document[]> {
286
- // Load and parse PDF
287
- const text = await this.parsePDF(this.filePath);
288
- return [{ content: text, metadata: { source: this.filePath } }];
289
- }
427
+ const buffer = new BufferMemory({ maxSize: 20 });
428
+ const vectorMemory = new VectorMemory(vectorStore, embeddings);
429
+ const memory = new MemoryManager(new HybridMemory(buffer, vectorMemory));
290
430
 
291
- private async parsePDF(path: string): Promise<string> {
292
- // PDF parsing logic
293
- return '';
294
- }
295
- }
431
+ const rag = new RAGPipelineWithMemory(config, memory, llmFunction);
296
432
 
297
- const loader = new PDFLoader('./document.pdf');
298
- const documents = await loader.load();
299
- await rag.addDocuments(documents);
433
+ const response = await rag.queryWithMemory(
434
+ 'What did we discuss about deployment?',
435
+ 'session-123',
436
+ 'user-456',
437
+ );
438
+ console.log(response.answer);
439
+ console.log(response.memories);
300
440
  ```
301
441
 
302
- ### Batch Operations
442
+ ---
303
443
 
304
- ```typescript
305
- // Add multiple documents efficiently
306
- const ids = await rag.addDocuments(documents);
444
+ ## API Reference
307
445
 
308
- // Delete multiple documents
309
- await rag.deleteDocuments(ids);
446
+ ### `GraphRAGPipeline`
310
447
 
311
- // Clear all documents
312
- await rag.clear();
448
+ ```typescript
449
+ class GraphRAGPipeline {
450
+ constructor(config: GraphRAGConfig);
451
+ build(docs: Document[]): Promise<GraphBuildStats>;
452
+ addDocuments(docs: Document[]): Promise<GraphBuildStats>;
453
+ search(query: string, options?: GraphSearchOptions): Promise<GraphSearchResult>;
454
+ getGraph(): KnowledgeGraph;
455
+ getStats(): GraphStats;
456
+ clear(): void;
457
+ }
313
458
  ```
314
459
 
315
- ## API Reference
316
-
317
- ### RAGPipeline
460
+ ### `RAGPipeline`
318
461
 
319
462
  ```typescript
320
463
  class RAGPipeline {
@@ -322,58 +465,40 @@ class RAGPipeline {
322
465
  initialize(): Promise<void>;
323
466
  addDocuments(documents: Document[]): Promise<string[]>;
324
467
  query(query: string, options?: RAGQueryOptions): Promise<RAGResponse>;
325
- retrieve(query: string, options?: QueryOptions, strategy?: RetrievalStrategy): Promise<SearchResult[]>;
468
+ search(query: string, options?: QueryOptions): Promise<SearchResult[]>;
326
469
  deleteDocuments(ids: string[]): Promise<void>;
327
470
  clear(): Promise<void>;
328
471
  }
329
472
  ```
330
473
 
331
- ### Types
474
+ ### `Document`
332
475
 
333
476
  ```typescript
334
477
  interface Document {
335
478
  id?: string;
336
479
  content: string;
337
- metadata?: Record<string, any>;
480
+ metadata?: Record<string, unknown>;
338
481
  embedding?: number[];
339
482
  }
340
-
341
- interface SearchResult {
342
- id: string;
343
- content: string;
344
- metadata?: Record<string, any>;
345
- score: number;
346
- embedding?: number[];
347
- }
348
-
349
- interface RAGResponse {
350
- answer: string;
351
- sources: SearchResult[];
352
- context: string;
353
- }
354
483
  ```
355
484
 
356
- ## Use Cases
485
+ ---
357
486
 
358
- - 📖 **Documentation Search** - Semantic search across documentation
359
- - 💬 **Chatbots** - Context-aware conversational AI
360
- - 🔍 **Knowledge Base** - Internal knowledge management
361
- - 📝 **Content Recommendations** - Similar content discovery
362
- - 🎓 **Educational Tools** - Q&A systems with source citations
363
- - 🏢 **Enterprise Search** - Semantic search across company data
487
+ ## Use Cases
364
488
 
365
- ## Performance Tips
489
+ - 📖 **Documentation Q&A** — Index all your docs and answer developer questions
490
+ - 🕸️ **Codebase Understanding** — GraphRAG over a repo to explain architecture and dependencies
491
+ - 💬 **Context-Aware Chatbots** — RAG + memory for multi-turn conversations
492
+ - 🔍 **Enterprise Knowledge Base** — Combine web, GitHub, PDFs, and internal wikis
493
+ - 🎓 **Research Assistants** — Multi-document reasoning with knowledge graph traversal
494
+ - 📝 **Content Intelligence** — Semantic search + relationship mapping across articles
366
495
 
367
- 1. **Batch Operations** - Add documents in batches for better performance
368
- 2. **Chunk Size** - Balance between context and precision (500-1500 tokens)
369
- 3. **Overlap** - Use 10-20% overlap for better context continuity
370
- 4. **Caching** - Cache embeddings for frequently accessed documents
371
- 5. **Filtering** - Use metadata filters to reduce search space
496
+ ---
372
497
 
373
498
  ## License
374
499
 
375
- MIT
500
+ Apache 2.0
376
501
 
377
502
  ## Contributing
378
503
 
379
- Contributions are welcome! Please see [CONTRIBUTING.md](../../CONTRIBUTING.md) for details.
504
+ Contributions are welcome! See [CONTRIBUTING.md](../../CONTRIBUTING.md) for details.