@hazeljs/rag 0.7.9 → 0.8.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/README.md +49 -50
  2. package/package.json +7 -49
package/README.md CHANGED
@@ -15,6 +15,7 @@ Part of the HazelJS AI-Native Backend Framework. Load documents from any source,
15
15
  Built for **AI-native applications** - not just another RAG library. When you combine @hazeljs/rag with @hazeljs/core, @hazeljs/ai, and @hazeljs/agent, you get a complete stack for intelligent backends.
16
16
 
17
17
  **Perfect for:**
18
+
18
19
  - AI startups building knowledge-based applications
19
20
  - Teams implementing document Q&A systems
20
21
  - Developers who want semantic search without complexity
@@ -110,19 +111,19 @@ Every loader extends `BaseDocumentLoader` and returns `Document[]` ready for chu
110
111
 
111
112
  ### Built-in loaders
112
113
 
113
- | Loader | Source | Extra install |
114
- |--------|--------|:---:|
115
- | `TextFileLoader` | `.txt` files | |
116
- | `MarkdownFileLoader` | `.md` / `.mdx` with heading splits and YAML front-matter | |
117
- | `JSONFileLoader` | `.json` with `textKey` / JSON Pointer extraction | |
118
- | `CSVFileLoader` | `.csv` rows mapped to documents | |
119
- | `HtmlFileLoader` | `.html` tag stripping; optional CSS selector via cheerio | opt. |
120
- | `DirectoryLoader` | Recursive walk; auto-detects loader by extension | |
121
- | `PdfLoader` | PDFs; split by page or full document | `pdf-parse` |
122
- | `DocxLoader` | Word documents; plain text or HTML output | `mammoth` |
123
- | `WebLoader` | HTTP scraping with retry/timeout; optional CSS selector | opt. |
124
- | `YouTubeTranscriptLoader` | YouTube transcripts; no API key; segment by duration | |
125
- | `GitHubLoader` | GitHub REST API; filter by path, extension, `maxFiles` | |
114
+ | Loader | Source | Extra install |
115
+ | ------------------------- | -------------------------------------------------------- | :-----------: |
116
+ | `TextFileLoader` | `.txt` files | |
117
+ | `MarkdownFileLoader` | `.md` / `.mdx` with heading splits and YAML front-matter | |
118
+ | `JSONFileLoader` | `.json` with `textKey` / JSON Pointer extraction | |
119
+ | `CSVFileLoader` | `.csv` rows mapped to documents | |
120
+ | `HtmlFileLoader` | `.html` tag stripping; optional CSS selector via cheerio | opt. |
121
+ | `DirectoryLoader` | Recursive walk; auto-detects loader by extension | |
122
+ | `PdfLoader` | PDFs; split by page or full document | `pdf-parse` |
123
+ | `DocxLoader` | Word documents; plain text or HTML output | `mammoth` |
124
+ | `WebLoader` | HTTP scraping with retry/timeout; optional CSS selector | opt. |
125
+ | `YouTubeTranscriptLoader` | YouTube transcripts; no API key; segment by duration | |
126
+ | `GitHubLoader` | GitHub REST API; filter by path, extension, `maxFiles` | |
126
127
 
127
128
  ### Examples
128
129
 
@@ -176,7 +177,7 @@ const webDocs = await new WebLoader({
176
177
  // YouTube transcript (no API key needed)
177
178
  const ytDocs = await new YouTubeTranscriptLoader({
178
179
  videoUrl: 'https://www.youtube.com/watch?v=VIDEO_ID',
179
- segmentDuration: 60, // group into 60-second chunks
180
+ segmentDuration: 60, // group into 60-second chunks
180
181
  }).load();
181
182
 
182
183
  // GitHub repository
@@ -203,12 +204,14 @@ import { BaseDocumentLoader, Loader, DocumentLoaderRegistry } from '@hazeljs/rag
203
204
 
204
205
  @Loader({ name: 'NotionLoader', extensions: [] })
205
206
  export class NotionLoader extends BaseDocumentLoader {
206
- constructor(private readonly databaseId: string) { super(); }
207
+ constructor(private readonly databaseId: string) {
208
+ super();
209
+ }
207
210
 
208
211
  async load() {
209
212
  const pages = await fetchNotionPages(this.databaseId);
210
- return pages.map(p =>
211
- this.createDocument(p.content, { source: `notion:${p.id}`, title: p.title }),
213
+ return pages.map((p) =>
214
+ this.createDocument(p.content, { source: `notion:${p.id}`, title: p.title })
212
215
  );
213
216
  }
214
217
  }
@@ -225,12 +228,12 @@ GraphRAG builds a **knowledge graph** from your documents — entities, relation
225
228
 
226
229
  ### Why GraphRAG?
227
230
 
228
- | Question type | Traditional RAG | GraphRAG |
229
- |---|---|---|
230
- | "What does X do?" | ✅ Good | ✅ Excellent (entity traversal) |
231
- | "How do X and Y relate?" | ❌ Poor | ✅ Excellent (relationships) |
232
- | "What are the main architectural layers?" | ❌ Poor | ✅ Excellent (community reports) |
233
- | Multi-document cross-referencing | ❌ Fragmented | ✅ Native |
231
+ | Question type | Traditional RAG | GraphRAG |
232
+ | ----------------------------------------- | --------------- | -------------------------------- |
233
+ | "What does X do?" | ✅ Good | ✅ Excellent (entity traversal) |
234
+ | "How do X and Y relate?" | ❌ Poor | ✅ Excellent (relationships) |
235
+ | "What are the main architectural layers?" | ❌ Poor | ✅ Excellent (community reports) |
236
+ | Multi-document cross-referencing | ❌ Fragmented | ✅ Native |
234
237
 
235
238
  ### Build the graph
236
239
 
@@ -250,12 +253,12 @@ const graphRag = new GraphRAGPipeline({
250
253
  });
251
254
  return res.choices[0].message.content ?? '';
252
255
  },
253
- extractionChunkSize: 2000, // chars per LLM extraction call
256
+ extractionChunkSize: 2000, // chars per LLM extraction call
254
257
  generateCommunityReports: true, // LLM summaries per community cluster
255
- maxCommunitySize: 15, // split clusters larger than this
256
- localSearchDepth: 2, // BFS hops for local search
257
- localSearchTopK: 5, // seed entities per query
258
- globalSearchTopK: 5, // community reports for global search
258
+ maxCommunitySize: 15, // split clusters larger than this
259
+ localSearchDepth: 2, // BFS hops for local search
260
+ localSearchTopK: 5, // seed entities per query
261
+ globalSearchTopK: 5, // community reports for global search
259
262
  });
260
263
 
261
264
  const docs = await new DirectoryLoader({ dirPath: './knowledge-base', recursive: true }).load();
@@ -269,21 +272,17 @@ const stats = await graphRag.build(docs);
269
272
  ```typescript
270
273
  // LOCAL — entity-centric, BFS graph traversal
271
274
  // Best for: specific questions about named concepts, classes, or technologies
272
- const local = await graphRag.search(
273
- 'How does dependency injection work?',
274
- { mode: 'local' },
275
- );
275
+ const local = await graphRag.search('How does dependency injection work?', { mode: 'local' });
276
276
  console.log(local.answer);
277
- console.log(local.entities); // entities found and traversed
277
+ console.log(local.entities); // entities found and traversed
278
278
  console.log(local.relationships); // evidence relationships
279
279
 
280
280
  // GLOBAL — community report ranking
281
281
  // Best for: broad thematic questions, architecture overviews
282
- const global = await graphRag.search(
283
- 'What are the main architectural layers of this system?',
284
- { mode: 'global' },
285
- );
286
- console.log(global.communities); // ranked community reports used
282
+ const global = await graphRag.search('What are the main architectural layers of this system?', {
283
+ mode: 'global',
284
+ });
285
+ console.log(global.communities); // ranked community reports used
287
286
 
288
287
  // HYBRID — runs both in parallel, single synthesis call (recommended default)
289
288
  const result = await graphRag.search('What vector stores does @hazeljs/rag support?');
@@ -307,11 +306,11 @@ const graph = graphRag.getGraph();
307
306
  // Entities, relationships, community reports
308
307
  console.log([...graph.entities.values()].slice(0, 5));
309
308
  console.log([...graph.relationships.values()].slice(0, 5));
310
- console.log([...graph.communityReports.values()].map(r => r.title));
309
+ console.log([...graph.communityReports.values()].map((r) => r.title));
311
310
 
312
311
  // Statistics
313
312
  const stats = graphRag.getStats();
314
- console.log(stats.entityTypeBreakdown); // { TECHNOLOGY: 14, CONCEPT: 12, ... }
313
+ console.log(stats.entityTypeBreakdown); // { TECHNOLOGY: 14, CONCEPT: 12, ... }
315
314
  console.log(stats.topEntities.slice(0, 5)); // most-connected entities
316
315
  ```
317
316
 
@@ -358,12 +357,12 @@ const vectorStore = new ChromaVectorStore(embeddings, {
358
357
 
359
358
  ### Vector store comparison
360
359
 
361
- | | Memory | Pinecone | Qdrant | Weaviate | ChromaDB |
362
- |---|:---:|:---:|:---:|:---:|:---:|
363
- | Setup | None | API Key | Docker | Docker | Docker |
364
- | Persistence | ❌ | ✅ | ✅ | ✅ | ✅ |
365
- | Best for | Dev/Test | Production | High-perf | GraphQL | Prototyping |
366
- | Cost | Free | Paid | OSS | OSS | OSS |
360
+ | | Memory | Pinecone | Qdrant | Weaviate | ChromaDB |
361
+ | ----------- | :------: | :--------: | :-------: | :------: | :---------: |
362
+ | Setup | None | API Key | Docker | Docker | Docker |
363
+ | Persistence | ❌ | ✅ | ✅ | ✅ | ✅ |
364
+ | Best for | Dev/Test | Production | High-perf | GraphQL | Prototyping |
365
+ | Cost | Free | Paid | OSS | OSS | OSS |
367
366
 
368
367
  ---
369
368
 
@@ -375,7 +374,7 @@ import { OpenAIEmbeddings, CohereEmbeddings } from '@hazeljs/rag';
375
374
  // OpenAI
376
375
  const openaiEmbed = new OpenAIEmbeddings({
377
376
  apiKey: process.env.OPENAI_API_KEY,
378
- model: 'text-embedding-3-small', // 1536 dims
377
+ model: 'text-embedding-3-small', // 1536 dims
379
378
  // model: 'text-embedding-3-large', // 3072 dims, highest quality
380
379
  });
381
380
 
@@ -418,8 +417,8 @@ const results2 = await multiQuery.search('How do I deploy my app?', { topK: 5 })
418
417
  import { RecursiveTextSplitter } from '@hazeljs/rag';
419
418
 
420
419
  const splitter = new RecursiveTextSplitter({
421
- chunkSize: 1000, // target chars per chunk
422
- chunkOverlap: 200, // overlap for context continuity
420
+ chunkSize: 1000, // target chars per chunk
421
+ chunkOverlap: 200, // overlap for context continuity
423
422
  separators: ['\n\n', '\n', '. ', ' '],
424
423
  });
425
424
 
@@ -448,7 +447,7 @@ const rag = new RAGPipelineWithMemory(config, memory, llmFunction);
448
447
  const response = await rag.queryWithMemory(
449
448
  'What did we discuss about deployment?',
450
449
  'session-123',
451
- 'user-456',
450
+ 'user-456'
452
451
  );
453
452
  console.log(response.answer);
454
453
  console.log(response.memories);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hazeljs/rag",
3
- "version": "0.7.9",
3
+ "version": "0.8.0",
4
4
  "description": "Retrieval-Augmented Generation (RAG) and vector search for HazelJS framework",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -19,13 +19,13 @@
19
19
  ],
20
20
  "scripts": {
21
21
  "build": "rm -rf dist tsconfig.tsbuildinfo && tsc --skipLibCheck",
22
- "test": "jest --coverage --passWithNoTests",
22
+ "test": "jest --config jest.config.js --coverage --passWithNoTests",
23
23
  "lint": "eslint \"src/**/*.ts\"",
24
24
  "lint:fix": "eslint \"src/**/*.ts\" --fix",
25
25
  "clean": "rm -rf dist"
26
26
  },
27
27
  "dependencies": {
28
- "@hazeljs/prompts": "^0.7.9",
28
+ "@hazeljs/prompts": "^0.8.0",
29
29
  "reflect-metadata": "^0.2.2"
30
30
  },
31
31
  "peerDependencies": {
@@ -39,7 +39,7 @@
39
39
  "chromadb": "^1.8.0",
40
40
  "cohere-ai": "^7.0.0",
41
41
  "mammoth": "^1.11.0",
42
- "openai": "^4.0.0",
42
+ "openai": "^6.0.0",
43
43
  "pdf-parse": "^1.1.1",
44
44
  "weaviate-ts-client": "^2.0.0"
45
45
  },
@@ -78,56 +78,14 @@
78
78
  "optional": true
79
79
  }
80
80
  },
81
- "jest": {
82
- "preset": "ts-jest",
83
- "testEnvironment": "node",
84
- "testMatch": [
85
- "**/src/__tests__/**/*.test.ts"
86
- ],
87
- "moduleNameMapper": {
88
- "^@hazeljs/prompts$": "<rootDir>/../prompts/src/index.ts",
89
- "^@hazeljs/memory$": "<rootDir>/../memory/src/index.ts",
90
- "^@hazeljs/memory/prisma$": "<rootDir>/../memory/src/prisma.ts"
91
- },
92
- "collectCoverageFrom": [
93
- "src/**/*.ts",
94
- "!src/**/*.d.ts",
95
- "!src/index.ts",
96
- "!src/rag.module.ts",
97
- "!src/rag.service.ts",
98
- "!src/rag-pipeline-with-memory.ts",
99
- "!src/vector-stores/**",
100
- "!src/embeddings/**",
101
- "!src/agentic/**",
102
- "!src/decorators/**",
103
- "!src/memory/**",
104
- "!src/loaders/pdf.loader.ts",
105
- "!src/loaders/docx.loader.ts",
106
- "!src/loaders/web.loader.ts",
107
- "!src/loaders/github.loader.ts",
108
- "!src/loaders/youtube-transcript.loader.ts",
109
- "!src/loaders/index.ts",
110
- "!src/graph/index.ts",
111
- "!src/prompts/**",
112
- "!src/types/**"
113
- ],
114
- "coverageThreshold": {
115
- "global": {
116
- "statements": 85,
117
- "branches": 75,
118
- "functions": 85,
119
- "lines": 85
120
- }
121
- }
122
- },
123
81
  "devDependencies": {
124
- "@hazeljs/memory": "^0.7.9",
82
+ "@hazeljs/memory": "^0.8.0",
125
83
  "@types/node": "^20.19.39",
126
84
  "@typescript-eslint/eslint-plugin": "^8.58.0",
127
85
  "@typescript-eslint/parser": "^8.58.0",
128
86
  "eslint": "^8.57.1",
129
87
  "jest": "^29.7.0",
130
- "openai": "^4.104.0",
88
+ "openai": "^6.33.0",
131
89
  "ts-jest": "^29.4.9",
132
90
  "typescript": "^5.9.3"
133
91
  },
@@ -159,5 +117,5 @@
159
117
  "url": "https://github.com/hazeljs/hazel-js/issues"
160
118
  },
161
119
  "homepage": "https://hazeljs.ai",
162
- "gitHead": "28c21c509aeca3bf2d0878fbee737d906b654c67"
120
+ "gitHead": "e0ed98ca074dd4f7472816d3c32ef576900dcca6"
163
121
  }