@elsium-ai/rag 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -323,6 +323,41 @@ console.log(doc.metadata.rowCount) // 2
|
|
|
323
323
|
console.log(doc.metadata.columns) // ["name", "age", "bio"]
|
|
324
324
|
```
|
|
325
325
|
|
|
326
|
+
### `pdfLoader`
|
|
327
|
+
|
|
328
|
+
Creates a loader for PDF documents. Extracts text content from PDF files with configurable page limits and page break markers.
|
|
329
|
+
|
|
330
|
+
Requires `pdf-parse` as an optional peer dependency:
|
|
331
|
+
|
|
332
|
+
```bash
|
|
333
|
+
npm install pdf-parse
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
```typescript
|
|
337
|
+
function pdfLoader(options?: {
|
|
338
|
+
maxPages?: number
|
|
339
|
+
pageBreakMarker?: string
|
|
340
|
+
}): DocumentLoader
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
| Parameter | Type | Default | Description |
|
|
344
|
+
|---|---|---|---|
|
|
345
|
+
| `options.maxPages` | `number` | `undefined` | Maximum number of pages to extract. When omitted, all pages are loaded. |
|
|
346
|
+
| `options.pageBreakMarker` | `string` | `'\n\n---\n\n'` | String inserted between pages in the extracted text. |
|
|
347
|
+
|
|
348
|
+
**Returns:** A `DocumentLoader` that produces documents with `type: 'pdf'`.
|
|
349
|
+
|
|
350
|
+
```typescript
|
|
351
|
+
import { pdfLoader } from '@elsium-ai/rag'
|
|
352
|
+
import { readFileSync } from 'node:fs'
|
|
353
|
+
|
|
354
|
+
const loader = pdfLoader({ maxPages: 50 })
|
|
355
|
+
const content = readFileSync('report.pdf', 'base64')
|
|
356
|
+
const doc = loader.load('report.pdf', content)
|
|
357
|
+
|
|
358
|
+
console.log(doc.metadata.type) // "pdf"
|
|
359
|
+
```
|
|
360
|
+
|
|
326
361
|
### `getLoader`
|
|
327
362
|
|
|
328
363
|
Factory function that returns a `DocumentLoader` for the given `LoaderType`.
|
|
@@ -818,7 +853,7 @@ const results = bm25.search('machine learning', 5)
|
|
|
818
853
|
|
|
819
854
|
### `createHybridSearch`
|
|
820
855
|
|
|
821
|
-
Combines a vector store with a BM25 index using Reciprocal Rank Fusion (RRF) to blend semantic and keyword search results.
|
|
856
|
+
Combines a vector store with a BM25 index using Reciprocal Rank Fusion (RRF) to blend semantic and keyword search results. Use hybrid search when you need both exact keyword matching (BM25) and semantic similarity (vector search); combining the two yields higher-quality retrieval than either approach alone.
|
|
822
857
|
|
|
823
858
|
```typescript
|
|
824
859
|
function createHybridSearch(
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@elsium-ai/rag",
|
|
3
|
-
"version": "0.8.0",
|
|
3
|
+
"version": "0.9.1",
|
|
4
4
|
"description": "RAG pipeline, document processing, embeddings, and vector stores for ElsiumAI",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Eric Utrera <ebutrera9103@gmail.com>",
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"dev": "bun --watch src/index.ts"
|
|
27
27
|
},
|
|
28
28
|
"dependencies": {
|
|
29
|
-
"@elsium-ai/core": "^0.8.0"
|
|
29
|
+
"@elsium-ai/core": "^0.9.1"
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
32
|
"typescript": "^5.7.0"
|