@elsium-ai/rag 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/README.md +36 -1
  2. package/package.json +2 -2
package/README.md CHANGED
@@ -323,6 +323,41 @@ console.log(doc.metadata.rowCount) // 2
323
323
  console.log(doc.metadata.columns) // ["name", "age", "bio"]
324
324
  ```
325
325
 
326
+ ### `pdfLoader`
327
+
328
+ Creates a loader for PDF documents. Extracts text content from PDF files with configurable page limits and page break markers.
329
+
330
+ `pdf-parse` is an optional peer dependency; install it to use this loader:
331
+
332
+ ```bash
333
+ npm install pdf-parse
334
+ ```
335
+
336
+ ```typescript
337
+ function pdfLoader(options?: {
338
+ maxPages?: number
339
+ pageBreakMarker?: string
340
+ }): DocumentLoader
341
+ ```
342
+
343
+ | Parameter | Type | Default | Description |
344
+ |---|---|---|---|
345
+ | `options.maxPages` | `number` | `undefined` | Maximum number of pages to extract. When omitted, all pages are loaded. |
346
+ | `options.pageBreakMarker` | `string` | `'\n\n---\n\n'` | String inserted between pages in the extracted text. |
347
+
348
+ **Returns:** A `DocumentLoader` that produces documents whose `metadata.type` is `'pdf'`.
349
+
350
+ ```typescript
351
+ import { pdfLoader } from '@elsium-ai/rag'
352
+ import { readFileSync } from 'node:fs'
353
+
354
+ const loader = pdfLoader({ maxPages: 50 })
355
+ const content = readFileSync('report.pdf', 'base64')
356
+ const doc = loader.load('report.pdf', content)
357
+
358
+ console.log(doc.metadata.type) // "pdf"
359
+ ```
360
+
326
361
  ### `getLoader`
327
362
 
328
363
  Factory function that returns a `DocumentLoader` for the given `LoaderType`.
@@ -818,7 +853,7 @@ const results = bm25.search('machine learning', 5)
818
853
 
819
854
  ### `createHybridSearch`
820
855
 
821
- Combines a vector store with a BM25 index using Reciprocal Rank Fusion (RRF) to blend semantic and keyword search results.
856
+ Combines a vector store with a BM25 index using Reciprocal Rank Fusion (RRF) to blend semantic and keyword search results. Use hybrid search when you need both exact keyword matching (BM25) and semantic similarity (vector search); combining the two typically yields higher-quality retrieval than either approach alone.
822
857
 
823
858
  ```typescript
824
859
  function createHybridSearch(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elsium-ai/rag",
3
- "version": "0.8.0",
3
+ "version": "0.9.1",
4
4
  "description": "RAG pipeline, document processing, embeddings, and vector stores for ElsiumAI",
5
5
  "license": "MIT",
6
6
  "author": "Eric Utrera <ebutrera9103@gmail.com>",
@@ -26,7 +26,7 @@
26
26
  "dev": "bun --watch src/index.ts"
27
27
  },
28
28
  "dependencies": {
29
- "@elsium-ai/core": "^0.8.0"
29
+ "@elsium-ai/core": "^0.9.1"
30
30
  },
31
31
  "devDependencies": {
32
32
  "typescript": "^5.7.0"