@robthepcguy/rag-vault 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +76 -43
  2. package/dist/hyde/index.d.ts +47 -0
  3. package/dist/hyde/index.d.ts.map +1 -0
  4. package/dist/hyde/index.js +203 -0
  5. package/dist/hyde/index.js.map +1 -0
  6. package/dist/reranker/index.d.ts +76 -0
  7. package/dist/reranker/index.d.ts.map +1 -0
  8. package/dist/reranker/index.js +199 -0
  9. package/dist/reranker/index.js.map +1 -0
  10. package/dist/server/index.d.ts +25 -0
  11. package/dist/server/index.d.ts.map +1 -1
  12. package/dist/server/index.js +103 -14
  13. package/dist/server/index.js.map +1 -1
  14. package/dist/server/schemas.d.ts +21 -100
  15. package/dist/server/schemas.d.ts.map +1 -1
  16. package/dist/server/schemas.js +3 -3
  17. package/dist/server/schemas.js.map +1 -1
  18. package/dist/utils/config-parsers.d.ts +14 -0
  19. package/dist/utils/config-parsers.d.ts.map +1 -1
  20. package/dist/utils/config-parsers.js +26 -0
  21. package/dist/utils/config-parsers.js.map +1 -1
  22. package/dist/utils/config.d.ts +23 -0
  23. package/dist/utils/config.d.ts.map +1 -1
  24. package/dist/utils/config.js +39 -1
  25. package/dist/utils/config.js.map +1 -1
  26. package/dist/utils/file-utils.d.ts.map +1 -1
  27. package/dist/utils/file-utils.js +17 -1
  28. package/dist/utils/file-utils.js.map +1 -1
  29. package/dist/vectordb/index.d.ts +33 -7
  30. package/dist/vectordb/index.d.ts.map +1 -1
  31. package/dist/vectordb/index.js +161 -47
  32. package/dist/vectordb/index.js.map +1 -1
  33. package/package.json +8 -7
  34. package/skills/rag-vault/SKILL.md +3 -3
  35. package/skills/rag-vault/references/html-ingestion.md +1 -1
  36. package/web-ui/dist/assets/{CollectionsPage-CjLs8_5j.js → CollectionsPage-wbfgYFTw.js} +1 -1
  37. package/web-ui/dist/assets/{FilesPage-Bw9x9aMr.js → FilesPage-D6TlldaR.js} +1 -1
  38. package/web-ui/dist/assets/{ReaderPage-JPNiOF-x.js → ReaderPage-Sgy0vMZ6.js} +1 -1
  39. package/web-ui/dist/assets/{ReaderSettingsContext-BLFJnEne.js → ReaderSettingsContext-DsvLXuaf.js} +1 -1
  40. package/web-ui/dist/assets/{SearchPage-D3_Vtbdw.js → SearchPage-mPKXZEyq.js} +1 -1
  41. package/web-ui/dist/assets/{SettingsPage-BAxB2264.js → SettingsPage-DXeWwfvd.js} +1 -1
  42. package/web-ui/dist/assets/{StatusPage-CzJZW8Gs.js → StatusPage-AirpfsGF.js} +1 -1
  43. package/web-ui/dist/assets/{UploadPage-DW8OujeJ.js → UploadPage-Cob25kDa.js} +1 -1
  44. package/web-ui/dist/assets/index-BZMzEssr.js +6 -0
  45. package/web-ui/dist/assets/motion-DdHBXDWx.js +9 -0
  46. package/web-ui/dist/assets/query-DbAD_nLW.js +1 -0
  47. package/web-ui/dist/assets/{vendor-DSXQOR6A.js → vendor-DNJ-hWNb.js} +1 -1
  48. package/web-ui/dist/index.html +3 -3
  49. package/web-ui/dist/assets/index-ANt8Xo4z.js +0 -6
  50. package/web-ui/dist/assets/motion-Brxs0UET.js +0 -9
  51. package/web-ui/dist/assets/query-DPt-uCb6.js +0 -1
package/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  **Your documents. Your machine. Your control.**
8
8
 
9
- RAG Vault gives AI coding assistants fast access to your private documents such as API specs, research papers, and internal docs. Indexing and search run locally, and your data stays on your machine unless you explicitly ingest content from a remote URL.
9
+ RAG Vault lets your AI coding assistant search your private documents, things like API specs, research papers, and internal docs. Everything runs locally and your data stays on your machine unless you choose to pull in content from a remote URL.
10
10
 
11
11
  One command to run, minimal setup, privacy by default.
12
12
 
@@ -15,15 +15,15 @@ One command to run, minimal setup, privacy by default.
15
15
  | Pain Point | RAG Vault Solution |
16
16
  |------------|-------------------|
17
17
  | "I don't want my docs on someone else's server" | Everything stays local by default. No background cloud calls for indexing or search. |
18
- | "Semantic search misses exact code terms" | Hybrid search: meaning + exact matches like `useEffect` |
18
+ | "Semantic search misses exact code terms" | Hybrid search with RRF fusion, optional cross-encoder reranking |
19
19
  | "Setup requires Docker, Python, databases..." | One `npx` command plus a small MCP config block. |
20
20
  | "Cloud APIs charge per query" | Free forever. No subscriptions. |
21
21
 
22
22
  ## Security
23
23
 
24
- RAG Vault includes security features for production deployment:
24
+ RAG Vault comes with security built in:
25
25
  - **API Authentication**: Optional API key via `RAG_API_KEY`
26
- - **Rate Limiting**: Configurable request throttling
26
+ - **Rate Limiting**: You can throttle requests
27
27
  - **CORS Control**: Restrict allowed origins
28
28
  - **Security Headers**: Helmet.js protection
29
29
 
@@ -106,7 +106,7 @@ BASE_DIR = "/path/to/your/documents"
106
106
 
107
107
  ### Install Skills (Optional)
108
108
 
109
- For enhanced AI guidance on query formulation and result interpretation, install the RAG Vault skills:
109
+ If you want your AI to write better queries and make more sense of results, install the RAG Vault skills:
110
110
 
111
111
  ```bash
112
112
  # Claude Code (project-level - recommended for team projects)
@@ -124,7 +124,7 @@ npx github:RobThePCGuy/rag-vault skills install --path /your/custom/path
124
124
 
125
125
  Skills teach Claude best practices for:
126
126
  - Query formulation and expansion strategies
127
- - Score interpretation (< 0.3 = good match, > 0.5 = skip)
127
+ - Score interpretation. In boost mode, under 0.3 is a good match and over 0.5 is worth skipping. RRF mode scores by rank instead.
128
128
  - When to use `ingest_file` vs `ingest_data`
129
129
  - HTML ingestion and URL handling
130
130
 
@@ -142,7 +142,7 @@ That's it. No Docker. No Python. No server infrastructure to manage.
142
142
 
143
143
  ## Web Interface
144
144
 
145
- RAG Vault includes a full-featured web UI for managing your documents without the command line.
145
+ RAG Vault has a web UI so you can manage your documents without touching the command line.
146
146
 
147
147
  ### Launch the Web UI
148
148
 
@@ -157,7 +157,7 @@ Open [http://localhost:3000](http://localhost:3000) in your browser.
157
157
  - **Upload documents**: Drag and drop PDF, DOCX, Markdown, TXT, JSON, JSONL, and NDJSON files
158
158
  - **Search instantly**: Type queries and see results with relevance scores
159
159
  - **Preview content**: Click any result to see the full chunk in context
160
- - **Manage files**: View all indexed documents and delete what you do not need
160
+ - **Manage files**: View all indexed documents and delete what you don't need
161
161
  - **Switch databases**: Create and switch between multiple knowledge bases
162
162
  - **Monitor status**: See document counts, memory usage, and search mode
163
163
  - **Export/Import settings**: Back up and restore your vault configuration
@@ -166,7 +166,7 @@ Open [http://localhost:3000](http://localhost:3000) in your browser.
166
166
 
167
167
  ### REST API
168
168
 
169
- The web server exposes a REST API for programmatic access. Set `RAG_API_KEY` to require authentication:
169
+ The web server has a REST API you can hit directly. Set `RAG_API_KEY` to require authentication:
170
170
 
171
171
  ```bash
172
172
  # With authentication (when RAG_API_KEY is set)
@@ -175,7 +175,7 @@ curl -X POST "http://localhost:3000/api/v1/search" \
175
175
  -H "Content-Type: application/json" \
176
176
  -d '{"query": "authentication", "limit": 5}'
177
177
 
178
- # Search documents (no auth required if RAG_API_KEY is not set)
178
+ # Search documents (no auth needed if RAG_API_KEY isn't set)
179
179
  curl -X POST "http://localhost:3000/api/v1/search" \
180
180
  -H "Content-Type: application/json" \
181
181
  -d '{"query": "authentication", "limit": 5}'
@@ -201,7 +201,7 @@ curl "http://localhost:3000/api/v1/health"
201
201
 
202
202
  ### Reader API Endpoints
203
203
 
204
- For programmatic document reading and cross-document discovery:
204
+ These endpoints let you read documents and find connections across them:
205
205
 
206
206
  ```bash
207
207
  # Get all chunks for a document (ordered by index)
@@ -218,7 +218,7 @@ curl -X POST "http://localhost:3000/api/v1/chunks/batch-related" \
218
218
 
219
219
  ## Remote Mode
220
220
 
221
- RAG Vault can also run as an HTTP server for remote MCP clients like Claude.ai, Claude Desktop, or any client supporting Streamable HTTP or SSE transports.
221
+ RAG Vault can also run as an HTTP server so remote MCP clients like Claude.ai, Claude Desktop, or anything that supports Streamable HTTP or SSE can connect to it.
222
222
 
223
223
  ```bash
224
224
  # Start remote server (default port 3001)
@@ -228,7 +228,7 @@ npx github:RobThePCGuy/rag-vault --remote
228
228
  npx github:RobThePCGuy/rag-vault --remote --port 8080
229
229
  ```
230
230
 
231
- Stdio mode is unchanged -- omit `--remote` and everything works as before with Cursor, Claude Code, and Codex.
231
+ Stdio mode is unchanged. Just leave off `--remote` and everything works as before with Cursor, Claude Code, and Codex.
232
232
 
233
233
  ### Connecting from Claude Desktop
234
234
 
@@ -312,12 +312,18 @@ Pure semantic search would miss this. RAG Vault finds it.
312
312
  ```
313
313
  Document → Parse → Chunk by meaning → Embed locally → Store in LanceDB
314
314
 
315
- Query → Embed → Vector search → Keyword boost → Quality filter → Results
315
+ Query → Embed → Vector search + BM25 → Fusion → Optional reranking → Results
316
316
  ```
317
317
 
318
318
  **Smart chunking**: Splits by meaning, not character count. Keeps code blocks intact.
319
319
 
320
- **Hybrid search**: Vector similarity finds related content. Keyword boost ranks exact matches higher.
320
+ **Hybrid search**: Two fusion modes that combine vector similarity with BM25 keyword matching:
321
+ - **Boost mode** (default): BM25 boosts vector search distances multiplicatively. Simple and predictable.
322
+ - **RRF mode** (opt-in via `RAG_SEARCH_MODE=rrf`): [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) treats vector and BM25 as independent voters. This can surface documents that vector search alone would miss.
323
+
324
+ **Cross-encoder reranking** (opt-in): After the first pass, a cross-encoder model (`Xenova/ms-marco-MiniLM-L-6-v2`, ~23MB) scores each (query, passage) pair together for tighter relevance ranking. Turn it on with `RAG_RERANKER_ENABLED=true`.
325
+
326
+ **Query expansion** (opt-in): Generates reformulated queries to improve recall when searches are paraphrased or conceptual. Two backends: local template-based expansion (default, fully offline) or LLM-based [HyDE](https://arxiv.org/abs/2212.10496) through an external API. Turn it on with `RAG_HYDE_ENABLED=true`.
321
327
 
322
328
  **Quality filtering**: Groups results by relevance gaps instead of arbitrary top-K cutoffs.
323
329
 
@@ -347,12 +353,12 @@ Query → Embed → Vector search → Keyword boost → Quality filter → Resul
347
353
  | `DB_PATH` | `./lancedb/` | Where vectors are stored |
348
354
  | `CACHE_DIR` | `./models/` | Model cache directory |
349
355
  | `MODEL_NAME` | `Xenova/all-MiniLM-L6-v2` | HuggingFace embedding model |
350
- | `MAX_FILE_SIZE` | `104857600` (100 MB) | Maximum file size in bytes for ingestion |
351
- | `RAG_EMBEDDING_DEVICE` | `auto` | Inference device: `auto`, `cpu`, `cuda`, `dml`, `webgpu`, `wasm`, `gpu`, `webnn` |
356
+ | `MAX_FILE_SIZE` | `104857600` (100 MB) | Biggest file you can ingest |
357
+ | `RAG_EMBEDDING_DEVICE` | `auto` | Device for running embeddings: `auto`, `cpu`, `cuda`, `dml`, `webgpu`, `wasm`, `gpu`, `webnn` |
352
358
  | `WEB_PORT` | `3000` | Port for web interface |
353
359
  | `UPLOAD_DIR` | `./uploads/` | Temporary directory for web UI file uploads |
354
360
 
355
- > **Windows users:** `RAG_EMBEDDING_DEVICE=auto` attempts GPU providers (DirectML) which can fail if ONNX Runtime GPU binaries are not available. If you see embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config for reliable operation. See the [GPU acceleration FAQ](#frequently-asked-questions) for details.
361
+ > **Windows users:** `RAG_EMBEDDING_DEVICE=auto` tries GPU providers (DirectML), which can fail if ONNX Runtime GPU binaries aren't available. If you see embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config for reliable operation. See the [GPU acceleration FAQ](#frequently-asked-questions) for details.
356
362
 
357
363
  One-command override (no `.env` edit):
358
364
 
@@ -371,13 +377,38 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
371
377
 
372
378
  | Variable | Default | What it does |
373
379
  |----------|---------|--------------|
374
- | `RAG_HYBRID_WEIGHT` | `0.6` | Keyword boost strength. `0` = semantic-only, `1.0` = BM25-only, higher = stronger boost for exact keyword matches |
375
- | `RAG_GROUPING` | unset | Quality filter grouping mode: `similar` = top group only, `related` = top 2 groups |
376
- | `RAG_MAX_DISTANCE` | unset | Filter out results below this relevance threshold |
377
- | `RAG_GROUPING_STD_MULTIPLIER` | `1.5` | Standard-deviation multiplier for detecting relevance gaps between result groups |
378
- | `RAG_HYBRID_CANDIDATE_MULTIPLIER` | `2` | Multiplier for number of vector candidates to fetch before keyword reranking |
379
- | `RAG_FTS_MAX_FAILURES` | `3` | Number of full-text search failures before temporarily disabling FTS |
380
- | `RAG_FTS_COOLDOWN_MS` | `300000` (5 min) | Cooldown period before retrying FTS after max failures reached |
380
+ | `RAG_SEARCH_MODE` | `boost` | Fusion mode: `boost` (multiplicative keyword boost) or `rrf` (Reciprocal Rank Fusion) |
381
+ | `RAG_HYBRID_WEIGHT` | `0.6` | Balance between vector and BM25. `0` = vector-only, `1.0` = BM25-only |
382
+ | `RAG_RRF_K` | `60` | RRF smoothing constant (only applies in `rrf` mode). Industry standard is 60. |
383
+ | `RAG_GROUPING` | unset | Quality filter: `similar` = top group only, `related` = top 2 groups |
384
+ | `RAG_MAX_DISTANCE` | unset | Drops results below this relevance threshold (use with `boost` mode; `rrf` scores are rank-based) |
385
+ | `RAG_GROUPING_STD_MULTIPLIER` | `1.5` | How many standard deviations between groups counts as a relevance gap |
386
+ | `RAG_HYBRID_CANDIDATE_MULTIPLIER` | `2` | How many extra vector candidates to grab before keyword reranking |
387
+ | `RAG_FTS_MAX_FAILURES` | `3` | Full-text search failures before FTS is temporarily disabled |
388
+ | `RAG_FTS_COOLDOWN_MS` | `300000` (5 min) | How long to wait before retrying FTS after hitting the failure limit |
389
+
390
+ ### Cross-Encoder Reranking (opt-in)
391
+
392
+ | Variable | Default | What it does |
393
+ |----------|---------|--------------|
394
+ | `RAG_RERANKER_ENABLED` | `false` | Turn on cross-encoder reranking for better results |
395
+ | `RAG_RERANKER_MODEL` | `Xenova/ms-marco-MiniLM-L-6-v2` | HuggingFace cross-encoder model (~23MB ONNX, downloads on first use) |
396
+ | `RAG_RERANKER_CANDIDATE_MULTIPLIER` | `2` | Fetch this many extra candidates for the reranker to score |
397
+ | `RAG_RERANKER_DEVICE` | `auto` | Device for the reranker (same options as `RAG_EMBEDDING_DEVICE`) |
398
+ | `RERANKER_INIT_TIMEOUT_MS` | `600000` (10 min) | Timeout for model download and initialization |
399
+
400
+ ### Query Expansion / HyDE (opt-in)
401
+
402
+ | Variable | Default | What it does |
403
+ |----------|---------|--------------|
404
+ | `RAG_HYDE_ENABLED` | `false` | Turn on query expansion for better recall |
405
+ | `RAG_HYDE_BACKEND` | `rule-based` | `rule-based` for local template expansion, `api` for LLM-based HyDE |
406
+ | `RAG_HYDE_EXPANSIONS` | `2` | Number of expanded queries to generate |
407
+ | `RAG_HYDE_API_KEY` | unset | API key for LLM backend (required when `RAG_HYDE_BACKEND=api`) |
408
+ | `RAG_HYDE_API_BASE_URL` | `https://api.anthropic.com` | API endpoint for LLM backend |
409
+ | `RAG_HYDE_API_MODEL` | `claude-haiku-4-5-20251001` | Model for LLM-based expansion |
410
+
411
+ > **Privacy note:** The `api` backend sends query text to an external LLM endpoint, which breaks the "zero cloud" guarantee. The default `rule-based` backend is fully local.
381
412
 
382
413
  ### Security (optional)
383
414
 
@@ -395,7 +426,7 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
395
426
  | `ALLOWED_SCAN_ROOTS` | Home directory | Directories allowed for database scanning |
396
427
  | `JSON_BODY_LIMIT` | `5mb` | Max request body size |
397
428
  | `REQUEST_TIMEOUT_MS` | `30000` | API request timeout |
398
- | `REQUEST_LOGGING` | `false` | Enable request audit logging |
429
+ | `REQUEST_LOGGING` | `false` | Turn on request audit logging |
399
430
 
400
431
  > Copy [`.env.example`](.env.example) for a complete configuration template.
401
432
 
@@ -413,7 +444,7 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
413
444
  <details>
414
445
  <summary><strong>Is my data really private?</strong></summary>
415
446
 
416
- For local files, yes. Indexing and search run on your machine after the embedding model downloads (~90MB). RAG Vault only uses network if you choose remote URL ingestion or need to download a model.
447
+ For local files, yes. Indexing and search run on your machine after the embedding model downloads (~90MB). RAG Vault only hits the network if you choose remote URL ingestion or need to download a model.
417
448
 
418
449
  </details>
419
450
 
@@ -427,11 +458,11 @@ Yes, after the first run. The model caches locally.
427
458
  <details>
428
459
  <summary><strong>What about GPU acceleration?</strong></summary>
429
460
 
430
- RAG Vault uses Transformers.js device auto-selection by default (`RAG_EMBEDDING_DEVICE=auto`). When GPU providers are properly configured, this can speed up embedding generation.
461
+ RAG Vault picks a device automatically by default (`RAG_EMBEDDING_DEVICE=auto`). When GPU providers are set up correctly, this can speed up embedding generation.
431
462
 
432
- **Important:** On Windows, `auto` tries DirectML (`dml`) which requires ONNX Runtime GPU binaries. If those binaries are not installed or your GPU setup is incomplete, the server will fail to start entirely -- it does not gracefully fall back to CPU. The same applies on Linux without CUDA binaries.
463
+ **Important:** On Windows, `auto` tries DirectML (`dml`), which requires ONNX Runtime GPU binaries. If those binaries aren't installed or your GPU setup is incomplete, the server won't start at all. It doesn't fall back to CPU gracefully. The same goes for Linux without CUDA binaries.
433
464
 
434
- **Recommendation:** If you encounter embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. CPU mode is reliable on all platforms and fast enough for most workloads (the default model is only ~90MB).
465
+ **Recommendation:** If you hit embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. CPU mode is reliable on all platforms and fast enough for most workloads (the default model is only ~90MB).
435
466
 
436
467
  ```json
437
468
  "env": {
@@ -446,7 +477,7 @@ Supported device values: `auto`, `cpu`, `cuda`, `dml`, `gpu`, `wasm`, `webgpu`,
446
477
  <details>
447
478
  <summary><strong>Can I change the embedding model?</strong></summary>
448
479
 
449
- Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You must delete `DB_PATH` and re-ingest because different models produce incompatible vectors.
480
+ Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You'll need to delete `DB_PATH` and re-ingest because different models produce incompatible vectors.
450
481
 
451
482
  **Recommended upgrade:** For better quality and multilingual support, use [EmbeddingGemma](https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX):
452
483
 
@@ -454,7 +485,7 @@ Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You must delete `DB_P
454
485
  "MODEL_NAME": "onnx-community/embeddinggemma-300m-ONNX"
455
486
  ```
456
487
 
457
- This model is a strong option for multilingual and higher-quality retrieval use cases.
488
+ It's a solid pick if you need multilingual support or higher-quality retrieval.
458
489
 
459
490
  **Other specialized models:**
460
491
  - Scientific: `sentence-transformers/allenai-specter`
@@ -473,16 +504,16 @@ Copy the `DB_PATH` directory (default: `./lancedb/`).
473
504
 
474
505
  | Problem | Solution |
475
506
  |---------|----------|
476
- | No results found | Documents must be ingested first. Run "List all ingested files" to check. |
477
- | Model download failed | Check internet connection. Model is ~90MB from HuggingFace. |
507
+ | No results found | Documents need to be ingested first. Run "List all ingested files" to check. |
508
+ | Model download failed | Check your internet connection. The model is ~90MB from HuggingFace. |
478
509
  | Embedding initialization fails | Set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. The `auto` default can fail on Windows without GPU binaries. |
479
510
  | `Protobuf parsing failed` | Corrupted model cache. Delete `CACHE_DIR` (default: `./models/`) and restart. RAG Vault also auto-retries with an isolated recovery cache. |
480
511
  | File too large | Default limit is 100MB. Set `MAX_FILE_SIZE` higher or split the file. |
481
512
  | Path outside BASE_DIR | All file paths must be under `BASE_DIR`. Use absolute paths. |
482
- | MCP tools not showing | Verify config syntax, restart your AI tool completely (Cmd+Q on Mac). |
513
+ | MCP tools not showing | Check your config syntax and restart your AI tool completely (Cmd+Q on Mac). |
483
514
  | `mcp-publisher login github` fails with `slow_down` | Use token login instead: `mcp-publisher login github --token "$(gh auth token)"` (or pass a PAT). |
484
- | 401 Unauthorized | API key required. Set `RAG_API_KEY` or use correct header format. |
485
- | 429 Too Many Requests | Rate limited. Wait for reset or increase `RATE_LIMIT_MAX_REQUESTS`. |
515
+ | 401 Unauthorized | API key required. Set `RAG_API_KEY` or use the correct header format. |
516
+ | 429 Too Many Requests | Rate limited. Wait for the reset or increase `RATE_LIMIT_MAX_REQUESTS`. |
486
517
  | CORS errors | Add your origin to `CORS_ORIGINS` environment variable. |
487
518
 
488
519
  ## Development
@@ -527,7 +558,7 @@ pnpm release:dry
527
558
 
528
559
  ### Test Tiers
529
560
 
530
- - `pnpm test:unit`: deterministic tests for local/CI quality checks, excluding model-download integration paths.
561
+ - `pnpm test:unit`: deterministic tests for local/CI quality checks. Doesn't include model-download integration paths.
531
562
  - `pnpm test:integration`: full integration and E2E workflows, including embedding model initialization.
532
563
 
533
564
  Use `RUN_EMBEDDING_INTEGRATION=1` to explicitly opt into network/model-dependent suites.
@@ -537,8 +568,8 @@ Use `RUN_EMBEDDING_INTEGRATION=1` to explicitly opt into network/model-dependent
537
568
  - Releases are local and scripted via `scripts/release-npm.sh`.
538
569
  - Supported bumps: `patch`, `minor`, `major`.
539
570
  - The script runs dependency installs, `pnpm check:all`, and `pnpm ui:build` before touching version files.
540
- - `package.json` and `server.json` versions are updated only after checks pass, and auto-restored if any later step fails.
541
- - `pnpm release:dry` performs the full gate plus npm dry-run publish and always restores version files.
571
+ - `package.json` and `server.json` versions only get updated after checks pass, and they're auto-restored if any later step fails.
572
+ - `pnpm release:dry` runs the full gate plus npm dry-run publish and always restores version files.
542
573
 
543
574
  ### Project Structure
544
575
 
@@ -550,11 +581,13 @@ src/
550
581
  ├── errors/ # Error handling utilities
551
582
  ├── explainability/ # Keyword-based result explanations
552
583
  ├── flywheel/ # Feedback loop (pin/dismiss reranking)
584
+ ├── hyde/ # Query expansion + HyDE (LLM-based)
553
585
  ├── parser/ # PDF, DOCX, HTML parsing
554
586
  ├── query/ # Advanced query syntax parser
587
+ ├── reranker/ # Cross-encoder reranking (Transformers.js)
555
588
  ├── server/ # MCP tool handlers + remote transport
556
589
  ├── utils/ # Config, file helpers, process handlers
557
- ├── vectordb/ # LanceDB + hybrid search
590
+ ├── vectordb/ # LanceDB + hybrid search (boost + RRF)
558
591
  └── web/ # Express server + REST API
559
592
 
560
593
  web-ui/ # React frontend (Vite + Tailwind)
@@ -573,6 +606,6 @@ MIT: free for personal and commercial use.
573
606
 
574
607
  Built with [Model Context Protocol](https://modelcontextprotocol.io/), [LanceDB](https://lancedb.com/), and [Transformers.js](https://huggingface.co/docs/transformers.js).
575
608
 
576
- > Started as a fork of [mcp-local-rag](https://github.com/shinpr/mcp-local-rag) by [Shinsuke Kagawa](https://github.com/shinpr). Now its its own thing.
577
- > Huge credit to upstream contributors for the foundation, Ive been iterating hard from there.
609
+ > Started as a fork of [mcp-local-rag](https://github.com/shinpr/mcp-local-rag) by [Shinsuke Kagawa](https://github.com/shinpr). Now it's its own thing.
610
+ > Huge credit to upstream contributors for the foundation, I've been iterating hard from there.
578
611
  > Local-first dev tools, all the way.
@@ -0,0 +1,47 @@
1
+ /**
2
+ * HyDE configuration
3
+ */
4
+ export interface HyDEConfig {
5
+ /** Whether HyDE is enabled */
6
+ enabled: boolean;
7
+ /** Backend: 'rule-based' for local template-based query expansion, 'api' for LLM-based HyDE */
8
+ backend: 'rule-based' | 'api';
9
+ /** Number of query expansions to generate (default: 2) */
10
+ numExpansions: number;
11
+ /** API key for LLM backend (optional) */
12
+ apiKey?: string;
13
+ /** API base URL for LLM backend (optional) */
14
+ apiBaseUrl?: string;
15
+ /** API model name for LLM backend (optional) */
16
+ apiModel?: string;
17
+ }
18
+ /**
19
+ * Expanded query with weight
20
+ */
21
+ export interface ExpandedQuery {
22
+ /** The expanded query text */
23
+ text: string;
24
+ /** Weight for RRF voting (original = 1.0, expansions = 0.5) */
25
+ weight: number;
26
+ }
27
+ /**
28
+ * HyDE (Hypothetical Document Embeddings) query expander.
29
+ *
30
+ * Generates hypothetical answer documents from a query to improve
31
+ * retrieval recall. Each expansion becomes an additional voter in
32
+ * RRF fusion with a lower weight (0.5) than the original query (1.0).
33
+ */
34
+ export declare class HyDEExpander {
35
+ private readonly config;
36
+ constructor(config: HyDEConfig);
37
+ /**
38
+ * Expand a query into the original plus hypothetical documents.
39
+ *
40
+ * @param query - The original search query
41
+ * @returns Array of expanded queries with weights.
42
+ * First item is always the original query (weight 1.0).
43
+ * Subsequent items are hypothetical expansions (weight 0.5).
44
+ */
45
+ expandQuery(query: string): Promise<ExpandedQuery[]>;
46
+ }
47
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/hyde/index.ts"],"names":[],"mappings":"AAiBA;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8BAA8B;IAC9B,OAAO,EAAE,OAAO,CAAA;IAChB,+FAA+F;IAC/F,OAAO,EAAE,YAAY,GAAG,KAAK,CAAA;IAC7B,0DAA0D;IAC1D,aAAa,EAAE,MAAM,CAAA;IACrB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8CAA8C;IAC9C,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,gDAAgD;IAChD,QAAQ,CAAC,EAAE,MAAM,CAAA;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,MAAM,EAAE,MAAM,CAAA;CACf;AAkMD;;;;;;GAMG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAY;gBAEvB,MAAM,EAAE,UAAU;IAI9B;;;;;;;OAOG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;CAkC3D"}
@@ -0,0 +1,203 @@
1
+ // Query expansion module for improved retrieval recall
2
+ //
3
+ // Two backends are available:
4
+ // - 'rule-based': Template-based query expansion (local, offline, no dependencies).
5
+ // Generates reformulated queries using pattern detection and templates.
6
+ // This is a classic IR query expansion technique, not HyDE.
7
+ // - 'api': LLM-based HyDE (Hypothetical Document Embeddings). Generates hypothetical
8
+ // answer documents via an external LLM API, then embeds them for retrieval.
9
+ // Reference: Gao et al. "Precise Zero-Shot Dense Retrieval without Relevance Labels" (2022)
10
+ //
11
+ // Each expansion is embedded alongside the original query and becomes a separate
12
+ // voter in RRF fusion, improving recall for paraphrased or conceptual queries.
13
// ============================================
// Query Pattern Detection
// ============================================
/** Common question word patterns */
const QUESTION_PATTERN = /^(what|how|why|when|where|who|which|can|does|is|are|was|were|do|did|should|could|would)\s+/i;
/** Common technical/code patterns */
const CODE_PATTERN = /`[^`]+`|[A-Z][a-z]+[A-Z]|[a-z]+_[a-z]+|\.[a-z]+\(|ERR_|ERROR_|[A-Z_]{3,}/;
/** Error message patterns */
const ERROR_PATTERN = /error|exception|fail|crash|bug|issue|problem|broken|not working/i;
/**
 * Classify a query so the expander can pick matching templates.
 * Precedence is deliberate: error cues outrank code cues, which
 * outrank question words; anything else is a conceptual query.
 */
function detectQueryType(query) {
    return ERROR_PATTERN.test(query)
        ? 'error'
        : CODE_PATTERN.test(query)
            ? 'code'
            : QUESTION_PATTERN.test(query)
                ? 'question'
                : 'concept';
}
34
+ // ============================================
35
+ // Rule-Based Expansion
36
+ // ============================================
37
/**
 * Generate hypothetical documents using rule-based templates.
 * Works offline with no dependencies — always available as a fallback.
 *
 * The strategy varies by detected query type:
 * - Questions: Convert to declarative statements
 * - Errors: Frame as troubleshooting documentation
 * - Code: Frame as technical documentation
 * - Concepts: Frame as explanatory documentation
 */
function ruleBasedExpansion(query, numExpansions) {
    // Drop trailing question marks so templates read as statements.
    const cleanQuery = query.replace(/\?+$/, '').trim();
    const lower = cleanQuery.toLowerCase();
    const capitalized = `${cleanQuery.charAt(0).toUpperCase()}${cleanQuery.slice(1)}`;
    const queryType = detectQueryType(query);
    let candidates;
    if (queryType === 'question') {
        candidates = [];
        // Reformulate "how do I X?" as a declarative lead sentence when
        // enough text remains after stripping the question prefix.
        const declarative = cleanQuery.replace(QUESTION_PATTERN, '').trim();
        if (declarative.length > 3) {
            candidates.push(`${declarative.charAt(0).toUpperCase()}${declarative.slice(1)}. This is explained in detail in the documentation.`);
        }
        candidates.push(`The documentation explains that ${lower}. The key points are as follows.`, `A guide about ${lower} would cover the following topics and provide step-by-step instructions.`);
    }
    else if (queryType === 'error') {
        // Frame as troubleshooting documentation.
        candidates = [
            `To resolve ${cleanQuery}, follow these troubleshooting steps. The root cause is typically related to configuration or dependencies.`,
            `The error "${cleanQuery}" occurs when the system encounters an unexpected state. The solution involves checking the following.`,
            `Common causes for ${cleanQuery} include misconfiguration, missing dependencies, and version incompatibilities. Here is how to fix it.`,
        ];
    }
    else if (queryType === 'code') {
        // Frame as technical documentation.
        candidates = [
            `The implementation of ${cleanQuery} involves the following components and follows these patterns.`,
            `Documentation for ${cleanQuery}: This feature provides the following functionality and can be configured as described below.`,
            `${cleanQuery} is used to handle specific operations in the system. Here is how it works and how to use it correctly.`,
        ];
    }
    else {
        // General conceptual expansion.
        candidates = [
            `A document about ${cleanQuery} would discuss the following key aspects, including definitions, usage patterns, and best practices.`,
            `${capitalized} is a concept that encompasses several important areas. The documentation covers the following topics.`,
            `The following documentation explains ${cleanQuery} in detail, covering its purpose, implementation, and common use cases.`,
        ];
    }
    // Never hand back more expansions than the caller asked for.
    return candidates.slice(0, numExpansions);
}
89
+ // ============================================
90
+ // API-Based Expansion
91
+ // ============================================
92
/**
 * Generate hypothetical documents using an LLM API
 * (Anthropic Messages endpoint). Falls back to rule-based
 * expansion on any failure: missing key, network error,
 * timeout, or an unusable response body.
 */
async function apiBasedExpansion(query, numExpansions, config) {
    // Without credentials the API path cannot work; degrade immediately.
    if (!config.apiKey) {
        console.error('HyDE: No API key configured, falling back to rule-based expansion');
        return ruleBasedExpansion(query, numExpansions);
    }
    const baseUrl = config.apiBaseUrl || 'https://api.anthropic.com';
    const model = config.apiModel || 'claude-haiku-4-5-20251001';
    const prompt = `Generate ${numExpansions} short hypothetical document excerpts (2-3 sentences each) that would be relevant to answering the following query. Each excerpt should sound like it comes from real documentation. Return only the excerpts, separated by newlines.

Query: ${query}`;
    try {
        const response = await fetch(`${baseUrl}/v1/messages`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': config.apiKey,
                'anthropic-version': '2023-06-01',
            },
            body: JSON.stringify({
                model,
                max_tokens: 300,
                messages: [{ role: 'user', content: prompt }],
            }),
            // Abort slow requests; the resulting error lands in the catch below.
            signal: AbortSignal.timeout(10000), // 10s timeout
        });
        if (!response.ok) {
            throw new Error(`API request failed: ${response.status} ${response.statusText}`);
        }
        const payload = await response.json();
        // Minimal runtime validation — we only depend on content[0].text.
        if (!payload || typeof payload !== 'object' || !Array.isArray(payload['content'])) {
            throw new Error('API returned unexpected response format');
        }
        const firstBlock = payload['content'][0];
        const rawText = firstBlock && typeof firstBlock['text'] === 'string' ? firstBlock['text'] : '';
        // One excerpt per line; very short lines are treated as noise.
        const expansions = rawText
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line.length > 20)
            .slice(0, numExpansions);
        if (expansions.length > 0) {
            return expansions;
        }
        console.error('HyDE: API returned no valid expansions, falling back to rule-based');
        return ruleBasedExpansion(query, numExpansions);
    }
    catch (error) {
        console.error(`HyDE: API expansion failed: ${error.message}, falling back to rule-based`);
        return ruleBasedExpansion(query, numExpansions);
    }
}
149
+ // ============================================
150
+ // HyDEExpander Class
151
+ // ============================================
152
/**
 * HyDE (Hypothetical Document Embeddings) query expander.
 *
 * Generates hypothetical answer documents from a query to improve
 * retrieval recall. Each expansion becomes an additional voter in
 * RRF fusion with a lower weight (0.5) than the original query (1.0).
 */
export class HyDEExpander {
    config;
    constructor(config) {
        this.config = config;
    }
    /**
     * Expand a query into the original plus hypothetical documents.
     *
     * @param query - The original search query
     * @returns Array of expanded queries with weights.
     *          First item is always the original query (weight 1.0).
     *          Subsequent items are hypothetical expansions (weight 0.5).
     */
    async expandQuery(query) {
        const original = { text: query, weight: 1.0 };
        if (!this.config.enabled) {
            return [original];
        }
        // Expansion adds little signal for very short queries (< 3 words).
        if (query.trim().split(/\s+/).length < 3) {
            return [original];
        }
        const results = [original];
        try {
            const expansions = this.config.backend === 'api'
                ? await apiBasedExpansion(query, this.config.numExpansions, this.config)
                : ruleBasedExpansion(query, this.config.numExpansions);
            // Hypothetical documents vote with half the weight of the real query.
            for (const text of expansions) {
                results.push({ text, weight: 0.5 });
            }
        }
        catch (error) {
            console.error(`HyDE: Expansion failed: ${error.message}`);
            // Graceful degradation: fall through with only the original query.
        }
        return results;
    }
}
203
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/hyde/index.ts"],"names":[],"mappings":"AAAA,uDAAuD;AACvD,EAAE;AACF,8BAA8B;AAC9B,oFAAoF;AACpF,0EAA0E;AAC1E,8DAA8D;AAC9D,qFAAqF;AACrF,8EAA8E;AAC9E,8FAA8F;AAC9F,EAAE;AACF,iFAAiF;AACjF,+EAA+E;AAkC/E,+CAA+C;AAC/C,0BAA0B;AAC1B,+CAA+C;AAE/C,oCAAoC;AACpC,MAAM,gBAAgB,GACpB,6FAA6F,CAAA;AAE/F,qCAAqC;AACrC,MAAM,YAAY,GAAG,0EAA0E,CAAA;AAE/F,6BAA6B;AAC7B,MAAM,aAAa,GAAG,kEAAkE,CAAA;AAExF;;GAEG;AACH,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAA;IAC7C,IAAI,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAC3C,IAAI,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,UAAU,CAAA;IACnD,OAAO,SAAS,CAAA;AAClB,CAAC;AAED,+CAA+C;AAC/C,uBAAuB;AACvB,+CAA+C;AAE/C;;;;;;;;;GASG;AACH,SAAS,kBAAkB,CAAC,KAAa,EAAE,aAAqB;IAC9D,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,CAAC,CAAA;IACxC,MAAM,UAAU,GAAa,EAAE,CAAA;IAE/B,qDAAqD;IACrD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IAEnD,QAAQ,SAAS,EAAE,CAAC;QAClB,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,4CAA4C;YAC5C,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YACnE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,UAAU,CAAC,IAAI,CACb,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,qDAAqD,CACnH,CAAA;YACH,CAAC;YAED,mCAAmC;YACnC,UAAU,CAAC,IAAI,CACb,mCAAmC,UAAU,CAAC,WAAW,EAAE,kCAAkC,CAC9F,CAAA;YAED,2BAA2B;YAC3B,UAAU,CAAC,IAAI,CACb,iBAAiB,UAAU,CAAC,WAAW,EAAE,0EAA0E,CACpH,CAAA;YACD,MAAK;QACP,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,yCAAyC;YACzC,UAAU,CAAC,IAAI,CACb,cAAc,UAAU,6GAA6G,CACtI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,cAAc,UAAU,wGAAwG,CACjI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,qBAAqB,UAAU,wGAAwG,CACxI,CAAA;YACD,MAAK;QACP,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,mCAAmC;YACnC,UAAU,CAAC,IAAI,CACb,yBAAyB,UAAU,gEAAgE,CACpG,CAAA;YACD,UAAU,CAAC,IAAI,CACb,qBAAqB,UAAU,+FAA+F,CAC/H,CAAA;YACD,UAAU,CAAC,IAAI,CACb,GAAG,UAAU,yGAAyG,CACvH,CAAA;YACD,MAAK;QACP,CAAC;QAED,OAAO,CAAC,CAAC,CAAC;YACR,+BAA+B;YAC/B,UAAU,CAAC,IAAI,CACb,oB
AAoB,UAAU,sGAAsG,CACrI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,wGAAwG,CACpK,CAAA;YACD,UAAU,CAAC,IAAI,CACb,wCAAwC,UAAU,yEAAyE,CAC5H,CAAA;YACD,MAAK;QACP,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAA;AAC3C,CAAC;AAED,+CAA+C;AAC/C,sBAAsB;AACtB,+CAA+C;AAE/C;;;GAGG;AACH,KAAK,UAAU,iBAAiB,CAC9B,KAAa,EACb,aAAqB,EACrB,MAAkB;IAElB,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACnB,OAAO,CAAC,KAAK,CAAC,mEAAmE,CAAC,CAAA;QAClF,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACjD,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,IAAI,2BAA2B,CAAA;IAChE,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,IAAI,2BAA2B,CAAA;IAE5D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,YAAY,aAAa;;SAEnC,KAAK,EAAE,CAAA;QAEZ,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,cAAc,EAAE;YACrD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,mBAAmB,EAAE,YAAY;aAClC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,UAAU,EAAE,GAAG;gBACf,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;aAC9C,CAAC;YACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,cAAc;SACnD,CAAC,CAAA;QAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAA;QAClF,CAAC;QAED,MAAM,IAAI,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAE3C,2CAA2C;QAC3C,MAAM,OAAO,GAAG,IAAsC,CAAA;QACtD,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;YAClF,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAA;QAC5D,CAAC;QACD,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,CAAc,CAAA;QAC/C,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAwC,CAAA;QACpE,MAAM,IAAI,GAAG,UAAU,IAAI,OAAO,UAAU,CAAC,MAAM,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;QAC3F,MAAM,UAAU,GAAG,IAAI;aACpB,KAAK,CAAC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAClC,MAAM,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;aAC1C,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAA;QAE1B,IAAI,UAAU,CAAC,MAAM,KAAK,C
AAC,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,oEAAoE,CAAC,CAAA;YACnF,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;QACjD,CAAC;QAED,OAAO,UAAU,CAAA;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,+BAAgC,KAAe,CAAC,OAAO,8BAA8B,CACtF,CAAA;QACD,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACjD,CAAC;AACH,CAAC;AAED,+CAA+C;AAC/C,qBAAqB;AACrB,+CAA+C;AAE/C;;;;;;GAMG;AACH,MAAM,OAAO,YAAY;IACN,MAAM,CAAY;IAEnC,YAAY,MAAkB;QAC5B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,WAAW,CAAC,KAAa;QAC7B,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;QACvC,CAAC;QAED,mDAAmD;QACnD,MAAM,OAAO,GAAoB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;QAE/D,4DAA4D;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAA;QAClD,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,OAAO,OAAO,CAAA;QAChB,CAAC;QAED,IAAI,CAAC;YACH,IAAI,UAAoB,CAAA;YAExB,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,KAAK,EAAE,CAAC;gBAClC,UAAU,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,CAAA;YACrF,CAAC;iBAAM,CAAC;gBACN,UAAU,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAA;YACnE,CAAC;YAED,mCAAmC;YACnC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;gBACnC,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;YAChD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2BAA4B,KAAe,CAAC,OAAO,EAAE,CAAC,CAAA;YACpE,mEAAmE;QACrE,CAAC;QAED,OAAO,OAAO,CAAA;IAChB,CAAC;CACF"}
@@ -0,0 +1,76 @@
1
/**
 * Configuration for the cross-encoder reranker.
 */
export interface RerankerConfig {
    /** HuggingFace cross-encoder model path */
    modelPath: string;
    /** Model cache directory */
    cacheDir: string;
    /**
     * Device hint for Transformers.js runtime.
     * Examples: auto, cpu, cuda, dml, webgpu
     */
    device?: string;
    /**
     * Timeout for model initialization/download in milliseconds.
     * Default: 600000 (10 minutes).
     */
    initTimeoutMs?: number;
}
20
/**
 * A reranked passage: its position in the caller's input array
 * plus the relevance score assigned by the cross-encoder.
 */
export interface RerankedResult {
    /** Original index in the input array */
    index: number;
    /** Cross-encoder relevance score (higher = more relevant) */
    score: number;
}
29
/**
 * Cross-encoder reranker using Transformers.js
 *
 * Scores (query, passage) pairs for relevance using a cross-encoder model.
 * Unlike bi-encoders, cross-encoders jointly encode both texts, producing
 * more accurate relevance judgments at the cost of speed.
 *
 * Default model: Xenova/ms-marco-MiniLM-L-6-v2 (~23MB ONNX)
 */
export declare class Reranker {
    /** Loaded Transformers.js model handle (null until initialized). NOTE(review): exact type erased in emit — confirm in src. */
    private model;
    /** In-flight initialization promise, used to dedupe concurrent init calls — presumably; confirm against implementation. */
    private initPromise;
    /** Immutable reranker settings supplied at construction. */
    private readonly config;
    constructor(config: RerankerConfig);
    /**
     * Get the model name/path
     */
    getModelName(): string;
    /**
     * Resolve the device to use for inference
     */
    private resolveDevice;
    /**
     * Get a recovery cache directory for corrupted model caches
     */
    private getRecoveryCacheDir;
    /**
     * Check if an error is recoverable by using a fresh cache
     */
    private isRecoverableCacheError;
    /**
     * Initialize Transformers.js cross-encoder model
     */
    initialize(): Promise<void>;
    /**
     * Ensure model is initialized (lazy initialization)
     */
    private ensureInitialized;
    /**
     * Rerank passages by relevance to a query using cross-encoder scoring.
     *
     * @param query - The search query
     * @param passages - Array of passage texts to score
     * @returns Array of {index, score} sorted by score descending (most relevant first)
     */
    rerank(query: string, passages: string[]): Promise<RerankedResult[]>;
}
76
+ //# sourceMappingURL=index.d.ts.map