@robthepcguy/rag-vault 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -43
- package/dist/hyde/index.d.ts +47 -0
- package/dist/hyde/index.d.ts.map +1 -0
- package/dist/hyde/index.js +203 -0
- package/dist/hyde/index.js.map +1 -0
- package/dist/reranker/index.d.ts +76 -0
- package/dist/reranker/index.d.ts.map +1 -0
- package/dist/reranker/index.js +199 -0
- package/dist/reranker/index.js.map +1 -0
- package/dist/server/index.d.ts +25 -0
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +103 -14
- package/dist/server/index.js.map +1 -1
- package/dist/server/schemas.d.ts +21 -100
- package/dist/server/schemas.d.ts.map +1 -1
- package/dist/server/schemas.js +3 -3
- package/dist/server/schemas.js.map +1 -1
- package/dist/utils/config-parsers.d.ts +14 -0
- package/dist/utils/config-parsers.d.ts.map +1 -1
- package/dist/utils/config-parsers.js +26 -0
- package/dist/utils/config-parsers.js.map +1 -1
- package/dist/utils/config.d.ts +23 -0
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +39 -1
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/file-utils.d.ts.map +1 -1
- package/dist/utils/file-utils.js +17 -1
- package/dist/utils/file-utils.js.map +1 -1
- package/dist/vectordb/index.d.ts +33 -7
- package/dist/vectordb/index.d.ts.map +1 -1
- package/dist/vectordb/index.js +161 -47
- package/dist/vectordb/index.js.map +1 -1
- package/package.json +8 -7
- package/skills/rag-vault/SKILL.md +3 -3
- package/skills/rag-vault/references/html-ingestion.md +1 -1
- package/web-ui/dist/assets/{CollectionsPage-CjLs8_5j.js → CollectionsPage-wbfgYFTw.js} +1 -1
- package/web-ui/dist/assets/{FilesPage-Bw9x9aMr.js → FilesPage-D6TlldaR.js} +1 -1
- package/web-ui/dist/assets/{ReaderPage-JPNiOF-x.js → ReaderPage-Sgy0vMZ6.js} +1 -1
- package/web-ui/dist/assets/{ReaderSettingsContext-BLFJnEne.js → ReaderSettingsContext-DsvLXuaf.js} +1 -1
- package/web-ui/dist/assets/{SearchPage-D3_Vtbdw.js → SearchPage-mPKXZEyq.js} +1 -1
- package/web-ui/dist/assets/{SettingsPage-BAxB2264.js → SettingsPage-DXeWwfvd.js} +1 -1
- package/web-ui/dist/assets/{StatusPage-CzJZW8Gs.js → StatusPage-AirpfsGF.js} +1 -1
- package/web-ui/dist/assets/{UploadPage-DW8OujeJ.js → UploadPage-Cob25kDa.js} +1 -1
- package/web-ui/dist/assets/index-BZMzEssr.js +6 -0
- package/web-ui/dist/assets/motion-DdHBXDWx.js +9 -0
- package/web-ui/dist/assets/query-DbAD_nLW.js +1 -0
- package/web-ui/dist/assets/{vendor-DSXQOR6A.js → vendor-DNJ-hWNb.js} +1 -1
- package/web-ui/dist/index.html +3 -3
- package/web-ui/dist/assets/index-ANt8Xo4z.js +0 -6
- package/web-ui/dist/assets/motion-Brxs0UET.js +0 -9
- package/web-ui/dist/assets/query-DPt-uCb6.js +0 -1
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
**Your documents. Your machine. Your control.**
|
|
8
8
|
|
|
9
|
-
RAG Vault
|
|
9
|
+
RAG Vault lets your AI coding assistant search your private documents, things like API specs, research papers, and internal docs. Everything runs locally and your data stays on your machine unless you choose to pull in content from a remote URL.
|
|
10
10
|
|
|
11
11
|
One command to run, minimal setup, privacy by default.
|
|
12
12
|
|
|
@@ -15,15 +15,15 @@ One command to run, minimal setup, privacy by default.
|
|
|
15
15
|
| Pain Point | RAG Vault Solution |
|
|
16
16
|
|------------|-------------------|
|
|
17
17
|
| "I don't want my docs on someone else's server" | Everything stays local by default. No background cloud calls for indexing or search. |
|
|
18
|
-
| "Semantic search misses exact code terms" | Hybrid search
|
|
18
|
+
| "Semantic search misses exact code terms" | Hybrid search with RRF fusion, optional cross-encoder reranking |
|
|
19
19
|
| "Setup requires Docker, Python, databases..." | One `npx` command plus a small MCP config block. |
|
|
20
20
|
| "Cloud APIs charge per query" | Free forever. No subscriptions. |
|
|
21
21
|
|
|
22
22
|
## Security
|
|
23
23
|
|
|
24
|
-
RAG Vault
|
|
24
|
+
RAG Vault comes with security built in:
|
|
25
25
|
- **API Authentication**: Optional API key via `RAG_API_KEY`
|
|
26
|
-
- **Rate Limiting**:
|
|
26
|
+
- **Rate Limiting**: You can throttle requests
|
|
27
27
|
- **CORS Control**: Restrict allowed origins
|
|
28
28
|
- **Security Headers**: Helmet.js protection
|
|
29
29
|
|
|
@@ -106,7 +106,7 @@ BASE_DIR = "/path/to/your/documents"
|
|
|
106
106
|
|
|
107
107
|
### Install Skills (Optional)
|
|
108
108
|
|
|
109
|
-
|
|
109
|
+
If you want your AI to write better queries and make more sense of results, install the RAG Vault skills:
|
|
110
110
|
|
|
111
111
|
```bash
|
|
112
112
|
# Claude Code (project-level - recommended for team projects)
|
|
@@ -124,7 +124,7 @@ npx github:RobThePCGuy/rag-vault skills install --path /your/custom/path
|
|
|
124
124
|
|
|
125
125
|
Skills teach Claude best practices for:
|
|
126
126
|
- Query formulation and expansion strategies
|
|
127
|
-
- Score interpretation
|
|
127
|
+
- Score interpretation. In boost mode, under 0.3 is a good match and over 0.5 is worth skipping. RRF mode scores by rank instead.
|
|
128
128
|
- When to use `ingest_file` vs `ingest_data`
|
|
129
129
|
- HTML ingestion and URL handling
|
|
130
130
|
|
|
@@ -142,7 +142,7 @@ That's it. No Docker. No Python. No server infrastructure to manage.
|
|
|
142
142
|
|
|
143
143
|
## Web Interface
|
|
144
144
|
|
|
145
|
-
RAG Vault
|
|
145
|
+
RAG Vault has a web UI so you can manage your documents without touching the command line.
|
|
146
146
|
|
|
147
147
|
### Launch the Web UI
|
|
148
148
|
|
|
@@ -157,7 +157,7 @@ Open [http://localhost:3000](http://localhost:3000) in your browser.
|
|
|
157
157
|
- **Upload documents**: Drag and drop PDF, DOCX, Markdown, TXT, JSON, JSONL, and NDJSON files
|
|
158
158
|
- **Search instantly**: Type queries and see results with relevance scores
|
|
159
159
|
- **Preview content**: Click any result to see the full chunk in context
|
|
160
|
-
- **Manage files**: View all indexed documents and delete what you
|
|
160
|
+
- **Manage files**: View all indexed documents and delete what you don't need
|
|
161
161
|
- **Switch databases**: Create and switch between multiple knowledge bases
|
|
162
162
|
- **Monitor status**: See document counts, memory usage, and search mode
|
|
163
163
|
- **Export/Import settings**: Back up and restore your vault configuration
|
|
@@ -166,7 +166,7 @@ Open [http://localhost:3000](http://localhost:3000) in your browser.
|
|
|
166
166
|
|
|
167
167
|
### REST API
|
|
168
168
|
|
|
169
|
-
The web server
|
|
169
|
+
The web server has a REST API you can hit directly. Set `RAG_API_KEY` to require authentication:
|
|
170
170
|
|
|
171
171
|
```bash
|
|
172
172
|
# With authentication (when RAG_API_KEY is set)
|
|
@@ -175,7 +175,7 @@ curl -X POST "http://localhost:3000/api/v1/search" \
|
|
|
175
175
|
-H "Content-Type: application/json" \
|
|
176
176
|
-d '{"query": "authentication", "limit": 5}'
|
|
177
177
|
|
|
178
|
-
# Search documents (no auth
|
|
178
|
+
# Search documents (no auth needed if RAG_API_KEY isn't set)
|
|
179
179
|
curl -X POST "http://localhost:3000/api/v1/search" \
|
|
180
180
|
-H "Content-Type: application/json" \
|
|
181
181
|
-d '{"query": "authentication", "limit": 5}'
|
|
@@ -201,7 +201,7 @@ curl "http://localhost:3000/api/v1/health"
|
|
|
201
201
|
|
|
202
202
|
### Reader API Endpoints
|
|
203
203
|
|
|
204
|
-
|
|
204
|
+
These endpoints let you read documents and find connections across them:
|
|
205
205
|
|
|
206
206
|
```bash
|
|
207
207
|
# Get all chunks for a document (ordered by index)
|
|
@@ -218,7 +218,7 @@ curl -X POST "http://localhost:3000/api/v1/chunks/batch-related" \
|
|
|
218
218
|
|
|
219
219
|
## Remote Mode
|
|
220
220
|
|
|
221
|
-
RAG Vault can also run as an HTTP server
|
|
221
|
+
RAG Vault can also run as an HTTP server so remote MCP clients like Claude.ai, Claude Desktop, or anything that supports Streamable HTTP or SSE can connect to it.
|
|
222
222
|
|
|
223
223
|
```bash
|
|
224
224
|
# Start remote server (default port 3001)
|
|
@@ -228,7 +228,7 @@ npx github:RobThePCGuy/rag-vault --remote
|
|
|
228
228
|
npx github:RobThePCGuy/rag-vault --remote --port 8080
|
|
229
229
|
```
|
|
230
230
|
|
|
231
|
-
Stdio mode is unchanged
|
|
231
|
+
Stdio mode is unchanged. Just leave off `--remote` and everything works as before with Cursor, Claude Code, and Codex.
|
|
232
232
|
|
|
233
233
|
### Connecting from Claude Desktop
|
|
234
234
|
|
|
@@ -312,12 +312,18 @@ Pure semantic search would miss this. RAG Vault finds it.
|
|
|
312
312
|
```
|
|
313
313
|
Document → Parse → Chunk by meaning → Embed locally → Store in LanceDB
|
|
314
314
|
↓
|
|
315
|
-
Query → Embed → Vector search →
|
|
315
|
+
Query → Embed → Vector search + BM25 → Fusion → Optional reranking → Results
|
|
316
316
|
```
|
|
317
317
|
|
|
318
318
|
**Smart chunking**: Splits by meaning, not character count. Keeps code blocks intact.
|
|
319
319
|
|
|
320
|
-
**Hybrid search**:
|
|
320
|
+
**Hybrid search**: Two fusion modes that combine vector similarity with BM25 keyword matching:
|
|
321
|
+
- **Boost mode** (default): BM25 boosts vector search distances multiplicatively. Simple and predictable.
|
|
322
|
+
- **RRF mode** (opt-in via `RAG_SEARCH_MODE=rrf`): [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) treats vector and BM25 as independent voters. This can surface documents that vector search alone would miss.
|
|
323
|
+
|
|
324
|
+
**Cross-encoder reranking** (opt-in): After the first pass, a cross-encoder model (`Xenova/ms-marco-MiniLM-L-6-v2`, ~23MB) scores each (query, passage) pair together for tighter relevance ranking. Turn it on with `RAG_RERANKER_ENABLED=true`.
|
|
325
|
+
|
|
326
|
+
**Query expansion** (opt-in): Generates reformulated queries to improve recall when searches are paraphrased or conceptual. Two backends: local template-based expansion (default, fully offline) or LLM-based [HyDE](https://arxiv.org/abs/2212.10496) through an external API. Turn it on with `RAG_HYDE_ENABLED=true`.
|
|
321
327
|
|
|
322
328
|
**Quality filtering**: Groups results by relevance gaps instead of arbitrary top-K cutoffs.
|
|
323
329
|
|
|
@@ -347,12 +353,12 @@ Query → Embed → Vector search → Keyword boost → Quality filter → Resul
|
|
|
347
353
|
| `DB_PATH` | `./lancedb/` | Where vectors are stored |
|
|
348
354
|
| `CACHE_DIR` | `./models/` | Model cache directory |
|
|
349
355
|
| `MODEL_NAME` | `Xenova/all-MiniLM-L6-v2` | HuggingFace embedding model |
|
|
350
|
-
| `MAX_FILE_SIZE` | `104857600` (100 MB) |
|
|
351
|
-
| `RAG_EMBEDDING_DEVICE` | `auto` |
|
|
356
|
+
| `MAX_FILE_SIZE` | `104857600` (100 MB) | Biggest file you can ingest |
|
|
357
|
+
| `RAG_EMBEDDING_DEVICE` | `auto` | Device for running embeddings: `auto`, `cpu`, `cuda`, `dml`, `webgpu`, `wasm`, `gpu`, `webnn` |
|
|
352
358
|
| `WEB_PORT` | `3000` | Port for web interface |
|
|
353
359
|
| `UPLOAD_DIR` | `./uploads/` | Temporary directory for web UI file uploads |
|
|
354
360
|
|
|
355
|
-
> **Windows users:** `RAG_EMBEDDING_DEVICE=auto`
|
|
361
|
+
> **Windows users:** `RAG_EMBEDDING_DEVICE=auto` tries GPU providers (DirectML), which can fail if ONNX Runtime GPU binaries aren't available. If you see embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config for reliable operation. See the [GPU acceleration FAQ](#frequently-asked-questions) for details.
|
|
356
362
|
|
|
357
363
|
One-command override (no `.env` edit):
|
|
358
364
|
|
|
@@ -371,13 +377,38 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
|
|
|
371
377
|
|
|
372
378
|
| Variable | Default | What it does |
|
|
373
379
|
|----------|---------|--------------|
|
|
374
|
-
| `
|
|
375
|
-
| `
|
|
376
|
-
| `
|
|
377
|
-
| `
|
|
378
|
-
| `
|
|
379
|
-
| `
|
|
380
|
-
| `
|
|
380
|
+
| `RAG_SEARCH_MODE` | `boost` | Fusion mode: `boost` (multiplicative keyword boost) or `rrf` (Reciprocal Rank Fusion) |
|
|
381
|
+
| `RAG_HYBRID_WEIGHT` | `0.6` | Balance between vector and BM25. `0` = vector-only, `1.0` = BM25-only |
|
|
382
|
+
| `RAG_RRF_K` | `60` | RRF smoothing constant (only applies in `rrf` mode). 60 is the value used in the original RRF paper and the common default. |
|
|
383
|
+
| `RAG_GROUPING` | unset | Quality filter: `similar` = top group only, `related` = top 2 groups |
|
|
384
|
+
| `RAG_MAX_DISTANCE` | unset | Drops results whose distance score is above this value, i.e. weaker matches (use with `boost` mode; `rrf` scores are rank-based) |
|
|
385
|
+
| `RAG_GROUPING_STD_MULTIPLIER` | `1.5` | How many standard deviations between groups counts as a relevance gap |
|
|
386
|
+
| `RAG_HYBRID_CANDIDATE_MULTIPLIER` | `2` | How many extra vector candidates to grab before keyword reranking |
|
|
387
|
+
| `RAG_FTS_MAX_FAILURES` | `3` | Full-text search failures before FTS is temporarily disabled |
|
|
388
|
+
| `RAG_FTS_COOLDOWN_MS` | `300000` (5 min) | How long to wait before retrying FTS after hitting the failure limit |
|
|
389
|
+
|
|
390
|
+
### Cross-Encoder Reranking (opt-in)
|
|
391
|
+
|
|
392
|
+
| Variable | Default | What it does |
|
|
393
|
+
|----------|---------|--------------|
|
|
394
|
+
| `RAG_RERANKER_ENABLED` | `false` | Turn on cross-encoder reranking for better results |
|
|
395
|
+
| `RAG_RERANKER_MODEL` | `Xenova/ms-marco-MiniLM-L-6-v2` | HuggingFace cross-encoder model (~23MB ONNX, downloads on first use) |
|
|
396
|
+
| `RAG_RERANKER_CANDIDATE_MULTIPLIER` | `2` | Fetch this many extra candidates for the reranker to score |
|
|
397
|
+
| `RAG_RERANKER_DEVICE` | `auto` | Device for the reranker (same options as `RAG_EMBEDDING_DEVICE`) |
|
|
398
|
+
| `RERANKER_INIT_TIMEOUT_MS` | `600000` (10 min) | Timeout for model download and initialization |
|
|
399
|
+
|
|
400
|
+
### Query Expansion / HyDE (opt-in)
|
|
401
|
+
|
|
402
|
+
| Variable | Default | What it does |
|
|
403
|
+
|----------|---------|--------------|
|
|
404
|
+
| `RAG_HYDE_ENABLED` | `false` | Turn on query expansion for better recall |
|
|
405
|
+
| `RAG_HYDE_BACKEND` | `rule-based` | `rule-based` for local template expansion, `api` for LLM-based HyDE |
|
|
406
|
+
| `RAG_HYDE_EXPANSIONS` | `2` | Number of expanded queries to generate |
|
|
407
|
+
| `RAG_HYDE_API_KEY` | unset | API key for LLM backend (required when `RAG_HYDE_BACKEND=api`; without it, expansion falls back to `rule-based`) |
|
|
408
|
+
| `RAG_HYDE_API_BASE_URL` | `https://api.anthropic.com` | API endpoint for LLM backend |
|
|
409
|
+
| `RAG_HYDE_API_MODEL` | `claude-haiku-4-5-20251001` | Model for LLM-based expansion |
|
|
410
|
+
|
|
411
|
+
> **Privacy note:** The `api` backend sends query text to an external LLM endpoint, which breaks the "zero cloud" guarantee. The default `rule-based` backend is fully local.
|
|
381
412
|
|
|
382
413
|
### Security (optional)
|
|
383
414
|
|
|
@@ -395,7 +426,7 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
|
|
|
395
426
|
| `ALLOWED_SCAN_ROOTS` | Home directory | Directories allowed for database scanning |
|
|
396
427
|
| `JSON_BODY_LIMIT` | `5mb` | Max request body size |
|
|
397
428
|
| `REQUEST_TIMEOUT_MS` | `30000` | API request timeout |
|
|
398
|
-
| `REQUEST_LOGGING` | `false` |
|
|
429
|
+
| `REQUEST_LOGGING` | `false` | Turn on request audit logging |
|
|
399
430
|
|
|
400
431
|
> Copy [`.env.example`](.env.example) for a complete configuration template.
|
|
401
432
|
|
|
@@ -413,7 +444,7 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
|
|
|
413
444
|
<details>
|
|
414
445
|
<summary><strong>Is my data really private?</strong></summary>
|
|
415
446
|
|
|
416
|
-
For local files, yes. Indexing and search run on your machine after the embedding model downloads (~90MB). RAG Vault only
|
|
447
|
+
For local files, yes. Indexing and search run on your machine after the embedding model downloads (~90MB). RAG Vault only hits the network if you choose remote URL ingestion or need to download a model.
|
|
417
448
|
|
|
418
449
|
</details>
|
|
419
450
|
|
|
@@ -427,11 +458,11 @@ Yes, after the first run. The model caches locally.
|
|
|
427
458
|
<details>
|
|
428
459
|
<summary><strong>What about GPU acceleration?</strong></summary>
|
|
429
460
|
|
|
430
|
-
RAG Vault
|
|
461
|
+
RAG Vault picks a device automatically by default (`RAG_EMBEDDING_DEVICE=auto`). When GPU providers are set up correctly, this can speed up embedding generation.
|
|
431
462
|
|
|
432
|
-
**Important:** On Windows, `auto` tries DirectML (`dml`) which requires ONNX Runtime GPU binaries. If those binaries
|
|
463
|
+
**Important:** On Windows, `auto` tries DirectML (`dml`), which requires ONNX Runtime GPU binaries. If those binaries aren't installed or your GPU setup is incomplete, the server won't start at all. It doesn't fall back to CPU gracefully. The same goes for Linux without CUDA binaries.
|
|
433
464
|
|
|
434
|
-
**Recommendation:** If you
|
|
465
|
+
**Recommendation:** If you hit embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. CPU mode is reliable on all platforms and fast enough for most workloads (the default model is only ~90MB).
|
|
435
466
|
|
|
436
467
|
```json
|
|
437
468
|
"env": {
|
|
@@ -446,7 +477,7 @@ Supported device values: `auto`, `cpu`, `cuda`, `dml`, `gpu`, `wasm`, `webgpu`,
|
|
|
446
477
|
<details>
|
|
447
478
|
<summary><strong>Can I change the embedding model?</strong></summary>
|
|
448
479
|
|
|
449
|
-
Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You
|
|
480
|
+
Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You'll need to delete `DB_PATH` and re-ingest because different models produce incompatible vectors.
|
|
450
481
|
|
|
451
482
|
**Recommended upgrade:** For better quality and multilingual support, use [EmbeddingGemma](https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX):
|
|
452
483
|
|
|
@@ -454,7 +485,7 @@ Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You must delete `DB_P
|
|
|
454
485
|
"MODEL_NAME": "onnx-community/embeddinggemma-300m-ONNX"
|
|
455
486
|
```
|
|
456
487
|
|
|
457
|
-
|
|
488
|
+
It's a solid pick if you need multilingual support or higher-quality retrieval.
|
|
458
489
|
|
|
459
490
|
**Other specialized models:**
|
|
460
491
|
- Scientific: `sentence-transformers/allenai-specter`
|
|
@@ -473,16 +504,16 @@ Copy the `DB_PATH` directory (default: `./lancedb/`).
|
|
|
473
504
|
|
|
474
505
|
| Problem | Solution |
|
|
475
506
|
|---------|----------|
|
|
476
|
-
| No results found | Documents
|
|
477
|
-
| Model download failed | Check internet connection.
|
|
507
|
+
| No results found | Documents need to be ingested first. Run "List all ingested files" to check. |
|
|
508
|
+
| Model download failed | Check your internet connection. The model is ~90MB from HuggingFace. |
|
|
478
509
|
| Embedding initialization fails | Set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. The `auto` default can fail on Windows without GPU binaries. |
|
|
479
510
|
| `Protobuf parsing failed` | Corrupted model cache. Delete `CACHE_DIR` (default: `./models/`) and restart. RAG Vault also auto-retries with an isolated recovery cache. |
|
|
480
511
|
| File too large | Default limit is 100MB. Set `MAX_FILE_SIZE` higher or split the file. |
|
|
481
512
|
| Path outside BASE_DIR | All file paths must be under `BASE_DIR`. Use absolute paths. |
|
|
482
|
-
| MCP tools not showing |
|
|
513
|
+
| MCP tools not showing | Check your config syntax and restart your AI tool completely (Cmd+Q on Mac). |
|
|
483
514
|
| `mcp-publisher login github` fails with `slow_down` | Use token login instead: `mcp-publisher login github --token "$(gh auth token)"` (or pass a PAT). |
|
|
484
|
-
| 401 Unauthorized | API key required. Set `RAG_API_KEY` or use correct header format. |
|
|
485
|
-
| 429 Too Many Requests | Rate limited. Wait for reset or increase `RATE_LIMIT_MAX_REQUESTS`. |
|
|
515
|
+
| 401 Unauthorized | API key required. Set `RAG_API_KEY` or use the correct header format. |
|
|
516
|
+
| 429 Too Many Requests | Rate limited. Wait for the reset or increase `RATE_LIMIT_MAX_REQUESTS`. |
|
|
486
517
|
| CORS errors | Add your origin to `CORS_ORIGINS` environment variable. |
|
|
487
518
|
|
|
488
519
|
## Development
|
|
@@ -527,7 +558,7 @@ pnpm release:dry
|
|
|
527
558
|
|
|
528
559
|
### Test Tiers
|
|
529
560
|
|
|
530
|
-
- `pnpm test:unit`: deterministic tests for local/CI quality checks
|
|
561
|
+
- `pnpm test:unit`: deterministic tests for local/CI quality checks. Doesn't include model-download integration paths.
|
|
531
562
|
- `pnpm test:integration`: full integration and E2E workflows, including embedding model initialization.
|
|
532
563
|
|
|
533
564
|
Use `RUN_EMBEDDING_INTEGRATION=1` to explicitly opt into network/model-dependent suites.
|
|
@@ -537,8 +568,8 @@ Use `RUN_EMBEDDING_INTEGRATION=1` to explicitly opt into network/model-dependent
|
|
|
537
568
|
- Releases are local and scripted via `scripts/release-npm.sh`.
|
|
538
569
|
- Supported bumps: `patch`, `minor`, `major`.
|
|
539
570
|
- The script runs dependency installs, `pnpm check:all`, and `pnpm ui:build` before touching version files.
|
|
540
|
-
- `package.json` and `server.json` versions
|
|
541
|
-
- `pnpm release:dry`
|
|
571
|
+
- `package.json` and `server.json` versions only get updated after checks pass, and they're auto-restored if any later step fails.
|
|
572
|
+
- `pnpm release:dry` runs the full gate plus npm dry-run publish and always restores version files.
|
|
542
573
|
|
|
543
574
|
### Project Structure
|
|
544
575
|
|
|
@@ -550,11 +581,13 @@ src/
|
|
|
550
581
|
├── errors/ # Error handling utilities
|
|
551
582
|
├── explainability/ # Keyword-based result explanations
|
|
552
583
|
├── flywheel/ # Feedback loop (pin/dismiss reranking)
|
|
584
|
+
├── hyde/ # Query expansion + HyDE (LLM-based)
|
|
553
585
|
├── parser/ # PDF, DOCX, HTML parsing
|
|
554
586
|
├── query/ # Advanced query syntax parser
|
|
587
|
+
├── reranker/ # Cross-encoder reranking (Transformers.js)
|
|
555
588
|
├── server/ # MCP tool handlers + remote transport
|
|
556
589
|
├── utils/ # Config, file helpers, process handlers
|
|
557
|
-
├── vectordb/ # LanceDB + hybrid search
|
|
590
|
+
├── vectordb/ # LanceDB + hybrid search (boost + RRF)
|
|
558
591
|
└── web/ # Express server + REST API
|
|
559
592
|
|
|
560
593
|
web-ui/ # React frontend (Vite + Tailwind)
|
|
@@ -573,6 +606,6 @@ MIT: free for personal and commercial use.
|
|
|
573
606
|
|
|
574
607
|
Built with [Model Context Protocol](https://modelcontextprotocol.io/), [LanceDB](https://lancedb.com/), and [Transformers.js](https://huggingface.co/docs/transformers.js).
|
|
575
608
|
|
|
576
|
-
> Started as a fork of [mcp-local-rag](https://github.com/shinpr/mcp-local-rag) by [Shinsuke Kagawa](https://github.com/shinpr). Now it
|
|
577
|
-
> Huge credit to upstream contributors for the foundation, I
|
|
609
|
+
> Started as a fork of [mcp-local-rag](https://github.com/shinpr/mcp-local-rag) by [Shinsuke Kagawa](https://github.com/shinpr). Now it's its own thing.
|
|
610
|
+
> Huge credit to upstream contributors for the foundation; I've been iterating hard from there.
|
|
578
611
|
> Local-first dev tools, all the way.
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HyDE configuration
|
|
3
|
+
*/
|
|
4
|
+
export interface HyDEConfig {
|
|
5
|
+
/** Whether HyDE is enabled */
|
|
6
|
+
enabled: boolean;
|
|
7
|
+
/** Backend: 'rule-based' for local template-based query expansion, 'api' for LLM-based HyDE */
|
|
8
|
+
backend: 'rule-based' | 'api';
|
|
9
|
+
/** Number of query expansions to generate (default: 2) */
|
|
10
|
+
numExpansions: number;
|
|
11
|
+
/** API key for LLM backend (optional; when unset with the 'api' backend, expansion falls back to rule-based) */
|
|
12
|
+
apiKey?: string;
|
|
13
|
+
/** API base URL for LLM backend (optional) */
|
|
14
|
+
apiBaseUrl?: string;
|
|
15
|
+
/** API model name for LLM backend (optional) */
|
|
16
|
+
apiModel?: string;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Expanded query with weight
|
|
20
|
+
*/
|
|
21
|
+
export interface ExpandedQuery {
|
|
22
|
+
/** The expanded query text */
|
|
23
|
+
text: string;
|
|
24
|
+
/** Weight for RRF voting (original = 1.0, expansions = 0.5) */
|
|
25
|
+
weight: number;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* HyDE (Hypothetical Document Embeddings) query expander.
|
|
29
|
+
*
|
|
30
|
+
* Generates hypothetical answer documents from a query to improve
|
|
31
|
+
* retrieval recall. Each expansion becomes an additional voter in
|
|
32
|
+
* RRF fusion with a lower weight (0.5) than the original query (1.0).
|
|
33
|
+
*/
|
|
34
|
+
export declare class HyDEExpander {
|
|
35
|
+
private readonly config;
|
|
36
|
+
constructor(config: HyDEConfig);
|
|
37
|
+
/**
|
|
38
|
+
* Expand a query into the original plus hypothetical documents.
|
|
39
|
+
*
|
|
40
|
+
* @param query - The original search query
|
|
41
|
+
* @returns Array of expanded queries with weights.
|
|
42
|
+
* First item is always the original query (weight 1.0).
|
|
43
|
+
* Subsequent items are hypothetical expansions (weight 0.5).
|
|
44
|
+
*/
|
|
45
|
+
expandQuery(query: string): Promise<ExpandedQuery[]>;
|
|
46
|
+
}
|
|
47
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/hyde/index.ts"],"names":[],"mappings":"AAiBA;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8BAA8B;IAC9B,OAAO,EAAE,OAAO,CAAA;IAChB,+FAA+F;IAC/F,OAAO,EAAE,YAAY,GAAG,KAAK,CAAA;IAC7B,0DAA0D;IAC1D,aAAa,EAAE,MAAM,CAAA;IACrB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8CAA8C;IAC9C,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,gDAAgD;IAChD,QAAQ,CAAC,EAAE,MAAM,CAAA;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,MAAM,EAAE,MAAM,CAAA;CACf;AAkMD;;;;;;GAMG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAY;gBAEvB,MAAM,EAAE,UAAU;IAI9B;;;;;;;OAOG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;CAkC3D"}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
// Query expansion module for improved retrieval recall
|
|
2
|
+
//
|
|
3
|
+
// Two backends are available:
|
|
4
|
+
// - 'rule-based': Template-based query expansion (local, offline, no dependencies).
|
|
5
|
+
// Generates reformulated queries using pattern detection and templates.
|
|
6
|
+
// This is a classic IR query expansion technique, not HyDE.
|
|
7
|
+
// - 'api': LLM-based HyDE (Hypothetical Document Embeddings). Generates hypothetical
|
|
8
|
+
// answer documents via an external LLM API, then embeds them for retrieval.
|
|
9
|
+
// Reference: Gao et al. "Precise Zero-Shot Dense Retrieval without Relevance Labels" (2022)
|
|
10
|
+
//
|
|
11
|
+
// Each expansion is embedded alongside the original query and becomes a separate
|
|
12
|
+
// voter in RRF fusion, improving recall for paraphrased or conceptual queries.
|
|
13
|
+
// ============================================
|
|
14
|
+
// Query Pattern Detection
|
|
15
|
+
// ============================================
|
|
16
|
+
/** Common question word patterns */
|
|
17
|
+
const QUESTION_PATTERN = /^(what|how|why|when|where|who|which|can|does|is|are|was|were|do|did|should|could|would)\s+/i;
|
|
18
|
+
/** Common technical/code patterns */
|
|
19
|
+
const CODE_PATTERN = /`[^`]+`|[A-Z][a-z]+[A-Z]|[a-z]+_[a-z]+|\.[a-z]+\(|ERR_|ERROR_|[A-Z_]{3,}/;
|
|
20
|
+
/** Error message patterns */
|
|
21
|
+
const ERROR_PATTERN = /error|exception|fail|crash|bug|issue|problem|broken|not working/i;
|
|
22
|
+
/**
|
|
23
|
+
* Detect query intent for better expansion
|
|
24
|
+
*/
|
|
25
|
+
function detectQueryType(query) {
|
|
26
|
+
if (ERROR_PATTERN.test(query))
|
|
27
|
+
return 'error';
|
|
28
|
+
if (CODE_PATTERN.test(query))
|
|
29
|
+
return 'code';
|
|
30
|
+
if (QUESTION_PATTERN.test(query))
|
|
31
|
+
return 'question';
|
|
32
|
+
return 'concept';
|
|
33
|
+
}
|
|
34
|
+
// ============================================
|
|
35
|
+
// Rule-Based Expansion
|
|
36
|
+
// ============================================
|
|
37
|
+
/**
|
|
38
|
+
* Generate reformulated, document-style queries using rule-based templates (query expansion, not LLM-based HyDE).
|
|
39
|
+
* Works offline with no dependencies — always available as a fallback.
|
|
40
|
+
*
|
|
41
|
+
* The strategy varies by detected query type:
|
|
42
|
+
* - Questions: Convert to declarative statements
|
|
43
|
+
* - Errors: Frame as troubleshooting documentation
|
|
44
|
+
* - Code: Frame as technical documentation
|
|
45
|
+
* - Concepts: Frame as explanatory documentation
|
|
46
|
+
*/
|
|
47
|
+
function ruleBasedExpansion(query, numExpansions) {
|
|
48
|
+
const queryType = detectQueryType(query);
|
|
49
|
+
const expansions = [];
|
|
50
|
+
// Strip question marks for declarative reformulation
|
|
51
|
+
const cleanQuery = query.replace(/\?+$/, '').trim();
|
|
52
|
+
switch (queryType) {
|
|
53
|
+
case 'question': {
|
|
54
|
+
// Convert question to declarative statement
|
|
55
|
+
const declarative = cleanQuery.replace(QUESTION_PATTERN, '').trim();
|
|
56
|
+
if (declarative.length > 3) {
|
|
57
|
+
expansions.push(`${declarative.charAt(0).toUpperCase()}${declarative.slice(1)}. This is explained in detail in the documentation.`);
|
|
58
|
+
}
|
|
59
|
+
// Frame as a documentation excerpt
|
|
60
|
+
expansions.push(`The documentation explains that ${cleanQuery.toLowerCase()}. The key points are as follows.`);
|
|
61
|
+
// Frame as a guide section
|
|
62
|
+
expansions.push(`A guide about ${cleanQuery.toLowerCase()} would cover the following topics and provide step-by-step instructions.`);
|
|
63
|
+
break;
|
|
64
|
+
}
|
|
65
|
+
case 'error': {
|
|
66
|
+
// Frame as troubleshooting documentation
|
|
67
|
+
expansions.push(`To resolve ${cleanQuery}, follow these troubleshooting steps. The root cause is typically related to configuration or dependencies.`);
|
|
68
|
+
expansions.push(`The error "${cleanQuery}" occurs when the system encounters an unexpected state. The solution involves checking the following.`);
|
|
69
|
+
expansions.push(`Common causes for ${cleanQuery} include misconfiguration, missing dependencies, and version incompatibilities. Here is how to fix it.`);
|
|
70
|
+
break;
|
|
71
|
+
}
|
|
72
|
+
case 'code': {
|
|
73
|
+
// Frame as technical documentation
|
|
74
|
+
expansions.push(`The implementation of ${cleanQuery} involves the following components and follows these patterns.`);
|
|
75
|
+
expansions.push(`Documentation for ${cleanQuery}: This feature provides the following functionality and can be configured as described below.`);
|
|
76
|
+
expansions.push(`${cleanQuery} is used to handle specific operations in the system. Here is how it works and how to use it correctly.`);
|
|
77
|
+
break;
|
|
78
|
+
}
|
|
79
|
+
default: {
|
|
80
|
+
// General conceptual expansion
|
|
81
|
+
expansions.push(`A document about ${cleanQuery} would discuss the following key aspects, including definitions, usage patterns, and best practices.`);
|
|
82
|
+
expansions.push(`${cleanQuery.charAt(0).toUpperCase()}${cleanQuery.slice(1)} is a concept that encompasses several important areas. The documentation covers the following topics.`);
|
|
83
|
+
expansions.push(`The following documentation explains ${cleanQuery} in detail, covering its purpose, implementation, and common use cases.`);
|
|
84
|
+
break;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
return expansions.slice(0, numExpansions);
|
|
88
|
+
}
|
|
89
|
+
// ============================================
|
|
90
|
+
// API-Based Expansion
|
|
91
|
+
// ============================================
|
|
92
|
+
/**
|
|
93
|
+
* Generate hypothetical documents using an LLM API.
|
|
94
|
+
* Falls back to rule-based expansion on failure.
|
|
95
|
+
*/
|
|
96
|
+
async function apiBasedExpansion(query, numExpansions, config) {
|
|
97
|
+
if (!config.apiKey) {
|
|
98
|
+
console.error('HyDE: No API key configured, falling back to rule-based expansion');
|
|
99
|
+
return ruleBasedExpansion(query, numExpansions);
|
|
100
|
+
}
|
|
101
|
+
const baseUrl = config.apiBaseUrl || 'https://api.anthropic.com';
|
|
102
|
+
const model = config.apiModel || 'claude-haiku-4-5-20251001';
|
|
103
|
+
try {
|
|
104
|
+
const prompt = `Generate ${numExpansions} short hypothetical document excerpts (2-3 sentences each) that would be relevant to answering the following query. Each excerpt should sound like it comes from real documentation. Return only the excerpts, separated by newlines.
|
|
105
|
+
|
|
106
|
+
Query: ${query}`;
|
|
107
|
+
const response = await fetch(`${baseUrl}/v1/messages`, {
|
|
108
|
+
method: 'POST',
|
|
109
|
+
headers: {
|
|
110
|
+
'Content-Type': 'application/json',
|
|
111
|
+
'x-api-key': config.apiKey,
|
|
112
|
+
'anthropic-version': '2023-06-01',
|
|
113
|
+
},
|
|
114
|
+
body: JSON.stringify({
|
|
115
|
+
model,
|
|
116
|
+
max_tokens: 300,
|
|
117
|
+
messages: [{ role: 'user', content: prompt }],
|
|
118
|
+
}),
|
|
119
|
+
signal: AbortSignal.timeout(10000), // 10s timeout
|
|
120
|
+
});
|
|
121
|
+
if (!response.ok) {
|
|
122
|
+
throw new Error(`API request failed: ${response.status} ${response.statusText}`);
|
|
123
|
+
}
|
|
124
|
+
const data = await response.json();
|
|
125
|
+
// Runtime validation of API response shape
|
|
126
|
+
const dataObj = data;
|
|
127
|
+
if (!dataObj || typeof dataObj !== 'object' || !Array.isArray(dataObj['content'])) {
|
|
128
|
+
throw new Error('API returned unexpected response format');
|
|
129
|
+
}
|
|
130
|
+
const content = dataObj['content'];
|
|
131
|
+
const firstBlock = content[0];
|
|
132
|
+
const text = firstBlock && typeof firstBlock['text'] === 'string' ? firstBlock['text'] : '';
|
|
133
|
+
const expansions = text
|
|
134
|
+
.split('\n')
|
|
135
|
+
.map((line) => line.trim())
|
|
136
|
+
.filter((line) => line.length > 20)
|
|
137
|
+
.slice(0, numExpansions);
|
|
138
|
+
if (expansions.length === 0) {
|
|
139
|
+
console.error('HyDE: API returned no valid expansions, falling back to rule-based');
|
|
140
|
+
return ruleBasedExpansion(query, numExpansions);
|
|
141
|
+
}
|
|
142
|
+
return expansions;
|
|
143
|
+
}
|
|
144
|
+
catch (error) {
|
|
145
|
+
console.error(`HyDE: API expansion failed: ${error.message}, falling back to rule-based`);
|
|
146
|
+
return ruleBasedExpansion(query, numExpansions);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
// ============================================
|
|
150
|
+
// HyDEExpander Class
|
|
151
|
+
// ============================================
|
|
152
|
+
/**
|
|
153
|
+
* HyDE (Hypothetical Document Embeddings) query expander.
|
|
154
|
+
*
|
|
155
|
+
* Generates hypothetical answer documents from a query to improve
|
|
156
|
+
* retrieval recall. Each expansion becomes an additional voter in
|
|
157
|
+
* RRF fusion with a lower weight (0.5) than the original query (1.0).
|
|
158
|
+
*/
|
|
159
|
+
export class HyDEExpander {
|
|
160
|
+
config;
|
|
161
|
+
constructor(config) {
|
|
162
|
+
this.config = config;
|
|
163
|
+
}
|
|
164
|
+
/**
|
|
165
|
+
* Expand a query into the original plus hypothetical documents.
|
|
166
|
+
*
|
|
167
|
+
* @param query - The original search query
|
|
168
|
+
* @returns Array of expanded queries with weights.
|
|
169
|
+
* First item is always the original query (weight 1.0).
|
|
170
|
+
* Subsequent items are hypothetical expansions (weight 0.5).
|
|
171
|
+
*/
|
|
172
|
+
async expandQuery(query) {
|
|
173
|
+
if (!this.config.enabled) {
|
|
174
|
+
return [{ text: query, weight: 1.0 }];
|
|
175
|
+
}
|
|
176
|
+
// Always include the original query at full weight
|
|
177
|
+
const results = [{ text: query, weight: 1.0 }];
|
|
178
|
+
// Skip expansion for very short queries (less than 3 words)
|
|
179
|
+
const wordCount = query.trim().split(/\s+/).length;
|
|
180
|
+
if (wordCount < 3) {
|
|
181
|
+
return results;
|
|
182
|
+
}
|
|
183
|
+
try {
|
|
184
|
+
let expansions;
|
|
185
|
+
if (this.config.backend === 'api') {
|
|
186
|
+
expansions = await apiBasedExpansion(query, this.config.numExpansions, this.config);
|
|
187
|
+
}
|
|
188
|
+
else {
|
|
189
|
+
expansions = ruleBasedExpansion(query, this.config.numExpansions);
|
|
190
|
+
}
|
|
191
|
+
// Add expansions with lower weight
|
|
192
|
+
for (const expansion of expansions) {
|
|
193
|
+
results.push({ text: expansion, weight: 0.5 });
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
catch (error) {
|
|
197
|
+
console.error(`HyDE: Expansion failed: ${error.message}`);
|
|
198
|
+
// Return just the original query on failure (graceful degradation)
|
|
199
|
+
}
|
|
200
|
+
return results;
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/hyde/index.ts"],"names":[],"mappings":"AAAA,uDAAuD;AACvD,EAAE;AACF,8BAA8B;AAC9B,oFAAoF;AACpF,0EAA0E;AAC1E,8DAA8D;AAC9D,qFAAqF;AACrF,8EAA8E;AAC9E,8FAA8F;AAC9F,EAAE;AACF,iFAAiF;AACjF,+EAA+E;AAkC/E,+CAA+C;AAC/C,0BAA0B;AAC1B,+CAA+C;AAE/C,oCAAoC;AACpC,MAAM,gBAAgB,GACpB,6FAA6F,CAAA;AAE/F,qCAAqC;AACrC,MAAM,YAAY,GAAG,0EAA0E,CAAA;AAE/F,6BAA6B;AAC7B,MAAM,aAAa,GAAG,kEAAkE,CAAA;AAExF;;GAEG;AACH,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAA;IAC7C,IAAI,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAC3C,IAAI,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,UAAU,CAAA;IACnD,OAAO,SAAS,CAAA;AAClB,CAAC;AAED,+CAA+C;AAC/C,uBAAuB;AACvB,+CAA+C;AAE/C;;;;;;;;;GASG;AACH,SAAS,kBAAkB,CAAC,KAAa,EAAE,aAAqB;IAC9D,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,CAAC,CAAA;IACxC,MAAM,UAAU,GAAa,EAAE,CAAA;IAE/B,qDAAqD;IACrD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IAEnD,QAAQ,SAAS,EAAE,CAAC;QAClB,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,4CAA4C;YAC5C,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YACnE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,UAAU,CAAC,IAAI,CACb,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,qDAAqD,CACnH,CAAA;YACH,CAAC;YAED,mCAAmC;YACnC,UAAU,CAAC,IAAI,CACb,mCAAmC,UAAU,CAAC,WAAW,EAAE,kCAAkC,CAC9F,CAAA;YAED,2BAA2B;YAC3B,UAAU,CAAC,IAAI,CACb,iBAAiB,UAAU,CAAC,WAAW,EAAE,0EAA0E,CACpH,CAAA;YACD,MAAK;QACP,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,yCAAyC;YACzC,UAAU,CAAC,IAAI,CACb,cAAc,UAAU,6GAA6G,CACtI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,cAAc,UAAU,wGAAwG,CACjI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,qBAAqB,UAAU,wGAAwG,CACxI,CAAA;YACD,MAAK;QACP,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,mCAAmC;YACnC,UAAU,CAAC,IAAI,CACb,yBAAyB,UAAU,gEAAgE,CACpG,CAAA;YACD,UAAU,CAAC,IAAI,CACb,qBAAqB,UAAU,+FAA+F,CAC/H,CAAA;YACD,UAAU,CAAC,IAAI,CACb,GAAG,UAAU,yGAAyG,CACvH,CAAA;YACD,MAAK;QACP,CAAC;QAED,OAAO,CAAC,CAAC,CAAC;YACR,+BAA+B;YAC/B,UAAU,CAAC,IAAI,CACb,oBAA
oB,UAAU,sGAAsG,CACrI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,wGAAwG,CACpK,CAAA;YACD,UAAU,CAAC,IAAI,CACb,wCAAwC,UAAU,yEAAyE,CAC5H,CAAA;YACD,MAAK;QACP,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAA;AAC3C,CAAC;AAED,+CAA+C;AAC/C,sBAAsB;AACtB,+CAA+C;AAE/C;;;GAGG;AACH,KAAK,UAAU,iBAAiB,CAC9B,KAAa,EACb,aAAqB,EACrB,MAAkB;IAElB,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACnB,OAAO,CAAC,KAAK,CAAC,mEAAmE,CAAC,CAAA;QAClF,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACjD,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,IAAI,2BAA2B,CAAA;IAChE,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,IAAI,2BAA2B,CAAA;IAE5D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,YAAY,aAAa;;SAEnC,KAAK,EAAE,CAAA;QAEZ,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,cAAc,EAAE;YACrD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,mBAAmB,EAAE,YAAY;aAClC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,UAAU,EAAE,GAAG;gBACf,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;aAC9C,CAAC;YACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,cAAc;SACnD,CAAC,CAAA;QAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAA;QAClF,CAAC;QAED,MAAM,IAAI,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAE3C,2CAA2C;QAC3C,MAAM,OAAO,GAAG,IAAsC,CAAA;QACtD,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;YAClF,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAA;QAC5D,CAAC;QACD,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,CAAc,CAAA;QAC/C,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAwC,CAAA;QACpE,MAAM,IAAI,GAAG,UAAU,IAAI,OAAO,UAAU,CAAC,MAAM,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;QAC3F,MAAM,UAAU,GAAG,IAAI;aACpB,KAAK,CAAC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAClC,MAAM,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;aAC1C,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAA;QAE1B,IAAI,UAAU,CAAC,MAAM,KAAK,CAA
C,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,oEAAoE,CAAC,CAAA;YACnF,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;QACjD,CAAC;QAED,OAAO,UAAU,CAAA;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,+BAAgC,KAAe,CAAC,OAAO,8BAA8B,CACtF,CAAA;QACD,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACjD,CAAC;AACH,CAAC;AAED,+CAA+C;AAC/C,qBAAqB;AACrB,+CAA+C;AAE/C;;;;;;GAMG;AACH,MAAM,OAAO,YAAY;IACN,MAAM,CAAY;IAEnC,YAAY,MAAkB;QAC5B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,WAAW,CAAC,KAAa;QAC7B,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;QACvC,CAAC;QAED,mDAAmD;QACnD,MAAM,OAAO,GAAoB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;QAE/D,4DAA4D;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAA;QAClD,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,OAAO,OAAO,CAAA;QAChB,CAAC;QAED,IAAI,CAAC;YACH,IAAI,UAAoB,CAAA;YAExB,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,KAAK,EAAE,CAAC;gBAClC,UAAU,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,CAAA;YACrF,CAAC;iBAAM,CAAC;gBACN,UAAU,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAA;YACnE,CAAC;YAED,mCAAmC;YACnC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;gBACnC,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;YAChD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2BAA4B,KAAe,CAAC,OAAO,EAAE,CAAC,CAAA;YACpE,mEAAmE;QACrE,CAAC;QAED,OAAO,OAAO,CAAA;IAChB,CAAC;CACF"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
 * Settings used to construct a reranker.
 */
export interface RerankerConfig {
    /** Path of the HuggingFace cross-encoder model. */
    modelPath: string;
    /** Directory used as the model cache. */
    cacheDir: string;
    /**
     * Optional device hint for the Transformers.js runtime
     * (for example: auto, cpu, cuda, dml, webgpu).
     */
    device?: string;
    /**
     * Optional timeout, in milliseconds, for model initialization/download.
     * Defaults to 600000 (10 minutes).
     */
    initTimeoutMs?: number;
}
|
|
20
|
+
/**
 * A single scored entry produced by reranking.
 */
export interface RerankedResult {
    /** Position of the passage in the original input array. */
    index: number;
    /** Relevance score from the cross-encoder; higher means more relevant. */
    score: number;
}
|
|
29
|
+
/**
 * Cross-encoder reranker built on Transformers.js.
 *
 * A cross-encoder scores each (query, passage) pair for relevance by
 * encoding both texts jointly. Compared with bi-encoders this gives more
 * accurate relevance judgments, at the cost of speed.
 *
 * Default model: Xenova/ms-marco-MiniLM-L-6-v2 (~23MB ONNX)
 */
export declare class Reranker {
    private model;
    private initPromise;
    private readonly config;
    constructor(config: RerankerConfig);
    /**
     * Return the model name/path.
     */
    getModelName(): string;
    /**
     * Work out which device inference should use.
     */
    private resolveDevice;
    /**
     * Provide a recovery cache directory for use when a model cache is corrupted.
     */
    private getRecoveryCacheDir;
    /**
     * Decide whether an error can be recovered from by using a fresh cache.
     */
    private isRecoverableCacheError;
    /**
     * Initialize the Transformers.js cross-encoder model.
     */
    initialize(): Promise<void>;
    /**
     * Lazily initialize the model if that has not happened yet.
     */
    private ensureInitialized;
    /**
     * Score passages against a query with the cross-encoder and rank them.
     *
     * @param query - The search query
     * @param passages - Passage texts to score
     * @returns `{index, score}` entries ordered by descending score (most relevant first)
     */
    rerank(query: string, passages: string[]): Promise<RerankedResult[]>;
}
|
|
76
|
+
//# sourceMappingURL=index.d.ts.map
|