@robthepcguy/rag-vault 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/README.md +76 -43
  2. package/dist/hyde/index.d.ts +47 -0
  3. package/dist/hyde/index.d.ts.map +1 -0
  4. package/dist/hyde/index.js +203 -0
  5. package/dist/hyde/index.js.map +1 -0
  6. package/dist/reranker/index.d.ts +76 -0
  7. package/dist/reranker/index.d.ts.map +1 -0
  8. package/dist/reranker/index.js +199 -0
  9. package/dist/reranker/index.js.map +1 -0
  10. package/dist/server/index.d.ts +25 -0
  11. package/dist/server/index.d.ts.map +1 -1
  12. package/dist/server/index.js +103 -14
  13. package/dist/server/index.js.map +1 -1
  14. package/dist/server/schemas.d.ts +21 -100
  15. package/dist/server/schemas.d.ts.map +1 -1
  16. package/dist/server/schemas.js +3 -3
  17. package/dist/server/schemas.js.map +1 -1
  18. package/dist/utils/config-parsers.d.ts +14 -0
  19. package/dist/utils/config-parsers.d.ts.map +1 -1
  20. package/dist/utils/config-parsers.js +26 -0
  21. package/dist/utils/config-parsers.js.map +1 -1
  22. package/dist/utils/config.d.ts +23 -0
  23. package/dist/utils/config.d.ts.map +1 -1
  24. package/dist/utils/config.js +39 -1
  25. package/dist/utils/config.js.map +1 -1
  26. package/dist/utils/file-utils.d.ts.map +1 -1
  27. package/dist/utils/file-utils.js +17 -1
  28. package/dist/utils/file-utils.js.map +1 -1
  29. package/dist/vectordb/index.d.ts +33 -7
  30. package/dist/vectordb/index.d.ts.map +1 -1
  31. package/dist/vectordb/index.js +161 -47
  32. package/dist/vectordb/index.js.map +1 -1
  33. package/package.json +8 -7
  34. package/skills/rag-vault/SKILL.md +3 -3
  35. package/skills/rag-vault/references/html-ingestion.md +1 -1
  36. package/web-ui/dist/assets/{CollectionsPage-CjLs8_5j.js → CollectionsPage-wbfgYFTw.js} +1 -1
  37. package/web-ui/dist/assets/{FilesPage-Bw9x9aMr.js → FilesPage-D6TlldaR.js} +1 -1
  38. package/web-ui/dist/assets/{ReaderPage-JPNiOF-x.js → ReaderPage-Sgy0vMZ6.js} +1 -1
  39. package/web-ui/dist/assets/{ReaderSettingsContext-BLFJnEne.js → ReaderSettingsContext-DsvLXuaf.js} +1 -1
  40. package/web-ui/dist/assets/{SearchPage-D3_Vtbdw.js → SearchPage-mPKXZEyq.js} +1 -1
  41. package/web-ui/dist/assets/{SettingsPage-BAxB2264.js → SettingsPage-DXeWwfvd.js} +1 -1
  42. package/web-ui/dist/assets/{StatusPage-CzJZW8Gs.js → StatusPage-AirpfsGF.js} +1 -1
  43. package/web-ui/dist/assets/{UploadPage-DW8OujeJ.js → UploadPage-Cob25kDa.js} +1 -1
  44. package/web-ui/dist/assets/index-BZMzEssr.js +6 -0
  45. package/web-ui/dist/assets/motion-DdHBXDWx.js +9 -0
  46. package/web-ui/dist/assets/query-DbAD_nLW.js +1 -0
  47. package/web-ui/dist/assets/{vendor-DSXQOR6A.js → vendor-DNJ-hWNb.js} +1 -1
  48. package/web-ui/dist/index.html +3 -3
  49. package/web-ui/dist/assets/index-ANt8Xo4z.js +0 -6
  50. package/web-ui/dist/assets/motion-Brxs0UET.js +0 -9
  51. package/web-ui/dist/assets/query-DPt-uCb6.js +0 -1
package/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  **Your documents. Your machine. Your control.**
8
8
 
9
- RAG Vault gives AI coding assistants fast access to your private documents such as API specs, research papers, and internal docs. Indexing and search run locally, and your data stays on your machine unless you explicitly ingest content from a remote URL.
9
+ RAG Vault lets your AI coding assistant search your private documents, things like API specs, research papers, and internal docs. Everything runs locally and your data stays on your machine unless you choose to pull in content from a remote URL.
10
10
 
11
11
  One command to run, minimal setup, privacy by default.
12
12
 
@@ -15,15 +15,15 @@ One command to run, minimal setup, privacy by default.
15
15
  | Pain Point | RAG Vault Solution |
16
16
  |------------|-------------------|
17
17
  | "I don't want my docs on someone else's server" | Everything stays local by default. No background cloud calls for indexing or search. |
18
- | "Semantic search misses exact code terms" | Hybrid search: meaning + exact matches like `useEffect` |
18
+ | "Semantic search misses exact code terms" | Hybrid search with RRF fusion, optional cross-encoder reranking |
19
19
  | "Setup requires Docker, Python, databases..." | One `npx` command plus a small MCP config block. |
20
20
  | "Cloud APIs charge per query" | Free forever. No subscriptions. |
21
21
 
22
22
  ## Security
23
23
 
24
- RAG Vault includes security features for production deployment:
24
+ RAG Vault comes with security built in:
25
25
  - **API Authentication**: Optional API key via `RAG_API_KEY`
26
- - **Rate Limiting**: Configurable request throttling
26
+ - **Rate Limiting**: You can throttle requests
27
27
  - **CORS Control**: Restrict allowed origins
28
28
  - **Security Headers**: Helmet.js protection
29
29
 
@@ -106,7 +106,7 @@ BASE_DIR = "/path/to/your/documents"
106
106
 
107
107
  ### Install Skills (Optional)
108
108
 
109
- For enhanced AI guidance on query formulation and result interpretation, install the RAG Vault skills:
109
+ If you want your AI to write better queries and make more sense of results, install the RAG Vault skills:
110
110
 
111
111
  ```bash
112
112
  # Claude Code (project-level - recommended for team projects)
@@ -124,7 +124,7 @@ npx github:RobThePCGuy/rag-vault skills install --path /your/custom/path
124
124
 
125
125
  Skills teach Claude best practices for:
126
126
  - Query formulation and expansion strategies
127
- - Score interpretation (< 0.3 = good match, > 0.5 = skip)
127
+ - Score interpretation. In boost mode, under 0.3 is a good match and over 0.5 is worth skipping. RRF mode scores by rank instead.
128
128
  - When to use `ingest_file` vs `ingest_data`
129
129
  - HTML ingestion and URL handling
130
130
 
@@ -142,7 +142,7 @@ That's it. No Docker. No Python. No server infrastructure to manage.
142
142
 
143
143
  ## Web Interface
144
144
 
145
- RAG Vault includes a full-featured web UI for managing your documents without the command line.
145
+ RAG Vault has a web UI so you can manage your documents without touching the command line.
146
146
 
147
147
  ### Launch the Web UI
148
148
 
@@ -157,7 +157,7 @@ Open [http://localhost:3000](http://localhost:3000) in your browser.
157
157
  - **Upload documents**: Drag and drop PDF, DOCX, Markdown, TXT, JSON, JSONL, and NDJSON files
158
158
  - **Search instantly**: Type queries and see results with relevance scores
159
159
  - **Preview content**: Click any result to see the full chunk in context
160
- - **Manage files**: View all indexed documents and delete what you do not need
160
+ - **Manage files**: View all indexed documents and delete what you don't need
161
161
  - **Switch databases**: Create and switch between multiple knowledge bases
162
162
  - **Monitor status**: See document counts, memory usage, and search mode
163
163
  - **Export/Import settings**: Back up and restore your vault configuration
@@ -166,7 +166,7 @@ Open [http://localhost:3000](http://localhost:3000) in your browser.
166
166
 
167
167
  ### REST API
168
168
 
169
- The web server exposes a REST API for programmatic access. Set `RAG_API_KEY` to require authentication:
169
+ The web server has a REST API you can hit directly. Set `RAG_API_KEY` to require authentication:
170
170
 
171
171
  ```bash
172
172
  # With authentication (when RAG_API_KEY is set)
@@ -175,7 +175,7 @@ curl -X POST "http://localhost:3000/api/v1/search" \
175
175
  -H "Content-Type: application/json" \
176
176
  -d '{"query": "authentication", "limit": 5}'
177
177
 
178
- # Search documents (no auth required if RAG_API_KEY is not set)
178
+ # Search documents (no auth needed if RAG_API_KEY isn't set)
179
179
  curl -X POST "http://localhost:3000/api/v1/search" \
180
180
  -H "Content-Type: application/json" \
181
181
  -d '{"query": "authentication", "limit": 5}'
@@ -201,7 +201,7 @@ curl "http://localhost:3000/api/v1/health"
201
201
 
202
202
  ### Reader API Endpoints
203
203
 
204
- For programmatic document reading and cross-document discovery:
204
+ These endpoints let you read documents and find connections across them:
205
205
 
206
206
  ```bash
207
207
  # Get all chunks for a document (ordered by index)
@@ -218,7 +218,7 @@ curl -X POST "http://localhost:3000/api/v1/chunks/batch-related" \
218
218
 
219
219
  ## Remote Mode
220
220
 
221
- RAG Vault can also run as an HTTP server for remote MCP clients like Claude.ai, Claude Desktop, or any client supporting Streamable HTTP or SSE transports.
221
+ RAG Vault can also run as an HTTP server so remote MCP clients like Claude.ai, Claude Desktop, or anything that supports Streamable HTTP or SSE can connect to it.
222
222
 
223
223
  ```bash
224
224
  # Start remote server (default port 3001)
@@ -228,7 +228,7 @@ npx github:RobThePCGuy/rag-vault --remote
228
228
  npx github:RobThePCGuy/rag-vault --remote --port 8080
229
229
  ```
230
230
 
231
- Stdio mode is unchanged -- omit `--remote` and everything works as before with Cursor, Claude Code, and Codex.
231
+ Stdio mode is unchanged. Just leave off `--remote` and everything works as before with Cursor, Claude Code, and Codex.
232
232
 
233
233
  ### Connecting from Claude Desktop
234
234
 
@@ -312,12 +312,18 @@ Pure semantic search would miss this. RAG Vault finds it.
312
312
  ```
313
313
  Document → Parse → Chunk by meaning → Embed locally → Store in LanceDB
314
314
 
315
- Query → Embed → Vector search → Keyword boost → Quality filter → Results
315
+ Query → Embed → Vector search + BM25 → Fusion → Optional reranking → Results
316
316
  ```
317
317
 
318
318
  **Smart chunking**: Splits by meaning, not character count. Keeps code blocks intact.
319
319
 
320
- **Hybrid search**: Vector similarity finds related content. Keyword boost ranks exact matches higher.
320
+ **Hybrid search**: Two fusion modes that combine vector similarity with BM25 keyword matching:
321
+ - **Boost mode** (default): BM25 boosts vector search distances multiplicatively. Simple and predictable.
322
+ - **RRF mode** (opt-in via `RAG_SEARCH_MODE=rrf`): [Reciprocal Rank Fusion](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) treats vector and BM25 as independent voters. This can surface documents that vector search alone would miss.
323
+
324
+ **Cross-encoder reranking** (opt-in): After the first pass, a cross-encoder model (`Xenova/ms-marco-MiniLM-L-6-v2`, ~23MB) scores each (query, passage) pair together for tighter relevance ranking. Turn it on with `RAG_RERANKER_ENABLED=true`.
325
+
326
+ **Query expansion** (opt-in): Generates reformulated queries to improve recall when searches are paraphrased or conceptual. Two backends: local template-based expansion (default, fully offline) or LLM-based [HyDE](https://arxiv.org/abs/2212.10496) through an external API. Turn it on with `RAG_HYDE_ENABLED=true`.
321
327
 
322
328
  **Quality filtering**: Groups results by relevance gaps instead of arbitrary top-K cutoffs.
323
329
 
@@ -347,12 +353,12 @@ Query → Embed → Vector search → Keyword boost → Quality filter → Resul
347
353
  | `DB_PATH` | `./lancedb/` | Where vectors are stored |
348
354
  | `CACHE_DIR` | `./models/` | Model cache directory |
349
355
  | `MODEL_NAME` | `Xenova/all-MiniLM-L6-v2` | HuggingFace embedding model |
350
- | `MAX_FILE_SIZE` | `104857600` (100 MB) | Maximum file size in bytes for ingestion |
351
- | `RAG_EMBEDDING_DEVICE` | `auto` | Inference device: `auto`, `cpu`, `cuda`, `dml`, `webgpu`, `wasm`, `gpu`, `webnn` |
356
+ | `MAX_FILE_SIZE` | `104857600` (100 MB) | Biggest file you can ingest |
357
+ | `RAG_EMBEDDING_DEVICE` | `auto` | Device for running embeddings: `auto`, `cpu`, `cuda`, `dml`, `webgpu`, `wasm`, `gpu`, `webnn` |
352
358
  | `WEB_PORT` | `3000` | Port for web interface |
353
359
  | `UPLOAD_DIR` | `./uploads/` | Temporary directory for web UI file uploads |
354
360
 
355
- > **Windows users:** `RAG_EMBEDDING_DEVICE=auto` attempts GPU providers (DirectML) which can fail if ONNX Runtime GPU binaries are not available. If you see embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config for reliable operation. See the [GPU acceleration FAQ](#frequently-asked-questions) for details.
361
+ > **Windows users:** `RAG_EMBEDDING_DEVICE=auto` tries GPU providers (DirectML), which can fail if ONNX Runtime GPU binaries aren't available. If you see embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config for reliable operation. See the [GPU acceleration FAQ](#frequently-asked-questions) for details.
356
362
 
357
363
  One-command override (no `.env` edit):
358
364
 
@@ -371,13 +377,38 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
371
377
 
372
378
  | Variable | Default | What it does |
373
379
  |----------|---------|--------------|
374
- | `RAG_HYBRID_WEIGHT` | `0.6` | Keyword boost strength. `0` = semantic-only, `1.0` = BM25-only, higher = stronger boost for exact keyword matches |
375
- | `RAG_GROUPING` | unset | Quality filter grouping mode: `similar` = top group only, `related` = top 2 groups |
376
- | `RAG_MAX_DISTANCE` | unset | Filter out results below this relevance threshold |
377
- | `RAG_GROUPING_STD_MULTIPLIER` | `1.5` | Standard-deviation multiplier for detecting relevance gaps between result groups |
378
- | `RAG_HYBRID_CANDIDATE_MULTIPLIER` | `2` | Multiplier for number of vector candidates to fetch before keyword reranking |
379
- | `RAG_FTS_MAX_FAILURES` | `3` | Number of full-text search failures before temporarily disabling FTS |
380
- | `RAG_FTS_COOLDOWN_MS` | `300000` (5 min) | Cooldown period before retrying FTS after max failures reached |
380
+ | `RAG_SEARCH_MODE` | `boost` | Fusion mode: `boost` (multiplicative keyword boost) or `rrf` (Reciprocal Rank Fusion) |
381
+ | `RAG_HYBRID_WEIGHT` | `0.6` | Balance between vector and BM25. `0` = vector-only, `1.0` = BM25-only |
382
+ | `RAG_RRF_K` | `60` | RRF smoothing constant (only applies in `rrf` mode). Industry standard is 60. |
383
+ | `RAG_GROUPING` | unset | Quality filter: `similar` = top group only, `related` = top 2 groups |
384
+ | `RAG_MAX_DISTANCE` | unset | Drops results below this relevance threshold (use with `boost` mode; `rrf` scores are rank-based) |
385
+ | `RAG_GROUPING_STD_MULTIPLIER` | `1.5` | How many standard deviations between groups counts as a relevance gap |
386
+ | `RAG_HYBRID_CANDIDATE_MULTIPLIER` | `2` | How many extra vector candidates to grab before keyword reranking |
387
+ | `RAG_FTS_MAX_FAILURES` | `3` | Full-text search failures before FTS is temporarily disabled |
388
+ | `RAG_FTS_COOLDOWN_MS` | `300000` (5 min) | How long to wait before retrying FTS after hitting the failure limit |
389
+
390
+ ### Cross-Encoder Reranking (opt-in)
391
+
392
+ | Variable | Default | What it does |
393
+ |----------|---------|--------------|
394
+ | `RAG_RERANKER_ENABLED` | `false` | Turn on cross-encoder reranking for better results |
395
+ | `RAG_RERANKER_MODEL` | `Xenova/ms-marco-MiniLM-L-6-v2` | HuggingFace cross-encoder model (~23MB ONNX, downloads on first use) |
396
+ | `RAG_RERANKER_CANDIDATE_MULTIPLIER` | `2` | Fetch this many extra candidates for the reranker to score |
397
+ | `RAG_RERANKER_DEVICE` | `auto` | Device for the reranker (same options as `RAG_EMBEDDING_DEVICE`) |
398
+ | `RERANKER_INIT_TIMEOUT_MS` | `600000` (10 min) | Timeout for model download and initialization |
399
+
400
+ ### Query Expansion / HyDE (opt-in)
401
+
402
+ | Variable | Default | What it does |
403
+ |----------|---------|--------------|
404
+ | `RAG_HYDE_ENABLED` | `false` | Turn on query expansion for better recall |
405
+ | `RAG_HYDE_BACKEND` | `rule-based` | `rule-based` for local template expansion, `api` for LLM-based HyDE |
406
+ | `RAG_HYDE_EXPANSIONS` | `2` | Number of expanded queries to generate |
407
+ | `RAG_HYDE_API_KEY` | unset | API key for LLM backend (required when `RAG_HYDE_BACKEND=api`) |
408
+ | `RAG_HYDE_API_BASE_URL` | `https://api.anthropic.com` | API endpoint for LLM backend |
409
+ | `RAG_HYDE_API_MODEL` | `claude-haiku-4-5-20251001` | Model for LLM-based expansion |
410
+
411
+ > **Privacy note:** The `api` backend sends query text to an external LLM endpoint, which breaks the "zero cloud" guarantee. The default `rule-based` backend is fully local.
381
412
 
382
413
  ### Security (optional)
383
414
 
@@ -395,7 +426,7 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
395
426
  | `ALLOWED_SCAN_ROOTS` | Home directory | Directories allowed for database scanning |
396
427
  | `JSON_BODY_LIMIT` | `5mb` | Max request body size |
397
428
  | `REQUEST_TIMEOUT_MS` | `30000` | API request timeout |
398
- | `REQUEST_LOGGING` | `false` | Enable request audit logging |
429
+ | `REQUEST_LOGGING` | `false` | Turn on request audit logging |
399
430
 
400
431
  > Copy [`.env.example`](.env.example) for a complete configuration template.
401
432
 
@@ -413,7 +444,7 @@ npx github:RobThePCGuy/rag-vault --gpu-auto
413
444
  <details>
414
445
  <summary><strong>Is my data really private?</strong></summary>
415
446
 
416
- For local files, yes. Indexing and search run on your machine after the embedding model downloads (~90MB). RAG Vault only uses network if you choose remote URL ingestion or need to download a model.
447
+ For local files, yes. Indexing and search run on your machine after the embedding model downloads (~90MB). RAG Vault only hits the network if you choose remote URL ingestion or need to download a model.
417
448
 
418
449
  </details>
419
450
 
@@ -427,11 +458,11 @@ Yes, after the first run. The model caches locally.
427
458
  <details>
428
459
  <summary><strong>What about GPU acceleration?</strong></summary>
429
460
 
430
- RAG Vault uses Transformers.js device auto-selection by default (`RAG_EMBEDDING_DEVICE=auto`). When GPU providers are properly configured, this can speed up embedding generation.
461
+ RAG Vault picks a device automatically by default (`RAG_EMBEDDING_DEVICE=auto`). When GPU providers are set up correctly, this can speed up embedding generation.
431
462
 
432
- **Important:** On Windows, `auto` tries DirectML (`dml`) which requires ONNX Runtime GPU binaries. If those binaries are not installed or your GPU setup is incomplete, the server will fail to start entirely -- it does not gracefully fall back to CPU. The same applies on Linux without CUDA binaries.
463
+ **Important:** On Windows, `auto` tries DirectML (`dml`), which requires ONNX Runtime GPU binaries. If those binaries aren't installed or your GPU setup is incomplete, the server won't start at all. It doesn't fall back to CPU gracefully. The same goes for Linux without CUDA binaries.
433
464
 
434
- **Recommendation:** If you encounter embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. CPU mode is reliable on all platforms and fast enough for most workloads (the default model is only ~90MB).
465
+ **Recommendation:** If you hit embedding initialization errors, set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. CPU mode is reliable on all platforms and fast enough for most workloads (the default model is only ~90MB).
435
466
 
436
467
  ```json
437
468
  "env": {
@@ -446,7 +477,7 @@ Supported device values: `auto`, `cpu`, `cuda`, `dml`, `gpu`, `wasm`, `webgpu`,
446
477
  <details>
447
478
  <summary><strong>Can I change the embedding model?</strong></summary>
448
479
 
449
- Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You must delete `DB_PATH` and re-ingest because different models produce incompatible vectors.
480
+ Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You'll need to delete `DB_PATH` and re-ingest because different models produce incompatible vectors.
450
481
 
451
482
  **Recommended upgrade:** For better quality and multilingual support, use [EmbeddingGemma](https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX):
452
483
 
@@ -454,7 +485,7 @@ Yes. Set `MODEL_NAME` to any compatible HuggingFace model. You must delete `DB_P
454
485
  "MODEL_NAME": "onnx-community/embeddinggemma-300m-ONNX"
455
486
  ```
456
487
 
457
- This model is a strong option for multilingual and higher-quality retrieval use cases.
488
+ It's a solid pick if you need multilingual support or higher-quality retrieval.
458
489
 
459
490
  **Other specialized models:**
460
491
  - Scientific: `sentence-transformers/allenai-specter`
@@ -473,16 +504,16 @@ Copy the `DB_PATH` directory (default: `./lancedb/`).
473
504
 
474
505
  | Problem | Solution |
475
506
  |---------|----------|
476
- | No results found | Documents must be ingested first. Run "List all ingested files" to check. |
477
- | Model download failed | Check internet connection. Model is ~90MB from HuggingFace. |
507
+ | No results found | Documents need to be ingested first. Run "List all ingested files" to check. |
508
+ | Model download failed | Check your internet connection. The model is ~90MB from HuggingFace. |
478
509
  | Embedding initialization fails | Set `RAG_EMBEDDING_DEVICE=cpu` in your MCP config. The `auto` default can fail on Windows without GPU binaries. |
479
510
  | `Protobuf parsing failed` | Corrupted model cache. Delete `CACHE_DIR` (default: `./models/`) and restart. RAG Vault also auto-retries with an isolated recovery cache. |
480
511
  | File too large | Default limit is 100MB. Set `MAX_FILE_SIZE` higher or split the file. |
481
512
  | Path outside BASE_DIR | All file paths must be under `BASE_DIR`. Use absolute paths. |
482
- | MCP tools not showing | Verify config syntax, restart your AI tool completely (Cmd+Q on Mac). |
513
+ | MCP tools not showing | Check your config syntax and restart your AI tool completely (Cmd+Q on Mac). |
483
514
  | `mcp-publisher login github` fails with `slow_down` | Use token login instead: `mcp-publisher login github --token "$(gh auth token)"` (or pass a PAT). |
484
- | 401 Unauthorized | API key required. Set `RAG_API_KEY` or use correct header format. |
485
- | 429 Too Many Requests | Rate limited. Wait for reset or increase `RATE_LIMIT_MAX_REQUESTS`. |
515
+ | 401 Unauthorized | API key required. Set `RAG_API_KEY` or use the correct header format. |
516
+ | 429 Too Many Requests | Rate limited. Wait for the reset or increase `RATE_LIMIT_MAX_REQUESTS`. |
486
517
  | CORS errors | Add your origin to `CORS_ORIGINS` environment variable. |
487
518
 
488
519
  ## Development
@@ -527,7 +558,7 @@ pnpm release:dry
527
558
 
528
559
  ### Test Tiers
529
560
 
530
- - `pnpm test:unit`: deterministic tests for local/CI quality checks, excluding model-download integration paths.
561
+ - `pnpm test:unit`: deterministic tests for local/CI quality checks. Doesn't include model-download integration paths.
531
562
  - `pnpm test:integration`: full integration and E2E workflows, including embedding model initialization.
532
563
 
533
564
  Use `RUN_EMBEDDING_INTEGRATION=1` to explicitly opt into network/model-dependent suites.
@@ -537,8 +568,8 @@ Use `RUN_EMBEDDING_INTEGRATION=1` to explicitly opt into network/model-dependent
537
568
  - Releases are local and scripted via `scripts/release-npm.sh`.
538
569
  - Supported bumps: `patch`, `minor`, `major`.
539
570
  - The script runs dependency installs, `pnpm check:all`, and `pnpm ui:build` before touching version files.
540
- - `package.json` and `server.json` versions are updated only after checks pass, and auto-restored if any later step fails.
541
- - `pnpm release:dry` performs the full gate plus npm dry-run publish and always restores version files.
571
+ - `package.json` and `server.json` versions only get updated after checks pass, and they're auto-restored if any later step fails.
572
+ - `pnpm release:dry` runs the full gate plus npm dry-run publish and always restores version files.
542
573
 
543
574
  ### Project Structure
544
575
 
@@ -550,11 +581,13 @@ src/
550
581
  ├── errors/ # Error handling utilities
551
582
  ├── explainability/ # Keyword-based result explanations
552
583
  ├── flywheel/ # Feedback loop (pin/dismiss reranking)
584
+ ├── hyde/ # Query expansion + HyDE (LLM-based)
553
585
  ├── parser/ # PDF, DOCX, HTML parsing
554
586
  ├── query/ # Advanced query syntax parser
587
+ ├── reranker/ # Cross-encoder reranking (Transformers.js)
555
588
  ├── server/ # MCP tool handlers + remote transport
556
589
  ├── utils/ # Config, file helpers, process handlers
557
- ├── vectordb/ # LanceDB + hybrid search
590
+ ├── vectordb/ # LanceDB + hybrid search (boost + RRF)
558
591
  └── web/ # Express server + REST API
559
592
 
560
593
  web-ui/ # React frontend (Vite + Tailwind)
@@ -573,6 +606,6 @@ MIT: free for personal and commercial use.
573
606
 
574
607
  Built with [Model Context Protocol](https://modelcontextprotocol.io/), [LanceDB](https://lancedb.com/), and [Transformers.js](https://huggingface.co/docs/transformers.js).
575
608
 
576
- > Started as a fork of [mcp-local-rag](https://github.com/shinpr/mcp-local-rag) by [Shinsuke Kagawa](https://github.com/shinpr). Now its its own thing.
577
- > Huge credit to upstream contributors for the foundation, Ive been iterating hard from there.
609
+ > Started as a fork of [mcp-local-rag](https://github.com/shinpr/mcp-local-rag) by [Shinsuke Kagawa](https://github.com/shinpr). Now it's its own thing.
610
+ > Huge credit to upstream contributors for the foundation, I've been iterating hard from there.
578
611
  > Local-first dev tools, all the way.
@@ -0,0 +1,47 @@
1
+ /**
2
+ * HyDE configuration
3
+ */
4
+ export interface HyDEConfig {
5
+ /** Whether HyDE is enabled */
6
+ enabled: boolean;
7
+ /** Backend: 'rule-based' for local template-based query expansion, 'api' for LLM-based HyDE */
8
+ backend: 'rule-based' | 'api';
9
+ /** Number of query expansions to generate (default: 2) */
10
+ numExpansions: number;
11
+ /** API key for LLM backend (optional) */
12
+ apiKey?: string;
13
+ /** API base URL for LLM backend (optional) */
14
+ apiBaseUrl?: string;
15
+ /** API model name for LLM backend (optional) */
16
+ apiModel?: string;
17
+ }
18
+ /**
19
+ * Expanded query with weight
20
+ */
21
+ export interface ExpandedQuery {
22
+ /** The expanded query text */
23
+ text: string;
24
+ /** Weight for RRF voting (original = 1.0, expansions = 0.5) */
25
+ weight: number;
26
+ }
27
+ /**
28
+ * HyDE (Hypothetical Document Embeddings) query expander.
29
+ *
30
+ * Generates hypothetical answer documents from a query to improve
31
+ * retrieval recall. Each expansion becomes an additional voter in
32
+ * RRF fusion with a lower weight (0.5) than the original query (1.0).
33
+ */
34
+ export declare class HyDEExpander {
35
+ private readonly config;
36
+ constructor(config: HyDEConfig);
37
+ /**
38
+ * Expand a query into the original plus hypothetical documents.
39
+ *
40
+ * @param query - The original search query
41
+ * @returns Array of expanded queries with weights.
42
+ * First item is always the original query (weight 1.0).
43
+ * Subsequent items are hypothetical expansions (weight 0.5).
44
+ */
45
+ expandQuery(query: string): Promise<ExpandedQuery[]>;
46
+ }
47
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/hyde/index.ts"],"names":[],"mappings":"AAiBA;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,8BAA8B;IAC9B,OAAO,EAAE,OAAO,CAAA;IAChB,+FAA+F;IAC/F,OAAO,EAAE,YAAY,GAAG,KAAK,CAAA;IAC7B,0DAA0D;IAC1D,aAAa,EAAE,MAAM,CAAA;IACrB,yCAAyC;IACzC,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,8CAA8C;IAC9C,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,gDAAgD;IAChD,QAAQ,CAAC,EAAE,MAAM,CAAA;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,8BAA8B;IAC9B,IAAI,EAAE,MAAM,CAAA;IACZ,+DAA+D;IAC/D,MAAM,EAAE,MAAM,CAAA;CACf;AAkMD;;;;;;GAMG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAY;gBAEvB,MAAM,EAAE,UAAU;IAI9B;;;;;;;OAOG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;CAkC3D"}
@@ -0,0 +1,203 @@
1
+ // Query expansion module for improved retrieval recall
2
+ //
3
+ // Two backends are available:
4
+ // - 'rule-based': Template-based query expansion (local, offline, no dependencies).
5
+ // Generates reformulated queries using pattern detection and templates.
6
+ // This is a classic IR query expansion technique, not HyDE.
7
+ // - 'api': LLM-based HyDE (Hypothetical Document Embeddings). Generates hypothetical
8
+ // answer documents via an external LLM API, then embeds them for retrieval.
9
+ // Reference: Gao et al. "Precise Zero-Shot Dense Retrieval without Relevance Labels" (2022)
10
+ //
11
+ // Each expansion is embedded alongside the original query and becomes a separate
12
+ // voter in RRF fusion, improving recall for paraphrased or conceptual queries.
13
// ============================================
// Query Pattern Detection
// ============================================
/** Common question word patterns */
const QUESTION_PATTERN = /^(what|how|why|when|where|who|which|can|does|is|are|was|were|do|did|should|could|would)\s+/i;
/** Common technical/code patterns */
const CODE_PATTERN = /`[^`]+`|[A-Z][a-z]+[A-Z]|[a-z]+_[a-z]+|\.[a-z]+\(|ERR_|ERROR_|[A-Z_]{3,}/;
/** Error message patterns */
const ERROR_PATTERN = /error|exception|fail|crash|bug|issue|problem|broken|not working/i;
/**
 * Classify a query so the expander can pick matching templates.
 * Precedence is deliberate: error cues outrank code cues, which
 * outrank question words; anything else is a conceptual query.
 */
function detectQueryType(query) {
    return ERROR_PATTERN.test(query)
        ? 'error'
        : CODE_PATTERN.test(query)
            ? 'code'
            : QUESTION_PATTERN.test(query)
                ? 'question'
                : 'concept';
}
34
+ // ============================================
35
+ // Rule-Based Expansion
36
+ // ============================================
37
/**
 * Generate hypothetical documents using rule-based templates.
 * Works offline with no dependencies — always available as a fallback.
 *
 * The strategy varies by detected query type:
 * - Questions: Convert to declarative statements
 * - Errors: Frame as troubleshooting documentation
 * - Code: Frame as technical documentation
 * - Concepts: Frame as explanatory documentation
 */
function ruleBasedExpansion(query, numExpansions) {
    // Drop trailing question marks so templates read as statements.
    const cleanQuery = query.replace(/\?+$/, '').trim();
    const lower = cleanQuery.toLowerCase();
    const capitalized = `${cleanQuery.charAt(0).toUpperCase()}${cleanQuery.slice(1)}`;
    const queryType = detectQueryType(query);
    let candidates;
    if (queryType === 'question') {
        candidates = [];
        // Reformulate "how do I X?" as a declarative lead sentence when
        // enough text remains after stripping the question prefix.
        const declarative = cleanQuery.replace(QUESTION_PATTERN, '').trim();
        if (declarative.length > 3) {
            candidates.push(`${declarative.charAt(0).toUpperCase()}${declarative.slice(1)}. This is explained in detail in the documentation.`);
        }
        candidates.push(`The documentation explains that ${lower}. The key points are as follows.`, `A guide about ${lower} would cover the following topics and provide step-by-step instructions.`);
    }
    else if (queryType === 'error') {
        // Frame as troubleshooting documentation.
        candidates = [
            `To resolve ${cleanQuery}, follow these troubleshooting steps. The root cause is typically related to configuration or dependencies.`,
            `The error "${cleanQuery}" occurs when the system encounters an unexpected state. The solution involves checking the following.`,
            `Common causes for ${cleanQuery} include misconfiguration, missing dependencies, and version incompatibilities. Here is how to fix it.`,
        ];
    }
    else if (queryType === 'code') {
        // Frame as technical documentation.
        candidates = [
            `The implementation of ${cleanQuery} involves the following components and follows these patterns.`,
            `Documentation for ${cleanQuery}: This feature provides the following functionality and can be configured as described below.`,
            `${cleanQuery} is used to handle specific operations in the system. Here is how it works and how to use it correctly.`,
        ];
    }
    else {
        // General conceptual expansion.
        candidates = [
            `A document about ${cleanQuery} would discuss the following key aspects, including definitions, usage patterns, and best practices.`,
            `${capitalized} is a concept that encompasses several important areas. The documentation covers the following topics.`,
            `The following documentation explains ${cleanQuery} in detail, covering its purpose, implementation, and common use cases.`,
        ];
    }
    // Never hand back more expansions than the caller asked for.
    return candidates.slice(0, numExpansions);
}
89
+ // ============================================
90
+ // API-Based Expansion
91
+ // ============================================
92
/**
 * Generate hypothetical documents using an LLM API
 * (Anthropic Messages endpoint). Falls back to rule-based
 * expansion on any failure: missing key, network error,
 * timeout, or an unusable response body.
 */
async function apiBasedExpansion(query, numExpansions, config) {
    // Without credentials the API path cannot work; degrade immediately.
    if (!config.apiKey) {
        console.error('HyDE: No API key configured, falling back to rule-based expansion');
        return ruleBasedExpansion(query, numExpansions);
    }
    const baseUrl = config.apiBaseUrl || 'https://api.anthropic.com';
    const model = config.apiModel || 'claude-haiku-4-5-20251001';
    const prompt = `Generate ${numExpansions} short hypothetical document excerpts (2-3 sentences each) that would be relevant to answering the following query. Each excerpt should sound like it comes from real documentation. Return only the excerpts, separated by newlines.

Query: ${query}`;
    try {
        const response = await fetch(`${baseUrl}/v1/messages`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-api-key': config.apiKey,
                'anthropic-version': '2023-06-01',
            },
            body: JSON.stringify({
                model,
                max_tokens: 300,
                messages: [{ role: 'user', content: prompt }],
            }),
            // Abort slow requests; the resulting error lands in the catch below.
            signal: AbortSignal.timeout(10000), // 10s timeout
        });
        if (!response.ok) {
            throw new Error(`API request failed: ${response.status} ${response.statusText}`);
        }
        const payload = await response.json();
        // Minimal runtime validation — we only depend on content[0].text.
        if (!payload || typeof payload !== 'object' || !Array.isArray(payload['content'])) {
            throw new Error('API returned unexpected response format');
        }
        const firstBlock = payload['content'][0];
        const rawText = firstBlock && typeof firstBlock['text'] === 'string' ? firstBlock['text'] : '';
        // One excerpt per line; very short lines are treated as noise.
        const expansions = rawText
            .split('\n')
            .map((line) => line.trim())
            .filter((line) => line.length > 20)
            .slice(0, numExpansions);
        if (expansions.length > 0) {
            return expansions;
        }
        console.error('HyDE: API returned no valid expansions, falling back to rule-based');
        return ruleBasedExpansion(query, numExpansions);
    }
    catch (error) {
        console.error(`HyDE: API expansion failed: ${error.message}, falling back to rule-based`);
        return ruleBasedExpansion(query, numExpansions);
    }
}
149
+ // ============================================
150
+ // HyDEExpander Class
151
+ // ============================================
152
/**
 * HyDE (Hypothetical Document Embeddings) query expander.
 *
 * Generates hypothetical answer documents from a query to improve
 * retrieval recall. Each expansion becomes an additional voter in
 * RRF fusion with a lower weight (0.5) than the original query (1.0).
 */
export class HyDEExpander {
    config;
    constructor(config) {
        this.config = config;
    }
    /**
     * Expand a query into the original plus hypothetical documents.
     *
     * @param query - The original search query
     * @returns Array of expanded queries with weights.
     *          First item is always the original query (weight 1.0).
     *          Subsequent items are hypothetical expansions (weight 0.5).
     */
    async expandQuery(query) {
        const original = { text: query, weight: 1.0 };
        if (!this.config.enabled) {
            return [original];
        }
        // Expansion adds little signal for very short queries (< 3 words).
        if (query.trim().split(/\s+/).length < 3) {
            return [original];
        }
        const results = [original];
        try {
            const expansions = this.config.backend === 'api'
                ? await apiBasedExpansion(query, this.config.numExpansions, this.config)
                : ruleBasedExpansion(query, this.config.numExpansions);
            // Hypothetical documents vote with half the weight of the real query.
            for (const text of expansions) {
                results.push({ text, weight: 0.5 });
            }
        }
        catch (error) {
            console.error(`HyDE: Expansion failed: ${error.message}`);
            // Graceful degradation: fall through with only the original query.
        }
        return results;
    }
}
203
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/hyde/index.ts"],"names":[],"mappings":"AAAA,uDAAuD;AACvD,EAAE;AACF,8BAA8B;AAC9B,oFAAoF;AACpF,0EAA0E;AAC1E,8DAA8D;AAC9D,qFAAqF;AACrF,8EAA8E;AAC9E,8FAA8F;AAC9F,EAAE;AACF,iFAAiF;AACjF,+EAA+E;AAkC/E,+CAA+C;AAC/C,0BAA0B;AAC1B,+CAA+C;AAE/C,oCAAoC;AACpC,MAAM,gBAAgB,GACpB,6FAA6F,CAAA;AAE/F,qCAAqC;AACrC,MAAM,YAAY,GAAG,0EAA0E,CAAA;AAE/F,6BAA6B;AAC7B,MAAM,aAAa,GAAG,kEAAkE,CAAA;AAExF;;GAEG;AACH,SAAS,eAAe,CAAC,KAAa;IACpC,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,OAAO,CAAA;IAC7C,IAAI,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAC3C,IAAI,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,UAAU,CAAA;IACnD,OAAO,SAAS,CAAA;AAClB,CAAC;AAED,+CAA+C;AAC/C,uBAAuB;AACvB,+CAA+C;AAE/C;;;;;;;;;GASG;AACH,SAAS,kBAAkB,CAAC,KAAa,EAAE,aAAqB;IAC9D,MAAM,SAAS,GAAG,eAAe,CAAC,KAAK,CAAC,CAAA;IACxC,MAAM,UAAU,GAAa,EAAE,CAAA;IAE/B,qDAAqD;IACrD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IAEnD,QAAQ,SAAS,EAAE,CAAC;QAClB,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,4CAA4C;YAC5C,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;YACnE,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,UAAU,CAAC,IAAI,CACb,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,qDAAqD,CACnH,CAAA;YACH,CAAC;YAED,mCAAmC;YACnC,UAAU,CAAC,IAAI,CACb,mCAAmC,UAAU,CAAC,WAAW,EAAE,kCAAkC,CAC9F,CAAA;YAED,2BAA2B;YAC3B,UAAU,CAAC,IAAI,CACb,iBAAiB,UAAU,CAAC,WAAW,EAAE,0EAA0E,CACpH,CAAA;YACD,MAAK;QACP,CAAC;QAED,KAAK,OAAO,CAAC,CAAC,CAAC;YACb,yCAAyC;YACzC,UAAU,CAAC,IAAI,CACb,cAAc,UAAU,6GAA6G,CACtI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,cAAc,UAAU,wGAAwG,CACjI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,qBAAqB,UAAU,wGAAwG,CACxI,CAAA;YACD,MAAK;QACP,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,mCAAmC;YACnC,UAAU,CAAC,IAAI,CACb,yBAAyB,UAAU,gEAAgE,CACpG,CAAA;YACD,UAAU,CAAC,IAAI,CACb,qBAAqB,UAAU,+FAA+F,CAC/H,CAAA;YACD,UAAU,CAAC,IAAI,CACb,GAAG,UAAU,yGAAyG,CACvH,CAAA;YACD,MAAK;QACP,CAAC;QAED,OAAO,CAAC,CAAC,CAAC;YACR,+BAA+B;YAC/B,UAAU,CAAC,IAAI,CACb,oB
AAoB,UAAU,sGAAsG,CACrI,CAAA;YACD,UAAU,CAAC,IAAI,CACb,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,wGAAwG,CACpK,CAAA;YACD,UAAU,CAAC,IAAI,CACb,wCAAwC,UAAU,yEAAyE,CAC5H,CAAA;YACD,MAAK;QACP,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAA;AAC3C,CAAC;AAED,+CAA+C;AAC/C,sBAAsB;AACtB,+CAA+C;AAE/C;;;GAGG;AACH,KAAK,UAAU,iBAAiB,CAC9B,KAAa,EACb,aAAqB,EACrB,MAAkB;IAElB,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;QACnB,OAAO,CAAC,KAAK,CAAC,mEAAmE,CAAC,CAAA;QAClF,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACjD,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,UAAU,IAAI,2BAA2B,CAAA;IAChE,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,IAAI,2BAA2B,CAAA;IAE5D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,YAAY,aAAa;;SAEnC,KAAK,EAAE,CAAA;QAEZ,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,cAAc,EAAE;YACrD,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,WAAW,EAAE,MAAM,CAAC,MAAM;gBAC1B,mBAAmB,EAAE,YAAY;aAClC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK;gBACL,UAAU,EAAE,GAAG;gBACf,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;aAC9C,CAAC;YACF,MAAM,EAAE,WAAW,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,cAAc;SACnD,CAAC,CAAA;QAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAA;QAClF,CAAC;QAED,MAAM,IAAI,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QAE3C,2CAA2C;QAC3C,MAAM,OAAO,GAAG,IAAsC,CAAA;QACtD,IAAI,CAAC,OAAO,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;YAClF,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAA;QAC5D,CAAC;QACD,MAAM,OAAO,GAAG,OAAO,CAAC,SAAS,CAAc,CAAA;QAC/C,MAAM,UAAU,GAAG,OAAO,CAAC,CAAC,CAAwC,CAAA;QACpE,MAAM,IAAI,GAAG,UAAU,IAAI,OAAO,UAAU,CAAC,MAAM,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;QAC3F,MAAM,UAAU,GAAG,IAAI;aACpB,KAAK,CAAC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;aAClC,MAAM,CAAC,CAAC,IAAY,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC;aAC1C,KAAK,CAAC,CAAC,EAAE,aAAa,CAAC,CAAA;QAE1B,IAAI,UAAU,CAAC,MAAM,KAAK,C
AAC,EAAE,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,oEAAoE,CAAC,CAAA;YACnF,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;QACjD,CAAC;QAED,OAAO,UAAU,CAAA;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CACX,+BAAgC,KAAe,CAAC,OAAO,8BAA8B,CACtF,CAAA;QACD,OAAO,kBAAkB,CAAC,KAAK,EAAE,aAAa,CAAC,CAAA;IACjD,CAAC;AACH,CAAC;AAED,+CAA+C;AAC/C,qBAAqB;AACrB,+CAA+C;AAE/C;;;;;;GAMG;AACH,MAAM,OAAO,YAAY;IACN,MAAM,CAAY;IAEnC,YAAY,MAAkB;QAC5B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,WAAW,CAAC,KAAa;QAC7B,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;QACvC,CAAC;QAED,mDAAmD;QACnD,MAAM,OAAO,GAAoB,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;QAE/D,4DAA4D;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAA;QAClD,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,OAAO,OAAO,CAAA;QAChB,CAAC;QAED,IAAI,CAAC;YACH,IAAI,UAAoB,CAAA;YAExB,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,KAAK,EAAE,CAAC;gBAClC,UAAU,GAAG,MAAM,iBAAiB,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,CAAA;YACrF,CAAC;iBAAM,CAAC;gBACN,UAAU,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAA;YACnE,CAAC;YAED,mCAAmC;YACnC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;gBACnC,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAA;YAChD,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,2BAA4B,KAAe,CAAC,OAAO,EAAE,CAAC,CAAA;YACpE,mEAAmE;QACrE,CAAC;QAED,OAAO,OAAO,CAAA;IAChB,CAAC;CACF"}
@@ -0,0 +1,76 @@
1
/**
 * Configuration for the cross-encoder reranker.
 */
export interface RerankerConfig {
    /** HuggingFace cross-encoder model path */
    modelPath: string;
    /** Model cache directory */
    cacheDir: string;
    /**
     * Device hint for Transformers.js runtime.
     * Examples: auto, cpu, cuda, dml, webgpu
     */
    device?: string;
    /**
     * Timeout for model initialization/download in milliseconds.
     * Default: 600000 (10 minutes).
     */
    initTimeoutMs?: number;
}
20
/**
 * A reranked passage: its position in the caller's input array
 * plus the relevance score assigned by the cross-encoder.
 */
export interface RerankedResult {
    /** Original index in the input array */
    index: number;
    /** Cross-encoder relevance score (higher = more relevant) */
    score: number;
}
29
/**
 * Cross-encoder reranker using Transformers.js
 *
 * Scores (query, passage) pairs for relevance using a cross-encoder model.
 * Unlike bi-encoders, cross-encoders jointly encode both texts, producing
 * more accurate relevance judgments at the cost of speed.
 *
 * Default model: Xenova/ms-marco-MiniLM-L-6-v2 (~23MB ONNX)
 */
export declare class Reranker {
    /** Loaded Transformers.js model handle (null until initialized). NOTE(review): exact type erased in emit — confirm in src. */
    private model;
    /** In-flight initialization promise, used to dedupe concurrent init calls — presumably; confirm against implementation. */
    private initPromise;
    /** Immutable reranker settings supplied at construction. */
    private readonly config;
    constructor(config: RerankerConfig);
    /**
     * Get the model name/path
     */
    getModelName(): string;
    /**
     * Resolve the device to use for inference
     */
    private resolveDevice;
    /**
     * Get a recovery cache directory for corrupted model caches
     */
    private getRecoveryCacheDir;
    /**
     * Check if an error is recoverable by using a fresh cache
     */
    private isRecoverableCacheError;
    /**
     * Initialize Transformers.js cross-encoder model
     */
    initialize(): Promise<void>;
    /**
     * Ensure model is initialized (lazy initialization)
     */
    private ensureInitialized;
    /**
     * Rerank passages by relevance to a query using cross-encoder scoring.
     *
     * @param query - The search query
     * @param passages - Array of passage texts to score
     * @returns Array of {index, score} sorted by score descending (most relevant first)
     */
    rerank(query: string, passages: string[]): Promise<RerankedResult[]>;
}
76
+ //# sourceMappingURL=index.d.ts.map