@robthepcguy/rag-vault 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/LICENSE +24 -0
  2. package/README.md +421 -0
  3. package/dist/bin/install-skills.d.ts +20 -0
  4. package/dist/bin/install-skills.d.ts.map +1 -0
  5. package/dist/bin/install-skills.js +196 -0
  6. package/dist/bin/install-skills.js.map +1 -0
  7. package/dist/chunker/index.d.ts +11 -0
  8. package/dist/chunker/index.d.ts.map +1 -0
  9. package/dist/chunker/index.js +6 -0
  10. package/dist/chunker/index.js.map +1 -0
  11. package/dist/chunker/semantic-chunker.d.ts +96 -0
  12. package/dist/chunker/semantic-chunker.d.ts.map +1 -0
  13. package/dist/chunker/semantic-chunker.js +267 -0
  14. package/dist/chunker/semantic-chunker.js.map +1 -0
  15. package/dist/chunker/sentence-splitter.d.ts +16 -0
  16. package/dist/chunker/sentence-splitter.d.ts.map +1 -0
  17. package/dist/chunker/sentence-splitter.js +114 -0
  18. package/dist/chunker/sentence-splitter.js.map +1 -0
  19. package/dist/embedder/index.d.ts +55 -0
  20. package/dist/embedder/index.d.ts.map +1 -0
  21. package/dist/embedder/index.js +146 -0
  22. package/dist/embedder/index.js.map +1 -0
  23. package/dist/errors/index.d.ts +73 -0
  24. package/dist/errors/index.d.ts.map +1 -0
  25. package/dist/errors/index.js +170 -0
  26. package/dist/errors/index.js.map +1 -0
  27. package/dist/index.d.ts +3 -0
  28. package/dist/index.d.ts.map +1 -0
  29. package/dist/index.js +91 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/parser/html-parser.d.ts +14 -0
  32. package/dist/parser/html-parser.d.ts.map +1 -0
  33. package/dist/parser/html-parser.js +99 -0
  34. package/dist/parser/html-parser.js.map +1 -0
  35. package/dist/parser/index.d.ts +144 -0
  36. package/dist/parser/index.d.ts.map +1 -0
  37. package/dist/parser/index.js +446 -0
  38. package/dist/parser/index.js.map +1 -0
  39. package/dist/parser/pdf-filter.d.ts +89 -0
  40. package/dist/parser/pdf-filter.d.ts.map +1 -0
  41. package/dist/parser/pdf-filter.js +304 -0
  42. package/dist/parser/pdf-filter.js.map +1 -0
  43. package/dist/server/index.d.ts +144 -0
  44. package/dist/server/index.d.ts.map +1 -0
  45. package/dist/server/index.js +518 -0
  46. package/dist/server/index.js.map +1 -0
  47. package/dist/server/raw-data-utils.d.ts +81 -0
  48. package/dist/server/raw-data-utils.d.ts.map +1 -0
  49. package/dist/server/raw-data-utils.js +196 -0
  50. package/dist/server/raw-data-utils.js.map +1 -0
  51. package/dist/server/schemas.d.ts +186 -0
  52. package/dist/server/schemas.d.ts.map +1 -0
  53. package/dist/server/schemas.js +99 -0
  54. package/dist/server/schemas.js.map +1 -0
  55. package/dist/utils/config-parsers.d.ts +14 -0
  56. package/dist/utils/config-parsers.d.ts.map +1 -0
  57. package/dist/utils/config-parsers.js +47 -0
  58. package/dist/utils/config-parsers.js.map +1 -0
  59. package/dist/utils/config.d.ts +37 -0
  60. package/dist/utils/config.d.ts.map +1 -0
  61. package/dist/utils/config.js +52 -0
  62. package/dist/utils/config.js.map +1 -0
  63. package/dist/utils/logger.d.ts +36 -0
  64. package/dist/utils/logger.d.ts.map +1 -0
  65. package/dist/utils/logger.js +64 -0
  66. package/dist/utils/logger.js.map +1 -0
  67. package/dist/utils/math.d.ts +34 -0
  68. package/dist/utils/math.d.ts.map +1 -0
  69. package/dist/utils/math.js +73 -0
  70. package/dist/utils/math.js.map +1 -0
  71. package/dist/utils/process-handlers.d.ts +26 -0
  72. package/dist/utils/process-handlers.d.ts.map +1 -0
  73. package/dist/utils/process-handlers.js +69 -0
  74. package/dist/utils/process-handlers.js.map +1 -0
  75. package/dist/vectordb/index.d.ts +210 -0
  76. package/dist/vectordb/index.d.ts.map +1 -0
  77. package/dist/vectordb/index.js +613 -0
  78. package/dist/vectordb/index.js.map +1 -0
  79. package/dist/web/api-routes.d.ts +9 -0
  80. package/dist/web/api-routes.d.ts.map +1 -0
  81. package/dist/web/api-routes.js +127 -0
  82. package/dist/web/api-routes.js.map +1 -0
  83. package/dist/web/config-routes.d.ts +7 -0
  84. package/dist/web/config-routes.d.ts.map +1 -0
  85. package/dist/web/config-routes.js +54 -0
  86. package/dist/web/config-routes.js.map +1 -0
  87. package/dist/web/database-manager.d.ts +130 -0
  88. package/dist/web/database-manager.d.ts.map +1 -0
  89. package/dist/web/database-manager.js +382 -0
  90. package/dist/web/database-manager.js.map +1 -0
  91. package/dist/web/http-server.d.ts +28 -0
  92. package/dist/web/http-server.d.ts.map +1 -0
  93. package/dist/web/http-server.js +311 -0
  94. package/dist/web/http-server.js.map +1 -0
  95. package/dist/web/index.d.ts +3 -0
  96. package/dist/web/index.d.ts.map +1 -0
  97. package/dist/web/index.js +114 -0
  98. package/dist/web/index.js.map +1 -0
  99. package/dist/web/middleware/async-handler.d.ts +17 -0
  100. package/dist/web/middleware/async-handler.d.ts.map +1 -0
  101. package/dist/web/middleware/async-handler.js +26 -0
  102. package/dist/web/middleware/async-handler.js.map +1 -0
  103. package/dist/web/middleware/auth.d.ts +22 -0
  104. package/dist/web/middleware/auth.d.ts.map +1 -0
  105. package/dist/web/middleware/auth.js +81 -0
  106. package/dist/web/middleware/auth.js.map +1 -0
  107. package/dist/web/middleware/error-handler.d.ts +36 -0
  108. package/dist/web/middleware/error-handler.d.ts.map +1 -0
  109. package/dist/web/middleware/error-handler.js +68 -0
  110. package/dist/web/middleware/error-handler.js.map +1 -0
  111. package/dist/web/middleware/index.d.ts +6 -0
  112. package/dist/web/middleware/index.d.ts.map +1 -0
  113. package/dist/web/middleware/index.js +19 -0
  114. package/dist/web/middleware/index.js.map +1 -0
  115. package/dist/web/middleware/rate-limit.d.ts +38 -0
  116. package/dist/web/middleware/rate-limit.d.ts.map +1 -0
  117. package/dist/web/middleware/rate-limit.js +116 -0
  118. package/dist/web/middleware/rate-limit.js.map +1 -0
  119. package/dist/web/middleware/request-logger.d.ts +52 -0
  120. package/dist/web/middleware/request-logger.d.ts.map +1 -0
  121. package/dist/web/middleware/request-logger.js +74 -0
  122. package/dist/web/middleware/request-logger.js.map +1 -0
  123. package/dist/web/types.d.ts +6 -0
  124. package/dist/web/types.d.ts.map +1 -0
  125. package/dist/web/types.js +4 -0
  126. package/dist/web/types.js.map +1 -0
  127. package/package.json +135 -0
  128. package/skills/rag-vault/SKILL.md +111 -0
  129. package/skills/rag-vault/references/html-ingestion.md +73 -0
  130. package/skills/rag-vault/references/query-optimization.md +57 -0
  131. package/skills/rag-vault/references/result-refinement.md +54 -0
package/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 RobThePCGuy
4
+
5
+ Based on mcp-local-rag (https://github.com/shinpr/mcp-local-rag)
6
+ Originally created by Shinsuke Kagawa
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,421 @@
1
+ # RAG Vault
2
+
3
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
4
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.0-blue.svg?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
5
+ [![MCP Registry](https://img.shields.io/badge/MCP-Registry-green.svg)](https://registry.modelcontextprotocol.io/servers/io.github.RobThePCGuy/rag-vault)
6
+
7
+ **Your documents. Your machine. Your control.**
8
+
9
+ RAG Vault gives AI coding assistants instant access to your private documents—API specs, research papers, internal docs—without ever sending data to the cloud. One command, zero configuration, complete privacy.
10
+
11
+ ## Why RAG Vault?
12
+
13
+ | Pain Point | RAG Vault Solution |
14
+ |------------|-------------------|
15
+ | "I don't want my docs on someone else's server" | Everything stays local. No API calls after setup. |
16
+ | "Semantic search misses exact code terms" | Hybrid search: meaning + exact matches like `useEffect` |
17
+ | "Setup requires Docker, Python, databases..." | One `npx` command. Done. |
18
+ | "Cloud APIs charge per query" | Free forever. No subscriptions. |
19
+
20
+ ## Security
21
+
22
+ RAG Vault includes security features for production deployment:
23
+ - **API Authentication** — Optional API key via `RAG_API_KEY`
24
+ - **Rate Limiting** — Configurable request throttling
25
+ - **CORS Control** — Restrict allowed origins
26
+ - **Security Headers** — Helmet.js protection
27
+
28
+ See [SECURITY.md](SECURITY.md) for complete documentation.
29
+
30
+ ## Get Started in 30 Seconds
31
+
32
+ ### For Cursor
33
+
34
+ Add to `~/.cursor/mcp.json`:
35
+
36
+ ```json
37
+ {
38
+ "mcpServers": {
39
+ "local-rag": {
40
+ "type": "stdio",
41
+ "command": "npx",
42
+ "args": ["-y", "github:RobThePCGuy/rag-vault"],
43
+ "env": {
44
+ "BASE_DIR": "/path/to/your/documents"
45
+ }
46
+ }
47
+ }
48
+ }
49
+ ```
50
+
51
+ ### For Claude Code
52
+
53
+ Add to `.mcp.json` in your project directory:
54
+
55
+ ```json
56
+ {
57
+ "mcpServers": {
58
+ "local-rag": {
59
+ "type": "stdio",
60
+ "command": "npx",
61
+ "args": ["-y", "github:RobThePCGuy/rag-vault"],
62
+ "env": {
63
+ "BASE_DIR": "./documents",
64
+ "DB_PATH": "./documents/.rag-db",
65
+ "CACHE_DIR": "./.cache",
66
+ "RAG_HYBRID_WEIGHT": "0.6",
67
+ "RAG_GROUPING": "related"
68
+ }
69
+ }
70
+ }
71
+ }
72
+ ```
73
+
74
+ Or add inline via CLI:
75
+
76
+ ```bash
77
+ claude mcp add local-rag --scope user --env BASE_DIR=/path/to/your/documents -- npx -y github:RobThePCGuy/rag-vault
78
+ ```
79
+
80
+ ### For Codex
81
+
82
+ Add to `~/.codex/config.toml`:
83
+
84
+ ```toml
85
+ [mcp_servers.local-rag]
86
+ command = "npx"
87
+ args = ["-y", "github:RobThePCGuy/rag-vault"]
88
+
89
+ [mcp_servers.local-rag.env]
90
+ BASE_DIR = "/path/to/your/documents"
91
+ ```
92
+
93
+ ### Install Skills (Optional)
94
+
95
+ For enhanced AI guidance on query formulation and result interpretation, install the RAG Vault skills:
96
+
97
+ ```bash
98
+ # Claude Code (project-level - recommended for team projects)
99
+ npx github:RobThePCGuy/rag-vault skills install --claude-code
100
+
101
+ # Claude Code (user-level - available in all projects)
102
+ npx github:RobThePCGuy/rag-vault skills install --claude-code --global
103
+
104
+ # Codex (user-level)
105
+ npx github:RobThePCGuy/rag-vault skills install --codex
106
+
107
+ # Custom location
108
+ npx github:RobThePCGuy/rag-vault skills install --path /your/custom/path
109
+ ```
110
+
111
+ Skills teach your AI assistant (Claude Code or Codex) best practices for:
112
+ - Query formulation and expansion strategies
113
+ - Score interpretation (< 0.3 = good match, > 0.5 = skip)
114
+ - When to use `ingest_file` vs `ingest_data`
115
+ - HTML ingestion and URL handling
116
+
117
+ Restart your AI tool, and start talking:
118
+
119
+ ```
120
+ You: "Ingest api-spec.pdf"
121
+ AI: Successfully ingested api-spec.pdf (47 chunks)
122
+
123
+ You: "How does authentication work?"
124
+ AI: Based on section 3.2, authentication uses OAuth 2.0 with JWT tokens...
125
+ ```
126
+
127
+ That's it. No Docker. No Python. No servers.
128
+
129
+ ## Web Interface
130
+
131
+ RAG Vault includes a full-featured web UI for managing your documents without the command line.
132
+
133
+ ### Launch the Web UI
134
+
135
+ ```bash
136
+ npx github:RobThePCGuy/rag-vault web
137
+ ```
138
+
139
+ Open [http://localhost:3000](http://localhost:3000) in your browser.
140
+
141
+ ### What You Can Do
142
+
143
+ - **Upload documents** — Drag and drop PDFs, Word docs, Markdown, text files
144
+ - **Search instantly** — Type queries and see results with relevance scores
145
+ - **Preview content** — Click any result to see the full chunk in context
146
+ - **Manage files** — View all indexed documents, delete what you don't need
147
+ - **Switch databases** — Create and switch between multiple knowledge bases
148
+ - **Monitor status** — See document counts, database size, system health
149
+
150
+ ### REST API
151
+
152
+ The web server exposes a REST API for programmatic access. Set `RAG_API_KEY` to require authentication:
153
+
154
+ ```bash
155
+ # With authentication (when RAG_API_KEY is set)
156
+ curl -X POST "http://localhost:3000/api/v1/search" \
157
+ -H "Authorization: Bearer your-api-key" \
158
+ -H "Content-Type: application/json" \
159
+ -d '{"query": "authentication", "limit": 5}'
160
+
161
+ # Search documents (no auth required if RAG_API_KEY is not set)
162
+ curl -X POST "http://localhost:3000/api/v1/search" \
163
+ -H "Content-Type: application/json" \
164
+ -d '{"query": "authentication", "limit": 5}'
165
+
166
+ # List all files
167
+ curl "http://localhost:3000/api/v1/files"
168
+
169
+ # Upload a document
170
+ curl -X POST "http://localhost:3000/api/v1/files/upload" \
171
+ -F "file=@spec.pdf"
172
+
173
+ # Delete a file
174
+ curl -X DELETE "http://localhost:3000/api/v1/files" \
175
+ -H "Content-Type: application/json" \
176
+ -d '{"filePath": "/path/to/spec.pdf"}'
177
+
178
+ # Get system status
179
+ curl "http://localhost:3000/api/v1/status"
180
+
181
+ # Health check (for load balancers)
182
+ curl "http://localhost:3000/api/v1/health"
183
+ ```
184
+
185
+ ## Real-World Examples
186
+
187
+ ### Search Your Codebase Documentation
188
+
189
+ ```
190
+ You: "Ingest all the markdown files in /docs"
191
+ AI: Ingested 23 files (847 chunks total)
192
+
193
+ You: "What's the retry policy for failed API calls?"
194
+ AI: According to error-handling.md, failed requests retry 3 times
195
+ with exponential backoff: 1s, 2s, 4s...
196
+ ```
197
+
198
+ ### Index Web Documentation
199
+
200
+ ```
201
+ You: "Fetch https://docs.example.com/api and ingest the HTML"
202
+ AI: Ingested "docs.example.com/api" (156 chunks)
203
+
204
+ You: "What rate limits apply to the /users endpoint?"
205
+ AI: The API limits /users to 100 requests per minute per API key...
206
+ ```
207
+
208
+ ### Build a Personal Knowledge Base
209
+
210
+ ```
211
+ You: "Ingest my research papers folder"
212
+ AI: Ingested 12 PDFs (2,341 chunks)
213
+
214
+ You: "What do recent studies say about transformer attention mechanisms?"
215
+ AI: Based on attention-mechanisms-2024.pdf, the key finding is...
216
+ ```
217
+
218
+ ### Search Exact Technical Terms
219
+
220
+ RAG Vault's hybrid search catches both meaning and exact matches:
221
+
222
+ ```
223
+ You: "Search for ERR_CONNECTION_REFUSED"
224
+ AI: Found 3 results mentioning ERR_CONNECTION_REFUSED:
225
+ 1. troubleshooting.md - "When you see ERR_CONNECTION_REFUSED..."
226
+ 2. network-errors.pdf - "Common causes include..."
227
+ ```
228
+
229
+ Pure semantic search would miss this. RAG Vault finds it.
230
+
231
+ ## How It Works
232
+
233
+ ```
234
+ Document → Parse → Chunk by meaning → Embed locally → Store in LanceDB
235
+
236
+ Query → Embed → Vector search → Keyword boost → Quality filter → Results
237
+ ```
238
+
239
+ **Smart chunking**: Splits by meaning, not character count. Keeps code blocks intact.
240
+
241
+ **Hybrid search**: Vector similarity finds related content. Keyword boost ranks exact matches higher.
242
+
243
+ **Quality filtering**: Groups results by relevance gaps instead of arbitrary top-K cutoffs.
244
+
245
+ **Local everything**: Embeddings via Transformers.js. Storage via LanceDB. No network after model download.
246
+
247
+ ## Supported Formats
248
+
249
+ | Format | Extension | Notes |
250
+ |--------|-----------|-------|
251
+ | PDF | `.pdf` | Full text extraction, header/footer filtering |
252
+ | Word | `.docx` | Tables, lists, formatting preserved |
253
+ | Markdown | `.md` | Code blocks kept intact |
254
+ | Text | `.txt` | Plain text |
255
+ | JSON | `.json` | Converted to searchable key-value text |
256
+ | HTML | via `ingest_data` | Auto-cleaned with Readability |
257
+
258
+ ## Configuration
259
+
260
+ ### Environment Variables
261
+
262
+ | Variable | Default | What it does |
263
+ |----------|---------|--------------|
264
+ | `BASE_DIR` | Current directory | Only files under this path can be accessed |
265
+ | `DB_PATH` | `./lancedb/` | Where vectors are stored |
266
+ | `MODEL_NAME` | `Xenova/all-MiniLM-L6-v2` | HuggingFace embedding model |
267
+ | `WEB_PORT` | `3000` | Port for web interface |
268
+
269
+ ### Search Tuning
270
+
271
+ | Variable | Default | What it does |
272
+ |----------|---------|--------------|
273
+ | `RAG_HYBRID_WEIGHT` | `0.6` | Keyword boost strength. 0 = semantic-only, higher = stronger boost for exact keyword matches |
274
+ | `RAG_GROUPING` | — | `similar` = top group only, `related` = top 2 groups |
275
+ | `RAG_MAX_DISTANCE` | — | Drop results whose distance score is above this value (lower score = closer match) |
276
+
277
+ ### Security (optional)
278
+
279
+ | Variable | Default | What it does |
280
+ |----------|---------|--------------|
281
+ | `RAG_API_KEY` | — | API key for authentication |
282
+ | `CORS_ORIGINS` | localhost | Allowed origins (comma-separated, or `*`) |
283
+ | `RATE_LIMIT_WINDOW_MS` | `60000` | Rate limit time window (ms) |
284
+ | `RATE_LIMIT_MAX_REQUESTS` | `100` | Max requests per window |
285
+
286
+ ### Advanced
287
+
288
+ | Variable | Default | What it does |
289
+ |----------|---------|--------------|
290
+ | `ALLOWED_SCAN_ROOTS` | Home directory | Directories allowed for database scanning |
291
+ | `JSON_BODY_LIMIT` | `5mb` | Max request body size |
292
+ | `REQUEST_TIMEOUT_MS` | `30000` | API request timeout |
293
+ | `REQUEST_LOGGING` | `false` | Enable request audit logging |
294
+
295
+ > Copy [`.env.example`](.env.example) for a complete configuration template.
296
+
297
+ **For code-heavy content**, try:
298
+
299
+ ```json
300
+ "env": {
301
+ "RAG_HYBRID_WEIGHT": "0.8",
302
+ "RAG_GROUPING": "similar"
303
+ }
304
+ ```
305
+
306
+ ## Frequently Asked Questions
307
+
308
+ <details>
309
+ <summary><strong>Is my data really private?</strong></summary>
310
+
311
+ Yes. After the embedding model downloads (~90MB), RAG Vault makes zero network requests. Everything runs on your machine. Verify with network monitoring.
312
+
313
+ </details>
314
+
315
+ <details>
316
+ <summary><strong>Does it work offline?</strong></summary>
317
+
318
+ Yes, after the first run. The model caches locally.
319
+
320
+ </details>
321
+
322
+ <details>
323
+ <summary><strong>What about GPU acceleration?</strong></summary>
324
+
325
+ Transformers.js runs on CPU. GPU support is experimental but unnecessary for most use cases—queries return in ~1 second even with 10,000 chunks.
326
+
327
+ </details>
328
+
329
+ <details>
330
+ <summary><strong>Can I change the embedding model?</strong></summary>
331
+
332
+ Yes. Set `MODEL_NAME` to any compatible HuggingFace model. But you must delete `DB_PATH` and re-ingest—different models produce incompatible vectors.
333
+
334
+ **Recommended upgrade:** For better quality and multilingual support, use [EmbeddingGemma](https://huggingface.co/onnx-community/embeddinggemma-300m-ONNX):
335
+
336
+ ```json
337
+ "MODEL_NAME": "onnx-community/embeddinggemma-300m-ONNX"
338
+ ```
339
+
340
+ This 300M parameter model scores 68.36 on MTEB benchmarks and supports 100+ languages, making it ideal for mixed-language or high-quality retrieval needs.
341
+
342
+ **Other specialized models:**
343
+ - Scientific: `sentence-transformers/allenai-specter`
344
+ - Code: `jinaai/jina-embeddings-v2-base-code`
345
+
346
+ </details>
347
+
348
+ <details>
349
+ <summary><strong>How do I back up my data?</strong></summary>
350
+
351
+ Copy the `DB_PATH` directory (default: `./lancedb/`).
352
+
353
+ </details>
354
+
355
+ ## Troubleshooting
356
+
357
+ | Problem | Solution |
358
+ |---------|----------|
359
+ | No results found | Documents must be ingested first. Run "List all ingested files" to check. |
360
+ | Model download failed | Check internet connection. Model is ~90MB from HuggingFace. |
361
+ | File too large | Default limit is 100MB. Set `MAX_FILE_SIZE` higher or split the file. |
362
+ | Path outside BASE_DIR | All file paths must be under `BASE_DIR`. Use absolute paths. |
363
+ | MCP tools not showing | Verify config syntax, restart your AI tool completely (Cmd+Q on Mac). |
364
+ | 401 Unauthorized | API key required. Set `RAG_API_KEY` or use correct header format. |
365
+ | 429 Too Many Requests | Rate limited. Wait for reset or increase `RATE_LIMIT_MAX_REQUESTS`. |
366
+ | CORS errors | Add your origin to `CORS_ORIGINS` environment variable. |
367
+
368
+ ## Development
369
+
370
+ ```bash
371
+ git clone https://github.com/RobThePCGuy/rag-vault.git
372
+ cd rag-vault
373
+ pnpm install
374
+
375
+ # Run tests
376
+ pnpm test
377
+
378
+ # Type check + lint + format
379
+ pnpm check:all
380
+
381
+ # Build
382
+ pnpm build
383
+
384
+ # Run MCP server locally
385
+ pnpm dev
386
+
387
+ # Run web server locally
388
+ pnpm web:dev
389
+ ```
390
+
391
+ ### Project Structure
392
+
393
+ ```
394
+ src/
395
+ ├── server/ # MCP tool handlers
396
+ ├── vectordb/ # LanceDB + hybrid search
397
+ ├── chunker/ # Semantic text splitting
398
+ ├── embedder/ # Transformers.js wrapper
399
+ ├── parser/ # PDF, DOCX, HTML parsing
400
+ ├── web/ # Express server + REST API
401
+ └── __tests__/ # Test suites
402
+
403
+ web-ui/ # React frontend
404
+ ```
405
+
406
+ ## Documentation
407
+
408
+ - [SECURITY.md](SECURITY.md) — Security configuration and best practices
409
+ - [.env.example](.env.example) — Complete environment variable template
410
+
411
+ ## License
412
+
413
+ MIT — free for personal and commercial use.
414
+
415
+ ## Acknowledgments
416
+
417
+ Built with [Model Context Protocol](https://modelcontextprotocol.io/), [LanceDB](https://lancedb.com/), and [Transformers.js](https://huggingface.co/docs/transformers.js).
418
+
419
+ > Started as a fork of [mcp-local-rag](https://github.com/shinpr/mcp-local-rag) by [Shinsuke Kagawa](https://github.com/shinpr). Now it’s its own thing.
420
+ > Huge credit to upstream contributors for the foundation; I’ve been iterating hard from there.
421
+ > Local-first dev tools, all the way.
@@ -0,0 +1,20 @@
1
+ /**
2
+ * MCP Local RAG Skills Installer
3
+ *
4
+ * Installs skills to various AI coding assistants:
5
+ * - Claude Code (project or global)
6
+ * - OpenAI Codex
7
+ * - Custom path
8
+ *
9
+ * Usage:
10
+ * npx rag-vault skills install --claude-code # Project-level
11
+ * npx rag-vault skills install --claude-code --global # User-level
12
+ * npx rag-vault skills install --codex # Codex
13
+ * npx rag-vault skills install --path /custom/path # Custom
14
+ */
15
+ /**
16
+ * Run the skills installer with the given arguments
17
+ * @param args - Command line arguments (after "skills install")
18
+ */
19
+ export declare function run(args: string[]): void;
20
+ //# sourceMappingURL=install-skills.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"install-skills.d.ts","sourceRoot":"","sources":["../../src/bin/install-skills.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAmLH;;;GAGG;AACH,wBAAgB,GAAG,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,IAAI,CA+BxC"}
@@ -0,0 +1,196 @@
1
+ "use strict";
2
+ /**
3
+ * MCP Local RAG Skills Installer
4
+ *
5
+ * Installs skills to various AI coding assistants:
6
+ * - Claude Code (project or global)
7
+ * - OpenAI Codex
8
+ * - Custom path
9
+ *
10
+ * Usage:
11
+ * npx rag-vault skills install --claude-code # Project-level
12
+ * npx rag-vault skills install --claude-code --global # User-level
13
+ * npx rag-vault skills install --codex # Codex
14
+ * npx rag-vault skills install --path /custom/path # Custom
15
+ */
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.run = run;
18
+ const node_fs_1 = require("node:fs");
19
+ const node_os_1 = require("node:os");
20
+ const node_path_1 = require("node:path");
21
+ // ============================================
22
+ // Constants
23
+ // ============================================
24
+ // Skills source directory (relative to dist/bin when compiled)
25
+ // The compiled file is dist/bin/install-skills.js, but skills are not copied into dist/.
26
+ // They ship in the package root under skills/rag-vault,
27
+ // so from dist/bin go up two levels (../..) and then into skills/rag-vault.
28
+ const SKILLS_SOURCE = (0, node_path_1.resolve)(__dirname, '..', '..', 'skills', 'rag-vault');
29
+ // Codex home directory (supports CODEX_HOME environment variable)
30
+ // https://developers.openai.com/codex/local-config/
31
+ const CODEX_HOME = process.env['CODEX_HOME'] || (0, node_path_1.join)((0, node_os_1.homedir)(), '.codex');
32
+ // Installation targets
33
+ const TARGETS = {
34
+ 'claude-code-project': './.claude/skills/rag-vault',
35
+ 'claude-code-global': (0, node_path_1.join)((0, node_os_1.homedir)(), '.claude', 'skills', 'rag-vault'),
36
+ 'codex-project': './.codex/skills/rag-vault',
37
+ 'codex-global': (0, node_path_1.join)(CODEX_HOME, 'skills', 'rag-vault'),
38
+ };
39
/**
 * Parse the arguments that follow `skills install` into installer options.
 *
 * Recognized flags:
 *   --help, -h                     request the help text
 *   --claude-code [--global]       Claude Code target; project-level unless
 *                                  `--global` is the very next argument
 *   --codex [--project|--global]   Codex target; user-level unless
 *                                  `--project` is the very next argument
 *   --path <path>                  custom target directory
 *
 * When several target flags are given, the last one wins. Arguments that do
 * not start with "-" are ignored. An unknown option, or a `--path` without a
 * usable value, prints an error and exits the process with status 1.
 *
 * @param {string[]} args - Command line arguments (after "skills install")
 * @returns {{target: string, help: boolean, customPath?: string}} Parsed options
 */
function parseArgs(args) {
    const options = {
        target: 'claude-code-project',
        help: false,
    };
    for (let i = 0; i < args.length; i++) {
        const arg = args[i];
        switch (arg) {
            case '--help':
            case '-h':
                options.help = true;
                break;
            case '--claude-code':
                // Check for --global flag
                if (args[i + 1] === '--global') {
                    options.target = 'claude-code-global';
                    i++; // Skip next arg
                }
                else {
                    options.target = 'claude-code-project';
                }
                break;
            case '--codex':
                // Check for --project or --global flag
                if (args[i + 1] === '--project') {
                    options.target = 'codex-project';
                    i++; // Skip next arg
                }
                else if (args[i + 1] === '--global') {
                    options.target = 'codex-global';
                    i++; // Skip next arg
                }
                else {
                    // Default to global (matches previous behavior)
                    options.target = 'codex-global';
                }
                break;
            case '--path': {
                const pathArg = args[i + 1];
                // A following option flag (e.g. `--path --codex`) is not a path.
                // Without this check it would be swallowed and used as the
                // directory name, silently discarding the flag.
                if (!pathArg || pathArg.startsWith('-')) {
                    console.error('Error: --path requires a path argument');
                    process.exit(1);
                }
                options.target = 'custom';
                options.customPath = pathArg;
                i++; // Skip next arg
                break;
            }
            default:
                if (arg?.startsWith('-')) {
                    console.error(`Unknown option: ${arg}`);
                    process.exit(1);
                }
        }
    }
    return options;
}
96
+ // ============================================
97
+ // Help Message
98
+ // ============================================
99
/**
 * Print the installer usage text to stdout.
 *
 * The text starts and ends with a blank line, and lists every supported
 * target flag followed by example invocations.
 */
function printHelp() {
    const helpLines = [
        '',
        'MCP Local RAG Skills Installer',
        '',
        'Usage:',
        'npx rag-vault skills install [options]',
        '',
        'Options:',
        '--claude-code Install to project-level Claude Code skills',
        '(./.claude/skills/)',
        '',
        '--claude-code --global Install to user-level Claude Code skills',
        '(~/.claude/skills/)',
        '',
        '--codex Install to user-level Codex skills (default)',
        '($CODEX_HOME/skills/ or ~/.codex/skills/)',
        '',
        '--codex --project Install to project-level Codex skills',
        '(./.codex/skills/)',
        '',
        '--codex --global Install to user-level Codex skills',
        '($CODEX_HOME/skills/ or ~/.codex/skills/)',
        '',
        '--path <path> Install to custom path',
        '',
        '--help, -h Show this help message',
        '',
        'Examples:',
        'npx rag-vault skills install --claude-code',
        'npx rag-vault skills install --claude-code --global',
        'npx rag-vault skills install --codex',
        'npx rag-vault skills install --codex --project',
        'npx rag-vault skills install --path ./my-skills/',
        '',
    ];
    console.log(helpLines.join('\n'));
}
134
+ // ============================================
135
+ // Installation
136
+ // ============================================
137
/**
 * Work out the directory the skills should be copied into.
 *
 * Predefined targets are looked up in TARGETS and returned as stored there.
 * A custom target is resolved to an absolute path with a trailing
 * `rag-vault` segment appended. If the target is custom but no path was
 * supplied, an error is printed and the process exits with status 1.
 *
 * @param options - Parsed installer options (`target`, optional `customPath`)
 * @returns The installation path for the selected target
 */
function getTargetPath(options) {
    const { target, customPath } = options;
    // Predefined targets need no further processing
    if (target !== 'custom') {
        return TARGETS[target];
    }
    if (!customPath) {
        console.error('Error: Custom path not specified');
        process.exit(1);
    }
    return (0, node_path_1.resolve)(customPath, 'rag-vault');
}
147
/**
 * Copy the bundled skills directory to `targetPath`.
 *
 * Exits the process with status 1 if the bundled skills cannot be found at
 * SKILLS_SOURCE. The parent directory of `targetPath` is created (with any
 * missing ancestors) when it does not exist yet.
 *
 * @param targetPath - Destination directory for the skills
 */
function install(targetPath) {
    // The packaged skills must be present before anything is written
    const sourcePresent = (0, node_fs_1.existsSync)(SKILLS_SOURCE);
    if (!sourcePresent) {
        console.error(`Error: Skills source not found at ${SKILLS_SOURCE}`);
        process.exit(1);
    }
    // Make sure the destination's parent directory exists
    const parentDir = (0, node_path_1.dirname)(targetPath);
    const parentPresent = (0, node_fs_1.existsSync)(parentDir);
    if (!parentPresent) {
        (0, node_fs_1.mkdirSync)(parentDir, { recursive: true });
        console.log(`Created directory: ${parentDir}`);
    }
    // Recursively copy the whole skills tree
    (0, node_fs_1.cpSync)(SKILLS_SOURCE, targetPath, { recursive: true });
    console.log(`Installed skills to: ${targetPath}`);
}
163
+ // ============================================
164
+ // Exported Run Function
165
+ // ============================================
166
/**
 * Entry point for the `skills install` sub-command.
 *
 * Prints the help text and exits with status 0 when called without
 * arguments or with a help flag. Otherwise it resolves the destination,
 * copies the skills there and prints a summary of what was installed.
 *
 * @param args - Command line arguments (after "skills install")
 */
function run(args) {
    const showHelpAndExit = () => {
        printHelp();
        process.exit(0);
    };
    // No arguments at all: show usage instead of installing to the default target
    if (args.length === 0) {
        showHelpAndExit();
    }
    const options = parseArgs(args);
    if (options.help) {
        showHelpAndExit();
    }
    const targetPath = getTargetPath(options);
    console.log('Installing MCP Local RAG skills...');
    console.log(`Target: ${options.target}`);
    console.log(`Path: ${targetPath}`);
    console.log();
    install(targetPath);
    console.log();
    console.log('Installation complete!');
    console.log();
    const installedSummary = [
        'The following skills are now available:',
        ' - rag-vault (SKILL.md)',
        ' - references/html-ingestion.md',
        ' - references/query-optimization.md',
        ' - references/result-refinement.md',
    ];
    for (const line of installedSummary) {
        console.log(line);
    }
}
196
+ //# sourceMappingURL=install-skills.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"install-skills.js","sourceRoot":"","sources":["../../src/bin/install-skills.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;GAaG;;AAuLH,kBA+BC;AApND,qCAAuD;AACvD,qCAAiC;AACjC,yCAAkD;AAElD,+CAA+C;AAC/C,YAAY;AACZ,+CAA+C;AAE/C,+DAA+D;AAC/D,sDAAsD;AACtD,4DAA4D;AAC5D,6DAA6D;AAC7D,MAAM,aAAa,GAAG,IAAA,mBAAO,EAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAA;AAE3E,kEAAkE;AAClE,oDAAoD;AACpD,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,IAAA,gBAAI,EAAC,IAAA,iBAAO,GAAE,EAAE,QAAQ,CAAC,CAAA;AAEzE,uBAAuB;AACvB,MAAM,OAAO,GAAG;IACd,qBAAqB,EAAE,4BAA4B;IACnD,oBAAoB,EAAE,IAAA,gBAAI,EAAC,IAAA,iBAAO,GAAE,EAAE,SAAS,EAAE,QAAQ,EAAE,WAAW,CAAC;IACvE,eAAe,EAAE,2BAA2B;IAC5C,cAAc,EAAE,IAAA,gBAAI,EAAC,UAAU,EAAE,QAAQ,EAAE,WAAW,CAAC;CAC/C,CAAA;AAYV,SAAS,SAAS,CAAC,IAAc;IAC/B,MAAM,OAAO,GAAY;QACvB,MAAM,EAAE,qBAAqB;QAC7B,IAAI,EAAE,KAAK;KACZ,CAAA;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QAEnB,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,QAAQ,CAAC;YACd,KAAK,IAAI;gBACP,OAAO,CAAC,IAAI,GAAG,IAAI,CAAA;gBACnB,MAAK;YAEP,KAAK,eAAe;gBAClB,0BAA0B;gBAC1B,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,UAAU,EAAE,CAAC;oBAC/B,OAAO,CAAC,MAAM,GAAG,oBAAoB,CAAA;oBACrC,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACtB,CAAC;qBAAM,CAAC;oBACN,OAAO,CAAC,MAAM,GAAG,qBAAqB,CAAA;gBACxC,CAAC;gBACD,MAAK;YAEP,KAAK,SAAS;gBACZ,uCAAuC;gBACvC,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,WAAW,EAAE,CAAC;oBAChC,OAAO,CAAC,MAAM,GAAG,eAAe,CAAA;oBAChC,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACtB,CAAC;qBAAM,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,UAAU,EAAE,CAAC;oBACtC,OAAO,CAAC,MAAM,GAAG,cAAc,CAAA;oBAC/B,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACtB,CAAC;qBAAM,CAAC;oBACN,gDAAgD;oBAChD,OAAO,CAAC,MAAM,GAAG,cAAc,CAAA;gBACjC,CAAC;gBACD,MAAK;YAEP,KAAK,QAAQ,CAAC,CAAC,CAAC;gBACd,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;gBAC3B,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,CAAC,KAAK,CAAC,wCAAwC,CAAC,CAAA;oBACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACjB,CAAC;gBACD,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAA;gBAC
zB,OAAO,CAAC,UAAU,GAAG,OAAO,CAAA;gBAC5B,CAAC,EAAE,CAAA,CAAC,gBAAgB;gBACpB,MAAK;YACP,CAAC;YAED;gBACE,IAAI,GAAG,EAAE,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;oBACvC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACjB,CAAC;QACL,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC;AAED,+CAA+C;AAC/C,eAAe;AACf,+CAA+C;AAE/C,SAAS,SAAS;IAChB,OAAO,CAAC,GAAG,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAgCb,CAAC,CAAA;AACF,CAAC;AAED,+CAA+C;AAC/C,eAAe;AACf,+CAA+C;AAE/C,SAAS,aAAa,CAAC,OAAgB;IACrC,IAAI,OAAO,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QAChC,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC;YACxB,OAAO,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAA;YACjD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACjB,CAAC;QACD,OAAO,IAAA,mBAAO,EAAC,OAAO,CAAC,UAAU,EAAE,WAAW,CAAC,CAAA;IACjD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,CAAA;AAChC,CAAC;AAED,SAAS,OAAO,CAAC,UAAkB;IACjC,sBAAsB;IACtB,IAAI,CAAC,IAAA,oBAAU,EAAC,aAAa,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,KAAK,CAAC,qCAAqC,aAAa,EAAE,CAAC,CAAA;QACnE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,0BAA0B;IAC1B,MAAM,SAAS,GAAG,IAAA,mBAAO,EAAC,UAAU,CAAC,CAAA;IACrC,IAAI,CAAC,IAAA,oBAAU,EAAC,SAAS,CAAC,EAAE,CAAC;QAC3B,IAAA,mBAAS,EAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;QACzC,OAAO,CAAC,GAAG,CAAC,sBAAsB,SAAS,EAAE,CAAC,CAAA;IAChD,CAAC;IAED,cAAc;IACd,IAAA,gBAAM,EAAC,aAAa,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;IACtD,OAAO,CAAC,GAAG,CAAC,wBAAwB,UAAU,EAAE,CAAC,CAAA;AACnD,CAAC;AAED,+CAA+C;AAC/C,wBAAwB;AACxB,+CAA+C;AAE/C;;;GAGG;AACH,SAAgB,GAAG,CAAC,IAAc;IAChC,6BAA6B;IAC7B,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,SAAS,EAAE,CAAA;QACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAE/B,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;QACjB,SAAS,EAAE,CAAA;QACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;IAEzC,OAAO,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAA;IACjD,OAAO,CAAC,GAAG,CAAC,WAAW,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;IACxC,OAAO,CAAC,GAAG,CAAC,SAAS,UAAU,EAAE,CAAC,CAAA;IAClC,OAAO,CAAC,GAAG,EAAE,CAAA;IAEb,
OAAO,CAAC,UAAU,CAAC,CAAA;IAEnB,OAAO,CAAC,GAAG,EAAE,CAAA;IACb,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAA;IACrC,OAAO,CAAC,GAAG,EAAE,CAAA;IACb,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAA;IACtD,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAA;IACvC,OAAO,CAAC,GAAG,CAAC,kCAAkC,CAAC,CAAA;IAC/C,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAA;IACnD,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAA;AACpD,CAAC"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Text chunk
3
+ */
4
+ export interface TextChunk {
5
+ /** Chunk text */
6
+ text: string;
7
+ /** Chunk index (zero-based) */
8
+ index: number;
9
+ }
10
+ export { SemanticChunker } from './semantic-chunker.js';
11
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/chunker/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,iBAAiB;IACjB,IAAI,EAAE,MAAM,CAAA;IACZ,+BAA+B;IAC/B,KAAK,EAAE,MAAM,CAAA;CACd;AAED,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA"}