mcp-local-rag 0.9.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -46
- package/dist/cli/common.d.ts +14 -0
- package/dist/cli/common.d.ts.map +1 -0
- package/dist/cli/common.js +25 -0
- package/dist/cli/common.js.map +1 -0
- package/dist/cli/delete.d.ts +8 -0
- package/dist/cli/delete.d.ts.map +1 -0
- package/dist/cli/delete.js +164 -0
- package/dist/cli/delete.js.map +1 -0
- package/dist/cli/ingest.d.ts +21 -8
- package/dist/cli/ingest.d.ts.map +1 -1
- package/dist/cli/ingest.js +113 -69
- package/dist/cli/ingest.js.map +1 -1
- package/dist/cli/list.d.ts +23 -0
- package/dist/cli/list.d.ts.map +1 -0
- package/dist/cli/list.js +142 -0
- package/dist/cli/list.js.map +1 -0
- package/dist/cli/options.d.ts +49 -0
- package/dist/cli/options.d.ts.map +1 -0
- package/dist/cli/options.js +193 -0
- package/dist/cli/options.js.map +1 -0
- package/dist/cli/query.d.ts +23 -0
- package/dist/cli/query.d.ts.map +1 -0
- package/dist/cli/query.js +161 -0
- package/dist/cli/query.js.map +1 -0
- package/dist/cli/status.d.ts +8 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +78 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli-main.d.ts +4 -2
- package/dist/cli-main.d.ts.map +1 -1
- package/dist/cli-main.js +21 -4
- package/dist/cli-main.js.map +1 -1
- package/dist/index.js +5 -4
- package/dist/index.js.map +1 -1
- package/dist/server/index.js +1 -1
- package/dist/server/index.js.map +1 -1
- package/dist/server/types.d.ts +1 -1
- package/dist/server/types.d.ts.map +1 -1
- package/dist/utils/raw-data-utils.d.ts +116 -0
- package/dist/utils/raw-data-utils.d.ts.map +1 -0
- package/dist/utils/raw-data-utils.js +202 -0
- package/dist/utils/raw-data-utils.js.map +1 -0
- package/package.json +6 -7
- package/skills/mcp-local-rag/SKILL.md +25 -35
- package/skills/mcp-local-rag/references/cli-reference.md +77 -0
- package/skills/mcp-local-rag/references/html-ingestion.md +11 -7
- package/skills/mcp-local-rag/references/query-optimization.md +1 -1
- package/skills/mcp-local-rag/references/cli-ingest.md +0 -45
package/README.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="assets/banner.jpg" alt="MCP Local RAG — Search below the surface." width="600" />
|
|
3
|
+
</p>
|
|
4
|
+
|
|
1
5
|
# MCP Local RAG
|
|
2
6
|
|
|
3
7
|
[GitHub repository](https://github.com/shinpr/mcp-local-rag)
|
|
@@ -6,7 +10,7 @@
|
|
|
6
10
|
[TypeScript](https://www.typescriptlang.org/)
|
|
7
11
|
[MCP Registry](https://registry.modelcontextprotocol.io/)
|
|
8
12
|
|
|
9
|
-
Local RAG for developers
|
|
13
|
+
Local RAG for developers via MCP or CLI.
|
|
10
14
|
Semantic search with keyword boost for exact technical terms — fully private, zero setup.
|
|
11
15
|
|
|
12
16
|
## Features
|
|
@@ -24,7 +28,8 @@ Semantic search with keyword boost for exact technical terms — fully private,
|
|
|
24
28
|
No API keys, no cloud, no data leaving your machine. Works fully offline after the first model download.
|
|
25
29
|
|
|
26
30
|
- **Zero-friction setup**
|
|
27
|
-
One `npx` command. No Docker, no Python, no servers to manage.
|
|
31
|
+
One `npx` command. No Docker, no Python, no servers to manage.
|
|
32
|
+
Use via MCP, CLI, or both. Optional [Agent Skills](#agent-skills) help AI assistants form better queries and interpret results.
|
|
28
33
|
|
|
29
34
|
## Quick Start
|
|
30
35
|
|
|
@@ -73,7 +78,14 @@ Assistant: Based on the documentation, authentication uses OAuth 2.0 with JWT to
|
|
|
73
78
|
The flow is described in section 3.2...
|
|
74
79
|
```
|
|
75
80
|
|
|
76
|
-
|
|
81
|
+
**Or use directly as CLI** — no MCP server needed:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
npx mcp-local-rag ingest ./docs/
|
|
85
|
+
npx mcp-local-rag query "authentication API"
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
That's it. No Docker, no Python, no server setup.
|
|
77
89
|
|
|
78
90
|
## Why This Exists
|
|
79
91
|
|
|
@@ -87,12 +99,17 @@ You want AI to search your documents—technical specs, research papers, interna
|
|
|
87
99
|
|
|
88
100
|
**Code search.** Pure semantic search misses exact terms like `useEffect` or `ERR_CONNECTION_REFUSED`. Keyword boost catches both meaning and exact matches.
|
|
89
101
|
|
|
102
|
+
**Agent reality.** In practice, many AI environments mainly use tool calling. CLI support and Agent Skills make the same workflows available even without full MCP integration.
|
|
103
|
+
|
|
90
104
|
## Usage
|
|
91
105
|
|
|
92
|
-
|
|
93
|
-
|
|
106
|
+
mcp-local-rag provides two interfaces: an **MCP server** for AI coding tools and a **CLI** for direct use from the terminal.
|
|
107
|
+
|
|
108
|
+
### Using with MCP
|
|
94
109
|
|
|
95
|
-
|
|
110
|
+
The MCP server provides 6 tools: `ingest_file`, `ingest_data`, `query_documents`, `list_files`, `delete_file`, `status`.
|
|
111
|
+
|
|
112
|
+
#### Ingesting Documents
|
|
96
113
|
|
|
97
114
|
```
|
|
98
115
|
"Ingest the document at /Users/me/docs/api-spec.pdf"
|
|
@@ -102,7 +119,7 @@ Supports PDF, DOCX, TXT, and Markdown. The server extracts text, splits it into
|
|
|
102
119
|
|
|
103
120
|
Re-ingesting the same file replaces the old version automatically.
|
|
104
121
|
|
|
105
|
-
|
|
122
|
+
#### Ingesting HTML Content
|
|
106
123
|
|
|
107
124
|
Use `ingest_data` to ingest HTML content retrieved by your AI assistant (via web fetch, curl, browser tools, etc.):
|
|
108
125
|
|
|
@@ -119,31 +136,7 @@ HTML is automatically cleaned—you get the article content, not the boilerplate
|
|
|
119
136
|
|
|
120
137
|
> **Note:** The RAG server itself doesn't fetch web content—your AI assistant retrieves it and passes the HTML to `ingest_data`. This keeps the server fully local while letting you index any content your assistant can access. Please respect website terms of service and copyright when ingesting external content.
|
|
121
138
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
For ingesting multiple files or an entire directory, use the CLI command instead of calling `ingest_file` repeatedly:
|
|
125
|
-
|
|
126
|
-
```bash
|
|
127
|
-
npx mcp-local-rag ingest --db-path ./lancedb --base-dir ./docs ./docs/
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
Supports PDF, DOCX, TXT, and Markdown (same as MCP tools). HTML is only supported via `ingest_data`, not CLI.
|
|
131
|
-
|
|
132
|
-
Key options:
|
|
133
|
-
|
|
134
|
-
| Option | Default | Description |
|
|
135
|
-
|--------|---------|-------------|
|
|
136
|
-
| `--db-path <path>` | `./lancedb/` | LanceDB database path |
|
|
137
|
-
| `--base-dir <path>` | cwd | Base directory for documents |
|
|
138
|
-
| `--model-name <name>` | `Xenova/all-MiniLM-L6-v2` | Embedding model |
|
|
139
|
-
| `--cache-dir <path>` | `./models/` | Model cache directory |
|
|
140
|
-
| `--max-file-size <n>` | `104857600` | Max file size in bytes |
|
|
141
|
-
|
|
142
|
-
This processes all supported files recursively and runs optimization once at the end — much faster than per-file ingestion for large batches.
|
|
143
|
-
|
|
144
|
-
> ⚠️ **CLI options must match your MCP server config.** Especially `--model-name` — using a different embedding model against an existing database produces incompatible vectors, silently degrading search quality.
|
|
145
|
-
|
|
146
|
-
### Searching Documents
|
|
139
|
+
#### Searching Documents
|
|
147
140
|
|
|
148
141
|
```
|
|
149
142
|
"What does the API documentation say about authentication?"
|
|
@@ -155,7 +148,7 @@ Search uses semantic similarity with keyword boost. This means `useEffect` finds
|
|
|
155
148
|
|
|
156
149
|
Results include text content, source file, document title, and relevance score. The document title provides context for each chunk, helping identify which document a result belongs to. Adjust result count with `limit` (1-20, default 10).
|
|
157
150
|
|
|
158
|
-
|
|
151
|
+
#### Managing Files
|
|
159
152
|
|
|
160
153
|
```
|
|
161
154
|
"List all files in BASE_DIR and their ingested status" # See what's indexed
|
|
@@ -163,6 +156,58 @@ Results include text content, source file, document title, and relevance score.
|
|
|
163
156
|
"Show RAG server status" # Check system health
|
|
164
157
|
```
|
|
165
158
|
|
|
159
|
+
### Using as CLI
|
|
160
|
+
|
|
161
|
+
All MCP tools are also available as CLI commands — no MCP server needed:
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
npx mcp-local-rag ingest ./docs/ # Bulk ingest files
|
|
165
|
+
npx mcp-local-rag query "authentication API" # Search documents
|
|
166
|
+
npx mcp-local-rag list # Show ingestion status
|
|
167
|
+
npx mcp-local-rag status # Database stats
|
|
168
|
+
npx mcp-local-rag delete ./docs/old.pdf # Remove content
|
|
169
|
+
npx mcp-local-rag delete --source "https://..." # Remove by source URL
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
`query`, `list`, `status`, and `delete` output JSON to stdout for piping (e.g., `| jq`). `ingest` outputs progress to stderr. Global options (`--db-path`, `--cache-dir`, `--model-name`) go before the subcommand. Run `npx mcp-local-rag --help` for details.
|
|
173
|
+
|
|
174
|
+
> ⚠️ The CLI does **not** read your MCP client config (`mcp.json`, `config.toml`, etc.). Configure the CLI via flags or environment variables as shown below.
|
|
175
|
+
|
|
176
|
+
#### CLI Configuration
|
|
177
|
+
|
|
178
|
+
**CLI flags** — global options go before the subcommand, subcommand options go after:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
npx mcp-local-rag --db-path ./my-db query "auth" --base-dir ./docs
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**Environment variables** — set in your shell:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
export DB_PATH=./my-db
|
|
188
|
+
export BASE_DIR=./docs
|
|
189
|
+
npx mcp-local-rag query "auth"
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
**Sharing config between MCP and CLI** — if your MCP client inherits shell environment variables, you can set them in your shell profile (e.g., `~/.zshrc`) so both use the same values. Otherwise, set them explicitly in your MCP config as well.
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
export BASE_DIR=/path/to/your/documents
|
|
196
|
+
export DB_PATH=/path/to/lancedb
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Configuration is resolved in this order:
|
|
200
|
+
|
|
201
|
+
1. CLI flags (highest priority)
|
|
202
|
+
2. Environment variables
|
|
203
|
+
3. Defaults
|
|
204
|
+
|
|
205
|
+
For the full list of CLI flags, environment variables, and defaults, see [Configuration](#configuration).
|
|
206
|
+
|
|
207
|
+
For CLI-only setups (no MCP server), install [Agent Skills](#agent-skills) so your AI assistant can form better queries and interpret results consistently.
|
|
208
|
+
|
|
209
|
+
> ⚠️ **`--model-name` must match your MCP server config.** Using a different embedding model against an existing database produces incompatible vectors, silently degrading search quality.
|
|
210
|
+
|
|
166
211
|
## Search Tuning
|
|
167
212
|
|
|
168
213
|
Adjust these for your use case:
|
|
@@ -247,21 +292,22 @@ Before RAG operations, request in natural language:
|
|
|
247
292
|
Add to your `AGENTS.md`, `CLAUDE.md`, or other agent instruction file:
|
|
248
293
|
```
|
|
249
294
|
When using query_documents, ingest_file, or ingest_data tools,
|
|
250
|
-
apply the mcp-local-rag skill for
|
|
295
|
+
apply the mcp-local-rag skill for better query formulation and result interpretation.
|
|
251
296
|
```
|
|
252
297
|
|
|
253
|
-
|
|
254
|
-
<summary><strong>Configuration</strong></summary>
|
|
298
|
+
## Configuration
|
|
255
299
|
|
|
256
|
-
### Environment Variables
|
|
300
|
+
### Environment Variables and CLI Flags
|
|
257
301
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
|
261
|
-
|
|
262
|
-
| `
|
|
263
|
-
| `
|
|
264
|
-
| `
|
|
302
|
+
Both MCP and CLI use the same environment variables. The CLI also accepts equivalent flags.
|
|
303
|
+
|
|
304
|
+
| Environment Variable | CLI Flag | Default | Description |
|
|
305
|
+
|---------------------|----------|---------|-------------|
|
|
306
|
+
| `BASE_DIR` | `--base-dir` | Current directory | Document root directory (security boundary) |
|
|
307
|
+
| `DB_PATH` | `--db-path` | `./lancedb/` | Vector database location |
|
|
308
|
+
| `CACHE_DIR` | `--cache-dir` | `./models/` | Model cache directory |
|
|
309
|
+
| `MODEL_NAME` | `--model-name` | `Xenova/all-MiniLM-L6-v2` | HuggingFace model ID ([available models](https://huggingface.co/models?library=transformers.js&pipeline_tag=feature-extraction)) |
|
|
310
|
+
| `MAX_FILE_SIZE` | `--max-file-size` | `104857600` (100MB) | Maximum file size in bytes |
|
|
265
311
|
|
|
266
312
|
**Model choice tips:**
|
|
267
313
|
- Multilingual docs → e.g., `onnx-community/embeddinggemma-300m-ONNX` (100+ languages)
|
|
@@ -317,8 +363,6 @@ The embedding model (~90MB) downloads on first use. Takes 1-2 minutes, then work
|
|
|
317
363
|
- **Local only**: No network requests after model download
|
|
318
364
|
- **Model source**: Official HuggingFace repository ([verify here](https://huggingface.co/Xenova/all-MiniLM-L6-v2))
|
|
319
365
|
|
|
320
|
-
</details>
|
|
321
|
-
|
|
322
366
|
<details>
|
|
323
367
|
<summary><strong>Performance</strong></summary>
|
|
324
368
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { Embedder } from '../embedder/index.js';
|
|
2
|
+
import { VectorStore } from '../vectordb/index.js';
|
|
3
|
+
import type { ResolvedGlobalConfig } from './options.js';
|
|
4
|
+
/**
|
|
5
|
+
* Create an uninitialized VectorStore from resolved global config (bound to config.dbPath and the fixed 'chunks' table).
|
|
6
|
+
* Callers are responsible for calling initialize() before use; this factory performs no I/O itself beyond constructing the object.
|
|
7
|
+
*/
|
|
8
|
+
export declare function createVectorStore(config: ResolvedGlobalConfig): VectorStore;
|
|
9
|
+
/**
|
|
10
|
+
* Create an uninitialized Embedder from resolved global config (model path from config.modelName, cache directory from config.cacheDir, batch size fixed at 16).
|
|
11
|
+
* Callers are responsible for managing the Embedder lifecycle.
|
|
12
|
+
*/
|
|
13
|
+
export declare function createEmbedder(config: ResolvedGlobalConfig): Embedder;
|
|
14
|
+
//# sourceMappingURL=common.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../src/cli/common.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAClD,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,cAAc,CAAA;AAExD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,oBAAoB,GAAG,WAAW,CAK3E;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,oBAAoB,GAAG,QAAQ,CAMrE"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
// Shared CLI component helpers — factory functions for VectorStore and Embedder
|
|
2
|
+
import { Embedder } from '../embedder/index.js';
|
|
3
|
+
import { VectorStore } from '../vectordb/index.js';
|
|
4
|
+
/**
 * Build a VectorStore for the CLI without initializing it.
 *
 * The store points at `config.dbPath` and always uses the `chunks` table.
 * The caller must invoke `initialize()` on the returned store before using it.
 *
 * @param config - Resolved global CLI configuration; only `dbPath` is read
 * @returns A new, not-yet-initialized VectorStore
 */
export function createVectorStore(config) {
    const { dbPath } = config;
    const storeOptions = { dbPath, tableName: 'chunks' };
    return new VectorStore(storeOptions);
}
|
|
14
|
+
/**
 * Build an Embedder for the CLI without initializing it.
 *
 * The model path comes from `config.modelName`, the cache directory from
 * `config.cacheDir`, and the batch size is fixed at 16.
 * The caller owns the returned Embedder's lifecycle.
 *
 * @param config - Resolved global CLI configuration; `modelName` and `cacheDir` are read
 * @returns A new, not-yet-initialized Embedder
 */
export function createEmbedder(config) {
    const { modelName, cacheDir } = config;
    const embedderOptions = { modelPath: modelName, batchSize: 16, cacheDir };
    return new Embedder(embedderOptions);
}
|
|
25
|
+
//# sourceMappingURL=common.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"common.js","sourceRoot":"","sources":["../../src/cli/common.ts"],"names":[],"mappings":"AAAA,gFAAgF;AAEhF,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAGlD;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAA4B;IAC5D,OAAO,IAAI,WAAW,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,QAAQ;KACpB,CAAC,CAAA;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,MAA4B;IACzD,OAAO,IAAI,QAAQ,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,EAAE;QACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;KAC1B,CAAC,CAAA;AACJ,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { GlobalOptions } from './options.js';
|
|
2
|
+
/**
|
|
3
|
+
* Run the delete CLI subcommand: remove the stored chunks for one file path or one --source URL and write a JSON result to stdout. Argument errors and runtime failures are reported on stderr with a non-zero exit status rather than by rejecting the promise.
|
|
4
|
+
* @param args - Arguments after "delete" (a positional <file-path>, --source <url>, or -h/--help)
|
|
5
|
+
* @param globalOptions - Global options parsed before the subcommand (defaults to an empty object)
|
|
6
|
+
*/
|
|
7
|
+
export declare function runDelete(args: string[], globalOptions?: GlobalOptions): Promise<void>;
|
|
8
|
+
//# sourceMappingURL=delete.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"delete.d.ts","sourceRoot":"","sources":["../../src/cli/delete.ts"],"names":[],"mappings":"AAUA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAkFjD;;;;GAIG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,aAAa,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CAiGhG"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
// CLI delete subcommand — delete ingested content by file path or source URL
|
|
2
|
+
import { unlink } from 'node:fs/promises';
|
|
3
|
+
import { resolve } from 'node:path';
|
|
4
|
+
import { generateMetaJsonPath, generateRawDataPath, isRawDataPath, } from '../utils/raw-data-utils.js';
|
|
5
|
+
import { createVectorStore } from './common.js';
|
|
6
|
+
import { resolveGlobalConfig, validatePath } from './options.js';
|
|
7
|
+
// ============================================
|
|
8
|
+
// Help
|
|
9
|
+
// ============================================
|
|
10
|
+
// Usage text for the delete subcommand; written to stderr for --help and alongside every argument error.
const HELP_TEXT = `Usage: mcp-local-rag [global-options] delete [--source <url>] [<file-path>]
|
|
11
|
+
|
|
12
|
+
Delete ingested content by file path or source URL.
|
|
13
|
+
|
|
14
|
+
Either <file-path> or --source is required (not both).
|
|
15
|
+
|
|
16
|
+
Arguments:
|
|
17
|
+
<file-path> File path of ingested content to delete
|
|
18
|
+
|
|
19
|
+
Options:
|
|
20
|
+
--source <url> Delete by source URL (for content ingested via ingest_data)
|
|
21
|
+
-h, --help Show this help
|
|
22
|
+
|
|
23
|
+
Global options (must appear before "delete"):
|
|
24
|
+
--db-path <path> LanceDB database path
|
|
25
|
+
--cache-dir <path> Model cache directory
|
|
26
|
+
--model-name <name> Embedding model`;
|
|
27
|
+
/**
|
|
28
|
+
* Parse delete-specific CLI arguments.
|
|
29
|
+
* Accepts a positional <file-path>, --source <url>, and -h/--help.
|
|
30
|
+
* Unknown flags or conflicting args cause exit(1).
|
|
31
|
+
*/
|
|
32
|
+
function parseArgs(args) {
|
|
33
|
+
let help = false;
|
|
34
|
+
let source;
|
|
35
|
+
let filePath;
|
|
36
|
+
let i = 0;
|
|
37
|
+
while (i < args.length) {
|
|
38
|
+
const arg = args[i];
|
|
39
|
+
if (arg === '-h' || arg === '--help') {
|
|
40
|
+
help = true;
|
|
41
|
+
i++;
|
|
42
|
+
}
|
|
43
|
+
else if (arg === '--source') {
|
|
44
|
+
const value = args[++i];
|
|
45
|
+
if (value === undefined || value.startsWith('-')) {
|
|
46
|
+
console.error('Missing value for --source');
|
|
47
|
+
console.error(HELP_TEXT);
|
|
48
|
+
process.exit(1);
|
|
49
|
+
}
|
|
50
|
+
source = value;
|
|
51
|
+
i++;
|
|
52
|
+
}
|
|
53
|
+
else if (arg.startsWith('-')) {
|
|
54
|
+
console.error(`Unknown option: ${arg}`);
|
|
55
|
+
console.error(HELP_TEXT);
|
|
56
|
+
process.exit(1);
|
|
57
|
+
}
|
|
58
|
+
else {
|
|
59
|
+
// Positional argument: file-path
|
|
60
|
+
filePath = arg;
|
|
61
|
+
i++;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
const result = { help };
|
|
65
|
+
if (source !== undefined)
|
|
66
|
+
result.source = source;
|
|
67
|
+
if (filePath !== undefined)
|
|
68
|
+
result.filePath = filePath;
|
|
69
|
+
return result;
|
|
70
|
+
}
|
|
71
|
+
// ============================================
|
|
72
|
+
// Main Entry Point
|
|
73
|
+
// ============================================
|
|
74
|
+
/**
 * Remove a file, treating "already absent" as success.
 * Only an ENOENT error is swallowed; every other failure is rethrown.
 *
 * @param path - Absolute path of the file to remove
 */
async function removeFileIfPresent(path) {
    try {
        await unlink(path);
    }
    catch (error) {
        const alreadyGone = error instanceof Error && 'code' in error && error.code === 'ENOENT';
        if (!alreadyGone) {
            throw error;
        }
    }
}
/**
 * Run the delete CLI subcommand.
 *
 * Flow:
 *  1. Parse arguments; `--help` prints usage to stderr and exits 0.
 *  2. Require exactly one of <file-path> or `--source` (otherwise exit 1).
 *  3. Resolve the target path: a raw-data path derived from the source URL,
 *     or the absolute form of <file-path> checked by validatePath.
 *  4. Only after the target is accepted, open the vector store, delete the
 *     target's chunks, remove the raw-data file and its meta JSON when the
 *     target is a raw-data path, and optimize the store.
 *  5. Write `{ filePath, deleted, timestamp }` as JSON to stdout.
 *
 * A path rejected by validatePath sets `process.exitCode = 1` and returns
 * without touching the database. Any thrown error is printed to stderr as
 * `Error: <message>` and the process exits with code 1.
 *
 * @param args - Arguments after "delete"
 * @param globalOptions - Global options parsed before the subcommand
 */
export async function runDelete(args, globalOptions = {}) {
    const parsed = parseArgs(args);
    if (parsed.help) {
        console.error(HELP_TEXT);
        process.exit(0);
    }
    // Exactly one selector is allowed: a file path or a source URL.
    if (!parsed.filePath && !parsed.source) {
        console.error('Either <file-path> or --source is required');
        console.error(HELP_TEXT);
        process.exit(1);
    }
    if (parsed.filePath && parsed.source) {
        console.error('Cannot specify both <file-path> and --source');
        console.error(HELP_TEXT);
        process.exit(1);
    }
    const globalConfig = resolveGlobalConfig(globalOptions);
    try {
        // Work out and validate the target before opening the database, so an
        // unacceptable path fails fast without any store side effects.
        let targetPath;
        if (parsed.source) {
            // Content ingested from a URL is stored under a generated raw-data path.
            targetPath = generateRawDataPath(globalConfig.dbPath, parsed.source, 'markdown');
        }
        else {
            targetPath = resolve(parsed.filePath);
            const pathError = validatePath(targetPath, '<file-path>');
            if (pathError) {
                console.error(pathError);
                process.exitCode = 1;
                return;
            }
        }
        // No Embedder is needed for deletion; the store alone is enough.
        const vectorStore = createVectorStore(globalConfig);
        await vectorStore.initialize();
        await vectorStore.deleteChunks(targetPath);
        // Raw-data targets also have on-disk artifacts: the content file and its meta JSON.
        if (isRawDataPath(targetPath)) {
            await removeFileIfPresent(targetPath);
            await removeFileIfPresent(generateMetaJsonPath(targetPath));
        }
        await vectorStore.optimize();
        const result = {
            filePath: targetPath,
            deleted: true,
            timestamp: new Date().toISOString(),
        };
        // stdout carries only the JSON result so it can be piped.
        process.stdout.write(JSON.stringify(result));
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(`Error: ${reason}`);
        process.exit(1);
    }
}
|
|
164
|
+
//# sourceMappingURL=delete.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"delete.js","sourceRoot":"","sources":["../../src/cli/delete.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAE7E,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAA;AACzC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EACL,oBAAoB,EACpB,mBAAmB,EACnB,aAAa,GACd,MAAM,4BAA4B,CAAA;AACnC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,cAAc,CAAA;AAEhE,+CAA+C;AAC/C,OAAO;AACP,+CAA+C;AAE/C,MAAM,SAAS,GAAG;;;;;;;;;;;;;;;;yCAgBuB,CAAA;AAYzC;;;;GAIG;AACH,SAAS,SAAS,CAAC,IAAc;IAC/B,IAAI,IAAI,GAAG,KAAK,CAAA;IAChB,IAAI,MAA0B,CAAA;IAC9B,IAAI,QAA4B,CAAA;IAEhC,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAE,CAAA;QAEpB,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;YACrC,IAAI,GAAG,IAAI,CAAA;YACX,CAAC,EAAE,CAAA;QACL,CAAC;aAAM,IAAI,GAAG,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;YACvB,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACjD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAA;gBAC3C,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;YACD,MAAM,GAAG,KAAK,CAAA;YACd,CAAC,EAAE,CAAA;QACL,CAAC;aAAM,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;YACvC,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;YACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACjB,CAAC;aAAM,CAAC;YACN,iCAAiC;YACjC,QAAQ,GAAG,GAAG,CAAA;YACd,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAe,EAAE,IAAI,EAAE,CAAA;IACnC,IAAI,MAAM,KAAK,SAAS;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAA;IAChD,IAAI,QAAQ,KAAK,SAAS;QAAE,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAA;IACtD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C;AAE/C;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAc,EAAE,gBAA+B,EAAE;IAC/E,oBAAoB;IACpB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAE9B,gBAAgB;IAChB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,4DAA4D;IAC5D,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,CAAC,MAAM,C
AAC,MAAM,EAAE,CAAC;QACvC,OAAO,CAAC,KAAK,CAAC,4CAA4C,CAAC,CAAA;QAC3D,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAA;QAC7D,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,wBAAwB;IACxB,MAAM,YAAY,GAAG,mBAAmB,CAAC,aAAa,CAAC,CAAA;IAEvD,IAAI,CAAC;QACH,oEAAoE;QACpE,MAAM,WAAW,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAA;QACnD,MAAM,WAAW,CAAC,UAAU,EAAE,CAAA;QAE9B,6BAA6B;QAC7B,IAAI,UAAkB,CAAA;QAEtB,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,yCAAyC;YACzC,UAAU,GAAG,mBAAmB,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;QAClF,CAAC;aAAM,CAAC;YACN,8CAA8C;YAC9C,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,QAAS,CAAC,CAAA;YAEtC,sDAAsD;YACtD,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,EAAE,aAAa,CAAC,CAAA;YACzD,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBACxB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAA;gBACpB,OAAM;YACR,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,MAAM,WAAW,CAAC,YAAY,CAAC,UAAU,CAAC,CAAA;QAE1C,iDAAiD;QACjD,IAAI,aAAa,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,UAAU,CAAC,CAAA;YAC1B,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,uDAAuD;gBACvD,IACE,CAAC,CAAC,KAAK,YAAY,KAAK,CAAC;oBACzB,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjB,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAClD,CAAC;oBACD,MAAM,KAAK,CAAA;gBACb,CAAC;YACH,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,oBAAoB,CAAC,UAAU,CAAC,CAAC,CAAA;YAChD,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,gBAAgB;gBAChB,IACE,CAAC,CAAC,KAAK,YAAY,KAAK,CAAC;oBACzB,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjB,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAClD,CAAC;oBACD,MAAM,KAAK,CAAA;gBACb,CAAC;YACH,CAAC;QACH,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,CAAC,QAAQ,EAAE,CAAA;QAE5B,+BAA+B;QAC/B,MAAM,MAAM,GAAG;YACb,QAAQ,EAAE,UAAU;YACpB,OAAO,EAAE,IAAI;YACb,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAA;QACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAA;IAC9C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,MAAM,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO
,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QACrE,OAAO,CAAC,KAAK,CAAC,UAAU,MAAM,EAAE,CAAC,CAAA;QACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;AACH,CAAC"}
|
package/dist/cli/ingest.d.ts
CHANGED
|
@@ -1,24 +1,37 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import type { GlobalOptions, ResolvedGlobalConfig } from './options.js';
|
|
2
|
+
interface IngestConfig {
|
|
3
|
+
baseDir: string;
|
|
4
|
+
dbPath: string;
|
|
5
|
+
cacheDir: string;
|
|
6
|
+
modelName: string;
|
|
7
|
+
maxFileSize: number;
|
|
8
|
+
}
|
|
9
|
+
interface IngestCliOptions {
|
|
3
10
|
baseDir?: string | undefined;
|
|
4
|
-
cacheDir?: string | undefined;
|
|
5
|
-
modelName?: string | undefined;
|
|
6
11
|
maxFileSize?: number | undefined;
|
|
7
12
|
}
|
|
8
13
|
interface ParsedArgs {
|
|
9
14
|
positional: string | undefined;
|
|
10
|
-
options:
|
|
15
|
+
options: IngestCliOptions;
|
|
11
16
|
help: boolean;
|
|
12
17
|
}
|
|
13
18
|
/**
|
|
14
|
-
* Parse CLI arguments into options and a positional path.
|
|
15
|
-
* Flags: --
|
|
19
|
+
* Parse ingest-specific CLI arguments into options and a positional path.
|
|
20
|
+
* Flags: --base-dir, --max-file-size, -h/--help
|
|
21
|
+
* Unknown flags (including global flags passed after subcommand) cause an error.
|
|
16
22
|
*/
|
|
17
23
|
export declare function parseArgs(args: string[]): ParsedArgs;
|
|
24
|
+
/**
|
|
25
|
+
* Resolve ingest config by merging global config with ingest-specific options.
|
|
26
|
+
* Ingest-specific: baseDir, maxFileSize (CLI flags > env vars > defaults).
|
|
27
|
+
* Validates all resolved values before returning.
|
|
28
|
+
*/
|
|
29
|
+
export declare function resolveConfig(globalConfig: ResolvedGlobalConfig, ingestOptions?: IngestCliOptions): IngestConfig;
|
|
18
30
|
/**
|
|
19
31
|
* Run the ingest CLI subcommand.
|
|
20
32
|
* @param args - Arguments after "ingest" (e.g., option flags and file/directory path)
|
|
33
|
+
* @param globalOptions - Global options parsed before the subcommand
|
|
21
34
|
*/
|
|
22
|
-
export declare function runIngest(args: string[]): Promise<void>;
|
|
35
|
+
export declare function runIngest(args: string[], globalOptions?: GlobalOptions): Promise<void>;
|
|
23
36
|
export {};
|
|
24
37
|
//# sourceMappingURL=ingest.d.ts.map
|
package/dist/cli/ingest.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,cAAc,CAAA;AAavE,UAAU,YAAY;IACpB,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;CACpB;AAQD,UAAU,gBAAgB;IACxB,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC5B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;CACjC;AAED,UAAU,UAAU;IAClB,UAAU,EAAE,MAAM,GAAG,SAAS,CAAA;IAC9B,OAAO,EAAE,gBAAgB,CAAA;IACzB,IAAI,EAAE,OAAO,CAAA;CACd;AAgCD;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,UAAU,CAwDpD;AAMD;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,YAAY,EAAE,oBAAoB,EAClC,aAAa,GAAE,gBAAqB,GACnC,YAAY,CA6Bd;AAuJD;;;;GAIG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,aAAa,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CA0FhG"}
|