mcp-local-rag 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/dist/cli/ingest.d.ts +24 -0
- package/dist/cli/ingest.d.ts.map +1 -0
- package/dist/cli/ingest.js +295 -0
- package/dist/cli/ingest.js.map +1 -0
- package/dist/cli-main.d.ts +1 -1
- package/dist/cli-main.d.ts.map +1 -1
- package/dist/cli-main.js +6 -2
- package/dist/cli-main.js.map +1 -1
- package/dist/embedder/index.d.ts.map +1 -1
- package/dist/embedder/index.js +3 -1
- package/dist/embedder/index.js.map +1 -1
- package/dist/index.js +5 -2
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/skills/mcp-local-rag/SKILL.md +27 -1
- package/skills/mcp-local-rag/references/cli-ingest.md +45 -0
package/README.md
CHANGED
|
@@ -119,6 +119,30 @@ HTML is automatically cleaned—you get the article content, not the boilerplate
|
|
|
119
119
|
|
|
120
120
|
> **Note:** The RAG server itself doesn't fetch web content—your AI assistant retrieves it and passes the HTML to `ingest_data`. This keeps the server fully local while letting you index any content your assistant can access. Please respect website terms of service and copyright when ingesting external content.
|
|
121
121
|
|
|
122
|
+
### Bulk Ingestion (CLI)
|
|
123
|
+
|
|
124
|
+
For ingesting multiple files or an entire directory, use the CLI command instead of calling `ingest_file` repeatedly:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
npx mcp-local-rag ingest --db-path ./lancedb --base-dir ./docs ./docs/
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Supports PDF, DOCX, TXT, and Markdown (same as MCP tools). HTML is only supported via `ingest_data`, not CLI.
|
|
131
|
+
|
|
132
|
+
Key options:
|
|
133
|
+
|
|
134
|
+
| Option | Default | Description |
|
|
135
|
+
|--------|---------|-------------|
|
|
136
|
+
| `--db-path <path>` | `./lancedb/` | LanceDB database path |
|
|
137
|
+
| `--base-dir <path>` | cwd | Base directory for documents |
|
|
138
|
+
| `--model-name <name>` | `Xenova/all-MiniLM-L6-v2` | Embedding model |
|
|
139
|
+
| `--cache-dir <path>` | `./models/` | Model cache directory |
|
|
140
|
+
| `--max-file-size <n>` | `104857600` | Max file size in bytes |
|
|
141
|
+
|
|
142
|
+
This processes all supported files recursively and runs optimization once at the end — much faster than per-file ingestion for large batches.
|
|
143
|
+
|
|
144
|
+
> ⚠️ **CLI options must match your MCP server config.** Especially `--model-name` — using a different embedding model against an existing database produces incompatible vectors, silently degrading search quality.
|
|
145
|
+
|
|
122
146
|
### Searching Documents
|
|
123
147
|
|
|
124
148
|
```
|
|
@@ -407,6 +431,7 @@ pnpm run check:all # Full quality check
|
|
|
407
431
|
src/
|
|
408
432
|
index.ts # Entry point
|
|
409
433
|
server/ # MCP tool handlers
|
|
434
|
+
cli/ # CLI subcommands (ingest)
|
|
410
435
|
parser/ # PDF, DOCX, TXT, MD parsing
|
|
411
436
|
chunker/ # Text splitting
|
|
412
437
|
embedder/ # Transformers.js embeddings
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * Option values collected from the command line by `parseArgs`.
 * Every field is optional; a field is absent or `undefined` when its flag was not given.
 */
interface CliOptions {
    /** Value of `--db-path`. */
    dbPath?: string | undefined;
    /** Value of `--base-dir`. */
    baseDir?: string | undefined;
    /** Value of `--cache-dir`. */
    cacheDir?: string | undefined;
    /** Value of `--model-name`. */
    modelName?: string | undefined;
    /** Value of `--max-file-size`. */
    maxFileSize?: number | undefined;
}

/** Result of `parseArgs`. */
interface ParsedArgs {
    /** The positional path argument, or `undefined` when none was given. */
    positional: string | undefined;
    /** Option flags found on the command line. */
    options: CliOptions;
    /** True when `-h` or `--help` was present. */
    help: boolean;
}

/**
 * Parse CLI arguments into options and a positional path.
 * Flags: --db-path, --base-dir, --cache-dir, --model-name, --max-file-size, -h/--help
 *
 * @param args - Arguments to parse
 * @returns The positional path, the collected options, and the help flag
 */
export declare function parseArgs(args: string[]): ParsedArgs;

/**
 * Run the ingest CLI subcommand.
 *
 * @param args - Arguments after "ingest" (e.g., option flags and file/directory path)
 */
export declare function runIngest(args: string[]): Promise<void>;

export {};
|
|
24
|
+
//# sourceMappingURL=ingest.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"AA8BA,UAAU,UAAU;IAClB,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC3B,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC5B,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC7B,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC9B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;CACjC;AAED,UAAU,UAAU;IAClB,UAAU,EAAE,MAAM,GAAG,SAAS,CAAA;IAC9B,OAAO,EAAE,UAAU,CAAA;IACnB,IAAI,EAAE,OAAO,CAAA;CACd;AAiCD;;;GAGG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,UAAU,CAiDpD;AAmJD;;;GAGG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAgG7D"}
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
// CLI ingest subcommand — bulk file ingestion with single optimize() at end
|
|
2
|
+
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { readdir, stat } from 'node:fs/promises';
|
|
4
|
+
import { extname, join, resolve } from 'node:path';
|
|
5
|
+
import { SemanticChunker } from '../chunker/index.js';
|
|
6
|
+
import { Embedder } from '../embedder/index.js';
|
|
7
|
+
import { DocumentParser, SUPPORTED_EXTENSIONS } from '../parser/index.js';
|
|
8
|
+
import { VectorStore } from '../vectordb/index.js';
|
|
9
|
+
// ============================================
|
|
10
|
+
// Defaults
|
|
11
|
+
// ============================================
|
|
12
|
+
// Fallback values used when neither a CLI flag nor the matching
// environment variable supplies a setting.
const DEFAULTS = {
    // LanceDB database path
    dbPath: './lancedb/',
    // Model cache directory
    cacheDir: './models/',
    // Embedding model
    modelName: 'Xenova/all-MiniLM-L6-v2',
    // Max file size in bytes (100 MiB = 104857600)
    maxFileSize: 100 * 1024 * 1024,
};
|
|
18
|
+
// ============================================
|
|
19
|
+
// Help
|
|
20
|
+
// ============================================
|
|
21
|
+
// Usage text for the ingest subcommand. It is printed for -h/--help and after an
// unknown option; the default values shown are interpolated from DEFAULTS.
const HELP_TEXT = `Usage: mcp-local-rag ingest [options] <path>
|
|
22
|
+
|
|
23
|
+
Ingest a single file or all supported files under a directory.
|
|
24
|
+
|
|
25
|
+
Options:
|
|
26
|
+
--db-path <path> LanceDB database path (default: ${DEFAULTS.dbPath})
|
|
27
|
+
--base-dir <path> Base directory for documents (default: cwd)
|
|
28
|
+
--cache-dir <path> Model cache directory (default: ${DEFAULTS.cacheDir})
|
|
29
|
+
--model-name <name> Embedding model (default: ${DEFAULTS.modelName})
|
|
30
|
+
--max-file-size <n> Max file size in bytes (default: ${DEFAULTS.maxFileSize})
|
|
31
|
+
-h, --help Show this help`;
|
|
32
|
+
// ============================================
|
|
33
|
+
// Arg Parsing
|
|
34
|
+
// ============================================
|
|
35
|
+
/**
|
|
36
|
+
* Parse CLI arguments into options and a positional path.
|
|
37
|
+
* Flags: --db-path, --base-dir, --cache-dir, --model-name, --max-file-size, -h/--help
|
|
38
|
+
*/
|
|
39
|
+
export function parseArgs(args) {
|
|
40
|
+
const options = {};
|
|
41
|
+
let positional;
|
|
42
|
+
let help = false;
|
|
43
|
+
let i = 0;
|
|
44
|
+
while (i < args.length) {
|
|
45
|
+
const arg = args[i];
|
|
46
|
+
switch (arg) {
|
|
47
|
+
case '-h':
|
|
48
|
+
case '--help':
|
|
49
|
+
help = true;
|
|
50
|
+
i++;
|
|
51
|
+
break;
|
|
52
|
+
case '--db-path':
|
|
53
|
+
options.dbPath = args[++i];
|
|
54
|
+
i++;
|
|
55
|
+
break;
|
|
56
|
+
case '--base-dir':
|
|
57
|
+
options.baseDir = args[++i];
|
|
58
|
+
i++;
|
|
59
|
+
break;
|
|
60
|
+
case '--cache-dir':
|
|
61
|
+
options.cacheDir = args[++i];
|
|
62
|
+
i++;
|
|
63
|
+
break;
|
|
64
|
+
case '--model-name':
|
|
65
|
+
options.modelName = args[++i];
|
|
66
|
+
i++;
|
|
67
|
+
break;
|
|
68
|
+
case '--max-file-size': {
|
|
69
|
+
const parsed = Number.parseInt(args[++i] ?? '', 10);
|
|
70
|
+
options.maxFileSize = Number.isNaN(parsed) ? undefined : parsed;
|
|
71
|
+
i++;
|
|
72
|
+
break;
|
|
73
|
+
}
|
|
74
|
+
default:
|
|
75
|
+
if (arg.startsWith('-')) {
|
|
76
|
+
console.error(`Unknown option: ${arg}`);
|
|
77
|
+
console.error(HELP_TEXT);
|
|
78
|
+
process.exit(1);
|
|
79
|
+
}
|
|
80
|
+
positional = arg;
|
|
81
|
+
i++;
|
|
82
|
+
break;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
return { positional, options, help };
|
|
86
|
+
}
|
|
87
|
+
// ============================================
|
|
88
|
+
// NaN Defense
|
|
89
|
+
// ============================================
|
|
90
|
+
/**
|
|
91
|
+
* Ensure maxFileSize is a valid number, falling back to default if NaN.
|
|
92
|
+
*/
|
|
93
|
+
function sanitizeMaxFileSize(value) {
|
|
94
|
+
return Number.isNaN(value) ? DEFAULTS.maxFileSize : value;
|
|
95
|
+
}
|
|
96
|
+
// ============================================
|
|
97
|
+
// Config Resolution
|
|
98
|
+
// ============================================
|
|
99
|
+
/**
|
|
100
|
+
* Resolve config with priority: CLI flags > environment variables > defaults.
|
|
101
|
+
*/
|
|
102
|
+
function resolveConfig(cliOptions = {}) {
|
|
103
|
+
return {
|
|
104
|
+
baseDir: cliOptions.baseDir ?? process.env['BASE_DIR'] ?? process.cwd(),
|
|
105
|
+
dbPath: cliOptions.dbPath ?? process.env['DB_PATH'] ?? DEFAULTS.dbPath,
|
|
106
|
+
cacheDir: cliOptions.cacheDir ?? process.env['CACHE_DIR'] ?? DEFAULTS.cacheDir,
|
|
107
|
+
modelName: cliOptions.modelName ?? process.env['MODEL_NAME'] ?? DEFAULTS.modelName,
|
|
108
|
+
maxFileSize: sanitizeMaxFileSize(cliOptions.maxFileSize ??
|
|
109
|
+
(process.env['MAX_FILE_SIZE']
|
|
110
|
+
? Number.parseInt(process.env['MAX_FILE_SIZE'], 10)
|
|
111
|
+
: DEFAULTS.maxFileSize)),
|
|
112
|
+
};
|
|
113
|
+
}
|
|
114
|
+
// ============================================
|
|
115
|
+
// File Collection
|
|
116
|
+
// ============================================
|
|
117
|
+
/**
|
|
118
|
+
* Collect files to ingest from a path.
|
|
119
|
+
* - If path is a file with supported extension, return [path].
|
|
120
|
+
* - If path is a directory, recursively scan for supported files.
|
|
121
|
+
* - Exclude dbPath and cacheDir directories.
|
|
122
|
+
*/
|
|
123
|
+
async function collectFiles(targetPath, excludePaths) {
|
|
124
|
+
const resolved = resolve(targetPath);
|
|
125
|
+
const info = await stat(resolved);
|
|
126
|
+
if (info.isFile()) {
|
|
127
|
+
const ext = extname(resolved).toLowerCase();
|
|
128
|
+
if (!SUPPORTED_EXTENSIONS.has(ext)) {
|
|
129
|
+
console.error(`Unsupported file extension: ${ext} (supported: ${[...SUPPORTED_EXTENSIONS].join(', ')})`);
|
|
130
|
+
return [];
|
|
131
|
+
}
|
|
132
|
+
return [resolved];
|
|
133
|
+
}
|
|
134
|
+
if (info.isDirectory()) {
|
|
135
|
+
const entries = await readdir(resolved, { recursive: true, withFileTypes: true });
|
|
136
|
+
return entries
|
|
137
|
+
.filter((e) => e.isFile() && SUPPORTED_EXTENSIONS.has(extname(e.name).toLowerCase()))
|
|
138
|
+
.map((e) => join(e.parentPath, e.name))
|
|
139
|
+
.filter((filePath) => !excludePaths.some((ep) => filePath.startsWith(ep)))
|
|
140
|
+
.sort();
|
|
141
|
+
}
|
|
142
|
+
return [];
|
|
143
|
+
}
|
|
144
|
+
// ============================================
|
|
145
|
+
// Per-file Ingestion
|
|
146
|
+
// ============================================
|
|
147
|
+
/**
|
|
148
|
+
* Ingest a single file: parse, chunk, embed, delete old chunks, insert new chunks.
|
|
149
|
+
* Returns the number of chunks inserted.
|
|
150
|
+
*/
|
|
151
|
+
async function ingestSingleFile(filePath, parser, chunker, embedder, vectorStore) {
|
|
152
|
+
// Parse file
|
|
153
|
+
const isPdf = filePath.toLowerCase().endsWith('.pdf');
|
|
154
|
+
let text;
|
|
155
|
+
let title = null;
|
|
156
|
+
if (isPdf) {
|
|
157
|
+
const result = await parser.parsePdf(filePath, embedder);
|
|
158
|
+
text = result.content;
|
|
159
|
+
title = result.title || null;
|
|
160
|
+
}
|
|
161
|
+
else {
|
|
162
|
+
const result = await parser.parseFile(filePath);
|
|
163
|
+
text = result.content;
|
|
164
|
+
title = result.title || null;
|
|
165
|
+
}
|
|
166
|
+
// Chunk text
|
|
167
|
+
const chunks = await chunker.chunkText(text, embedder);
|
|
168
|
+
if (chunks.length === 0) {
|
|
169
|
+
console.error(` Warning: 0 chunks generated (file may be empty or too short)`);
|
|
170
|
+
return 0;
|
|
171
|
+
}
|
|
172
|
+
// Generate embeddings
|
|
173
|
+
const embeddings = await embedder.embedBatch(chunks.map((c) => c.text));
|
|
174
|
+
// Delete existing chunks for this file
|
|
175
|
+
await vectorStore.deleteChunks(filePath);
|
|
176
|
+
// Build vector chunks
|
|
177
|
+
const timestamp = new Date().toISOString();
|
|
178
|
+
const vectorChunks = chunks.map((chunk, index) => {
|
|
179
|
+
const embedding = embeddings[index];
|
|
180
|
+
if (!embedding) {
|
|
181
|
+
throw new Error(`Missing embedding for chunk ${index}`);
|
|
182
|
+
}
|
|
183
|
+
return {
|
|
184
|
+
id: randomUUID(),
|
|
185
|
+
filePath,
|
|
186
|
+
chunkIndex: chunk.index,
|
|
187
|
+
text: chunk.text,
|
|
188
|
+
vector: embedding,
|
|
189
|
+
metadata: {
|
|
190
|
+
fileName: filePath.split('/').pop() || filePath,
|
|
191
|
+
fileSize: text.length,
|
|
192
|
+
fileType: filePath.split('.').pop() || '',
|
|
193
|
+
},
|
|
194
|
+
fileTitle: title,
|
|
195
|
+
timestamp,
|
|
196
|
+
};
|
|
197
|
+
});
|
|
198
|
+
// Insert chunks
|
|
199
|
+
await vectorStore.insertChunks(vectorChunks);
|
|
200
|
+
return vectorChunks.length;
|
|
201
|
+
}
|
|
202
|
+
// ============================================
|
|
203
|
+
// Main Entry Point
|
|
204
|
+
// ============================================
|
|
205
|
+
/**
|
|
206
|
+
* Run the ingest CLI subcommand.
|
|
207
|
+
* @param args - Arguments after "ingest" (e.g., option flags and file/directory path)
|
|
208
|
+
*/
|
|
209
|
+
export async function runIngest(args) {
|
|
210
|
+
// Parse CLI options
|
|
211
|
+
const { positional, options, help } = parseArgs(args);
|
|
212
|
+
// Handle --help
|
|
213
|
+
if (help) {
|
|
214
|
+
console.error(HELP_TEXT);
|
|
215
|
+
process.exit(0);
|
|
216
|
+
}
|
|
217
|
+
// Validate positional argument
|
|
218
|
+
if (!positional) {
|
|
219
|
+
console.error('Usage: mcp-local-rag ingest [options] <path>');
|
|
220
|
+
console.error(' Ingest a single file or all supported files under a directory.');
|
|
221
|
+
console.error(' Run with --help for all options.');
|
|
222
|
+
process.exit(1);
|
|
223
|
+
}
|
|
224
|
+
const targetPath = positional;
|
|
225
|
+
// Validate path exists
|
|
226
|
+
try {
|
|
227
|
+
await stat(targetPath);
|
|
228
|
+
}
|
|
229
|
+
catch {
|
|
230
|
+
console.error(`Error: path does not exist: ${targetPath}`);
|
|
231
|
+
process.exit(1);
|
|
232
|
+
}
|
|
233
|
+
// Resolve config: CLI flags > env vars > defaults
|
|
234
|
+
const config = resolveConfig(options);
|
|
235
|
+
const excludePaths = [`${resolve(config.dbPath)}/`, `${resolve(config.cacheDir)}/`];
|
|
236
|
+
// Collect files
|
|
237
|
+
const files = await collectFiles(targetPath, excludePaths);
|
|
238
|
+
if (files.length === 0) {
|
|
239
|
+
console.error('No supported files found.');
|
|
240
|
+
process.exit(1);
|
|
241
|
+
}
|
|
242
|
+
console.error(`Found ${files.length} file(s) to ingest.`);
|
|
243
|
+
// Initialize components (single instances reused across all files)
|
|
244
|
+
const parser = new DocumentParser({
|
|
245
|
+
baseDir: config.baseDir,
|
|
246
|
+
maxFileSize: config.maxFileSize,
|
|
247
|
+
});
|
|
248
|
+
const chunker = new SemanticChunker();
|
|
249
|
+
const embedder = new Embedder({
|
|
250
|
+
modelPath: config.modelName,
|
|
251
|
+
batchSize: 16,
|
|
252
|
+
cacheDir: config.cacheDir,
|
|
253
|
+
});
|
|
254
|
+
const vectorStore = new VectorStore({
|
|
255
|
+
dbPath: config.dbPath,
|
|
256
|
+
tableName: 'chunks',
|
|
257
|
+
});
|
|
258
|
+
await vectorStore.initialize();
|
|
259
|
+
// Process each file
|
|
260
|
+
const summary = { succeeded: 0, failed: 0, totalChunks: 0 };
|
|
261
|
+
for (let i = 0; i < files.length; i++) {
|
|
262
|
+
const filePath = files[i];
|
|
263
|
+
const label = `[${i + 1}/${files.length}]`;
|
|
264
|
+
try {
|
|
265
|
+
const chunkCount = await ingestSingleFile(filePath, parser, chunker, embedder, vectorStore);
|
|
266
|
+
if (chunkCount === 0) {
|
|
267
|
+
// 0 chunks is a skip/warning, not a failure
|
|
268
|
+
console.error(`${label} ${filePath} ... SKIPPED (0 chunks)`);
|
|
269
|
+
summary.succeeded++;
|
|
270
|
+
}
|
|
271
|
+
else {
|
|
272
|
+
console.error(`${label} ${filePath} ... OK (${chunkCount} chunks)`);
|
|
273
|
+
summary.succeeded++;
|
|
274
|
+
summary.totalChunks += chunkCount;
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
catch (error) {
|
|
278
|
+
const reason = error instanceof Error ? error.message : String(error);
|
|
279
|
+
console.error(`${label} ${filePath} ... FAILED: ${reason}`);
|
|
280
|
+
summary.failed++;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
// Optimize once at end (not per-file)
|
|
284
|
+
await vectorStore.optimize();
|
|
285
|
+
// Print summary
|
|
286
|
+
console.error('');
|
|
287
|
+
console.error('--- Ingest Summary ---');
|
|
288
|
+
console.error(`Succeeded: ${summary.succeeded}`);
|
|
289
|
+
console.error(`Failed: ${summary.failed}`);
|
|
290
|
+
console.error(`Total chunks: ${summary.totalChunks}`);
|
|
291
|
+
if (summary.failed > 0) {
|
|
292
|
+
process.exitCode = 1;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
//# sourceMappingURL=ingest.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAE5E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAA;AAChD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EAAE,cAAc,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AAEzE,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAkClD,+CAA+C;AAC/C,WAAW;AACX,+CAA+C;AAE/C,MAAM,QAAQ,GAAG;IACf,MAAM,EAAE,YAAY;IACpB,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,yBAAyB;IACpC,WAAW,EAAE,SAAS;CACd,CAAA;AAEV,+CAA+C;AAC/C,OAAO;AACP,+CAA+C;AAE/C,MAAM,SAAS,GAAG;;;;;2DAKyC,QAAQ,CAAC,MAAM;;2DAEf,QAAQ,CAAC,QAAQ;qDACvB,QAAQ,CAAC,SAAS;4DACX,QAAQ,CAAC,WAAW;wCACxC,CAAA;AAExC,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,OAAO,GAAe,EAAE,CAAA;IAC9B,IAAI,UAA8B,CAAA;IAClC,IAAI,IAAI,GAAG,KAAK,CAAA;IAEhB,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAE,CAAA;QACpB,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ;gBACX,IAAI,GAAG,IAAI,CAAA;gBACX,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,WAAW;gBACd,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC1B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,YAAY;gBACf,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC3B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,aAAa;gBAChB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC5B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,cAAc;gBACjB,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC7B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,iBAAiB,CAAC,CAAC,CAAC;gBACvB,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;gBACnD,OAAO,CAAC,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAA;gBAC/D,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,CAAC;YACD;gBACE,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBACxB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;oBACvC,OAAO,CAAC,KAAK,
CAAC,SAAS,CAAC,CAAA;oBACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACjB,CAAC;gBACD,UAAU,GAAG,GAAG,CAAA;gBAChB,CAAC,EAAE,CAAA;gBACH,MAAK;QACT,CAAC;IACH,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,CAAA;AACtC,CAAC;AAED,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;GAEG;AACH,SAAS,mBAAmB,CAAC,KAAa;IACxC,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,CAAA;AAC3D,CAAC;AAED,+CAA+C;AAC/C,oBAAoB;AACpB,+CAA+C;AAE/C;;GAEG;AACH,SAAS,aAAa,CAAC,aAAyB,EAAE;IAChD,OAAO;QACL,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,GAAG,EAAE;QACvE,MAAM,EAAE,UAAU,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,QAAQ,CAAC,MAAM;QACtE,QAAQ,EAAE,UAAU,CAAC,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,QAAQ,CAAC,QAAQ;QAC9E,SAAS,EAAE,UAAU,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,QAAQ,CAAC,SAAS;QAClF,WAAW,EAAE,mBAAmB,CAC9B,UAAU,CAAC,WAAW;YACpB,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;gBAC3B,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,EAAE,EAAE,CAAC;gBACnD,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAC5B;KACF,CAAA;AACH,CAAC;AAED,+CAA+C;AAC/C,kBAAkB;AAClB,+CAA+C;AAE/C;;;;;GAKG;AACH,KAAK,UAAU,YAAY,CAAC,UAAkB,EAAE,YAAsB;IACpE,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,CAAC,CAAA;IACpC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAA;IAEjC,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QAClB,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;QAC3C,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,KAAK,CACX,+BAA+B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAC1F,CAAA;YACD,OAAO,EAAE,CAAA;QACX,CAAC;QACD,OAAO,CAAC,QAAQ,CAAC,CAAA;IACnB,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAA;QACjF,OAAO,OAAO;aACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;aACpF,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;aACtC,MAA
M,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;aACzE,IAAI,EAAE,CAAA;IACX,CAAC;IAED,OAAO,EAAE,CAAA;AACX,CAAC;AAED,+CAA+C;AAC/C,qBAAqB;AACrB,+CAA+C;AAE/C;;;GAGG;AACH,KAAK,UAAU,gBAAgB,CAC7B,QAAgB,EAChB,MAAsB,EACtB,OAAwB,EACxB,QAAkB,EAClB,WAAwB;IAExB,aAAa;IACb,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAA;IACrD,IAAI,IAAY,CAAA;IAChB,IAAI,KAAK,GAAkB,IAAI,CAAA;IAC/B,IAAI,KAAK,EAAE,CAAC;QACV,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;QACxD,IAAI,GAAG,MAAM,CAAC,OAAO,CAAA;QACrB,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,IAAI,CAAA;IAC9B,CAAC;SAAM,CAAC;QACN,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;QAC/C,IAAI,GAAG,MAAM,CAAC,OAAO,CAAA;QACrB,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,IAAI,CAAA;IAC9B,CAAC;IAED,aAAa;IACb,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;IACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,KAAK,CAAC,gEAAgE,CAAC,CAAA;QAC/E,OAAO,CAAC,CAAA;IACV,CAAC;IAED,sBAAsB;IACtB,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;IAEvE,uCAAuC;IACvC,MAAM,WAAW,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAA;IAExC,sBAAsB;IACtB,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAC1C,MAAM,YAAY,GAAkB,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC9D,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,CAAA;QACnC,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAA;QACzD,CAAC;QACD,OAAO;YACL,EAAE,EAAE,UAAU,EAAE;YAChB,QAAQ;YACR,UAAU,EAAE,KAAK,CAAC,KAAK;YACvB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,MAAM,EAAE,SAAS;YACjB,QAAQ,EAAE;gBACR,QAAQ,EAAE,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,QAAQ;gBAC/C,QAAQ,EAAE,IAAI,CAAC,MAAM;gBACrB,QAAQ,EAAE,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE;aAC1C;YACD,SAAS,EAAE,KAAK;YAChB,SAAS;SACV,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,gBAAgB;IAChB,MAAM,WAAW,CAAC,YAAY,CAAC,YAAY,CAAC,CAAA;IAE5C,OAAO,YAAY,CAAC,MAAM,CAAA;AAC5B,CAAC;AAED,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C;A
AE/C;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAc;IAC5C,oBAAoB;IACpB,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAErD,gBAAgB;IAChB,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,+BAA+B;IAC/B,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAA;QAC7D,OAAO,CAAC,KAAK,CAAC,kEAAkE,CAAC,CAAA;QACjF,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAA;QACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,MAAM,UAAU,GAAG,UAAU,CAAA;IAE7B,uBAAuB;IACvB,IAAI,CAAC;QACH,MAAM,IAAI,CAAC,UAAU,CAAC,CAAA;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,+BAA+B,UAAU,EAAE,CAAC,CAAA;QAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,kDAAkD;IAClD,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;IACrC,MAAM,YAAY,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IAEnF,gBAAgB;IAChB,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IAC1D,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAA;QAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,MAAM,qBAAqB,CAAC,CAAA;IAEzD,mEAAmE;IACnE,MAAM,MAAM,GAAG,IAAI,cAAc,CAAC;QAChC,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC,CAAA;IACF,MAAM,OAAO,GAAG,IAAI,eAAe,EAAE,CAAA;IACrC,MAAM,QAAQ,GAAG,IAAI,QAAQ,CAAC;QAC5B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,EAAE;QACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;KAC1B,CAAC,CAAA;IACF,MAAM,WAAW,GAAG,IAAI,WAAW,CAAC;QAClC,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,QAAQ;KACpB,CAAC,CAAA;IACF,MAAM,WAAW,CAAC,UAAU,EAAE,CAAA;IAE9B,oBAAoB;IACpB,MAAM,OAAO,GAAkB,EAAE,SAAS,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,CAAA;IAE1E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAE,CAAA;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAA;QAE1C,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO
,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAA;YAC3F,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;gBACrB,4CAA4C;gBAC5C,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,IAAI,QAAQ,yBAAyB,CAAC,CAAA;gBAC5D,OAAO,CAAC,SAAS,EAAE,CAAA;YACrB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,IAAI,QAAQ,YAAY,UAAU,UAAU,CAAC,CAAA;gBACnE,OAAO,CAAC,SAAS,EAAE,CAAA;gBACnB,OAAO,CAAC,WAAW,IAAI,UAAU,CAAA;YACnC,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,MAAM,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACrE,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,IAAI,QAAQ,gBAAgB,MAAM,EAAE,CAAC,CAAA;YAC3D,OAAO,CAAC,MAAM,EAAE,CAAA;QAClB,CAAC;IACH,CAAC;IAED,sCAAsC;IACtC,MAAM,WAAW,CAAC,QAAQ,EAAE,CAAA;IAE5B,gBAAgB;IAChB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;IACjB,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAA;IACvC,OAAO,CAAC,KAAK,CAAC,cAAc,OAAO,CAAC,SAAS,EAAE,CAAC,CAAA;IAChD,OAAO,CAAC,KAAK,CAAC,cAAc,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;IAC7C,OAAO,CAAC,KAAK,CAAC,iBAAiB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;IAErD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAA;IACtB,CAAC;AACH,CAAC"}
|
package/dist/cli-main.d.ts
CHANGED
package/dist/cli-main.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli-main.d.ts","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"cli-main.d.ts","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"AAIA;;;GAGG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CA0B7D"}
|
package/dist/cli-main.js
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
// CLI entry point for subcommands (skills install, ingest)
|
|
2
2
|
import { run as runSkillsInstall } from './bin/install-skills.js';
|
|
3
|
+
import { runIngest } from './cli/ingest.js';
|
|
3
4
|
/**
|
|
4
5
|
* Handle CLI subcommands
|
|
5
6
|
* @param args - Command line arguments (after the binary name)
|
|
6
7
|
*/
|
|
7
|
-
export function handleCli(args) {
|
|
8
|
+
export async function handleCli(args) {
|
|
8
9
|
const subcommand = args[0];
|
|
9
10
|
switch (subcommand) {
|
|
10
11
|
case 'skills':
|
|
@@ -18,9 +19,12 @@ export function handleCli(args) {
|
|
|
18
19
|
process.exit(1);
|
|
19
20
|
}
|
|
20
21
|
break;
|
|
22
|
+
case 'ingest':
|
|
23
|
+
await runIngest(args.slice(1));
|
|
24
|
+
break;
|
|
21
25
|
default:
|
|
22
26
|
console.error(`Unknown command: ${subcommand}`);
|
|
23
|
-
console.error('Available commands: skills');
|
|
27
|
+
console.error('Available commands: skills, ingest');
|
|
24
28
|
process.exit(1);
|
|
25
29
|
}
|
|
26
30
|
}
|
package/dist/cli-main.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli-main.js","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,OAAO,EAAE,GAAG,IAAI,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;
|
|
1
|
+
{"version":3,"file":"cli-main.js","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,OAAO,EAAE,GAAG,IAAI,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AACjE,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAE3C;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAc;IAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;IAE1B,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,QAAQ;YACX,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC1B,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CACX,8EAA8E,CAC/E,CAAA;gBACD,OAAO,CAAC,KAAK,CAAC,qEAAqE,CAAC,CAAA;gBACpF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;YACD,MAAK;QAEP,KAAK,QAAQ;YACX,MAAM,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAK;QAEP;YACE,OAAO,CAAC,KAAK,CAAC,oBAAoB,UAAU,EAAE,CAAC,CAAA;YAC/C,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAA;YACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACnB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAQA;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAA;IACjB,iBAAiB;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,4BAA4B;IAC5B,QAAQ,EAAE,MAAM,CAAA;CACjB;AAMD;;GAEG;AACH,qBAAa,cAAe,SAAQ,KAAK;aAGZ,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAMD;;;;;;;GAOG;AACH,qBAAa,QAAQ;IAEnB,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,MAAM,EAAE,cAAc;IAIlC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAQA;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAA;IACjB,iBAAiB;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,4BAA4B;IAC5B,QAAQ,EAAE,MAAM,CAAA;CACjB;AAMD;;GAEG;AACH,qBAAa,cAAe,SAAQ,KAAK;aAGZ,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAMD;;;;;;;GAOG;AACH,qBAAa,QAAQ;IAEnB,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,MAAM,EAAE,cAAc;IAIlC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC;;;OAGG;YACW,iBAAiB;IA+B/B;;;;;OAKG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAiC5C;;;;;OAKG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CA0BvD"}
|
package/dist/embedder/index.js
CHANGED
|
@@ -45,7 +45,9 @@ export class Embedder {
|
|
|
45
45
|
console.error(`Embedder: Setting cache directory to "${this.config.cacheDir}"`);
|
|
46
46
|
console.error(`Embedder: Loading model "${this.config.modelPath}"...`);
|
|
47
47
|
// Use type assertion to avoid TS2590 (union type too complex with @types/jsdom)
|
|
48
|
-
this.model = await pipeline('feature-extraction', this.config.modelPath
|
|
48
|
+
this.model = await pipeline('feature-extraction', this.config.modelPath, {
|
|
49
|
+
dtype: 'fp32',
|
|
50
|
+
});
|
|
49
51
|
console.error('Embedder: Model loaded successfully');
|
|
50
52
|
}
|
|
51
53
|
catch (error) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAE/C,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAA;AAkBzD,+CAA+C;AAC/C,gBAAgB;AAChB,+CAA+C;AAE/C;;GAEG;AACH,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAA;IAC9B,CAAC;CACF;AAED,+CAA+C;AAC/C,iBAAiB;AACjB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAM,OAAO,QAAQ;IAMnB,YAAY,MAAsB;QALlC,2EAA2E;QACnE,UAAK,GAAY,IAAI,CAAA;QACrB,gBAAW,GAAyB,IAAI,CAAA;QAI9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,8BAA8B;QAC9B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,IAAI,CAAC;YACH,+CAA+C;YAC/C,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAA;YAEnC,OAAO,CAAC,KAAK,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAA;YAC/E,OAAO,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,CAAC,SAAS,MAAM,CAAC,CAAA;YACtE,gFAAgF;YAChF,IAAI,CAAC,KAAK,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAE/C,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAA;AAkBzD,+CAA+C;AAC/C,gBAAgB;AAChB,+CAA+C;AAE/C;;GAEG;AACH,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAA;IAC9B,CAAC;CACF;AAED,+CAA+C;AAC/C,iBAAiB;AACjB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAM,OAAO,QAAQ;IAMnB,YAAY,MAAsB;QALlC,2EAA2E;QACnE,UAAK,GAAY,IAAI,CAAA;QACrB,gBAAW,GAAyB,IAAI,CAAA;QAI9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,8BAA8B;QAC9B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,IAAI,CAAC;YACH,+CAA+C;YAC/C,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAA;YAEnC,OAAO,CAAC,KAAK,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAA;YAC/E,OAAO,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,CAAC,SAAS,MAAM,CAAC,CAAA;YACtE,gFAAgF;YAChF,IAAI,CAAC,KAAK,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE;gBACvE,KAAK,EAAE,MAAM;aACd,CAAC,CAAA;YACF,OAAO,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAA;QACtD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,kCAAmC,KAAe,CAAC,OAAO,EAAE,EAC5D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB;QAC7B,sBAAsB;QACtB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,kDAAkD;QAClD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,IAAI,CAAC,WAAW,CAAA;YACtB,OAAM;QACR,CAAC;QAED,uBAAuB;QACvB,OAAO,CAAC,KAAK,CACX,+FAA+F,CAChG,CAAA;QAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;YACnD,8CAA8C;YAC9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;YAEvB,+CAA+C;YAC/C,MAAM,IAAI,cAAc,CACtB,+CAAgD,KAAe,CAAC,OAAO,wTAAwT,IAAI,CAAC,MAAM,CAAC,QAAQ,gCAAgC,EACnb,KAAc,CACf,CAAA;QACH,CAAC,CAAC,CAAA;QAEF,MAAM,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,CAAC;YACH,mEAAmE;YACnE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,MAAM,IAAI,cAAc,CAAC
,0CAA0C,CAAC,CAAA;YACtE,CAAC;YAED,uEAAuE;YACvE,8FAA8F;YAC9F,MAAM,OAAO,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;YACpD,MAAM,SAAS,GAAG,IAAI,CAAC,KAGa,CAAA;YACpC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;YAE7C,qCAAqC;YACrC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YACzC,OAAO,SAAS,CAAA;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;gBACpC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,cAAc,CACtB,iCAAkC,KAAe,CAAC,OAAO,EAAE,EAC3D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAe,EAAE,CAAA;YAEjC,6CAA6C;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAC7D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;gBACvD,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBAChF,UAAU,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAA;YACrC,CAAC;YAED,OAAO,UAAU,CAAA;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,wCAAyC,KAAe,CAAC,OAAO,EAAE,EAClE,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;CACF"}
|
package/dist/index.js
CHANGED
|
@@ -6,12 +6,15 @@ import { startServer } from './server-main.js';
|
|
|
6
6
|
// ============================================
|
|
7
7
|
// Routing
|
|
8
8
|
// ============================================
|
|
9
|
-
const SUBCOMMANDS = new Set(['skills']);
|
|
9
|
+
const SUBCOMMANDS = new Set(['skills', 'ingest']);
|
|
10
10
|
const args = process.argv.slice(2);
|
|
11
11
|
const firstArg = args[0];
|
|
12
12
|
if (firstArg && SUBCOMMANDS.has(firstArg)) {
|
|
13
13
|
// CLI subcommand
|
|
14
|
-
handleCli(args)
|
|
14
|
+
handleCli(args).catch((error) => {
|
|
15
|
+
console.error(error);
|
|
16
|
+
process.exit(1);
|
|
17
|
+
});
|
|
15
18
|
}
|
|
16
19
|
else {
|
|
17
20
|
// Default: start MCP server
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,gCAAgC;AAChC,qDAAqD;AAErD,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAE9C,+CAA+C;AAC/C,UAAU;AACV,+CAA+C;AAE/C,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAA;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,gCAAgC;AAChC,qDAAqD;AAErD,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAE9C,+CAA+C;AAC/C,UAAU;AACV,+CAA+C;AAE/C,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAA;AAEjD,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;AAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;AAExB,IAAI,QAAQ,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;IAC1C,iBAAiB;IACjB,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;AACJ,CAAC;KAAM,CAAC;IACN,4BAA4B;IAC5B,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACnD,OAAO,CAAC,KAAK,CAAC,yBAAyB,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;QACpE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;IAEF,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,EAAE;QACxC,OAAO,CAAC,KAAK,CAAC,qBAAqB,EAAE,KAAK,CAAC,CAAA;QAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;IAEF,WAAW,EAAE,CAAA;AACf,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: mcp-local-rag
|
|
3
|
-
description:
|
|
3
|
+
description: Use this skill when ingesting or querying documents with MCP local RAG, including `query_documents`, `ingest_file`, `ingest_data`, and CLI bulk ingestion. Covers query refinement, result score interpretation, and source metadata conventions for PDF, HTML, DOCX, TXT, and Markdown. Not for general file operations or SQL/database queries.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# MCP Local RAG Skills
|
|
@@ -13,6 +13,7 @@ description: Provides score interpretation (< 0.3 good, > 0.5 skip), query optim
|
|
|
13
13
|
| `ingest_data` | Raw content (HTML, text) with source URL |
|
|
14
14
|
| `query_documents` | Semantic + keyword hybrid search |
|
|
15
15
|
| `delete_file` / `list_files` / `status` | Management |
|
|
16
|
+
| `npx mcp-local-rag ingest` | Multiple files or directory (shell) |
|
|
16
17
|
|
|
17
18
|
## Search: Core Rules
|
|
18
19
|
|
|
@@ -112,9 +113,34 @@ ingest_data({
|
|
|
112
113
|
|
|
113
114
|
Re-ingest same source to update. Use same source in `delete_file` to remove.
|
|
114
115
|
|
|
116
|
+
### CLI ingest
|
|
117
|
+
|
|
118
|
+
Use the CLI to ingest multiple files or an entire directory. Prefer it over repeated `ingest_file` calls.
|
|
119
|
+
|
|
120
|
+
| Scenario | Use |
|
|
121
|
+
|----------|-----|
|
|
122
|
+
| Single file from user request | `ingest_file` |
|
|
123
|
+
| Multiple files or a directory | `npx mcp-local-rag ingest <path>` |
|
|
124
|
+
| Raw HTML/text content | `ingest_data` (CLI does not support stdin) |
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
npx mcp-local-rag ingest [options] <path>
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
- `<path>`: file or directory (recursively scans supported formats)
|
|
131
|
+
- Use `--help` for all options and defaults
|
|
132
|
+
- Options must match MCP server config — see [cli-ingest.md](references/cli-ingest.md)
|
|
133
|
+
|
|
134
|
+
**Output interpretation:**
|
|
135
|
+
|
|
136
|
+
- Exit code 0: all files succeeded
|
|
137
|
+
- Exit code 1: one or more files failed — report failed files to user
|
|
138
|
+
- `SKIPPED (0 chunks)`: file was empty or too short, counted as success
|
|
139
|
+
|
|
115
140
|
## References
|
|
116
141
|
|
|
117
142
|
For edge cases and examples:
|
|
118
143
|
- [html-ingestion.md](references/html-ingestion.md) - URL normalization, SPA handling
|
|
119
144
|
- [query-optimization.md](references/query-optimization.md) - Query patterns by intent
|
|
120
145
|
- [result-refinement.md](references/result-refinement.md) - Contradiction resolution, chunking
|
|
146
|
+
- [cli-ingest.md](references/cli-ingest.md) - CLI options, config matching
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# CLI Ingest Reference
|
|
2
|
+
|
|
3
|
+
## Command
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
npx mcp-local-rag ingest [options] <path>
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Options
|
|
10
|
+
|
|
11
|
+
| Option | Env Var | Default | Description |
|
|
12
|
+
|--------|---------|---------|-------------|
|
|
13
|
+
| `--db-path <path>` | `DB_PATH` | `./lancedb/` | LanceDB database path |
|
|
14
|
+
| `--base-dir <path>` | `BASE_DIR` | cwd | Base directory for documents |
|
|
15
|
+
| `--cache-dir <path>` | `CACHE_DIR` | `./models/` | Model cache directory |
|
|
16
|
+
| `--model-name <name>` | `MODEL_NAME` | `Xenova/all-MiniLM-L6-v2` | Embedding model |
|
|
17
|
+
| `--max-file-size <n>` | `MAX_FILE_SIZE` | `104857600` | Max file size in bytes |
|
|
18
|
+
| `-h, --help` | — | — | Show usage with defaults |
|
|
19
|
+
|
|
20
|
+
Priority: CLI flags > environment variables > defaults.
|
|
21
|
+
|
|
22
|
+
## Config Matching
|
|
23
|
+
|
|
24
|
+
When ingesting into an existing database, options **must match** the MCP server config — especially `--model-name`. Using a different embedding model against an existing database produces vectors in a different space, silently degrading search quality.
|
|
25
|
+
|
|
26
|
+
## Output
|
|
27
|
+
|
|
28
|
+
Output goes to stderr. Summary block:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
--- Ingest Summary ---
|
|
32
|
+
Succeeded: 12
|
|
33
|
+
Failed: 1
|
|
34
|
+
Total chunks: 247
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
- Exit code 0: all succeeded
|
|
38
|
+
- Exit code 1: one or more failed
|
|
39
|
+
- `SKIPPED (0 chunks)`: empty or too-short file, counted as success
|
|
40
|
+
|
|
41
|
+
## Supported Formats
|
|
42
|
+
|
|
43
|
+
Same file formats as the MCP `ingest_file` tool: `.pdf`, `.docx`, `.txt`, `.md`. HTML is not ingested by the CLI; pass HTML content to `ingest_data` instead.
|
|
44
|
+
|
|
45
|
+
Directory mode recursively scans for these extensions, excluding the directories given by `--db-path` and `--cache-dir`.
|