mcp-local-rag 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -119,6 +119,30 @@ HTML is automatically cleaned—you get the article content, not the boilerplate
119
119
 
120
120
  > **Note:** The RAG server itself doesn't fetch web content—your AI assistant retrieves it and passes the HTML to `ingest_data`. This keeps the server fully local while letting you index any content your assistant can access. Please respect website terms of service and copyright when ingesting external content.
121
121
 
122
+ ### Bulk Ingestion (CLI)
123
+
124
+ For ingesting multiple files or an entire directory, use the CLI command instead of calling `ingest_file` repeatedly:
125
+
126
+ ```bash
127
+ npx mcp-local-rag ingest --db-path ./lancedb --base-dir ./docs ./docs/
128
+ ```
129
+
130
+ Supports PDF, DOCX, TXT, and Markdown files (the same formats as the MCP tools). HTML is supported only through the `ingest_data` MCP tool, not through the CLI.
131
+
132
+ Key options:
133
+
134
+ | Option | Default | Description |
135
+ |--------|---------|-------------|
136
+ | `--db-path <path>` | `./lancedb/` | LanceDB database path |
137
+ | `--base-dir <path>` | cwd | Base directory for documents |
138
+ | `--model-name <name>` | `Xenova/all-MiniLM-L6-v2` | Embedding model |
139
+ | `--cache-dir <path>` | `./models/` | Model cache directory |
140
+ | `--max-file-size <n>` | `104857600` | Max file size in bytes |
141
+
142
+ This processes all supported files recursively and runs optimization once at the end — much faster than per-file ingestion for large batches.
143
+
144
+ > ⚠️ **CLI options must match your MCP server config.** Especially `--model-name` — using a different embedding model against an existing database produces incompatible vectors, silently degrading search quality.
145
+
122
146
  ### Searching Documents
123
147
 
124
148
  ```
@@ -407,6 +431,7 @@ pnpm run check:all # Full quality check
407
431
  src/
408
432
  index.ts # Entry point
409
433
  server/ # MCP tool handlers
434
+ cli/ # CLI subcommands (ingest)
410
435
  parser/ # PDF, DOCX, TXT, MD parsing
411
436
  chunker/ # Text splitting
412
437
  embedder/ # Transformers.js embeddings
@@ -0,0 +1,24 @@
1
/**
 * Option values that can be supplied as command-line flags.
 * Every field is optional; a missing or undefined field means the flag was not usable.
 */
interface CliOptions {
    /** Value of --db-path. */
    dbPath?: string | undefined;
    /** Value of --base-dir. */
    baseDir?: string | undefined;
    /** Value of --cache-dir. */
    cacheDir?: string | undefined;
    /** Value of --model-name. */
    modelName?: string | undefined;
    /** Value of --max-file-size. */
    maxFileSize?: number | undefined;
}
/**
 * Outcome of parsing the arguments that follow the "ingest" subcommand.
 */
interface ParsedArgs {
    /** The file or directory argument, if one was given. */
    positional: string | undefined;
    /** Flag values found on the command line. */
    options: CliOptions;
    /** True when -h or --help was present. */
    help: boolean;
}
/**
 * Split CLI arguments into flag values and a positional path.
 * Flags: --db-path, --base-dir, --cache-dir, --model-name, --max-file-size, -h/--help
 */
export declare function parseArgs(args: string[]): ParsedArgs;
/**
 * Entry point of the ingest CLI subcommand.
 * @param args - Arguments after "ingest" (e.g., option flags and file/directory path)
 */
export declare function runIngest(args: string[]): Promise<void>;
export {};
24
+ //# sourceMappingURL=ingest.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"AA8BA,UAAU,UAAU;IAClB,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC3B,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC5B,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC7B,SAAS,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC9B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;CACjC;AAED,UAAU,UAAU;IAClB,UAAU,EAAE,MAAM,GAAG,SAAS,CAAA;IAC9B,OAAO,EAAE,UAAU,CAAA;IACnB,IAAI,EAAE,OAAO,CAAA;CACd;AAiCD;;;GAGG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,UAAU,CAiDpD;AAmJD;;;GAGG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAgG7D"}
@@ -0,0 +1,295 @@
1
+ // CLI ingest subcommand — bulk file ingestion with single optimize() at end
2
+ import { randomUUID } from 'node:crypto';
3
+ import { readdir, stat } from 'node:fs/promises';
4
+ import { extname, join, resolve } from 'node:path';
5
+ import { SemanticChunker } from '../chunker/index.js';
6
+ import { Embedder } from '../embedder/index.js';
7
+ import { DocumentParser, SUPPORTED_EXTENSIONS } from '../parser/index.js';
8
+ import { VectorStore } from '../vectordb/index.js';
9
+ // ============================================
10
+ // Defaults
11
+ // ============================================
12
/**
 * Built-in fallbacks, used when neither a CLI flag nor the matching
 * environment variable supplies a value.
 */
const DEFAULTS = {
    dbPath: './lancedb/',
    cacheDir: './models/',
    modelName: 'Xenova/all-MiniLM-L6-v2',
    // 100 * 1024 * 1024 bytes.
    maxFileSize: 104857600,
};
// ============================================
// Help
// ============================================
/**
 * Usage text printed for -h/--help and after an unknown option.
 * Each option line names the environment variable that is consulted when the
 * flag is absent, because config resolution reads those variables before
 * falling back to DEFAULTS. Descriptions are aligned in one column.
 */
const HELP_TEXT = `Usage: mcp-local-rag ingest [options] <path>

Ingest a single file or all supported files under a directory.

Options:
  --db-path <path>       LanceDB database path (env: DB_PATH, default: ${DEFAULTS.dbPath})
  --base-dir <path>      Base directory for documents (env: BASE_DIR, default: cwd)
  --cache-dir <path>     Model cache directory (env: CACHE_DIR, default: ${DEFAULTS.cacheDir})
  --model-name <name>    Embedding model (env: MODEL_NAME, default: ${DEFAULTS.modelName})
  --max-file-size <n>    Max file size in bytes (env: MAX_FILE_SIZE, default: ${DEFAULTS.maxFileSize})
  -h, --help             Show this help

Command-line flags take precedence over environment variables.`;
32
+ // ============================================
33
+ // Arg Parsing
34
+ // ============================================
35
/**
 * Parse CLI arguments into options and a positional path.
 *
 * Flags: --db-path, --base-dir, --cache-dir, --model-name, --max-file-size, -h/--help.
 * Value-taking flags accept both `--flag value` and `--flag=value`.
 *
 * A flag whose value is missing or empty is recorded as `undefined`.
 * `--max-file-size` must be a positive whole number of bytes; anything else
 * (for example `10abc`, `-5` or `0`) is also recorded as `undefined`.
 * An unknown option prints the help text and exits with status 1.
 *
 * @param args - Arguments that follow the "ingest" subcommand.
 * @returns The last positional argument seen (if any), the option values
 *   found on the command line, and whether help was requested.
 */
export function parseArgs(args) {
    // Maps each string-valued flag to the option key it fills in.
    const stringFlags = new Map([
        ['--db-path', 'dbPath'],
        ['--base-dir', 'baseDir'],
        ['--cache-dir', 'cacheDir'],
        ['--model-name', 'modelName'],
    ]);
    const options = {};
    let positional;
    let help = false;
    let i = 0;
    while (i < args.length) {
        const arg = args[i];
        i++;
        if (arg === '-h' || arg === '--help') {
            help = true;
            continue;
        }
        // "--flag=value" carries its value inline; "--flag value" does not.
        const eq = arg.startsWith('--') ? arg.indexOf('=') : -1;
        const flag = eq === -1 ? arg : arg.slice(0, eq);
        const takesValue = stringFlags.has(flag) || flag === '--max-file-size';
        if (!takesValue) {
            if (arg.startsWith('-')) {
                console.error(`Unknown option: ${arg}`);
                console.error(HELP_TEXT);
                process.exit(1);
            }
            positional = arg;
            continue;
        }
        let value;
        if (eq === -1) {
            // The value is the next argument; consume it.
            value = args[i];
            i++;
        }
        else {
            // An empty inline value ("--db-path=") counts as missing.
            value = arg.slice(eq + 1) || undefined;
        }
        if (flag === '--max-file-size') {
            // Digits only: Number.parseInt alone would accept "10abc" as 10.
            const text = (value ?? '').trim();
            const parsed = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
            options.maxFileSize = Number.isSafeInteger(parsed) && parsed > 0 ? parsed : undefined;
        }
        else {
            options[stringFlags.get(flag)] = value;
        }
    }
    return { positional, options, help };
}
87
+ // ============================================
88
+ // NaN Defense
89
+ // ============================================
90
/**
 * Ensure maxFileSize is usable as a byte limit.
 *
 * @param value - Candidate limit, e.g. the result of Number.parseInt.
 * @returns `value` when it is a finite number greater than zero; otherwise
 *   DEFAULTS.maxFileSize. NaN, infinities, zero, negative numbers and
 *   non-numbers all fall back to the default.
 */
function sanitizeMaxFileSize(value) {
    const usable = typeof value === 'number' && Number.isFinite(value) && value > 0;
    return usable ? value : DEFAULTS.maxFileSize;
}
96
+ // ============================================
97
+ // Config Resolution
98
+ // ============================================
99
/**
 * Resolve config with priority: CLI flags > environment variables > defaults.
 *
 * Environment variables read: BASE_DIR, DB_PATH, CACHE_DIR, MODEL_NAME,
 * MAX_FILE_SIZE. A variable that is set but blank is treated as unset, so
 * that e.g. `DB_PATH=` falls back to the default instead of producing an
 * empty path.
 *
 * @param cliOptions - Option values parsed from the command line; undefined
 *   fields fall through to the environment and then to the defaults.
 * @returns An object with baseDir, dbPath, cacheDir, modelName and maxFileSize.
 */
function resolveConfig(cliOptions = {}) {
    const fromEnv = (name) => {
        const raw = process.env[name];
        return raw === undefined || raw.trim() === '' ? undefined : raw;
    };
    const envMaxFileSize = fromEnv('MAX_FILE_SIZE');
    return {
        baseDir: cliOptions.baseDir ?? fromEnv('BASE_DIR') ?? process.cwd(),
        dbPath: cliOptions.dbPath ?? fromEnv('DB_PATH') ?? DEFAULTS.dbPath,
        cacheDir: cliOptions.cacheDir ?? fromEnv('CACHE_DIR') ?? DEFAULTS.cacheDir,
        modelName: cliOptions.modelName ?? fromEnv('MODEL_NAME') ?? DEFAULTS.modelName,
        // An unparsable MAX_FILE_SIZE becomes NaN here; sanitizeMaxFileSize handles it.
        maxFileSize: sanitizeMaxFileSize(cliOptions.maxFileSize ??
            (envMaxFileSize === undefined
                ? DEFAULTS.maxFileSize
                : Number.parseInt(envMaxFileSize, 10))),
    };
}
114
+ // ============================================
115
+ // File Collection
116
+ // ============================================
117
/**
 * Collect files to ingest from a path.
 * - If path is a file with a supported extension, return [path].
 * - If path is a file with an unsupported extension, log the problem and return [].
 * - If path is a directory, recursively scan for supported files and return
 *   them sorted, skipping anything inside one of the excluded directories.
 * - Any other kind of path yields [].
 *
 * @param targetPath - File or directory to scan; resolved to an absolute path.
 * @param excludePaths - Directories whose contents must be skipped (the DB
 *   and model cache directories). Entries may carry a trailing separator.
 * @returns Absolute paths of the files to ingest.
 */
async function collectFiles(targetPath, excludePaths) {
    const resolved = resolve(targetPath);
    const info = await stat(resolved);
    if (info.isFile()) {
        const ext = extname(resolved).toLowerCase();
        if (!SUPPORTED_EXTENSIONS.has(ext)) {
            console.error(`Unsupported file extension: ${ext} (supported: ${[...SUPPORTED_EXTENSIONS].join(', ')})`);
            return [];
        }
        return [resolved];
    }
    if (!info.isDirectory()) {
        return [];
    }
    // resolve() drops any trailing separator, so the comparison below does not
    // depend on which separator character the caller appended.
    const excludedRoots = excludePaths.map((ep) => resolve(ep));
    const isExcluded = (filePath) => excludedRoots.some((root) => {
        if (!filePath.startsWith(root)) {
            return false;
        }
        if (root.endsWith('/') || root.endsWith('\\')) {
            // A filesystem root already ends with its separator.
            return true;
        }
        // Require a path boundary so "lancedb" does not also exclude "lancedb-notes".
        const boundary = filePath.charAt(root.length);
        return boundary === '' || boundary === '/' || boundary === '\\';
    });
    const entries = await readdir(resolved, { recursive: true, withFileTypes: true });
    return entries
        .filter((e) => e.isFile() && SUPPORTED_EXTENSIONS.has(extname(e.name).toLowerCase()))
        // Older Node releases expose the directory as Dirent.path rather than Dirent.parentPath.
        .map((e) => join(e.parentPath ?? e.path, e.name))
        .filter((filePath) => !isExcluded(filePath))
        .sort();
}
144
+ // ============================================
145
+ // Per-file Ingestion
146
+ // ============================================
147
/**
 * Ingest a single file: parse, chunk, embed, delete old chunks, insert new chunks.
 *
 * The new rows are fully built and validated before the old chunks are
 * deleted, so a missing embedding can no longer leave the file without chunks.
 *
 * @param filePath - Path of the file to ingest; also stored on every chunk.
 * @param parser - Provides parsePdf(filePath, embedder) and parseFile(filePath),
 *   each resolving to an object with `content` and `title`.
 * @param chunker - Provides chunkText(text, embedder), resolving to chunks
 *   that carry `index` and `text`.
 * @param embedder - Provides embedBatch(texts), resolving to one vector per text.
 * @param vectorStore - Provides deleteChunks(filePath) and insertChunks(rows).
 * @returns The number of chunks inserted; 0 when the chunker produced
 *   nothing, in which case the store is not touched.
 * @throws Error when the embedder returns no vector for some chunk.
 */
async function ingestSingleFile(filePath, parser, chunker, embedder, vectorStore) {
    // PDFs use a separate parser entry point that also receives the embedder.
    const isPdf = filePath.toLowerCase().endsWith('.pdf');
    const parsed = isPdf
        ? await parser.parsePdf(filePath, embedder)
        : await parser.parseFile(filePath);
    const text = parsed.content;
    const title = parsed.title || null;
    // Chunk text
    const chunks = await chunker.chunkText(text, embedder);
    if (chunks.length === 0) {
        console.error(` Warning: 0 chunks generated (file may be empty or too short)`);
        return 0;
    }
    // Generate embeddings
    const embeddings = await embedder.embedBatch(chunks.map((c) => c.text));
    // Build and validate every row before touching the store.
    const timestamp = new Date().toISOString();
    // Split on both separators so Windows paths also yield a bare file name.
    const fileName = filePath.split(/[\\/]/).pop() || filePath;
    const fileType = filePath.split('.').pop() || '';
    const vectorChunks = chunks.map((chunk, index) => {
        const embedding = embeddings[index];
        if (!embedding) {
            throw new Error(`Missing embedding for chunk ${index}`);
        }
        return {
            id: randomUUID(),
            filePath,
            chunkIndex: chunk.index,
            text: chunk.text,
            vector: embedding,
            metadata: {
                fileName,
                // NOTE(review): this is the length of the parsed content, not the
                // on-disk size in bytes; confirm that is intended.
                fileSize: text.length,
                fileType,
            },
            fileTitle: title,
            timestamp,
        };
    });
    // Replace the file's previous chunks only once the new ones are ready.
    await vectorStore.deleteChunks(filePath);
    await vectorStore.insertChunks(vectorChunks);
    return vectorChunks.length;
}
202
+ // ============================================
203
+ // Main Entry Point
204
+ // ============================================
205
/**
 * Entry point of the ingest CLI subcommand.
 *
 * Parses the arguments, resolves the configuration, collects the files under
 * the given path, ingests them one by one with shared component instances,
 * optimizes the vector store once, and prints a summary. All output goes to
 * stderr. The process exit code is set to 1 when at least one file failed.
 *
 * @param args - Arguments after "ingest" (e.g., option flags and file/directory path)
 */
export async function runIngest(args) {
    const parsedArgs = parseArgs(args);
    // --help wins over everything else
    if (parsedArgs.help) {
        console.error(HELP_TEXT);
        process.exit(0);
    }
    // A path argument is mandatory
    if (!parsedArgs.positional) {
        const usageLines = [
            'Usage: mcp-local-rag ingest [options] <path>',
            ' Ingest a single file or all supported files under a directory.',
            ' Run with --help for all options.',
        ];
        for (const line of usageLines) {
            console.error(line);
        }
        process.exit(1);
    }
    const targetPath = parsedArgs.positional;
    // The path has to be reachable before any component is created
    try {
        await stat(targetPath);
    }
    catch {
        console.error(`Error: path does not exist: ${targetPath}`);
        process.exit(1);
    }
    // CLI flags > env vars > defaults
    const config = resolveConfig(parsedArgs.options);
    // Never ingest the database or the model cache themselves
    const excludePaths = [config.dbPath, config.cacheDir].map((dir) => `${resolve(dir)}/`);
    const files = await collectFiles(targetPath, excludePaths);
    if (files.length === 0) {
        console.error('No supported files found.');
        process.exit(1);
    }
    console.error(`Found ${files.length} file(s) to ingest.`);
    // One instance of each component serves every file
    const { baseDir, maxFileSize, modelName, cacheDir, dbPath } = config;
    const parser = new DocumentParser({ baseDir, maxFileSize });
    const chunker = new SemanticChunker();
    const embedder = new Embedder({ modelPath: modelName, batchSize: 16, cacheDir });
    const vectorStore = new VectorStore({ dbPath, tableName: 'chunks' });
    await vectorStore.initialize();
    // Running totals for the final report
    let succeeded = 0;
    let failed = 0;
    let totalChunks = 0;
    for (const [position, filePath] of files.entries()) {
        const label = `[${position + 1}/${files.length}]`;
        try {
            const chunkCount = await ingestSingleFile(filePath, parser, chunker, embedder, vectorStore);
            if (chunkCount !== 0) {
                console.error(`${label} ${filePath} ... OK (${chunkCount} chunks)`);
                succeeded += 1;
                totalChunks += chunkCount;
            }
            else {
                // A file that yields no chunks is a skip, not a failure
                console.error(`${label} ${filePath} ... SKIPPED (0 chunks)`);
                succeeded += 1;
            }
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : String(error);
            console.error(`${label} ${filePath} ... FAILED: ${reason}`);
            failed += 1;
        }
    }
    // A single optimize pass after the whole batch
    await vectorStore.optimize();
    // Final report
    console.error('');
    console.error('--- Ingest Summary ---');
    console.error(`Succeeded: ${succeeded}`);
    console.error(`Failed: ${failed}`);
    console.error(`Total chunks: ${totalChunks}`);
    if (failed > 0) {
        process.exitCode = 1;
    }
}
295
+ //# sourceMappingURL=ingest.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"AAAA,4EAA4E;AAE5E,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAA;AAChD,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAElD,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EAAE,cAAc,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AAEzE,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAkClD,+CAA+C;AAC/C,WAAW;AACX,+CAA+C;AAE/C,MAAM,QAAQ,GAAG;IACf,MAAM,EAAE,YAAY;IACpB,QAAQ,EAAE,WAAW;IACrB,SAAS,EAAE,yBAAyB;IACpC,WAAW,EAAE,SAAS;CACd,CAAA;AAEV,+CAA+C;AAC/C,OAAO;AACP,+CAA+C;AAE/C,MAAM,SAAS,GAAG;;;;;2DAKyC,QAAQ,CAAC,MAAM;;2DAEf,QAAQ,CAAC,QAAQ;qDACvB,QAAQ,CAAC,SAAS;4DACX,QAAQ,CAAC,WAAW;wCACxC,CAAA;AAExC,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,OAAO,GAAe,EAAE,CAAA;IAC9B,IAAI,UAA8B,CAAA;IAClC,IAAI,IAAI,GAAG,KAAK,CAAA;IAEhB,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAE,CAAA;QACpB,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,IAAI,CAAC;YACV,KAAK,QAAQ;gBACX,IAAI,GAAG,IAAI,CAAA;gBACX,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,WAAW;gBACd,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC1B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,YAAY;gBACf,OAAO,CAAC,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC3B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,aAAa;gBAChB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC5B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,cAAc;gBACjB,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;gBAC7B,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,KAAK,iBAAiB,CAAC,CAAC,CAAC;gBACvB,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;gBACnD,OAAO,CAAC,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAA;gBAC/D,CAAC,EAAE,CAAA;gBACH,MAAK;YACP,CAAC;YACD;gBACE,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;oBACxB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;oBACvC,OAAO,CAAC,KAA
K,CAAC,SAAS,CAAC,CAAA;oBACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBACjB,CAAC;gBACD,UAAU,GAAG,GAAG,CAAA;gBAChB,CAAC,EAAE,CAAA;gBACH,MAAK;QACT,CAAC;IACH,CAAC;IAED,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,CAAA;AACtC,CAAC;AAED,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;GAEG;AACH,SAAS,mBAAmB,CAAC,KAAa;IACxC,OAAO,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,CAAA;AAC3D,CAAC;AAED,+CAA+C;AAC/C,oBAAoB;AACpB,+CAA+C;AAE/C;;GAEG;AACH,SAAS,aAAa,CAAC,aAAyB,EAAE;IAChD,OAAO;QACL,OAAO,EAAE,UAAU,CAAC,OAAO,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,OAAO,CAAC,GAAG,EAAE;QACvE,MAAM,EAAE,UAAU,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,QAAQ,CAAC,MAAM;QACtE,QAAQ,EAAE,UAAU,CAAC,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,QAAQ,CAAC,QAAQ;QAC9E,SAAS,EAAE,UAAU,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,IAAI,QAAQ,CAAC,SAAS;QAClF,WAAW,EAAE,mBAAmB,CAC9B,UAAU,CAAC,WAAW;YACpB,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;gBAC3B,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC,EAAE,EAAE,CAAC;gBACnD,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAC5B;KACF,CAAA;AACH,CAAC;AAED,+CAA+C;AAC/C,kBAAkB;AAClB,+CAA+C;AAE/C;;;;;GAKG;AACH,KAAK,UAAU,YAAY,CAAC,UAAkB,EAAE,YAAsB;IACpE,MAAM,QAAQ,GAAG,OAAO,CAAC,UAAU,CAAC,CAAA;IACpC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAA;IAEjC,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QAClB,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;QAC3C,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,KAAK,CACX,+BAA+B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAC1F,CAAA;YACD,OAAO,EAAE,CAAA;QACX,CAAC;QACD,OAAO,CAAC,QAAQ,CAAC,CAAA;IACnB,CAAC;IAED,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAA;QACjF,OAAO,OAAO;aACX,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;aACpF,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC;aACtC,M
AAM,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;aACzE,IAAI,EAAE,CAAA;IACX,CAAC;IAED,OAAO,EAAE,CAAA;AACX,CAAC;AAED,+CAA+C;AAC/C,qBAAqB;AACrB,+CAA+C;AAE/C;;;GAGG;AACH,KAAK,UAAU,gBAAgB,CAC7B,QAAgB,EAChB,MAAsB,EACtB,OAAwB,EACxB,QAAkB,EAClB,WAAwB;IAExB,aAAa;IACb,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAA;IACrD,IAAI,IAAY,CAAA;IAChB,IAAI,KAAK,GAAkB,IAAI,CAAA;IAC/B,IAAI,KAAK,EAAE,CAAC;QACV,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAA;QACxD,IAAI,GAAG,MAAM,CAAC,OAAO,CAAA;QACrB,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,IAAI,CAAA;IAC9B,CAAC;SAAM,CAAC;QACN,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;QAC/C,IAAI,GAAG,MAAM,CAAC,OAAO,CAAA;QACrB,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,IAAI,CAAA;IAC9B,CAAC;IAED,aAAa;IACb,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;IACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,KAAK,CAAC,gEAAgE,CAAC,CAAA;QAC/E,OAAO,CAAC,CAAA;IACV,CAAC;IAED,sBAAsB;IACtB,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;IAEvE,uCAAuC;IACvC,MAAM,WAAW,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAA;IAExC,sBAAsB;IACtB,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IAC1C,MAAM,YAAY,GAAkB,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC9D,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,CAAA;QACnC,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAA;QACzD,CAAC;QACD,OAAO;YACL,EAAE,EAAE,UAAU,EAAE;YAChB,QAAQ;YACR,UAAU,EAAE,KAAK,CAAC,KAAK;YACvB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,MAAM,EAAE,SAAS;YACjB,QAAQ,EAAE;gBACR,QAAQ,EAAE,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,QAAQ;gBAC/C,QAAQ,EAAE,IAAI,CAAC,MAAM;gBACrB,QAAQ,EAAE,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE;aAC1C;YACD,SAAS,EAAE,KAAK;YAChB,SAAS;SACV,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,gBAAgB;IAChB,MAAM,WAAW,CAAC,YAAY,CAAC,YAAY,CAAC,CAAA;IAE5C,OAAO,YAAY,CAAC,MAAM,CAAA;AAC5B,CAAC;AAED,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C
;AAE/C;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAc;IAC5C,oBAAoB;IACpB,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAErD,gBAAgB;IAChB,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,+BAA+B;IAC/B,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAA;QAC7D,OAAO,CAAC,KAAK,CAAC,kEAAkE,CAAC,CAAA;QACjF,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAA;QACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,MAAM,UAAU,GAAG,UAAU,CAAA;IAE7B,uBAAuB;IACvB,IAAI,CAAC;QACH,MAAM,IAAI,CAAC,UAAU,CAAC,CAAA;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,+BAA+B,UAAU,EAAE,CAAC,CAAA;QAC1D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,kDAAkD;IAClD,MAAM,MAAM,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;IACrC,MAAM,YAAY,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IAEnF,gBAAgB;IAChB,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IAC1D,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAA;QAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,MAAM,qBAAqB,CAAC,CAAA;IAEzD,mEAAmE;IACnE,MAAM,MAAM,GAAG,IAAI,cAAc,CAAC;QAChC,OAAO,EAAE,MAAM,CAAC,OAAO;QACvB,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC,CAAA;IACF,MAAM,OAAO,GAAG,IAAI,eAAe,EAAE,CAAA;IACrC,MAAM,QAAQ,GAAG,IAAI,QAAQ,CAAC;QAC5B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,EAAE;QACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;KAC1B,CAAC,CAAA;IACF,MAAM,WAAW,GAAG,IAAI,WAAW,CAAC;QAClC,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,QAAQ;KACpB,CAAC,CAAA;IACF,MAAM,WAAW,CAAC,UAAU,EAAE,CAAA;IAE9B,oBAAoB;IACpB,MAAM,OAAO,GAAkB,EAAE,SAAS,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,EAAE,CAAA;IAE1E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAE,CAAA;QAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAA;QAE1C,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,gBAAgB,CAAC,QAAQ,EAAE,MAAM,EAAE,OA
AO,EAAE,QAAQ,EAAE,WAAW,CAAC,CAAA;YAC3F,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;gBACrB,4CAA4C;gBAC5C,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,IAAI,QAAQ,yBAAyB,CAAC,CAAA;gBAC5D,OAAO,CAAC,SAAS,EAAE,CAAA;YACrB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,IAAI,QAAQ,YAAY,UAAU,UAAU,CAAC,CAAA;gBACnE,OAAO,CAAC,SAAS,EAAE,CAAA;gBACnB,OAAO,CAAC,WAAW,IAAI,UAAU,CAAA;YACnC,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,MAAM,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACrE,OAAO,CAAC,KAAK,CAAC,GAAG,KAAK,IAAI,QAAQ,gBAAgB,MAAM,EAAE,CAAC,CAAA;YAC3D,OAAO,CAAC,MAAM,EAAE,CAAA;QAClB,CAAC;IACH,CAAC;IAED,sCAAsC;IACtC,MAAM,WAAW,CAAC,QAAQ,EAAE,CAAA;IAE5B,gBAAgB;IAChB,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;IACjB,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAA;IACvC,OAAO,CAAC,KAAK,CAAC,cAAc,OAAO,CAAC,SAAS,EAAE,CAAC,CAAA;IAChD,OAAO,CAAC,KAAK,CAAC,cAAc,OAAO,CAAC,MAAM,EAAE,CAAC,CAAA;IAC7C,OAAO,CAAC,KAAK,CAAC,iBAAiB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAA;IAErD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAA;IACtB,CAAC;AACH,CAAC"}
@@ -2,5 +2,5 @@
2
2
  * Handle CLI subcommands
3
3
  * @param args - Command line arguments (after the binary name)
4
4
  */
5
- export declare function handleCli(args: string[]): void;
5
+ export declare function handleCli(args: string[]): Promise<void>;
6
6
  //# sourceMappingURL=cli-main.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"cli-main.d.ts","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"AAGA;;;GAGG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,IAAI,CAsB9C"}
1
+ {"version":3,"file":"cli-main.d.ts","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"AAIA;;;GAGG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CA0B7D"}
package/dist/cli-main.js CHANGED
@@ -1,10 +1,11 @@
1
1
  // CLI entry point for subcommands (skills install, etc.)
2
2
  import { run as runSkillsInstall } from './bin/install-skills.js';
3
+ import { runIngest } from './cli/ingest.js';
3
4
  /**
4
5
  * Handle CLI subcommands
5
6
  * @param args - Command line arguments (after the binary name)
6
7
  */
7
- export function handleCli(args) {
8
+ export async function handleCli(args) {
8
9
  const subcommand = args[0];
9
10
  switch (subcommand) {
10
11
  case 'skills':
@@ -18,9 +19,12 @@ export function handleCli(args) {
18
19
  process.exit(1);
19
20
  }
20
21
  break;
22
+ case 'ingest':
23
+ await runIngest(args.slice(1));
24
+ break;
21
25
  default:
22
26
  console.error(`Unknown command: ${subcommand}`);
23
- console.error('Available commands: skills');
27
+ console.error('Available commands: skills, ingest');
24
28
  process.exit(1);
25
29
  }
26
30
  }
@@ -1 +1 @@
1
- {"version":3,"file":"cli-main.js","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,OAAO,EAAE,GAAG,IAAI,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAEjE;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,IAAc;IACtC,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;IAE1B,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,QAAQ;YACX,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC1B,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CACX,8EAA8E,CAC/E,CAAA;gBACD,OAAO,CAAC,KAAK,CAAC,qEAAqE,CAAC,CAAA;gBACpF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;YACD,MAAK;QAEP;YACE,OAAO,CAAC,KAAK,CAAC,oBAAoB,UAAU,EAAE,CAAC,CAAA;YAC/C,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAA;YAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACnB,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"cli-main.js","sourceRoot":"","sources":["../src/cli-main.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,OAAO,EAAE,GAAG,IAAI,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AACjE,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAA;AAE3C;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAc;IAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;IAE1B,QAAQ,UAAU,EAAE,CAAC;QACnB,KAAK,QAAQ;YACX,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBAC1B,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;gBAC/B,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,KAAK,CACX,8EAA8E,CAC/E,CAAA;gBACD,OAAO,CAAC,KAAK,CAAC,qEAAqE,CAAC,CAAA;gBACpF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;YACD,MAAK;QAEP,KAAK,QAAQ;YACX,MAAM,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAK;QAEP;YACE,OAAO,CAAC,KAAK,CAAC,oBAAoB,UAAU,EAAE,CAAC,CAAA;YAC/C,OAAO,CAAC,KAAK,CAAC,oCAAoC,CAAC,CAAA;YACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACnB,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAQA;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAA;IACjB,iBAAiB;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,4BAA4B;IAC5B,QAAQ,EAAE,MAAM,CAAA;CACjB;AAMD;;GAEG;AACH,qBAAa,cAAe,SAAQ,KAAK;aAGZ,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAMD;;;;;;;GAOG;AACH,qBAAa,QAAQ;IAEnB,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,MAAM,EAAE,cAAc;IAIlC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAuBjC;;;OAGG;YACW,iBAAiB;IA+B/B;;;;;OAKG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAiC5C;;;;;OAKG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CA0BvD"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAQA;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,6BAA6B;IAC7B,SAAS,EAAE,MAAM,CAAA;IACjB,iBAAiB;IACjB,SAAS,EAAE,MAAM,CAAA;IACjB,4BAA4B;IAC5B,QAAQ,EAAE,MAAM,CAAA;CACjB;AAMD;;GAEG;AACH,qBAAa,cAAe,SAAQ,KAAK;aAGZ,KAAK,CAAC,EAAE,KAAK;gBADtC,OAAO,EAAE,MAAM,EACU,KAAK,CAAC,EAAE,KAAK,YAAA;CAKzC;AAMD;;;;;;;GAOG;AACH,qBAAa,QAAQ;IAEnB,OAAO,CAAC,KAAK,CAAgB;IAC7B,OAAO,CAAC,WAAW,CAA6B;IAChD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAgB;gBAE3B,MAAM,EAAE,cAAc;IAIlC;;OAEG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAyBjC;;;OAGG;YACW,iBAAiB;IA+B/B;;;;;OAKG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAiC5C;;;;;OAKG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CA0BvD"}
@@ -45,7 +45,9 @@ export class Embedder {
45
45
  console.error(`Embedder: Setting cache directory to "${this.config.cacheDir}"`);
46
46
  console.error(`Embedder: Loading model "${this.config.modelPath}"...`);
47
47
  // Use type assertion to avoid TS2590 (union type too complex with @types/jsdom)
48
- this.model = await pipeline('feature-extraction', this.config.modelPath);
48
+ this.model = await pipeline('feature-extraction', this.config.modelPath, {
49
+ dtype: 'fp32',
50
+ });
49
51
  console.error('Embedder: Model loaded successfully');
50
52
  }
51
53
  catch (error) {
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAE/C,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAA;AAkBzD,+CAA+C;AAC/C,gBAAgB;AAChB,+CAA+C;AAE/C;;GAEG;AACH,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAA;IAC9B,CAAC;CACF;AAED,+CAA+C;AAC/C,iBAAiB;AACjB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAM,OAAO,QAAQ;IAMnB,YAAY,MAAsB;QALlC,2EAA2E;QACnE,UAAK,GAAY,IAAI,CAAA;QACrB,gBAAW,GAAyB,IAAI,CAAA;QAI9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,8BAA8B;QAC9B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,IAAI,CAAC;YACH,+CAA+C;YAC/C,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAA;YAEnC,OAAO,CAAC,KAAK,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAA;YAC/E,OAAO,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,CAAC,SAAS,MAAM,CAAC,CAAA;YACtE,gFAAgF;YAChF,IAAI,CAAC,KAAK,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;YACxE,OAAO,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAA;QACtD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,kCAAmC,KAAe,CAAC,OAAO,EAAE,EAC5D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB;QAC7B,sBAAsB;QACtB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,kDAAkD;QAClD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,IAAI,CAAC,WAAW,CAAA;YACtB,OAAM;QACR,CAAC;QAED,uBAAuB;QACvB,OAAO,CAAC,KAAK,CACX,+FAA+F,CAChG,CAAA;QAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;YACnD,8CAA8C;YAC9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;YAEvB,+CAA+C;YAC/C,MAAM,IAAI,cAAc,CACtB,+CAAgD,KAAe,CAAC,OAAO,wTAAwT,IAAI,CAAC,MAAM,CAAC,QAAQ,gCAAgC,EACnb,KAAc,CACf,CAAA;QACH,CAAC,CAAC,CAAA;QAEF,MAAM,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,CAAC;YACH,mEAAmE;YACnE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,MAAM,IAAI,cAAc,CAAC,0CAA0C,CAAC,CAAA;YACtE,CAAC;
YAED,uEAAuE;YACvE,8FAA8F;YAC9F,MAAM,OAAO,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;YACpD,MAAM,SAAS,GAAG,IAAI,CAAC,KAGa,CAAA;YACpC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;YAE7C,qCAAqC;YACrC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YACzC,OAAO,SAAS,CAAA;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;gBACpC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,cAAc,CACtB,iCAAkC,KAAe,CAAC,OAAO,EAAE,EAC3D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAe,EAAE,CAAA;YAEjC,6CAA6C;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAC7D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;gBACvD,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBAChF,UAAU,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAA;YACrC,CAAC;YAED,OAAO,UAAU,CAAA;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,wCAAyC,KAAe,CAAC,OAAO,EAAE,EAClE,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;CACF"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":"AAAA,+CAA+C;AAE/C,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAA;AAkBzD,+CAA+C;AAC/C,gBAAgB;AAChB,+CAA+C;AAE/C;;GAEG;AACH,MAAM,OAAO,cAAe,SAAQ,KAAK;IACvC,YACE,OAAe,EACU,KAAa;QAEtC,KAAK,CAAC,OAAO,CAAC,CAAA;QAFW,UAAK,GAAL,KAAK,CAAQ;QAGtC,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAA;IAC9B,CAAC;CACF;AAED,+CAA+C;AAC/C,iBAAiB;AACjB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAM,OAAO,QAAQ;IAMnB,YAAY,MAAsB;QALlC,2EAA2E;QACnE,UAAK,GAAY,IAAI,CAAA;QACrB,gBAAW,GAAyB,IAAI,CAAA;QAI9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,8BAA8B;QAC9B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,IAAI,CAAC;YACH,+CAA+C;YAC/C,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAA;YAEnC,OAAO,CAAC,KAAK,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAA;YAC/E,OAAO,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,CAAC,SAAS,MAAM,CAAC,CAAA;YACtE,gFAAgF;YAChF,IAAI,CAAC,KAAK,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE;gBACvE,KAAK,EAAE,MAAM;aACd,CAAC,CAAA;YACF,OAAO,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAA;QACtD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,kCAAmC,KAAe,CAAC,OAAO,EAAE,EAC5D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB;QAC7B,sBAAsB;QACtB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,kDAAkD;QAClD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,IAAI,CAAC,WAAW,CAAA;YACtB,OAAM;QACR,CAAC;QAED,uBAAuB;QACvB,OAAO,CAAC,KAAK,CACX,+FAA+F,CAChG,CAAA;QAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;YACnD,8CAA8C;YAC9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;YAEvB,+CAA+C;YAC/C,MAAM,IAAI,cAAc,CACtB,+CAAgD,KAAe,CAAC,OAAO,wTAAwT,IAAI,CAAC,MAAM,CAAC,QAAQ,gCAAgC,EACnb,KAAc,CACf,CAAA;QACH,CAAC,CAAC,CAAA;QAEF,MAAM,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,CAAC;YACH,mEAAmE;YACnE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,MAAM,IAAI,cAAc,CA
AC,0CAA0C,CAAC,CAAA;YACtE,CAAC;YAED,uEAAuE;YACvE,8FAA8F;YAC9F,MAAM,OAAO,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;YACpD,MAAM,SAAS,GAAG,IAAI,CAAC,KAGa,CAAA;YACpC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;YAE7C,qCAAqC;YACrC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YACzC,OAAO,SAAS,CAAA;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;gBACpC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,cAAc,CACtB,iCAAkC,KAAe,CAAC,OAAO,EAAE,EAC3D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAe,EAAE,CAAA;YAEjC,6CAA6C;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAC7D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;gBACvD,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBAChF,UAAU,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAA;YACrC,CAAC;YAED,OAAO,UAAU,CAAA;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,cAAc,CACtB,wCAAyC,KAAe,CAAC,OAAO,EAAE,EAClE,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;CACF"}
package/dist/index.js CHANGED
@@ -6,12 +6,15 @@ import { startServer } from './server-main.js';
6
6
  // ============================================
7
7
  // Routing
8
8
  // ============================================
9
- const SUBCOMMANDS = new Set(['skills']);
9
+ const SUBCOMMANDS = new Set(['skills', 'ingest']);
10
10
  const args = process.argv.slice(2);
11
11
  const firstArg = args[0];
12
12
  if (firstArg && SUBCOMMANDS.has(firstArg)) {
13
13
  // CLI subcommand
14
- handleCli(args);
14
+ handleCli(args).catch((error) => {
15
+ console.error(error);
16
+ process.exit(1);
17
+ });
15
18
  }
16
19
  else {
17
20
  // Default: start MCP server
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,gCAAgC;AAChC,qDAAqD;AAErD,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAE9C,+CAA+C;AAC/C,UAAU;AACV,+CAA+C;AAE/C,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAA;AAEvC,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;AAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;AAExB,IAAI,QAAQ,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;IAC1C,iBAAiB;IACjB,SAAS,CAAC,IAAI,CAAC,CAAA;AACjB,CAAC;KAAM,CAAC;IACN,4BAA4B;IAC5B,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACnD,OAAO,CAAC,KAAK,CAAC,yBAAyB,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;QACpE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;IAEF,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,EAAE;QACxC,OAAO,CAAC,KAAK,CAAC,qBAAqB,EAAE,KAAK,CAAC,CAAA;QAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;IAEF,WAAW,EAAE,CAAA;AACf,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,gCAAgC;AAChC,qDAAqD;AAErD,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAA;AACzC,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAE9C,+CAA+C;AAC/C,UAAU;AACV,+CAA+C;AAE/C,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAA;AAEjD,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;AAClC,MAAM,QAAQ,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;AAExB,IAAI,QAAQ,IAAI,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;IAC1C,iBAAiB;IACjB,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;QACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;AACJ,CAAC;KAAM,CAAC;IACN,4BAA4B;IAC5B,OAAO,CAAC,EAAE,CAAC,oBAAoB,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;QACnD,OAAO,CAAC,KAAK,CAAC,yBAAyB,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAA;QACpE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;IAEF,OAAO,CAAC,EAAE,CAAC,mBAAmB,EAAE,CAAC,KAAK,EAAE,EAAE;QACxC,OAAO,CAAC,KAAK,CAAC,qBAAqB,EAAE,KAAK,CAAC,CAAA;QAC3C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;IAEF,WAAW,EAAE,CAAA;AACf,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mcp-local-rag",
3
- "version": "0.8.2",
3
+ "version": "0.9.0",
4
4
  "description": "Local RAG MCP Server - Easy-to-setup document search with minimal configuration",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: mcp-local-rag
3
- description: Provides score interpretation (< 0.3 good, > 0.5 skip), query optimization, and source naming for query_documents, ingest_file, ingest_data tools. Use this skill when working with RAG, searching documents, ingesting files, saving web content, or handling PDF, HTML, DOCX, TXT, Markdown.
3
+ description: Use this skill when ingesting or querying documents with MCP local RAG, including `query_documents`, `ingest_file`, `ingest_data`, and CLI bulk ingestion. Covers query refinement, result score interpretation, and source metadata conventions for PDF, HTML, DOCX, TXT, and Markdown. Not for general file operations or SQL/database queries.
4
4
  ---
5
5
 
6
6
  # MCP Local RAG Skills
@@ -13,6 +13,7 @@ description: Provides score interpretation (< 0.3 good, > 0.5 skip), query optim
13
13
  | `ingest_data` | Raw content (HTML, text) with source URL |
14
14
  | `query_documents` | Semantic + keyword hybrid search |
15
15
  | `delete_file` / `list_files` / `status` | Management |
16
+ | `npx mcp-local-rag ingest` | Multiple files or directory (shell) |
16
17
 
17
18
  ## Search: Core Rules
18
19
 
@@ -112,9 +113,34 @@ ingest_data({
112
113
 
113
114
  Re-ingest same source to update. Use same source in `delete_file` to remove.
114
115
 
116
+ ### CLI ingest
117
+
118
+ For multiple files or directory ingestion. Prefer over repeated `ingest_file` calls.
119
+
120
+ | Scenario | Use |
121
+ |----------|-----|
122
+ | Single file from user request | `ingest_file` |
123
+ | Multiple files or a directory | `npx mcp-local-rag ingest <path>` |
124
+ | Raw HTML/text content | `ingest_data` (CLI does not support stdin) |
125
+
126
+ ```bash
127
+ npx mcp-local-rag ingest [options] <path>
128
+ ```
129
+
130
+ - `<path>`: file or directory (recursively scans supported formats)
131
+ - Use `--help` for all options and defaults
132
+ - Options must match MCP server config — see [cli-ingest.md](references/cli-ingest.md)
133
+
134
+ **Output interpretation:**
135
+
136
+ - Exit code 0: all files succeeded
137
+ - Exit code 1: one or more files failed — report failed files to user
138
+ - `SKIPPED (0 chunks)`: file was empty or too short, counted as success
139
+
115
140
  ## References
116
141
 
117
142
  For edge cases and examples:
118
143
  - [html-ingestion.md](references/html-ingestion.md) - URL normalization, SPA handling
119
144
  - [query-optimization.md](references/query-optimization.md) - Query patterns by intent
120
145
  - [result-refinement.md](references/result-refinement.md) - Contradiction resolution, chunking
146
+ - [cli-ingest.md](references/cli-ingest.md) - CLI options, config matching
@@ -0,0 +1,45 @@
1
+ # CLI Ingest Reference
2
+
3
+ ## Command
4
+
5
+ ```bash
6
+ npx mcp-local-rag ingest [options] <path>
7
+ ```
8
+
9
+ ## Options
10
+
11
+ | Option | Env Var | Default | Description |
12
+ |--------|---------|---------|-------------|
13
+ | `--db-path <path>` | `DB_PATH` | `./lancedb/` | LanceDB database path |
14
+ | `--base-dir <path>` | `BASE_DIR` | cwd | Base directory for documents |
15
+ | `--cache-dir <path>` | `CACHE_DIR` | `./models/` | Model cache directory |
16
+ | `--model-name <name>` | `MODEL_NAME` | `Xenova/all-MiniLM-L6-v2` | Embedding model |
17
+ | `--max-file-size <n>` | `MAX_FILE_SIZE` | `104857600` | Max file size in bytes |
18
+ | `-h, --help` | — | — | Show usage with defaults |
19
+
20
+ Priority: CLI flags > environment variables > defaults.
21
+
22
+ ## Config Matching
23
+
24
+ When ingesting into an existing database, options **must match** the MCP server config — especially `--model-name`. Using a different embedding model against an existing database produces vectors in a different space, silently degrading search quality.
25
+
26
+ ## Output
27
+
28
+ Output goes to stderr. Summary block:
29
+
30
+ ```
31
+ --- Ingest Summary ---
32
+ Succeeded: 12
33
+ Failed: 1
34
+ Total chunks: 247
35
+ ```
36
+
37
+ - Exit code 0: all succeeded
38
+ - Exit code 1: one or more failed
39
+ - `SKIPPED (0 chunks)`: empty or too-short file, counted as success
40
+
41
+ ## Supported Formats
42
+
43
+ Same file formats as the `ingest_file` MCP tool: `.pdf`, `.docx`, `.txt`, `.md`. HTML is not supported by the CLI — ingest it through `ingest_data` instead.
44
+
45
+ Directory mode recursively scans for these extensions, skipping anything under the `--db-path` and `--cache-dir` directories.