mcp-local-rag 0.14.0 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/README.md +150 -10
  2. package/dist/cli/common.d.ts +37 -0
  3. package/dist/cli/common.d.ts.map +1 -1
  4. package/dist/cli/common.js +81 -1
  5. package/dist/cli/common.js.map +1 -1
  6. package/dist/cli/delete.js +3 -3
  7. package/dist/cli/delete.js.map +1 -1
  8. package/dist/cli/ingest.d.ts +38 -14
  9. package/dist/cli/ingest.d.ts.map +1 -1
  10. package/dist/cli/ingest.js +146 -74
  11. package/dist/cli/ingest.js.map +1 -1
  12. package/dist/cli/list.d.ts +6 -1
  13. package/dist/cli/list.d.ts.map +1 -1
  14. package/dist/cli/list.js +158 -34
  15. package/dist/cli/list.js.map +1 -1
  16. package/dist/cli/options.d.ts +22 -3
  17. package/dist/cli/options.d.ts.map +1 -1
  18. package/dist/cli/options.js +37 -32
  19. package/dist/cli/options.js.map +1 -1
  20. package/dist/cli/query.d.ts.map +1 -1
  21. package/dist/cli/query.js +2 -3
  22. package/dist/cli/query.js.map +1 -1
  23. package/dist/cli/read-neighbors.js +2 -2
  24. package/dist/cli/read-neighbors.js.map +1 -1
  25. package/dist/index.js +1 -1
  26. package/dist/index.js.map +1 -1
  27. package/dist/ingest/visual.d.ts +6 -4
  28. package/dist/ingest/visual.d.ts.map +1 -1
  29. package/dist/ingest/visual.js +18 -3
  30. package/dist/ingest/visual.js.map +1 -1
  31. package/dist/parser/index.d.ts +47 -10
  32. package/dist/parser/index.d.ts.map +1 -1
  33. package/dist/parser/index.js +70 -14
  34. package/dist/parser/index.js.map +1 -1
  35. package/dist/pdf-visual/captioner.d.ts +9 -9
  36. package/dist/pdf-visual/captioner.d.ts.map +1 -1
  37. package/dist/pdf-visual/captioner.js +48 -157
  38. package/dist/pdf-visual/captioner.js.map +1 -1
  39. package/dist/pdf-visual/captioners/fast.d.ts +7 -0
  40. package/dist/pdf-visual/captioners/fast.d.ts.map +1 -0
  41. package/dist/pdf-visual/captioners/fast.js +129 -0
  42. package/dist/pdf-visual/captioners/fast.js.map +1 -0
  43. package/dist/pdf-visual/captioners/quality.d.ts +7 -0
  44. package/dist/pdf-visual/captioners/quality.d.ts.map +1 -0
  45. package/dist/pdf-visual/captioners/quality.js +150 -0
  46. package/dist/pdf-visual/captioners/quality.js.map +1 -0
  47. package/dist/pdf-visual/captioners/shared.d.ts +11 -0
  48. package/dist/pdf-visual/captioners/shared.d.ts.map +1 -0
  49. package/dist/pdf-visual/captioners/shared.js +40 -0
  50. package/dist/pdf-visual/captioners/shared.js.map +1 -0
  51. package/dist/pdf-visual/index.d.ts +24 -10
  52. package/dist/pdf-visual/index.d.ts.map +1 -1
  53. package/dist/pdf-visual/index.js +27 -29
  54. package/dist/pdf-visual/index.js.map +1 -1
  55. package/dist/pdf-visual/types.d.ts +14 -9
  56. package/dist/pdf-visual/types.d.ts.map +1 -1
  57. package/dist/server/error-utils.d.ts +40 -0
  58. package/dist/server/error-utils.d.ts.map +1 -1
  59. package/dist/server/error-utils.js +66 -0
  60. package/dist/server/error-utils.js.map +1 -1
  61. package/dist/server/index.d.ts +92 -37
  62. package/dist/server/index.d.ts.map +1 -1
  63. package/dist/server/index.js +296 -75
  64. package/dist/server/index.js.map +1 -1
  65. package/dist/server/tool-definitions.d.ts.map +1 -1
  66. package/dist/server/tool-definitions.js +6 -0
  67. package/dist/server/tool-definitions.js.map +1 -1
  68. package/dist/server/types.d.ts +63 -8
  69. package/dist/server/types.d.ts.map +1 -1
  70. package/dist/server-main.d.ts.map +1 -1
  71. package/dist/server-main.js +91 -10
  72. package/dist/server-main.js.map +1 -1
  73. package/dist/utils/base-dirs.d.ts +203 -0
  74. package/dist/utils/base-dirs.d.ts.map +1 -0
  75. package/dist/utils/base-dirs.js +407 -0
  76. package/dist/utils/base-dirs.js.map +1 -0
  77. package/dist/utils/raw-data-utils.d.ts +15 -5
  78. package/dist/utils/raw-data-utils.d.ts.map +1 -1
  79. package/dist/utils/raw-data-utils.js +39 -8
  80. package/dist/utils/raw-data-utils.js.map +1 -1
  81. package/dist/utils/sensitive-path.d.ts +23 -0
  82. package/dist/utils/sensitive-path.d.ts.map +1 -0
  83. package/dist/utils/sensitive-path.js +91 -0
  84. package/dist/utils/sensitive-path.js.map +1 -0
  85. package/package.json +7 -7
  86. package/skills/mcp-local-rag/SKILL.md +77 -17
  87. package/skills/mcp-local-rag/references/cli-reference.md +18 -6
package/README.md CHANGED
@@ -33,7 +33,7 @@ Semantic search with keyword boost for exact technical terms — fully private,
33
33
 
34
34
  ## Quick Start
35
35
 
36
- Set `BASE_DIR` to the folder you want to search. Documents must live under it.
36
+ Set `BASE_DIR` to the folder you want to search (or `BASE_DIRS` for multiple roots — see [Configuration](#configuration)). Documents must live under one of the configured roots.
37
37
 
38
38
  Add the MCP server to your AI coding tool:
39
39
 
@@ -121,7 +121,7 @@ Re-ingesting the same file replaces the old version automatically.
121
121
 
122
122
  ##### Ingesting PDFs with figures (visual mode)
123
123
 
124
- PDFs with charts, tables, or diagrams can optionally add local VLM-generated captions to the related text chunks, giving visual content some searchable representation in the same vector + FTS pipeline.
124
+ PDFs with charts, tables, or diagrams can optionally add local VLM-generated captions to the document index, giving visual content some searchable representation in the same vector + FTS pipeline. Captions are auxiliary text — not image search, not OCR, and not a faithful transcription of the figure.
125
125
 
126
126
  **Via MCP**:
127
127
  ```
@@ -133,9 +133,34 @@ PDFs with charts, tables, or diagrams can optionally add local VLM-generated cap
133
133
  npx mcp-local-rag ingest ./docs/spec.pdf --visual
134
134
  ```
135
135
 
136
- Captions are inlined into the corresponding page's chunks as `[Visual content on page N: …]`. They flow through the existing chunker and embedder unchanged — no schema differences, no separate index.
136
+ Each caption is emitted as its own chunk with the envelope `[Visual content on page N: …]`, alongside the page-body chunks. It flows through the existing embedder and FTS index — no schema differences, no separate index.
137
137
 
138
- Visual mode is opt-in; normal ingest does not load the VLM. When visual mode is enabled, the VLM (`HuggingFaceTB/SmolVLM-256M-Instruct`) is cached under `CACHE_DIR` (default: `./models/`). The first download is hundreds of MB. The model identifier and quantization variant are fixed in this release; per-page VLM failures are tolerated — that page proceeds with text only.
138
+ Visual mode is opt-in; normal ingest does not load the VLM. Per-page VLM failures are tolerated — that page proceeds with text only.
139
+
140
+ ###### Choosing a visual-quality profile
141
+
142
+ Visual mode offers two profiles, selected per ingest call:
143
+
144
+ | Profile | Model | Disk (cache) | Per-page inference | Suited for |
145
+ |---|---|---|---|---|
146
+ | `fast` (default) | `HuggingFaceTB/SmolVLM-256M-Instruct` | ~250 MB | baseline | Light visual indexing, quick first-run setup. |
147
+ | `quality` | `onnx-community/Qwen2.5-VL-3B-Instruct-ONNX` | ~2.9 GB | ~2× `fast` | Figures with in-image text (axis labels, panel sub-labels, annotations) where caption fidelity matters more than inference time. |
148
+
149
+ The numbers above were measured on CPU during development, using the project's probe PDFs; they may shift with model updates or differ on your hardware.
150
+
151
+ **Via MCP** — `ingest_file` accepts an optional `visualQuality` parameter (enum: `'fast' | 'quality'`, default `'fast'`; ignored when `visual` is false):
152
+ ```
153
+ "Ingest /Users/me/docs/research-paper.pdf with visual: true and visualQuality: 'quality'"
154
+ ```
155
+
156
+ **Via CLI** — `--visual-quality fast|quality` (default `fast`; silently ignored when `--visual` is absent):
157
+ ```bash
158
+ npx mcp-local-rag ingest ./docs/research-paper.pdf --visual --visual-quality quality
159
+ ```
160
+
161
+ Profile model identifiers and quantization variants are fixed per release. Both profiles share the same `CACHE_DIR` (default: `./models/`); the first run on each profile downloads its model.
162
+
163
+ > **Behavior change from v0.14.0**: Captions are now emitted as dedicated chunks rather than appended to the page text before chunking. As a side effect, `metadata.fileSize` for visual ingests no longer includes the caption character count — it measures the post-extraction body length only. The underlying PDF is unchanged; only the reported `fileSize` for visual-ingested PDFs may shrink across the release boundary.
139
164
 
140
165
  > **Security note**: Visual captions are derived from PDF contents and may inherit attacker-controlled text. Downstream LLM consumers should treat retrieved chunks as untrusted data, not as instructions. The `[Visual content on page N: …]` envelope helps consumers distinguish caption text from prose.
141
166
 
@@ -181,7 +206,7 @@ Pass the `filePath` and `chunkIndex` from the search result. The response includ
181
206
  #### Managing Files
182
207
 
183
208
  ```
184
- "List all files in BASE_DIR and their ingested status" # See what's indexed
209
+ "List all files in configured base directories and their ingested status" # See what's indexed
185
210
  "Delete old-spec.pdf from RAG" # Remove a file
186
211
  "Show RAG server status" # Check system health
187
212
  ```
@@ -212,6 +237,15 @@ npx mcp-local-rag delete --source "https://..." # Remove by source URL
212
237
  npx mcp-local-rag --db-path ./my-db query "auth" --base-dir ./docs
213
238
  ```
214
239
 
240
+ The `--base-dir` flag is repeatable on `ingest` and `list`; pass it once per root:
241
+
242
+ ```bash
243
+ npx mcp-local-rag ingest --base-dir ./docs --base-dir ./specs ./docs/readme.md
244
+ npx mcp-local-rag list --base-dir ./docs --base-dir ./specs
245
+ ```
246
+
247
+ The positional path to `ingest` must sit inside one of the configured roots. When at least one `--base-dir` is supplied, CLI roots replace any env-var roots (no merge).
248
+
215
249
  **Environment variables** — set in your shell:
216
250
 
217
251
  ```bash
@@ -220,6 +254,13 @@ export BASE_DIR=./docs
220
254
  npx mcp-local-rag query "auth"
221
255
  ```
222
256
 
257
+ For multiple roots, use `BASE_DIRS` (JSON array of non-empty path strings):
258
+
259
+ ```bash
260
+ export BASE_DIRS='["/Users/me/Documents/work","/Users/me/Projects/specs"]'
261
+ npx mcp-local-rag list
262
+ ```
263
+
223
264
  **Sharing config between MCP and CLI** — if your MCP client inherits shell environment variables, you can set them in your shell profile (e.g., `~/.zshrc`) so both use the same values. Otherwise, set them explicitly in your MCP config as well.
224
265
 
225
266
  ```bash
@@ -334,7 +375,8 @@ The MCP server is configured by environment variables only — pass them through
334
375
 
335
376
  | Environment Variable | CLI Flag | Default | Description |
336
377
  |---------------------|----------|---------|-------------|
337
- | `BASE_DIR` | `--base-dir` | Current directory | Document root directory (security boundary) |
378
+ | `BASE_DIR` | `--base-dir` (repeatable) | Current directory | Single document root directory (security boundary). See [Document Roots](#document-roots-base_dir-and-base_dirs) for multi-root setup. |
379
+ | `BASE_DIRS` | — | (unset) | JSON array of document roots (security boundary). Takes precedence over `BASE_DIR`. See [Document Roots](#document-roots-base_dir-and-base_dirs). |
338
380
  | `DB_PATH` | `--db-path` | `./lancedb/` | Vector database location |
339
381
  | `CACHE_DIR` | `--cache-dir` | `./models/` | Model cache directory |
340
382
  | `MODEL_NAME` | `--model-name` | `Xenova/all-MiniLM-L6-v2` | HuggingFace model ID ([available models](https://huggingface.co/models?library=transformers.js&pipeline_tag=feature-extraction)) |
@@ -349,6 +391,89 @@ The MCP server is configured by environment variables only — pass them through
349
391
 
350
392
  ⚠️ Changing `MODEL_NAME` changes embedding dimensions. Delete `DB_PATH` and re-ingest after switching models.
351
393
 
394
+ ### Document Roots (`BASE_DIR` and `BASE_DIRS`)
395
+
396
+ mcp-local-rag enforces a security boundary: only files under a configured root are accessible to ingest, list, delete, or read-neighbor operations.
397
+
398
+ **Single root** — use `BASE_DIR`:
399
+
400
+ ```bash
401
+ export BASE_DIR=/Users/me/Documents/work
402
+ ```
403
+
404
+ **Multiple roots** — use `BASE_DIRS` with a JSON array:
405
+
406
+ ```bash
407
+ export BASE_DIRS='["/Users/me/Documents/work","/Users/me/Projects/specs"]'
408
+ ```
409
+
410
+ Only JSON-array syntax is supported. Delimiter syntax such as `BASE_DIRS=/a:/b` is intentionally **not** supported (avoids ambiguity with spaces, colons, commas, and Windows paths).
411
+
412
+ **Resolution order** (highest precedence first):
413
+
414
+ 1. CLI `--base-dir <path>` flags (repeatable on `ingest` and `list`)
415
+ 2. `BASE_DIRS` environment variable
416
+ 3. `BASE_DIR` environment variable
417
+ 4. `process.cwd()` (current working directory)
418
+
419
+ CLI roots **replace** env roots — they are never merged. `BASE_DIRS` and `BASE_DIR` are never merged either: `BASE_DIRS` wins when both are set.
420
+
421
+ **Precedence warning** — when `BASE_DIRS` and `BASE_DIR` are both set (and no CLI `--base-dir` is supplied), `BASE_DIR` is ignored and a warning is surfaced. The warning is visible:
422
+
423
+ - In MCP tool responses (as an additional content block, on every tool — including `status`, `query_documents`, `ingest_file`, `ingest_data`, `list_files`, `delete_file`, `read_chunk_neighbors`).
424
+ - On CLI `stderr`.
425
+
426
+ Unset `BASE_DIR` (or remove `BASE_DIRS`) to silence the warning.
427
+
428
+ **Nested-root pruning** — if one configured root sits inside another after realpath resolution, the nested child is dropped to avoid duplicate scan results. A pruning warning is surfaced the same way as the precedence warning. The surviving parent root still defines the security boundary.
429
+
430
+ **Invalid `BASE_DIRS`** — when `BASE_DIRS` is not a valid JSON array of non-empty strings (malformed JSON, empty array, non-string elements, ...), root-dependent MCP tools return a structured error and CLI subcommands exit non-zero. There is **no silent fallback** to `BASE_DIR` or `cwd`. The MCP `status` tool remains callable so you can diagnose the config error through your MCP client.
431
+
432
+ **MCP client examples** — multi-root setup:
433
+
434
+ Cursor (`~/.cursor/mcp.json`):
435
+ ```json
436
+ {
437
+ "mcpServers": {
438
+ "local-rag": {
439
+ "command": "npx",
440
+ "args": ["-y", "mcp-local-rag"],
441
+ "env": {
442
+ "BASE_DIRS": "[\"/Users/me/Documents/work\",\"/Users/me/Projects/specs\"]"
443
+ }
444
+ }
445
+ }
446
+ }
447
+ ```
448
+
449
+ Codex (`~/.codex/config.toml`):
450
+ ```toml
451
+ [mcp_servers.local-rag]
452
+ command = "npx"
453
+ args = ["-y", "mcp-local-rag"]
454
+
455
+ [mcp_servers.local-rag.env]
456
+ BASE_DIRS = "[\"/Users/me/Documents/work\",\"/Users/me/Projects/specs\"]"
457
+ ```
458
+
459
+ Claude Code:
460
+ ```bash
461
+ claude mcp add local-rag --scope user \
462
+ --env BASE_DIRS='["/Users/me/Documents/work","/Users/me/Projects/specs"]' \
463
+ -- npx -y mcp-local-rag
464
+ ```
465
+
466
+ **CLI examples** — multi-root invocations:
467
+
468
+ ```bash
469
+ # Repeatable --base-dir
470
+ npx mcp-local-rag ingest --base-dir /Users/me/work --base-dir /Users/me/specs /Users/me/work/readme.md
471
+ npx mcp-local-rag list --base-dir /Users/me/work --base-dir /Users/me/specs
472
+
473
+ # Or via BASE_DIRS env
474
+ BASE_DIRS='["/Users/me/work","/Users/me/specs"]' npx mcp-local-rag list
475
+ ```
476
+
352
477
  ### Client-Specific Setup
353
478
 
354
479
  **Cursor** — Global: `~/.cursor/mcp.json`, Project: `.cursor/mcp.json`
@@ -392,9 +517,13 @@ The embedding model (~90MB) downloads on first use. Takes 1-2 minutes, then work
392
517
 
393
518
  ### Security
394
519
 
395
- - **Path restriction**: Only files within `BASE_DIR` are accessible
520
+ - **Path restriction**: Only files within a configured root (`BASE_DIR` or any `BASE_DIRS` / `--base-dir` entry) are accessible. Symlinks resolving outside all configured roots, and sibling-prefix paths (e.g. `/foo/barista` for root `/foo/bar`), are rejected.
396
521
  - **Local only**: No network requests after model download
397
- - **Model source**: Official HuggingFace repository ([verify here](https://huggingface.co/Xenova/all-MiniLM-L6-v2))
522
+ - **Model sources** (all official HuggingFace repositories):
523
+ - Embedder: [`Xenova/all-MiniLM-L6-v2`](https://huggingface.co/Xenova/all-MiniLM-L6-v2)
524
+ - Visual `fast` profile: [`HuggingFaceTB/SmolVLM-256M-Instruct`](https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct)
525
+ - Visual `quality` profile: [`onnx-community/Qwen2.5-VL-3B-Instruct-ONNX`](https://huggingface.co/onnx-community/Qwen2.5-VL-3B-Instruct-ONNX)
526
+ - **Visual caption fidelity**: The `quality` profile reproduces in-image text more faithfully than `fast`. Both profiles output captions wrapped as `[Visual content on page N: …]`, but a faithful reproduction means attacker-controlled in-image text — including characters like `]` that visually close the envelope — can appear verbatim in retrieved chunks. Downstream LLM consumers should treat retrieved chunks as untrusted data, not as instructions, regardless of envelope shape.
398
527
 
399
528
  <details>
400
529
  <summary><strong>Performance</strong></summary>
@@ -436,7 +565,18 @@ Check chunk count with `status`. Large documents with many chunks may slow queri
436
565
 
437
566
  ### "Path outside BASE_DIR"
438
567
 
439
- Ensure file paths are within `BASE_DIR`. Use absolute paths.
568
+ Ensure file paths are within one of the configured roots (`BASE_DIR`, any `BASE_DIRS` entry, or any CLI `--base-dir`). Use absolute paths.
569
+
570
+ ### "BASE_DIRS must be a JSON array..."
571
+
572
+ `BASE_DIRS` accepts only a JSON array of one or more non-empty path strings. Examples:
573
+
574
+ - Valid: `BASE_DIRS='["/Users/me/work","/Users/me/specs"]'`
575
+ - Invalid: `BASE_DIRS=/a:/b` (delimiter syntax not supported)
576
+ - Invalid: `BASE_DIRS='[]'` (empty array)
577
+ - Invalid: `BASE_DIRS='["",""]'` (empty string element)
578
+
579
+ When invalid, root-dependent operations fail with a clear error rather than silently falling back. The MCP `status` tool remains callable so you can inspect the diagnostic.
440
580
 
441
581
  ### MCP client doesn't see tools
442
582
 
@@ -453,7 +593,7 @@ Ensure file paths are within `BASE_DIR`. Use absolute paths.
453
593
  Yes. After model download, nothing leaves your machine. Verify with network monitoring.
454
594
 
455
595
  **Can I use this offline?**
456
- Yes, after the required models are cached locally. Text ingest/search needs the embedding model. PDF visual mode is opt-in and also needs the VLM model on first use; the download is hundreds of MB for the default `q4` variant and is cached under `CACHE_DIR` (default: `./models/`).
596
+ Yes, after the required models are cached locally. Text ingest/search needs the embedding model. PDF visual mode is opt-in and also needs the VLM model on first use; the download is ~250 MB for the default `fast` profile (SmolVLM-256M) or ~2.9 GB for the `quality` profile (Qwen2.5-VL-3B), cached under `CACHE_DIR` (default: `./models/`).
457
597
 
458
598
  **How does this compare to cloud RAG?**
459
599
  Cloud services offer better accuracy at scale but require sending data externally. This trades some accuracy for complete privacy and zero runtime cost.
@@ -1,4 +1,5 @@
1
1
  import { Embedder } from '../embedder/index.js';
2
+ import { type BaseDirsConfig, type BaseDirsConfigWarning } from '../utils/base-dirs.js';
2
3
  import { VectorStore } from '../vectordb/index.js';
3
4
  import { type ResolvedGlobalConfig } from './options.js';
4
5
  /**
@@ -11,4 +12,40 @@ export declare function createVectorStore(config: ResolvedGlobalConfig): VectorS
11
12
  * Callers are responsible for managing the Embedder lifecycle.
12
13
  */
13
14
  export declare function createEmbedder(config: ResolvedGlobalConfig): Embedder;
15
+ /**
16
+ * Result of {@link resolveCliBaseDirsOrExit}. Resolution warnings travel with
17
+ * the config so subcommands can render them per their own UI contract (CLI
18
+ * subcommands generally write them to stderr).
19
+ */
20
+ export interface CliBaseDirsResolution {
21
+ config: BaseDirsConfig;
22
+ warnings: BaseDirsConfigWarning[];
23
+ }
24
+ /**
25
+ * Resolve effective base directories for a CLI subcommand using the shared
26
+ * resolver, surfacing any configuration error as a process-level failure.
27
+ *
28
+ * Inputs (single source of truth for CLI precedence — kept here so per-
29
+ * subcommand entry points don't each replicate the env-fallback chain):
30
+ * - `cliRoots`: repeated `--base-dir` flag values in CLI order. When non-
31
+ * empty, REPLACES env roots — no merge.
32
+ * - `process.env['BASE_DIRS']`: JSON array, used only when CLI roots are
33
+ * absent.
34
+ * - `process.env['BASE_DIR']`: single path, used only when CLI roots and
35
+ * `BASE_DIRS` are absent.
36
+ * - `process.cwd()`: final fallback.
37
+ *
38
+ * Failure mode: a `BaseDirsConfigError` (invalid `BASE_DIRS` JSON, missing
39
+ * directory, not-a-directory, ...) is reported to stderr and exits with
40
+ * code 1. This is intentional: the resolver explicitly does NOT fall back
41
+ * (see §Technical Decisions → Resolution order in the multi-base-dirs
42
+ * plan), so CLI consumers should fail fast rather than silently degrading
43
+ * to `cwd`.
44
+ *
45
+ * Warnings (`base-dirs-overrides-base-dir`, `nested-root-pruned`) are
46
+ * returned to the caller rather than written here, so each subcommand can
47
+ * decide its own rendering (JSON-output subcommands like `list` may need
48
+ * to keep stderr clean even when warnings are present).
49
+ */
50
+ export declare function resolveCliBaseDirsOrExit(cliRoots: string[]): Promise<CliBaseDirsResolution>;
14
51
  //# sourceMappingURL=common.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../src/cli/common.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAClD,OAAO,EAAE,KAAK,oBAAoB,EAAiB,MAAM,cAAc,CAAA;AAEvE;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,oBAAoB,GAAG,WAAW,CAK3E;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,oBAAoB,GAAG,QAAQ,CAOrE"}
1
+ {"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../src/cli/common.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EACL,KAAK,cAAc,EACnB,KAAK,qBAAqB,EAG3B,MAAM,uBAAuB,CAAA;AAE9B,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAClD,OAAO,EAAE,KAAK,oBAAoB,EAA+B,MAAM,cAAc,CAAA;AAErF;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,oBAAoB,GAAG,WAAW,CAK3E;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,oBAAoB,GAAG,QAAQ,CAOrE;AAED;;;;GAIG;AACH,MAAM,WAAW,qBAAqB;IACpC,MAAM,EAAE,cAAc,CAAA;IACtB,QAAQ,EAAE,qBAAqB,EAAE,CAAA;CAClC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,wBAAsB,wBAAwB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAqDjG"}
@@ -1,7 +1,10 @@
1
1
  // Shared CLI component helpers — factory functions for VectorStore and Embedder
2
+ // plus base-directory resolution shared by every subcommand that scans files.
2
3
  import { Embedder } from '../embedder/index.js';
4
+ import { parseBaseDirsEnv, resolveBaseDirs, } from '../utils/base-dirs.js';
5
+ import { checkSensitivePath } from '../utils/sensitive-path.js';
3
6
  import { VectorStore } from '../vectordb/index.js';
4
- import { resolveDevice } from './options.js';
7
+ import { resolveDevice, validatePath } from './options.js';
5
8
  /**
6
9
  * Create an uninitialized VectorStore from resolved global config.
7
10
  * Callers are responsible for calling initialize() before use.
@@ -24,4 +27,81 @@ export function createEmbedder(config) {
24
27
  device: resolveDevice(process.env['RAG_DEVICE']),
25
28
  });
26
29
  }
30
+ /**
31
+ * Resolve effective base directories for a CLI subcommand using the shared
32
+ * resolver, surfacing any configuration error as a process-level failure.
33
+ *
34
+ * Inputs (single source of truth for CLI precedence — kept here so per-
35
+ * subcommand entry points don't each replicate the env-fallback chain):
36
+ * - `cliRoots`: repeated `--base-dir` flag values in CLI order. When non-
37
+ * empty, REPLACES env roots — no merge.
38
+ * - `process.env['BASE_DIRS']`: JSON array, used only when CLI roots are
39
+ * absent.
40
+ * - `process.env['BASE_DIR']`: single path, used only when CLI roots and
41
+ * `BASE_DIRS` are absent.
42
+ * - `process.cwd()`: final fallback.
43
+ *
44
+ * Failure mode: a `BaseDirsConfigError` (invalid `BASE_DIRS` JSON, missing
45
+ * directory, not-a-directory, ...) is reported to stderr and exits with
46
+ * code 1. This is intentional: the resolver explicitly does NOT fall back
47
+ * (see §Technical Decisions → Resolution order in the multi-base-dirs
48
+ * plan), so CLI consumers should fail fast rather than silently degrading
49
+ * to `cwd`.
50
+ *
51
+ * Warnings (`base-dirs-overrides-base-dir`, `nested-root-pruned`) are
52
+ * returned to the caller rather than written here, so each subcommand can
53
+ * decide its own rendering (JSON-output subcommands like `list` may need
54
+ * to keep stderr clean even when warnings are present).
55
+ */
56
+ export async function resolveCliBaseDirsOrExit(cliRoots) {
57
+ // No `--base-dir` flags: screen the raw env-supplied paths before the
58
+ // resolver realpath-normalizes them, so a literal `BASE_DIR=/etc` is
59
+ // rejected with the env var named as the source of the bad value.
60
+ if (cliRoots.length === 0) {
61
+ if (process.env['BASE_DIRS'] !== undefined && process.env['BASE_DIRS'].length > 0) {
62
+ const parsed = parseBaseDirsEnv(process.env['BASE_DIRS']);
63
+ if (parsed.ok) {
64
+ for (const raw of parsed.value) {
65
+ const sensitive = checkSensitivePath(raw, 'BASE_DIRS');
66
+ if (sensitive) {
67
+ console.error(sensitive);
68
+ process.exit(1);
69
+ }
70
+ }
71
+ }
72
+ // Unparseable BASE_DIRS is deliberately not reported here; it surfaces below via resolveBaseDirs.
73
+ }
74
+ else if (process.env['BASE_DIR'] !== undefined && process.env['BASE_DIR'].trim().length > 0) {
75
+ const sensitive = checkSensitivePath(process.env['BASE_DIR'], 'BASE_DIR');
76
+ if (sensitive) {
77
+ console.error(sensitive);
78
+ process.exit(1);
79
+ }
80
+ }
81
+ }
82
+ const result = await resolveBaseDirs({
83
+ cliRoots,
84
+ envBaseDirs: process.env['BASE_DIRS'],
85
+ envBaseDir: process.env['BASE_DIR'],
86
+ cwd: process.cwd(),
87
+ });
88
+ if (!result.ok) {
89
+ console.error(result.error.message);
90
+ process.exit(1);
91
+ }
92
+ // Apply the sensitive-path policy uniformly to every effective root
93
+ // (CLI, env, or cwd). Pre-multi-root code validated `BASE_DIR` here; the
94
+ // same policy must continue to apply to `BASE_DIRS` entries and to CLI
95
+ // roots that pre-validation in the subcommand may have missed (e.g.
96
+ // realpath-resolved targets of symlinks). Failures are always labelled
97
+ // `--base-dir`, whatever the root's origin: it is the flag users control.
98
+ for (const root of result.config.baseDirs) {
99
+ const sensitive = validatePath(root, '--base-dir');
100
+ if (sensitive) {
101
+ console.error(sensitive);
102
+ process.exit(1);
103
+ }
104
+ }
105
+ return { config: result.config, warnings: result.warnings };
106
+ }
27
107
  //# sourceMappingURL=common.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"common.js","sourceRoot":"","sources":["../../src/cli/common.ts"],"names":[],"mappings":"AAAA,gFAAgF;AAEhF,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAClD,OAAO,EAA6B,aAAa,EAAE,MAAM,cAAc,CAAA;AAEvE;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAA4B;IAC5D,OAAO,IAAI,WAAW,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,QAAQ;KACpB,CAAC,CAAA;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,MAA4B;IACzD,OAAO,IAAI,QAAQ,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,EAAE;QACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;KACjD,CAAC,CAAA;AACJ,CAAC"}
1
+ {"version":3,"file":"common.js","sourceRoot":"","sources":["../../src/cli/common.ts"],"names":[],"mappings":"AAAA,gFAAgF;AAChF,8EAA8E;AAE9E,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAC/C,OAAO,EAGL,gBAAgB,EAChB,eAAe,GAChB,MAAM,uBAAuB,CAAA;AAC9B,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAA;AAC/D,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAClD,OAAO,EAA6B,aAAa,EAAE,YAAY,EAAE,MAAM,cAAc,CAAA;AAErF;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,MAA4B;IAC5D,OAAO,IAAI,WAAW,CAAC;QACrB,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,QAAQ;KACpB,CAAC,CAAA;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,MAA4B;IACzD,OAAO,IAAI,QAAQ,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,EAAE;QACb,QAAQ,EAAE,MAAM,CAAC,QAAQ;QACzB,MAAM,EAAE,aAAa,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;KACjD,CAAC,CAAA;AACJ,CAAC;AAYD;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,QAAkB;IAC/D,kEAAkE;IAClE,qEAAqE;IACrE,sCAAsC;IACtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClF,MAAM,MAAM,GAAG,gBAAgB,CAAC,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAA;YACzD,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;gBACd,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;oBAC/B,MAAM,SAAS,GAAG,kBAAkB,CAAC,GAAG,EAAE,WAAW,CAAC,CAAA;oBACtD,IAAI,SAAS,EAAE,CAAC;wBACd,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;wBACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;oBACjB,CAAC;gBACH,CAAC;YACH,CAAC;YACD,0DAA0D;QAC5D,CAAC;aAAM,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9F,MAAM,SAAS,GAAG,kBAAkB,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,UAAU,CAAC,CAAA;YACzE,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,eAAe,CAAC;QACnC,QAAQ;QACR,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC;QACrC,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;QACnC,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;KACnB,CAAC,CAAA;IAEF,IAAI,CAAC,MAAM,CAAC,EAAE,
EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAA;QACnC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,oEAAoE;IACpE,yEAAyE;IACzE,uEAAuE;IACvE,oEAAoE;IACpE,sEAAsE;IACtE,4DAA4D;IAC5D,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;QAC1C,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,EAAE,YAAY,CAAC,CAAA;QAClD,IAAI,SAAS,EAAE,CAAC;YACd,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;YACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACjB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAA;AAC7D,CAAC"}
@@ -1,7 +1,7 @@
1
1
  // CLI delete subcommand — delete ingested content by file path or source URL
2
2
  import { unlink } from 'node:fs/promises';
3
3
  import { resolve } from 'node:path';
4
- import { generateMetaJsonPath, generateRawDataPath, isRawDataPath, } from '../utils/raw-data-utils.js';
4
+ import { generateMetaJsonPath, generateRawDataPath, isPathInRawDataDirLexical, } from '../utils/raw-data-utils.js';
5
5
  import { createVectorStore } from './common.js';
6
6
  import { resolveGlobalConfig, validatePath } from './options.js';
7
7
  // ============================================
@@ -120,8 +120,8 @@ export async function runDelete(args, globalOptions = {}) {
120
120
  }
121
121
  // Delete chunks from VectorStore
122
122
  await vectorStore.deleteChunks(targetPath);
123
- // Clean up physical raw-data files if applicable
124
- if (isRawDataPath(targetPath)) {
123
+ // Clean up physical raw-data files if applicable.
124
+ if (isPathInRawDataDirLexical(targetPath, globalConfig.dbPath)) {
125
125
  try {
126
126
  await unlink(targetPath);
127
127
  }
@@ -1 +1 @@
1
- {"version":3,"file":"delete.js","sourceRoot":"","sources":["../../src/cli/delete.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAE7E,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAA;AACzC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EACL,oBAAoB,EACpB,mBAAmB,EACnB,aAAa,GACd,MAAM,4BAA4B,CAAA;AACnC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,cAAc,CAAA;AAEhE,+CAA+C;AAC/C,OAAO;AACP,+CAA+C;AAE/C,MAAM,SAAS,GAAG;;;;;;;;;;;;;;;;yCAgBuB,CAAA;AAYzC;;;;GAIG;AACH,SAAS,SAAS,CAAC,IAAc;IAC/B,IAAI,IAAI,GAAG,KAAK,CAAA;IAChB,IAAI,MAA0B,CAAA;IAC9B,IAAI,QAA4B,CAAA;IAEhC,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAE,CAAA;QAEpB,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;YACrC,IAAI,GAAG,IAAI,CAAA;YACX,CAAC,EAAE,CAAA;QACL,CAAC;aAAM,IAAI,GAAG,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;YACvB,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACjD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAA;gBAC3C,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;YACD,MAAM,GAAG,KAAK,CAAA;YACd,CAAC,EAAE,CAAA;QACL,CAAC;aAAM,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;YACvC,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;YACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACjB,CAAC;aAAM,CAAC;YACN,iCAAiC;YACjC,QAAQ,GAAG,GAAG,CAAA;YACd,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAe,EAAE,IAAI,EAAE,CAAA;IACnC,IAAI,MAAM,KAAK,SAAS;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAA;IAChD,IAAI,QAAQ,KAAK,SAAS;QAAE,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAA;IACtD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C;AAE/C;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAc,EAAE,gBAA+B,EAAE;IAC/E,oBAAoB;IACpB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAE9B,gBAAgB;IAChB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,4DAA4D;IAC5D,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,CAAC,MAAM
,CAAC,MAAM,EAAE,CAAC;QACvC,OAAO,CAAC,KAAK,CAAC,4CAA4C,CAAC,CAAA;QAC3D,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAA;QAC7D,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,wBAAwB;IACxB,MAAM,YAAY,GAAG,mBAAmB,CAAC,aAAa,CAAC,CAAA;IAEvD,IAAI,CAAC;QACH,oEAAoE;QACpE,MAAM,WAAW,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAA;QACnD,MAAM,WAAW,CAAC,UAAU,EAAE,CAAA;QAE9B,6BAA6B;QAC7B,IAAI,UAAkB,CAAA;QAEtB,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,yCAAyC;YACzC,UAAU,GAAG,mBAAmB,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;QAClF,CAAC;aAAM,CAAC;YACN,8CAA8C;YAC9C,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,QAAS,CAAC,CAAA;YAEtC,sDAAsD;YACtD,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,EAAE,aAAa,CAAC,CAAA;YACzD,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBACxB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAA;gBACpB,OAAM;YACR,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,MAAM,WAAW,CAAC,YAAY,CAAC,UAAU,CAAC,CAAA;QAE1C,iDAAiD;QACjD,IAAI,aAAa,CAAC,UAAU,CAAC,EAAE,CAAC;YAC9B,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,UAAU,CAAC,CAAA;YAC1B,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,uDAAuD;gBACvD,IACE,CAAC,CAAC,KAAK,YAAY,KAAK,CAAC;oBACzB,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjB,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAClD,CAAC;oBACD,MAAM,KAAK,CAAA;gBACb,CAAC;YACH,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,oBAAoB,CAAC,UAAU,CAAC,CAAC,CAAA;YAChD,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,gBAAgB;gBAChB,IACE,CAAC,CAAC,KAAK,YAAY,KAAK,CAAC;oBACzB,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjB,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAClD,CAAC;oBACD,MAAM,KAAK,CAAA;gBACb,CAAC;YACH,CAAC;QACH,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,CAAC,QAAQ,EAAE,CAAA;QAE5B,+BAA+B;QAC/B,MAAM,MAAM,GAAG;YACb,QAAQ,EAAE,UAAU;YACpB,OAAO,EAAE,IAAI;YACb,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAA;QACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAA;IAC9C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,MAAM,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OA
AO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QACrE,OAAO,CAAC,KAAK,CAAC,UAAU,MAAM,EAAE,CAAC,CAAA;QACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"delete.js","sourceRoot":"","sources":["../../src/cli/delete.ts"],"names":[],"mappings":"AAAA,6EAA6E;AAE7E,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAA;AACzC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EACL,oBAAoB,EACpB,mBAAmB,EACnB,yBAAyB,GAC1B,MAAM,4BAA4B,CAAA;AACnC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,mBAAmB,EAAE,YAAY,EAAE,MAAM,cAAc,CAAA;AAEhE,+CAA+C;AAC/C,OAAO;AACP,+CAA+C;AAE/C,MAAM,SAAS,GAAG;;;;;;;;;;;;;;;;yCAgBuB,CAAA;AAYzC;;;;GAIG;AACH,SAAS,SAAS,CAAC,IAAc;IAC/B,IAAI,IAAI,GAAG,KAAK,CAAA;IAChB,IAAI,MAA0B,CAAA;IAC9B,IAAI,QAA4B,CAAA;IAEhC,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAE,CAAA;QAEpB,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,QAAQ,EAAE,CAAC;YACrC,IAAI,GAAG,IAAI,CAAA;YACX,CAAC,EAAE,CAAA;QACL,CAAC;aAAM,IAAI,GAAG,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,CAAC,CAAA;YACvB,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACjD,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAA;gBAC3C,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YACjB,CAAC;YACD,MAAM,GAAG,KAAK,CAAA;YACd,CAAC,EAAE,CAAA;QACL,CAAC;aAAM,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAA;YACvC,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;YACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACjB,CAAC;aAAM,CAAC;YACN,iCAAiC;YACjC,QAAQ,GAAG,GAAG,CAAA;YACd,CAAC,EAAE,CAAA;QACL,CAAC;IACH,CAAC;IAED,MAAM,MAAM,GAAe,EAAE,IAAI,EAAE,CAAA;IACnC,IAAI,MAAM,KAAK,SAAS;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAA;IAChD,IAAI,QAAQ,KAAK,SAAS;QAAE,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAA;IACtD,OAAO,MAAM,CAAA;AACf,CAAC;AAED,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C;AAE/C;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,IAAc,EAAE,gBAA+B,EAAE;IAC/E,oBAAoB;IACpB,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;IAE9B,gBAAgB;IAChB,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,4DAA4D;IAC5D,IAAI,CAAC,MAAM,CAAC,QAAQ,IAAI,CAAC,M
AAM,CAAC,MAAM,EAAE,CAAC;QACvC,OAAO,CAAC,KAAK,CAAC,4CAA4C,CAAC,CAAA;QAC3D,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAA;QAC7D,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IAED,wBAAwB;IACxB,MAAM,YAAY,GAAG,mBAAmB,CAAC,aAAa,CAAC,CAAA;IAEvD,IAAI,CAAC;QACH,oEAAoE;QACpE,MAAM,WAAW,GAAG,iBAAiB,CAAC,YAAY,CAAC,CAAA;QACnD,MAAM,WAAW,CAAC,UAAU,EAAE,CAAA;QAE9B,6BAA6B;QAC7B,IAAI,UAAkB,CAAA;QAEtB,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAClB,yCAAyC;YACzC,UAAU,GAAG,mBAAmB,CAAC,YAAY,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,CAAA;QAClF,CAAC;aAAM,CAAC;YACN,8CAA8C;YAC9C,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,QAAS,CAAC,CAAA;YAEtC,sDAAsD;YACtD,MAAM,SAAS,GAAG,YAAY,CAAC,UAAU,EAAE,aAAa,CAAC,CAAA;YACzD,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAA;gBACxB,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAA;gBACpB,OAAM;YACR,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,MAAM,WAAW,CAAC,YAAY,CAAC,UAAU,CAAC,CAAA;QAE1C,kDAAkD;QAClD,IAAI,yBAAyB,CAAC,UAAU,EAAE,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;YAC/D,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,UAAU,CAAC,CAAA;YAC1B,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,uDAAuD;gBACvD,IACE,CAAC,CAAC,KAAK,YAAY,KAAK,CAAC;oBACzB,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjB,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAClD,CAAC;oBACD,MAAM,KAAK,CAAA;gBACb,CAAC;YACH,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,MAAM,CAAC,oBAAoB,CAAC,UAAU,CAAC,CAAC,CAAA;YAChD,CAAC;YAAC,OAAO,KAAc,EAAE,CAAC;gBACxB,gBAAgB;gBAChB,IACE,CAAC,CAAC,KAAK,YAAY,KAAK,CAAC;oBACzB,CAAC,CAAC,MAAM,IAAI,KAAK,CAAC;oBACjB,KAA+B,CAAC,IAAI,KAAK,QAAQ,EAClD,CAAC;oBACD,MAAM,KAAK,CAAA;gBACb,CAAC;YACH,CAAC;QACH,CAAC;QAED,sCAAsC;QACtC,MAAM,WAAW,CAAC,QAAQ,EAAE,CAAA;QAE5B,+BAA+B;QAC/B,MAAM,MAAM,GAAG;YACb,QAAQ,EAAE,UAAU;YACpB,OAAO,EAAE,IAAI;YACb,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAA;QACD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAA;IAC9C,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,MAAM,GAAG,KAAK,YAAY,KAAK,CA
AC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QACrE,OAAO,CAAC,KAAK,CAAC,UAAU,MAAM,EAAE,CAAC,CAAA;QACjC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;AACH,CAAC"}
@@ -1,10 +1,13 @@
1
1
  import { SemanticChunker } from '../chunker/index.js';
2
2
  import type { Embedder } from '../embedder/index.js';
3
3
  import { DocumentParser } from '../parser/index.js';
4
+ import type { QualityProfile } from '../pdf-visual/types.js';
5
+ import type { BaseDirsConfig, BaseDirsConfigWarning } from '../utils/base-dirs.js';
4
6
  import type { VectorStore } from '../vectordb/index.js';
5
7
  import type { GlobalOptions, ResolvedGlobalConfig } from './options.js';
6
8
  interface IngestConfig {
7
- baseDir: string;
9
+ baseDirs: BaseDirsConfig;
10
+ baseDirsWarnings: BaseDirsConfigWarning[];
8
11
  dbPath: string;
9
12
  cacheDir: string;
10
13
  modelName: string;
@@ -12,10 +15,21 @@ interface IngestConfig {
12
15
  chunkMinLength?: number;
13
16
  }
14
17
  interface IngestCliOptions {
15
- baseDir?: string | undefined;
18
+ /**
19
+ * Collected `--base-dir` values in CLI order. Repeatable: each flag
20
+ * occurrence appends one entry. An empty array means the flag was not
21
+ * provided (resolver then falls through to env / cwd).
22
+ */
23
+ baseDirs?: string[] | undefined;
16
24
  maxFileSize?: number | undefined;
17
25
  chunkMinLength?: number | undefined;
18
26
  visual?: boolean | undefined;
27
+ /**
28
+ * Visual-quality profile selector. Only meaningful when `visual` is true;
29
+ * silently ignored otherwise (mirrors the existing `--visual` precedent
30
+ * of silently coercing for non-PDF files). Defaults to `'fast'`.
31
+ */
32
+ visualQuality?: QualityProfile | undefined;
19
33
  }
20
34
  interface ParsedArgs {
21
35
  positional: string | undefined;
@@ -30,27 +44,37 @@ interface ParsedArgs {
30
44
  export declare function parseArgs(args: string[]): ParsedArgs;
31
45
  /**
32
46
  * Resolve ingest config by merging global config with ingest-specific options.
33
- * Ingest-specific: baseDir, maxFileSize (CLI flags > env vars > defaults).
34
- * Validates all resolved values before returning.
47
+ *
48
+ * Base directories are resolved via the shared CLI resolver
49
+ * ({@link resolveCliBaseDirsOrExit}) which applies the documented precedence
50
+ * (CLI roots > `BASE_DIRS` > `BASE_DIR` > `cwd`), realpath-normalizes every
51
+ * effective root, dedupes exact duplicates, and prunes nested roots. CLI
52
+ * roots are pre-validated against the sensitive-path policy here so the
53
+ * user sees `--base-dir`-attributed errors before the resolver touches the
54
+ * filesystem.
55
+ *
56
+ * Other ingest-specific values (maxFileSize, chunkMinLength) follow the
57
+ * existing CLI > env > defaults order and are validated against the same
58
+ * ranges as before.
35
59
  */
36
- export declare function resolveConfig(globalConfig: ResolvedGlobalConfig, ingestOptions?: IngestCliOptions): IngestConfig;
60
+ export declare function resolveConfig(globalConfig: ResolvedGlobalConfig, ingestOptions?: IngestCliOptions): Promise<IngestConfig>;
37
61
  /**
38
- * Options for `ingestSingleFile`. Discriminated on `visual` so the visual path
39
- * is type-only callable with the VLM config it actually needs:
62
+ * Options for `ingestSingleFile`. Discriminated on `visual` so the visual
63
+ * path is type-only callable with the VLM config it actually needs:
40
64
  * - `visual` absent or `false` → no VLM fields required (and not accepted).
41
- * - `visual: true` → `vlmModelName` and `cacheDir` required; `device` optional.
65
+ * - `visual: true` → `profile` and `cacheDir` required; `device` optional.
42
66
  *
43
- * Why a union rather than always-required fields: making `vlmModelName` /
44
- * `cacheDir` unconditionally required forces non-visual callers (default-mode
45
- * tests, future direct-import callers that only ingest non-PDF files) to
46
- * fabricate VLM config they will never use. The visual-true variant still
47
- * catches accidental misuse at compile time, which was the original goal.
67
+ * Why a union rather than always-required fields: making the VLM fields
68
+ * unconditionally required forces non-visual callers (default-mode tests,
69
+ * future direct-import callers that only ingest non-PDF files) to fabricate
70
+ * VLM config they will never use. The visual-true variant still catches
71
+ * accidental misuse at compile time, which was the original goal.
48
72
  */
49
73
  export type IngestSingleFileOptions = {
50
74
  visual?: false | undefined;
51
75
  } | {
52
76
  visual: true;
53
- vlmModelName: string;
77
+ profile: QualityProfile;
54
78
  cacheDir: string;
55
79
  device?: string | undefined;
56
80
  };
@@ -1 +1 @@
1
- {"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACrD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAGpD,OAAO,EAAE,cAAc,EAAwB,MAAM,oBAAoB,CAAA;AACzE,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAEpE,OAAO,KAAK,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,cAAc,CAAA;AAmBvE,UAAU,YAAY;IACpB,OAAO,EAAE,MAAM,CAAA;IACf,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;IACnB,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB;AAQD,UAAU,gBAAgB;IACxB,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAC5B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAChC,cAAc,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IACnC,MAAM,CAAC,EAAE,OAAO,GAAG,SAAS,CAAA;CAC7B;AAED,UAAU,UAAU;IAClB,UAAU,EAAE,MAAM,GAAG,SAAS,CAAA;IAC9B,OAAO,EAAE,gBAAgB,CAAA;IACzB,IAAI,EAAE,OAAO,CAAA;CACd;AAkCD;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,UAAU,CA6EpD;AAMD;;;;GAIG;AACH,wBAAgB,aAAa,CAC3B,YAAY,EAAE,oBAAoB,EAClC,aAAa,GAAE,gBAAqB,GACnC,YAAY,CA+Cd;AAgFD;;;;;;;;;;;GAWG;AACH,MAAM,MAAM,uBAAuB,GAC/B;IAAE,MAAM,CAAC,EAAE,KAAK,GAAG,SAAS,CAAA;CAAE,GAC9B;IACE,MAAM,EAAE,IAAI,CAAA;IACZ,YAAY,EAAE,MAAM,CAAA;IACpB,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;CAC5B,CAAA;AAEL;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,cAAc,EACtB,OAAO,EAAE,eAAe,EACxB,QAAQ,EAAE,QAAQ,EAClB,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,uBAAuB,GAChC,OAAO,CAAC,MAAM,CAAC,CAsGjB;AAMD;;;;GAIG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,aAAa,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CAuHhG"}
1
+ {"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/cli/ingest.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAA;AACrD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAA;AAGpD,OAAO,EAAE,cAAc,EAAwB,MAAM,oBAAoB,CAAA;AACzE,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAA;AAC5D,OAAO,KAAK,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAA;AAClF,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,sBAAsB,CAAA;AAEpE,OAAO,KAAK,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,cAAc,CAAA;AAoBvE,UAAU,YAAY;IACpB,QAAQ,EAAE,cAAc,CAAA;IACxB,gBAAgB,EAAE,qBAAqB,EAAE,CAAA;IACzC,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,MAAM,CAAA;IACjB,WAAW,EAAE,MAAM,CAAA;IACnB,cAAc,CAAC,EAAE,MAAM,CAAA;CACxB;AAQD,UAAU,gBAAgB;IACxB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,EAAE,GAAG,SAAS,CAAA;IAC/B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IAChC,cAAc,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;IACnC,MAAM,CAAC,EAAE,OAAO,GAAG,SAAS,CAAA;IAC5B;;;;OAIG;IACH,aAAa,CAAC,EAAE,cAAc,GAAG,SAAS,CAAA;CAC3C;AAED,UAAU,UAAU;IAClB,UAAU,EAAE,MAAM,GAAG,SAAS,CAAA;IAC9B,OAAO,EAAE,gBAAgB,CAAA;IACzB,IAAI,EAAE,OAAO,CAAA;CACd;AAmCD;;;;GAIG;AACH,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,UAAU,CA+FpD;AAMD;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,aAAa,CACjC,YAAY,EAAE,oBAAoB,EAClC,aAAa,GAAE,gBAAqB,GACnC,OAAO,CAAC,YAAY,CAAC,CAyDvB;AAmHD;;;;;;;;;;;GAWG;AACH,MAAM,MAAM,uBAAuB,GAC/B;IAAE,MAAM,CAAC,EAAE,KAAK,GAAG,SAAS,CAAA;CAAE,GAC9B;IACE,MAAM,EAAE,IAAI,CAAA;IACZ,OAAO,EAAE,cAAc,CAAA;IACvB,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAA;CAC5B,CAAA;AAEL;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,cAAc,EACtB,OAAO,EAAE,eAAe,EACxB,QAAQ,EAAE,QAAQ,EAClB,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,uBAAuB,GAChC,OAAO,CAAC,MAAM,CAAC,CAuGjB;AAMD;;;;GAIG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,aAAa,GAAE,aAAkB,GAAG,OAAO,CAAC,IAAI,CAAC,CA0IhG"}