knowledge-rag 3.9.0__tar.gz → 3.9.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/.gitignore +6 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/PKG-INFO +93 -2
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/README.md +92 -1
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/mcp_server/__init__.py +1 -1
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/mcp_server/config.py +15 -2
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/mcp_server/server.py +503 -77
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/pyproject.toml +1 -1
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/LICENSE +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/config.example.yaml +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/mcp_server/guarded.py +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/mcp_server/ingestion.py +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/mcp_server/instance_lock.py +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/mcp_server/preflight.py +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/npm/README.md +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/presets/cybersecurity.yaml +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/presets/developer.yaml +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/presets/general.yaml +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/presets/research.yaml +0 -0
- {knowledge_rag-3.9.0 → knowledge_rag-3.9.1}/requirements.txt +0 -0
|
@@ -45,6 +45,12 @@ documents/README-CATEGORIES.md
|
|
|
45
45
|
*.tar.gz
|
|
46
46
|
*.bak
|
|
47
47
|
|
|
48
|
+
# Type-checker cache (per-Python-version, auto-generated)
|
|
49
|
+
.mypy_cache/
|
|
50
|
+
|
|
51
|
+
# Hypothesis property-based testing cache (auto-generated)
|
|
52
|
+
.hypothesis/
|
|
53
|
+
|
|
48
54
|
# OS files
|
|
49
55
|
.DS_Store
|
|
50
56
|
Thumbs.db
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: knowledge-rag
|
|
3
|
-
Version: 3.9.0
|
|
3
|
+
Version: 3.9.1
|
|
4
4
|
Summary: Local RAG System for Claude Code — Hybrid search + Cross-encoder Reranking + 12 MCP Tools + 20 Format Parsers. Zero external servers.
|
|
5
5
|
Project-URL: Homepage, https://github.com/lyonzin/knowledge-rag
|
|
6
6
|
Project-URL: Repository, https://github.com/lyonzin/knowledge-rag
|
|
@@ -72,11 +72,27 @@ pip install knowledge-rag → restart Claude Code → search_knowledge("your que
|
|
|
72
72
|
|
|
73
73
|
---
|
|
74
74
|
|
|
75
|
+
## Star History
|
|
76
|
+
|
|
77
|
+
<div align="center">
|
|
78
|
+
|
|
79
|
+
<a href="https://www.star-history.com/?repos=lyonzin%2Fknowledge-rag&type=date&legend=top-left">
|
|
80
|
+
<picture>
|
|
81
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/chart?repos=lyonzin/knowledge-rag&type=date&theme=dark&legend=top-left" />
|
|
82
|
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/chart?repos=lyonzin/knowledge-rag&type=date&legend=top-left" />
|
|
83
|
+
<img alt="Star History Chart" src="https://api.star-history.com/chart?repos=lyonzin/knowledge-rag&type=date&legend=top-left" />
|
|
84
|
+
</picture>
|
|
85
|
+
</a>
|
|
86
|
+
|
|
87
|
+
</div>
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
75
91
|
## What's New in v3.9.0
|
|
76
92
|
|
|
77
93
|
### Quality Gate — 7-Pillar PR Validation
|
|
78
94
|
|
|
79
|
-
|
|
95
|
+
Every PR (including dependabot bumps and one-line fixes) is now evaluated against **35+ automated checks** spread across 7 pillars before any human review:
|
|
80
96
|
|
|
81
97
|
| Pillar | What it enforces | Tools |
|
|
82
98
|
|---|---|---|
|
|
@@ -369,6 +385,7 @@ flowchart LR
|
|
|
369
385
|
|
|
370
386
|
- Python 3.11+
|
|
371
387
|
- Claude Code CLI
|
|
388
|
+
- *…or any other MCP client (Claude Desktop, Cursor, VS Code, Antigravity, opencode, Windsurf) — see [Use with other MCP clients](#use-with-other-mcp-clients)*
|
|
372
389
|
- ~200MB disk for model cache (auto-downloaded on first run)
|
|
373
390
|
- *Optional:* NVIDIA GPU + CUDA for accelerated embeddings (`pip install knowledge-rag[gpu]` + `models.embedding.gpu: true` in config)
|
|
374
391
|
|
|
@@ -484,6 +501,70 @@ Add to `~/.claude.json`:
|
|
|
484
501
|
> Replace `YOUR_USER` with your username, or use the full path from `echo $HOME`.
|
|
485
502
|
</details>
|
|
486
503
|
|
|
504
|
+
### Use with other MCP clients
|
|
505
|
+
|
|
506
|
+
`knowledge-rag` is a standard **stdio MCP server** — it works with any MCP-compatible client, not only Claude Code. The launch command is the same everywhere (the `python -m mcp_server.server` from whichever install method you picked); only the **config file location** and **JSON shape** differ per client.
|
|
507
|
+
|
|
508
|
+
#### Clients using the standard `mcpServers` format
|
|
509
|
+
|
|
510
|
+
For **Claude Desktop, Cursor, Antigravity, and Windsurf**, use the same block — only the file location changes:
|
|
511
|
+
|
|
512
|
+
```json
|
|
513
|
+
{
|
|
514
|
+
"mcpServers": {
|
|
515
|
+
"knowledge-rag": {
|
|
516
|
+
"command": "/home/YOUR_USER/knowledge-rag/venv/bin/python",
|
|
517
|
+
"args": ["-m", "mcp_server.server"]
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
}
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
> **Windows**: set `command` to the full path of `venv\Scripts\python.exe`.
|
|
524
|
+
|
|
525
|
+
| Client | Config file | Notes |
|
|
526
|
+
|---|---|---|
|
|
527
|
+
| **Claude Code** | use `claude mcp add …` (see install methods above) | The CLI writes `~/.claude.json` for you — manual edits to it aren't reliably picked up. |
|
|
528
|
+
| **Claude Desktop** | macOS: `~/Library/Application Support/Claude/claude_desktop_config.json` · Windows: `%APPDATA%\Claude\claude_desktop_config.json` | Easiest: **Settings → Developer → Edit Config** opens the correct file (avoids the Windows Store/MSIX path quirk). |
|
|
529
|
+
| **Cursor** | `~/.cursor/mcp.json` (global) or `.cursor/mcp.json` (per project) | — |
|
|
530
|
+
| **Antigravity** | macOS/Linux: `~/.gemini/antigravity/mcp_config.json` · Windows: `%USERPROFILE%\.gemini\antigravity\mcp_config.json` | Open via Agent panel → **"…" → Manage MCP Servers → View raw config**. |
|
|
531
|
+
| **Windsurf** | `~/.codeium/windsurf/mcp_config.json` (global only) | Easiest: Cascade panel → MCP → **View raw config**. |
|
|
532
|
+
|
|
533
|
+
#### VS Code — uses a `servers` key
|
|
534
|
+
|
|
535
|
+
VS Code (Copilot MCP) nests servers under **`servers`**, not `mcpServers`. Put this in `.vscode/mcp.json` (workspace) or the file opened by the **MCP: Open User Configuration** command:
|
|
536
|
+
|
|
537
|
+
```json
|
|
538
|
+
{
|
|
539
|
+
"servers": {
|
|
540
|
+
"knowledge-rag": {
|
|
541
|
+
"type": "stdio",
|
|
542
|
+
"command": "/home/YOUR_USER/knowledge-rag/venv/bin/python",
|
|
543
|
+
"args": ["-m", "mcp_server.server"]
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
```
|
|
548
|
+
|
|
549
|
+
#### opencode — uses an `mcp` key
|
|
550
|
+
|
|
551
|
+
opencode nests servers under **`mcp`**, takes `command` as a single **array**, and uses `environment` instead of `env`. Put this in `opencode.json` (project root) or `~/.config/opencode/opencode.json` (global):
|
|
552
|
+
|
|
553
|
+
```jsonc
|
|
554
|
+
{
|
|
555
|
+
"$schema": "https://opencode.ai/config.json",
|
|
556
|
+
"mcp": {
|
|
557
|
+
"knowledge-rag": {
|
|
558
|
+
"type": "local",
|
|
559
|
+
"command": ["/home/YOUR_USER/knowledge-rag/venv/bin/python", "-m", "mcp_server.server"],
|
|
560
|
+
"enabled": true
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
}
|
|
564
|
+
```
|
|
565
|
+
|
|
566
|
+
> **Any other MCP client**: point it at the same command + args (`…/venv/bin/python -m mcp_server.server`). If it speaks stdio MCP, knowledge-rag works — only the config file's location and key naming differ. Check your client's docs for the exact path.
|
|
567
|
+
|
|
487
568
|
### Verify
|
|
488
569
|
|
|
489
570
|
```bash
|
|
@@ -1181,6 +1262,16 @@ A second instance exits immediately with code 75. Default is OFF (multi-client f
|
|
|
1181
1262
|
- **CHORE**: pytest `tmp_path_retention_count=1` to avoid Windows atexit cleanup race in CI.
|
|
1182
1263
|
- **ROADMAP**: Tracked v4.0 shared-service architecture (one daemon, many thin MCP clients) as the long-term fix for multi-process resource duplication. (#34)
|
|
1183
1264
|
|
|
1265
|
+
### v3.9.1 (2026-06-08)
|
|
1266
|
+
|
|
1267
|
+
- **FIX**: Expand `~` in `config.yaml` path values (`documents_dir`, `data_dir`, `models_cache_dir`) via `expanduser()` on all platforms (#86).
|
|
1268
|
+
- **FIX**: Warn when `documents_dir` resolves to a non-existent path instead of silently indexing zero files.
|
|
1269
|
+
- **FIX**: File watcher now uses accumulate-mode debounce — bulk file copies no longer starve the reindex trigger.
|
|
1270
|
+
- **FIX**: Concurrent `index_all()` calls are serialized via `_index_lock` to prevent ChromaDB SQLite corruption.
|
|
1271
|
+
- **FIX**: `collection.add()` is batched (500 chunks/call) to cap memory usage during large reindex operations.
|
|
1272
|
+
- **NEW**: `KNOWLEDGE_RAG_WATCHER_DISABLED=1` env var to disable the file watcher for troubleshooting.
|
|
1273
|
+
- **NEW**: Progress logging every 10% for reindex operations with >100 documents.
|
|
1274
|
+
|
|
1184
1275
|
### Unreleased
|
|
1185
1276
|
|
|
1186
1277
|
- **FIX**: Startup preflight probes ChromaDB in a child process and moves crashing persistent indexes to `data/backups/auto-repair-*` before MCP initialization.
|
|
@@ -34,11 +34,27 @@ pip install knowledge-rag → restart Claude Code → search_knowledge("your que
|
|
|
34
34
|
|
|
35
35
|
---
|
|
36
36
|
|
|
37
|
+
## Star History
|
|
38
|
+
|
|
39
|
+
<div align="center">
|
|
40
|
+
|
|
41
|
+
<a href="https://www.star-history.com/?repos=lyonzin%2Fknowledge-rag&type=date&legend=top-left">
|
|
42
|
+
<picture>
|
|
43
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/chart?repos=lyonzin/knowledge-rag&type=date&theme=dark&legend=top-left" />
|
|
44
|
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/chart?repos=lyonzin/knowledge-rag&type=date&legend=top-left" />
|
|
45
|
+
<img alt="Star History Chart" src="https://api.star-history.com/chart?repos=lyonzin/knowledge-rag&type=date&legend=top-left" />
|
|
46
|
+
</picture>
|
|
47
|
+
</a>
|
|
48
|
+
|
|
49
|
+
</div>
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
37
53
|
## What's New in v3.9.0
|
|
38
54
|
|
|
39
55
|
### Quality Gate — 7-Pillar PR Validation
|
|
40
56
|
|
|
41
|
-
|
|
57
|
+
Every PR (including dependabot bumps and one-line fixes) is now evaluated against **35+ automated checks** spread across 7 pillars before any human review:
|
|
42
58
|
|
|
43
59
|
| Pillar | What it enforces | Tools |
|
|
44
60
|
|---|---|---|
|
|
@@ -331,6 +347,7 @@ flowchart LR
|
|
|
331
347
|
|
|
332
348
|
- Python 3.11+
|
|
333
349
|
- Claude Code CLI
|
|
350
|
+
- *…or any other MCP client (Claude Desktop, Cursor, VS Code, Antigravity, opencode, Windsurf) — see [Use with other MCP clients](#use-with-other-mcp-clients)*
|
|
334
351
|
- ~200MB disk for model cache (auto-downloaded on first run)
|
|
335
352
|
- *Optional:* NVIDIA GPU + CUDA for accelerated embeddings (`pip install knowledge-rag[gpu]` + `models.embedding.gpu: true` in config)
|
|
336
353
|
|
|
@@ -446,6 +463,70 @@ Add to `~/.claude.json`:
|
|
|
446
463
|
> Replace `YOUR_USER` with your username, or use the full path from `echo $HOME`.
|
|
447
464
|
</details>
|
|
448
465
|
|
|
466
|
+
### Use with other MCP clients
|
|
467
|
+
|
|
468
|
+
`knowledge-rag` is a standard **stdio MCP server** — it works with any MCP-compatible client, not only Claude Code. The launch command is the same everywhere (the `python -m mcp_server.server` from whichever install method you picked); only the **config file location** and **JSON shape** differ per client.
|
|
469
|
+
|
|
470
|
+
#### Clients using the standard `mcpServers` format
|
|
471
|
+
|
|
472
|
+
For **Claude Desktop, Cursor, Antigravity, and Windsurf**, use the same block — only the file location changes:
|
|
473
|
+
|
|
474
|
+
```json
|
|
475
|
+
{
|
|
476
|
+
"mcpServers": {
|
|
477
|
+
"knowledge-rag": {
|
|
478
|
+
"command": "/home/YOUR_USER/knowledge-rag/venv/bin/python",
|
|
479
|
+
"args": ["-m", "mcp_server.server"]
|
|
480
|
+
}
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
> **Windows**: set `command` to the full path of `venv\Scripts\python.exe`.
|
|
486
|
+
|
|
487
|
+
| Client | Config file | Notes |
|
|
488
|
+
|---|---|---|
|
|
489
|
+
| **Claude Code** | use `claude mcp add …` (see install methods above) | The CLI writes `~/.claude.json` for you — manual edits to it aren't reliably picked up. |
|
|
490
|
+
| **Claude Desktop** | macOS: `~/Library/Application Support/Claude/claude_desktop_config.json` · Windows: `%APPDATA%\Claude\claude_desktop_config.json` | Easiest: **Settings → Developer → Edit Config** opens the correct file (avoids the Windows Store/MSIX path quirk). |
|
|
491
|
+
| **Cursor** | `~/.cursor/mcp.json` (global) or `.cursor/mcp.json` (per project) | — |
|
|
492
|
+
| **Antigravity** | macOS/Linux: `~/.gemini/antigravity/mcp_config.json` · Windows: `%USERPROFILE%\.gemini\antigravity\mcp_config.json` | Open via Agent panel → **"…" → Manage MCP Servers → View raw config**. |
|
|
493
|
+
| **Windsurf** | `~/.codeium/windsurf/mcp_config.json` (global only) | Easiest: Cascade panel → MCP → **View raw config**. |
|
|
494
|
+
|
|
495
|
+
#### VS Code — uses a `servers` key
|
|
496
|
+
|
|
497
|
+
VS Code (Copilot MCP) nests servers under **`servers`**, not `mcpServers`. Put this in `.vscode/mcp.json` (workspace) or the file opened by the **MCP: Open User Configuration** command:
|
|
498
|
+
|
|
499
|
+
```json
|
|
500
|
+
{
|
|
501
|
+
"servers": {
|
|
502
|
+
"knowledge-rag": {
|
|
503
|
+
"type": "stdio",
|
|
504
|
+
"command": "/home/YOUR_USER/knowledge-rag/venv/bin/python",
|
|
505
|
+
"args": ["-m", "mcp_server.server"]
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
}
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
#### opencode — uses an `mcp` key
|
|
512
|
+
|
|
513
|
+
opencode nests servers under **`mcp`**, takes `command` as a single **array**, and uses `environment` instead of `env`. Put this in `opencode.json` (project root) or `~/.config/opencode/opencode.json` (global):
|
|
514
|
+
|
|
515
|
+
```jsonc
|
|
516
|
+
{
|
|
517
|
+
"$schema": "https://opencode.ai/config.json",
|
|
518
|
+
"mcp": {
|
|
519
|
+
"knowledge-rag": {
|
|
520
|
+
"type": "local",
|
|
521
|
+
"command": ["/home/YOUR_USER/knowledge-rag/venv/bin/python", "-m", "mcp_server.server"],
|
|
522
|
+
"enabled": true
|
|
523
|
+
}
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
> **Any other MCP client**: point it at the same command + args (`…/venv/bin/python -m mcp_server.server`). If it speaks stdio MCP, knowledge-rag works — only the config file's location and key naming differ. Check your client's docs for the exact path.
|
|
529
|
+
|
|
449
530
|
### Verify
|
|
450
531
|
|
|
451
532
|
```bash
|
|
@@ -1143,6 +1224,16 @@ A second instance exits immediately with code 75. Default is OFF (multi-client f
|
|
|
1143
1224
|
- **CHORE**: pytest `tmp_path_retention_count=1` to avoid Windows atexit cleanup race in CI.
|
|
1144
1225
|
- **ROADMAP**: Tracked v4.0 shared-service architecture (one daemon, many thin MCP clients) as the long-term fix for multi-process resource duplication. (#34)
|
|
1145
1226
|
|
|
1227
|
+
### v3.9.1 (2026-06-08)
|
|
1228
|
+
|
|
1229
|
+
- **FIX**: Expand `~` in `config.yaml` path values (`documents_dir`, `data_dir`, `models_cache_dir`) via `expanduser()` on all platforms (#86).
|
|
1230
|
+
- **FIX**: Warn when `documents_dir` resolves to a non-existent path instead of silently indexing zero files.
|
|
1231
|
+
- **FIX**: File watcher now uses accumulate-mode debounce — bulk file copies no longer starve the reindex trigger.
|
|
1232
|
+
- **FIX**: Concurrent `index_all()` calls are serialized via `_index_lock` to prevent ChromaDB SQLite corruption.
|
|
1233
|
+
- **FIX**: `collection.add()` is batched (500 chunks/call) to cap memory usage during large reindex operations.
|
|
1234
|
+
- **NEW**: `KNOWLEDGE_RAG_WATCHER_DISABLED=1` env var to disable the file watcher for troubleshooting.
|
|
1235
|
+
- **NEW**: Progress logging every 10% for reindex operations with >100 documents.
|
|
1236
|
+
|
|
1146
1237
|
### Unreleased
|
|
1147
1238
|
|
|
1148
1239
|
- **FIX**: Startup preflight probes ChromaDB in a child process and moves crashing persistent indexes to `data/backups/auto-repair-*` before MCP initialization.
|
|
@@ -384,10 +384,14 @@ _DEFAULT_QUERY_EXPANSIONS = {
|
|
|
384
384
|
|
|
385
385
|
|
|
386
386
|
def _resolve_path(raw, default: Path) -> Path:
    """Turn a configured path value into a usable ``Path``.

    Args:
        raw: Path value read from YAML, or ``None`` when the key is unset.
        default: Path returned unchanged when ``raw`` is ``None``.

    Returns:
        ``default`` when ``raw`` is ``None``. Otherwise ``raw`` with a leading
        ``~`` expanded to the user's home directory; if the expanded path is
        still relative it is anchored under ``BASE_DIR``.
    """
    if raw is None:
        return default
    expanded = Path(raw).expanduser()
    return expanded if expanded.is_absolute() else BASE_DIR / expanded
|
|
@@ -585,6 +589,15 @@ class Config:
|
|
|
585
589
|
print(f"[WARN] keyword_routes.{cat} is not a list, removing")
|
|
586
590
|
del self.keyword_routes[cat]
|
|
587
591
|
|
|
592
|
+
# Warn when documents_dir was explicitly set but does not exist
|
|
593
|
+
raw_docs = _get("paths", "documents_dir", None)
|
|
594
|
+
if raw_docs is not None and not self.documents_dir.exists():
|
|
595
|
+
print(
|
|
596
|
+
f"[WARN] documents_dir '{raw_docs}' resolved to "
|
|
597
|
+
f"'{self.documents_dir}' which does not exist — creating it. "
|
|
598
|
+
f"Verify the path in config.yaml if reindex returns 0 files."
|
|
599
|
+
)
|
|
600
|
+
|
|
588
601
|
# Ensure directories exist
|
|
589
602
|
self.data_dir.mkdir(parents=True, exist_ok=True)
|
|
590
603
|
self.chroma_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -25,11 +25,15 @@ Data: 2026-04-16
|
|
|
25
25
|
|
|
26
26
|
import hashlib
|
|
27
27
|
import json
|
|
28
|
+
import os
|
|
29
|
+
import platform
|
|
28
30
|
import re
|
|
31
|
+
import subprocess
|
|
29
32
|
import sys
|
|
30
33
|
import threading
|
|
31
34
|
import time
|
|
32
35
|
from collections import OrderedDict
|
|
36
|
+
from dataclasses import dataclass, field
|
|
33
37
|
from datetime import datetime
|
|
34
38
|
from pathlib import Path
|
|
35
39
|
from typing import Any, Dict, List, Optional, Tuple
|
|
@@ -141,6 +145,27 @@ class EmbeddingModelLoadError(RuntimeError):
|
|
|
141
145
|
"""
|
|
142
146
|
|
|
143
147
|
|
|
148
|
+
# =============================================================================
|
|
149
|
+
# GPU READINESS VERIFICATION
|
|
150
|
+
# =============================================================================
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass
class GPUStatus:
    """Outcome of the startup GPU readiness probe.

    Every field defaults to the "no GPU" state, so a freshly constructed
    instance describes a CPU-only setup until a probe fills it in. Callers
    use it to choose between CUDA and CPU and to report why CUDA was skipped.
    """

    # True once the probe concluded that CUDA can be used.
    available: bool = False
    # Name of the ONNX Runtime execution provider to use.
    provider: str = "CPUExecutionProvider"
    # GPU model name; empty when no device was detected.
    device_name: str = ""
    # Total GPU memory in MB; 0 when unknown.
    vram_mb: int = 0
    # Dependencies the probe could not locate (one entry per missing item).
    missing_deps: List[str] = field(default_factory=list)
    # Human-readable explanation of why the GPU is not used, if any.
    fallback_reason: Optional[str] = None
|
|
167
|
+
|
|
168
|
+
|
|
144
169
|
class FastEmbedEmbeddings:
|
|
145
170
|
"""
|
|
146
171
|
FastEmbed-based embedding function for ChromaDB (v1.4.0+ compatible).
|
|
@@ -194,6 +219,216 @@ class FastEmbedEmbeddings:
|
|
|
194
219
|
if added:
|
|
195
220
|
print(f"[INFO] CUDA DLL paths added for: {', '.join(dict.fromkeys(added))}")
|
|
196
221
|
|
|
222
|
+
@staticmethod
def verify_gpu_readiness() -> GPUStatus:
    """Verify GPU readiness for ONNX inference before model load.

    Runs four checks in order and returns at the first one that fails:
        1. CUDAExecutionProvider is listed by onnxruntime
        2. Required NVIDIA libraries (.dll on Windows, .so elsewhere) are found
        3. A GPU device is reported by nvidia-smi
        4. A minimal ONNX session is created with CUDAExecutionProvider

    Returns:
        GPUStatus with diagnostic fields. ``available`` is True when checks
        1-3 pass and the probe session either lists CUDAExecutionProvider as
        active or could not be created at all (treated as inconclusive, see
        check 4). In every other case ``available`` stays False and
        ``fallback_reason`` explains why.
    """
    status = GPUStatus()

    # --- Check 1: CUDAExecutionProvider in onnxruntime ---
    try:
        import onnxruntime as ort

        providers = ort.get_available_providers()
    except ImportError:
        status.fallback_reason = "onnxruntime not installed"
        status.missing_deps.append("onnxruntime-gpu")
        return status
    except Exception as exc:
        status.fallback_reason = f"onnxruntime provider check failed: {exc}"
        return status

    if "CUDAExecutionProvider" not in providers:
        status.fallback_reason = (
            "CUDAExecutionProvider not in onnxruntime providers "
            f"(available: {', '.join(providers)}). "
            "Fix: pip install onnxruntime-gpu"
        )
        return status

    # --- Check 2: Required NVIDIA DLLs / .so files ---
    is_windows = platform.system() == "Windows"
    if is_windows:
        required_dlls = {
            "cublasLt64_12.dll": "nvidia-cublas-cu12",
            "cudnn64_9.dll": "nvidia-cudnn-cu12",
            "cudart64_12.dll": "nvidia-cuda-runtime-cu12",
        }
    else:
        required_dlls = {
            "libcublasLt.so.12": "nvidia-cublas-cu12",
            "libcudnn.so.9": "nvidia-cudnn-cu12",
            "libcudart.so.12": "nvidia-cuda-runtime-cu12",
        }

    import ctypes
    import site

    # Build search paths: PATH dirs + site-packages nvidia bins.
    # Empty PATH entries are dropped so the current directory is not searched.
    search_paths = [d for d in os.environ.get("PATH", "").split(os.pathsep) if d]
    site_dirs = site.getsitepackages() if hasattr(site, "getsitepackages") else []
    for sp in site_dirs:
        nvidia_base = os.path.join(sp, "nvidia")
        try:
            subdirs = os.listdir(nvidia_base)
        except OSError:
            # Missing, not a directory, or unreadable: nothing to add.
            continue
        for sub in subdirs:
            for leaf in ("bin", "lib"):
                candidate = os.path.join(nvidia_base, sub, leaf)
                if os.path.isdir(candidate):
                    search_paths.append(candidate)

    for dll_name, pip_pkg in required_dlls.items():
        found = any(os.path.isfile(os.path.join(d, dll_name)) for d in search_paths)
        if not found:
            # Try ctypes as last resort (system-wide install)
            try:
                if is_windows:
                    ctypes.WinDLL(dll_name)  # type: ignore[attr-defined]
                else:
                    ctypes.CDLL(dll_name)
                found = True
            except OSError:
                pass
        if not found:
            status.missing_deps.append(f"{dll_name} (pip install {pip_pkg})")

    if status.missing_deps:
        status.fallback_reason = f"Missing CUDA dependencies: {', '.join(status.missing_deps)}"
        return status

    # --- Check 3: GPU device via nvidia-smi ---
    try:
        result = subprocess.run(
            [
                "nvidia-smi",
                "--query-gpu=name,memory.total",
                "--format=csv,noheader,nounits",
            ],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except FileNotFoundError:
        status.fallback_reason = "nvidia-smi not found on PATH. Install NVIDIA drivers or add nvidia-smi to PATH."
        return status
    except subprocess.TimeoutExpired:
        status.fallback_reason = "nvidia-smi timed out (driver hang?)"
        return status
    except Exception as exc:
        status.fallback_reason = f"nvidia-smi probe failed: {exc}"
        return status

    smi_output = result.stdout.strip()
    if result.returncode != 0 or not smi_output:
        status.fallback_reason = "nvidia-smi failed or returned no GPU. Check NVIDIA driver installation."
        return status

    # Only the first reported GPU is described in the status.
    parts = [p.strip() for p in smi_output.splitlines()[0].split(",")]
    status.device_name = parts[0] or "Unknown"
    try:
        status.vram_mb = int(parts[1]) if len(parts) > 1 else 0
    except ValueError:
        # nvidia-smi can report non-numeric memory (e.g. "[N/A]").
        status.vram_mb = 0

    # --- Check 4: Minimal ONNX session with CUDAExecutionProvider ---
    # Hand-assembled ONNX ModelProto (avoids a dependency on the onnx package).
    # Graph: x(float[1]) -> Identity -> y(float[1]), ir_version 7, opset 13.
    # Every length prefix below must match the bytes that follow it.
    float_1 = (
        b"\x12\x0a"  # ValueInfoProto.type (10 bytes)
        b"\x0a\x08"  # TypeProto.tensor_type (8 bytes)
        b"\x08\x01"  # elem_type = FLOAT
        b"\x12\x04\x0a\x02\x08\x01"  # shape: one dim with dim_value = 1
    )
    probe_model = b"".join(
        (
            b"\x08\x07",  # ModelProto.ir_version = 7
            b"\x42\x02\x10\x0d",  # ModelProto.opset_import: default domain, version 13
            b"\x3a\x3b",  # ModelProto.graph (59 bytes)
            b"\x0a\x10\x0a\x01x\x12\x01y\x22\x08Identity",  # node: x -> Identity -> y
            b"\x12\x05probe",  # GraphProto.name
            b"\x5a\x0f\x0a\x01x",  # GraphProto.input "x" ...
            float_1,
            b"\x62\x0f\x0a\x01y",  # GraphProto.output "y" ...
            float_1,
        )
    )

    try:
        opts = ort.SessionOptions()
        opts.log_severity_level = 3  # suppress verbose ORT logs

        try:
            sess = ort.InferenceSession(
                probe_model,
                providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
                sess_options=opts,
            )
            active = sess.get_providers()
        except Exception:
            # Deliberate best-effort: a failure to even build the probe session
            # is treated as inconclusive (e.g. the runtime rejects the probe
            # model), so the earlier provider/library/device checks are trusted.
            status.available = True
            status.provider = "CUDAExecutionProvider"
        else:
            if "CUDAExecutionProvider" in active:
                status.available = True
                status.provider = "CUDAExecutionProvider"
            else:
                current = active[0] if active else "none"
                status.fallback_reason = (
                    f"CUDA session created but active provider is {current}. ORT silently fell back to CPU."
                )

    except ImportError as exc:
        status.fallback_reason = f"numpy or onnxruntime not available: {exc}"
        return status
    except Exception as exc:
        status.fallback_reason = f"CUDA session probe failed: {exc}"

    return status
|
|
401
|
+
|
|
402
|
+
@staticmethod
def _print_gpu_banner(status: GPUStatus) -> None:
    """Emit a short GPU diagnostic banner, one ``print()`` call per line.

    The banner is framed by a blank line and a 60-character rule on each
    side. When ``status.available`` is true it lists the provider plus the
    device name and VRAM when those are set; otherwise it lists the fallback
    reason and any missing dependencies.

    Args:
        status: Probe result to render.

    NOTE(review): the original docstring says this is only called when GPU
    acceleration is enabled and that print() is redirected to stderr during
    init; neither is visible from this block.
    """
    rule = "=" * 60
    banner = ["", rule]

    if status.available:
        banner.append(" GPU STATUS: ACTIVE")
        banner.append(f" Provider: {status.provider}")
        if status.device_name:
            banner.append(f" Device: {status.device_name}")
        if status.vram_mb > 0:
            # Show GB once the card has at least 1024 MB, MB below that.
            if status.vram_mb >= 1024:
                vram_display = f"{status.vram_mb / 1024:.1f} GB"
            else:
                vram_display = f"{status.vram_mb} MB"
            banner.append(f" VRAM: {vram_display}")
    else:
        banner.append(" GPU STATUS: UNAVAILABLE — falling back to CPU")
        if status.fallback_reason:
            banner.append(f" Reason: {status.fallback_reason}")
        if status.missing_deps:
            banner.append(" Missing:")
            banner.extend(f" - {dep}" for dep in status.missing_deps)

    banner.extend((rule, ""))
    for text in banner:
        print(text)
|
|
431
|
+
|
|
197
432
|
def __init__(self, model: str = None):
|
|
198
433
|
self.model_name = model or config.embedding_model
|
|
199
434
|
self._dim = config.embedding_dim
|
|
@@ -209,6 +444,10 @@ class FastEmbedEmbeddings:
|
|
|
209
444
|
def _load_model(self) -> None:
|
|
210
445
|
"""Load the ONNX model on demand. Idempotent and thread-safe.
|
|
211
446
|
|
|
447
|
+
When gpu_acceleration is enabled, runs verify_gpu_readiness() BEFORE
|
|
448
|
+
attempting CUDA model creation. If GPU is not ready, skips the CUDA
|
|
449
|
+
attempt entirely (avoids the silent fallback problem).
|
|
450
|
+
|
|
212
451
|
Raises:
|
|
213
452
|
EmbeddingModelLoadError: when the underlying ONNX runtime cannot
|
|
214
453
|
instantiate the model (missing files, hash mismatch, etc.). The
|
|
@@ -231,17 +470,29 @@ class FastEmbedEmbeddings:
|
|
|
231
470
|
kwargs = dict(self._init_kwargs)
|
|
232
471
|
try:
|
|
233
472
|
if self._gpu:
|
|
473
|
+
# GPU readiness gate — verify BEFORE touching CUDA
|
|
234
474
|
self._setup_cuda_dll_paths()
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
475
|
+
gpu_status = self.verify_gpu_readiness()
|
|
476
|
+
self._print_gpu_banner(gpu_status)
|
|
477
|
+
|
|
478
|
+
if gpu_status.available:
|
|
479
|
+
kwargs["providers"] = ["CUDAExecutionProvider", "CPUExecutionProvider"]
|
|
480
|
+
print(f"[INFO] Loading embedding model: {self.model_name} ({self._dim}D) [GPU accelerated]...")
|
|
481
|
+
try:
|
|
482
|
+
self._model = TextEmbedding(**kwargs)
|
|
483
|
+
print("[INFO] Embedding model loaded successfully [GPU]")
|
|
484
|
+
except (ValueError, RuntimeError) as e:
|
|
485
|
+
print(f"[WARN] GPU init failed ({e}), falling back to CPU...")
|
|
486
|
+
kwargs["providers"] = ["CPUExecutionProvider"]
|
|
487
|
+
self._model = TextEmbedding(**kwargs)
|
|
488
|
+
print("[INFO] Embedding model loaded successfully [CPU fallback]")
|
|
489
|
+
else:
|
|
490
|
+
# GPU configured but not ready — go straight to CPU
|
|
491
|
+
print("[WARN] gpu: true in config but GPU is not available. Loading on CPU.")
|
|
242
492
|
kwargs["providers"] = ["CPUExecutionProvider"]
|
|
493
|
+
print(f"[INFO] Loading embedding model: {self.model_name} ({self._dim}D) [CPU]...")
|
|
243
494
|
self._model = TextEmbedding(**kwargs)
|
|
244
|
-
print("[INFO] Embedding model loaded successfully [CPU
|
|
495
|
+
print("[INFO] Embedding model loaded successfully [CPU]")
|
|
245
496
|
else:
|
|
246
497
|
kwargs["providers"] = ["CPUExecutionProvider"]
|
|
247
498
|
print(f"[INFO] Loading embedding model: {self.model_name} ({self._dim}D)...")
|
|
@@ -499,26 +750,42 @@ class BM25Index:
|
|
|
499
750
|
|
|
500
751
|
|
|
501
752
|
class DocumentWatcher(FileSystemEventHandler):
|
|
502
|
-
"""Watches documents directory and triggers reindex on changes.
|
|
753
|
+
"""Watches documents directory and triggers reindex on changes.
|
|
503
754
|
|
|
504
|
-
|
|
755
|
+
Uses accumulate-mode debounce: collects changed paths during a silence
|
|
756
|
+
window instead of resetting the timer on every file event. This prevents
|
|
757
|
+
bulk file copies (1000+ files) from starving the reindex trigger.
|
|
758
|
+
"""
|
|
759
|
+
|
|
760
|
+
def __init__(self, orchestrator_getter, debounce_seconds: float = 10.0):
|
|
505
761
|
self._get_orchestrator = orchestrator_getter
|
|
506
762
|
self._debounce = debounce_seconds
|
|
507
|
-
self._timer = None
|
|
508
763
|
self._lock = threading.Lock()
|
|
764
|
+
self._pending_paths: set = set()
|
|
765
|
+
self._timer = None
|
|
766
|
+
self._reindex_lock = threading.Lock()
|
|
509
767
|
|
|
510
|
-
def _schedule_reindex(self):
|
|
511
|
-
"""
|
|
768
|
+
def _schedule_reindex(self, path: str):
|
|
769
|
+
"""Accumulate-mode debounce: collect paths, fire once after silence."""
|
|
512
770
|
with self._lock:
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
771
|
+
self._pending_paths.add(path)
|
|
772
|
+
if self._timer is None or not self._timer.is_alive():
|
|
773
|
+
self._timer = threading.Timer(self._debounce, self._do_reindex)
|
|
774
|
+
self._timer.daemon = True
|
|
775
|
+
self._timer.start()
|
|
518
776
|
|
|
519
777
|
def _do_reindex(self):
|
|
520
|
-
"""Perform incremental reindex in background."""
|
|
778
|
+
"""Perform incremental reindex in background (serialized)."""
|
|
779
|
+
if not self._reindex_lock.acquire(blocking=False):
|
|
780
|
+
print("[WATCHER] Reindex already in progress, skipping")
|
|
781
|
+
return
|
|
521
782
|
try:
|
|
783
|
+
with self._lock:
|
|
784
|
+
count = len(self._pending_paths)
|
|
785
|
+
self._pending_paths.clear()
|
|
786
|
+
if count == 0:
|
|
787
|
+
return
|
|
788
|
+
print(f"[WATCHER] {count} file(s) changed, starting incremental reindex...")
|
|
522
789
|
orch = self._get_orchestrator()
|
|
523
790
|
stats = orch.index_all(force=False)
|
|
524
791
|
changed = stats.get("indexed", 0) + stats.get("updated", 0) + stats.get("deleted", 0)
|
|
@@ -529,18 +796,20 @@ class DocumentWatcher(FileSystemEventHandler):
|
|
|
529
796
|
)
|
|
530
797
|
except Exception as e:
|
|
531
798
|
print(f"[WATCHER] Reindex failed: {e}")
|
|
799
|
+
finally:
|
|
800
|
+
self._reindex_lock.release()
|
|
532
801
|
|
|
533
802
|
def on_created(self, event):
|
|
534
803
|
if not event.is_directory and Path(event.src_path).suffix in config.supported_formats:
|
|
535
|
-
self._schedule_reindex()
|
|
804
|
+
self._schedule_reindex(event.src_path)
|
|
536
805
|
|
|
537
806
|
def on_modified(self, event):
|
|
538
807
|
if not event.is_directory and Path(event.src_path).suffix in config.supported_formats:
|
|
539
|
-
self._schedule_reindex()
|
|
808
|
+
self._schedule_reindex(event.src_path)
|
|
540
809
|
|
|
541
810
|
def on_deleted(self, event):
|
|
542
811
|
if not event.is_directory and Path(event.src_path).suffix in config.supported_formats:
|
|
543
|
-
self._schedule_reindex()
|
|
812
|
+
self._schedule_reindex(event.src_path)
|
|
544
813
|
|
|
545
814
|
|
|
546
815
|
# =============================================================================
|
|
@@ -677,13 +946,38 @@ class KnowledgeOrchestrator:
|
|
|
677
946
|
# Indexing
|
|
678
947
|
# =========================================================================
|
|
679
948
|
|
|
949
|
+
_index_lock = threading.Lock()
|
|
950
|
+
|
|
680
951
|
def index_all(self, force: bool = False) -> Dict[str, Any]:
|
|
681
952
|
"""
|
|
682
953
|
Index documents with incremental change detection.
|
|
683
954
|
|
|
684
955
|
Compares file mtime/size against stored metadata to detect changes.
|
|
685
|
-
Only re-indexes files that are new or modified.
|
|
956
|
+
Only re-indexes files that are new or modified. Serialized via
|
|
957
|
+
_index_lock so concurrent calls (watcher + MCP tool) don't corrupt
|
|
958
|
+
ChromaDB's SQLite database.
|
|
686
959
|
"""
|
|
960
|
+
if not self._index_lock.acquire(blocking=False):
|
|
961
|
+
return {
|
|
962
|
+
"total_files": 0,
|
|
963
|
+
"indexed": 0,
|
|
964
|
+
"updated": 0,
|
|
965
|
+
"skipped": 0,
|
|
966
|
+
"deleted": 0,
|
|
967
|
+
"errors": 0,
|
|
968
|
+
"chunks_added": 0,
|
|
969
|
+
"chunks_removed": 0,
|
|
970
|
+
"dedup_skipped": 0,
|
|
971
|
+
"categories": {},
|
|
972
|
+
"skipped_reason": "reindex_already_running",
|
|
973
|
+
}
|
|
974
|
+
try:
|
|
975
|
+
return self._index_all_impl(force)
|
|
976
|
+
finally:
|
|
977
|
+
self._index_lock.release()
|
|
978
|
+
|
|
979
|
+
def _index_all_impl(self, force: bool = False) -> Dict[str, Any]:
|
|
980
|
+
"""Inner implementation of index_all (caller holds _index_lock)."""
|
|
687
981
|
stats = {
|
|
688
982
|
"total_files": 0,
|
|
689
983
|
"indexed": 0,
|
|
@@ -699,14 +993,17 @@ class KnowledgeOrchestrator:
|
|
|
699
993
|
|
|
700
994
|
documents = self.parser.parse_directory()
|
|
701
995
|
stats["total_files"] = len(documents)
|
|
996
|
+
if stats["total_files"] > 100:
|
|
997
|
+
print(f"[INDEX] Scanning {stats['total_files']} documents...")
|
|
702
998
|
|
|
703
999
|
path_to_docid: Dict[str, str] = {}
|
|
704
1000
|
for doc_id, info in self._indexed_docs.items():
|
|
705
1001
|
path_to_docid[info.get("source", "")] = doc_id
|
|
706
1002
|
|
|
707
1003
|
current_paths = set()
|
|
1004
|
+
_progress_interval = max(1, stats["total_files"] // 10)
|
|
708
1005
|
|
|
709
|
-
for doc in documents:
|
|
1006
|
+
for idx, doc in enumerate(documents):
|
|
710
1007
|
current_paths.add(str(doc.source))
|
|
711
1008
|
try:
|
|
712
1009
|
source_str = str(doc.source)
|
|
@@ -768,6 +1065,13 @@ class KnowledgeOrchestrator:
|
|
|
768
1065
|
stats["errors"] += 1
|
|
769
1066
|
print(f"[ERROR] Failed to index {doc.source}: {e}")
|
|
770
1067
|
|
|
1068
|
+
if stats["total_files"] > 100 and (idx + 1) % _progress_interval == 0:
|
|
1069
|
+
pct = int((idx + 1) / stats["total_files"] * 100)
|
|
1070
|
+
print(
|
|
1071
|
+
f"[INDEX] Progress: {idx + 1}/{stats['total_files']} ({pct}%) "
|
|
1072
|
+
f"— {stats['indexed']} new, {stats['skipped']} skipped"
|
|
1073
|
+
)
|
|
1074
|
+
|
|
771
1075
|
# Clean up orphaned docs
|
|
772
1076
|
orphan_ids = []
|
|
773
1077
|
for doc_id, info in list(self._indexed_docs.items()):
|
|
@@ -787,8 +1091,14 @@ class KnowledgeOrchestrator:
|
|
|
787
1091
|
|
|
788
1092
|
return stats
|
|
789
1093
|
|
|
1094
|
+
_CHROMA_BATCH_SIZE = 500
|
|
1095
|
+
|
|
790
1096
|
def _index_document(self, doc: Document) -> Tuple[int, int]:
|
|
791
|
-
"""Index a single document's chunks into ChromaDB and BM25 with dedup.
|
|
1097
|
+
"""Index a single document's chunks into ChromaDB and BM25 with dedup.
|
|
1098
|
+
|
|
1099
|
+
Large documents are split into batches of _CHROMA_BATCH_SIZE to
|
|
1100
|
+
prevent memory spikes when embedding thousands of chunks at once.
|
|
1101
|
+
"""
|
|
792
1102
|
if not doc.chunks:
|
|
793
1103
|
return 0, 0
|
|
794
1104
|
|
|
@@ -823,7 +1133,13 @@ class KnowledgeOrchestrator:
|
|
|
823
1133
|
)
|
|
824
1134
|
|
|
825
1135
|
if unique_ids:
|
|
826
|
-
|
|
1136
|
+
bs = self._CHROMA_BATCH_SIZE
|
|
1137
|
+
for i in range(0, len(unique_ids), bs):
|
|
1138
|
+
self.collection.add(
|
|
1139
|
+
ids=unique_ids[i : i + bs],
|
|
1140
|
+
documents=unique_docs[i : i + bs],
|
|
1141
|
+
metadatas=unique_metas[i : i + bs],
|
|
1142
|
+
)
|
|
827
1143
|
self.bm25_index.add_documents(unique_ids, unique_docs)
|
|
828
1144
|
|
|
829
1145
|
return len(unique_ids), dedup_skipped
|
|
@@ -1667,14 +1983,24 @@ def search_knowledge(query: str, max_results: int = 5, category: str = None, hyb
|
|
|
1667
1983
|
"""
|
|
1668
1984
|
Hybrid search combining semantic search + BM25 keyword search with cross-encoder reranking.
|
|
1669
1985
|
|
|
1986
|
+
Read-only. No side effects.
|
|
1987
|
+
|
|
1670
1988
|
Args:
|
|
1671
|
-
query: Search query text
|
|
1989
|
+
query: Search query text (1–3 keywords recommended; phrase queries also work)
|
|
1672
1990
|
max_results: Maximum number of results (default: 5, max: 20)
|
|
1673
|
-
category: Optional category filter
|
|
1674
|
-
|
|
1991
|
+
category: Optional category filter — one of: security, ctf, logscale, development, general,
|
|
1992
|
+
redteam, blueteam. Call list_categories() first to see available categories and counts.
|
|
1993
|
+
hybrid_alpha: Balance between semantic and keyword search. 0.0 = keyword-only (best for exact
|
|
1994
|
+
technical terms like CVE IDs or tool names), 0.3 = balanced default, 1.0 = semantic-only
|
|
1995
|
+
(best for conceptual or natural-language queries).
|
|
1675
1996
|
|
|
1676
1997
|
Returns:
|
|
1677
|
-
JSON string with
|
|
1998
|
+
JSON string with results including content chunks, source filepath, relevance score, and
|
|
1999
|
+
search method used. Returns chunks, not full document content.
|
|
2000
|
+
|
|
2001
|
+
Usage: Primary search tool — use for any topic or keyword lookup. Prefer search_similar() when
|
|
2002
|
+
you already have a reference document and want more like it. Prefer get_document() when you
|
|
2003
|
+
already know the exact filepath and need the full content.
|
|
1678
2004
|
"""
|
|
1679
2005
|
if not query or not query.strip():
|
|
1680
2006
|
return json.dumps({"status": "error", "message": "Query cannot be empty"})
|
|
@@ -1713,13 +2039,22 @@ def search_knowledge(query: str, max_results: int = 5, category: str = None, hyb
|
|
|
1713
2039
|
@mcp.tool()
|
|
1714
2040
|
def get_document(filepath: str) -> str:
|
|
1715
2041
|
"""
|
|
1716
|
-
Get the full content of a specific document.
|
|
2042
|
+
Get the full content of a specific document by filepath.
|
|
2043
|
+
|
|
2044
|
+
Read-only. No side effects.
|
|
1717
2045
|
|
|
1718
2046
|
Args:
|
|
1719
|
-
filepath:
|
|
2047
|
+
filepath: Relative path to the document within the documents directory
|
|
2048
|
+
(e.g., "security/technique.md"). Must be an indexed file — use
|
|
2049
|
+
list_documents() to browse available paths, or search_knowledge()
|
|
2050
|
+
to find the filepath by topic first.
|
|
1720
2051
|
|
|
1721
2052
|
Returns:
|
|
1722
|
-
JSON string with document content and metadata
|
|
2053
|
+
JSON string with full document content and metadata (filepath, category, size).
|
|
2054
|
+
|
|
2055
|
+
Usage: Use when you need the complete text of a known file — search_knowledge()
|
|
2056
|
+
returns chunks, not full docs. Use search_knowledge() first to find the filepath
|
|
2057
|
+
if unknown. Use list_documents() to browse all available files by category.
|
|
1723
2058
|
"""
|
|
1724
2059
|
orchestrator = get_orchestrator()
|
|
1725
2060
|
doc = orchestrator.get_document(filepath)
|
|
@@ -1735,12 +2070,21 @@ def reindex_documents(force: bool = False, full_rebuild: bool = False) -> str:
|
|
|
1735
2070
|
"""
|
|
1736
2071
|
Index or reindex all documents in the knowledge base.
|
|
1737
2072
|
|
|
2073
|
+
Mutating — modifies the vector index. CPU/IO intensive for full_rebuild (~6 min for 200 docs).
|
|
2074
|
+
|
|
1738
2075
|
Args:
|
|
1739
|
-
force: If True, smart reindex (detects
|
|
1740
|
-
|
|
2076
|
+
force: If True, smart reindex (detects changed files + rebuilds BM25 index). Fast (~5s
|
|
2077
|
+
for 200 docs). Use after manually editing files on disk outside of add_document().
|
|
2078
|
+
full_rebuild: If True, nuclear rebuild — deletes all vectors and re-embeds everything
|
|
2079
|
+
from scratch. Use only if the embedding model changed or the index is corrupted.
|
|
1741
2080
|
|
|
1742
2081
|
Returns:
|
|
1743
|
-
JSON string with indexing statistics
|
|
2082
|
+
JSON string with indexing statistics (docs processed, added, skipped, errors).
|
|
2083
|
+
|
|
2084
|
+
Usage: Normal workflow does not require this — add_document(), update_document(), and
|
|
2085
|
+
add_from_url() all auto-index on call. Use force=True only after direct filesystem edits.
|
|
2086
|
+
Use full_rebuild=True only for model upgrades or index corruption. No arguments runs a
|
|
2087
|
+
fast incremental pass.
|
|
1744
2088
|
"""
|
|
1745
2089
|
orchestrator = get_orchestrator()
|
|
1746
2090
|
|
|
@@ -1759,7 +2103,18 @@ def reindex_documents(force: bool = False, full_rebuild: bool = False) -> str:
|
|
|
1759
2103
|
|
|
1760
2104
|
@mcp.tool()
|
|
1761
2105
|
def list_categories() -> str:
|
|
1762
|
-
"""
|
|
2106
|
+
"""
|
|
2107
|
+
List all document categories with their document counts.
|
|
2108
|
+
|
|
2109
|
+
Read-only. No side effects. Reflects the live index state.
|
|
2110
|
+
|
|
2111
|
+
Returns:
|
|
2112
|
+
JSON string with category names, document counts per category, and total document count.
|
|
2113
|
+
|
|
2114
|
+
Usage: Use before filtering search_knowledge() or list_documents() by category to see
|
|
2115
|
+
which categories exist and how many documents each contains. Use get_index_stats() instead
|
|
2116
|
+
for broader system health metrics (model name, cache hit rate, BM25 status).
|
|
2117
|
+
"""
|
|
1763
2118
|
orchestrator = get_orchestrator()
|
|
1764
2119
|
categories = orchestrator.list_categories()
|
|
1765
2120
|
return json.dumps(
|
|
@@ -1772,8 +2127,20 @@ def list_documents(category: str = None) -> str:
|
|
|
1772
2127
|
"""
|
|
1773
2128
|
List all indexed documents, optionally filtered by category.
|
|
1774
2129
|
|
|
2130
|
+
Read-only. No side effects.
|
|
2131
|
+
|
|
1775
2132
|
Args:
|
|
1776
|
-
category: Optional category filter
|
|
2133
|
+
category: Optional category filter. Must be a valid category name — call
|
|
2134
|
+
list_categories() to see available options (e.g., security, ctf, logscale,
|
|
2135
|
+
development, general, redteam, blueteam).
|
|
2136
|
+
|
|
2137
|
+
Returns:
|
|
2138
|
+
JSON string with list of document filepaths, categories, and metadata for each indexed file.
|
|
2139
|
+
|
|
2140
|
+
Usage: Use to browse what's in the index or verify a specific file is indexed. Use
|
|
2141
|
+
list_categories() first to see valid category names. Use search_knowledge() when you
|
|
2142
|
+
want to find documents by topic rather than browsing the full list. Use get_document()
|
|
2143
|
+
to read a specific file once you have its filepath.
|
|
1777
2144
|
"""
|
|
1778
2145
|
orchestrator = get_orchestrator()
|
|
1779
2146
|
docs = orchestrator.list_documents(category=category)
|
|
@@ -1786,7 +2153,20 @@ def list_documents(category: str = None) -> str:
|
|
|
1786
2153
|
|
|
1787
2154
|
@mcp.tool()
|
|
1788
2155
|
def get_index_stats() -> str:
|
|
1789
|
-
"""
|
|
2156
|
+
"""
|
|
2157
|
+
Get statistics and health metrics for the knowledge base index.
|
|
2158
|
+
|
|
2159
|
+
Read-only. No side effects.
|
|
2160
|
+
|
|
2161
|
+
Returns:
|
|
2162
|
+
JSON string with system metrics: total documents, total chunks, embedding model name,
|
|
2163
|
+
BM25 status, query cache hit rate, and file watcher status.
|
|
2164
|
+
|
|
2165
|
+
Usage: Use for system health checks — verifying the embedding model loaded, checking
|
|
2166
|
+
index population, or monitoring cache efficiency. Use list_categories() for per-category
|
|
2167
|
+
document counts instead. Use evaluate_retrieval() to measure actual search quality with
|
|
2168
|
+
test queries.
|
|
2169
|
+
"""
|
|
1790
2170
|
orchestrator = get_orchestrator()
|
|
1791
2171
|
stats = orchestrator.get_stats()
|
|
1792
2172
|
return json.dumps({"status": "success", "stats": stats}, indent=2)
|
|
@@ -1800,17 +2180,23 @@ def get_index_stats() -> str:
|
|
|
1800
2180
|
@mcp.tool()
|
|
1801
2181
|
def add_document(content: str, filepath: str, category: str = "general") -> str:
|
|
1802
2182
|
"""
|
|
1803
|
-
Add a new document to the knowledge base from raw content.
|
|
2183
|
+
Add a new document to the knowledge base from raw text content.
|
|
1804
2184
|
|
|
1805
|
-
|
|
2185
|
+
Mutating — writes a file to disk and indexes it immediately. No auth required.
|
|
1806
2186
|
|
|
1807
2187
|
Args:
|
|
1808
|
-
content: Full text content of the document
|
|
1809
|
-
filepath: Relative path within documents
|
|
1810
|
-
|
|
2188
|
+
content: Full text content of the document (markdown supported)
|
|
2189
|
+
filepath: Relative path within documents directory (e.g., "security/new-technique.md").
|
|
2190
|
+
The subdirectory should match the category.
|
|
2191
|
+
category: Document category — one of: security, ctf, logscale, development, general,
|
|
2192
|
+
redteam, blueteam (default: general)
|
|
1811
2193
|
|
|
1812
2194
|
Returns:
|
|
1813
|
-
JSON string with indexing results
|
|
2195
|
+
JSON string with indexing results (filepath, chunks created, status).
|
|
2196
|
+
|
|
2197
|
+
Usage: Use to add new documents from text content. Use add_from_url() instead when
|
|
2198
|
+
the source is a web page. Use update_document() to replace content of an existing file.
|
|
2199
|
+
The document is immediately searchable after this call — no manual reindex needed.
|
|
1814
2200
|
"""
|
|
1815
2201
|
if not content or not content.strip():
|
|
1816
2202
|
return json.dumps({"status": "error", "message": "Content cannot be empty"})
|
|
@@ -1829,16 +2215,22 @@ def add_document(content: str, filepath: str, category: str = "general") -> str:
|
|
|
1829
2215
|
@mcp.tool()
|
|
1830
2216
|
def update_document(filepath: str, content: str) -> str:
|
|
1831
2217
|
"""
|
|
1832
|
-
Update an existing document in the knowledge base.
|
|
2218
|
+
Update the content of an existing document in the knowledge base.
|
|
1833
2219
|
|
|
1834
|
-
|
|
2220
|
+
Mutating — overwrites the file on disk and re-indexes immediately. Old chunks are
|
|
2221
|
+
removed and replaced with new ones. Full content replacement, not a patch.
|
|
1835
2222
|
|
|
1836
2223
|
Args:
|
|
1837
|
-
filepath: Full path to the document file
|
|
1838
|
-
|
|
2224
|
+
filepath: Full or relative path to the document file. Must be an already-indexed
|
|
2225
|
+
file — use list_documents() to find valid paths.
|
|
2226
|
+
content: New full-text content to replace the existing content entirely
|
|
1839
2227
|
|
|
1840
2228
|
Returns:
|
|
1841
|
-
JSON string with update results
|
|
2229
|
+
JSON string with update results (old chunk count, new chunk count, status).
|
|
2230
|
+
|
|
2231
|
+
Usage: Use to replace a document's content completely. Use add_document() to create
|
|
2232
|
+
a new file instead. Use remove_document() to delete without replacing. Changes are
|
|
2233
|
+
immediately searchable — no manual reindex needed.
|
|
1842
2234
|
"""
|
|
1843
2235
|
if not filepath:
|
|
1844
2236
|
return json.dumps({"status": "error", "message": "Filepath required"})
|
|
@@ -1859,12 +2251,22 @@ def remove_document(filepath: str, delete_file: bool = False) -> str:
|
|
|
1859
2251
|
"""
|
|
1860
2252
|
Remove a document from the knowledge base index.
|
|
1861
2253
|
|
|
2254
|
+
Mutating — removes index entries. If delete_file=True, also permanently deletes
|
|
2255
|
+
the file from disk (irreversible, cannot be undone).
|
|
2256
|
+
|
|
1862
2257
|
Args:
|
|
1863
|
-
filepath: Path to the document file
|
|
1864
|
-
|
|
2258
|
+
filepath: Path to the document file. Must be an indexed document — use
|
|
2259
|
+
list_documents() to find valid paths.
|
|
2260
|
+
delete_file: If True, permanently deletes the file from disk in addition to
|
|
2261
|
+
removing from the index (default: False).
|
|
1865
2262
|
|
|
1866
2263
|
Returns:
|
|
1867
|
-
JSON string with removal results
|
|
2264
|
+
JSON string with removal results (filepath, status).
|
|
2265
|
+
|
|
2266
|
+
Usage: Use to unindex a document while keeping the file on disk (default). Set
|
|
2267
|
+
delete_file=True only for permanent removal. Use update_document() to replace
|
|
2268
|
+
content instead of removing. Use reindex_documents(force=True) if you deleted
|
|
2269
|
+
the file manually on disk outside of this tool.
|
|
1868
2270
|
"""
|
|
1869
2271
|
if not filepath:
|
|
1870
2272
|
return json.dumps({"status": "error", "message": "Filepath required"})
|
|
@@ -1881,17 +2283,23 @@ def remove_document(filepath: str, delete_file: bool = False) -> str:
|
|
|
1881
2283
|
@mcp.tool()
|
|
1882
2284
|
def add_from_url(url: str, category: str = "general", title: str = None) -> str:
|
|
1883
2285
|
"""
|
|
1884
|
-
Fetch content from a URL and add
|
|
2286
|
+
Fetch content from a URL, convert to markdown, and add to the knowledge base.
|
|
1885
2287
|
|
|
1886
|
-
|
|
2288
|
+
Mutating — makes an outbound HTTP request (requires internet access), strips HTML,
|
|
2289
|
+
converts to markdown, saves to disk, and indexes immediately.
|
|
1887
2290
|
|
|
1888
2291
|
Args:
|
|
1889
|
-
url: URL to fetch
|
|
1890
|
-
category: Document category
|
|
1891
|
-
|
|
2292
|
+
url: Full URL to fetch (https:// required). The page must be publicly accessible.
|
|
2293
|
+
category: Document category — one of: security, ctf, logscale, development, general,
|
|
2294
|
+
redteam, blueteam (default: general)
|
|
2295
|
+
title: Optional document title. Auto-detected from the page's <title> tag if omitted.
|
|
1892
2296
|
|
|
1893
2297
|
Returns:
|
|
1894
|
-
JSON string with indexing results
|
|
2298
|
+
JSON string with indexing results (detected title, filepath, chunks created, status).
|
|
2299
|
+
|
|
2300
|
+
Usage: Use to ingest web content (writeups, blog posts, documentation pages) directly
|
|
2301
|
+
by URL. Use add_document() instead when you already have the text content. The document
|
|
2302
|
+
is immediately searchable after this call — no manual reindex needed.
|
|
1895
2303
|
"""
|
|
1896
2304
|
if not url or not url.strip():
|
|
1897
2305
|
return json.dumps({"status": "error", "message": "URL cannot be empty"})
|
|
@@ -1908,16 +2316,22 @@ def add_from_url(url: str, category: str = "general", title: str = None) -> str:
|
|
|
1908
2316
|
@mcp.tool()
|
|
1909
2317
|
def search_similar(filepath: str, max_results: int = 5) -> str:
|
|
1910
2318
|
"""
|
|
1911
|
-
Find documents similar to a given document.
|
|
2319
|
+
Find documents semantically similar to a given reference document.
|
|
1912
2320
|
|
|
1913
|
-
Uses the document's embedding
|
|
2321
|
+
Read-only. No side effects. Uses the document's embedding for similarity comparison.
|
|
1914
2322
|
|
|
1915
2323
|
Args:
|
|
1916
|
-
filepath: Path to the reference document
|
|
1917
|
-
|
|
2324
|
+
filepath: Path to the reference document (must already be indexed — use
|
|
2325
|
+
list_documents() to verify). E.g., "security/technique.md"
|
|
2326
|
+
max_results: Number of similar documents to return (default: 5, max: 20)
|
|
1918
2327
|
|
|
1919
2328
|
Returns:
|
|
1920
|
-
JSON string with list of similar
|
|
2329
|
+
JSON string with list of similar document filepaths and similarity scores (0.0–1.0).
|
|
2330
|
+
|
|
2331
|
+
Usage: Use when you have a specific document and want to discover thematically related
|
|
2332
|
+
ones. Use search_knowledge() instead when you have a text query rather than a reference
|
|
2333
|
+
document. The reference document must be indexed — call list_documents() to confirm
|
|
2334
|
+
it exists before calling this tool.
|
|
1921
2335
|
"""
|
|
1922
2336
|
if not filepath:
|
|
1923
2337
|
return json.dumps({"status": "error", "message": "Filepath required"})
|
|
@@ -1940,13 +2354,22 @@ def search_similar(filepath: str, max_results: int = 5) -> str:
|
|
|
1940
2354
|
@mcp.tool()
|
|
1941
2355
|
def evaluate_retrieval(test_cases: str) -> str:
|
|
1942
2356
|
"""
|
|
1943
|
-
Evaluate
|
|
2357
|
+
Evaluate search quality by testing whether search_knowledge() retrieves expected documents.
|
|
2358
|
+
|
|
2359
|
+
Read-only. Runs multiple search queries internally. No side effects on the index.
|
|
1944
2360
|
|
|
1945
2361
|
Args:
|
|
1946
|
-
test_cases: JSON string of test cases.
|
|
2362
|
+
test_cases: JSON string array of test cases. Each item requires "query" (search string)
|
|
2363
|
+
and "expected_filepath" (path of the document that should appear in top-5 results).
|
|
2364
|
+
Example: [{"query": "suid exploit", "expected_filepath": "security/suid.md"}]
|
|
1947
2365
|
|
|
1948
2366
|
Returns:
|
|
1949
|
-
JSON string with MRR@5, Recall@5, and per-query
|
|
2367
|
+
JSON string with MRR@5 (Mean Reciprocal Rank), Recall@5, and per-query hit/miss breakdown.
|
|
2368
|
+
MRR@5 above 0.7 indicates good retrieval quality.
|
|
2369
|
+
|
|
2370
|
+
Usage: Use to audit search quality after bulk document ingestion or after tuning
|
|
2371
|
+
hybrid_alpha. Use get_index_stats() for system health checks instead. Use
|
|
2372
|
+
search_knowledge() for actual document retrieval — this tool is for quality measurement only.
|
|
1950
2373
|
"""
|
|
1951
2374
|
try:
|
|
1952
2375
|
cases = json.loads(test_cases) if isinstance(test_cases, str) else test_cases
|
|
@@ -2050,16 +2473,19 @@ def main():
|
|
|
2050
2473
|
print(f"[INFO] Indexed {stats['indexed']} documents with {stats['chunks_added']} chunks")
|
|
2051
2474
|
|
|
2052
2475
|
# Start file watcher for auto-reindex on document changes
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2476
|
+
if os.environ.get("KNOWLEDGE_RAG_WATCHER_DISABLED", "").strip() == "1":
|
|
2477
|
+
print("[WATCHER] Disabled via KNOWLEDGE_RAG_WATCHER_DISABLED=1")
|
|
2478
|
+
else:
|
|
2479
|
+
try:
|
|
2480
|
+
watcher = DocumentWatcher(get_orchestrator, debounce_seconds=10.0)
|
|
2481
|
+
observer = Observer()
|
|
2482
|
+
observer.schedule(watcher, str(config.documents_dir), recursive=True)
|
|
2483
|
+
observer.daemon = True
|
|
2484
|
+
observer.start()
|
|
2485
|
+
print(f"[WATCHER] Monitoring {config.documents_dir} for changes")
|
|
2486
|
+
except Exception as e:
|
|
2487
|
+
print(f"[WARN] Failed to start file watcher: {e}")
|
|
2488
|
+
print("[WARN] Auto-reindexing disabled. Use reindex_documents tool manually.")
|
|
2063
2489
|
|
|
2064
2490
|
# Restore real stdout for MCP JSON-RPC, keep print() going to stderr
|
|
2065
2491
|
from . import _original_stdout
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "knowledge-rag"
|
|
7
|
-
version = "3.9.
|
|
7
|
+
version = "3.9.1"
|
|
8
8
|
description = "Local RAG System for Claude Code — Hybrid search + Cross-encoder Reranking + 12 MCP Tools + 20 Format Parsers. Zero external servers."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|