knowledge-rag 3.6.1__tar.gz → 3.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: knowledge-rag
3
- Version: 3.6.1
3
+ Version: 3.7.0
4
4
  Summary: Local RAG System for Claude Code — Hybrid search + Cross-encoder Reranking + 12 MCP Tools + 20 Format Parsers. Zero external servers.
5
5
  Project-URL: Homepage, https://github.com/lyonzin/knowledge-rag
6
6
  Project-URL: Repository, https://github.com/lyonzin/knowledge-rag
@@ -30,7 +30,7 @@ Requires-Dist: python-docx>=1.0.0
30
30
  Requires-Dist: python-pptx>=1.0.0
31
31
  Requires-Dist: pyyaml>=6.0
32
32
  Requires-Dist: rank-bm25>=0.2.2
33
- Requires-Dist: requests>=2.31.0
33
+ Requires-Dist: requests>=2.33.0
34
34
  Requires-Dist: watchdog>=4.0.0
35
35
  Provides-Extra: gpu
36
36
  Requires-Dist: onnxruntime-gpu>=1.14.0; extra == 'gpu'
@@ -40,7 +40,9 @@ Description-Content-Type: text/markdown
40
40
 
41
41
  <div align="center">
42
42
 
43
- ![Version](https://img.shields.io/badge/version-3.5.2-blue.svg)
43
+ [![PyPI](https://img.shields.io/pypi/v/knowledge-rag)](https://pypi.org/project/knowledge-rag/)
44
+ [![NPM](https://img.shields.io/npm/v/knowledge-rag)](https://www.npmjs.com/package/knowledge-rag)
45
+ [![Downloads](https://static.pepy.tech/badge/knowledge-rag/month)](https://pepy.tech/project/knowledge-rag)
44
46
  ![Python](https://img.shields.io/badge/python-3.11%2B-green.svg)
45
47
  ![License](https://img.shields.io/badge/license-MIT-yellow.svg)
46
48
  ![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20Linux%20%7C%20macOS-lightgrey.svg)
@@ -48,7 +50,6 @@ Description-Content-Type: text/markdown
48
50
  [![CI](https://github.com/lyonzin/knowledge-rag/actions/workflows/ci.yml/badge.svg)](https://github.com/lyonzin/knowledge-rag/actions/workflows/ci.yml)
49
51
  [![CodeQL](https://github.com/lyonzin/knowledge-rag/actions/workflows/security.yml/badge.svg)](https://github.com/lyonzin/knowledge-rag/actions/workflows/security.yml)
50
52
  [![Glama Score](https://glama.ai/mcp/servers/lyonzin/knowledge-rag/badges/score.svg)](https://glama.ai/mcp/servers/lyonzin/knowledge-rag)
51
- [![PyPI](https://img.shields.io/pypi/v/knowledge-rag)](https://pypi.org/project/knowledge-rag/)
52
53
 
53
54
  ### Your docs, your machine, zero cloud. Claude Code searches them natively.
54
55
 
@@ -809,7 +810,7 @@ models:
809
810
  dimensions: 384
810
811
  gpu: false # Set true + pip install knowledge-rag[gpu]
811
812
  reranker:
812
- enabled: true # Set false on low-resource machines
813
+ enabled: true # Falls back to RRF if model is unavailable
813
814
  model: "Xenova/ms-marco-MiniLM-L-6-v2"
814
815
  top_k_multiplier: 3 # Candidates fetched before reranking
815
816
 
@@ -896,6 +897,8 @@ For `.md` files, chunking splits at `##` and `###` header boundaries first. Sect
896
897
  | `models.reranker.model` | `Xenova/ms-marco-MiniLM-L-6-v2` | Reranker model |
897
898
  | `models.reranker.top_k_multiplier` | 3 | Fetch N*multiplier candidates for reranking |
898
899
 
900
+ If the reranker model is not available locally and the machine cannot download it, search now falls back to the RRF order from hybrid semantic+BM25 retrieval. This keeps `search_knowledge` available offline, but result ordering may be less precise for ambiguous queries until the reranker model is cached.
901
+
899
902
  **Embedding model options** (fastest → most accurate):
900
903
  - `BAAI/bge-small-en-v1.5` — 384D, ~33MB (default)
901
904
  - `BAAI/bge-base-en-v1.5` — 768D, ~130MB
@@ -1026,6 +1029,31 @@ rm -rf models_cache
1026
1029
  # Then restart the MCP server
1027
1030
  ```
1028
1031
 
1032
+ ### Reranker model download fails
1033
+
1034
+ The reranker is lazy-loaded on the first query. If the model is not cached and the machine is offline, search continues without reranking and uses the RRF order from hybrid retrieval. To keep reranking enabled offline, run one query while online or pre-populate `models_cache/` on the target machine.
1035
+
1036
+ You can still disable reranking explicitly in `config.yaml`:
1037
+
1038
+ ```yaml
1039
+ models:
1040
+ reranker:
1041
+ enabled: false
1042
+ ```
1043
+
1044
+ Disabling reranking reduces memory use and avoids first-query model loading. The tradeoff is lower ranking precision, especially when several chunks match the same terms but only one is the best answer.
1045
+
1046
+ ### ChromaDB index crashes on startup
1047
+
1048
+ Native ChromaDB failures can terminate Python before normal exception handling runs. Startup now probes ChromaDB in a child process before initializing the MCP server. If the probe exits with a non-zero status (a native crash such as a segfault, or any other failure), the active `chroma_db/` and `index_metadata.json` are moved to `data/backups/auto-repair-*`, and the next startup can rebuild a clean index.
1049
+
1050
+ The same guarded behavior is available through either console script:
1051
+
1052
+ ```bash
1053
+ knowledge-rag
1054
+ knowledge-rag-guarded
1055
+ ```
1056
+
1029
1057
  ### Index is empty
1030
1058
 
1031
1059
  ```bash
@@ -1056,7 +1084,7 @@ pip install --upgrade knowledge-rag
1056
1084
 
1057
1085
  ### Slow first query
1058
1086
 
1059
- The cross-encoder reranker model is lazy-loaded on the first query. This adds a one-time ~2-3 second delay for model download and loading. Subsequent queries are fast.
1087
+ The cross-encoder reranker model is lazy-loaded on the first query. This adds a one-time ~2-3 second delay for model download and loading. Subsequent queries are fast. If the model cannot be loaded, search falls back to RRF ordering and does not retry loading the reranker until the server restarts.
1060
1088
 
1061
1089
  ### Memory usage
1062
1090
 
@@ -1066,8 +1094,16 @@ With ~200 documents, expect ~300-500MB RAM. The embedding model (~50MB) and rera
1066
1094
 
1067
1095
  ## Changelog
1068
1096
 
1069
- ### v3.6.1 (2026-04-23)
1097
+ ### v3.7.0
1098
+
1099
+ - **FIX**: Startup preflight probes ChromaDB in a child process and moves crashing persistent indexes to `data/backups/auto-repair-*` before MCP initialization.
1100
+ - **FIX**: Reranker load failures now fall back to RRF ordering instead of failing `search_knowledge` on offline machines.
1101
+ - **FIX**: Virtualenv project-root detection now handles Python symlinks that resolve to the system interpreter.
1102
+ - **NEW**: `knowledge-rag-guarded` console script kept as an explicit guarded startup alias.
1103
+
1104
+ ### v3.6.2 (2026-04-23)
1070
1105
 
1106
+ - **INFRA**: NPM provenance attestation (SLSA supply chain security), full README on npm page
1071
1107
  - **DOCS**: Reorganize Installation section — add NPX and Docker install methods, update What's New to v3.6.0
1072
1108
 
1073
1109
  ### v3.6.0 (2026-04-23)
@@ -2,7 +2,9 @@
2
2
 
3
3
  <div align="center">
4
4
 
5
- ![Version](https://img.shields.io/badge/version-3.5.2-blue.svg)
5
+ [![PyPI](https://img.shields.io/pypi/v/knowledge-rag)](https://pypi.org/project/knowledge-rag/)
6
+ [![NPM](https://img.shields.io/npm/v/knowledge-rag)](https://www.npmjs.com/package/knowledge-rag)
7
+ [![Downloads](https://static.pepy.tech/badge/knowledge-rag/month)](https://pepy.tech/project/knowledge-rag)
6
8
  ![Python](https://img.shields.io/badge/python-3.11%2B-green.svg)
7
9
  ![License](https://img.shields.io/badge/license-MIT-yellow.svg)
8
10
  ![Platform](https://img.shields.io/badge/platform-Windows%20%7C%20Linux%20%7C%20macOS-lightgrey.svg)
@@ -10,7 +12,6 @@
10
12
  [![CI](https://github.com/lyonzin/knowledge-rag/actions/workflows/ci.yml/badge.svg)](https://github.com/lyonzin/knowledge-rag/actions/workflows/ci.yml)
11
13
  [![CodeQL](https://github.com/lyonzin/knowledge-rag/actions/workflows/security.yml/badge.svg)](https://github.com/lyonzin/knowledge-rag/actions/workflows/security.yml)
12
14
  [![Glama Score](https://glama.ai/mcp/servers/lyonzin/knowledge-rag/badges/score.svg)](https://glama.ai/mcp/servers/lyonzin/knowledge-rag)
13
- [![PyPI](https://img.shields.io/pypi/v/knowledge-rag)](https://pypi.org/project/knowledge-rag/)
14
15
 
15
16
  ### Your docs, your machine, zero cloud. Claude Code searches them natively.
16
17
 
@@ -771,7 +772,7 @@ models:
771
772
  dimensions: 384
772
773
  gpu: false # Set true + pip install knowledge-rag[gpu]
773
774
  reranker:
774
- enabled: true # Set false on low-resource machines
775
+ enabled: true # Falls back to RRF if model is unavailable
775
776
  model: "Xenova/ms-marco-MiniLM-L-6-v2"
776
777
  top_k_multiplier: 3 # Candidates fetched before reranking
777
778
 
@@ -858,6 +859,8 @@ For `.md` files, chunking splits at `##` and `###` header boundaries first. Sect
858
859
  | `models.reranker.model` | `Xenova/ms-marco-MiniLM-L-6-v2` | Reranker model |
859
860
  | `models.reranker.top_k_multiplier` | 3 | Fetch N*multiplier candidates for reranking |
860
861
 
862
+ If the reranker model is not available locally and the machine cannot download it, search now falls back to the RRF order from hybrid semantic+BM25 retrieval. This keeps `search_knowledge` available offline, but result ordering may be less precise for ambiguous queries until the reranker model is cached.
863
+
861
864
  **Embedding model options** (fastest → most accurate):
862
865
  - `BAAI/bge-small-en-v1.5` — 384D, ~33MB (default)
863
866
  - `BAAI/bge-base-en-v1.5` — 768D, ~130MB
@@ -988,6 +991,31 @@ rm -rf models_cache
988
991
  # Then restart the MCP server
989
992
  ```
990
993
 
994
+ ### Reranker model download fails
995
+
996
+ The reranker is lazy-loaded on the first query. If the model is not cached and the machine is offline, search continues without reranking and uses the RRF order from hybrid retrieval. To keep reranking enabled offline, run one query while online or pre-populate `models_cache/` on the target machine.
997
+
998
+ You can still disable reranking explicitly in `config.yaml`:
999
+
1000
+ ```yaml
1001
+ models:
1002
+ reranker:
1003
+ enabled: false
1004
+ ```
1005
+
1006
+ Disabling reranking reduces memory use and avoids first-query model loading. The tradeoff is lower ranking precision, especially when several chunks match the same terms but only one is the best answer.
1007
+
1008
+ ### ChromaDB index crashes on startup
1009
+
1010
+ Native ChromaDB failures can terminate Python before normal exception handling runs. Startup now probes ChromaDB in a child process before initializing the MCP server. If the probe exits with a non-zero status (a native crash such as a segfault, or any other failure), the active `chroma_db/` and `index_metadata.json` are moved to `data/backups/auto-repair-*`, and the next startup can rebuild a clean index.
1011
+
1012
+ The same guarded behavior is available through either console script:
1013
+
1014
+ ```bash
1015
+ knowledge-rag
1016
+ knowledge-rag-guarded
1017
+ ```
1018
+
991
1019
  ### Index is empty
992
1020
 
993
1021
  ```bash
@@ -1018,7 +1046,7 @@ pip install --upgrade knowledge-rag
1018
1046
 
1019
1047
  ### Slow first query
1020
1048
 
1021
- The cross-encoder reranker model is lazy-loaded on the first query. This adds a one-time ~2-3 second delay for model download and loading. Subsequent queries are fast.
1049
+ The cross-encoder reranker model is lazy-loaded on the first query. This adds a one-time ~2-3 second delay for model download and loading. Subsequent queries are fast. If the model cannot be loaded, search falls back to RRF ordering and does not retry loading the reranker until the server restarts.
1022
1050
 
1023
1051
  ### Memory usage
1024
1052
 
@@ -1028,8 +1056,16 @@ With ~200 documents, expect ~300-500MB RAM. The embedding model (~50MB) and rera
1028
1056
 
1029
1057
  ## Changelog
1030
1058
 
1031
- ### v3.6.1 (2026-04-23)
1059
+ ### v3.7.0
1060
+
1061
+ - **FIX**: Startup preflight probes ChromaDB in a child process and moves crashing persistent indexes to `data/backups/auto-repair-*` before MCP initialization.
1062
+ - **FIX**: Reranker load failures now fall back to RRF ordering instead of failing `search_knowledge` on offline machines.
1063
+ - **FIX**: Virtualenv project-root detection now handles Python symlinks that resolve to the system interpreter.
1064
+ - **NEW**: `knowledge-rag-guarded` console script kept as an explicit guarded startup alias.
1065
+
1066
+ ### v3.6.2 (2026-04-23)
1032
1067
 
1068
+ - **INFRA**: NPM provenance attestation (SLSA supply chain security), full README on npm page
1033
1069
  - **DOCS**: Reorganize Installation section — add NPX and Docker install methods, update What's New to v3.6.0
1034
1070
 
1035
1071
  ### v3.6.0 (2026-04-23)
@@ -54,10 +54,11 @@ def _has_documents(path: Path) -> bool:
54
54
 
55
55
  def _venv_project_dir():
56
56
  """Detect project root from venv location (pip install from PyPI)."""
57
- exe = Path(sys.executable).resolve()
58
- for parent in exe.parents:
59
- if parent.name in ("venv", ".venv", "env", ".env"):
60
- return parent.parent
57
+ candidates = [Path(sys.prefix), Path(sys.executable), Path(sys.executable).resolve()]
58
+ for candidate in candidates:
59
+ for parent in (candidate, *candidate.parents):
60
+ if parent.name in ("venv", ".venv", "env", ".env"):
61
+ return parent.parent
61
62
  return None
62
63
 
63
64
 
@@ -0,0 +1,10 @@
1
+ """Backward-compatible guarded console entry point for knowledge-rag."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .server import main
6
+
7
+
8
+ def guarded_main() -> None:
9
+ """Run the MCP server; server.main performs startup preflight."""
10
+ main()
@@ -0,0 +1,74 @@
1
+ """Startup preflight checks for persistent ChromaDB state."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import shutil
7
+ import subprocess
8
+ import sys
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+
12
+ from .config import BASE_DIR, config
13
+
14
+
15
+ def _backup_active_index(reason: str) -> Path:
16
+ """Move active ChromaDB state aside so the server can rebuild cleanly."""
17
+ stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
18
+ backup_dir = config.data_dir / "backups" / f"auto-repair-{stamp}"
19
+ backup_dir.mkdir(parents=True, exist_ok=False)
20
+
21
+ if config.chroma_dir.exists():
22
+ shutil.move(str(config.chroma_dir), str(backup_dir / f"chroma_db.{reason}"))
23
+
24
+ metadata_file = config.data_dir / "index_metadata.json"
25
+ if metadata_file.exists():
26
+ shutil.move(str(metadata_file), str(backup_dir / f"index_metadata.{reason}.json"))
27
+
28
+ return backup_dir
29
+
30
+
31
+ def _probe_chroma(timeout_seconds: int = 30) -> subprocess.CompletedProcess[str]:
32
+ """Check Chroma in a child process so native crashes do not kill MCP startup."""
33
+ code = r"""
34
+ import chromadb
35
+
36
+ from mcp_server.config import config
37
+
38
+ if not config.chroma_dir.exists():
39
+ print("missing")
40
+ raise SystemExit(0)
41
+
42
+ client = chromadb.PersistentClient(path=str(config.chroma_dir))
43
+ collection = client.get_or_create_collection(name=config.collection_name)
44
+ print(collection.count())
45
+ """
46
+ env = os.environ.copy()
47
+ env.setdefault("KNOWLEDGE_RAG_DIR", str(BASE_DIR))
48
+ return subprocess.run(
49
+ [sys.executable, "-c", code],
50
+ cwd=str(BASE_DIR),
51
+ env=env,
52
+ text=True,
53
+ stdout=subprocess.PIPE,
54
+ stderr=subprocess.PIPE,
55
+ timeout=timeout_seconds,
56
+ check=False,
57
+ )
58
+
59
+
60
+ def run_preflight(timeout_seconds: int = 30) -> bool:
61
+ """Return True when active Chroma state was moved aside for repair."""
62
+ result = _probe_chroma(timeout_seconds=timeout_seconds)
63
+ if result.returncode == 0:
64
+ return False
65
+
66
+ reason = "segfault" if result.returncode in (-11, 139) else "failed"
67
+ backup_dir = _backup_active_index(reason)
68
+ print(
69
+ f"[RECOVERY] Chroma preflight failed with code {result.returncode}; moved active index to {backup_dir}",
70
+ file=sys.stderr,
71
+ )
72
+ if result.stderr:
73
+ print(result.stderr[-2000:], file=sys.stderr)
74
+ return True
@@ -248,13 +248,22 @@ class CrossEncoderReranker:
248
248
  def __init__(self, model: str = None):
249
249
  self.model_name = model or config.reranker_model
250
250
  self._model = None # Lazy init
251
+ self._load_failed = False
251
252
 
252
- def _ensure_model(self):
253
+ def _ensure_model(self) -> bool:
253
254
  """Lazy initialization of cross-encoder model"""
255
+ if self._load_failed:
256
+ return False
254
257
  if self._model is None:
255
258
  print(f"[INFO] Loading reranker model: {self.model_name}...")
256
- self._model = TextCrossEncoder(model_name=self.model_name, cache_dir=str(config.models_cache_dir))
257
- print("[INFO] Reranker model loaded successfully")
259
+ try:
260
+ self._model = TextCrossEncoder(model_name=self.model_name, cache_dir=str(config.models_cache_dir))
261
+ print("[INFO] Reranker model loaded successfully")
262
+ except Exception as e:
263
+ self._load_failed = True
264
+ print(f"[WARN] Reranker unavailable, using RRF order: {e}")
265
+ return False
266
+ return True
258
267
 
259
268
  def rerank(self, query: str, documents: List[Dict[str, Any]], top_k: int = 5) -> List[Dict[str, Any]]:
260
269
  """
@@ -271,7 +280,8 @@ class CrossEncoderReranker:
271
280
  if not documents or not config.reranker_enabled:
272
281
  return documents[:top_k]
273
282
 
274
- self._ensure_model()
283
+ if not self._ensure_model():
284
+ return documents[:top_k]
275
285
 
276
286
  texts = [doc.get("document", "") for doc in documents]
277
287
 
@@ -1924,6 +1934,10 @@ def main():
1924
1934
  _handle_init()
1925
1935
  return
1926
1936
 
1937
+ from .preflight import run_preflight
1938
+
1939
+ run_preflight()
1940
+
1927
1941
  orchestrator = get_orchestrator()
1928
1942
 
1929
1943
  # Migration: check dimension mismatch AFTER full init (avoids segfault during __init__)
@@ -2,7 +2,7 @@
2
2
 
3
3
  # Knowledge RAG
4
4
 
5
- Local RAG system for Claude Code. Hybrid BM25 + semantic search with cross-encoder reranking. 12 MCP tools. Zero external servers. Everything runs on your machine.
5
+ Local RAG system for Claude Code. Hybrid BM25 + semantic search with cross-encoder reranking. 12 MCP tools, 20 format parsers. Zero external servers. Everything runs on your machine.
6
6
 
7
7
  ## Quick Start
8
8
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "knowledge-rag"
7
- version = "3.6.1"
7
+ version = "3.7.0"
8
8
  description = "Local RAG System for Claude Code — Hybrid search + Cross-encoder Reranking + 12 MCP Tools + 20 Format Parsers. Zero external servers."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -34,7 +34,7 @@ dependencies = [
34
34
  "fastembed[reranking]>=0.4.0",
35
35
  "mcp>=1.0.0",
36
36
  "rank-bm25>=0.2.2",
37
- "requests>=2.31.0",
37
+ "requests>=2.33.0",
38
38
  "beautifulsoup4>=4.12.0",
39
39
  "python-docx>=1.0.0",
40
40
  "openpyxl>=3.1.0",
@@ -54,6 +54,7 @@ Changelog = "https://github.com/lyonzin/knowledge-rag/releases"
54
54
 
55
55
  [project.scripts]
56
56
  knowledge-rag = "mcp_server.server:main"
57
+ knowledge-rag-guarded = "mcp_server.guarded:guarded_main"
57
58
 
58
59
  [tool.hatch.build.targets.wheel]
59
60
  packages = ["mcp_server"]
@@ -1,6 +1,6 @@
1
1
  # Knowledge RAG System - Python Dependencies
2
2
  # ==========================================
3
- # Requires Python 3.11 or 3.12 (NOT 3.13+ due to onnxruntime)
3
+ # Requires Python 3.11+ (3.11, 3.12, 3.13, 3.14 supported)
4
4
 
5
5
  # Vector Database (uses new PersistentClient API)
6
6
  chromadb>=1.4.0
@@ -19,7 +19,7 @@ mcp>=1.0.0
19
19
  rank-bm25>=0.2.2
20
20
 
21
21
  # URL content fetching (add_from_url tool)
22
- requests>=2.31.0
22
+ requests>=2.33.0
23
23
 
24
24
  # HTML parsing (add_from_url tool)
25
25
  beautifulsoup4>=4.12.0
@@ -44,6 +44,6 @@ watchdog>=4.0.0
44
44
  # 2. Default embedding model: BAAI/bge-small-en-v1.5 (384-dim)
45
45
  # Cached in ~/.cache/fastembed/
46
46
  #
47
- # 3. Python 3.13+ is NOT supported because chromadb
48
- # depends on onnxruntime which has no 3.13 wheels
47
+ # 3. Python 3.13+ is supported since v3.5.1
48
+ # (onnxruntime now ships wheels for 3.13 and 3.14)
49
49
  # ==========================================
File without changes
File without changes