cfunklabs-rag-react-docs 0.1.2__tar.gz → 0.1.3__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cfunklabs-rag-react-docs
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Retrieval-only MCP server over the indexed React documentation, with a prebuilt index downloaded on first run.
5
5
  Project-URL: Homepage, https://github.com/cfunklabs/rag-react-docs
6
6
  Project-URL: Repository, https://github.com/cfunklabs/rag-react-docs
@@ -13,6 +13,7 @@ Classifier: License :: OSI Approved :: MIT License
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Topic :: Software Development :: Documentation
15
15
  Requires-Python: >=3.14
16
+ Requires-Dist: certifi>=2024.0.0
16
17
  Requires-Dist: chromadb>=1.5.9
17
18
  Requires-Dist: mcp>=1.28.1
18
19
  Requires-Dist: platformdirs>=4.0.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "cfunklabs-rag-react-docs"
3
- version = "0.1.2"
3
+ version = "0.1.3"
4
4
  description = "Retrieval-only MCP server over the indexed React documentation, with a prebuilt index downloaded on first run."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.14"
@@ -17,6 +17,7 @@ classifiers = [
17
17
  # Runtime deps for the published wheel: retrieval + MCP server only. The generation/ingestion
18
18
  # stack (langchain, langgraph, anthropic, ...) is dev-only and lives in [dependency-groups].
19
19
  dependencies = [
20
+ "certifi>=2024.0.0",
20
21
  "chromadb>=1.5.9",
21
22
  "mcp>=1.28.1",
22
23
  "platformdirs>=4.0.0",
@@ -5,4 +5,4 @@ ChromaDB index is downloaded from a GitHub Release on first run (see `datastore.
5
5
  users never run the ingestion pipeline themselves.
6
6
  """
7
7
 
8
- __version__ = "0.1.2"
8
+ __version__ = "0.1.3"
@@ -2,18 +2,21 @@
2
2
 
3
3
  The published package ships no vectors: the ~34 MB index lives as a GitHub Release asset and is
4
4
  fetched + cached the first time the server needs it. Subsequent runs read straight from the
5
- cache and work offline. Only the standard library is used for the download so the wheel stays
6
- dependency-light (no httpx/requests).
5
+ cache and work offline. The download uses urllib with an explicit certifi CA bundle so TLS
6
+ verification works even on interpreters that lack a configured system cert store (e.g. the
7
+ python.org macOS framework build), rather than relying on the ambient default SSL context.
7
8
  """
8
9
 
9
10
  import hashlib
10
11
  import os
11
12
  import shutil
13
+ import ssl
12
14
  import tarfile
13
15
  import tempfile
14
16
  import urllib.request
15
17
  from pathlib import Path
16
18
 
19
+ import certifi
17
20
  import chromadb
18
21
 
19
22
  from .config import COLLECTION_NAME, INDEX_URL, datastore_dir
@@ -24,9 +27,14 @@ from .config import COLLECTION_NAME, INDEX_URL, datastore_dir
24
27
  # looks valid on the next run.
25
28
  _MARKER = "chroma.sqlite3"
26
29
 
30
+ # Verify TLS against certifi's CA bundle instead of the interpreter default. Some Python builds
31
+ # (notably python.org macOS framework installs) ship without usable root certificates, which
32
+ # makes the default context fail with CERTIFICATE_VERIFY_FAILED on any HTTPS download.
33
+ _SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
34
+
27
35
 
28
36
  def _download(url: str, dest: Path) -> None:
29
- with urllib.request.urlopen(url) as response, open(dest, "wb") as out:
37
+ with urllib.request.urlopen(url, context=_SSL_CONTEXT) as response, open(dest, "wb") as out:
30
38
  shutil.copyfileobj(response, out)
31
39
 
32
40
 
@@ -46,7 +54,7 @@ def _verify_checksum(archive: Path, url: str) -> None:
46
54
  tolerated (some releases may not publish one) but a present-and-mismatched one is fatal.
47
55
  """
48
56
  try:
49
- with urllib.request.urlopen(url + ".sha256") as response:
57
+ with urllib.request.urlopen(url + ".sha256", context=_SSL_CONTEXT) as response:
50
58
  expected = response.read().decode().strip().split()[0]
51
59
  except Exception:
52
60
  return
@@ -11,7 +11,7 @@ import sys
11
11
 
12
12
  from mcp.server.fastmcp import FastMCP
13
13
 
14
- from .config import DEFAULT_TOP_K
14
+ from .config import DEFAULT_TOP_K, INDEX_URL
15
15
  from .datastore import get_rag_collection
16
16
  from .retrieval import retrieve_chunks
17
17
 
@@ -19,12 +19,23 @@ from .retrieval import retrieve_chunks
19
19
  mcp = FastMCP("rag-react-docs")
20
20
 
21
21
 
22
- def _collection_is_empty() -> bool:
22
+ def _index_error() -> str | None:
23
+ """Return a human-readable reason the index is unavailable, or None if it's ready.
24
+
25
+ Distinguishes a real load/download failure (surfacing the underlying exception and the
26
+ URL it tried) from a genuinely empty collection, so callers report the actual cause rather
27
+ than a catch-all "index is empty" message.
28
+ """
23
29
  try:
24
- return get_rag_collection().count() == 0
25
- except Exception:
26
- # Treat a missing/uninitialized/failed-download collection the same as an empty one.
27
- return True
30
+ count = get_rag_collection().count()
31
+ except Exception as exc:
32
+ return (
33
+ f"Could not load the documentation index (downloaded from {INDEX_URL}): "
34
+ f"{type(exc).__name__}: {exc}"
35
+ )
36
+ if count == 0:
37
+ return "The documentation index loaded but contains no documents."
38
+ return None
28
39
 
29
40
 
30
41
  @mcp.tool()
@@ -40,17 +51,10 @@ def search_docs(question: str, k: int = DEFAULT_TOP_K) -> list[dict]:
40
51
  - content: the raw chunk text to ground an answer on
41
52
  - distance: the retrieval distance (lower is more similar)
42
53
  """
43
- if _collection_is_empty():
44
- return [
45
- {
46
- "source": "rag-react-docs",
47
- "content": (
48
- "The documentation index is empty or could not be loaded. "
49
- "Check network access on first run so the index can be downloaded."
50
- ),
51
- "distance": None,
52
- }
53
- ]
54
+ error = _index_error()
55
+ if error:
56
+ print(f"[rag-react-docs] {error}", file=sys.stderr)
57
+ return [{"source": "rag-react-docs", "content": error, "distance": None}]
54
58
 
55
59
  return retrieve_chunks(question, k)
56
60
 
@@ -61,16 +65,11 @@ def main() -> None:
61
65
  # JSON-RPC protocol, so anything printed there would corrupt the stream.
62
66
  print(f"[rag-react-docs] MCP server starting on stdio (top_k={DEFAULT_TOP_K}).", file=sys.stderr)
63
67
  print("[rag-react-docs] Ensuring documentation index is available...", file=sys.stderr)
64
- try:
65
- if _collection_is_empty():
66
- print(
67
- "[rag-react-docs] Warning: index empty or unavailable -- check network access.",
68
- file=sys.stderr,
69
- )
70
- else:
71
- print("[rag-react-docs] Index ready.", file=sys.stderr)
72
- except Exception as exc: # pragma: no cover - defensive; _collection_is_empty swallows most
73
- print(f"[rag-react-docs] Warning: could not verify index: {exc}", file=sys.stderr)
68
+ error = _index_error()
69
+ if error:
70
+ print(f"[rag-react-docs] Warning: {error}", file=sys.stderr)
71
+ else:
72
+ print("[rag-react-docs] Index ready.", file=sys.stderr)
74
73
 
75
74
  print("[rag-react-docs] Ready. Press Ctrl+C to stop.", file=sys.stderr)
76
75
  try: