docchat-server 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ """docchat-server - version-pinned documentation retrieval as an MCP server.
2
+
3
+ Stripped-down sibling of docchat (https://github.com/AshwinUgale/docchat).
4
+ Exposes ``search_docs`` and ``list_indexed`` MCP tools that Claude Code,
5
+ Cursor, Cline, or any other MCP-aware client can call to ground library
6
+ questions in the exact pinned-version docs instead of training data.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ __version__ = "0.0.1"
12
+ __all__ = ["__version__"]
docchat_server/cli.py ADDED
@@ -0,0 +1,111 @@
1
+ """docchat-server CLI.
2
+
3
+ Subcommands:
4
+ - ``docchat-server serve`` - run the MCP server on stdio.
5
+ - ``docchat-server index <library> <ver>`` - populate the local Qdrant.
6
+ - ``docchat-server list`` - show indexed collections.
7
+
8
+ The ``serve`` subcommand is what an MCP host (Claude Code, Cursor, Cline)
9
+ spawns. The ``index`` and ``list`` subcommands are for the user, run
10
+ manually to set up before pointing an MCP host at the server.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import os
17
+ import sys
18
+
19
+ from dotenv import load_dotenv
20
+
21
+ from docchat_server import __version__
22
+
23
+
24
def _cmd_serve(_args: argparse.Namespace) -> int:
    """Handle ``docchat-server serve``: run the MCP server, then return 0.

    The parsed arguments are accepted only to match the subcommand handler
    signature; they are not used.
    """
    # Deferred import: docchat_server.server checks OPENAI_API_KEY and opens
    # the Qdrant store at import time, which the other subcommands must not
    # trigger.
    from docchat_server import server as server_module

    server_module.main()
    return 0
31
+
32
+
33
def _cmd_index(args: argparse.Namespace) -> int:
    """Handle ``docchat-server index <library> <version>``.

    Loads ``.env``, verifies the embedding key is present, then fetches,
    embeds and stores the docs for ``args.library`` / ``args.version``.

    Returns:
        0 on success; 2 when ``OPENAI_API_KEY`` is missing or the indexer
        raises ``ValueError`` (unsupported library); 1 when opening the
        store or indexing raises ``RuntimeError``.
    """
    load_dotenv()
    if not os.environ.get("OPENAI_API_KEY"):
        print(
            "ERROR: OPENAI_API_KEY is not set. docchat-server uses OpenAI's "
            "embeddings API. Set it in your shell or a .env file.",
            file=sys.stderr,
        )
        return 2

    # Heavy imports are deferred until the key check has passed.
    from openai import OpenAI

    from docchat_server.indexer import DocIndexer, open_qdrant

    def _progress(msg: str) -> None:
        # Progress goes to stderr so stdout carries only the final summary.
        print(f"[indexer] {msg}", file=sys.stderr, flush=True)

    try:
        # Opening the store sits inside the try so a RuntimeError raised while
        # opening it (for example the embedded store being locked by a running
        # `serve` process) is reported as a clean error, not a traceback.
        qdrant = open_qdrant()
        indexer = DocIndexer(qdrant=qdrant, openai=OpenAI())
        total = indexer.index(args.library, args.version, progress=_progress)
    except ValueError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 2
    except RuntimeError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1
    print(f"indexed {total} chunks into {args.library}@{args.version}")
    return 0
63
+
64
+
65
def _cmd_list(_args: argparse.Namespace) -> int:
    """Handle ``docchat-server list``: print collections and supported libraries.

    Each collection is printed with its chunk count; a count that cannot be
    read is shown as ``?`` instead of aborting the listing. Always returns 0.
    """
    from docchat_server.indexer import open_qdrant
    from docchat_server.library_config import LIBRARY_CONFIG

    client = open_qdrant()
    found = client.get_collections().collections

    print("Indexed collections:")
    if not found:
        print(" (none yet - run `docchat-server index <library> <version>`)")
    for entry in found:
        try:
            size = client.count(collection_name=entry.name).count
        except Exception:
            # Best effort: one unreadable collection must not hide the rest.
            size = "?"
        print(f" - {entry.name} ({size} chunks)")

    supported = ", ".join(sorted(LIBRARY_CONFIG.keys()))
    print()
    print(f"Supported libraries: {supported}")
    return 0
83
+
84
+
85
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and dispatch to the chosen subcommand handler.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The handler's result coerced to ``int``, for use as the exit status.
    """
    parser = argparse.ArgumentParser(
        prog="docchat-server",
        description="Version-pinned doc retrieval as an MCP server.",
    )
    parser.add_argument("--version", action="version", version=f"docchat-server {__version__}")
    subcommands = parser.add_subparsers(dest="cmd", required=True)

    # Each subparser records its handler under ``func``.
    serve_parser = subcommands.add_parser("serve", help="run the MCP server on stdio")
    serve_parser.set_defaults(func=_cmd_serve)

    index_parser = subcommands.add_parser(
        "index", help="fetch + embed + upsert docs for one (library, version)"
    )
    index_parser.add_argument("library", help="e.g. react, fastapi, vue")
    index_parser.add_argument("version", help="e.g. 18.2.0, 0.100.0, 3.4.0")
    index_parser.set_defaults(func=_cmd_index)

    list_parser = subcommands.add_parser("list", help="show indexed collections")
    list_parser.set_defaults(func=_cmd_list)

    namespace = parser.parse_args(argv)
    handler = namespace.func
    return int(handler(namespace))
108
+
109
+
110
+ if __name__ == "__main__":
111
+ sys.exit(main())
@@ -0,0 +1,261 @@
1
+ """Doc indexer - fetch + chunk + embed + store for one (library, version).
2
+
3
+ Ported from docchat/sidecar/src/docchat_sidecar/indexer.py with two changes:
4
+
5
+ 1. Uses ``QdrantClient`` in embedded mode (local path, no Docker) instead
6
+ of ``AsyncQdrantClient`` against a running server. The embedded mode
7
+ writes to ``~/.docchat-server/qdrant/`` and is fine for the
8
+ single-user / hundreds-of-thousands-of-vectors scale this MCP server
9
+ serves. Users can override via the ``QDRANT_PATH`` env var.
10
+ 2. Drops the streaming-progress protocol (IndexProgress / IndexComplete
11
+ frames). The MCP server doesn't need WebSocket-style progress events;
12
+ the CLI prints a simple progress line per page fetched.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import re
19
+ import sys
20
+ import uuid
21
+ from collections.abc import Iterable
22
+ from dataclasses import dataclass
23
+ from pathlib import Path
24
+
25
+ import httpx
26
+ from openai import OpenAI
27
+ from qdrant_client import QdrantClient
28
+ from qdrant_client.http.models import Distance, PointStruct, VectorParams
29
+
30
+ from docchat_server.library_config import collection_name_for, urls_for
31
+
32
+ __all__ = ["DocIndexer", "default_qdrant_path", "open_qdrant"]
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ # Mirrors docchat: text-embedding-3-small at 1536 dims, ~500-token chunks.
37
+ _DEFAULT_EMBED_MODEL = "text-embedding-3-small"
38
+ _DEFAULT_DIMENSIONS = 1536
39
+ _CHUNK_TARGET_CHARS = 2000
40
+ _MDX_NOISE_RE = re.compile(r"^(import |export )", re.MULTILINE)
41
+ _FRONTMATTER_RE = re.compile(r"\A---\n.*?\n---\n", re.DOTALL)
42
+ _H2_HEADING_RE = re.compile(r"^##\s+(.+?)\s*$")
43
+
44
+
45
def default_qdrant_path() -> Path:
    """Return the directory used for the embedded Qdrant store.

    A non-empty ``$QDRANT_PATH`` wins (with ``~`` expanded); otherwise the
    store lives under ``.docchat-server/qdrant`` in the user's home directory.
    """
    from os import environ

    configured = environ.get("QDRANT_PATH")
    if not configured:
        return Path.home().joinpath(".docchat-server", "qdrant")
    return Path(configured).expanduser()
53
+
54
+
55
def open_qdrant(path: Path | None = None) -> QdrantClient:
    """Open the embedded Qdrant store at ``path``, creating the directory first.

    Args:
        path: Storage directory; when omitted, ``default_qdrant_path()`` is used.
    """
    storage_dir = path if path else default_qdrant_path()
    storage_dir.mkdir(parents=True, exist_ok=True)
    return QdrantClient(path=str(storage_dir))
60
+
61
+
62
+ @dataclass(frozen=True, kw_only=True)
63
+ class _Chunk:
64
+ source_url: str
65
+ chunk_index: int
66
+ text: str
67
+ api_name: str
68
+ section_heading: str | None
69
+
70
+
71
class DocIndexer:
    """Fetch + chunk + embed + write docs for one (library, version).

    Synchronous (since embedded Qdrant is sync). Progress is reported via
    a callable so the CLI can print to stderr; pass ``progress=None`` for
    silent operation.
    """

    def __init__(
        self,
        *,
        qdrant: QdrantClient,
        openai: OpenAI,
        embed_model: str = _DEFAULT_EMBED_MODEL,
        embed_dimensions: int = _DEFAULT_DIMENSIONS,
        http: httpx.Client | None = None,
    ) -> None:
        """Store the collaborators; no I/O happens until :meth:`index`.

        Args:
            qdrant: Client for the vector store that receives the chunks.
            openai: Client used to compute embeddings.
            embed_model: Embedding model name passed to the embeddings API.
            embed_dimensions: Vector size used when creating the collection.
            http: Optional HTTP client to reuse. When omitted, ``index``
                creates one for the duration of the fetch and closes it.
        """
        self._qdrant = qdrant
        self._openai = openai
        self._embed_model = embed_model
        self._embed_dimensions = embed_dimensions
        self._http = http

    def index(
        self,
        library: str,
        version: str,
        *,
        progress: "Callable[[str], None] | None" = None,
    ) -> int:
        """Index one (library, version). Returns count of chunks upserted.

        The existing collection is only dropped once the pages have been
        fetched and produced at least one chunk, so a failed fetch leaves a
        previously built index untouched.

        Raises:
            ValueError: library not in LIBRARY_CONFIG.
            RuntimeError: fetched 0 chunks (network or source-URL drift).
        """
        urls = urls_for(library, version)
        if not urls:
            raise ValueError(
                f"no indexer wired for {library!r}; supported: react, fastapi, vue. "
                "Add a LibraryConfig entry in library_config.py to extend."
            )

        collection = collection_name_for(library, version)

        # Only close the HTTP client if it was created here.
        owns_http = self._http is None
        http = self._http or httpx.Client(timeout=30.0, follow_redirects=True)
        try:
            chunks = self._fetch_chunks(http, urls, progress)
        finally:
            if owns_http:
                http.close()

        total = len(chunks)
        if total == 0:
            raise RuntimeError(
                f"fetched 0 chunks for {library}@{version}; check network or "
                "source URLs in library_config.py"
            )

        # Reset after the fetch succeeded: dropping the collection first would
        # wipe a good index whenever the network or the source URLs fail.
        self._reset_collection(collection)

        batch_size = 16
        for batch_start in range(0, total, batch_size):
            batch = chunks[batch_start : batch_start + batch_size]
            vectors = self._embed([c.text for c in batch])
            points = [
                PointStruct(
                    id=str(uuid.uuid4()),
                    vector=vector,
                    payload={
                        "library": library,
                        "version": version,
                        "source_url": c.source_url,
                        "chunk_index": c.chunk_index,
                        "text": c.text,
                        "api_name": c.api_name,
                        "section_heading": c.section_heading,
                    },
                )
                # strict=True: a vector-count mismatch must fail loudly rather
                # than silently pair chunks with the wrong embeddings.
                for c, vector in zip(batch, vectors, strict=True)
            ]
            self._qdrant.upsert(collection_name=collection, points=points)
            if progress:
                done = min(batch_start + batch_size, total)
                progress(f"embedded + upserted {done}/{total}")

        return total

    def _fetch_chunks(
        self,
        http: httpx.Client,
        urls: tuple[str, ...],
        progress: "Callable[[str], None] | None",
    ) -> list[_Chunk]:
        """Download every URL and split each page into chunks.

        Pages that fail with an HTTP error are logged and skipped, so the
        result may cover only some of ``urls`` (or be empty).
        """
        chunks: list[_Chunk] = []
        for page_index, url in enumerate(urls):
            if progress:
                progress(f"fetching {page_index + 1}/{len(urls)}: {url.rsplit('/', 1)[-1]}")
            try:
                response = http.get(url)
                response.raise_for_status()
            except httpx.HTTPError as exc:
                logger.warning("skipping %s: %s", url, exc)
                continue
            text = _clean_mdx(response.text)
            api_name = _api_name_from_url(url)
            for idx, (chunk_text, section_heading) in enumerate(_split_into_chunks(text)):
                chunks.append(
                    _Chunk(
                        source_url=url,
                        chunk_index=idx,
                        text=chunk_text,
                        api_name=api_name,
                        section_heading=section_heading,
                    )
                )
        return chunks

    def _reset_collection(self, collection: str) -> None:
        """Drop + recreate the collection so re-indexing is idempotent."""
        if self._qdrant.collection_exists(collection_name=collection):
            self._qdrant.delete_collection(collection_name=collection)
        self._qdrant.create_collection(
            collection_name=collection,
            vectors_config=VectorParams(size=self._embed_dimensions, distance=Distance.COSINE),
        )

    def _embed(self, texts: list[str]) -> list[list[float]]:
        """Return one embedding vector per input text, in response order."""
        response = self._openai.embeddings.create(model=self._embed_model, input=texts)
        return [item.embedding for item in response.data]
193
+
194
+
195
+ # ---------------------------------------------------------------------------
196
+ # Helpers (module-private; tested via the public DocIndexer)
197
+ # ---------------------------------------------------------------------------
198
+
199
+
200
def _api_name_from_url(url: str) -> str:
    """Derive a stable API name from a doc-source URL.

    The name is the file name without its ``.md`` / ``.mdx`` suffix. For an
    ``index`` page the parent directory name is used instead.

    Examples:
        ".../reference/react/useState.md" -> "useState"
        ".../docs/tutorial/dependencies/index.md" -> "dependencies"
    """
    _, _, filename = url.rpartition("/")
    name = filename
    for extension in (".md", ".mdx"):
        name = name.removesuffix(extension)
    if name != "index":
        return name
    # "index" says nothing about the topic; fall back to the directory name.
    segments = url.rstrip("/").split("/")
    return segments[-2] if len(segments) >= 2 else name
214
+
215
+
216
def _clean_mdx(raw: str) -> str:
    """Strip MDX frontmatter + import/export lines so we're left with prose.

    Lines starting with ``import `` or ``export `` are removed entirely, but
    only outside fenced code blocks: inside a fence they are part of a code
    sample and are kept verbatim. The result has leading and trailing
    whitespace stripped.
    """
    body = _FRONTMATTER_RE.sub("", raw, count=1)
    kept: list[str] = []
    in_fence = False
    for line in body.split("\n"):
        if line.lstrip().startswith(("```", "~~~")):
            # A fence marker opens or closes a code sample; keep the marker.
            in_fence = not in_fence
        elif not in_fence and _MDX_NOISE_RE.match(line):
            # Drop the whole statement. Removing only the leading keyword
            # would leave fragments such as "Foo from 'bar'" in the prose.
            continue
        kept.append(line)
    return "\n".join(kept).strip()
221
+
222
+
223
def _split_into_chunks(text: str) -> Iterable[tuple[str, str | None]]:
    """Yield ``(chunk_text, heading)`` pairs built from whole paragraphs.

    Paragraphs (separated by blank lines) are packed into a chunk until adding
    the next one would exceed ``_CHUNK_TARGET_CHARS``. A paragraph is never
    split, so a single oversized paragraph becomes its own chunk. ``heading``
    is the most recent ``## `` heading seen when the chunk's first paragraph
    was added, or ``None`` if there was none yet.
    """
    pending: list[str] = []
    pending_chars = 0
    latest_heading: str | None = None
    pending_heading: str | None = None

    stripped_blocks = (piece.strip() for piece in re.split(r"\n\s*\n", text))
    for block_text in stripped_blocks:
        if not block_text:
            continue
        heading_match = _H2_HEADING_RE.match(block_text.splitlines()[0])
        if heading_match is not None:
            latest_heading = heading_match.group(1).strip()

        size = len(block_text)
        would_overflow = bool(pending) and pending_chars + size > _CHUNK_TARGET_CHARS
        if would_overflow:
            # Emit what has accumulated and start a new chunk with this block.
            yield "\n\n".join(pending), pending_heading
            pending = [block_text]
            pending_chars = size
            pending_heading = latest_heading
            continue

        if not pending:
            pending_heading = latest_heading
        pending.append(block_text)
        # +2 accounts for the "\n\n" separator used when joining.
        pending_chars += size + 2

    if pending:
        yield "\n\n".join(pending), pending_heading
252
+
253
+
254
+ # Re-export so callers can type-hint without an extra import.
255
+ from collections.abc import Callable as _Callable # noqa: E402
256
+ Callable = _Callable # type: ignore[assignment]
257
+
258
+
259
def _eprint(*args: object) -> None:
    """Print ``args`` to stderr and flush, leaving stdout untouched."""
    stream = sys.stderr
    print(*args, file=stream, flush=True)
@@ -0,0 +1,124 @@
1
+ """Per-library doc-source config + Qdrant collection naming.
2
+
3
+ Ported verbatim from docchat/sidecar/src/docchat_sidecar/indexer.py
4
+ (_LIBRARY_CONFIG). Each library declares the source repo, the doc paths to
5
+ fetch, and a ``ref_for(version)`` callable that maps the user's pinned
6
+ version to a git ref.
7
+
8
+ For libraries whose docs live in the same repo as the released source
9
+ (FastAPI, Flask), ``ref_for`` returns the version tag - so indexing
10
+ ``fastapi@0.100.0`` fetches Pydantic-v2-era docs from the 0.100.0 tag.
11
+ For libraries whose docs live in a separate untagged repo (React, Vue),
12
+ ``ref_for`` returns ``"main"`` and the chunk metadata still surfaces the
13
+ user's pinned version via the collection name + chunk header.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import re
19
+ from collections.abc import Callable
20
+ from dataclasses import dataclass
21
+
22
+ __all__ = ["LibraryConfig", "LIBRARY_CONFIG", "collection_name_for", "urls_for"]
23
+
24
+
25
+ _REACT_DOC_PATHS: tuple[str, ...] = (
26
+ "src/content/reference/react/useState.md",
27
+ "src/content/reference/react/useEffect.md",
28
+ "src/content/reference/react/useContext.md",
29
+ "src/content/reference/react/useReducer.md",
30
+ "src/content/reference/react/useMemo.md",
31
+ "src/content/reference/react/useCallback.md",
32
+ "src/content/reference/react/useRef.md",
33
+ "src/content/reference/react/useId.md",
34
+ "src/content/reference/react/useSyncExternalStore.md",
35
+ "src/content/reference/react/useTransition.md",
36
+ )
37
+
38
+ _FASTAPI_DOC_PATHS: tuple[str, ...] = (
39
+ "docs/en/docs/tutorial/first-steps.md",
40
+ "docs/en/docs/tutorial/path-params.md",
41
+ "docs/en/docs/tutorial/query-params.md",
42
+ "docs/en/docs/tutorial/body.md",
43
+ "docs/en/docs/tutorial/response-model.md",
44
+ "docs/en/docs/tutorial/dependencies/index.md",
45
+ "docs/en/docs/tutorial/background-tasks.md",
46
+ "docs/en/docs/tutorial/middleware.md",
47
+ "docs/en/docs/tutorial/cors.md",
48
+ "docs/en/docs/tutorial/dependencies/dependencies-with-yield.md",
49
+ )
50
+
51
+ _VUE_DOC_PATHS: tuple[str, ...] = (
52
+ "src/api/reactivity-core.md",
53
+ "src/api/reactivity-utilities.md",
54
+ "src/api/composition-api-setup.md",
55
+ "src/api/composition-api-lifecycle.md",
56
+ "src/api/composition-api-dependency-injection.md",
57
+ "src/api/general.md",
58
+ "src/api/sfc-script-setup.md",
59
+ "src/guide/essentials/reactivity-fundamentals.md",
60
+ "src/guide/essentials/computed.md",
61
+ "src/guide/essentials/watchers.md",
62
+ )
63
+
64
+
65
+ @dataclass(frozen=True, kw_only=True)
66
+ class LibraryConfig:
67
+ """Per-library doc-source config used by urls_for to build raw-GitHub URLs."""
68
+
69
+ repo: str
70
+ paths: tuple[str, ...]
71
+ ref_for: Callable[[str], str]
72
+
73
+
74
def _fastapi_ref(version: str) -> str:
    """Return the git ref for a FastAPI version: the version string itself.

    FastAPI is tagged per release, so the docs at that tag reflect the
    matching Pydantic generation (v1 for <0.100, v2 for >=0.100).
    """
    git_ref = version
    return git_ref
78
+
79
+
80
def _docs_repo_main(_: str) -> str:
    """Return ``"main"`` regardless of version.

    Used for React/Vue, whose docs repos aren't version-tagged.
    """
    branch = "main"
    return branch
83
+
84
+
85
+ LIBRARY_CONFIG: dict[str, LibraryConfig] = {
86
+ "react": LibraryConfig(
87
+ repo="reactjs/react.dev",
88
+ paths=_REACT_DOC_PATHS,
89
+ ref_for=_docs_repo_main,
90
+ ),
91
+ "fastapi": LibraryConfig(
92
+ repo="tiangolo/fastapi",
93
+ paths=_FASTAPI_DOC_PATHS,
94
+ ref_for=_fastapi_ref,
95
+ ),
96
+ "vue": LibraryConfig(
97
+ repo="vuejs/docs",
98
+ paths=_VUE_DOC_PATHS,
99
+ ref_for=_docs_repo_main,
100
+ ),
101
+ }
102
+
103
+
104
def collection_name_for(library: str, version: str) -> str:
    """Qdrant collection name for a (library, version) pair.

    Both parts are lowercased, every run of characters outside ``a-z0-9`` is
    replaced by a single ``_``, and leading/trailing ``_`` are trimmed. The
    two parts are then joined with ``_``. Example::

        collection_name_for("react", "18.2.0") -> "react_18_2_0"
    """

    def _slug(value: str) -> str:
        return re.sub(r"[^a-z0-9]+", "_", value.lower()).strip("_")

    return "_".join((_slug(library), _slug(version)))
115
+
116
+
117
def urls_for(library: str, version: str) -> tuple[str, ...]:
    """Build the raw-GitHub URLs for a (library, version).

    The library lookup is case-insensitive. Returns ``()`` when the library
    has no entry in ``LIBRARY_CONFIG``.
    """
    entry = LIBRARY_CONFIG.get(library.lower())
    if entry is None:
        return ()
    git_ref = entry.ref_for(version)
    root = f"https://raw.githubusercontent.com/{entry.repo}/{git_ref}"
    return tuple(f"{root}/{doc_path}" for doc_path in entry.paths)
File without changes
@@ -0,0 +1,171 @@
1
+ """Doc retrieval - the body of the ``search_docs`` MCP tool.
2
+
3
+ Ported from docchat/sidecar/src/docchat_sidecar/tools.py SearchDocsTool.
4
+ Two differences from the upstream:
5
+
6
+ 1. Synchronous (embedded Qdrant is sync). The MCP server runs the call
7
+ in a thread pool when invoked from an async tool handler.
8
+ 2. Returns a formatted text string + citation list directly, no
9
+ intermediate ``ToolResult`` dataclass - the MCP server flattens this
10
+ into a single string the client model consumes.
11
+
12
+ Per-library cosine score floors carry over from docchat's eval-tuned
13
+ defaults (ADR-008 / ADR-011 / ADR-012): React 0.15 (default),
14
+ FastAPI 0.10, Vue 0.05. Empirical, not principled - reflects how dense
15
+ each library's doc corpus is.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import logging
21
+ from dataclasses import dataclass
22
+
23
+ from openai import OpenAI
24
+ from qdrant_client import QdrantClient
25
+
26
+ from docchat_server.library_config import collection_name_for
27
+
28
+ __all__ = ["Citation", "SearchResult", "search_docs"]
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ _DEFAULT_FLOOR = 0.15
33
+ _FLOORS_BY_LIBRARY: dict[str, float] = {
34
+ "fastapi": 0.10,
35
+ "vue": 0.05,
36
+ }
37
+
38
+
39
+ @dataclass(frozen=True, kw_only=True)
40
+ class Citation:
41
+ """Citation surfaced alongside a retrieval result."""
42
+
43
+ library: str
44
+ version: str
45
+ source: str
46
+ source_url: str | None = None
47
+
48
+ def render(self) -> str:
49
+ return f"[{self.library}@{self.version}:{self.source}]"
50
+
51
+
52
+ @dataclass(frozen=True, kw_only=True)
53
+ class SearchResult:
54
+ """Top-K retrieval result for a single search_docs call."""
55
+
56
+ text: str
57
+ citations: tuple[Citation, ...]
58
+ top_scores: tuple[float, ...]
59
+
60
+
61
def _floor_for(library: str, override: float | None) -> float:
    """Pick the score floor: the explicit override, else the per-library default.

    The library lookup is case-insensitive and falls back to ``_DEFAULT_FLOOR``
    for libraries without their own entry.
    """
    if override is None:
        return _FLOORS_BY_LIBRARY.get(library.lower(), _DEFAULT_FLOOR)
    return override
65
+
66
+
67
def search_docs(
    *,
    qdrant: QdrantClient,
    openai: OpenAI,
    library: str,
    version: str,
    query: str,
    api_name: str | None = None,
    top_k: int = 5,
    score_floor: float | None = None,
    embed_model: str = "text-embedding-3-small",
) -> SearchResult:
    """Retrieve top-k chunks from the (library, version) collection.

    Drops hits below the per-library cosine floor. Returns a canonical
    "No relevant chunks found" string when nothing clears the floor - the
    calling LLM should treat that as a refusal signal rather than guess.

    Args:
        qdrant: Client for the vector store holding the indexed chunks.
        openai: Client used to embed the query.
        library: Library name; with ``version`` it selects the collection.
        version: Pinned version string.
        query: Natural-language query to embed and search for.
        api_name: optional post-filter on chunk payload.api_name
            (case-insensitive startswith). Use when the user's query
            names a specific API to constrain to chunks for that API.
            Extra candidates are fetched when it is set so that matching
            chunks ranked below ``top_k`` are still found.
        top_k: Maximum number of chunks to return.
        score_floor: Explicit score floor; ``None`` uses the per-library one.
        embed_model: Embedding model name used for the query.

    Returns:
        A ``SearchResult`` with the formatted chunk text, one citation per
        distinct source page, and the rounded scores of the leading raw hits.
    """
    collection = collection_name_for(library, version)
    if not qdrant.collection_exists(collection_name=collection):
        return SearchResult(
            text=(
                f"No indexed docs for {library}@{version}. "
                f"Run `docchat-server index {library} {version}` to populate."
            ),
            citations=(),
            top_scores=(),
        )

    response = openai.embeddings.create(model=embed_model, input=[query])
    query_vector = response.data[0].embedding

    # The api_name filter runs client-side after the vector search. Fetching
    # only top_k candidates would discard matching chunks ranked just below
    # the cut, so over-fetch when the filter is active and trim afterwards.
    fetch_limit = top_k * 4 if api_name else top_k
    query_response = qdrant.query_points(
        collection_name=collection,
        query=query_vector,
        limit=fetch_limit,
    )
    raw_hits = query_response.points

    top_scores = tuple(round(getattr(h, "score", 0.0), 3) for h in raw_hits[:5])
    floor = _floor_for(library, score_floor)
    if raw_hits:
        logger.info(
            "search_docs %s@%s floor=%.2f top-scores=%r query=%r",
            library, version, floor, list(top_scores), query,
        )

    hits = [h for h in raw_hits if getattr(h, "score", 0.0) >= floor]
    if api_name:
        api_lower = api_name.lower()
        hits = [
            h
            for h in hits
            # ``or ""`` guards against payloads whose api_name is missing or None.
            if str((h.payload or {}).get("api_name") or "").lower().startswith(api_lower)
        ]
        # Trim the over-fetched candidates back to the requested size.
        hits = hits[:top_k]

    if not hits:
        return SearchResult(
            text=f"No relevant chunks found for {query!r}.",
            citations=(),
            top_scores=top_scores,
        )

    text_parts: list[str] = []
    citations: list[Citation] = []
    seen_sources: set[str] = set()
    for hit in hits:
        payload = hit.payload or {}
        chunk_text = payload.get("text", "")
        source_url = payload.get("source_url", "")
        source_label = source_url.rsplit("/", 1)[-1] if source_url else "doc"
        payload_lib = payload.get("library", library)
        payload_ver = payload.get("version", version)
        hit_api_name = payload.get("api_name")
        section_heading = payload.get("section_heading")
        header = f"## {payload_lib}@{payload_ver}"
        if hit_api_name:
            header += f" - {hit_api_name}"
        location_bits: list[str] = [source_label] if source_label else []
        if section_heading:
            location_bits.append(section_heading)
        if location_bits:
            header += f" ({' / '.join(location_bits)})"
        text_parts.append(f"{header}\n\n{chunk_text}")
        # De-duplicate on the full URL: different pages can share a file name
        # (e.g. several "index.md"), and each deserves its own citation.
        source_key = source_url or source_label
        if source_key not in seen_sources:
            citations.append(
                Citation(
                    library=payload_lib,
                    version=payload_ver,
                    source=source_label,
                    source_url=source_url or None,
                )
            )
            seen_sources.add(source_key)

    return SearchResult(
        text="\n\n---\n\n".join(text_parts),
        citations=tuple(citations),
        top_scores=top_scores,
    )
@@ -0,0 +1,149 @@
1
+ """FastMCP server - exposes search_docs + list_indexed as MCP tools.
2
+
3
+ Transport: stdio (the default for local MCP servers Claude Code + Cursor
4
+ spawn). All logging goes to stderr - stdout is reserved for the JSON-RPC
5
+ stream and any stray prints will corrupt it.
6
+
7
+ Tools registered:
8
+ - ``search_docs(library, version, query, api_name?, top_k?)`` - the
9
+ version-pinned doc retrieval that's the whole point of this server.
10
+ - ``list_indexed()`` - what (library, version) collections are populated
11
+ in the local embedded Qdrant. Use before search_docs if you're unsure
12
+ what's available.
13
+
14
+ Indexing is deliberately CLI-only (``docchat-server index <lib> <ver>``);
15
+ exposing it as an MCP tool would let any connected LLM trigger arbitrary
16
+ embedding cost / network calls, which is the wrong default.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ import logging
23
+ import os
24
+ import sys
25
+
26
+ from dotenv import load_dotenv
27
+ from fastmcp import FastMCP
28
+ from openai import OpenAI
29
+
30
+ from docchat_server import __version__
31
+ from docchat_server.indexer import open_qdrant
32
+ from docchat_server.library_config import LIBRARY_CONFIG
33
+ from docchat_server.retrieval import search_docs as _search_docs
34
+
35
+ # All logs to stderr so stdout stays clean for MCP JSON-RPC.
36
+ logging.basicConfig(
37
+ level=logging.INFO,
38
+ format="%(name)s | %(levelname)s | %(message)s",
39
+ stream=sys.stderr,
40
+ )
41
+ logger = logging.getLogger("docchat-server")
42
+
43
+ # .env in the cwd is a convenience for local development; production
44
+ # users should rely on environment variables set by the MCP host.
45
+ load_dotenv()
46
+
47
+
48
def _require_openai_key() -> None:
    """Exit with status 2 when ``OPENAI_API_KEY`` is unset or empty.

    Failing at startup gives a clear log message; otherwise the first
    search_docs call would return an opaque error to the LLM.
    """
    if os.environ.get("OPENAI_API_KEY"):
        return
    logger.error(
        "OPENAI_API_KEY is not set. docchat-server uses OpenAI's embeddings "
        "API for retrieval. Set the env var in your MCP host config."
    )
    sys.exit(2)
57
+
58
+
59
+ _require_openai_key()
60
+
61
+ mcp: FastMCP = FastMCP(name="docchat", version=__version__)
62
+
63
+ # Shared resources opened once per server lifetime. Embedded Qdrant is
64
+ # single-process; OpenAI client is connection-pooled.
65
+ _QDRANT = open_qdrant()
66
+ _OPENAI = OpenAI()
67
+
68
+
69
@mcp.tool()
async def search_docs(
    library: str,
    version: str,
    query: str,
    api_name: str | None = None,
    top_k: int = 5,
) -> str:
    """Search the version-pinned documentation for a specific library.

    Returns top-K chunks from the indexed docs of the EXACT pinned version
    (e.g. react@18.2.0, not 19.1.0). Use BEFORE answering any library API
    question to avoid version-mismatched APIs.

    If the result starts with "No relevant chunks found" or "No indexed
    docs", do not hallucinate - tell the user the docs aren't available
    at that pinned version.

    Args:
        library: Library name (e.g. "react", "fastapi", "vue"). Lowercase.
        version: Pinned version string (e.g. "18.2.0", "0.100.0").
        query: Natural-language question about the library.
        api_name: Optional - constrain to chunks tagged with this API name
            (case-insensitive startswith). Use when the query names a
            specific API like "useState" or "Depends".
        top_k: Max chunks to return. Default 5.
    """
    # The retrieval function is synchronous; a worker thread keeps the
    # event loop free while it runs.
    outcome = await asyncio.to_thread(
        _search_docs,
        qdrant=_QDRANT,
        openai=_OPENAI,
        library=library,
        version=version,
        query=query,
        api_name=api_name,
        top_k=top_k,
    )
    if not outcome.citations:
        return outcome.text
    rendered = ", ".join(citation.render() for citation in outcome.citations)
    return outcome.text + "\n\nCitations: " + rendered
113
+
114
+
115
@mcp.tool()
async def list_indexed() -> dict[str, object]:
    """List all collections currently indexed in the local Qdrant store.

    Returns a dict with ``collections`` (list of {name, points_count};
    ``points_count`` is -1 when the count could not be read),
    ``supported_libraries`` (which libraries this server's indexer knows
    how to populate) and ``qdrant_path`` (location of the embedded store,
    or "" when it cannot be determined).

    Collection names are ``<library>_<version>`` with every run of
    non-alphanumeric characters replaced by ``_`` (e.g. ``react_18_2_0``
    for react@18.2.0).
    """

    def _query() -> dict[str, object]:
        collections = _QDRANT.get_collections().collections
        rows: list[dict[str, object]] = []
        for c in collections:
            try:
                count = _QDRANT.count(collection_name=c.name).count
            except Exception:
                # Best effort: one unreadable collection must not fail the
                # whole listing; -1 marks the count as unknown.
                count = -1
            rows.append({"name": c.name, "points_count": count})
        # ``_client`` is a private attribute of the Qdrant client; read it
        # defensively so a missing attribute yields "" instead of an error.
        location = getattr(getattr(_QDRANT, "_client", None), "location", "")
        return {
            "collections": rows,
            "supported_libraries": sorted(LIBRARY_CONFIG.keys()),
            "qdrant_path": str(location),
        }

    # Qdrant calls are synchronous; run them off the event loop.
    return await asyncio.to_thread(_query)
140
+
141
+
142
def main() -> None:
    """Log a startup line, then run the MCP server over stdio.

    This is the entrypoint for ``docchat-server serve`` (or direct execution).
    """
    startup_format = "docchat-server %s starting on stdio"
    logger.info(startup_format, __version__)
    mcp.run(transport="stdio")
146
+
147
+
148
+ if __name__ == "__main__":
149
+ main()
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.4
2
+ Name: docchat-server
3
+ Version: 0.0.1
4
+ Summary: Version-pinned documentation retrieval as a Model Context Protocol server. Gives Claude Code / Cursor / any MCP-aware AI grounded answers from the docs of the exact library version your lockfile pins.
5
+ Project-URL: Homepage, https://github.com/AshwinUgale/docchat-mcp
6
+ Project-URL: Repository, https://github.com/AshwinUgale/docchat-mcp
7
+ Project-URL: Issues, https://github.com/AshwinUgale/docchat-mcp/issues
8
+ Author-email: Ashwin Ugale <ugaleashwin@gmail.com>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: claude,cursor,docs,llm,mcp,model-context-protocol,rag,version-pinned
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Classifier: Topic :: Software Development :: Documentation
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: fastmcp>=0.4
22
+ Requires-Dist: httpx>=0.27
23
+ Requires-Dist: openai>=1.40
24
+ Requires-Dist: python-dotenv>=1.0
25
+ Requires-Dist: qdrant-client>=1.12
26
+ Description-Content-Type: text/markdown
27
+
28
+ # docchat-server
29
+
30
+ > Version-pinned documentation retrieval as a Model Context Protocol server. Gives Claude Code / Cursor / any MCP-aware AI grounded answers from the docs of the exact library version your lockfile pins.
31
+
32
+ [![status](https://img.shields.io/badge/status-alpha-orange)](#)
33
+ [![license](https://img.shields.io/badge/license-MIT-green)](./LICENSE)
34
+ [![version](https://img.shields.io/badge/version-0.0.1-blue)](./pyproject.toml)
35
+
36
+ **Status:** v0.0 — initial scaffold. v0.1 ships PyPI + Smithery registration once the FastMCP server is locally verified.
37
+
38
+ ---
39
+
40
+ ## What it is
41
+
42
+ Claude Code, Cursor, and other AI coding assistants answer library questions from training data. If your project pins `react@18.2.0` and the latest is `19.1.0`, you get React 19 APIs in your React 18 file — the model has no way to know which version actually matters.
43
+
44
+ `docchat-server` is an MCP server that fixes that. Index a library at the version you pin once. Register the server with your MCP host. Now every query to your coding assistant can be grounded in the docs for the *exact pinned version*, with hard refusal when the docs don't cover the question.
45
+
46
+ It's the [DocChat VS Code extension](https://github.com/AshwinUgale/docchat) stripped of its agent + chat UI, exposed as an MCP tool surface instead. The retrieval logic, version-aware routing, and per-library cosine score floors are identical (and identically eval-tuned).
47
+
48
+ ---
49
+
50
+ ## Install
51
+
52
+ ```bash
53
+ pip install docchat-server # or: uvx --from docchat-server docchat-server
54
+ ```
55
+
56
+ Requires Python 3.11+ and an `OPENAI_API_KEY` env var (used for query + index-time embeddings). The Qdrant vector store runs *embedded* — no Docker, no separate server.
57
+
58
+ ---
59
+
60
+ ## Use (3 steps)
61
+
62
+ ### 1. Index the libraries you care about
63
+
64
+ ```bash
65
+ export OPENAI_API_KEY=sk-...
66
+
67
+ docchat-server index react 18.2.0
68
+ docchat-server index fastapi 0.100.0
69
+ docchat-server index vue 3.4.0
70
+ ```
71
+
72
+ Each index run takes ~30–60 seconds and costs a few cents in embedding calls. The index is stored at `~/.docchat-server/qdrant/`.
73
+
74
+ ### 2. Verify
75
+
76
+ ```bash
77
+ docchat-server list
78
+ ```
79
+
80
+ ```
81
+ Indexed collections:
82
+ - react_18_2_0 (47 chunks)
83
+ - fastapi_0_100_0 (38 chunks)
84
+ - vue_3_4_0 (62 chunks)
85
+
86
+ Supported libraries: fastapi, react, vue
87
+ ```
88
+
89
+ ### 3. Register with your MCP host
90
+
91
+ Claude Desktop: add this to `claude_desktop_config.json` (`~/Library/Application Support/Claude/` on macOS, `%APPDATA%\Claude\` on Windows). Claude Code: put the same block in a project-level `.mcp.json`:
92
+
93
+ ```json
94
+ {
95
+ "mcpServers": {
96
+ "docchat": {
97
+ "command": "docchat-server",
98
+ "args": ["serve"],
99
+ "env": {
100
+ "OPENAI_API_KEY": "sk-..."
101
+ }
102
+ }
103
+ }
104
+ }
105
+ ```
106
+
107
+ Restart your MCP host. The `docchat` server should appear with two tools: `search_docs` and `list_indexed`.
108
+
109
+ ---
110
+
111
+ ## The tools
112
+
113
+ ### `search_docs(library, version, query, api_name?, top_k?)`
114
+
115
+ Retrieves top-K chunks from the indexed docs of the exact pinned version. Returns the chunks with citations, or `"No relevant chunks found"` if nothing clears the per-library cosine floor (a hard signal to the model that it should refuse rather than guess).
116
+
117
+ ### `list_indexed()`
118
+
119
+ Returns the collections currently populated locally. Useful as a session-start probe — your assistant can call this once to know what's available before answering anything.
120
+
121
+ ---
122
+
123
+ ## Sibling project
124
+
125
+ The same retrieval engine ships as a [VS Code extension on the Marketplace](https://marketplace.visualstudio.com/items?itemName=AshwinUgale.docchat). Source: https://github.com/AshwinUgale/docchat. If you want a chat panel instead of MCP-tool integration, install that.
126
+
127
+ ---
128
+
129
+ ## Roadmap
130
+
131
+ - **v0.1** — PyPI publish, Smithery listing, README screenshots from real Claude Code session
132
+ - **v0.2** — `detect_pinned_libraries(workspace_path)` tool (parse package.json / pyproject.toml / requirements.txt and report pinned versions to the assistant)
133
+ - **v0.3** — `--repo` / `--paths` flags for arbitrary library indexing (extend beyond the built-in react / fastapi / vue)
134
+ - **v0.4** — local embeddings via sentence-transformers (drop the OpenAI dependency for the embed step)
135
+
136
+ ---
137
+
138
+ ## License
139
+
140
+ MIT. See [LICENSE](./LICENSE).
@@ -0,0 +1,12 @@
1
+ docchat_server/__init__.py,sha256=a3G4CL9CTKViYNwUs6CMTVZkO2ZDSpwroDsW-ZN67K8,456
2
+ docchat_server/cli.py,sha256=4U1TE7AWOnvI3unwGtKem79GyHWHS7VlobtbhXSue5Q,3558
3
+ docchat_server/indexer.py,sha256=d5ooFf0_cIQ8T6MpMSkuWJhCJ9zDyVMw-z_Q7QpzaEM,9486
4
+ docchat_server/library_config.py,sha256=J16tMsmpsis2wsrRlHCqnQmIFHk70IfJvRHoPS8-SY8,4285
5
+ docchat_server/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ docchat_server/retrieval.py,sha256=sccPnZuOj9sWOsdgElAeau-3C5NW5PaXTt-xZX5M3so,5504
7
+ docchat_server/server.py,sha256=H9GsIJYCS-TyuC74Tz1Asu1klr0pk0Y0pj5avFGMP40,5023
8
+ docchat_server-0.0.1.dist-info/METADATA,sha256=qXKDFfN7589Q513p81EdSrZLM6xnQjkhzNZxRQHYMtU,5387
9
+ docchat_server-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ docchat_server-0.0.1.dist-info/entry_points.txt,sha256=g4vlJ09aZDAwBW9G1yy-X5-jhRyB1zvQWtjkPl1JmjI,59
11
+ docchat_server-0.0.1.dist-info/licenses/LICENSE,sha256=P8rHqt8ByxNDoOk_AzGbfaUCbCR0WdIYhPKg8tsM_NU,1069
12
+ docchat_server-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ docchat-server = docchat_server.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ashwin Ugale
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.