PyPI - docchat-server - Versions diffs - 0.0.1__py3-none-any.whl - Mend

docchat-server 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

docchat_server/__init__.py +12 -0
docchat_server/cli.py +111 -0
docchat_server/indexer.py +261 -0
docchat_server/library_config.py +124 -0
docchat_server/py.typed +0 -0
docchat_server/retrieval.py +171 -0
docchat_server/server.py +149 -0
docchat_server-0.0.1.dist-info/METADATA +140 -0
docchat_server-0.0.1.dist-info/RECORD +12 -0
docchat_server-0.0.1.dist-info/WHEEL +4 -0
docchat_server-0.0.1.dist-info/entry_points.txt +2 -0
docchat_server-0.0.1.dist-info/licenses/LICENSE +21 -0

docchat_server/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""docchat-server - version-pinned documentation retrieval as an MCP server.
+Stripped-down sibling of docchat (https://github.com/AshwinUgale/docchat).
+Exposes ``search_docs`` and ``list_indexed`` MCP tools that Claude Code,
+Cursor, Cline, or any other MCP-aware client can call to ground library
+questions in the exact pinned-version docs instead of training data.
+"""
+from __future__ import annotations
+__version__ = "0.0.1"
+__all__ = ["__version__"]

docchat_server/cli.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""docchat-server CLI.
+Subcommands:
+- ``docchat-server serve``                  - run the MCP server on stdio.
+- ``docchat-server index <library> <ver>``  - populate the local Qdrant.
+- ``docchat-server list``                   - show indexed collections.
+The ``serve`` subcommand is what an MCP host (Claude Code, Cursor, Cline)
+spawns. The ``index`` and ``list`` subcommands are for the user, run
+manually to set up before pointing an MCP host at the server.
+"""
+from __future__ import annotations
+import argparse
+import os
+import sys
+from dotenv import load_dotenv
+from docchat_server import __version__
+def _cmd_serve(_args: argparse.Namespace) -> int:
+    # Import inside the handler so `docchat-server index` doesn't pull in
+    # FastMCP (which checks OPENAI_API_KEY at import time in server.py).
+    from docchat_server.server import main as serve_main
+    serve_main()
+    return 0
+def _cmd_index(args: argparse.Namespace) -> int:
+    load_dotenv()
+    if not os.environ.get("OPENAI_API_KEY"):
+        print(
+            "ERROR: OPENAI_API_KEY is not set. docchat-server uses OpenAI's "
+            "embeddings API. Set it in your shell or a .env file.",
+            file=sys.stderr,
+        )
+        return 2
+    from openai import OpenAI
+    from docchat_server.indexer import DocIndexer, open_qdrant
+    qdrant = open_qdrant()
+    indexer = DocIndexer(qdrant=qdrant, openai=OpenAI())
+    def _progress(msg: str) -> None:
+        print(f"[indexer] {msg}", file=sys.stderr, flush=True)
+    try:
+        total = indexer.index(args.library, args.version, progress=_progress)
+    except ValueError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        return 2
+    except RuntimeError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        return 1
+    print(f"indexed {total} chunks into {args.library}@{args.version}")
+    return 0
+def _cmd_list(_args: argparse.Namespace) -> int:
+    from docchat_server.indexer import open_qdrant
+    from docchat_server.library_config import LIBRARY_CONFIG
+    qdrant = open_qdrant()
+    collections = qdrant.get_collections().collections
+    print("Indexed collections:")
+    if not collections:
+        print("  (none yet - run `docchat-server index <library> <version>`)")
+    for c in collections:
+        try:
+            count = qdrant.count(collection_name=c.name).count
+        except Exception:
+            count = "?"
+        print(f"  - {c.name}  ({count} chunks)")
+    print()
+    print(f"Supported libraries: {', '.join(sorted(LIBRARY_CONFIG.keys()))}")
+    return 0
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(
+        prog="docchat-server",
+        description="Version-pinned doc retrieval as an MCP server.",
+    )
+    parser.add_argument("--version", action="version", version=f"docchat-server {__version__}")
+    sub = parser.add_subparsers(dest="cmd", required=True)
+    p_serve = sub.add_parser("serve", help="run the MCP server on stdio")
+    p_serve.set_defaults(func=_cmd_serve)
+    p_index = sub.add_parser(
+        "index", help="fetch + embed + upsert docs for one (library, version)"
+    )
+    p_index.add_argument("library", help="e.g. react, fastapi, vue")
+    p_index.add_argument("version", help="e.g. 18.2.0, 0.100.0, 3.4.0")
+    p_index.set_defaults(func=_cmd_index)
+    p_list = sub.add_parser("list", help="show indexed collections")
+    p_list.set_defaults(func=_cmd_list)
+    args = parser.parse_args(argv)
+    return int(args.func(args))
+if __name__ == "__main__":
+    sys.exit(main())

docchat_server/indexer.py ADDED Viewed

@@ -0,0 +1,261 @@
+"""Doc indexer - fetch + chunk + embed + store for one (library, version).
+Ported from docchat/sidecar/src/docchat_sidecar/indexer.py with two changes:
+1. Uses ``QdrantClient`` in embedded mode (local path, no Docker) instead
+   of ``AsyncQdrantClient`` against a running server. The embedded mode
+   writes to ``~/.docchat-server/qdrant/`` and is fine for the
+   single-user / hundreds-of-thousands-of-vectors scale this MCP server
+   serves. Users can override via the ``QDRANT_PATH`` env var.
+2. Drops the streaming-progress protocol (IndexProgress / IndexComplete
+   frames). The MCP server doesn't need WebSocket-style progress events;
+   the CLI prints a simple progress line per page fetched.
+"""
+from __future__ import annotations
+import logging
+import re
+import sys
+import uuid
+from collections.abc import Iterable
+from dataclasses import dataclass
+from pathlib import Path
+import httpx
+from openai import OpenAI
+from qdrant_client import QdrantClient
+from qdrant_client.http.models import Distance, PointStruct, VectorParams
+from docchat_server.library_config import collection_name_for, urls_for
+__all__ = ["DocIndexer", "default_qdrant_path", "open_qdrant"]
+logger = logging.getLogger(__name__)
+# Mirrors docchat: text-embedding-3-small at 1536 dims, ~500-token chunks.
+_DEFAULT_EMBED_MODEL = "text-embedding-3-small"
+_DEFAULT_DIMENSIONS = 1536
+# Character budget per chunk, compared against in _split_into_chunks.
+_CHUNK_TARGET_CHARS = 2000
+# Matches only the leading "import " / "export " keyword at the start of a
+# line - not the remainder of that line.
+_MDX_NOISE_RE = re.compile(r"^(import |export )", re.MULTILINE)
+# Matches a "---" ... "---" block at the very start of the text (frontmatter).
+_FRONTMATTER_RE = re.compile(r"\A---\n.*?\n---\n", re.DOTALL)
+# Matches a level-2 markdown heading line; group 1 is the heading text.
+_H2_HEADING_RE = re.compile(r"^##\s+(.+?)\s*$")
+def default_qdrant_path() -> Path:
+    """Embedded-Qdrant storage directory. ``$QDRANT_PATH`` overrides."""
+    import os
+    override = os.environ.get("QDRANT_PATH")
+    if override:
+        return Path(override).expanduser()
+    return Path.home() / ".docchat-server" / "qdrant"
+def open_qdrant(path: Path | None = None) -> QdrantClient:
+    """Open the embedded Qdrant store, creating the parent dir if needed."""
+    p = path or default_qdrant_path()
+    p.mkdir(parents=True, exist_ok=True)
+    return QdrantClient(path=str(p))
+@dataclass(frozen=True, kw_only=True)
+class _Chunk:
+    """One chunk of a fetched doc page, held until it is embedded and upserted."""
+    source_url: str  # URL the page was fetched from
+    chunk_index: int  # position of this chunk within its page, starting at 0
+    text: str  # chunk body; this is the text that gets embedded
+    api_name: str  # name derived from the URL by _api_name_from_url
+    section_heading: str | None  # H2 heading reported by _split_into_chunks, if any
+class DocIndexer:
+    """Fetch + chunk + embed + write docs for one (library, version).
+    Synchronous (since embedded Qdrant is sync). Progress is reported via
+    a callable so the CLI can print to stderr; pass ``progress=None`` for
+    silent operation.
+    """
+    def __init__(
+        self,
+        *,
+        qdrant: QdrantClient,
+        openai: OpenAI,
+        embed_model: str = _DEFAULT_EMBED_MODEL,
+        embed_dimensions: int = _DEFAULT_DIMENSIONS,
+        http: httpx.Client | None = None,
+    ) -> None:
+        self._qdrant = qdrant
+        self._openai = openai
+        self._embed_model = embed_model
+        self._embed_dimensions = embed_dimensions
+        self._http = http
+    def index(
+        self,
+        library: str,
+        version: str,
+        *,
+        progress: "Callable[[str], None] | None" = None,
+    ) -> int:
+        """Index one (library, version). Returns count of chunks upserted.
+        Raises:
+            ValueError: library not in LIBRARY_CONFIG.
+            RuntimeError: fetched 0 chunks (network or source-URL drift).
+        """
+        urls = urls_for(library, version)
+        if not urls:
+            raise ValueError(
+                f"no indexer wired for {library!r}; supported: react, fastapi, vue. "
+                "Add a LibraryConfig entry in library_config.py to extend."
+            )
+        collection = collection_name_for(library, version)
+        self._reset_collection(collection)
+        owns_http = self._http is None
+        http = self._http or httpx.Client(timeout=30.0, follow_redirects=True)
+        chunks: list[_Chunk] = []
+        try:
+            for page_index, url in enumerate(urls):
+                if progress:
+                    progress(f"fetching {page_index + 1}/{len(urls)}: {url.rsplit('/', 1)[-1]}")
+                try:
+                    response = http.get(url)
+                    response.raise_for_status()
+                except httpx.HTTPError as exc:
+                    logger.warning("skipping %s: %s", url, exc)
+                    continue
+                text = _clean_mdx(response.text)
+                api_name = _api_name_from_url(url)
+                for idx, (chunk_text, section_heading) in enumerate(_split_into_chunks(text)):
+                    chunks.append(
+                        _Chunk(
+                            source_url=url,
+                            chunk_index=idx,
+                            text=chunk_text,
+                            api_name=api_name,
+                            section_heading=section_heading,
+                        )
+                    )
+            total = len(chunks)
+            if total == 0:
+                raise RuntimeError(
+                    f"fetched 0 chunks for {library}@{version}; check network or "
+                    "source URLs in library_config.py"
+                )
+            BATCH = 16
+            for batch_start in range(0, total, BATCH):
+                batch = chunks[batch_start : batch_start + BATCH]
+                vectors = self._embed([c.text for c in batch])
+                points = [
+                    PointStruct(
+                        id=str(uuid.uuid4()),
+                        vector=vector,
+                        payload={
+                            "library": library,
+                            "version": version,
+                            "source_url": c.source_url,
+                            "chunk_index": c.chunk_index,
+                            "text": c.text,
+                            "api_name": c.api_name,
+                            "section_heading": c.section_heading,
+                        },
+                    )
+                    for c, vector in zip(batch, vectors, strict=True)
+                ]
+                self._qdrant.upsert(collection_name=collection, points=points)
+                if progress:
+                    done = min(batch_start + BATCH, total)
+                    progress(f"embedded + upserted {done}/{total}")
+            return total
+        finally:
+            if owns_http:
+                http.close()
+    def _reset_collection(self, collection: str) -> None:
+        """Drop + recreate the collection so re-indexing is idempotent."""
+        if self._qdrant.collection_exists(collection_name=collection):
+            self._qdrant.delete_collection(collection_name=collection)
+        self._qdrant.create_collection(
+            collection_name=collection,
+            vectors_config=VectorParams(size=self._embed_dimensions, distance=Distance.COSINE),
+        )
+    def _embed(self, texts: list[str]) -> list[list[float]]:
+        response = self._openai.embeddings.create(model=self._embed_model, input=texts)
+        return [item.embedding for item in response.data]
+# ---------------------------------------------------------------------------
+# Helpers (module-private; tested via the public DocIndexer)
+# ---------------------------------------------------------------------------
+def _api_name_from_url(url: str) -> str:
+    """Derive a stable API name from a doc-source URL.
+    Examples:
+        ".../reference/react/useState.md"        -> "useState"
+        ".../docs/tutorial/dependencies/index.md" -> "dependencies"
+    """
+    tail = url.rsplit("/", 1)[-1]
+    stem = tail.removesuffix(".md").removesuffix(".mdx")
+    if stem == "index":
+        parts = url.rstrip("/").split("/")
+        if len(parts) >= 2:
+            return parts[-2]
+    return stem
+def _clean_mdx(raw: str) -> str:
+    """Strip MDX frontmatter + import/export lines so we're left with prose."""
+    no_frontmatter = _FRONTMATTER_RE.sub("", raw, count=1)
+    no_imports = _MDX_NOISE_RE.sub("", no_frontmatter)
+    return no_imports.strip()
+def _split_into_chunks(text: str) -> Iterable[tuple[str, str | None]]:
+    """Paragraph-aware splitter targeting ~500-token chunks, with H2 heading capture."""
+    if not text.strip():
+        return
+    buffer: list[str] = []
+    buffer_len = 0
+    current_heading: str | None = None
+    chunk_start_heading: str | None = None
+    for paragraph in re.split(r"\n\s*\n", text):
+        paragraph = paragraph.strip()
+        if not paragraph:
+            continue
+        first_line = paragraph.splitlines()[0]
+        match = _H2_HEADING_RE.match(first_line)
+        if match:
+            current_heading = match.group(1).strip()
+        para_len = len(paragraph)
+        if buffer and buffer_len + para_len > _CHUNK_TARGET_CHARS:
+            yield "\n\n".join(buffer), chunk_start_heading
+            buffer = [paragraph]
+            buffer_len = para_len
+            chunk_start_heading = current_heading
+        else:
+            if not buffer:
+                chunk_start_heading = current_heading
+            buffer.append(paragraph)
+            buffer_len += para_len + 2
+    if buffer:
+        yield "\n\n".join(buffer), chunk_start_heading
+# Re-export so callers can type-hint without an extra import.
+# ``DocIndexer.index`` also names ``Callable`` in a string annotation, and
+# this module-level binding is the only place that name is defined here.
+from collections.abc import Callable as _Callable  # noqa: E402
+Callable = _Callable  # type: ignore[assignment]
+def _eprint(*args: object) -> None:
+    """stderr print helper - the CLI passes this as progress to keep stdout clean."""
+    print(*args, file=sys.stderr, flush=True)

docchat_server/library_config.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""Per-library doc-source config + Qdrant collection naming.
+Ported verbatim from docchat/sidecar/src/docchat_sidecar/indexer.py
+(_LIBRARY_CONFIG). Each library declares the source repo, the doc paths to
+fetch, and a ``ref_for(version)`` callable that maps the user's pinned
+version to a git ref.
+For libraries whose docs live in the same repo as the released source
+(FastAPI, Flask), ``ref_for`` returns the version tag - so indexing
+``fastapi@0.100.0`` fetches Pydantic-v2-era docs from the 0.100.0 tag.
+For libraries whose docs live in a separate untagged repo (React, Vue),
+``ref_for`` returns ``"main"`` and the chunk metadata still surfaces the
+user's pinned version via the collection name + chunk header.
+"""
+from __future__ import annotations
+import re
+from collections.abc import Callable
+from dataclasses import dataclass
+__all__ = ["LibraryConfig", "LIBRARY_CONFIG", "collection_name_for", "urls_for"]
+# Doc files fetched for "react"; each path is appended to the repo/ref base
+# URL built in urls_for.
+_REACT_DOC_PATHS: tuple[str, ...] = (
+    "src/content/reference/react/useState.md",
+    "src/content/reference/react/useEffect.md",
+    "src/content/reference/react/useContext.md",
+    "src/content/reference/react/useReducer.md",
+    "src/content/reference/react/useMemo.md",
+    "src/content/reference/react/useCallback.md",
+    "src/content/reference/react/useRef.md",
+    "src/content/reference/react/useId.md",
+    "src/content/reference/react/useSyncExternalStore.md",
+    "src/content/reference/react/useTransition.md",
+)
+# Doc files fetched for "fastapi" (tutorial pages, including two under
+# tutorial/dependencies/).
+_FASTAPI_DOC_PATHS: tuple[str, ...] = (
+    "docs/en/docs/tutorial/first-steps.md",
+    "docs/en/docs/tutorial/path-params.md",
+    "docs/en/docs/tutorial/query-params.md",
+    "docs/en/docs/tutorial/body.md",
+    "docs/en/docs/tutorial/response-model.md",
+    "docs/en/docs/tutorial/dependencies/index.md",
+    "docs/en/docs/tutorial/background-tasks.md",
+    "docs/en/docs/tutorial/middleware.md",
+    "docs/en/docs/tutorial/cors.md",
+    "docs/en/docs/tutorial/dependencies/dependencies-with-yield.md",
+)
+# Doc files fetched for "vue" (API reference pages plus three guide pages).
+_VUE_DOC_PATHS: tuple[str, ...] = (
+    "src/api/reactivity-core.md",
+    "src/api/reactivity-utilities.md",
+    "src/api/composition-api-setup.md",
+    "src/api/composition-api-lifecycle.md",
+    "src/api/composition-api-dependency-injection.md",
+    "src/api/general.md",
+    "src/api/sfc-script-setup.md",
+    "src/guide/essentials/reactivity-fundamentals.md",
+    "src/guide/essentials/computed.md",
+    "src/guide/essentials/watchers.md",
+)
+@dataclass(frozen=True, kw_only=True)
+class LibraryConfig:
+    """Per-library doc-source config used by urls_for to build raw-GitHub URLs."""
+    repo: str  # GitHub "owner/name" slug placed after raw.githubusercontent.com/
+    paths: tuple[str, ...]  # doc file paths appended to the repo/ref base URL
+    ref_for: Callable[[str], str]  # maps the requested version to the git ref to fetch
+def _fastapi_ref(version: str) -> str:
+    """FastAPI is tagged per release; the docs at that tag reflect the
+    correct Pydantic generation (v1 for <0.100, v2 for >=0.100)."""
+    return version
+def _docs_repo_main(_: str) -> str:
+    """React/Vue docs aren't version-tagged; always fetch from main."""
+    return "main"
+# Registry of supported libraries, keyed by lowercase library name
+# (urls_for lowercases its argument before looking it up here).
+LIBRARY_CONFIG: dict[str, LibraryConfig] = {
+    # Docs come from the "main" ref regardless of the requested version.
+    "react": LibraryConfig(
+        repo="reactjs/react.dev",
+        paths=_REACT_DOC_PATHS,
+        ref_for=_docs_repo_main,
+    ),
+    # Docs come from the ref equal to the requested version.
+    "fastapi": LibraryConfig(
+        repo="tiangolo/fastapi",
+        paths=_FASTAPI_DOC_PATHS,
+        ref_for=_fastapi_ref,
+    ),
+    # Docs come from the "main" ref regardless of the requested version.
+    "vue": LibraryConfig(
+        repo="vuejs/docs",
+        paths=_VUE_DOC_PATHS,
+        ref_for=_docs_repo_main,
+    ),
+}
+def collection_name_for(library: str, version: str) -> str:
+    """Qdrant collection name for a (library, version) pair.
+    Lowercases the library and replaces ``.`` with ``_`` so Qdrant's
+    collection-name constraints are satisfied. Example::
+        collection_name_for("react", "18.2.0") -> "react_18_2_0"
+    """
+    safe_lib = re.sub(r"[^a-z0-9]+", "_", library.lower()).strip("_")
+    safe_ver = re.sub(r"[^a-z0-9]+", "_", version.lower()).strip("_")
+    return f"{safe_lib}_{safe_ver}"
+def urls_for(library: str, version: str) -> tuple[str, ...]:
+    """Source URLs for a given (library, version), or ()  if unsupported."""
+    config = LIBRARY_CONFIG.get(library.lower())
+    if config is None:
+        return ()
+    ref = config.ref_for(version)
+    base = f"https://raw.githubusercontent.com/{config.repo}/{ref}"
+    return tuple(f"{base}/{path}" for path in config.paths)

docchat_server/py.typed ADDED Viewed

File without changes

docchat_server/retrieval.py ADDED Viewed

@@ -0,0 +1,171 @@
+"""Doc retrieval - the body of the ``search_docs`` MCP tool.
+Ported from docchat/sidecar/src/docchat_sidecar/tools.py SearchDocsTool.
+Two differences from the upstream:
+1. Synchronous (embedded Qdrant is sync). The MCP server runs the call
+   in a thread pool when invoked from an async tool handler.
+2. Returns a formatted text string + citation list directly, no
+   intermediate ``ToolResult`` dataclass - the MCP server flattens this
+   into a single string the client model consumes.
+Per-library cosine score floors carry over from docchat's eval-tuned
+defaults (ADR-008 / ADR-011 / ADR-012): React 0.15 (default),
+FastAPI 0.10, Vue 0.05. Empirical, not principled - reflects how dense
+each library's doc corpus is.
+"""
+from __future__ import annotations
+import logging
+from dataclasses import dataclass
+from openai import OpenAI
+from qdrant_client import QdrantClient
+from docchat_server.library_config import collection_name_for
+__all__ = ["Citation", "SearchResult", "search_docs"]
+logger = logging.getLogger(__name__)
+# Minimum score a hit needs to be returned when the library has no entry below.
+_DEFAULT_FLOOR = 0.15
+# Per-library overrides of the floor, keyed by lowercase library name
+# (_floor_for lowercases its argument before looking it up here).
+_FLOORS_BY_LIBRARY: dict[str, float] = {
+    "fastapi": 0.10,
+    "vue": 0.05,
+}
+@dataclass(frozen=True, kw_only=True)
+class Citation:
+    """Citation surfaced alongside a retrieval result."""
+    library: str
+    version: str
+    source: str
+    source_url: str | None = None
+    def render(self) -> str:
+        return f"[{self.library}@{self.version}:{self.source}]"
+@dataclass(frozen=True, kw_only=True)
+class SearchResult:
+    """Top-K retrieval result for a single search_docs call."""
+    text: str  # formatted chunks, or a "No ..." message when nothing is returned
+    citations: tuple[Citation, ...]  # sources of the returned chunks; empty when none
+    top_scores: tuple[float, ...]  # rounded scores of the first raw hits, before filtering
+def _floor_for(library: str, override: float | None) -> float:
+    if override is not None:
+        return override
+    return _FLOORS_BY_LIBRARY.get(library.lower(), _DEFAULT_FLOOR)
+def search_docs(
+    *,
+    qdrant: QdrantClient,
+    openai: OpenAI,
+    library: str,
+    version: str,
+    query: str,
+    api_name: str | None = None,
+    top_k: int = 5,
+    score_floor: float | None = None,
+    embed_model: str = "text-embedding-3-small",
+) -> SearchResult:
+    """Retrieve top-k chunks from the (library, version) collection.
+    Drops hits below the per-library cosine floor. Returns a canonical
+    "No relevant chunks found" string when nothing clears the floor - the
+    calling LLM should treat that as a refusal signal rather than guess.
+    Args:
+        api_name: optional post-filter on chunk payload.api_name
+            (case-insensitive startswith). Use when the user's query
+            names a specific API to constrain to chunks for that API.
+    """
+    collection = collection_name_for(library, version)
+    if not qdrant.collection_exists(collection_name=collection):
+        return SearchResult(
+            text=(
+                f"No indexed docs for {library}@{version}. "
+                f"Run `docchat-server index {library} {version}` to populate."
+            ),
+            citations=(),
+            top_scores=(),
+        )
+    response = openai.embeddings.create(model=embed_model, input=[query])
+    query_vector = response.data[0].embedding
+    query_response = qdrant.query_points(
+        collection_name=collection,
+        query=query_vector,
+        limit=top_k,
+    )
+    raw_hits = query_response.points
+    top_scores = tuple(round(getattr(h, "score", 0.0), 3) for h in raw_hits[:5])
+    floor = _floor_for(library, score_floor)
+    if raw_hits:
+        logger.info(
+            "search_docs %s@%s floor=%.2f top-scores=%r query=%r",
+            library, version, floor, list(top_scores), query,
+        )
+    hits = [h for h in raw_hits if getattr(h, "score", 0.0) >= floor]
+    if api_name:
+        api_lower = api_name.lower()
+        hits = [
+            h
+            for h in hits
+            if (h.payload or {}).get("api_name", "").lower().startswith(api_lower)
+        ]
+    if not hits:
+        return SearchResult(
+            text=f"No relevant chunks found for {query!r}.",
+            citations=(),
+            top_scores=top_scores,
+        )
+    text_parts: list[str] = []
+    citations: list[Citation] = []
+    seen_sources: set[str] = set()
+    for hit in hits:
+        payload = hit.payload or {}
+        chunk_text = payload.get("text", "")
+        source_url = payload.get("source_url", "")
+        source_label = source_url.rsplit("/", 1)[-1] if source_url else "doc"
+        payload_lib = payload.get("library", library)
+        payload_ver = payload.get("version", version)
+        hit_api_name = payload.get("api_name")
+        section_heading = payload.get("section_heading")
+        header = f"## {payload_lib}@{payload_ver}"
+        if hit_api_name:
+            header += f" - {hit_api_name}"
+        location_bits: list[str] = [source_label] if source_label else []
+        if section_heading:
+            location_bits.append(section_heading)
+        if location_bits:
+            header += f"  ({' / '.join(location_bits)})"
+        text_parts.append(f"{header}\n\n{chunk_text}")
+        if source_label not in seen_sources:
+            citations.append(
+                Citation(
+                    library=payload_lib,
+                    version=payload_ver,
+                    source=source_label,
+                    source_url=source_url or None,
+                )
+            )
+            seen_sources.add(source_label)
+    return SearchResult(
+        text="\n\n---\n\n".join(text_parts),
+        citations=tuple(citations),
+        top_scores=top_scores,
+    )

docchat_server/server.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""FastMCP server - exposes search_docs + list_indexed as MCP tools.
+Transport: stdio (the default for local MCP servers Claude Code + Cursor
+spawn). All logging goes to stderr - stdout is reserved for the JSON-RPC
+stream and any stray prints will corrupt it.
+Tools registered:
+- ``search_docs(library, version, query, api_name?, top_k?)`` - the
+  version-pinned doc retrieval that's the whole point of this server.
+- ``list_indexed()`` - what (library, version) collections are populated
+  in the local embedded Qdrant. Use before search_docs if you're unsure
+  what's available.
+Indexing is deliberately CLI-only (``docchat-server index <lib> <ver>``);
+exposing it as an MCP tool would let any connected LLM trigger arbitrary
+embedding cost / network calls, which is the wrong default.
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import os
+import sys
+from dotenv import load_dotenv
+from fastmcp import FastMCP
+from openai import OpenAI
+from docchat_server import __version__
+from docchat_server.indexer import open_qdrant
+from docchat_server.library_config import LIBRARY_CONFIG
+from docchat_server.retrieval import search_docs as _search_docs
+# All logs to stderr so stdout stays clean for MCP JSON-RPC.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(name)s | %(levelname)s | %(message)s",
+    stream=sys.stderr,
+)
+logger = logging.getLogger("docchat-server")
+# .env in the cwd is a convenience for local development; production
+# users should rely on environment variables set by the MCP host.
+load_dotenv()
+def _require_openai_key() -> None:
+    """Fail fast on startup if the embedding key is missing - otherwise
+    the first search_docs call would return an opaque error to the LLM."""
+    if not os.environ.get("OPENAI_API_KEY"):
+        logger.error(
+            "OPENAI_API_KEY is not set. docchat-server uses OpenAI's embeddings "
+            "API for retrieval. Set the env var in your MCP host config."
+        )
+        sys.exit(2)
+_require_openai_key()
+mcp: FastMCP = FastMCP(name="docchat", version=__version__)
+# Shared resources opened once per server lifetime. Embedded Qdrant is
+# single-process; OpenAI client is connection-pooled.
+_QDRANT = open_qdrant()
+_OPENAI = OpenAI()
+@mcp.tool()
+async def search_docs(
+    library: str,
+    version: str,
+    query: str,
+    api_name: str | None = None,
+    top_k: int = 5,
+) -> str:
+    """Search the version-pinned documentation for a specific library.
+    Returns top-K chunks from the indexed docs of the EXACT pinned version
+    (e.g. react@18.2.0, not 19.1.0). Use BEFORE answering any library API
+    question to avoid version-mismatched APIs.
+    If the result starts with "No relevant chunks found" or "No indexed
+    docs", do not hallucinate - tell the user the docs aren't available
+    at that pinned version.
+    Args:
+        library: Library name (e.g. "react", "fastapi", "vue"). Lowercase.
+        version: Pinned version string (e.g. "18.2.0", "0.100.0").
+        query: Natural-language question about the library.
+        api_name: Optional - constrain to chunks tagged with this API name
+            (case-insensitive startswith). Use when the query names a
+            specific API like "useState" or "Depends".
+        top_k: Max chunks to return. Default 5.
+    """
+    # Run sync retrieval in a thread so we don't block the asyncio loop.
+    result = await asyncio.to_thread(
+        _search_docs,
+        qdrant=_QDRANT,
+        openai=_OPENAI,
+        library=library,
+        version=version,
+        query=query,
+        api_name=api_name,
+        top_k=top_k,
+    )
+    citations_line = (
+        "\n\nCitations: " + ", ".join(c.render() for c in result.citations)
+        if result.citations
+        else ""
+    )
+    return result.text + citations_line
+@mcp.tool()
+async def list_indexed() -> dict[str, object]:
+    """List all (library, version) collections currently indexed locally.
+    Returns a dict with ``collections`` (list of {name, library, version,
+    points_count}) and ``supported_libraries`` (which libraries this
+    server's indexer knows how to populate).
+    """
+    def _query() -> dict[str, object]:
+        collections = _QDRANT.get_collections().collections
+        rows: list[dict[str, object]] = []
+        for c in collections:
+            try:
+                count = _QDRANT.count(collection_name=c.name).count
+            except Exception:
+                count = -1
+            rows.append({"name": c.name, "points_count": count})
+        return {
+            "collections": rows,
+            "supported_libraries": sorted(LIBRARY_CONFIG.keys()),
+            "qdrant_path": str(_QDRANT._client.location if hasattr(_QDRANT, "_client") else ""),
+        }
+    return await asyncio.to_thread(_query)
+def main() -> None:
+    """Entrypoint when invoked as ``docchat-server serve`` (or directly)."""
+    logger.info("docchat-server %s starting on stdio", __version__)
+    mcp.run(transport="stdio")
+if __name__ == "__main__":
+    main()

docchat_server-0.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,140 @@
+Metadata-Version: 2.4
+Name: docchat-server
+Version: 0.0.1
+Summary: Version-pinned documentation retrieval as a Model Context Protocol server. Gives Claude Code / Cursor / any MCP-aware AI grounded answers from the docs of the exact library version your lockfile pins.
+Project-URL: Homepage, https://github.com/AshwinUgale/docchat-mcp
+Project-URL: Repository, https://github.com/AshwinUgale/docchat-mcp
+Project-URL: Issues, https://github.com/AshwinUgale/docchat-mcp/issues
+Author-email: Ashwin Ugale <ugaleashwin@gmail.com>
+License: MIT
+License-File: LICENSE
+Keywords: claude,cursor,docs,llm,mcp,model-context-protocol,rag,version-pinned
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Software Development :: Documentation
+Requires-Python: >=3.11
+Requires-Dist: fastmcp>=0.4
+Requires-Dist: httpx>=0.27
+Requires-Dist: openai>=1.40
+Requires-Dist: python-dotenv>=1.0
+Requires-Dist: qdrant-client>=1.12
+Description-Content-Type: text/markdown
+# docchat-server
+> Version-pinned documentation retrieval as a Model Context Protocol server. Gives Claude Code / Cursor / any MCP-aware AI grounded answers from the docs of the exact library version your lockfile pins.
+[![status](https://img.shields.io/badge/status-alpha-orange)](#)
+[![license](https://img.shields.io/badge/license-MIT-green)](./LICENSE)
+[![version](https://img.shields.io/badge/version-0.0.1-blue)](./pyproject.toml)
+**Status:** v0.0 — initial scaffold. v0.1 ships PyPI + Smithery registration once the FastMCP server is locally verified.
+---
+## What it is
+Claude Code, Cursor, and other AI coding assistants answer library questions from training data. If your project pins `react@18.2.0` and the latest is `19.1.0`, you get React 19 APIs in your React 18 file — the model has no way to know which version actually matters.
+`docchat-server` is an MCP server that fixes that. Index a library once, at the version you pin. Register the server with your MCP host. Now every query to your coding assistant can be grounded in the docs for the *exact pinned version*, with a hard refusal when the docs don't cover the question.
+It's the [DocChat VS Code extension](https://github.com/AshwinUgale/docchat) stripped of its agent + chat UI, exposed as an MCP tool surface instead. The retrieval logic, version-aware routing, and per-library cosine score floors are identical (and identically eval-tuned).
+---
+## Install
+```bash
+pip install docchat-server        # or: uvx --from docchat-server docchat-server
+```
+Requires Python 3.11+ and an `OPENAI_API_KEY` env var (used for query + index-time embeddings). The Qdrant vector store runs *embedded* — no Docker, no separate server.
+---
+## Use (3 steps)
+### 1. Index the libraries you care about
+```bash
+export OPENAI_API_KEY=sk-...
+docchat-server index react 18.2.0
+docchat-server index fastapi 0.100.0
+docchat-server index vue 3.4.0
+```
+Each indexing run takes ~30–60 seconds and costs a few cents in embeddings. The index is stored at `~/.docchat-server/qdrant/`.
+### 2. Verify
+```bash
+docchat-server list
+```
+```
+Indexed collections:
+  - react_18_2_0    (47 chunks)
+  - fastapi_0_100_0 (38 chunks)
+  - vue_3_4_0       (62 chunks)
+Supported libraries: fastapi, react, vue
+```
+### 3. Register with your MCP host
+Claude Desktop / Claude Code: add to your MCP config (`~/.config/claude/mcp-config.json` on Mac/Linux, `%APPDATA%\Claude\mcp-config.json` on Windows):
+```json
+{
+  "mcpServers": {
+    "docchat": {
+      "command": "docchat-server",
+      "args": ["serve"],
+      "env": {
+        "OPENAI_API_KEY": "sk-..."
+      }
+    }
+  }
+}
+```
+Restart your MCP host. The `docchat` server should appear with two tools: `search_docs` and `list_indexed`.
+---
+## The tools
+### `search_docs(library, version, query, api_name?, top_k?)`
+Retrieves top-K chunks from the indexed docs of the exact pinned version. Returns the chunks with citations, or `"No relevant chunks found"` if nothing clears the per-library cosine floor (a hard signal to the model that it should refuse rather than guess).
+### `list_indexed()`
+Returns the collections currently populated locally. Useful as a session-start probe — your assistant can call this once to know what's available before answering anything.
+---
+## Sibling project
+The same retrieval engine ships as a [VS Code extension on the Marketplace](https://marketplace.visualstudio.com/items?itemName=AshwinUgale.docchat). Source: https://github.com/AshwinUgale/docchat. If you want a chat panel instead of MCP-tool integration, install that.
+---
+## Roadmap
+- **v0.1** — PyPI publish, Smithery listing, README screenshots from real Claude Code session
+- **v0.2** — `detect_pinned_libraries(workspace_path)` tool (parse package.json / pyproject.toml / requirements.txt and report pinned versions to the assistant)
+- **v0.3** — `--repo` / `--paths` flags for arbitrary library indexing (extend beyond the built-in react / fastapi / vue)
+- **v0.4** — local embeddings via sentence-transformers (drop the OpenAI dependency for the embed step)
+---
+## License
+MIT. See [LICENSE](./LICENSE).

docchat_server-0.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,12 @@
+docchat_server/__init__.py,sha256=a3G4CL9CTKViYNwUs6CMTVZkO2ZDSpwroDsW-ZN67K8,456
+docchat_server/cli.py,sha256=4U1TE7AWOnvI3unwGtKem79GyHWHS7VlobtbhXSue5Q,3558
+docchat_server/indexer.py,sha256=d5ooFf0_cIQ8T6MpMSkuWJhCJ9zDyVMw-z_Q7QpzaEM,9486
+docchat_server/library_config.py,sha256=J16tMsmpsis2wsrRlHCqnQmIFHk70IfJvRHoPS8-SY8,4285
+docchat_server/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+docchat_server/retrieval.py,sha256=sccPnZuOj9sWOsdgElAeau-3C5NW5PaXTt-xZX5M3so,5504
+docchat_server/server.py,sha256=H9GsIJYCS-TyuC74Tz1Asu1klr0pk0Y0pj5avFGMP40,5023
+docchat_server-0.0.1.dist-info/METADATA,sha256=qXKDFfN7589Q513p81EdSrZLM6xnQjkhzNZxRQHYMtU,5387
+docchat_server-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+docchat_server-0.0.1.dist-info/entry_points.txt,sha256=g4vlJ09aZDAwBW9G1yy-X5-jhRyB1zvQWtjkPl1JmjI,59
+docchat_server-0.0.1.dist-info/licenses/LICENSE,sha256=P8rHqt8ByxNDoOk_AzGbfaUCbCR0WdIYhPKg8tsM_NU,1069
+docchat_server-0.0.1.dist-info/RECORD,,

docchat_server-0.0.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

docchat_server-0.0.1.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ docchat-server = docchat_server.cli:main

docchat_server-0.0.1.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Ashwin Ugale
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.