docs-kit 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs_kit-0.1.1/.github/workflows/ci.yml +31 -0
- docs_kit-0.1.1/.github/workflows/publish.yml +79 -0
- docs_kit-0.1.1/.gitignore +15 -0
- docs_kit-0.1.1/AGENTS.md +64 -0
- docs_kit-0.1.1/CHANGELOG.md +20 -0
- docs_kit-0.1.1/CLAUDE.md +85 -0
- docs_kit-0.1.1/CONTRIBUTING.md +67 -0
- docs_kit-0.1.1/LICENSE +21 -0
- docs_kit-0.1.1/PKG-INFO +268 -0
- docs_kit-0.1.1/README.md +223 -0
- docs_kit-0.1.1/data/sample_docs/claude-code-changelog.md +2235 -0
- docs_kit-0.1.1/data/sample_docs/the-adventure-of-the-speckled-band.md +1139 -0
- docs_kit-0.1.1/docs-kit.yaml +19 -0
- docs_kit-0.1.1/docs_kit/__init__.py +32 -0
- docs_kit-0.1.1/docs_kit/__main__.py +4 -0
- docs_kit-0.1.1/docs_kit/_version.py +1 -0
- docs_kit-0.1.1/docs_kit/agent.py +190 -0
- docs_kit-0.1.1/docs_kit/cli/__init__.py +0 -0
- docs_kit-0.1.1/docs_kit/cli/__main__.py +34 -0
- docs_kit-0.1.1/docs_kit/cli/commands.py +542 -0
- docs_kit-0.1.1/docs_kit/cli/help.py +140 -0
- docs_kit-0.1.1/docs_kit/connectors/__init__.py +0 -0
- docs_kit-0.1.1/docs_kit/connectors/embeddings/__init__.py +3 -0
- docs_kit-0.1.1/docs_kit/connectors/embeddings/base.py +9 -0
- docs_kit-0.1.1/docs_kit/connectors/embeddings/fastembed.py +30 -0
- docs_kit-0.1.1/docs_kit/connectors/fetchers/__init__.py +0 -0
- docs_kit-0.1.1/docs_kit/connectors/fetchers/base.py +8 -0
- docs_kit-0.1.1/docs_kit/connectors/fetchers/gitbook.py +7 -0
- docs_kit-0.1.1/docs_kit/connectors/fetchers/llms_txt.py +85 -0
- docs_kit-0.1.1/docs_kit/connectors/fetchers/mintlify.py +94 -0
- docs_kit-0.1.1/docs_kit/connectors/parsers/__init__.py +4 -0
- docs_kit-0.1.1/docs_kit/connectors/parsers/base.py +8 -0
- docs_kit-0.1.1/docs_kit/connectors/parsers/markdown.py +8 -0
- docs_kit-0.1.1/docs_kit/connectors/parsers/text.py +8 -0
- docs_kit-0.1.1/docs_kit/connectors/vector_stores/__init__.py +3 -0
- docs_kit-0.1.1/docs_kit/connectors/vector_stores/base.py +15 -0
- docs_kit-0.1.1/docs_kit/connectors/vector_stores/qdrant.py +279 -0
- docs_kit-0.1.1/docs_kit/core/__init__.py +0 -0
- docs_kit-0.1.1/docs_kit/core/chunking.py +227 -0
- docs_kit-0.1.1/docs_kit/core/config.py +67 -0
- docs_kit-0.1.1/docs_kit/core/html_utils.py +78 -0
- docs_kit-0.1.1/docs_kit/core/models.py +28 -0
- docs_kit-0.1.1/docs_kit/mcp/__init__.py +0 -0
- docs_kit-0.1.1/docs_kit/mcp/server.py +100 -0
- docs_kit-0.1.1/docs_kit/mcp/tools.py +10 -0
- docs_kit-0.1.1/npx-wrapper/bin/docs-kit.js +58 -0
- docs_kit-0.1.1/npx-wrapper/package.json +16 -0
- docs_kit-0.1.1/pyproject.toml +38 -0
- docs_kit-0.1.1/scripts/smoke_test.sh +24 -0
- docs_kit-0.1.1/tests/__init__.py +0 -0
- docs_kit-0.1.1/tests/test_agent.py +100 -0
- docs_kit-0.1.1/tests/test_chunking.py +41 -0
- docs_kit-0.1.1/tests/test_cli.py +169 -0
- docs_kit-0.1.1/tests/test_config.py +65 -0
- docs_kit-0.1.1/tests/test_embeddings_fastembed.py +30 -0
- docs_kit-0.1.1/tests/test_fetcher_gitbook.py +159 -0
- docs_kit-0.1.1/tests/test_fetcher_mintlify.py +168 -0
- docs_kit-0.1.1/tests/test_install_cmd.py +222 -0
- docs_kit-0.1.1/tests/test_mcp_tools.py +96 -0
- docs_kit-0.1.1/tests/test_models.py +17 -0
- docs_kit-0.1.1/tests/test_parsers.py +16 -0
- docs_kit-0.1.1/tests/test_vector_store_qdrant.py +137 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
env:
|
|
10
|
+
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
python-version: ["3.11", "3.12"]
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v5
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
uses: actions/setup-python@v6
|
|
24
|
+
with:
|
|
25
|
+
python-version: ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: pip install -e ".[dev]"
|
|
29
|
+
|
|
30
|
+
- name: Run tests
|
|
31
|
+
run: pytest tests/ -v
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
|
|
8
|
+
env:
|
|
9
|
+
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
smoke-test:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v5
|
|
16
|
+
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v6
|
|
19
|
+
with:
|
|
20
|
+
python-version: "3.11"
|
|
21
|
+
|
|
22
|
+
- name: Install package
|
|
23
|
+
run: pip install -e ".[sse]" build twine "hatchling<1.27"
|
|
24
|
+
|
|
25
|
+
- name: Run smoke test
|
|
26
|
+
run: |
|
|
27
|
+
python -m docs_kit --help
|
|
28
|
+
TMP_DIR="$(mktemp -d)"
|
|
29
|
+
python -m docs_kit init --dir "$TMP_DIR"
|
|
30
|
+
test -f "$TMP_DIR/docs-kit.yaml"
|
|
31
|
+
python -m docs_kit doctor --config "$TMP_DIR/docs-kit.yaml"
|
|
32
|
+
|
|
33
|
+
- name: Build artifacts
|
|
34
|
+
run: python -m build --no-isolation
|
|
35
|
+
|
|
36
|
+
- name: Check artifacts
|
|
37
|
+
run: python -m twine check dist/*
|
|
38
|
+
|
|
39
|
+
build:
|
|
40
|
+
needs: smoke-test
|
|
41
|
+
runs-on: ubuntu-latest
|
|
42
|
+
steps:
|
|
43
|
+
- uses: actions/checkout@v5
|
|
44
|
+
|
|
45
|
+
- name: Set up Python
|
|
46
|
+
uses: actions/setup-python@v6
|
|
47
|
+
with:
|
|
48
|
+
python-version: "3.11"
|
|
49
|
+
|
|
50
|
+
- name: Install build tools
|
|
51
|
+
run: pip install build twine "hatchling<1.27"
|
|
52
|
+
|
|
53
|
+
- name: Build
|
|
54
|
+
run: python -m build --no-isolation
|
|
55
|
+
|
|
56
|
+
- name: Check artifacts
|
|
57
|
+
run: python -m twine check dist/*
|
|
58
|
+
|
|
59
|
+
- name: Upload dist artifacts
|
|
60
|
+
uses: actions/upload-artifact@v4
|
|
61
|
+
with:
|
|
62
|
+
name: dist
|
|
63
|
+
path: dist/
|
|
64
|
+
|
|
65
|
+
publish-pypi:
|
|
66
|
+
needs: build
|
|
67
|
+
runs-on: ubuntu-latest
|
|
68
|
+
environment: release
|
|
69
|
+
permissions:
|
|
70
|
+
id-token: write
|
|
71
|
+
steps:
|
|
72
|
+
- name: Download dist artifacts
|
|
73
|
+
uses: actions/download-artifact@v4
|
|
74
|
+
with:
|
|
75
|
+
name: dist
|
|
76
|
+
path: dist/
|
|
77
|
+
|
|
78
|
+
- name: Publish to PyPI
|
|
79
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
docs_kit-0.1.1/AGENTS.md
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# docs-kit — Agent Instructions
|
|
2
|
+
|
|
3
|
+
## Scope
|
|
4
|
+
|
|
5
|
+
This document applies to AI agents working on the **docs-kit** codebase: a **PyPI** package (and optional **npx** wrapper) that **fetches documentation from public GitBook or Mintlify sites (or local `.md`/`.txt` files)**, **stores and embeds it locally** (markdown on disk + Qdrant), and **exposes retrieval via an MCP server** for **Claude Code**, **Claude Desktop**, **Cursor**, and other MCP-aware tools.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## What the product does
|
|
10
|
+
|
|
11
|
+
1. **Fetch** — `GitBookFetcher` uses `/llms-full.txt` or `/llms.txt` on a public GitBook base URL.
|
|
12
|
+
2. **Local storage** — `docs-kit fetch` writes `.md` files; `docs-kit ingest` chunks, embeds with FastEmbed, and upserts into local Qdrant.
|
|
13
|
+
3. **MCP** — `docs-kit serve` runs tools: `search_docs`, `list_sources`, `list_ingested_sources`, `get_collection_info`, `get_full_document`, `ingest_urls`, `remove_source`.
|
|
14
|
+
4. **Install** — `docs-kit install` merges an `mcpServers.docs-kit` entry into agent settings for `claude-code`, `claude-desktop`, or `cursor`.
|
|
15
|
+
|
|
16
|
+
The **npx** package under `npx-wrapper/` only ensures Python 3.11+ is available, installs the **`docs-kit`** package from PyPI, and runs it via `python -m docs_kit`; all behavior lives in Python.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Security (Non-Negotiable)
|
|
21
|
+
|
|
22
|
+
- **MUST NOT** read, access, or reference `.env`, `.env.local`, `.env.*`, or any secret/credentials files.
|
|
23
|
+
- **MUST NOT** recommend reading environment files. If config is needed, ask the user for non-sensitive details.
|
|
24
|
+
- **MUST** treat `.env` files as if they do not exist.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Verification After Changes
|
|
29
|
+
|
|
30
|
+
From the repository root:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pytest tests/ -v
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
- **MUST NOT** claim work is complete without a successful test run when behavior or dependencies changed.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Git Commit Workflow
|
|
41
|
+
|
|
42
|
+
When the user asks to commit, stage, or write a commit message:
|
|
43
|
+
|
|
44
|
+
1. **MUST** analyze staged/unstaged changes via `git status` and `git diff` in the relevant git root.
|
|
45
|
+
2. **MUST** write a commit message following Conventional Commits:
|
|
46
|
+
- `feat:` — new feature
|
|
47
|
+
- `fix:` — bug fix
|
|
48
|
+
- `chore:` — maintenance, deps, config
|
|
49
|
+
- `docs:` — documentation only
|
|
50
|
+
- `style:` — formatting, no logic change
|
|
51
|
+
- `refactor:` — code restructure
|
|
52
|
+
- `test:` — tests only
|
|
53
|
+
- `perf:` — performance
|
|
54
|
+
3. **SHOULD** keep subject line under 50 chars, imperative mood; use bullet points in body.
|
|
55
|
+
4. **SHOULD** suggest splitting into atomic commits if changes span unrelated concerns.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Project Context (Brief)
|
|
60
|
+
|
|
61
|
+
- **Package name:** `docs-kit` (import `docs_kit`, CLI `docs-kit`).
|
|
62
|
+
- **Main types:** `DocsKitConfig`, `DocsKitAgent` (`docs_kit/agent.py`); MCP in `docs_kit/mcp/server.py`.
|
|
63
|
+
- **CLI:** `docs_kit/cli/commands.py` — `init`, `fetch`, `ingest`, `serve`, `install`, `query`, `inspect`, `doctor`.
|
|
64
|
+
- **Docs:** [README.md](README.md) for user-facing quickstart and command table.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [0.1.0] - 2026-03-16
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- `DocsKitAgent` high-level Python API: `ingest()`, `query()`
|
|
12
|
+
- CLI: `docs-kit init`, `fetch`, `ingest`, `serve`, `install`, `query`, `inspect`, `doctor`
|
|
13
|
+
- GitBook fetch via `/llms-full.txt` / `/llms.txt` and linked pages
|
|
14
|
+
- Local embeddings with FastEmbed (dense + sparse/BM25)
|
|
15
|
+
- Vector store: Qdrant (local path or remote URL)
|
|
16
|
+
- Document parsers: `.txt`, `.md`
|
|
17
|
+
- `DocsKitConfig` with YAML and environment variable support
|
|
18
|
+
- MCP server tools: `search_docs`, `list_sources`, `get_collection_info`, `get_full_document`
|
|
19
|
+
- `docs-kit.yaml` annotated example config
|
|
20
|
+
- GitHub Actions CI (Python 3.11, 3.12) and publish pipeline (TestPyPI → PyPI)
|
docs_kit-0.1.1/CLAUDE.md
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# docs-kit — Project Memory
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
|
|
5
|
+
**docs-kit** is a distributable toolkit (PyPI **`docs-kit`**, optional **npm/npx** wrapper) that:
|
|
6
|
+
|
|
7
|
+
1. **Pulls** documentation from public **GitBook** sites via AI-oriented endpoints (`/llms-full.txt` or `/llms.txt` and linked pages).
|
|
8
|
+
2. **Stores** content locally — either as downloaded `.md` files (`docs-kit fetch`) or in a **local Qdrant** vector store after embedding (`docs-kit ingest`).
|
|
9
|
+
3. **Ingests** with **local embeddings** (FastEmbed by default; no API keys required for embeddings).
|
|
10
|
+
4. **Serves** an **MCP (Model Context Protocol) server** so **Claude Code**, **Claude Desktop**, **Cursor**, and other MCP-capable agents can query the knowledge base (`search_docs`, `list_sources`, `get_collection_info`, `get_full_document`).
|
|
11
|
+
5. **Installs** MCP wiring via `docs-kit install claude-code | claude-desktop | cursor`.
|
|
12
|
+
|
|
13
|
+
**Pitch:** Point at any public GitBook docs, embed locally, expose them to your coding agents through MCP.
|
|
14
|
+
|
|
15
|
+
**Target:** Developers who want product/docs RAG in the agent loop without shipping docs to a remote embedding API.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Distribution
|
|
20
|
+
|
|
21
|
+
| Channel | What it is |
|
|
22
|
+
|---------|------------|
|
|
23
|
+
| **PyPI** | Canonical install: `pip install docs-kit`. CLI entry: `docs-kit`. |
|
|
24
|
+
| **npm/npx** | `npx-wrapper/` — thin Node shim that ensures Python 3.11+ and `pip install docs-kit`, then runs `python -m docs_kit …`. |
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Repo Structure
|
|
29
|
+
|
|
30
|
+
| Path | Purpose |
|
|
31
|
+
|------|---------|
|
|
32
|
+
| `docs_kit/` | Python package: `DocsKitAgent`, GitBook fetcher, parsers, FastEmbed, Qdrant store, MCP server, Click CLI |
|
|
33
|
+
| `docs_kit/cli/` | CLI (`docs-kit init`, `fetch`, `ingest`, `serve`, `install`, `query`, `inspect`, `doctor`) |
|
|
34
|
+
| `docs_kit/mcp/` | FastMCP server (`stdio` default; `sse` with optional deps) |
|
|
35
|
+
| `docs_kit/connectors/fetchers/gitbook.py` | GitBook `llms-full.txt` / `llms.txt` fetch |
|
|
36
|
+
| `npx-wrapper/` | npm package wrapping the Python CLI |
|
|
37
|
+
| `tests/` | pytest suite |
|
|
38
|
+
| `data/sample_docs/` | Sample markdown for local ingest tests |
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Tech Stack
|
|
43
|
+
|
|
44
|
+
- **Python** ≥ 3.11, **Click**, **Pydantic** / **pydantic-settings**, **httpx**, **PyYAML**
|
|
45
|
+
- **Embeddings:** FastEmbed (local)
|
|
46
|
+
- **Vector store:** Qdrant (local path, e.g. `.docs-kit/qdrant` in config)
|
|
47
|
+
- **MCP:** `mcp` + FastMCP; optional **uvicorn** / **starlette** for SSE (`docs-kit[sse]`)
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Security & Privacy
|
|
52
|
+
|
|
53
|
+
- **NEVER** touch `.env`, `.env.local`, `.env.*`, or other secret/credentials files. Treat them as if they do not exist.
|
|
54
|
+
- **NEVER** read, access, reference, or recommend reading environment or secret files.
|
|
55
|
+
- **ALWAYS** edit `.env.example` (never a real `.env` file) when documenting or adding environment variables, and use placeholder values only.
|
|
56
|
+
- If config is needed, ask the user for non-sensitive details.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Build & Verification
|
|
61
|
+
|
|
62
|
+
From the project root (where `pyproject.toml` lives):
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pytest tests/ -v
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
After substantive changes, run the full test suite before claiming completion.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Git Commits
|
|
73
|
+
|
|
74
|
+
- **MUST** write descriptive commit messages using [Conventional Commits](https://www.conventionalcommits.org/): `feat`, `fix`, `chore`, `docs`, `style`, `refactor`, `test`, `perf`.
|
|
75
|
+
- **SHOULD** keep subject line under 50 characters, imperative mood; use bullet points in body for context.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Key Paths & Commands
|
|
80
|
+
|
|
81
|
+
- **Config:** `docs-kit.yaml` (from `docs-kit init`) — embedding model, Qdrant `local_path`, MCP transport/port.
|
|
82
|
+
- **Fetch only:** `docs-kit fetch <gitbook-url> --output docs-kit-docs`
|
|
83
|
+
- **Ingest URL or path:** `docs-kit ingest https://example.gitbook.io/docs` or `docs-kit ingest ./path/to/markdown`
|
|
84
|
+
- **MCP:** `docs-kit serve` (stdio); SSE requires `docs-kit[sse]` and config/flags for `sse`.
|
|
85
|
+
- **Agent setup:** `docs-kit install claude-code` (optional `--project` for project-level Claude Code).
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# Contributing to docs-kit
|
|
2
|
+
|
|
3
|
+
Thank you for contributing! This guide covers project layout and common extension points.
|
|
4
|
+
|
|
5
|
+
## Project structure
|
|
6
|
+
|
|
7
|
+
```text
|
|
8
|
+
docs_kit/
|
|
9
|
+
agent.py # DocsKitAgent; parser registry (_PARSERS); component wiring
|
|
10
|
+
core/
|
|
11
|
+
config.py # DocsKitConfig (pydantic-settings)
|
|
12
|
+
models.py # Document, Chunk, RetrievedChunk
|
|
13
|
+
chunking.py # Text/markdown chunking
|
|
14
|
+
connectors/
|
|
15
|
+
fetchers/ # Doc-source fetchers (base, gitbook, llms_txt, mintlify)
|
|
16
|
+
embeddings/ # Embedding backends (FastEmbed)
|
|
17
|
+
parsers/ # Document loaders (text, markdown)
|
|
18
|
+
vector_stores/ # Qdrant store
|
|
19
|
+
cli/
|
|
20
|
+
commands.py # Click commands
|
|
21
|
+
__main__.py # CLI entry point
|
|
22
|
+
mcp/ # MCP server (tools, stdio/SSE)
|
|
23
|
+
tests/ # pytest tests (mirror docs_kit/ where useful)
|
|
24
|
+
npx-wrapper/ # optional npm/npx shim to the Python CLI
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Adding a new document parser
|
|
28
|
+
|
|
29
|
+
1. Implement a loader subclassing `BaseLoader` in `docs_kit/connectors/parsers/`.
|
|
30
|
+
2. Register the file extension in `_PARSERS` in `docs_kit/agent.py` (dotted import path to the class).
|
|
31
|
+
|
|
32
|
+
## Adding a new embedding provider
|
|
33
|
+
|
|
34
|
+
1. Implement the dense (and if needed sparse) API following `docs_kit/connectors/embeddings/base.py`.
|
|
35
|
+
2. Extend the `embedding.provider` branch in `DocsKitAgent._init_components()` in `docs_kit/agent.py`.
|
|
36
|
+
3. Extend `DocsKitConfig` / YAML schema in `docs_kit/core/config.py` if new settings are required.
|
|
37
|
+
4. Add optional dependencies in `pyproject.toml` if the provider needs extra packages.
|
|
38
|
+
|
|
39
|
+
## Adding a new vector store
|
|
40
|
+
|
|
41
|
+
1. Implement `BaseVectorStore` in `docs_kit/connectors/vector_stores/`.
|
|
42
|
+
2. Extend the `vector_store.provider` branch in `DocsKitAgent._init_components()` and add config fields as needed.
|
|
43
|
+
|
|
44
|
+
## Adding a new doc source (fetcher)
|
|
45
|
+
|
|
46
|
+
1. Subclass `BaseFetcher` in `docs_kit/connectors/fetchers/`.
|
|
47
|
+
2. Wire the new source into the CLI `fetch` / `ingest` paths in `docs_kit/cli/commands.py` (and any agent helpers) following the GitBook pattern.
|
|
48
|
+
|
|
49
|
+
## Running tests
|
|
50
|
+
|
|
51
|
+
**On Apple Silicon macOS (forces a native arm64 interpreter to avoid arm64/x86_64 Rosetta mismatches):**
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
arch -arm64 .venv/bin/python -m pytest tests/ -v
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**On Linux / CI:**
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pytest tests/ -v
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Submitting a PR
|
|
64
|
+
|
|
65
|
+
- Branch name: `feat/<topic>` or `fix/<description>`
|
|
66
|
+
- Run the full test suite before opening the PR
|
|
67
|
+
- New connectors or fetchers should include tests where practical
|
docs_kit-0.1.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Docs Kit Limited
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
docs_kit-0.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: docs-kit
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Fetch docs, embed locally, expose via MCP for AI agents.
|
|
5
|
+
License: MIT License
|
|
6
|
+
|
|
7
|
+
Copyright (c) 2026 Docs Kit Limited
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all
|
|
17
|
+
copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
+
SOFTWARE.
|
|
26
|
+
Requires-Python: >=3.11
|
|
27
|
+
Requires-Dist: click>=8.0.0
|
|
28
|
+
Requires-Dist: fastembed>=0.6.0
|
|
29
|
+
Requires-Dist: httpx>=0.27.0
|
|
30
|
+
Requires-Dist: mcp>=1.0.0
|
|
31
|
+
Requires-Dist: pydantic-settings>=2.2.1
|
|
32
|
+
Requires-Dist: pydantic>=2.0.0
|
|
33
|
+
Requires-Dist: pyyaml>=6.0
|
|
34
|
+
Requires-Dist: qdrant-client>=1.10.0
|
|
35
|
+
Requires-Dist: tomli-w>=1.0.0
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
39
|
+
Requires-Dist: starlette>=0.36.0; extra == 'dev'
|
|
40
|
+
Requires-Dist: uvicorn>=0.27.0; extra == 'dev'
|
|
41
|
+
Provides-Extra: sse
|
|
42
|
+
Requires-Dist: starlette>=0.36.0; extra == 'sse'
|
|
43
|
+
Requires-Dist: uvicorn>=0.27.0; extra == 'sse'
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
|
|
46
|
+
# docs-kit
|
|
47
|
+
|
|
48
|
+
[PyPI version](https://pypi.org/project/docs-kit/)
|
|
49
|
+
[License: MIT](LICENSE)
|
|
50
|
+
[Python 3.11+](https://pypi.org/project/docs-kit/)
|
|
51
|
+
[CI](https://github.com/docs-kit/docs-kit/actions/workflows/ci.yml)
|
|
52
|
+
|
|
53
|
+
Fetch docs from GitBook, Mintlify, or local files, embed them locally, and expose retrieval to AI tools over MCP.
|
|
54
|
+
|
|
55
|
+
No API keys are required for the default local embedding path.
|
|
56
|
+
|
|
57
|
+
## What it does
|
|
58
|
+
|
|
59
|
+
- Fetches public docs from GitBook and Mintlify sites via `llms-full.txt` / `llms.txt` (with sitemap.xml fallback for Mintlify)
|
|
60
|
+
- Ingests local `.md` and `.txt` files
|
|
61
|
+
- Stores vectors in local Qdrant by default
|
|
62
|
+
- Serves an MCP server over `stdio` or SSE
|
|
63
|
+
- Exposes MCP tools to ingest, remove, and list sources at runtime
|
|
64
|
+
- Installs MCP config for supported AI clients
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install docs-kit
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Or with `npx`:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
npx docs-kit ingest https://docs.example.com
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Quickstart
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# 1. Create a config file
|
|
82
|
+
docs-kit init
|
|
83
|
+
|
|
84
|
+
# 2. Ingest docs (GitBook or Mintlify — auto-detected)
|
|
85
|
+
docs-kit ingest https://docs.elevenlabs.io
|
|
86
|
+
|
|
87
|
+
# 3. Check the collection
|
|
88
|
+
docs-kit inspect
|
|
89
|
+
|
|
90
|
+
# 4. Install into your client
|
|
91
|
+
docs-kit install claude-code
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Commands
|
|
95
|
+
|
|
96
|
+
### `docs-kit init`
|
|
97
|
+
|
|
98
|
+
Create `docs-kit.yaml`.
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
docs-kit init
|
|
102
|
+
docs-kit init --dir ./sandbox
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### `docs-kit ingest <path-or-url>`
|
|
106
|
+
|
|
107
|
+
Ingest a local file, directory, or documentation URL into the vector store. Supports GitBook and Mintlify sites out of the box — auto-detected by default.
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
docs-kit ingest ./docs
|
|
111
|
+
docs-kit ingest https://docs.example.com
|
|
112
|
+
docs-kit ingest https://docs.mintlify-site.com --provider mintlify
|
|
113
|
+
docs-kit ingest https://docs.gitbook-site.com --provider gitbook
|
|
114
|
+
docs-kit ingest ./docs --recreate
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
`--provider` accepts `auto` (default), `gitbook`, or `mintlify`. In `auto` mode, the fetcher tries `/llms-full.txt` → `/llms.txt` → `/sitemap.xml` in order.
|
|
118
|
+
|
|
119
|
+
### `docs-kit fetch <url>`
|
|
120
|
+
|
|
121
|
+
Download GitBook docs as Markdown without ingesting them.
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
docs-kit fetch https://docs.example.com
|
|
125
|
+
docs-kit fetch https://docs.example.com --output ./downloaded-docs
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### `docs-kit serve`
|
|
129
|
+
|
|
130
|
+
Run the MCP server. `stdio` is the default. Use SSE for HTTP clients.
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
docs-kit serve
|
|
134
|
+
docs-kit serve --transport sse --port 3001
|
|
135
|
+
docs-kit serve --config ./docs-kit.yaml
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### `docs-kit install <agent>`
|
|
139
|
+
|
|
140
|
+
Install docs-kit into a supported client config.
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
docs-kit install claude-code
|
|
144
|
+
docs-kit install codex
|
|
145
|
+
docs-kit install claude-code --project
|
|
146
|
+
docs-kit install cursor --config ./docs-kit.yaml
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### `docs-kit query <text>`
|
|
150
|
+
|
|
151
|
+
Run retrieval directly from the CLI.
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
docs-kit query "How do I authenticate?"
|
|
155
|
+
docs-kit query "getting started" --limit 3
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### `docs-kit inspect`
|
|
159
|
+
|
|
160
|
+
Show collection and embedding configuration details.
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
docs-kit inspect
|
|
164
|
+
docs-kit inspect --config ./docs-kit.yaml
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### `docs-kit doctor`
|
|
168
|
+
|
|
169
|
+
Check environment variables, config presence, and Qdrant connectivity.
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
docs-kit doctor
|
|
173
|
+
docs-kit doctor --config ./docs-kit.yaml
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### `docs-kit list`
|
|
177
|
+
|
|
178
|
+
List ingested sources with their ingestion timestamps.
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
docs-kit list
|
|
182
|
+
docs-kit list --config ./docs-kit.yaml
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### `docs-kit remove <source>`
|
|
186
|
+
|
|
187
|
+
Remove an ingested source by URL or file path.
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
docs-kit remove https://docs.example.com/page
|
|
191
|
+
docs-kit remove ./docs/getting-started.md
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## MCP Tools
|
|
195
|
+
|
|
196
|
+
When connected to an MCP client, docs-kit exposes:
|
|
197
|
+
|
|
198
|
+
| Tool | Description |
|
|
199
|
+
|------|-------------|
|
|
200
|
+
| `search_docs(query, limit=5)` | Hybrid dense + BM25 retrieval |
|
|
201
|
+
| `list_sources()` | List all ingested source URLs/paths |
|
|
202
|
+
| `list_ingested_sources()` | List sources with ingestion timestamps |
|
|
203
|
+
| `get_collection_info()` | Collection stats (exists, point count) |
|
|
204
|
+
| `get_full_document(source)` | Retrieve full stored document by source |
|
|
205
|
+
| `ingest_urls(urls, provider="auto")` | Ingest comma-separated URLs at runtime |
|
|
206
|
+
| `remove_source(source)` | Remove a source and all its chunks |
|
|
207
|
+
|
|
208
|
+
## Install Targets
|
|
209
|
+
|
|
210
|
+
Local config install is supported for:
|
|
211
|
+
|
|
212
|
+
- `claude-code`
|
|
213
|
+
- `claude-desktop`
|
|
214
|
+
- `cursor`
|
|
215
|
+
- `codex`
|
|
216
|
+
|
|
217
|
+
Codex aliases:
|
|
218
|
+
|
|
219
|
+
- `codex-app`
|
|
220
|
+
|
|
221
|
+
ChatGPT aliases are accepted for guidance only:
|
|
222
|
+
|
|
223
|
+
- `chatgpt`
|
|
224
|
+
- `chatgpt-desktop`
|
|
225
|
+
|
|
226
|
+
`chatgpt` and `chatgpt-desktop` do not currently use a local stdio config written by this command. The installer prints guidance for the current OpenAI flow, which uses remote MCP apps/connectors in ChatGPT settings.
|
|
227
|
+
|
|
228
|
+
## Configuration
|
|
229
|
+
|
|
230
|
+
`docs-kit.yaml` created by `docs-kit init`:
|
|
231
|
+
|
|
232
|
+
```yaml
|
|
233
|
+
embedding:
|
|
234
|
+
provider: fastembed
|
|
235
|
+
model: BAAI/bge-small-en-v1.5
|
|
236
|
+
|
|
237
|
+
vector_store:
|
|
238
|
+
provider: qdrant
|
|
239
|
+
local_path: .docs-kit/qdrant
|
|
240
|
+
collection_name: knowledge_base
|
|
241
|
+
|
|
242
|
+
ingestion:
|
|
243
|
+
chunk_size: 800
|
|
244
|
+
chunk_overlap: 120
|
|
245
|
+
bm25_model: Qdrant/bm25
|
|
246
|
+
|
|
247
|
+
mcp:
|
|
248
|
+
transport: stdio
|
|
249
|
+
host: localhost
|
|
250
|
+
port: 3001
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
## Supported Sources
|
|
254
|
+
|
|
255
|
+
| Source | Strategy |
|
|
256
|
+
|--------|----------|
|
|
257
|
+
| GitBook sites | `/llms-full.txt` → `/llms.txt` |
|
|
258
|
+
| Mintlify sites | `/llms-full.txt` → `/llms.txt` → `/sitemap.xml` |
|
|
259
|
+
| Local `.md` files | Direct file read |
|
|
260
|
+
| Local `.txt` files | Direct file read |
|
|
261
|
+
|
|
262
|
+
Both GitBook and Mintlify support the [`llms.txt` standard](https://llmstxt.org), so in most cases the same auto strategy works for both. The Mintlify fetcher adds a `/sitemap.xml` fallback for sites where `llms.txt` is disabled.
|
|
263
|
+
|
|
264
|
+
## Requirements
|
|
265
|
+
|
|
266
|
+
- Python 3.11+
|
|
267
|
+
- Disk space for the local embedding model download
|
|
268
|
+
- Local Qdrant storage under `.docs-kit/` by default
|