claude-local-docs 1.0.2 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +25 -5
- package/.claude-plugin/plugin.json +25 -4
- package/.mcp.json +9 -3
- package/README.md +162 -78
- package/commands/fetch-docs.md +146 -33
- package/dist/discovery.d.ts +46 -0
- package/dist/discovery.js +357 -0
- package/dist/discovery.js.map +1 -0
- package/dist/fetcher.d.ts +6 -1
- package/dist/fetcher.js +8 -5
- package/dist/fetcher.js.map +1 -1
- package/dist/index.js +85 -5
- package/dist/index.js.map +1 -1
- package/dist/indexer.d.ts +4 -4
- package/dist/indexer.js +95 -37
- package/dist/indexer.js.map +1 -1
- package/dist/integration.test.d.ts +8 -0
- package/dist/integration.test.js +114 -0
- package/dist/integration.test.js.map +1 -0
- package/dist/reranker.d.ts +2 -4
- package/dist/reranker.js +14 -42
- package/dist/reranker.js.map +1 -1
- package/dist/search.js +5 -4
- package/dist/search.js.map +1 -1
- package/docker-compose.nvidia.yml +14 -0
- package/docker-compose.yml +20 -0
- package/hooks/hooks.json +16 -0
- package/package.json +14 -3
- package/scripts/ensure-tei.sh +71 -0
- package/start-tei.sh +239 -0
|
@@ -1,22 +1,42 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://anthropic.com/claude-code/marketplace.schema.json",
|
|
3
3
|
"name": "claude-local-docs",
|
|
4
|
-
"description": "
|
|
4
|
+
"description": "Local-first Context7 alternative — indexes JS/TS dependency docs with a 4-stage RAG pipeline. GPU-accelerated via TEI Docker containers.",
|
|
5
5
|
"owner": {
|
|
6
|
-
"name": "matthew"
|
|
6
|
+
"name": "matthew",
|
|
7
|
+
"email": "matteodante@users.noreply.github.com"
|
|
8
|
+
},
|
|
9
|
+
"metadata": {
|
|
10
|
+
"version": "1.0.7"
|
|
7
11
|
},
|
|
8
12
|
"plugins": [
|
|
9
13
|
{
|
|
10
14
|
"name": "claude-local-docs",
|
|
11
|
-
"description": "
|
|
12
|
-
"version": "1.0.
|
|
15
|
+
"description": "Offline-capable documentation search for JS/TS projects. Reads your package.json, fetches docs (preferring llms.txt), and indexes them with a 4-stage RAG pipeline: vector search + BM25 keywords + RRF fusion + cross-encoder reranking. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection (NVIDIA CUDA, Apple Metal).",
|
|
16
|
+
"version": "1.0.7",
|
|
13
17
|
"author": {
|
|
14
18
|
"name": "matthew"
|
|
15
19
|
},
|
|
16
20
|
"source": "./",
|
|
17
21
|
"category": "development",
|
|
18
22
|
"license": "MIT",
|
|
19
|
-
"keywords": [
|
|
23
|
+
"keywords": [
|
|
24
|
+
"documentation",
|
|
25
|
+
"search",
|
|
26
|
+
"rag",
|
|
27
|
+
"embeddings",
|
|
28
|
+
"local-first",
|
|
29
|
+
"semantic-search",
|
|
30
|
+
"llms-txt",
|
|
31
|
+
"context7-alternative",
|
|
32
|
+
"offline",
|
|
33
|
+
"vector-search",
|
|
34
|
+
"bm25",
|
|
35
|
+
"reranking",
|
|
36
|
+
"dependency-docs",
|
|
37
|
+
"docker",
|
|
38
|
+
"tei"
|
|
39
|
+
]
|
|
20
40
|
}
|
|
21
41
|
]
|
|
22
42
|
}
|
|
@@ -1,8 +1,29 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-local-docs",
|
|
3
|
-
"
|
|
4
|
-
"
|
|
3
|
+
"version": "1.0.7",
|
|
4
|
+
"description": "Local-first Context7 alternative — indexes JS/TS dependency docs with a 4-stage RAG pipeline (vector + BM25 + RRF + cross-encoder reranking). Uses TEI Docker containers for GPU-accelerated embeddings and reranking.",
|
|
5
5
|
"author": {
|
|
6
|
-
"name": "matthew"
|
|
7
|
-
|
|
6
|
+
"name": "matthew",
|
|
7
|
+
"url": "https://github.com/matteodante"
|
|
8
|
+
},
|
|
9
|
+
"homepage": "https://github.com/matteodante/claude-local-docs",
|
|
10
|
+
"repository": "https://github.com/matteodante/claude-local-docs",
|
|
11
|
+
"license": "MIT",
|
|
12
|
+
"keywords": [
|
|
13
|
+
"documentation",
|
|
14
|
+
"search",
|
|
15
|
+
"rag",
|
|
16
|
+
"embeddings",
|
|
17
|
+
"local-first",
|
|
18
|
+
"semantic-search",
|
|
19
|
+
"llms-txt",
|
|
20
|
+
"context7-alternative",
|
|
21
|
+
"offline",
|
|
22
|
+
"vector-search",
|
|
23
|
+
"bm25",
|
|
24
|
+
"reranking",
|
|
25
|
+
"dependency-docs",
|
|
26
|
+
"docker",
|
|
27
|
+
"tei"
|
|
28
|
+
]
|
|
8
29
|
}
|
package/.mcp.json
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
{
|
|
2
|
-
"
|
|
3
|
-
"
|
|
4
|
-
|
|
2
|
+
"mcpServers": {
|
|
3
|
+
"local-docs": {
|
|
4
|
+
"command": "npx",
|
|
5
|
+
"args": ["-y", "claude-local-docs@latest"],
|
|
6
|
+
"env": {
|
|
7
|
+
"TEI_EMBED_URL": "http://localhost:39281",
|
|
8
|
+
"TEI_RERANK_URL": "http://localhost:39282"
|
|
9
|
+
}
|
|
10
|
+
}
|
|
5
11
|
}
|
|
6
12
|
}
|
package/README.md
CHANGED
|
@@ -1,57 +1,91 @@
|
|
|
1
1
|
# claude-local-docs
|
|
2
2
|
|
|
3
|
-
A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation locally and provides production-grade semantic search
|
|
3
|
+
A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation locally and provides production-grade semantic search. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection.
|
|
4
4
|
|
|
5
5
|
## Why not Context7?
|
|
6
6
|
|
|
7
7
|
| | **claude-local-docs** | **Context7** |
|
|
8
8
|
|---|---|---|
|
|
9
|
-
| **Runs where** | Your machine (
|
|
9
|
+
| **Runs where** | Your machine (TEI Docker) | Upstash cloud servers |
|
|
10
10
|
| **Privacy** | Docs never leave your machine | Queries sent to cloud API |
|
|
11
11
|
| **Rate limits** | None | API-dependent |
|
|
12
12
|
| **Offline** | Full search works offline | Requires internet |
|
|
13
|
+
| **GPU accelerated** | NVIDIA CUDA / Apple Metal | N/A |
|
|
13
14
|
| **Search quality** | 4-stage RAG (vector + BM25 + RRF + cross-encoder reranking) | Single-stage retrieval |
|
|
14
15
|
| **Doc sources** | Prefers llms.txt, falls back to official docs | Pre-indexed source repos |
|
|
15
16
|
| **Scope** | Your project's actual dependencies | Any library |
|
|
16
|
-
| **Setup** | `npm install` + `/fetch-docs` | Install plugin |
|
|
17
17
|
| **Monorepo** | Detects pnpm/npm/yarn workspaces, resolves catalogs | N/A |
|
|
18
18
|
|
|
19
|
-
##
|
|
19
|
+
## Prerequisites
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
Detect monorepo ┌─── Vector search (LanceDB) ───┐
|
|
26
|
-
Scan all workspace pkgs │ nomic-embed-text-v1.5 │
|
|
27
|
-
Resolve catalog: versions │ │
|
|
28
|
-
│ │ ├─→ RRF Fusion
|
|
29
|
-
▼ │ │ (k=60)
|
|
30
|
-
For each runtime dep: ├── BM25 search (LanceDB FTS) ──┘
|
|
31
|
-
- Search for llms.txt │ keyword + stemming │
|
|
32
|
-
- Raw fetch (no truncation)│ ▼
|
|
33
|
-
- Chunk + embed + store │ Cross-encoder rerank
|
|
34
|
-
│ ms-marco-MiniLM-L-6-v2
|
|
35
|
-
│ │
|
|
36
|
-
└──────────────────────────────────┘
|
|
37
|
-
│
|
|
38
|
-
▼
|
|
39
|
-
Top-K results
|
|
40
|
-
```
|
|
21
|
+
- **Docker** — [Docker Desktop](https://www.docker.com/products/docker-desktop/) for TEI containers (not required on Apple Silicon when using the native Metal build)
|
|
22
|
+
- **Node.js 20+**
|
|
23
|
+
- **NVIDIA GPU** (optional) — auto-detected, uses architecture-optimized TEI images
|
|
24
|
+
- **Apple Silicon** (optional) — native Metal build via Rust/cargo (no Docker needed)
|
|
41
25
|
|
|
42
26
|
## Installation
|
|
43
27
|
|
|
28
|
+
### As a Claude Code MCP server (recommended)
|
|
29
|
+
|
|
30
|
+
Add this to your project's `.mcp.json` (or global `~/.claude/mcp.json`):
|
|
31
|
+
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"mcpServers": {
|
|
35
|
+
"local-docs": {
|
|
36
|
+
"command": "npx",
|
|
37
|
+
"args": ["-y", "claude-local-docs@latest"],
|
|
38
|
+
"env": {
|
|
39
|
+
"TEI_EMBED_URL": "http://localhost:39281",
|
|
40
|
+
"TEI_RERANK_URL": "http://localhost:39282"
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Then start the TEI containers (clone the repo or download `start-tei.sh` + `docker-compose.yml`):
|
|
48
|
+
|
|
44
49
|
```bash
|
|
45
|
-
|
|
46
|
-
|
|
50
|
+
./start-tei.sh
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
The plugin includes a SessionStart hook that auto-checks TEI health and starts containers if needed.
|
|
47
54
|
|
|
48
|
-
|
|
49
|
-
|
|
55
|
+
### Manual / development setup
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
git clone https://github.com/matteodante/claude-local-docs.git
|
|
59
|
+
cd claude-local-docs
|
|
50
60
|
npm install
|
|
51
61
|
npm run build
|
|
62
|
+
|
|
63
|
+
# Start TEI (auto-detects GPU)
|
|
64
|
+
./start-tei.sh
|
|
52
65
|
```
|
|
53
66
|
|
|
54
|
-
|
|
67
|
+
## How it works
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
/fetch-docs search_docs("how to use useState")
|
|
71
|
+
| |
|
|
72
|
+
v v
|
|
73
|
+
Detect monorepo +--- Vector search (LanceDB) ---+
|
|
74
|
+
Scan all workspace pkgs | nomic-embed-text-v1.5 |
|
|
75
|
+
Resolve catalog: versions | |
|
|
76
|
+
| | +-> RRF Fusion
|
|
77
|
+
v | | (k=60)
|
|
78
|
+
For each runtime dep: +-- BM25 search (LanceDB FTS) --+
|
|
79
|
+
- Search for llms.txt | keyword + stemming |
|
|
80
|
+
- Raw fetch (no truncation)| v
|
|
81
|
+
- Chunk + embed + store | Cross-encoder rerank
|
|
82
|
+
| ms-marco-MiniLM-L-6-v2
|
|
83
|
+
| (via TEI :39282)
|
|
84
|
+
+----------------------------------+
|
|
85
|
+
|
|
|
86
|
+
v
|
|
87
|
+
Top-K results
|
|
88
|
+
```
|
|
55
89
|
|
|
56
90
|
## Usage
|
|
57
91
|
|
|
@@ -61,7 +95,7 @@ Or install as a project-local plugin by cloning into your project and referencin
|
|
|
61
95
|
/fetch-docs
|
|
62
96
|
```
|
|
63
97
|
|
|
64
|
-
Claude analyzes your project (including monorepo workspaces), finds all runtime dependencies, searches the web for the best documentation for each one (preferring `llms-full.txt` > `llms.txt` > official docs), and indexes everything locally.
|
|
98
|
+
Claude analyzes your project (including monorepo workspaces), finds all runtime dependencies, searches the web for the best documentation for each one (preferring `llms-full.txt` > `llms.txt` > official docs), and indexes everything locally.
|
|
65
99
|
|
|
66
100
|
### 2. Search
|
|
67
101
|
|
|
@@ -79,39 +113,51 @@ Show me the API for zod's .refine()
|
|
|
79
113
|
- **`get_doc_section`** — Retrieve specific sections by heading or chunk ID
|
|
80
114
|
- **`analyze_dependencies`** — List all deps (monorepo-aware, catalog-resolved, runtime/dev tagged)
|
|
81
115
|
- **`fetch_and_store_doc`** — Fetch a URL and index it directly (no AI truncation)
|
|
116
|
+
- **`discover_and_fetch_docs`** — Auto-discover and index docs for a library (probes npm, llms.txt, GitHub, homepage)
|
|
82
117
|
|
|
83
|
-
##
|
|
84
|
-
|
|
85
|
-
This plugin implements a 4-stage advanced RAG pipeline, the current production standard:
|
|
118
|
+
## TEI backend
|
|
86
119
|
|
|
87
|
-
|
|
88
|
-
|---|---|---|
|
|
89
|
-
| **Vector search** | LanceDB + nomic-embed-text-v1.5 | Semantic similarity (understands meaning) |
|
|
90
|
-
| **BM25 search** | LanceDB native FTS (BM25, stemming, stop words) | Keyword matching (exact terms like `useEffect`) |
|
|
91
|
-
| **RRF fusion** | Reciprocal Rank Fusion (k=60) | Merges both ranked lists, handles different score scales |
|
|
92
|
-
| **Cross-encoder rerank** | ms-marco-MiniLM-L-6-v2 | Rescores top 30 candidates with deep relevance model |
|
|
120
|
+
ML inference runs in TEI (HuggingFace Text Embeddings Inference) containers:
|
|
93
121
|
|
|
94
|
-
|
|
122
|
+
| Container | Port | Model | Purpose |
|
|
123
|
+
|---|---|---|---|
|
|
124
|
+
| tei-embed | `:39281` | `nomic-ai/nomic-embed-text-v1.5` | Text embeddings (384-dim Matryoshka) |
|
|
125
|
+
| tei-rerank | `:39282` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
|
|
95
126
|
|
|
96
|
-
|
|
97
|
-
- **Keyword-only** search misses semantic meaning ("state management" won't find "useState")
|
|
98
|
-
- **Hybrid + reranking** catches both, then a cross-encoder picks the truly relevant results
|
|
127
|
+
### Starting TEI
|
|
99
128
|
|
|
100
|
-
|
|
129
|
+
```bash
|
|
130
|
+
./start-tei.sh # Auto-detect GPU
|
|
131
|
+
./start-tei.sh --metal # Force Apple Metal (native, no Docker)
|
|
132
|
+
./start-tei.sh --cpu # Force CPU Docker
|
|
133
|
+
./start-tei.sh --stop # Stop all TEI
|
|
134
|
+
```
|
|
101
135
|
|
|
102
|
-
|
|
136
|
+
Auto-detection selects the optimal backend:
|
|
103
137
|
|
|
104
|
-
|
|
|
138
|
+
| Platform | Backend | Image tag |
|
|
105
139
|
|---|---|---|
|
|
106
|
-
|
|
|
107
|
-
|
|
|
140
|
+
| NVIDIA RTX 50x0 (Blackwell) | Docker CUDA | `120-1.9` |
|
|
141
|
+
| NVIDIA RTX 40x0 (Ada) | Docker CUDA | `89-1.9` |
|
|
142
|
+
| NVIDIA RTX 30x0 (Ampere) | Docker CUDA | `86-1.9` |
|
|
143
|
+
| Apple Silicon | Native Metal | `cargo install --features metal` |
|
|
144
|
+
| No GPU | Docker CPU | `cpu-1.9` |
|
|
108
145
|
|
|
109
|
-
|
|
146
|
+
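To start the containers with NVIDIA GPU passthrough manually (instead of relying on `start-tei.sh` auto-detection), layer the NVIDIA override file on top of the base compose file: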
|
|
147
|
+
```bash
|
|
148
|
+
docker compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
|
|
149
|
+
```
|
|
110
150
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
-
|
|
114
|
-
|
|
151
|
+
## Search pipeline
|
|
152
|
+
|
|
153
|
+
4-stage RAG pipeline:
|
|
154
|
+
|
|
155
|
+
| Stage | Technology | Purpose |
|
|
156
|
+
|---|---|---|
|
|
157
|
+
| **Vector search** | LanceDB + nomic-embed-text-v1.5 via TEI | Semantic similarity (understands meaning) |
|
|
158
|
+
| **BM25 search** | LanceDB native FTS (BM25, stemming, stop words) | Keyword matching (exact terms like `useEffect`) |
|
|
159
|
+
| **RRF fusion** | Reciprocal Rank Fusion (k=60) | Merges both ranked lists, handles different score scales |
|
|
160
|
+
| **Cross-encoder rerank** | ms-marco-MiniLM-L-6-v2 via TEI | Rescores top 50 candidates with deep relevance model |
|
|
115
161
|
|
|
116
162
|
## Storage
|
|
117
163
|
|
|
@@ -132,36 +178,36 @@ your-project/.claude/docs/
|
|
|
132
178
|
| Tool | Description |
|
|
133
179
|
|---|---|
|
|
134
180
|
| `analyze_dependencies` | Monorepo-aware dep analysis: detects workspaces, resolves catalog versions, tags runtime/dev |
|
|
135
|
-
| `store_and_index_doc` | Receive markdown, chunk, embed, store in LanceDB |
|
|
136
|
-
| `
|
|
137
|
-
| `search_docs` | Full RAG pipeline: vector + BM25 + RRF + rerank |
|
|
181
|
+
| `store_and_index_doc` | Receive markdown, chunk, embed via TEI, store in LanceDB |
|
|
182
|
+
| `search_docs` | Full RAG pipeline: vector + BM25 + RRF + rerank via TEI |
|
|
138
183
|
| `list_docs` | List indexed libraries with metadata |
|
|
139
184
|
| `get_doc_section` | Get specific chunks by library + heading or chunk ID |
|
|
185
|
+
| `fetch_and_store_doc` | Fetch URL directly (raw HTTP, no truncation), then chunk + embed + store |
|
|
186
|
+
| `discover_and_fetch_docs` | Auto-discover docs: probes npm registry, llms.txt URLs, GitHub, homepage HTML. Detects and expands index files |
|
|
140
187
|
|
|
141
188
|
## Dependencies
|
|
142
189
|
|
|
143
|
-
All open source:
|
|
144
|
-
|
|
145
190
|
| Package | License | Purpose |
|
|
146
191
|
|---|---|---|
|
|
147
192
|
| `@lancedb/lancedb` | Apache 2.0 | Embedded vector database + native FTS |
|
|
148
|
-
| `@huggingface/transformers` | Apache 2.0 | Run ONNX models locally |
|
|
149
193
|
| `@modelcontextprotocol/sdk` | MIT | MCP server framework |
|
|
150
194
|
| `zod` | MIT | Schema validation |
|
|
195
|
+
| `turndown` | MIT | HTML to markdown conversion |
|
|
196
|
+
| `turndown-plugin-gfm` | MIT | GFM support for turndown (tables, strikethrough, etc.) |
|
|
151
197
|
|
|
152
|
-
|
|
198
|
+
TEI containers (Docker):
|
|
199
|
+
|
|
200
|
+
| Image | Model | Purpose |
|
|
201
|
+
|---|---|---|
|
|
202
|
+
| `text-embeddings-inference:*` | `nomic-ai/nomic-embed-text-v1.5` | Text embeddings |
|
|
203
|
+
| `text-embeddings-inference:*` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
|
|
153
204
|
|
|
154
205
|
## Development
|
|
155
206
|
|
|
156
207
|
```bash
|
|
157
208
|
npm run dev # Watch mode — rebuilds on file changes
|
|
158
209
|
npm run build # One-time build
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
### Testing with MCP Inspector
|
|
162
|
-
|
|
163
|
-
```bash
|
|
164
|
-
npx @modelcontextprotocol/inspector node dist/index.js
|
|
210
|
+
npm test # Integration test (requires TEI running)
|
|
165
211
|
```
|
|
166
212
|
|
|
167
213
|
## Project structure
|
|
@@ -169,25 +215,63 @@ npx @modelcontextprotocol/inspector node dist/index.js
|
|
|
169
215
|
```
|
|
170
216
|
claude-local-docs/
|
|
171
217
|
├── .claude-plugin/
|
|
172
|
-
│ ├── plugin.json
|
|
173
|
-
│ └── marketplace.json
|
|
174
|
-
├── .mcp.json
|
|
218
|
+
│ ├── plugin.json # Plugin manifest
|
|
219
|
+
│ └── marketplace.json # Marketplace listing
|
|
220
|
+
├── .mcp.json # MCP server config (stdio transport)
|
|
175
221
|
├── commands/
|
|
176
|
-
│ └── fetch-docs.md
|
|
222
|
+
│ └── fetch-docs.md # /fetch-docs — Claude as research agent
|
|
223
|
+
├── hooks/
|
|
224
|
+
│ └── hooks.json # SessionStart hook for TEI containers
|
|
225
|
+
├── scripts/
|
|
226
|
+
│ └── ensure-tei.sh # Idempotent TEI health check + start
|
|
227
|
+
├── docker-compose.yml # TEI containers (uses ${TEI_TAG})
|
|
228
|
+
├── docker-compose.nvidia.yml # NVIDIA GPU device passthrough
|
|
229
|
+
├── start-tei.sh # Auto-detect GPU, start TEI
|
|
177
230
|
├── src/
|
|
178
|
-
│ ├── index.ts
|
|
179
|
-
│ ├──
|
|
180
|
-
│ ├──
|
|
181
|
-
│ ├──
|
|
182
|
-
│ ├──
|
|
183
|
-
│ ├──
|
|
184
|
-
│ ├──
|
|
185
|
-
│
|
|
231
|
+
│ ├── index.ts # MCP server entry, 7 tool definitions
|
|
232
|
+
│ ├── discovery.ts # Doc discovery: npm registry, URL probing, index expansion, HTML→markdown
|
|
233
|
+
│ ├── indexer.ts # Chunking + TEI embeddings
|
|
234
|
+
│ ├── search.ts # 4-stage pipeline: vector + BM25 + RRF + rerank
|
|
235
|
+
│ ├── reranker.ts # TEI cross-encoder reranking
|
|
236
|
+
│ ├── store.ts # LanceDB storage + metadata persistence
|
|
237
|
+
│ ├── fetcher.ts # Raw HTTP fetch (no AI truncation)
|
|
238
|
+
│ ├── workspace.ts # Monorepo detection + pnpm catalog
|
|
239
|
+
│ ├── types.ts # Shared TypeScript interfaces
|
|
240
|
+
│ ├── turndown-plugin-gfm.d.ts # Type declarations for turndown-plugin-gfm
|
|
241
|
+
│ └── integration.test.ts # Integration tests (requires TEI running)
|
|
186
242
|
├── LICENSE
|
|
187
243
|
├── package.json
|
|
188
244
|
└── tsconfig.json
|
|
189
245
|
```
|
|
190
246
|
|
|
247
|
+
## Troubleshooting
|
|
248
|
+
|
|
249
|
+
### TEI containers not starting
|
|
250
|
+
```bash
|
|
251
|
+
# Check Docker is running
|
|
252
|
+
docker info
|
|
253
|
+
|
|
254
|
+
# Check container logs
|
|
255
|
+
docker compose logs tei-embed
|
|
256
|
+
docker compose logs tei-rerank
|
|
257
|
+
|
|
258
|
+
# Restart
|
|
259
|
+
./start-tei.sh --stop && ./start-tei.sh
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
### Port conflicts
|
|
263
|
+
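If ports 39281/39282 are already in use, run TEI on free ports and point the MCP server at them via env vars (these variables only change the URLs the server calls, so TEI itself must be listening on the new ports):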
|
|
264
|
+
```bash
|
|
265
|
+
TEI_EMBED_URL=http://localhost:49281 TEI_RERANK_URL=http://localhost:49282 node dist/index.js
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### Apple Silicon — slow performance
|
|
269
|
+
The default Docker CPU image runs via Rosetta 2. Use native Metal instead:
|
|
270
|
+
```bash
|
|
271
|
+
./start-tei.sh --metal
|
|
272
|
+
```
|
|
273
|
+
Requires Rust (`curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh`). First build takes a few minutes.
|
|
274
|
+
|
|
191
275
|
## License
|
|
192
276
|
|
|
193
277
|
MIT
|
package/commands/fetch-docs.md
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: "Fetch and index documentation for all project dependencies"
|
|
3
|
-
allowed-tools: ["mcp__local-docs__analyze_dependencies", "mcp__local-docs__list_docs", "mcp__local-
|
|
3
|
+
allowed-tools: ["mcp__local-docs__analyze_dependencies", "mcp__local-docs__list_docs", "mcp__local-docs__discover_and_fetch_docs", "mcp__local-docs__fetch_and_store_doc", "WebSearch", "WebFetch"]
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# Fetch Documentation for Project Dependencies
|
|
7
7
|
|
|
8
|
-
You are a documentation
|
|
8
|
+
You are a documentation indexing agent. Your job is to discover and index the best available documentation for each runtime dependency in this project.
|
|
9
9
|
|
|
10
10
|
## Steps
|
|
11
11
|
|
|
@@ -31,32 +31,54 @@ This leaves only **runtime dependencies** that actually need documentation.
|
|
|
31
31
|
|
|
32
32
|
Call `list_docs` to see which libraries are already indexed. **Skip** any library that was fetched within the last 7 days unless the user explicitly asks to refresh.
|
|
33
33
|
|
|
34
|
-
### 4. Fetch Documentation
|
|
34
|
+
### 4. Fetch Documentation
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
For each remaining library, follow this multi-step strategy. The goal is to find the **best quality** source — `llms-full.txt` > `llms.txt` (expanded index) > homepage HTML > README.
|
|
37
37
|
|
|
38
|
-
####
|
|
39
|
-
1. **WebSearch** for `"{library name} llms-full.txt"`
|
|
40
|
-
2. If you find a direct URL to `llms-full.txt`:
|
|
41
|
-
- Call **`fetch_and_store_doc`** with the URL (this fetches raw content — no truncation)
|
|
42
|
-
- Report: `[1/N] library-name — chunks from llms-full.txt (size)`
|
|
38
|
+
#### Step A: Check Known URLs first
|
|
43
39
|
|
|
44
|
-
|
|
45
|
-
If no llms-full.txt found:
|
|
46
|
-
1. **WebSearch** for `"{library name} llms.txt"`
|
|
47
|
-
2. If you find a direct URL to `llms.txt`:
|
|
48
|
-
- Call **`fetch_and_store_doc`** with the URL
|
|
49
|
-
- Report: `[2/N] library-name — chunks from llms.txt (size)`
|
|
40
|
+
Before any searching, check if the library is in the **Known URLs Reference** below. If there's a known `llms-full.txt` or `llms.txt` URL, use it directly with `fetch_and_store_doc`. This is the fastest path.
|
|
50
41
|
|
|
51
|
-
####
|
|
52
|
-
If no llms.txt exists:
|
|
53
|
-
1. **WebSearch** for `"{library name} official documentation"`
|
|
54
|
-
2. **WebFetch** the main documentation page
|
|
55
|
-
3. Call **`store_and_index_doc`** with the fetched content
|
|
56
|
-
4. Report: `[3/N] library-name — chunks from official docs`
|
|
42
|
+
#### Step B: WebSearch for llms.txt
|
|
57
43
|
|
|
58
|
-
|
|
59
|
-
|
|
44
|
+
For libraries NOT in the known list, use **WebSearch** to find the actual `llms.txt` or `llms-full.txt` URL. Use queries like:
|
|
45
|
+
|
|
46
|
+
> `{library-name} llms-full.txt site:{homepage-domain}`
|
|
47
|
+
|
|
48
|
+
or more broadly:
|
|
49
|
+
|
|
50
|
+
> `{library-name} llms-full.txt OR llms.txt documentation`
|
|
51
|
+
|
|
52
|
+
If the search finds a concrete URL to an `llms.txt` or `llms-full.txt` file, pass it directly to **`fetch_and_store_doc`**. Prefer `llms-full.txt` over `llms.txt` when both exist.
|
|
53
|
+
|
|
54
|
+
**Batch the searches**: Run WebSearch for multiple libraries in parallel (up to 5 at a time) to collect URLs upfront. Then fetch them one by one.
|
|
55
|
+
|
|
56
|
+
#### Step C: `discover_and_fetch_docs` (automatic probing)
|
|
57
|
+
|
|
58
|
+
If neither known URLs nor WebSearch found an `llms.txt` URL, call **`discover_and_fetch_docs`**. This tool automatically:
|
|
59
|
+
1. Checks npm registry for `llms`/`llmsFull` fields in package.json (newest convention)
|
|
60
|
+
2. Probes homepage, `docs.{domain}`, `llms.{domain}`, `/docs/` subpath for llms-full.txt/llms.txt
|
|
61
|
+
3. Probes GitHub raw for llms-full.txt/llms.txt on main/master branches
|
|
62
|
+
4. Falls back to README.md from GitHub
|
|
63
|
+
5. Falls back to homepage HTML → markdown conversion
|
|
64
|
+
6. Detects index files and expands them by fetching linked pages
|
|
65
|
+
|
|
66
|
+
#### Step D: Training data fallback
|
|
67
|
+
|
|
68
|
+
If all above fail, try **`fetch_and_store_doc`** with documentation URLs you know from your training data (GitHub raw docs, official doc site pages, etc.).
|
|
69
|
+
|
|
70
|
+
#### Evaluating results
|
|
71
|
+
|
|
72
|
+
After each library is fetched, check the chunk count:
|
|
73
|
+
- **< 5 chunks**: Very thin. Use WebSearch to find additional doc pages (API reference, guides) and fetch with `fetch_and_store_doc` to supplement.
|
|
74
|
+
- **5-20 chunks**: Acceptable for small libraries.
|
|
75
|
+
- **20+ chunks**: Good coverage.
|
|
76
|
+
|
|
77
|
+
#### Progress reporting
|
|
78
|
+
|
|
79
|
+
After each library, report:
|
|
80
|
+
- `[1/N] library-name — X chunks from {source} (size)`
|
|
81
|
+
- `[2/N] library-name — FAILED: {error message}`
|
|
60
82
|
|
|
61
83
|
### 5. Final Summary
|
|
62
84
|
|
|
@@ -66,22 +88,113 @@ After processing all libraries, report:
|
|
|
66
88
|
Done! Indexed X/Y libraries.
|
|
67
89
|
|
|
68
90
|
react — 85 chunks (llms-full.txt, 340KB)
|
|
69
|
-
next — 120 chunks (llms
|
|
70
|
-
zod — 45 chunks (llms.txt, 95KB)
|
|
71
|
-
express — 30 chunks (
|
|
72
|
-
lodash —
|
|
91
|
+
next — 120 chunks (llms.txt-index, expanded 45 pages)
|
|
92
|
+
zod — 45 chunks (llms-full.txt, 95KB)
|
|
93
|
+
express — 30 chunks (homepage-html)
|
|
94
|
+
lodash — FAILED (no docs found)
|
|
73
95
|
|
|
74
96
|
Total: 280 chunks across 4 libraries.
|
|
75
97
|
Use search_docs to query your documentation.
|
|
76
98
|
```
|
|
77
99
|
|
|
100
|
+
## Known URLs Reference
|
|
101
|
+
|
|
102
|
+
Use these URLs directly with `fetch_and_store_doc` — no searching needed. Prefer `llms-full.txt` when available.
|
|
103
|
+
|
|
104
|
+
### Frameworks & Core
|
|
105
|
+
|
|
106
|
+
| Library | Best URL |
|
|
107
|
+
|---|---|
|
|
108
|
+
| react | `https://react.dev/llms.txt` |
|
|
109
|
+
| react-dom | (use react URL above) |
|
|
110
|
+
| next | `https://nextjs.org/docs/llms-full.txt` |
|
|
111
|
+
| nuxt | `https://nuxt.com/llms-full.txt` |
|
|
112
|
+
| svelte | `https://svelte.dev/llms-full.txt` |
|
|
113
|
+
| @sveltejs/kit | `https://svelte.dev/llms-full.txt` |
|
|
114
|
+
| vue | (no official llms.txt — use `discover_and_fetch_docs`) |
|
|
115
|
+
| react-native | `https://reactnative.dev/llms.txt` |
|
|
116
|
+
| expo | `https://docs.expo.dev/llms-full.txt` |
|
|
117
|
+
| hono | `https://hono.dev/llms.txt` |
|
|
118
|
+
| bun | `https://bun.sh/llms.txt` |
|
|
119
|
+
|
|
120
|
+
### Styling & UI
|
|
121
|
+
|
|
122
|
+
| Library | Best URL |
|
|
123
|
+
|---|---|
|
|
124
|
+
| tailwindcss | `https://tailwindcss.com/llms.txt` |
|
|
125
|
+
| @shadcn/ui / shadcn | `https://ui.shadcn.com/llms.txt` |
|
|
126
|
+
| @chakra-ui/react | `https://chakra-ui.com/llms-full.txt` |
|
|
127
|
+
| daisyui | `https://daisyui.com/llms.txt` |
|
|
128
|
+
| tamagui | `https://tamagui.dev/llms.txt` |
|
|
129
|
+
| @mantine/core | (check `https://mantine.dev/llms.txt`) |
|
|
130
|
+
| react-native-unistyles | `https://www.unistyl.es/llms.txt` |
|
|
131
|
+
|
|
132
|
+
### Data & State
|
|
133
|
+
|
|
134
|
+
| Library | Best URL |
|
|
135
|
+
|---|---|
|
|
136
|
+
| zod | `https://zod.dev/llms-full.txt` |
|
|
137
|
+
| @tanstack/react-query | `https://tanstack.com/query/llms-full.txt` |
|
|
138
|
+
| @tanstack/react-router | `https://tanstack.com/llms.txt` |
|
|
139
|
+
| drizzle-orm | `https://orm.drizzle.team/llms-full.txt` |
|
|
140
|
+
| @prisma/client | `https://prisma.io/docs/llms-full.txt` |
|
|
141
|
+
| convex | `https://docs.convex.dev/llms.txt` |
|
|
142
|
+
|
|
143
|
+
### Backend & APIs
|
|
144
|
+
|
|
145
|
+
| Library | Best URL |
|
|
146
|
+
|---|---|
|
|
147
|
+
| stripe | `https://docs.stripe.com/llms.txt` |
|
|
148
|
+
| @supabase/supabase-js | `https://supabase.com/llms.txt` |
|
|
149
|
+
| resend | `https://resend.com/docs/llms-full.txt` |
|
|
150
|
+
| @medusajs/medusa | `https://docs.medusajs.com/llms-full.txt` |
|
|
151
|
+
| better-auth | `https://www.better-auth.com/llms.txt` |
|
|
152
|
+
|
|
153
|
+
### AI & LLM
|
|
154
|
+
|
|
155
|
+
| Library | Best URL |
|
|
156
|
+
|---|---|
|
|
157
|
+
| ai (Vercel AI SDK) | `https://sdk.vercel.ai/llms.txt` |
|
|
158
|
+
| @anthropic-ai/sdk | `https://docs.anthropic.com/llms-full.txt` |
|
|
159
|
+
| langchain | `https://js.langchain.com/llms.txt` |
|
|
160
|
+
| @modelcontextprotocol/sdk | `https://modelcontextprotocol.io/llms-full.txt` |
|
|
161
|
+
| mastra | `https://mastra.ai/llms-full.txt` |
|
|
162
|
+
|
|
163
|
+
### Dev Tools & Infra
|
|
164
|
+
|
|
165
|
+
| Library | Best URL |
|
|
166
|
+
|---|---|
|
|
167
|
+
| turbo | `https://turbo.build/llms.txt` |
|
|
168
|
+
| @trigger.dev/sdk | `https://trigger.dev/docs/llms-full.txt` |
|
|
169
|
+
| @cloudflare/workers-types | `https://developers.cloudflare.com/llms-full.txt` |
|
|
170
|
+
| @upstash/redis | `https://upstash.com/docs/llms-full.txt` |
|
|
171
|
+
| @netlify/functions | `https://docs.netlify.com/llms.txt` |
|
|
172
|
+
| @liveblocks/client | `https://liveblocks.io/llms-full.txt` |
|
|
173
|
+
|
|
174
|
+
### Animation
|
|
175
|
+
|
|
176
|
+
| Library | Best URL |
|
|
177
|
+
|---|---|
|
|
178
|
+
| motion / framer-motion | Special: `https://llms.motion.dev/docs/react-quick-start.md` (or use WebSearch for full index) |
|
|
179
|
+
|
|
180
|
+
### Notes on special patterns
|
|
181
|
+
|
|
182
|
+
- **Stripe**: Any Stripe doc page becomes markdown by appending `.md` (e.g. `https://docs.stripe.com/payments.md`)
|
|
183
|
+
- **Motion (Framer Motion)**: Uses `llms.motion.dev` subdomain — `motion.dev/docs/{page}` becomes `llms.motion.dev/docs/{page}.md`
|
|
184
|
+
- **Mintlify-hosted docs**: Sites using Mintlify auto-generate `/llms.txt` and `/llms-full.txt` (Anthropic, Cursor, CrewAI, Pinecone, etc.)
|
|
185
|
+
- **GitBook-hosted docs**: GitBook has auto-generated `/llms.txt` for hosted sites since Jan 2025
|
|
186
|
+
- **Nuxt Content docs**: May have separate `https://content.nuxt.com/llms-full.txt`
|
|
187
|
+
- **package.json `llms`/`llmsFull` fields**: Some libraries (like Zod) include doc URLs directly in their npm package metadata — `discover_and_fetch_docs` checks this automatically
|
|
188
|
+
|
|
78
189
|
## Critical Rules
|
|
79
190
|
|
|
80
|
-
- **
|
|
81
|
-
- **
|
|
82
|
-
- **
|
|
83
|
-
- **Use `
|
|
84
|
-
- **
|
|
191
|
+
- **Check known URLs first** — the reference table above is faster and more reliable than searching.
|
|
192
|
+
- **Search second, probe third** — use WebSearch to find llms.txt URLs before falling back to blind URL probing via `discover_and_fetch_docs`.
|
|
193
|
+
- **Prefer `llms-full.txt` over `llms.txt`** — the full version has complete documentation without truncation.
|
|
194
|
+
- **Use `fetch_and_store_doc` when you have a known URL** — from the reference table, WebSearch results, or training data.
|
|
195
|
+
- **Use `discover_and_fetch_docs` when you have no URL** — it will probe common patterns automatically.
|
|
196
|
+
- **Supplement thin results** — if a library has < 5 chunks, search for additional doc pages and fetch them.
|
|
197
|
+
- **NEVER write files to the filesystem directly.** Do NOT use the Write tool, Bash tool, or any other method to save documentation content to disk. ALL storage goes through the MCP tools.
|
|
198
|
+
- **One library at a time for fetching** — fetch and index sequentially so progress stays clear; only WebSearch calls may be batched (up to 5 in parallel).
|
|
85
199
|
- **Skip dev deps by default** — runtime deps only
|
|
86
|
-
- For scoped packages like `@scope/package`, search for both the full name and just the package part
|
|
87
200
|
- Handle errors gracefully: if a library fails, log it and move to the next one
|