claude-local-docs 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +2 -1
- package/README.md +124 -58
- package/commands/fetch-docs.md +54 -28
- package/commands/index-codebase.md +53 -0
- package/dist/code-indexer.d.ts +14 -0
- package/dist/code-indexer.js +519 -0
- package/dist/code-indexer.js.map +1 -0
- package/dist/code-search.d.ts +14 -0
- package/dist/code-search.js +155 -0
- package/dist/code-search.js.map +1 -0
- package/dist/code-store.d.ts +39 -0
- package/dist/code-store.js +206 -0
- package/dist/code-store.js.map +1 -0
- package/dist/code.test.d.ts +7 -0
- package/dist/code.test.js +197 -0
- package/dist/code.test.js.map +1 -0
- package/dist/discovery.js +56 -4
- package/dist/discovery.js.map +1 -1
- package/dist/docs.test.d.ts +7 -0
- package/dist/docs.test.js +105 -0
- package/dist/docs.test.js.map +1 -0
- package/dist/file-walker.d.ts +34 -0
- package/dist/file-walker.js +199 -0
- package/dist/file-walker.js.map +1 -0
- package/dist/index.js +321 -22
- package/dist/index.js.map +1 -1
- package/dist/indexer.js +4 -23
- package/dist/indexer.js.map +1 -1
- package/dist/integration.test.d.ts +3 -2
- package/dist/integration.test.js +461 -11
- package/dist/integration.test.js.map +1 -1
- package/dist/reranker.d.ts +2 -2
- package/dist/reranker.js +10 -12
- package/dist/reranker.js.map +1 -1
- package/dist/rrf.d.ts +17 -0
- package/dist/rrf.js +25 -0
- package/dist/rrf.js.map +1 -0
- package/dist/search.d.ts +2 -0
- package/dist/search.js +30 -52
- package/dist/search.js.map +1 -1
- package/dist/sfc-extractor.d.ts +14 -0
- package/dist/sfc-extractor.js +70 -0
- package/dist/sfc-extractor.js.map +1 -0
- package/dist/store.d.ts +2 -0
- package/dist/store.js +39 -24
- package/dist/store.js.map +1 -1
- package/dist/tei-client.d.ts +70 -0
- package/dist/tei-client.js +153 -0
- package/dist/tei-client.js.map +1 -0
- package/dist/types.d.ts +49 -0
- package/dist/types.js +4 -1
- package/dist/types.js.map +1 -1
- package/dist/unit.test.d.ts +8 -0
- package/dist/unit.test.js +1241 -0
- package/dist/unit.test.js.map +1 -0
- package/docker-compose.nvidia.yml +7 -0
- package/docker-compose.yml +9 -0
- package/package.json +8 -2
- package/scripts/ensure-tei.sh +93 -19
- package/start-tei.sh +17 -3
package/.mcp.json
CHANGED
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# claude-local-docs
|
|
2
2
|
|
|
3
|
-
A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation locally
|
|
3
|
+
A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation **and source code** locally with production-grade semantic search. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection. Supports JS/TS, Vue, Svelte, and Astro with AST-aware chunking, JSDoc extraction, and git-diff incremental indexing.
|
|
4
4
|
|
|
5
5
|
## Why not Context7?
|
|
6
6
|
|
|
@@ -13,8 +13,11 @@ A local-first alternative to Context7 for Claude Code. Indexes your project's de
|
|
|
13
13
|
| **GPU accelerated** | NVIDIA CUDA / Apple Metal | N/A |
|
|
14
14
|
| **Search quality** | 4-stage RAG (vector + BM25 + RRF + cross-encoder reranking) | Single-stage retrieval |
|
|
15
15
|
| **Doc sources** | Prefers llms.txt, falls back to official docs | Pre-indexed source repos |
|
|
16
|
-
| **
|
|
16
|
+
| **Code search** | Semantic AST-level search via Qodo-Embed-1-1.5B | N/A |
|
|
17
|
+
| **Framework support** | JS, TS, Vue, Svelte, Astro (SFC script extraction) | N/A |
|
|
18
|
+
| **Scope** | Your project's actual dependencies + source code | Any library |
|
|
17
19
|
| **Monorepo** | Detects pnpm/npm/yarn workspaces, resolves catalogs | N/A |
|
|
20
|
+
| **Resilience** | BM25-only fallback when TEI is down, retry + timeout | N/A |
|
|
18
21
|
|
|
19
22
|
## Prerequisites
|
|
20
23
|
|
|
@@ -25,32 +28,17 @@ A local-first alternative to Context7 for Claude Code. Indexes your project's de
|
|
|
25
28
|
|
|
26
29
|
## Installation
|
|
27
30
|
|
|
28
|
-
### As a Claude Code
|
|
29
|
-
|
|
30
|
-
Add this to your project's `.mcp.json` (or global `~/.claude/mcp.json`):
|
|
31
|
-
|
|
32
|
-
```json
|
|
33
|
-
{
|
|
34
|
-
"mcpServers": {
|
|
35
|
-
"local-docs": {
|
|
36
|
-
"command": "npx",
|
|
37
|
-
"args": ["-y", "claude-local-docs@latest"],
|
|
38
|
-
"env": {
|
|
39
|
-
"TEI_EMBED_URL": "http://localhost:39281",
|
|
40
|
-
"TEI_RERANK_URL": "http://localhost:39282"
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
Then start the TEI containers (clone the repo or download `start-tei.sh` + `docker-compose.yml`):
|
|
31
|
+
### As a Claude Code plugin (recommended)
|
|
48
32
|
|
|
49
33
|
```bash
|
|
50
|
-
|
|
34
|
+
# Add the marketplace
|
|
35
|
+
/plugin marketplace add matteodante/claude-local-docs
|
|
36
|
+
|
|
37
|
+
# Install the plugin
|
|
38
|
+
/plugin install claude-local-docs
|
|
51
39
|
```
|
|
52
40
|
|
|
53
|
-
The plugin
|
|
41
|
+
The plugin starts TEI containers automatically on session start via a SessionStart hook.
|
|
54
42
|
|
|
55
43
|
### Manual / development setup
|
|
56
44
|
|
|
@@ -66,6 +54,8 @@ npm run build
|
|
|
66
54
|
|
|
67
55
|
## How it works
|
|
68
56
|
|
|
57
|
+
### Documentation search
|
|
58
|
+
|
|
69
59
|
```
|
|
70
60
|
/fetch-docs search_docs("how to use useState")
|
|
71
61
|
| |
|
|
@@ -87,6 +77,33 @@ npm run build
|
|
|
87
77
|
Top-K results
|
|
88
78
|
```
|
|
89
79
|
|
|
80
|
+
### Codebase search
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
/index-codebase search_code("RRF fusion logic")
|
|
84
|
+
| |
|
|
85
|
+
v v
|
|
86
|
+
Walk project files +--- Vector search (LanceDB) -------+
|
|
87
|
+
Respect .gitignore | Qodo-Embed-1-1.5B (1536-dim) |
|
|
88
|
+
Git-diff incremental skip | |
|
|
89
|
+
| | +-> RRF Fusion
|
|
90
|
+
v | | (k=60)
|
|
91
|
+
For each JS/TS/Vue/ +-- BM25 search (LanceDB FTS) ------+
|
|
92
|
+
Svelte/Astro file: | camelCase split + stemming |
|
|
93
|
+
- Extract <script> (SFC) | |
|
|
94
|
+
- Parse AST (tree-sitter) +-- File-path boost (optional) -----+
|
|
95
|
+
- Extract functions/classes | v
|
|
96
|
+
- Extract JSDoc/decorators | Cross-encoder rerank
|
|
97
|
+
- Contextual headers | ms-marco-MiniLM-L-6-v2
|
|
98
|
+
- Embed with Qodo-Embed | (via TEI :39282)
|
|
99
|
+
- Store in LanceDB +--------------------------------------+
|
|
100
|
+
|
|
|
101
|
+
v
|
|
102
|
+
Function-level results
|
|
103
|
+
(file, lines, scope, score)
|
|
104
|
+
+ neighbor chunk expansion
|
|
105
|
+
```
|
|
106
|
+
|
|
90
107
|
## Usage
|
|
91
108
|
|
|
92
109
|
### 1. Index your project's docs
|
|
@@ -97,23 +114,38 @@ npm run build
|
|
|
97
114
|
|
|
98
115
|
Claude analyzes your project (including monorepo workspaces), finds all runtime dependencies, searches the web for the best documentation for each one (preferring `llms-full.txt` > `llms.txt` > official docs), and indexes everything locally.
|
|
99
116
|
|
|
100
|
-
### 2.
|
|
117
|
+
### 2. Index your source code
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
/index-codebase
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Parses all JS/TS/Vue/Svelte/Astro files with tree-sitter, extracts JSDoc comments and decorators, generates Qodo-Embed-1-1.5B embeddings for function/class/method-level chunks, and stores them in LanceDB. Incremental via git-diff (falls back to SHA-256 hashing for non-git projects). Only changed files are re-indexed.
|
|
101
124
|
|
|
102
|
-
|
|
125
|
+
### 3. Search
|
|
126
|
+
|
|
127
|
+
Ask Claude anything. It will automatically use the right search tool:
|
|
103
128
|
|
|
104
129
|
```
|
|
130
|
+
# Library documentation (search_docs)
|
|
105
131
|
How do I set up middleware in Express?
|
|
106
132
|
What are the options for useQuery in TanStack Query?
|
|
107
133
|
Show me the API for zod's .refine()
|
|
134
|
+
|
|
135
|
+
# Your codebase (search_code)
|
|
136
|
+
Where is the authentication middleware?
|
|
137
|
+
Find the database connection setup
|
|
138
|
+
How does the search pipeline work?
|
|
108
139
|
```
|
|
109
140
|
|
|
110
|
-
###
|
|
141
|
+
### 4. Other tools
|
|
111
142
|
|
|
112
143
|
- **`list_docs`** — See what's indexed, when it was fetched, chunk counts
|
|
113
144
|
- **`get_doc_section`** — Retrieve specific sections by heading or chunk ID
|
|
145
|
+
- **`get_codebase_status`** — Check index status, language breakdown, changed files
|
|
114
146
|
- **`analyze_dependencies`** — List all deps (monorepo-aware, catalog-resolved, runtime/dev tagged)
|
|
115
147
|
- **`fetch_and_store_doc`** — Fetch a URL and index it directly (no AI truncation)
|
|
116
|
-
- **`discover_and_fetch_docs`** — Auto-discover and index docs for
|
|
148
|
+
- **`discover_and_fetch_docs`** — Auto-discover and index docs for any npm package
|
|
117
149
|
|
|
118
150
|
## TEI backend
|
|
119
151
|
|
|
@@ -121,8 +153,11 @@ ML inference runs in TEI (HuggingFace Text Embeddings Inference) containers:
|
|
|
121
153
|
|
|
122
154
|
| Container | Port | Model | Purpose |
|
|
123
155
|
|---|---|---|---|
|
|
124
|
-
| tei-embed | `:39281` | `nomic-ai/nomic-embed-text-v1.5` |
|
|
125
|
-
| tei-rerank | `:39282` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
|
|
156
|
+
| tei-embed | `:39281` | `nomic-ai/nomic-embed-text-v1.5` | Doc embeddings (384-dim Matryoshka) |
|
|
157
|
+
| tei-rerank | `:39282` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking (docs + code) |
|
|
158
|
+
| tei-code-embed | `:39283` | `Qodo/Qodo-Embed-1-1.5B` | Code embeddings (1536-dim, 68.5 CoIR) |
|
|
159
|
+
|
|
160
|
+
All TEI communication goes through a shared `TeiClient` class (`src/tei-client.ts`) with automatic retry (2 attempts, exponential backoff), 30s timeout, and batch splitting. If TEI is unavailable, search pipelines gracefully degrade to BM25-only results.
|
|
126
161
|
|
|
127
162
|
### Starting TEI
|
|
128
163
|
|
|
@@ -150,25 +185,39 @@ docker compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
|
|
|
150
185
|
|
|
151
186
|
## Search pipeline
|
|
152
187
|
|
|
153
|
-
4-stage RAG pipeline:
|
|
188
|
+
Both doc search and code search use the same 4-stage RAG pipeline:
|
|
154
189
|
|
|
155
190
|
| Stage | Technology | Purpose |
|
|
156
191
|
|---|---|---|
|
|
157
|
-
| **Vector search** | LanceDB + nomic-embed-
|
|
192
|
+
| **Vector search** | LanceDB + nomic-embed / Qodo-Embed via TEI | Semantic similarity (understands meaning) |
|
|
158
193
|
| **BM25 search** | LanceDB native FTS (BM25, stemming, stop words) | Keyword matching (exact terms like `useEffect`) |
|
|
159
194
|
| **RRF fusion** | Reciprocal Rank Fusion (k=60) | Merges both ranked lists, handles different score scales |
|
|
160
195
|
| **Cross-encoder rerank** | ms-marco-MiniLM-L-6-v2 via TEI | Rescores top 50 candidates with deep relevance model |
|
|
161
196
|
|
|
197
|
+
### Code search specifics
|
|
198
|
+
|
|
199
|
+
- **AST chunking**: tree-sitter parses JS/TS/Vue/Svelte/Astro into function/class/method/interface/namespace entities
|
|
200
|
+
- **JSDoc + decorators**: Extracted from AST and prepended to chunk text for richer search context
|
|
201
|
+
- **Metadata flags**: `exported`, `async`, `abstract` tracked per entity
|
|
202
|
+
- **Qodo-Embed-1-1.5B**: 1.5B parameter model, 68.5 CoIR score, 32K context window, 1536-dim embeddings
|
|
203
|
+
- **Contextual headers**: file path + scope chain + flags + decorators + JSDoc prepended for BM25
|
|
204
|
+
- **File-path boost**: Queries containing file names (e.g., "rrf.ts") get a third RRF signal boosting matching files
|
|
205
|
+
- **Neighbor expansion**: Adjacent chunks from the same file are merged for fuller context
|
|
206
|
+
- **Incremental indexing**: Git-diff based (fast, ~50-100ms), falls back to SHA-256 hashing for non-git projects
|
|
207
|
+
- **Graceful degradation**: BM25-only results when vector embedding or reranker is unavailable
|
|
208
|
+
- **SFC support**: Vue `<script>`/`<script setup>`, Svelte `<script>`/`<script context="module">`, Astro `---` frontmatter + `<script>` tags
|
|
209
|
+
|
|
162
210
|
## Storage
|
|
163
211
|
|
|
164
212
|
All data stays in your project directory:
|
|
165
213
|
|
|
166
214
|
```
|
|
167
215
|
your-project/.claude/docs/
|
|
168
|
-
├── lancedb/
|
|
169
|
-
├── .metadata.json
|
|
216
|
+
├── lancedb/ # Vector database (docs + code tables)
|
|
217
|
+
├── .metadata.json # Doc fetch timestamps, source URLs per library
|
|
218
|
+
├── .code-metadata.json # File hashes, language, chunk counts, last index
|
|
170
219
|
└── raw/
|
|
171
|
-
├── react.md
|
|
220
|
+
├── react.md # Raw fetched documentation
|
|
172
221
|
├── next.md
|
|
173
222
|
└── tanstack__query.md
|
|
174
223
|
```
|
|
@@ -177,13 +226,16 @@ your-project/.claude/docs/
|
|
|
177
226
|
|
|
178
227
|
| Tool | Description |
|
|
179
228
|
|---|---|
|
|
180
|
-
| `analyze_dependencies` |
|
|
181
|
-
| `store_and_index_doc` |
|
|
182
|
-
| `
|
|
183
|
-
| `
|
|
184
|
-
| `
|
|
185
|
-
| `
|
|
186
|
-
| `
|
|
229
|
+
| `analyze_dependencies` | Detect and list all npm dependencies (monorepo-aware, runtime/dev tagged) |
|
|
230
|
+
| `store_and_index_doc` | Index documentation content you already have as a string |
|
|
231
|
+
| `fetch_and_store_doc` | Fetch documentation from a URL and index it (raw HTTP, no truncation) |
|
|
232
|
+
| `discover_and_fetch_docs` | Auto-discover and index docs for an npm package |
|
|
233
|
+
| `search_docs` | Semantic search across indexed library documentation |
|
|
234
|
+
| `list_docs` | List indexed libraries with version and fetch date |
|
|
235
|
+
| `get_doc_section` | Retrieve specific doc sections by heading or chunk ID |
|
|
236
|
+
| `index_codebase` | Index project source code for semantic search (incremental, .gitignore-aware) |
|
|
237
|
+
| `search_code` | Semantic search across project source code (function/class-level) |
|
|
238
|
+
| `get_codebase_status` | Check codebase index status, language breakdown, changed files |
|
|
187
239
|
|
|
188
240
|
## Dependencies
|
|
189
241
|
|
|
@@ -191,23 +243,27 @@ your-project/.claude/docs/
|
|
|
191
243
|
|---|---|---|
|
|
192
244
|
| `@lancedb/lancedb` | Apache 2.0 | Embedded vector database + native FTS |
|
|
193
245
|
| `@modelcontextprotocol/sdk` | MIT | MCP server framework |
|
|
246
|
+
| `web-tree-sitter` | MIT | WASM-based AST parsing for code chunking |
|
|
247
|
+
| `tree-sitter-wasms` | MIT | Pre-built WASM grammars (JS/TS/Vue/Svelte) |
|
|
248
|
+
| `ignore` | MIT | .gitignore pattern matching |
|
|
194
249
|
| `zod` | MIT | Schema validation |
|
|
195
|
-
| `turndown` | MIT | HTML to markdown conversion |
|
|
196
|
-
| `turndown-plugin-gfm` | MIT | GFM support for turndown (tables, strikethrough, etc.) |
|
|
197
250
|
|
|
198
251
|
TEI containers (Docker):
|
|
199
252
|
|
|
200
253
|
| Image | Model | Purpose |
|
|
201
254
|
|---|---|---|
|
|
202
|
-
| `text-embeddings-inference:*` | `nomic-ai/nomic-embed-text-v1.5` |
|
|
255
|
+
| `text-embeddings-inference:*` | `nomic-ai/nomic-embed-text-v1.5` | Doc embeddings |
|
|
203
256
|
| `text-embeddings-inference:*` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
|
|
257
|
+
| `text-embeddings-inference:*` | `Qodo/Qodo-Embed-1-1.5B` | Code embeddings (1536-dim) |
|
|
204
258
|
|
|
205
259
|
## Development
|
|
206
260
|
|
|
207
261
|
```bash
|
|
208
|
-
npm run dev
|
|
209
|
-
npm run build
|
|
210
|
-
npm test
|
|
262
|
+
npm run dev # Watch mode — rebuilds on file changes
|
|
263
|
+
npm run build # One-time build
|
|
264
|
+
npm run test:unit # Unit tests (no TEI needed)
|
|
265
|
+
npm run test:docs # Doc search integration tests (requires TEI on :39281, :39282)
|
|
266
|
+
npm run test:code # Code search integration tests (requires TEI on :39281, :39282, :39283)
|
|
211
267
|
```
|
|
212
268
|
|
|
213
269
|
## Project structure
|
|
@@ -219,7 +275,8 @@ claude-local-docs/
|
|
|
219
275
|
│ └── marketplace.json # Marketplace listing
|
|
220
276
|
├── .mcp.json # MCP server config (stdio transport)
|
|
221
277
|
├── commands/
|
|
222
|
-
│
|
|
278
|
+
│ ├── fetch-docs.md # /fetch-docs — Claude as research agent
|
|
279
|
+
│ └── index-codebase.md # /index-codebase — index source code
|
|
223
280
|
├── hooks/
|
|
224
281
|
│ └── hooks.json # SessionStart hook for TEI containers
|
|
225
282
|
├── scripts/
|
|
@@ -228,17 +285,25 @@ claude-local-docs/
|
|
|
228
285
|
├── docker-compose.nvidia.yml # NVIDIA GPU device passthrough
|
|
229
286
|
├── start-tei.sh # Auto-detect GPU, start TEI
|
|
230
287
|
├── src/
|
|
231
|
-
│ ├── index.ts # MCP server entry,
|
|
232
|
-
│ ├──
|
|
233
|
-
│ ├── indexer.ts #
|
|
234
|
-
│ ├── search.ts #
|
|
288
|
+
│ ├── index.ts # MCP server entry, 10 tool definitions
|
|
289
|
+
│ ├── tei-client.ts # Shared TEI HTTP client (retry, timeout, batching)
|
|
290
|
+
│ ├── indexer.ts # Doc chunking + nomic-embed-text embeddings
|
|
291
|
+
│ ├── search.ts # Doc search pipeline (vector + BM25 + RRF + rerank)
|
|
292
|
+
│ ├── rrf.ts # Shared Reciprocal Rank Fusion utility
|
|
235
293
|
│ ├── reranker.ts # TEI cross-encoder reranking
|
|
236
|
-
│ ├── store.ts # LanceDB
|
|
294
|
+
│ ├── store.ts # LanceDB "docs" table + metadata
|
|
295
|
+
│ ├── code-indexer.ts # AST chunking (tree-sitter) + Qodo-Embed embeddings
|
|
296
|
+
│ ├── code-search.ts # Code search pipeline (4-stage + file-path boost + neighbors)
|
|
297
|
+
│ ├── code-store.ts # LanceDB "code" table + file hash tracking + schema migration
|
|
298
|
+
│ ├── file-walker.ts # Project file discovery + .gitignore + git-diff
|
|
299
|
+
│ ├── sfc-extractor.ts # Vue/Svelte/Astro <script> block extraction
|
|
237
300
|
│ ├── fetcher.ts # Raw HTTP fetch (no AI truncation)
|
|
238
301
|
│ ├── workspace.ts # Monorepo detection + pnpm catalog
|
|
302
|
+
│ ├── discovery.ts # npm registry + URL probing for docs
|
|
239
303
|
│ ├── types.ts # Shared TypeScript interfaces
|
|
240
|
-
│ ├──
|
|
241
|
-
│
|
|
304
|
+
│ ├── unit.test.ts # Unit tests (no TEI needed)
|
|
305
|
+
│ ├── docs.test.ts # Doc search integration tests
|
|
306
|
+
│ └── code.test.ts # Code search integration tests
|
|
242
307
|
├── LICENSE
|
|
243
308
|
├── package.json
|
|
244
309
|
└── tsconfig.json
|
|
@@ -254,15 +319,16 @@ docker info
|
|
|
254
319
|
# Check container logs
|
|
255
320
|
docker compose logs tei-embed
|
|
256
321
|
docker compose logs tei-rerank
|
|
322
|
+
docker compose logs tei-code-embed
|
|
257
323
|
|
|
258
324
|
# Restart
|
|
259
325
|
./start-tei.sh --stop && ./start-tei.sh
|
|
260
326
|
```
|
|
261
327
|
|
|
262
328
|
### Port conflicts
|
|
263
|
-
If 39281/39282 are in use, override via env vars:
|
|
329
|
+
If 39281/39282/39283 are in use, override via env vars:
|
|
264
330
|
```bash
|
|
265
|
-
TEI_EMBED_URL=http://localhost:49281 TEI_RERANK_URL=http://localhost:49282 node dist/index.js
|
|
331
|
+
TEI_EMBED_URL=http://localhost:49281 TEI_RERANK_URL=http://localhost:49282 TEI_CODE_EMBED_URL=http://localhost:49283 node dist/index.js
|
|
266
332
|
```
|
|
267
333
|
|
|
268
334
|
### Apple Silicon — slow performance
|
package/commands/fetch-docs.md
CHANGED
|
@@ -33,47 +33,48 @@ Call `list_docs` to see which libraries are already indexed. **Skip** any librar
|
|
|
33
33
|
|
|
34
34
|
### 4. Fetch Documentation
|
|
35
35
|
|
|
36
|
-
For each remaining library, follow this
|
|
36
|
+
For each remaining library, follow this strategy. The goal is to find the **best quality** source — `llms-full.txt` > `llms.txt` (expanded index) > homepage HTML > README.
|
|
37
37
|
|
|
38
38
|
#### Step A: Check Known URLs first
|
|
39
39
|
|
|
40
|
-
Before any
|
|
40
|
+
Before any probing, check if the library is in the **Known URLs Reference** below. If there's a known `llms-full.txt` or `llms.txt` URL, use it directly with `fetch_and_store_doc`. This is the fastest path.
|
|
41
41
|
|
|
42
|
-
#### Step B:
|
|
42
|
+
#### Step B: `discover_and_fetch_docs` (automatic probing)
|
|
43
43
|
|
|
44
|
-
For libraries NOT in the known list,
|
|
44
|
+
For libraries NOT in the known list, call **`discover_and_fetch_docs`**. This tool automatically:
|
|
45
|
+
1. Checks npm registry for `llms`/`llmsFull` fields in package.json (newest convention)
|
|
46
|
+
2. Probes homepage (skipping GitHub homepages), `docs.{domain}`, `llms.{domain}`, `/docs/` subpath for llms-full.txt/llms.txt
|
|
47
|
+
3. Validates redirect domains (rejects cross-domain redirects like GitHub → docs.github.com)
|
|
48
|
+
4. Validates content quality (rejects 404 pages, too-short content)
|
|
49
|
+
5. Probes GitHub raw for llms-full.txt/llms.txt on main/master branches
|
|
50
|
+
6. Falls back to README.md from GitHub
|
|
51
|
+
7. Falls back to homepage HTML → markdown conversion
|
|
52
|
+
8. Detects index files and expands them by fetching linked pages
|
|
45
53
|
|
|
46
|
-
|
|
54
|
+
#### Step C: WebSearch fallback
|
|
47
55
|
|
|
48
|
-
or
|
|
56
|
+
If `discover_and_fetch_docs` fails or returns very thin results (< 3 chunks), use **WebSearch** to find the actual `llms.txt` or `llms-full.txt` URL:
|
|
49
57
|
|
|
50
58
|
> `{library-name} llms-full.txt OR llms.txt documentation`
|
|
51
59
|
|
|
52
|
-
If the search finds a concrete URL
|
|
53
|
-
|
|
54
|
-
**Batch the searches**: Run WebSearch for multiple libraries in parallel (up to 5 at a time) to collect URLs upfront. Then fetch them one by one.
|
|
55
|
-
|
|
56
|
-
#### Step C: `discover_and_fetch_docs` (automatic probing)
|
|
57
|
-
|
|
58
|
-
If neither known URLs nor WebSearch found an `llms.txt` URL, call **`discover_and_fetch_docs`**. This tool automatically:
|
|
59
|
-
1. Checks npm registry for `llms`/`llmsFull` fields in package.json (newest convention)
|
|
60
|
-
2. Probes homepage, `docs.{domain}`, `llms.{domain}`, `/docs/` subpath for llms-full.txt/llms.txt
|
|
61
|
-
3. Probes GitHub raw for llms-full.txt/llms.txt on main/master branches
|
|
62
|
-
4. Falls back to README.md from GitHub
|
|
63
|
-
5. Falls back to homepage HTML → markdown conversion
|
|
64
|
-
6. Detects index files and expands them by fetching linked pages
|
|
60
|
+
If the search finds a concrete URL, pass it to **`fetch_and_store_doc`**. Prefer `llms-full.txt` over `llms.txt`.
|
|
65
61
|
|
|
66
62
|
#### Step D: Training data fallback
|
|
67
63
|
|
|
68
64
|
If all above fail, try **`fetch_and_store_doc`** with documentation URLs you know from your training data (GitHub raw docs, official doc site pages, etc.).
|
|
69
65
|
|
|
70
|
-
#### Evaluating results
|
|
66
|
+
#### Evaluating results & chunk quality
|
|
71
67
|
|
|
72
68
|
After each library is fetched, check the chunk count:
|
|
73
|
-
- **<
|
|
69
|
+
- **< 3 chunks**: Very thin — flag as "very thin, may need supplementing". Try `fetch_and_store_doc` with additional doc pages from training data.
|
|
70
|
+
- **3-4 chunks**: Thin. Acceptable for small/simple libraries, but note it in the summary.
|
|
74
71
|
- **5-20 chunks**: Acceptable for small libraries.
|
|
75
72
|
- **20+ chunks**: Good coverage.
|
|
76
73
|
|
|
74
|
+
Also note the source type:
|
|
75
|
+
- `readme` fallback means the library has no proper docs site — worth noting
|
|
76
|
+
- `homepage-html` means HTML was converted — quality varies
|
|
77
|
+
|
|
77
78
|
#### Progress reporting
|
|
78
79
|
|
|
79
80
|
After each library, report:
|
|
@@ -93,6 +94,12 @@ Done! Indexed X/Y libraries.
|
|
|
93
94
|
express — 30 chunks (homepage-html)
|
|
94
95
|
lodash — FAILED (no docs found)
|
|
95
96
|
|
|
97
|
+
Thin coverage (< 5 chunks):
|
|
98
|
+
some-lib — 2 chunks (readme) ⚠️
|
|
99
|
+
|
|
100
|
+
README fallback (no docs site found):
|
|
101
|
+
another-lib — 8 chunks (readme)
|
|
102
|
+
|
|
96
103
|
Total: 280 chunks across 4 libraries.
|
|
97
104
|
Use search_docs to query your documentation.
|
|
98
105
|
```
|
|
@@ -112,10 +119,11 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
|
|
|
112
119
|
| svelte | `https://svelte.dev/llms-full.txt` |
|
|
113
120
|
| @sveltejs/kit | `https://svelte.dev/llms-full.txt` |
|
|
114
121
|
| vue | (no official llms.txt — use `discover_and_fetch_docs`) |
|
|
115
|
-
| react-native | `https://reactnative.dev/llms.txt` |
|
|
122
|
+
| react-native | `https://reactnative.dev/llms-full.txt` |
|
|
116
123
|
| expo | `https://docs.expo.dev/llms-full.txt` |
|
|
117
124
|
| hono | `https://hono.dev/llms.txt` |
|
|
118
125
|
| bun | `https://bun.sh/llms.txt` |
|
|
126
|
+
| astro | `https://astro.build/llms.txt` |
|
|
119
127
|
|
|
120
128
|
### Styling & UI
|
|
121
129
|
|
|
@@ -139,6 +147,7 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
|
|
|
139
147
|
| drizzle-orm | `https://orm.drizzle.team/llms-full.txt` |
|
|
140
148
|
| @prisma/client | `https://prisma.io/docs/llms-full.txt` |
|
|
141
149
|
| convex | `https://docs.convex.dev/llms.txt` |
|
|
150
|
+
| zustand | `https://zustand.docs.pmnd.rs/llms-full.txt` |
|
|
142
151
|
|
|
143
152
|
### Backend & APIs
|
|
144
153
|
|
|
@@ -149,6 +158,7 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
|
|
|
149
158
|
| resend | `https://resend.com/docs/llms-full.txt` |
|
|
150
159
|
| @medusajs/medusa | `https://docs.medusajs.com/llms-full.txt` |
|
|
151
160
|
| better-auth | `https://www.better-auth.com/llms.txt` |
|
|
161
|
+
| bullmq | `https://docs.bullmq.io/llms-full.txt` |
|
|
152
162
|
|
|
153
163
|
### AI & LLM
|
|
154
164
|
|
|
@@ -171,11 +181,27 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
|
|
|
171
181
|
| @netlify/functions | `https://docs.netlify.com/llms.txt` |
|
|
172
182
|
| @liveblocks/client | `https://liveblocks.io/llms-full.txt` |
|
|
173
183
|
|
|
184
|
+
### React Native Libraries
|
|
185
|
+
|
|
186
|
+
| Library | Best URL |
|
|
187
|
+
|---|---|
|
|
188
|
+
| react-native-reanimated | `https://docs.swmansion.com/react-native-reanimated/llms.txt` |
|
|
189
|
+
| react-native-gesture-handler | `https://docs.swmansion.com/react-native-gesture-handler/llms.txt` |
|
|
190
|
+
| @react-navigation/native | `https://reactnavigation.org/llms.txt` |
|
|
191
|
+
| react-native-keyboard-controller | `https://kirillzyusko.github.io/react-native-keyboard-controller/llms-full.txt` |
|
|
192
|
+
|
|
193
|
+
### i18n
|
|
194
|
+
|
|
195
|
+
| Library | Best URL |
|
|
196
|
+
|---|---|
|
|
197
|
+
| i18next | `https://www.i18next.com/llms-full.txt` |
|
|
198
|
+
| react-i18next | `https://react.i18next.com/llms-full.txt` |
|
|
199
|
+
|
|
174
200
|
### Animation
|
|
175
201
|
|
|
176
202
|
| Library | Best URL |
|
|
177
203
|
|---|---|
|
|
178
|
-
| motion / framer-motion | Special: `https://llms.motion.dev/docs/react-quick-start.md` (or use
|
|
204
|
+
| motion / framer-motion | Special: `https://llms.motion.dev/docs/react-quick-start.md` (or use `discover_and_fetch_docs`) |
|
|
179
205
|
|
|
180
206
|
### Notes on special patterns
|
|
181
207
|
|
|
@@ -188,13 +214,13 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
|
|
|
188
214
|
|
|
189
215
|
## Critical Rules
|
|
190
216
|
|
|
191
|
-
- **Check known URLs first** — the reference table above is faster and more reliable than
|
|
192
|
-
- **
|
|
217
|
+
- **Check known URLs first** — the reference table above is faster and more reliable than probing.
|
|
218
|
+
- **Use `discover_and_fetch_docs` for unknown libraries** — it now correctly handles GitHub homepages and validates redirects.
|
|
193
219
|
- **Prefer `llms-full.txt` over `llms.txt`** — the full version has complete documentation without truncation.
|
|
194
|
-
- **Use `fetch_and_store_doc` when you have a known URL** — from the reference table
|
|
220
|
+
- **Use `fetch_and_store_doc` when you have a known URL** — from the reference table or training data.
|
|
195
221
|
- **Use `discover_and_fetch_docs` when you have no URL** — it will probe common patterns automatically.
|
|
196
|
-
- **
|
|
222
|
+
- **Flag thin results** — report libraries with < 3 chunks as "very thin" in the summary.
|
|
197
223
|
- **NEVER write files to the filesystem directly.** Do NOT use the Write tool, Bash tool, or any other method to save documentation content to disk. ALL storage goes through the MCP tools.
|
|
198
|
-
- **One library at a time for fetching** — clear progress, no batching
|
|
224
|
+
- **One library at a time for fetching** — clear progress, no batching
|
|
199
225
|
- **Skip dev deps by default** — runtime deps only
|
|
200
226
|
- Handle errors gracefully: if a library fails, log it and move to the next one
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: "Index the project's source code for semantic search"
|
|
3
|
+
allowed-tools: ["mcp__local-docs__get_codebase_status", "mcp__local-docs__index_codebase"]
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Index Project Codebase
|
|
7
|
+
|
|
8
|
+
You are a codebase indexing agent. Your job is to index the project's source code so it can be searched semantically with `search_code`.
|
|
9
|
+
|
|
10
|
+
## Steps
|
|
11
|
+
|
|
12
|
+
### 1. Check Current Status
|
|
13
|
+
|
|
14
|
+
Call `get_codebase_status` to see:
|
|
15
|
+
- Whether any code has been indexed before
|
|
16
|
+
- How many files are currently indexed
|
|
17
|
+
- Language breakdown (TypeScript vs JavaScript)
|
|
18
|
+
- Files that have changed since last index
|
|
19
|
+
|
|
20
|
+
### 2. Run Indexing
|
|
21
|
+
|
|
22
|
+
Based on the status:
|
|
23
|
+
|
|
24
|
+
- **First time**: Call `index_codebase` with no parameters. This will index all supported source files (JS/TS, plus the script blocks of Vue/Svelte/Astro files).
|
|
25
|
+
- **Files changed**: Call `index_codebase` with no parameters. Incremental indexing will only process changed files.
|
|
26
|
+
- **Force refresh**: Call `index_codebase` with `forceReindex: true` to re-index everything.
|
|
27
|
+
- **Up to date**: If no files have changed, tell the user the index is current.
|
|
28
|
+
|
|
29
|
+
### 3. Report Results
|
|
30
|
+
|
|
31
|
+
After indexing completes, report:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
Codebase indexed!
|
|
35
|
+
|
|
36
|
+
TypeScript: 45 files
|
|
37
|
+
JavaScript: 12 files
|
|
38
|
+
Total: 57 files, 320 chunks
|
|
39
|
+
|
|
40
|
+
Indexed: 15 files (changed)
|
|
41
|
+
Skipped: 42 files (unchanged)
|
|
42
|
+
Removed: 0 files (deleted)
|
|
43
|
+
|
|
44
|
+
Use search_code to search your codebase semantically.
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
If there were errors, list them so the user can investigate.
|
|
48
|
+
|
|
49
|
+
## Critical Rules
|
|
50
|
+
|
|
51
|
+
- Always check status first — avoid unnecessary full re-indexing
|
|
52
|
+
- Report per-language breakdown
|
|
53
|
+
- Mention `search_code` is available after indexing
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST-based code chunking via web-tree-sitter + code embedding.
|
|
3
|
+
* Parses JS/TS files into function/class/method-level chunks with contextual headers.
|
|
4
|
+
* Extracts JSDoc, decorators, and metadata flags (exported, async, abstract).
|
|
5
|
+
*/
|
|
6
|
+
import type { CodeRow } from "./types.js";
|
|
7
|
+
/**
|
|
8
|
+
* Parse and chunk a code file into entities. Accepts an optional lineOffset
|
|
9
|
+
* for SFC files where script content starts at a non-zero line.
|
|
10
|
+
*/
|
|
11
|
+
export declare function chunkCodeFile(source: string, filePath: string, language: string, lineOffset?: number): Promise<Omit<CodeRow, "id" | "vector">[]>;
|
|
12
|
+
export declare function embedCodeTexts(texts: string[], mode?: "document" | "query"): Promise<number[][]>;
|
|
13
|
+
/** Parse and embed a code file, returning rows ready for LanceDB. */
|
|
14
|
+
export declare function indexCodeFile(source: string, filePath: string, language: string): Promise<Omit<CodeRow, "id">[]>;
|