claude-local-docs 1.0.14 → 1.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/.mcp.json +2 -1
  2. package/README.md +119 -31
  3. package/commands/index-codebase.md +53 -0
  4. package/dist/code-indexer.d.ts +14 -0
  5. package/dist/code-indexer.js +519 -0
  6. package/dist/code-indexer.js.map +1 -0
  7. package/dist/code-search.d.ts +14 -0
  8. package/dist/code-search.js +155 -0
  9. package/dist/code-search.js.map +1 -0
  10. package/dist/code-store.d.ts +39 -0
  11. package/dist/code-store.js +206 -0
  12. package/dist/code-store.js.map +1 -0
  13. package/dist/code.test.d.ts +7 -0
  14. package/dist/code.test.js +197 -0
  15. package/dist/code.test.js.map +1 -0
  16. package/dist/docs.test.d.ts +7 -0
  17. package/dist/docs.test.js +105 -0
  18. package/dist/docs.test.js.map +1 -0
  19. package/dist/file-walker.d.ts +34 -0
  20. package/dist/file-walker.js +199 -0
  21. package/dist/file-walker.js.map +1 -0
  22. package/dist/index.js +315 -21
  23. package/dist/index.js.map +1 -1
  24. package/dist/indexer.js +4 -23
  25. package/dist/indexer.js.map +1 -1
  26. package/dist/integration.test.d.ts +3 -2
  27. package/dist/integration.test.js +461 -11
  28. package/dist/integration.test.js.map +1 -1
  29. package/dist/reranker.d.ts +2 -2
  30. package/dist/reranker.js +10 -14
  31. package/dist/reranker.js.map +1 -1
  32. package/dist/rrf.d.ts +17 -0
  33. package/dist/rrf.js +25 -0
  34. package/dist/rrf.js.map +1 -0
  35. package/dist/search.d.ts +2 -0
  36. package/dist/search.js +23 -50
  37. package/dist/search.js.map +1 -1
  38. package/dist/sfc-extractor.d.ts +14 -0
  39. package/dist/sfc-extractor.js +70 -0
  40. package/dist/sfc-extractor.js.map +1 -0
  41. package/dist/store.d.ts +2 -0
  42. package/dist/store.js +16 -20
  43. package/dist/store.js.map +1 -1
  44. package/dist/tei-client.d.ts +70 -0
  45. package/dist/tei-client.js +153 -0
  46. package/dist/tei-client.js.map +1 -0
  47. package/dist/types.d.ts +49 -0
  48. package/dist/types.js +4 -1
  49. package/dist/types.js.map +1 -1
  50. package/dist/unit.test.d.ts +8 -0
  51. package/dist/unit.test.js +1243 -0
  52. package/dist/unit.test.js.map +1 -0
  53. package/docker-compose.nvidia.yml +7 -0
  54. package/docker-compose.yml +9 -0
  55. package/package.json +8 -2
  56. package/scripts/ensure-tei.sh +93 -19
  57. package/start-tei.sh +17 -3
package/.mcp.json CHANGED
@@ -5,7 +5,8 @@
5
5
  "args": ["-y", "claude-local-docs@latest"],
6
6
  "env": {
7
7
  "TEI_EMBED_URL": "http://localhost:39281",
8
- "TEI_RERANK_URL": "http://localhost:39282"
8
+ "TEI_RERANK_URL": "http://localhost:39282",
9
+ "TEI_CODE_EMBED_URL": "http://localhost:39283"
9
10
  }
10
11
  }
11
12
  }
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # claude-local-docs
2
2
 
3
- A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation locally and provides production-grade semantic search. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection.
3
+ A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation **and source code** locally with production-grade semantic search. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection. Supports JS/TS, Vue, Svelte, and Astro with AST-aware chunking, JSDoc extraction, and git-diff incremental indexing.
4
4
 
5
5
  ## Why not Context7?
6
6
 
@@ -13,8 +13,11 @@ A local-first alternative to Context7 for Claude Code. Indexes your project's de
13
13
  | **GPU accelerated** | NVIDIA CUDA / Apple Metal | N/A |
14
14
  | **Search quality** | 4-stage RAG (vector + BM25 + RRF + cross-encoder reranking) | Single-stage retrieval |
15
15
  | **Doc sources** | Prefers llms.txt, falls back to official docs | Pre-indexed source repos |
16
- | **Scope** | Your project's actual dependencies | Any library |
16
+ | **Code search** | Semantic AST-level search via Qodo-Embed-1-1.5B | N/A |
17
+ | **Framework support** | JS, TS, Vue, Svelte, Astro (SFC script extraction) | N/A |
18
+ | **Scope** | Your project's actual dependencies + source code | Any library |
17
19
  | **Monorepo** | Detects pnpm/npm/yarn workspaces, resolves catalogs | N/A |
20
+ | **Resilience** | BM25-only fallback when TEI is down, retry + timeout | N/A |
18
21
 
19
22
  ## Prerequisites
20
23
 
@@ -51,6 +54,8 @@ npm run build
51
54
 
52
55
  ## How it works
53
56
 
57
+ ### Documentation search
58
+
54
59
  ```
55
60
  /fetch-docs search_docs("how to use useState")
56
61
  | |
@@ -72,6 +77,33 @@ npm run build
72
77
  Top-K results
73
78
  ```
74
79
 
80
+ ### Codebase search
81
+
82
+ ```
83
+ /index-codebase search_code("RRF fusion logic")
84
+ | |
85
+ v v
86
+ Walk project files +--- Vector search (LanceDB) -------+
87
+ Respect .gitignore | Qodo-Embed-1-1.5B (1536-dim) |
88
+ Git-diff incremental skip | |
89
+ | | +-> RRF Fusion
90
+ v | | (k=60)
91
+ For each JS/TS/Vue/ +-- BM25 search (LanceDB FTS) ------+
92
+ Svelte/Astro file: | camelCase split + stemming |
93
+ - Extract <script> (SFC) | |
94
+ - Parse AST (tree-sitter) +-- File-path boost (optional) -----+
95
+ - Extract functions/classes | v
96
+ - Extract JSDoc/decorators | Cross-encoder rerank
97
+ - Contextual headers | ms-marco-MiniLM-L-6-v2
98
+ - Embed with Qodo-Embed | (via TEI :39282)
99
+ - Store in LanceDB +--------------------------------------+
100
+ |
101
+ v
102
+ Function-level results
103
+ (file, lines, scope, score)
104
+ + neighbor chunk expansion
105
+ ```
106
+
75
107
  ## Usage
76
108
 
77
109
  ### 1. Index your project's docs
@@ -82,22 +114,38 @@ npm run build
82
114
 
83
115
  Claude analyzes your project (including monorepo workspaces), finds all runtime dependencies, searches the web for the best documentation for each one (preferring `llms-full.txt` > `llms.txt` > official docs), and indexes everything locally.
84
116
 
85
- ### 2. Search
117
+ ### 2. Index your source code
86
118
 
87
- Ask Claude anything about your dependencies. It will automatically use `search_docs` to find relevant documentation chunks:
119
+ ```
120
+ /index-codebase
121
+ ```
122
+
123
+ Parses all JS/TS/Vue/Svelte/Astro files with tree-sitter, extracts JSDoc comments and decorators, generates Qodo-Embed-1-1.5B embeddings for function/class/method-level chunks, and stores them in LanceDB. Incremental via git-diff (falls back to SHA-256 hashing for non-git projects). Only changed files are re-indexed.
124
+
125
+ ### 3. Search
126
+
127
+ Ask Claude anything. It will automatically use the right search tool:
88
128
 
89
129
  ```
130
+ # Library documentation (search_docs)
90
131
  How do I set up middleware in Express?
91
132
  What are the options for useQuery in TanStack Query?
92
133
  Show me the API for zod's .refine()
134
+
135
+ # Your codebase (search_code)
136
+ Where is the authentication middleware?
137
+ Find the database connection setup
138
+ How does the search pipeline work?
93
139
  ```
94
140
 
95
- ### 3. Other tools
141
+ ### 4. Other tools
96
142
 
97
143
  - **`list_docs`** — See what's indexed, when it was fetched, chunk counts
98
144
  - **`get_doc_section`** — Retrieve specific sections by heading or chunk ID
145
+ - **`get_codebase_status`** — Check index status, language breakdown, changed files
99
146
  - **`analyze_dependencies`** — List all deps (monorepo-aware, catalog-resolved, runtime/dev tagged)
100
147
  - **`fetch_and_store_doc`** — Fetch a URL and index it directly (no AI truncation)
148
+ - **`discover_and_fetch_docs`** — Auto-discover and index docs for any npm package
101
149
 
102
150
  ## TEI backend
103
151
 
@@ -105,8 +153,11 @@ ML inference runs in TEI (HuggingFace Text Embeddings Inference) containers:
105
153
 
106
154
  | Container | Port | Model | Purpose |
107
155
  |---|---|---|---|
108
- | tei-embed | `:39281` | `nomic-ai/nomic-embed-text-v1.5` | Text embeddings (384-dim Matryoshka) |
109
- | tei-rerank | `:39282` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
156
+ | tei-embed | `:39281` | `nomic-ai/nomic-embed-text-v1.5` | Doc embeddings (384-dim Matryoshka) |
157
+ | tei-rerank | `:39282` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking (docs + code) |
158
+ | tei-code-embed | `:39283` | `Qodo/Qodo-Embed-1-1.5B` | Code embeddings (1536-dim, 68.5 CoIR) |
159
+
160
+ All TEI communication goes through a shared `TeiClient` class (`src/tei-client.ts`) with automatic retry (2 attempts, exponential backoff), 30s timeout, and batch splitting. If TEI is unavailable, search pipelines gracefully degrade to BM25-only results.
110
161
 
111
162
  ### Starting TEI
112
163
 
@@ -134,14 +185,27 @@ docker compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
134
185
 
135
186
  ## Search pipeline
136
187
 
137
- 4-stage RAG pipeline:
188
+ Both doc search and code search use the same 4-stage RAG pipeline:
138
189
 
139
190
  | Stage | Technology | Purpose |
140
191
  |---|---|---|
141
- | **Vector search** | LanceDB + nomic-embed-text-v1.5 via TEI | Semantic similarity (understands meaning) |
192
+ | **Vector search** | LanceDB + nomic-embed / Qodo-Embed via TEI | Semantic similarity (understands meaning) |
142
193
  | **BM25 search** | LanceDB native FTS (BM25, stemming, stop words) | Keyword matching (exact terms like `useEffect`) |
143
194
  | **RRF fusion** | Reciprocal Rank Fusion (k=60) | Merges both ranked lists, handles different score scales |
144
- | **Cross-encoder rerank** | ms-marco-MiniLM-L-6-v2 via TEI | Rescores top 30 candidates with deep relevance model |
195
+ | **Cross-encoder rerank** | ms-marco-MiniLM-L-6-v2 via TEI | Rescores top 50 candidates with deep relevance model |
196
+
197
+ ### Code search specifics
198
+
199
+ - **AST chunking**: tree-sitter parses JS/TS/Vue/Svelte/Astro into function/class/method/interface/namespace entities
200
+ - **JSDoc + decorators**: Extracted from AST and prepended to chunk text for richer search context
201
+ - **Metadata flags**: `exported`, `async`, `abstract` tracked per entity
202
+ - **Qodo-Embed-1-1.5B**: 1.5B parameter model, 68.5 CoIR score, 32K context window, 1536-dim embeddings
203
+ - **Contextual headers**: file path + scope chain + flags + decorators + JSDoc prepended for BM25
204
+ - **File-path boost**: Queries containing file names (e.g., "rrf.ts") get a third RRF signal boosting matching files
205
+ - **Neighbor expansion**: Adjacent chunks from the same file are merged for fuller context
206
+ - **Incremental indexing**: Git-diff based (fast, ~50-100ms), falls back to SHA-256 hashing for non-git projects
207
+ - **Graceful degradation**: BM25-only results when vector embedding or reranker is unavailable
208
+ - **SFC support**: Vue `<script>`/`<script setup>`, Svelte `<script>`/`<script context="module">`, Astro `---` frontmatter + `<script>` tags
145
209
 
146
210
  ## Storage
147
211
 
@@ -149,10 +213,11 @@ All data stays in your project directory:
149
213
 
150
214
  ```
151
215
  your-project/.claude/docs/
152
- ├── lancedb/ # Vector database (LanceDB files)
153
- ├── .metadata.json # Fetch timestamps, source URLs per library
216
+ ├── lancedb/ # Vector database (docs + code tables)
217
+ ├── .metadata.json # Doc fetch timestamps, source URLs per library
218
+ ├── .code-metadata.json # File hashes, language, chunk counts, last index
154
219
  └── raw/
155
- ├── react.md # Raw fetched documentation
220
+ ├── react.md # Raw fetched documentation
156
221
  ├── next.md
157
222
  └── tanstack__query.md
158
223
  ```
@@ -161,12 +226,16 @@ your-project/.claude/docs/
161
226
 
162
227
  | Tool | Description |
163
228
  |---|---|
164
- | `analyze_dependencies` | Monorepo-aware dep analysis: detects workspaces, resolves catalog versions, tags runtime/dev |
165
- | `store_and_index_doc` | Receive markdown, chunk, embed via TEI, store in LanceDB |
166
- | `fetch_and_store_doc` | Fetch URL directly (raw HTTP, no truncation), then chunk + embed + store |
167
- | `search_docs` | Full RAG pipeline: vector + BM25 + RRF + rerank via TEI |
168
- | `list_docs` | List indexed libraries with metadata |
169
- | `get_doc_section` | Get specific chunks by library + heading or chunk ID |
229
+ | `analyze_dependencies` | Detect and list all npm dependencies (monorepo-aware, runtime/dev tagged) |
230
+ | `store_and_index_doc` | Index documentation content you already have as a string |
231
+ | `fetch_and_store_doc` | Fetch documentation from a URL and index it (raw HTTP, no truncation) |
232
+ | `discover_and_fetch_docs` | Auto-discover and index docs for an npm package |
233
+ | `search_docs` | Semantic search across indexed library documentation |
234
+ | `list_docs` | List indexed libraries with version and fetch date |
235
+ | `get_doc_section` | Retrieve specific doc sections by heading or chunk ID |
236
+ | `index_codebase` | Index project source code for semantic search (incremental, .gitignore-aware) |
237
+ | `search_code` | Semantic search across project source code (function/class-level) |
238
+ | `get_codebase_status` | Check codebase index status, language breakdown, changed files |
170
239
 
171
240
  ## Dependencies
172
241
 
@@ -174,21 +243,27 @@ your-project/.claude/docs/
174
243
  |---|---|---|
175
244
  | `@lancedb/lancedb` | Apache 2.0 | Embedded vector database + native FTS |
176
245
  | `@modelcontextprotocol/sdk` | MIT | MCP server framework |
246
+ | `web-tree-sitter` | MIT | WASM-based AST parsing for code chunking |
247
+ | `tree-sitter-wasms` | MIT | Pre-built WASM grammars (JS/TS/Vue/Svelte) |
248
+ | `ignore` | MIT | .gitignore pattern matching |
177
249
  | `zod` | MIT | Schema validation |
178
250
 
179
251
  TEI containers (Docker):
180
252
 
181
253
  | Image | Model | Purpose |
182
254
  |---|---|---|
183
- | `text-embeddings-inference:*` | `nomic-ai/nomic-embed-text-v1.5` | Text embeddings |
255
+ | `text-embeddings-inference:*` | `nomic-ai/nomic-embed-text-v1.5` | Doc embeddings |
184
256
  | `text-embeddings-inference:*` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
257
+ | `text-embeddings-inference:*` | `Qodo/Qodo-Embed-1-1.5B` | Code embeddings (1536-dim) |
185
258
 
186
259
  ## Development
187
260
 
188
261
  ```bash
189
- npm run dev # Watch mode — rebuilds on file changes
190
- npm run build # One-time build
191
- npm test # Integration test (requires TEI running)
262
+ npm run dev # Watch mode — rebuilds on file changes
263
+ npm run build # One-time build
264
+ npm run test:unit # Unit tests (no TEI needed)
265
+ npm run test:docs # Doc search integration tests (requires TEI on :39281, :39282)
266
+ npm run test:code # Code search integration tests (requires TEI on :39281, :39282, :39283)
192
267
  ```
193
268
 
194
269
  ## Project structure
@@ -200,7 +275,8 @@ claude-local-docs/
200
275
  │ └── marketplace.json # Marketplace listing
201
276
  ├── .mcp.json # MCP server config (stdio transport)
202
277
  ├── commands/
203
- └── fetch-docs.md # /fetch-docs — Claude as research agent
278
+ ├── fetch-docs.md # /fetch-docs — Claude as research agent
279
+ │ └── index-codebase.md # /index-codebase — index source code
204
280
  ├── hooks/
205
281
  │ └── hooks.json # SessionStart hook for TEI containers
206
282
  ├── scripts/
@@ -209,14 +285,25 @@ claude-local-docs/
209
285
  ├── docker-compose.nvidia.yml # NVIDIA GPU device passthrough
210
286
  ├── start-tei.sh # Auto-detect GPU, start TEI
211
287
  ├── src/
212
- │ ├── index.ts # MCP server entry, 6 tool definitions
213
- │ ├── indexer.ts # Chunking + TEI embeddings
214
- │ ├── search.ts # 4-stage pipeline: vector + BM25 + RRF + rerank
288
+ │ ├── index.ts # MCP server entry, 10 tool definitions
289
+ │ ├── tei-client.ts # Shared TEI HTTP client (retry, timeout, batching)
290
+ │ ├── indexer.ts # Doc chunking + nomic-embed-text embeddings
291
+ │ ├── search.ts # Doc search pipeline (vector + BM25 + RRF + rerank)
292
+ │ ├── rrf.ts # Shared Reciprocal Rank Fusion utility
215
293
  │ ├── reranker.ts # TEI cross-encoder reranking
216
- │ ├── store.ts # LanceDB storage + metadata persistence
294
+ │ ├── store.ts # LanceDB "docs" table + metadata
295
+ │ ├── code-indexer.ts # AST chunking (tree-sitter) + Qodo-Embed embeddings
296
+ │ ├── code-search.ts # Code search pipeline (4-stage + file-path boost + neighbors)
297
+ │ ├── code-store.ts # LanceDB "code" table + file hash tracking + schema migration
298
+ │ ├── file-walker.ts # Project file discovery + .gitignore + git-diff
299
+ │ ├── sfc-extractor.ts # Vue/Svelte/Astro <script> block extraction
217
300
  │ ├── fetcher.ts # Raw HTTP fetch (no AI truncation)
218
301
  │ ├── workspace.ts # Monorepo detection + pnpm catalog
219
- └── types.ts # Shared TypeScript interfaces
302
+ ├── discovery.ts # npm registry + URL probing for docs
303
+ │ ├── types.ts # Shared TypeScript interfaces
304
+ │ ├── unit.test.ts # Unit tests (no TEI needed)
305
+ │ ├── docs.test.ts # Doc search integration tests
306
+ │ └── code.test.ts # Code search integration tests
220
307
  ├── LICENSE
221
308
  ├── package.json
222
309
  └── tsconfig.json
@@ -232,15 +319,16 @@ docker info
232
319
  # Check container logs
233
320
  docker compose logs tei-embed
234
321
  docker compose logs tei-rerank
322
+ docker compose logs tei-code-embed
235
323
 
236
324
  # Restart
237
325
  ./start-tei.sh --stop && ./start-tei.sh
238
326
  ```
239
327
 
240
328
  ### Port conflicts
241
- If 39281/39282 are in use, override via env vars:
329
+ If 39281/39282/39283 are in use, override via env vars:
242
330
  ```bash
243
- TEI_EMBED_URL=http://localhost:49281 TEI_RERANK_URL=http://localhost:49282 node dist/index.js
331
+ TEI_EMBED_URL=http://localhost:49281 TEI_RERANK_URL=http://localhost:49282 TEI_CODE_EMBED_URL=http://localhost:49283 node dist/index.js
244
332
  ```
245
333
 
246
334
  ### Apple Silicon — slow performance
package/commands/index-codebase.md ADDED
@@ -0,0 +1,53 @@
1
+ ---
2
+ description: "Index the project's source code for semantic search"
3
+ allowed-tools: ["mcp__local-docs__get_codebase_status", "mcp__local-docs__index_codebase"]
4
+ ---
5
+
6
+ # Index Project Codebase
7
+
8
+ You are a codebase indexing agent. Your job is to index the project's source code so it can be searched semantically with `search_code`.
9
+
10
+ ## Steps
11
+
12
+ ### 1. Check Current Status
13
+
14
+ Call `get_codebase_status` to see:
15
+ - Whether any code has been indexed before
16
+ - How many files are currently indexed
17
+ - Language breakdown (TypeScript vs JavaScript)
18
+ - Files that have changed since last index
19
+
20
+ ### 2. Run Indexing
21
+
22
+ Based on the status:
23
+
24
+ - **First time**: Call `index_codebase` with no parameters. This will index all JS/TS files.
25
+ - **Files changed**: Call `index_codebase` with no parameters. Incremental indexing will only process changed files.
26
+ - **Force refresh**: Call `index_codebase` with `forceReindex: true` to re-index everything.
27
+ - **Up to date**: If no files have changed, tell the user the index is current.
28
+
29
+ ### 3. Report Results
30
+
31
+ After indexing completes, report:
32
+
33
+ ```
34
+ Codebase indexed!
35
+
36
+ TypeScript: 45 files
37
+ JavaScript: 12 files
38
+ Total: 57 files, 320 chunks
39
+
40
+ Indexed: 15 files (changed)
41
+ Skipped: 42 files (unchanged)
42
+ Removed: 0 files (deleted)
43
+
44
+ Use search_code to search your codebase semantically.
45
+ ```
46
+
47
+ If there were errors, list them so the user can investigate.
48
+
49
+ ## Critical Rules
50
+
51
+ - Always check status first — avoid unnecessary full re-indexing
52
+ - Report per-language breakdown
53
+ - Mention `search_code` is available after indexing
package/dist/code-indexer.d.ts ADDED
@@ -0,0 +1,14 @@
1
+ /**
2
+ * AST-based code chunking via web-tree-sitter + code embedding.
3
+ * Parses JS/TS files into function/class/method-level chunks with contextual headers.
4
+ * Extracts JSDoc, decorators, and metadata flags (exported, async, abstract).
5
+ */
6
+ import type { CodeRow } from "./types.js";
7
+ /**
8
+ * Parse and chunk a code file into entities. Accepts an optional lineOffset
9
+ * for SFC files where script content starts at a non-zero line.
10
+ */
11
+ export declare function chunkCodeFile(source: string, filePath: string, language: string, lineOffset?: number): Promise<Omit<CodeRow, "id" | "vector">[]>;
12
+ export declare function embedCodeTexts(texts: string[], mode?: "document" | "query"): Promise<number[][]>;
13
+ /** Parse and embed a code file, returning rows ready for LanceDB. */
14
+ export declare function indexCodeFile(source: string, filePath: string, language: string): Promise<Omit<CodeRow, "id">[]>;