npm - claude-local-docs - Versions diffs - 1.0.13 → 1.0.15 - Mend

claude-local-docs 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

package/.mcp.json +2 -1
package/README.md +124 -58
package/commands/fetch-docs.md +54 -28
package/commands/index-codebase.md +53 -0
package/dist/code-indexer.d.ts +14 -0
package/dist/code-indexer.js +519 -0
package/dist/code-indexer.js.map +1 -0
package/dist/code-search.d.ts +14 -0
package/dist/code-search.js +155 -0
package/dist/code-search.js.map +1 -0
package/dist/code-store.d.ts +39 -0
package/dist/code-store.js +206 -0
package/dist/code-store.js.map +1 -0
package/dist/code.test.d.ts +7 -0
package/dist/code.test.js +197 -0
package/dist/code.test.js.map +1 -0
package/dist/discovery.js +56 -4
package/dist/discovery.js.map +1 -1
package/dist/docs.test.d.ts +7 -0
package/dist/docs.test.js +105 -0
package/dist/docs.test.js.map +1 -0
package/dist/file-walker.d.ts +34 -0
package/dist/file-walker.js +199 -0
package/dist/file-walker.js.map +1 -0
package/dist/index.js +321 -22
package/dist/index.js.map +1 -1
package/dist/indexer.js +4 -23
package/dist/indexer.js.map +1 -1
package/dist/integration.test.d.ts +3 -2
package/dist/integration.test.js +461 -11
package/dist/integration.test.js.map +1 -1
package/dist/reranker.d.ts +2 -2
package/dist/reranker.js +10 -12
package/dist/reranker.js.map +1 -1
package/dist/rrf.d.ts +17 -0
package/dist/rrf.js +25 -0
package/dist/rrf.js.map +1 -0
package/dist/search.d.ts +2 -0
package/dist/search.js +30 -52
package/dist/search.js.map +1 -1
package/dist/sfc-extractor.d.ts +14 -0
package/dist/sfc-extractor.js +70 -0
package/dist/sfc-extractor.js.map +1 -0
package/dist/store.d.ts +2 -0
package/dist/store.js +39 -24
package/dist/store.js.map +1 -1
package/dist/tei-client.d.ts +70 -0
package/dist/tei-client.js +153 -0
package/dist/tei-client.js.map +1 -0
package/dist/types.d.ts +49 -0
package/dist/types.js +4 -1
package/dist/types.js.map +1 -1
package/dist/unit.test.d.ts +8 -0
package/dist/unit.test.js +1241 -0
package/dist/unit.test.js.map +1 -0
package/docker-compose.nvidia.yml +7 -0
package/docker-compose.yml +9 -0
package/package.json +8 -2
package/scripts/ensure-tei.sh +93 -19
package/start-tei.sh +17 -3

package/.mcp.json CHANGED Viewed

@@ -5,7 +5,8 @@
       "args": ["-y", "claude-local-docs@latest"],
       "env": {
         "TEI_EMBED_URL": "http://localhost:39281",
-        "TEI_RERANK_URL": "http://localhost:39282"
+        "TEI_RERANK_URL": "http://localhost:39282",
+        "TEI_CODE_EMBED_URL": "http://localhost:39283"
       }
     }
   }

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # claude-local-docs
-A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation locally and provides production-grade semantic search. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection.
+A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation **and source code** locally with production-grade semantic search. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection. Supports JS/TS, Vue, Svelte, and Astro with AST-aware chunking, JSDoc extraction, and git-diff incremental indexing.
 ## Why not Context7?
@@ -13,8 +13,11 @@ A local-first alternative to Context7 for Claude Code. Indexes your project's de
 | **GPU accelerated** | NVIDIA CUDA / Apple Metal | N/A |
 | **Search quality** | 4-stage RAG (vector + BM25 + RRF + cross-encoder reranking) | Single-stage retrieval |
 | **Doc sources** | Prefers llms.txt, falls back to official docs | Pre-indexed source repos |
-| **Scope** | Your project's actual dependencies | Any library |
+| **Code search** | Semantic AST-level search via Qodo-Embed-1-1.5B | N/A |
+| **Framework support** | JS, TS, Vue, Svelte, Astro (SFC script extraction) | N/A |
+| **Scope** | Your project's actual dependencies + source code | Any library |
 | **Monorepo** | Detects pnpm/npm/yarn workspaces, resolves catalogs | N/A |
+| **Resilience** | BM25-only fallback when TEI is down, retry + timeout | N/A |
 ## Prerequisites
@@ -25,32 +28,17 @@ A local-first alternative to Context7 for Claude Code. Indexes your project's de
 ## Installation
-### As a Claude Code MCP server (recommended)
-Add this to your project's `.mcp.json` (or global `~/.claude/mcp.json`):
-```json
-{
-  "mcpServers": {
-    "local-docs": {
-      "command": "npx",
-      "args": ["-y", "claude-local-docs@latest"],
-      "env": {
-        "TEI_EMBED_URL": "http://localhost:39281",
-        "TEI_RERANK_URL": "http://localhost:39282"
-      }
-    }
-  }
-}
-```
-Then start the TEI containers (clone the repo or download `start-tei.sh` + `docker-compose.yml`):
+### As a Claude Code plugin (recommended)
 ```bash
-./start-tei.sh
+# Add the marketplace
+/plugin marketplace add matteodante/claude-local-docs
+# Install the plugin
+/plugin install claude-local-docs
 ```
-The plugin includes a SessionStart hook that auto-checks TEI health and starts containers if needed.
+The plugin starts TEI containers automatically on session start via a SessionStart hook.
 ### Manual / development setup
@@ -66,6 +54,8 @@ npm run build
 ## How it works
+### Documentation search
 ```
 /fetch-docs                        search_docs("how to use useState")
      |                                       |
@@ -87,6 +77,33 @@ npm run build
                                           Top-K results
 ```
+### Codebase search
+```
+/index-codebase                    search_code("RRF fusion logic")
+     |                                       |
+     v                                       v
+ Walk project files            +--- Vector search (LanceDB) -------+
+ Respect .gitignore            |    Qodo-Embed-1-1.5B (1536-dim)   |
+ Git-diff incremental skip     |                                    |
+     |                         |                                    +-> RRF Fusion
+     v                         |                                    |    (k=60)
+ For each JS/TS/Vue/           +-- BM25 search (LanceDB FTS) ------+
+ Svelte/Astro file:            |    camelCase split + stemming      |
+   - Extract <script> (SFC)    |                                    |
+   - Parse AST (tree-sitter)   +-- File-path boost (optional) -----+
+   - Extract functions/classes |                                      v
+   - Extract JSDoc/decorators  |                            Cross-encoder rerank
+   - Contextual headers        |                            ms-marco-MiniLM-L-6-v2
+   - Embed with Qodo-Embed     |                              (via TEI :39282)
+   - Store in LanceDB          +--------------------------------------+
+                                                  |
+                                                  v
+                                      Function-level results
+                                   (file, lines, scope, score)
+                                   + neighbor chunk expansion
+```
 ## Usage
 ### 1. Index your project's docs
@@ -97,23 +114,38 @@ npm run build
 Claude analyzes your project (including monorepo workspaces), finds all runtime dependencies, searches the web for the best documentation for each one (preferring `llms-full.txt` > `llms.txt` > official docs), and indexes everything locally.
-### 2. Search
+### 2. Index your source code
+```
+/index-codebase
+```
+Parses all JS/TS/Vue/Svelte/Astro files with tree-sitter, extracts JSDoc comments and decorators, generates Qodo-Embed-1-1.5B embeddings for function/class/method-level chunks, and stores them in LanceDB. Incremental via git-diff (falls back to SHA-256 hashing for non-git projects). Only changed files are re-indexed.
-Ask Claude anything about your dependencies. It will automatically use `search_docs` to find relevant documentation chunks:
+### 3. Search
+Ask Claude anything. It will automatically use the right search tool:
 ```
+# Library documentation (search_docs)
 How do I set up middleware in Express?
 What are the options for useQuery in TanStack Query?
 Show me the API for zod's .refine()
+# Your codebase (search_code)
+Where is the authentication middleware?
+Find the database connection setup
+How does the search pipeline work?
 ```
-### 3. Other tools
+### 4. Other tools
 - **`list_docs`** — See what's indexed, when it was fetched, chunk counts
 - **`get_doc_section`** — Retrieve specific sections by heading or chunk ID
+- **`get_codebase_status`** — Check index status, language breakdown, changed files
 - **`analyze_dependencies`** — List all deps (monorepo-aware, catalog-resolved, runtime/dev tagged)
 - **`fetch_and_store_doc`** — Fetch a URL and index it directly (no AI truncation)
-- **`discover_and_fetch_docs`** — Auto-discover and index docs for a library (probes npm, llms.txt, GitHub, homepage)
+- **`discover_and_fetch_docs`** — Auto-discover and index docs for any npm package
 ## TEI backend
@@ -121,8 +153,11 @@ ML inference runs in TEI (HuggingFace Text Embeddings Inference) containers:
 | Container | Port | Model | Purpose |
 |---|---|---|---|
-| tei-embed | `:39281` | `nomic-ai/nomic-embed-text-v1.5` | Text embeddings (384-dim Matryoshka) |
-| tei-rerank | `:39282` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
+| tei-embed | `:39281` | `nomic-ai/nomic-embed-text-v1.5` | Doc embeddings (384-dim Matryoshka) |
+| tei-rerank | `:39282` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking (docs + code) |
+| tei-code-embed | `:39283` | `Qodo/Qodo-Embed-1-1.5B` | Code embeddings (1536-dim, 68.5 CoIR) |
+All TEI communication goes through a shared `TeiClient` class (`src/tei-client.ts`) with automatic retry (2 attempts, exponential backoff), 30s timeout, and batch splitting. If TEI is unavailable, search pipelines gracefully degrade to BM25-only results.
 ### Starting TEI
@@ -150,25 +185,39 @@ docker compose -f docker-compose.yml -f docker-compose.nvidia.yml up -d
 ## Search pipeline
-4-stage RAG pipeline:
+Both doc search and code search use the same 4-stage RAG pipeline:
 | Stage | Technology | Purpose |
 |---|---|---|
-| **Vector search** | LanceDB + nomic-embed-text-v1.5 via TEI | Semantic similarity (understands meaning) |
+| **Vector search** | LanceDB + nomic-embed / Qodo-Embed via TEI | Semantic similarity (understands meaning) |
 | **BM25 search** | LanceDB native FTS (BM25, stemming, stop words) | Keyword matching (exact terms like `useEffect`) |
 | **RRF fusion** | Reciprocal Rank Fusion (k=60) | Merges both ranked lists, handles different score scales |
 | **Cross-encoder rerank** | ms-marco-MiniLM-L-6-v2 via TEI | Rescores top 50 candidates with deep relevance model |
+### Code search specifics
+- **AST chunking**: tree-sitter parses JS/TS/Vue/Svelte/Astro into function/class/method/interface/namespace entities
+- **JSDoc + decorators**: Extracted from AST and prepended to chunk text for richer search context
+- **Metadata flags**: `exported`, `async`, `abstract` tracked per entity
+- **Qodo-Embed-1-1.5B**: 1.5B parameter model, 68.5 CoIR score, 32K context window, 1536-dim embeddings
+- **Contextual headers**: file path + scope chain + flags + decorators + JSDoc prepended for BM25
+- **File-path boost**: Queries containing file names (e.g., "rrf.ts") get a third RRF signal boosting matching files
+- **Neighbor expansion**: Adjacent chunks from the same file are merged for fuller context
+- **Incremental indexing**: Git-diff based (fast, ~50-100ms), falls back to SHA-256 hashing for non-git projects
+- **Graceful degradation**: BM25-only results when vector embedding or reranker is unavailable
+- **SFC support**: Vue `<script>`/`<script setup>`, Svelte `<script>`/`<script context="module">`, Astro `---` frontmatter + `<script>` tags
 ## Storage
 All data stays in your project directory:
 ```
 your-project/.claude/docs/
-├── lancedb/              # Vector database (LanceDB files)
-├── .metadata.json        # Fetch timestamps, source URLs per library
+├── lancedb/                  # Vector database (docs + code tables)
+├── .metadata.json            # Doc fetch timestamps, source URLs per library
+├── .code-metadata.json       # File hashes, language, chunk counts, last index
 └── raw/
-    ├── react.md          # Raw fetched documentation
+    ├── react.md              # Raw fetched documentation
     ├── next.md
     └── tanstack__query.md
 ```
@@ -177,13 +226,16 @@ your-project/.claude/docs/
 | Tool | Description |
 |---|---|
-| `analyze_dependencies` | Monorepo-aware dep analysis: detects workspaces, resolves catalog versions, tags runtime/dev |
-| `store_and_index_doc` | Receive markdown, chunk, embed via TEI, store in LanceDB |
-| `search_docs` | Full RAG pipeline: vector + BM25 + RRF + rerank via TEI |
-| `list_docs` | List indexed libraries with metadata |
-| `get_doc_section` | Get specific chunks by library + heading or chunk ID |
-| `fetch_and_store_doc` | Fetch URL directly (raw HTTP, no truncation), then chunk + embed + store |
-| `discover_and_fetch_docs` | Auto-discover docs: probes npm registry, llms.txt URLs, GitHub, homepage HTML. Detects and expands index files |
+| `analyze_dependencies` | Detect and list all npm dependencies (monorepo-aware, runtime/dev tagged) |
+| `store_and_index_doc` | Index documentation content you already have as a string |
+| `fetch_and_store_doc` | Fetch documentation from a URL and index it (raw HTTP, no truncation) |
+| `discover_and_fetch_docs` | Auto-discover and index docs for an npm package |
+| `search_docs` | Semantic search across indexed library documentation |
+| `list_docs` | List indexed libraries with version and fetch date |
+| `get_doc_section` | Retrieve specific doc sections by heading or chunk ID |
+| `index_codebase` | Index project source code for semantic search (incremental, .gitignore-aware) |
+| `search_code` | Semantic search across project source code (function/class-level) |
+| `get_codebase_status` | Check codebase index status, language breakdown, changed files |
 ## Dependencies
@@ -191,23 +243,27 @@ your-project/.claude/docs/
 |---|---|---|
 | `@lancedb/lancedb` | Apache 2.0 | Embedded vector database + native FTS |
 | `@modelcontextprotocol/sdk` | MIT | MCP server framework |
+| `web-tree-sitter` | MIT | WASM-based AST parsing for code chunking |
+| `tree-sitter-wasms` | MIT | Pre-built WASM grammars (JS/TS/Vue/Svelte) |
+| `ignore` | MIT | .gitignore pattern matching |
 | `zod` | MIT | Schema validation |
-| `turndown` | MIT | HTML to markdown conversion |
-| `turndown-plugin-gfm` | MIT | GFM support for turndown (tables, strikethrough, etc.) |
 TEI containers (Docker):
 | Image | Model | Purpose |
 |---|---|---|
-| `text-embeddings-inference:*` | `nomic-ai/nomic-embed-text-v1.5` | Text embeddings |
+| `text-embeddings-inference:*` | `nomic-ai/nomic-embed-text-v1.5` | Doc embeddings |
 | `text-embeddings-inference:*` | `cross-encoder/ms-marco-MiniLM-L-6-v2` | Cross-encoder reranking |
+| `text-embeddings-inference:*` | `Qodo/Qodo-Embed-1-1.5B` | Code embeddings (1536-dim) |
 ## Development
 ```bash
-npm run dev    # Watch mode — rebuilds on file changes
-npm run build  # One-time build
-npm test       # Integration test (requires TEI running)
+npm run dev         # Watch mode — rebuilds on file changes
+npm run build       # One-time build
+npm run test:unit   # Unit tests (no TEI needed)
+npm run test:docs   # Doc search integration tests (requires TEI on :39281, :39282)
+npm run test:code   # Code search integration tests (requires TEI on :39281, :39282, :39283)
 ```
 ## Project structure
@@ -219,7 +275,8 @@ claude-local-docs/
 │   └── marketplace.json      # Marketplace listing
 ├── .mcp.json                 # MCP server config (stdio transport)
 ├── commands/
-│   └── fetch-docs.md         # /fetch-docs — Claude as research agent
+│   ├── fetch-docs.md         # /fetch-docs — Claude as research agent
+│   └── index-codebase.md     # /index-codebase — index source code
 ├── hooks/
 │   └── hooks.json            # SessionStart hook for TEI containers
 ├── scripts/
@@ -228,17 +285,25 @@ claude-local-docs/
 ├── docker-compose.nvidia.yml # NVIDIA GPU device passthrough
 ├── start-tei.sh              # Auto-detect GPU, start TEI
 ├── src/
-│   ├── index.ts              # MCP server entry, 7 tool definitions
-│   ├── discovery.ts          # Doc discovery: npm registry, URL probing, index expansion, HTML→markdown
-│   ├── indexer.ts            # Chunking + TEI embeddings
-│   ├── search.ts             # 4-stage pipeline: vector + BM25 + RRF + rerank
+│   ├── index.ts              # MCP server entry, 10 tool definitions
+│   ├── tei-client.ts         # Shared TEI HTTP client (retry, timeout, batching)
+│   ├── indexer.ts            # Doc chunking + nomic-embed-text embeddings
+│   ├── search.ts             # Doc search pipeline (vector + BM25 + RRF + rerank)
+│   ├── rrf.ts                # Shared Reciprocal Rank Fusion utility
 │   ├── reranker.ts           # TEI cross-encoder reranking
-│   ├── store.ts              # LanceDB storage + metadata persistence
+│   ├── store.ts              # LanceDB "docs" table + metadata
+│   ├── code-indexer.ts       # AST chunking (tree-sitter) + Qodo-Embed embeddings
+│   ├── code-search.ts        # Code search pipeline (4-stage + file-path boost + neighbors)
+│   ├── code-store.ts         # LanceDB "code" table + file hash tracking + schema migration
+│   ├── file-walker.ts        # Project file discovery + .gitignore + git-diff
+│   ├── sfc-extractor.ts      # Vue/Svelte/Astro <script> block extraction
 │   ├── fetcher.ts            # Raw HTTP fetch (no AI truncation)
 │   ├── workspace.ts          # Monorepo detection + pnpm catalog
+│   ├── discovery.ts          # npm registry + URL probing for docs
 │   ├── types.ts              # Shared TypeScript interfaces
-│   ├── turndown-plugin-gfm.d.ts  # Type declarations for turndown-plugin-gfm
-│   └── integration.test.ts   # Integration tests (requires TEI running)
+│   ├── unit.test.ts          # Unit tests (no TEI needed)
+│   ├── docs.test.ts          # Doc search integration tests
+│   └── code.test.ts          # Code search integration tests
 ├── LICENSE
 ├── package.json
 └── tsconfig.json
@@ -254,15 +319,16 @@ docker info
 # Check container logs
 docker compose logs tei-embed
 docker compose logs tei-rerank
+docker compose logs tei-code-embed
 # Restart
 ./start-tei.sh --stop && ./start-tei.sh
 ```
 ### Port conflicts
-If 39281/39282 are in use, override via env vars:
+If 39281/39282/39283 are in use, override via env vars:
 ```bash
-TEI_EMBED_URL=http://localhost:49281 TEI_RERANK_URL=http://localhost:49282 node dist/index.js
+TEI_EMBED_URL=http://localhost:49281 TEI_RERANK_URL=http://localhost:49282 TEI_CODE_EMBED_URL=http://localhost:49283 node dist/index.js
 ```
 ### Apple Silicon — slow performance

package/commands/fetch-docs.md CHANGED Viewed

@@ -33,47 +33,48 @@ Call `list_docs` to see which libraries are already indexed. **Skip** any librar
 ### 4. Fetch Documentation
-For each remaining library, follow this multi-step strategy. The goal is to find the **best quality** source — `llms-full.txt` > `llms.txt` (expanded index) > homepage HTML > README.
+For each remaining library, follow this strategy. The goal is to find the **best quality** source — `llms-full.txt` > `llms.txt` (expanded index) > homepage HTML > README.
 #### Step A: Check Known URLs first
-Before any searching, check if the library is in the **Known URLs Reference** below. If there's a known `llms-full.txt` or `llms.txt` URL, use it directly with `fetch_and_store_doc`. This is the fastest path.
+Before any probing, check if the library is in the **Known URLs Reference** below. If there's a known `llms-full.txt` or `llms.txt` URL, use it directly with `fetch_and_store_doc`. This is the fastest path.
-#### Step B: WebSearch for llms.txt
+#### Step B: `discover_and_fetch_docs` (automatic probing)
-For libraries NOT in the known list, use **WebSearch** to find the actual `llms.txt` or `llms-full.txt` URL. Use queries like:
+For libraries NOT in the known list, call **`discover_and_fetch_docs`**. This tool automatically:
+1. Checks npm registry for `llms`/`llmsFull` fields in package.json (newest convention)
+2. Probes homepage (skipping GitHub homepages), `docs.{domain}`, `llms.{domain}`, `/docs/` subpath for llms-full.txt/llms.txt
+3. Validates redirect domains (rejects cross-domain redirects like GitHub → docs.github.com)
+4. Validates content quality (rejects 404 pages, too-short content)
+5. Probes GitHub raw for llms-full.txt/llms.txt on main/master branches
+6. Falls back to README.md from GitHub
+7. Falls back to homepage HTML → markdown conversion
+8. Detects index files and expands them by fetching linked pages
-> `{library-name} llms-full.txt site:{homepage-domain}`
+#### Step C: WebSearch fallback
-or more broadly:
+If `discover_and_fetch_docs` fails or returns very thin results (< 3 chunks), use **WebSearch** to find the actual `llms.txt` or `llms-full.txt` URL:
 > `{library-name} llms-full.txt OR llms.txt documentation`
-If the search finds a concrete URL to an `llms.txt` or `llms-full.txt` file, pass it directly to **`fetch_and_store_doc`**. Prefer `llms-full.txt` over `llms.txt` when both exist.
-**Batch the searches**: Run WebSearch for multiple libraries in parallel (up to 5 at a time) to collect URLs upfront. Then fetch them one by one.
-#### Step C: `discover_and_fetch_docs` (automatic probing)
-If neither known URLs nor WebSearch found an `llms.txt` URL, call **`discover_and_fetch_docs`**. This tool automatically:
-1. Checks npm registry for `llms`/`llmsFull` fields in package.json (newest convention)
-2. Probes homepage, `docs.{domain}`, `llms.{domain}`, `/docs/` subpath for llms-full.txt/llms.txt
-3. Probes GitHub raw for llms-full.txt/llms.txt on main/master branches
-4. Falls back to README.md from GitHub
-5. Falls back to homepage HTML → markdown conversion
-6. Detects index files and expands them by fetching linked pages
+If the search finds a concrete URL, pass it to **`fetch_and_store_doc`**. Prefer `llms-full.txt` over `llms.txt`.
 #### Step D: Training data fallback
 If all above fail, try **`fetch_and_store_doc`** with documentation URLs you know from your training data (GitHub raw docs, official doc site pages, etc.).
-#### Evaluating results
+#### Evaluating results & chunk quality
 After each library is fetched, check the chunk count:
-- **< 5 chunks**: Very thin. Use WebSearch to find additional doc pages (API reference, guides) and fetch with `fetch_and_store_doc` to supplement.
+- **< 3 chunks**: Very thin — flag as "very thin, may need supplementing". Try `fetch_and_store_doc` with additional doc pages from training data.
+- **3-4 chunks**: Thin. Acceptable for small/simple libraries, but note it in the summary.
 - **5-20 chunks**: Acceptable for small libraries.
 - **20+ chunks**: Good coverage.
+Also note the source type:
+- `readme` fallback means the library has no proper docs site — worth noting
+- `homepage-html` means HTML was converted — quality varies
 #### Progress reporting
 After each library, report:
@@ -93,6 +94,12 @@ Done! Indexed X/Y libraries.
   express      — 30 chunks (homepage-html)
   lodash       — FAILED (no docs found)
+Thin coverage (< 5 chunks):
+  some-lib     — 2 chunks (readme) ⚠️
+README fallback (no docs site found):
+  another-lib  — 8 chunks (readme)
 Total: 280 chunks across 4 libraries.
 Use search_docs to query your documentation.
 ```
@@ -112,10 +119,11 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
 | svelte | `https://svelte.dev/llms-full.txt` |
 | @sveltejs/kit | `https://svelte.dev/llms-full.txt` |
 | vue | (no official llms.txt — use `discover_and_fetch_docs`) |
-| react-native | `https://reactnative.dev/llms.txt` |
+| react-native | `https://reactnative.dev/llms-full.txt` |
 | expo | `https://docs.expo.dev/llms-full.txt` |
 | hono | `https://hono.dev/llms.txt` |
 | bun | `https://bun.sh/llms.txt` |
+| astro | `https://astro.build/llms.txt` |
 ### Styling & UI
@@ -139,6 +147,7 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
 | drizzle-orm | `https://orm.drizzle.team/llms-full.txt` |
 | @prisma/client | `https://prisma.io/docs/llms-full.txt` |
 | convex | `https://docs.convex.dev/llms.txt` |
+| zustand | `https://zustand.docs.pmnd.rs/llms-full.txt` |
 ### Backend & APIs
@@ -149,6 +158,7 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
 | resend | `https://resend.com/docs/llms-full.txt` |
 | @medusajs/medusa | `https://docs.medusajs.com/llms-full.txt` |
 | better-auth | `https://www.better-auth.com/llms.txt` |
+| bullmq | `https://docs.bullmq.io/llms-full.txt` |
 ### AI & LLM
@@ -171,11 +181,27 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
 | @netlify/functions | `https://docs.netlify.com/llms.txt` |
 | @liveblocks/client | `https://liveblocks.io/llms-full.txt` |
+### React Native Libraries
+| Library | Best URL |
+|---|---|
+| react-native-reanimated | `https://docs.swmansion.com/react-native-reanimated/llms.txt` |
+| react-native-gesture-handler | `https://docs.swmansion.com/react-native-gesture-handler/llms.txt` |
+| @react-navigation/native | `https://reactnavigation.org/llms.txt` |
+| react-native-keyboard-controller | `https://kirillzyusko.github.io/react-native-keyboard-controller/llms-full.txt` |
+### i18n
+| Library | Best URL |
+|---|---|
+| i18next | `https://www.i18next.com/llms-full.txt` |
+| react-i18next | `https://react.i18next.com/llms-full.txt` |
 ### Animation
 | Library | Best URL |
 |---|---|
-| motion / framer-motion | Special: `https://llms.motion.dev/docs/react-quick-start.md` (or use WebSearch for full index) |
+| motion / framer-motion | Special: `https://llms.motion.dev/docs/react-quick-start.md` (or use `discover_and_fetch_docs`) |
 ### Notes on special patterns
@@ -188,13 +214,13 @@ Use these URLs directly with `fetch_and_store_doc` — no searching needed. Pref
 ## Critical Rules
-- **Check known URLs first** — the reference table above is faster and more reliable than searching.
-- **Search second, probe third** — use WebSearch to find llms.txt URLs before falling back to blind URL probing via `discover_and_fetch_docs`.
+- **Check known URLs first** — the reference table above is faster and more reliable than probing.
+- **Use `discover_and_fetch_docs` for unknown libraries** — it now correctly handles GitHub homepages and validates redirects.
 - **Prefer `llms-full.txt` over `llms.txt`** — the full version has complete documentation without truncation.
-- **Use `fetch_and_store_doc` when you have a known URL** — from the reference table, WebSearch results, or training data.
+- **Use `fetch_and_store_doc` when you have a known URL** — from the reference table or training data.
 - **Use `discover_and_fetch_docs` when you have no URL** — it will probe common patterns automatically.
-- **Supplement thin results** — if a library has < 5 chunks, search for additional doc pages and fetch them.
+- **Flag thin results** — report libraries with < 3 chunks as "very thin" in the summary.
 - **NEVER write files to the filesystem directly.** Do NOT use the Write tool, Bash tool, or any other method to save documentation content to disk. ALL storage goes through the MCP tools.
-- **One library at a time for fetching** — clear progress, no batching (but WebSearch can be batched)
+- **One library at a time for fetching** — clear progress, no batching
 - **Skip dev deps by default** — runtime deps only
 - Handle errors gracefully: if a library fails, log it and move to the next one

package/commands/index-codebase.md ADDED Viewed

@@ -0,0 +1,53 @@
+---
+description: "Index the project's source code for semantic search"
+allowed-tools: ["mcp__local-docs__get_codebase_status", "mcp__local-docs__index_codebase"]
+---
+# Index Project Codebase
+You are a codebase indexing agent. Your job is to index the project's source code so it can be searched semantically with `search_code`.
+## Steps
+### 1. Check Current Status
+Call `get_codebase_status` to see:
+- Whether any code has been indexed before
+- How many files are currently indexed
+- Language breakdown (TypeScript vs JavaScript)
+- Files that have changed since last index
+### 2. Run Indexing
+Based on the status:
+- **First time**: Call `index_codebase` with no parameters. This will index all supported source files (JS/TS, plus Vue/Svelte/Astro script blocks).
+- **Files changed**: Call `index_codebase` with no parameters. Incremental indexing will only process changed files.
+- **Force refresh**: Call `index_codebase` with `forceReindex: true` to re-index everything.
+- **Up to date**: If no files have changed, tell the user the index is current.
+### 3. Report Results
+After indexing completes, report:
+```
+Codebase indexed!
+  TypeScript: 45 files
+  JavaScript: 12 files
+  Total: 57 files, 320 chunks
+  Indexed: 15 files (changed)
+  Skipped: 42 files (unchanged)
+  Removed: 0 files (deleted)
+Use search_code to search your codebase semantically.
+```
+If there were errors, list them so the user can investigate.
+## Critical Rules
+- Always check status first — avoid unnecessary full re-indexing
+- Report per-language breakdown
+- Mention `search_code` is available after indexing

package/dist/code-indexer.d.ts ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * AST-based code chunking via web-tree-sitter + code embedding.
+ * Parses JS/TS files into function/class/method-level chunks with contextual headers.
+ * Extracts JSDoc, decorators, and metadata flags (exported, async, abstract).
+ */
+import type { CodeRow } from "./types.js";
+/**
+ * Parse and chunk a code file into entities. Accepts an optional lineOffset
+ * for SFC files where script content starts at a non-zero line.
+ */
+export declare function chunkCodeFile(source: string, filePath: string, language: string, lineOffset?: number): Promise<Omit<CodeRow, "id" | "vector">[]>;
+export declare function embedCodeTexts(texts: string[], mode?: "document" | "query"): Promise<number[][]>;
+/** Parse and embed a code file, returning rows ready for LanceDB. */
+export declare function indexCodeFile(source: string, filePath: string, language: string): Promise<Omit<CodeRow, "id">[]>;