claude-local-docs 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ {
2
+ "$schema": "https://anthropic.com/claude-code/marketplace.schema.json",
3
+ "name": "claude-local-docs",
4
+ "description": "A local-first Context7 alternative. Indexes your JS/TS project dependency docs locally with a 4-stage RAG pipeline (vector + BM25 + RRF + cross-encoder reranking). No cloud APIs at query time — all models run on your machine.",
5
+ "owner": {
6
+ "name": "matthew"
7
+ },
8
+ "plugins": [
9
+ {
10
+ "name": "claude-local-docs",
11
+ "description": "A local-first, offline-capable alternative to Context7 for dependency documentation. Reads your package.json, fetches docs (preferring llms.txt), and indexes them with a production-grade 4-stage RAG pipeline: vector search + BM25 keywords + RRF fusion + cross-encoder reranking. All models (nomic-embed-text-v1.5, ms-marco-MiniLM) run locally via ONNX — no cloud API calls at query time, no rate limits, full privacy.",
12
+ "version": "1.0.0",
13
+ "author": {
14
+ "name": "matthew"
15
+ },
16
+ "source": "./",
17
+ "category": "development",
18
+ "license": "MIT",
19
+ "keywords": ["documentation", "search", "rag", "embeddings", "local-first", "semantic-search", "llms-txt", "context7-alternative", "offline", "vector-search", "bm25", "reranking", "dependency-docs"]
20
+ }
21
+ ]
22
+ }
@@ -0,0 +1,8 @@
1
+ {
2
+ "name": "claude-local-docs",
3
+ "description": "A local-first Context7 alternative. Indexes JS/TS dependency docs with a 4-stage RAG pipeline (vector + BM25 + RRF + cross-encoder reranking). All models run locally — no cloud APIs at query time.",
4
+ "version": "1.0.0",
5
+ "author": {
6
+ "name": "matthew"
7
+ }
8
+ }
package/.mcp.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "mcpServers": {
3
+ "local-docs": {
4
+ "command": "npx",
5
+ "args": ["-y", "claude-local-docs"],
6
+ "type": "stdio"
7
+ }
8
+ }
9
+ }
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 matthew
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,187 @@
1
+ # claude-local-docs
2
+
3
+ A local-first alternative to Context7 for Claude Code. Indexes your project's dependency documentation locally and provides production-grade semantic search — no cloud APIs at query time, no rate limits, full privacy.
4
+
5
+ ## Why not Context7?
6
+
7
+ | | **claude-local-docs** | **Context7** |
8
+ |---|---|---|
9
+ | **Runs where** | Your machine (ONNX models) | Upstash cloud servers |
10
+ | **Privacy** | Docs never leave your machine | Queries sent to cloud API |
11
+ | **Rate limits** | None | API-dependent |
12
+ | **Offline** | Full search works offline | Requires internet |
13
+ | **Search quality** | 4-stage RAG (vector + BM25 + RRF + cross-encoder reranking) | Single-stage retrieval |
14
+ | **Doc sources** | Prefers llms.txt, falls back to official docs | Pre-indexed source repos |
15
+ | **Scope** | Your project's actual dependencies | Any library |
16
+ | **Setup** | `npm install` + `/fetch-docs` | Install plugin |
17
+
18
+ ## How it works
19
+
20
+ ```
21
+ /fetch-docs search_docs("how to use useState")
22
+ │ │
23
+ ▼ ▼
24
+ Read package.json ┌─── Vector search (LanceDB) ───┐
25
+ │ │ nomic-embed-text-v1.5 │
26
+ ▼ │ │
27
+ For each dependency: │ ├─→ RRF Fusion
28
+ - Search web for llms.txt │ │ (k=60)
29
+ - Fetch best docs ├─── BM25 search (MiniSearch) ──┘
30
+ - Chunk + embed + store │ keyword + fuzzy match │
31
+ │ ▼
32
+ │ Cross-encoder rerank
33
+ │ ms-marco-MiniLM-L-6-v2
34
+ │ │
35
+ └──────────────────────────────────┘
36
+
37
+
38
+ Top-K results
39
+ ```
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ # Clone into your Claude Code plugins directory
45
+ git clone <repo-url> ~/.claude/plugins/claude-local-docs
46
+
47
+ # Install dependencies and build
48
+ cd ~/.claude/plugins/claude-local-docs
49
+ npm install
50
+ npm run build
51
+ ```
52
+
53
+ Or install as a project-local plugin by cloning into your project and referencing it in your Claude Code settings.
54
+
55
+ ## Usage
56
+
57
+ ### 1. Index your project's docs
58
+
59
+ ```
60
+ /fetch-docs
61
+ ```
62
+
63
+ Claude reads your `package.json`, searches the web for the best documentation for each dependency (preferring `llms.txt` / `llms-full.txt`), and indexes everything locally.
64
+
65
+ ### 2. Search
66
+
67
+ Ask Claude anything about your dependencies. It will automatically use `search_docs` to find relevant documentation chunks:
68
+
69
+ ```
70
+ How do I set up middleware in Express?
71
+ What are the options for useQuery in TanStack Query?
72
+ Show me the API for zod's .refine()
73
+ ```
74
+
75
+ ### 3. Other tools
76
+
77
+ - **`list_docs`** — See what's indexed, when it was fetched, chunk counts
78
+ - **`get_doc_section`** — Retrieve specific sections by heading or chunk ID
79
+ - **`analyze_dependencies`** — List all deps from package.json
80
+
81
+ ## Search pipeline
82
+
83
+ This plugin implements a 4-stage advanced RAG pipeline, the current production standard:
84
+
85
+ | Stage | Technology | Purpose |
86
+ |---|---|---|
87
+ | **Vector search** | LanceDB + nomic-embed-text-v1.5 | Semantic similarity (understands meaning) |
88
+ | **BM25 search** | MiniSearch (BM25+ algorithm) | Keyword matching (exact terms like `useEffect`) |
89
+ | **RRF fusion** | Reciprocal Rank Fusion (k=60) | Merges both ranked lists, handles different score scales |
90
+ | **Cross-encoder rerank** | ms-marco-MiniLM-L-6-v2 | Rescores top 30 candidates with deep relevance model |
91
+
92
+ ### Why this matters
93
+
94
+ - **Vector-only** search misses exact API names and error codes
95
+ - **Keyword-only** search misses semantic meaning ("state management" won't find "useState")
96
+ - **Hybrid + reranking** catches both, then a cross-encoder picks the truly relevant results
97
+
98
+ ## Models
99
+
100
+ All models run locally via ONNX. Downloaded once on first use, then cached.
101
+
102
+ | Model | Size | Purpose |
103
+ |---|---|---|
104
+ | `nomic-ai/nomic-embed-text-v1.5` | ~270MB | Text embeddings (86% top-5 accuracy, Matryoshka 384-dim) |
105
+ | `Xenova/ms-marco-MiniLM-L-6-v2` | ~23MB | Cross-encoder reranking |
106
+
107
+ ## Chunking strategy
108
+
109
+ - Split markdown by headings (`#` through `####`) preserving the heading path
110
+ - Target ~2000 characters per chunk
111
+ - 10% overlap between chunks to prevent losing context at boundaries
112
+ - Large sections split at paragraph boundaries
113
+
114
+ ## Storage
115
+
116
+ All data stays in your project directory:
117
+
118
+ ```
119
+ your-project/.claude/docs/
120
+ ├── lancedb/ # Vector database (LanceDB files)
121
+ ├── .metadata.json # Fetch timestamps, source URLs per library
122
+ └── raw/
123
+ ├── react.md # Raw fetched documentation
124
+ ├── next.md
125
+ └── tanstack__query.md
126
+ ```
127
+
128
+ ## MCP Tools
129
+
130
+ | Tool | Description |
131
+ |---|---|
132
+ | `analyze_dependencies` | Read package.json, return all deps with versions |
133
+ | `store_and_index_doc` | Receive markdown, chunk, embed, store in LanceDB |
134
+ | `search_docs` | Full RAG pipeline: vector + BM25 + RRF + rerank |
135
+ | `list_docs` | List indexed libraries with metadata |
136
+ | `get_doc_section` | Get specific chunks by library + heading or chunk ID |
137
+
138
+ ## Dependencies
139
+
140
+ All open source:
141
+
142
+ | Package | License | Purpose |
143
+ |---|---|---|
144
+ | `@lancedb/lancedb` | Apache 2.0 | Embedded vector database |
145
+ | `@huggingface/transformers` | Apache 2.0 | Run ONNX models locally |
146
+ | `minisearch` | MIT | BM25+ full-text search |
147
+ | `@modelcontextprotocol/sdk` | MIT | MCP server framework |
148
+ | `zod` | MIT | Schema validation |
149
+
150
+ ## Development
151
+
152
+ ```bash
153
+ npm run dev # Watch mode — rebuilds on file changes
154
+ npm run build # One-time build
155
+ ```
156
+
157
+ ### Testing with MCP Inspector
158
+
159
+ ```bash
160
+ npx @modelcontextprotocol/inspector node dist/index.js
161
+ ```
162
+
163
+ ## Project structure
164
+
165
+ ```
166
+ claude-local-docs/
167
+ ├── .claude-plugin/
168
+ │ ├── plugin.json # Plugin manifest
169
+ │ └── marketplace.json # Marketplace listing
170
+ ├── .mcp.json # MCP server config (stdio transport)
171
+ ├── commands/
172
+ │ └── fetch-docs.md # /fetch-docs command — Claude as research agent
173
+ ├── src/
174
+ │ ├── index.ts # MCP server entry, 5 tool definitions
175
+ │ ├── indexer.ts # Chunking + nomic-embed-text-v1.5 embeddings
176
+ │ ├── search.ts # 4-stage pipeline: vector + BM25 + RRF + rerank
177
+ │ ├── reranker.ts # Cross-encoder (ms-marco-MiniLM-L-6-v2)
178
+ │ ├── store.ts # LanceDB storage + metadata persistence
179
+ │ └── types.ts # Shared TypeScript interfaces
180
+ ├── LICENSE
181
+ ├── package.json
182
+ └── tsconfig.json
183
+ ```
184
+
185
+ ## License
186
+
187
+ MIT
@@ -0,0 +1,55 @@
1
+ ---
2
+ description: "Fetch and index documentation for all project dependencies"
3
+ allowed-tools: ["mcp__local-docs__analyze_dependencies", "mcp__local-docs__list_docs", "mcp__local-docs__store_and_index_doc", "WebFetch", "WebSearch"]
4
+ ---
5
+
6
+ # Fetch Documentation for Project Dependencies
7
+
8
+ You are a documentation research agent. Your job is to find and fetch the best available documentation for each dependency in this project, then store it locally for semantic search.
9
+
10
+ ## Steps
11
+
12
+ ### 1. Analyze Dependencies
13
+ Call the `analyze_dependencies` MCP tool to get the list of dependencies from package.json.
14
+
15
+ ### 2. Check Existing Cache
16
+ Call the `list_docs` MCP tool to see which libraries are already indexed. **Skip** any library that was fetched within the last 7 days unless the user explicitly asks to refresh.
17
+
18
+ ### 3. Fetch Documentation for Each Library
19
+
20
+ Process libraries in batches of 3-5. For each library that needs fetching:
21
+
22
+ #### a. Search for llms.txt (preferred)
23
+ 1. **Search the web** for `"{library name} llms.txt documentation"`
24
+ 2. If you find a URL to `llms-full.txt` or `llms.txt`, **fetch it** using WebFetch
25
+ 3. `llms-full.txt` is preferred over `llms.txt` (it has more detail)
26
+
27
+ #### b. Fallback: Official Documentation
28
+ If no llms.txt exists:
29
+ 1. **Search the web** for `"{library name} {version} official documentation"`
30
+ 2. Find the main documentation page
31
+ 3. **Fetch** the docs page content using WebFetch
32
+ 4. If the docs have multiple important pages (API reference, guides), fetch the most critical 2-3 pages and combine them
33
+
34
+ #### c. Store the Documentation
35
+ For each library, call the `store_and_index_doc` MCP tool with:
36
+ - `library`: the package name (e.g., "react", "@tanstack/query")
37
+ - `version`: the version from package.json
38
+ - `content`: the fetched markdown content
39
+ - `sourceUrl`: the URL where the content was fetched from
40
+
41
+ ### 4. Report Results
42
+
43
+ After processing all libraries, provide a summary:
44
+ - Which libraries were successfully indexed (with chunk counts)
45
+ - Which libraries failed and why
46
+ - Total chunks in the index
47
+ - Remind the user they can now use `search_docs` to query the documentation
48
+
49
+ ## Important Notes
50
+
51
+ - **Be thorough but efficient**: Don't fetch huge API references if a concise llms.txt is available
52
+ - **Prefer quality over quantity**: A good llms.txt is better than scraping dozens of doc pages
53
+ - **Handle errors gracefully**: If a library's docs can't be found, log it and move on
54
+ - **Respect rate limits**: Don't fire off too many web requests simultaneously
55
+ - For scoped packages like `@scope/package`, search for both the full name and just the package part
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { readFile } from "node:fs/promises";
5
+ import { join } from "node:path";
6
+ import { z } from "zod";
7
+ import { DocStore, resolveProjectRoot } from "./store.js";
8
+ import { indexDocument } from "./indexer.js";
9
+ import { searchDocs } from "./search.js";
10
+ const projectRoot = resolveProjectRoot(); // directory whose package.json analyze_dependencies reads by default
11
+ const store = new DocStore(projectRoot); // single DocStore instance shared by every tool handler in this file
12
+ const server = new McpServer({
13
+ name: "local-docs", // same identifier as the "local-docs" entry in .mcp.json
14
+ version: "1.0.0",
15
+ });
// --- Tool 1: analyze_dependencies ---
// Reads a package.json (the one in the project root unless a path is given) and
// returns every entry of `dependencies` followed by every entry of
// `devDependencies` as a JSON array of { name, version } rows.
server.tool(
  "analyze_dependencies",
  "Read package.json and return all dependencies with their versions",
  {
    packageJsonPath: z
      .string()
      .optional()
      .describe("Path to package.json. Defaults to the project root."),
  },
  async ({ packageJsonPath }) => {
    // Converts one manifest section ({ name: version, ... } or undefined) into rows.
    const toRows = (section) =>
      Object.entries(section ?? {}).map(([name, version]) => ({ name, version }));

    try {
      const manifestPath = packageJsonPath ?? join(projectRoot, "package.json");
      const manifest = JSON.parse(await readFile(manifestPath, "utf-8"));
      const rows = [
        ...toRows(manifest.dependencies),
        ...toRows(manifest.devDependencies),
      ];
      return {
        content: [{ type: "text", text: JSON.stringify(rows, null, 2) }],
      };
    } catch (err) {
      // Read and parse failures are reported to the client as a tool error.
      return {
        content: [{ type: "text", text: `Error reading package.json: ${err.message}` }],
        isError: true,
      };
    }
  }
);
// --- Tool 2: store_and_index_doc ---
// Persists one library's documentation in three steps, in this order:
// save the raw markdown, chunk + embed it, then add the embedded chunks to the store.
server.tool(
  "store_and_index_doc",
  "Store and index documentation for a library. Chunks markdown, generates embeddings via nomic-embed-text-v1.5, and persists to LanceDB in .claude/docs/",
  {
    library: z.string().describe("Library name (e.g. 'react', '@tanstack/query')"),
    version: z.string().describe("Library version"),
    content: z.string().describe("Raw markdown documentation content"),
    sourceUrl: z.string().describe("URL where the docs were fetched from"),
  },
  async ({ library, version, content, sourceUrl }) => {
    try {
      // 1. Keep the raw document.
      await store.saveRawDoc(library, content);

      // 2. Split into chunks and compute one embedding per chunk.
      const embeddedChunks = await indexDocument(content, library);

      // 3. Hand the embedded chunks to the store.
      const stored = await store.addLibrary(library, version, sourceUrl, embeddedChunks);

      const report = {
        success: true,
        library,
        chunkCount: stored.chunkCount,
        totalIndexSize: stored.indexSize,
        storedAt: store.getDocsDir(),
      };
      return {
        content: [{ type: "text", text: JSON.stringify(report) }],
      };
    } catch (err) {
      // A failure in any step is reported to the client as a tool error.
      return {
        content: [{ type: "text", text: `Error indexing ${library}: ${err.message}` }],
        isError: true,
      };
    }
  }
);
// --- Tool 3: search_docs ---
// Runs `searchDocs` over the indexed documentation and returns the hits as a JSON
// array of { score, library, heading, chunkId, content }.
//
// `topK` is validated as a positive integer: zero, negative or fractional values
// are rejected by the schema instead of being passed on to the search pipeline.
server.tool(
  "search_docs",
  "Advanced RAG search: BM25 keyword search + vector similarity → RRF fusion → cross-encoder reranking. Uses nomic-embed-text-v1.5 for embeddings and ms-marco-MiniLM for reranking.",
  {
    query: z.string().describe("Search query"),
    library: z.string().optional().describe("Filter results to a specific library"),
    topK: z
      .number()
      .int()
      .positive()
      .optional()
      .describe("Number of results to return (default: 10)"),
  },
  async ({ query, library, topK }) => {
    try {
      // Nothing indexed yet: answer with a hint rather than an error.
      if (await store.isEmpty()) {
        return {
          content: [
            {
              type: "text",
              text: "No documentation indexed yet. Run /fetch-docs first.",
            },
          ],
        };
      }

      const results = await searchDocs(query, store, { library, topK });

      const formatted = results.map((r) => ({
        score: r.score,
        library: r.library,
        heading: r.headingPath.join(" > "),
        chunkId: r.chunkId,
        // Keep responses compact: hits are cut to a 500-character preview.
        content: r.content.length > 500
          ? r.content.slice(0, 500) + "..."
          : r.content,
      }));

      return {
        content: [{ type: "text", text: JSON.stringify(formatted, null, 2) }],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `Search error: ${err.message}` }],
        isError: true,
      };
    }
  }
);
// --- Tool 4: list_docs ---
// Reports the `libraries` array from the store's metadata as pretty-printed JSON,
// or a hint to run /fetch-docs when that array is empty.
server.tool(
  "list_docs",
  "List all indexed documentation libraries with metadata",
  {},
  async () => {
    try {
      const metadata = await store.loadMetadata();
      const nothingIndexed = metadata.libraries.length === 0;
      const text = nothingIndexed
        ? "No documentation indexed yet. Run /fetch-docs first."
        : JSON.stringify(metadata.libraries, null, 2);
      return {
        content: [{ type: "text", text }],
      };
    } catch (err) {
      // Metadata could not be loaded or serialized: report a tool error.
      return {
        content: [{ type: "text", text: `Error listing docs: ${err.message}` }],
        isError: true,
      };
    }
  }
);
// --- Tool 5: get_doc_section ---

/**
 * Render a stored heading path as a single "A > B > C" string.
 *
 * The indexer stores heading paths as JSON-encoded arrays, but rows may also
 * carry an already-decoded array. Both forms produce the same string so the
 * `heading` field of the summary always has one type. A string that is not
 * valid JSON is returned unchanged instead of failing the whole request.
 */
function formatHeadingPath(headingPath) {
  let parts = headingPath;
  if (typeof headingPath === "string") {
    try {
      parts = JSON.parse(headingPath);
    } catch {
      return headingPath;
    }
  }
  return Array.isArray(parts) ? parts.join(" > ") : String(parts ?? "");
}

// Lookup modes, checked in this order:
//   1. `chunkId` given  -> that single chunk (the `library` argument is not consulted here)
//   2. `heading` given  -> all chunks of `library` whose heading matches
//   3. neither          -> a summary (id, heading, 120-char preview) of every chunk of `library`
// "Not found" cases are plain text answers, not tool errors.
server.tool(
  "get_doc_section",
  "Retrieve specific documentation chunks by library + heading or chunk ID",
  {
    library: z.string().describe("Library name"),
    heading: z.string().optional().describe("Heading text to search for in the heading path"),
    chunkId: z.number().optional().describe("Specific chunk ID to retrieve"),
  },
  async ({ library, heading, chunkId }) => {
    try {
      if (chunkId !== undefined) {
        const chunk = await store.getChunkById(chunkId);
        if (!chunk) {
          return {
            content: [{ type: "text", text: `Chunk ${chunkId} not found` }],
          };
        }
        return {
          content: [
            {
              type: "text",
              text: JSON.stringify({
                id: chunk.id,
                library: chunk.library,
                headingPath: chunk.headingPath,
                content: chunk.text,
              }, null, 2),
            },
          ],
        };
      }

      if (heading) {
        const chunks = await store.getChunksByHeading(library, heading);
        if (chunks.length === 0) {
          return {
            content: [
              {
                type: "text",
                text: `No chunks found for library "${library}" with heading matching "${heading}"`,
              },
            ],
          };
        }
        const formatted = chunks.map((c) => ({
          id: c.id,
          headingPath: c.headingPath,
          content: c.text,
        }));
        return {
          content: [{ type: "text", text: JSON.stringify(formatted, null, 2) }],
        };
      }

      // No heading or chunkId — return summaries for the library
      const chunks = await store.getChunks(library);
      if (chunks.length === 0) {
        return {
          content: [
            {
              type: "text",
              text: `No documentation found for library "${library}"`,
            },
          ],
        };
      }
      const summary = chunks.map((c) => ({
        id: c.id,
        // Always a string, whichever form the row stores the path in.
        heading: formatHeadingPath(c.headingPath),
        preview: c.text.slice(0, 120) + (c.text.length > 120 ? "..." : ""),
      }));
      return {
        content: [{ type: "text", text: JSON.stringify(summary, null, 2) }],
      };
    } catch (err) {
      return {
        content: [{ type: "text", text: `Error: ${err.message}` }],
        isError: true,
      };
    }
  }
);
// --- Start server ---
/**
 * Connects the MCP server to a stdio transport.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// A startup failure is logged to stderr and ends the process with exit code 1.
main().catch((err) => {
  console.error("Fatal error:", err);
  process.exit(1);
});
229
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAC1D,OAAO,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,WAAW,GAAG,kBAAkB,EAAE,CAAC;AACzC,MAAM,KAAK,GAAG,IAAI,QAAQ,CAAC,WAAW,CAAC,CAAC;AAExC,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IAC3B,IAAI,EAAE,YAAY;IAClB,OAAO,EAAE,OAAO;CACjB,CAAC,CAAC;AAEH,uCAAuC;AACvC,MAAM,CAAC,IAAI,CACT,sBAAsB,EACtB,mEAAmE,EACnE,EAAE,eAAe,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC,EAAE,EAC1G,KAAK,EAAE,EAAE,eAAe,EAAE,EAAE,EAAE;IAC5B,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,eAAe,IAAI,IAAI,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;QACrE,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAE5B,MAAM,IAAI,GAAiB,EAAE,CAAC;QAC9B,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,EAAE,CAAC,EAAE,CAAC;YACrE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAiB,EAAE,CAAC,CAAC;QAClD,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,EAAE,CAAC,EAAE,CAAC;YACxE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,OAAiB,EAAE,CAAC,CAAC;QAClD,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;SAC1E,CAAC;IACJ,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,+BAA+B,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;YACxF,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC,CACF,CAAC;AAEF,sCAAsC;AACtC,MAAM,CAAC,IAAI,CACT,qBAAqB,EACrB,wJAAwJ,EACxJ;IACE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,gDAAgD,CAAC;IAC9E,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;IAC/C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAA
C,oCAAoC,CAAC;IAClE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;CACvE,EACD,KAAK,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE,EAAE;IACjD,IAAI,CAAC;QACH,eAAe;QACf,MAAM,KAAK,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEzC,kBAAkB;QAClB,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAErD,mBAAmB;QACnB,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,UAAU,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QAE3E,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,OAAO,EAAE,IAAI;wBACb,OAAO;wBACP,UAAU,EAAE,MAAM,CAAC,UAAU;wBAC7B,cAAc,EAAE,MAAM,CAAC,SAAS;wBAChC,QAAQ,EAAE,KAAK,CAAC,UAAU,EAAE;qBAC7B,CAAC;iBACH;aACF;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,kBAAkB,OAAO,KAAK,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;YACvF,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC,CACF,CAAC;AAEF,8BAA8B;AAC9B,MAAM,CAAC,IAAI,CACT,aAAa,EACb,mLAAmL,EACnL;IACE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC;IAC1C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;IAC/E,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,2CAA2C,CAAC;CAClF,EACD,KAAK,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE;IACjC,IAAI,CAAC;QACH,IAAI,MAAM,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YAC1B,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,sDAAsD;qBAC7D;iBACF;aACF,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;QAElE,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACpC,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,OAAO,EAAE,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC;YAClC,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,OAAO,EACL,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG;gBACpB,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK;gBACjC,CAAC,CAAC,CAAC,CAAC,OAAO;SAChB,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAA
E,CAAC;SAC/E,CAAC;IACJ,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,iBAAiB,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;YAC1E,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC,CACF,CAAC;AAEF,4BAA4B;AAC5B,MAAM,CAAC,IAAI,CACT,WAAW,EACX,wDAAwD,EACxD,EAAE,EACF,KAAK,IAAI,EAAE;IACT,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,YAAY,EAAE,CAAC;QAC5C,IAAI,QAAQ,CAAC,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACpC,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,sDAAsD;qBAC7D;iBACF;aACF,CAAC;QACJ,CAAC;QAED,OAAO;YACL,OAAO,EAAE;gBACP,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE;aAC7E;SACF,CAAC;IACJ,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,uBAAuB,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;YAChF,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC,CACF,CAAC;AAEF,kCAAkC;AAClC,MAAM,CAAC,IAAI,CACT,iBAAiB,EACjB,yEAAyE,EACzE;IACE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC;IAC5C,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gDAAgD,CAAC;IACzF,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;CACzE,EACD,KAAK,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE;IACtC,IAAI,CAAC;QACH,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;YAChD,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,OAAO;oBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,SAAS,OAAO,YAAY,EAAE,CAAC;iBACzE,CAAC;YACJ,CAAC;YACD,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAClB;4BACE,EAAE,EAAE,KAAK,CAAC,EAAE;4BACZ,OAAO,EAAE,KAAK,CAAC,OAAO;4BACtB,WAAW,EAAE,KAAK,CAAC,WAAW;4BAC9B,OAAO,EAAE,KAAK,CAAC,IAAI;yBACpB,EACD,IAAI,EACJ,CAAC,CACF;qBACF;iBACF;aACF,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,kBAAkB,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAChE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACxB,OAAO;oBACL,OAAO,EAAE;wBACP;4BACE,IAAI,EAAE,MAAe;4BACrB,IAAI,EAAE,gCAAgC,OAAO,4BAA4B,O
AAO,GAAG;yBACpF;qBACF;iBACF,CAAC;YACJ,CAAC;YACD,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC;gBACxC,EAAE,EAAE,CAAC,CAAC,EAAE;gBACR,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,OAAO,EAAE,CAAC,CAAC,IAAI;aAChB,CAAC,CAAC,CAAC;YACJ,OAAO;gBACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;aAC/E,CAAC;QACJ,CAAC;QAED,2DAA2D;QAC3D,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,uCAAuC,OAAO,GAAG;qBACxD;iBACF;aACF,CAAC;QACJ,CAAC;QACD,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC;YACtC,EAAE,EAAE,CAAC,CAAC,EAAE;YACR,OAAO,EAAE,OAAO,CAAC,CAAC,WAAW,KAAK,QAAQ;gBACxC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;gBACvC,CAAC,CAAC,CAAC,CAAC,WAAW;YACjB,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;SACnE,CAAC,CAAC,CAAC;QACJ,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC;SAC7E,CAAC;IACJ,CAAC;IAAC,OAAO,GAAQ,EAAE,CAAC;QAClB,OAAO;YACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,UAAU,GAAG,CAAC,OAAO,EAAE,EAAE,CAAC;YACnE,OAAO,EAAE,IAAI;SACd,CAAC;IACJ,CAAC;AACH,CAAC,CACF,CAAC;AAEF,uBAAuB;AACvB,KAAK,UAAU,IAAI;IACjB,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAClC,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;IACnC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,16 @@
1
+ import type { DocRow } from "./types.js";
2
+ /**
3
+ * Semantic chunking: split markdown at heading lines (levels 1-4), then split sections
4
+ * longer than 2000 characters at paragraph boundaries with 200 characters of overlap.
5
+ */
6
+ export declare function chunkMarkdown(markdown: string, library: string): Omit<DocRow, "id" | "vector">[];
7
+ /**
8
+ * Generate one embedding per input text using nomic-embed-text-v1.5.
9
+ * Uses Matryoshka dimensionality reduction to MATRYOSHKA_DIM (384 in indexer.js).
10
+ * Prefixes each text with `taskType` ("search_document" when omitted) for better accuracy.
11
+ */
12
+ export declare function embedTexts(texts: string[], taskType?: "search_document" | "search_query"): Promise<number[][]>;
13
+ /**
14
+ * Chunk markdown and generate embeddings, returning rows ready for LanceDB (an empty array when there are no chunks).
15
+ */
16
+ export declare function indexDocument(markdown: string, library: string): Promise<Omit<DocRow, "id">[]>;
@@ -0,0 +1,125 @@
// Lazy-loaded pipeline
let embedPipeline = null;
let layerNormFn = null;
// The load currently in flight (or already finished). Sharing it means that
// callers arriving while the model is still loading wait for the same load
// instead of each starting their own.
let embedPipelineLoading = null;
const MODEL_NAME = "nomic-ai/nomic-embed-text-v1.5";
const MATRYOSHKA_DIM = 384; // Use 384-dim slice for speed; can bump to 768 for max accuracy

/**
 * Return the feature-extraction pipeline and the library's `layer_norm`
 * function, loading them on first use.
 *
 * The first call imports "@huggingface/transformers" and builds the pipeline
 * for MODEL_NAME; later calls reuse the cached instances. Concurrent first
 * calls share one load. If the load fails, the error is rethrown to every
 * waiting caller and the next call starts a fresh attempt.
 *
 * @returns {Promise<{ pipe: any, layerNorm: any }>}
 */
async function getEmbedPipeline() {
  if (embedPipeline)
    return { pipe: embedPipeline, layerNorm: layerNormFn };
  if (!embedPipelineLoading) {
    embedPipelineLoading = (async () => {
      const transformers = await import("@huggingface/transformers");
      const pipe = await transformers.pipeline("feature-extraction", MODEL_NAME);
      layerNormFn = transformers.layer_norm;
      embedPipeline = pipe;
    })().catch((err) => {
      // Forget the failed attempt so a later call can retry.
      embedPipelineLoading = null;
      throw err;
    });
  }
  await embedPipelineLoading;
  return { pipe: embedPipeline, layerNorm: layerNormFn };
}
/**
 * Semantic chunking: split markdown by headings, then split large sections
 * at paragraph boundaries with overlap.
 *
 * A new section starts at every heading line of level 1-4 (`#` .. `####`).
 * Lines inside fenced code blocks (``` or ~~~) are never treated as headings,
 * so a `# comment` in a shell or Python sample stays part of its section
 * instead of starting a bogus one and corrupting the heading path.
 *
 * @param {string} markdown - Raw markdown document.
 * @param {string} library - Library name copied onto every chunk.
 * @returns {{ library: string, headingPath: string, text: string }[]} Chunks in
 *   document order; `headingPath` is the JSON-encoded array of enclosing
 *   heading texts (outermost first). The heading line itself is kept in `text`.
 */
export function chunkMarkdown(markdown, library) {
  const lines = markdown.split("\n");
  const chunks = [];
  const headingStack = [];
  let currentText = "";
  // Marker (e.g. "```" or "~~~~") of the code fence currently open, or null.
  let openFence = null;

  // Emit the text collected so far under the current heading path.
  function flush() {
    const trimmed = currentText.trim();
    if (trimmed.length > 0) {
      const headingPath = headingStack.map((h) => h.text);
      const subChunks = splitWithOverlap(trimmed, 2000, 200);
      for (const sub of subChunks) {
        chunks.push({
          library,
          headingPath: JSON.stringify(headingPath),
          text: sub,
        });
      }
    }
    currentText = "";
  }

  for (const line of lines) {
    // Track fenced code blocks: an opening fence is 3+ backticks or tildes
    // (indented at most 3 spaces); it is closed by a fence of the same
    // character, at least as long, with nothing else on the line.
    const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fenceMatch) {
      const marker = fenceMatch[1];
      if (openFence === null) {
        openFence = marker;
      }
      else if (marker[0] === openFence[0] &&
        marker.length >= openFence.length &&
        line.trim() === marker) {
        openFence = null;
      }
    }

    const headingMatch = openFence === null ? line.match(/^(#{1,4})\s+(.+)/) : null;
    if (headingMatch) {
      flush();
      const level = headingMatch[1].length;
      const text = headingMatch[2].trim();
      // Pop headings at same or deeper level
      while (headingStack.length > 0 &&
        headingStack[headingStack.length - 1].level >= level) {
        headingStack.pop();
      }
      headingStack.push({ level, text });
    }
    currentText += line + "\n";
  }
  flush();
  return chunks;
}

/**
 * Split text at paragraph boundaries with ~10% overlap.
 * Overlap ensures context isn't lost at chunk edges.
 *
 * Text that already fits in `maxChars` is returned as a single piece.
 * Otherwise paragraphs (separated by blank lines) are packed greedily; when
 * the next paragraph would not fit, the current piece is emitted and the next
 * one starts with the last `overlapChars` characters of it. A single paragraph
 * longer than `maxChars` is not split further.
 *
 * @param {string} text
 * @param {number} maxChars - Size above which a piece is closed.
 * @param {number} overlapChars - Trailing characters repeated in the next piece.
 * @returns {string[]}
 */
function splitWithOverlap(text, maxChars, overlapChars) {
  if (text.length <= maxChars)
    return [text];
  const paragraphs = text.split(/\n\n+/);
  const result = [];
  let current = "";
  for (const para of paragraphs) {
    // "+ 2" accounts for the blank-line separator that joins paragraphs.
    if (current.length + para.length + 2 > maxChars && current.length > 0) {
      result.push(current.trim());
      // Start next chunk with overlap from end of current
      current = current.slice(-overlapChars) + "\n\n" + para;
    }
    else {
      current += (current ? "\n\n" : "") + para;
    }
  }
  if (current.trim()) {
    result.push(current.trim());
  }
  return result;
}
/**
 * Embed texts with nomic-embed-text-v1.5.
 *
 * Each text is prefixed with `<taskType>: ` as required by nomic, and the
 * inputs go through the pipeline 16 at a time with mean pooling. Every batch
 * output is then layer-normalized, cut to its first MATRYOSHKA_DIM components
 * (Matryoshka truncation) and L2-normalized.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {"search_document" | "search_query"} [taskType="search_document"]
 * @returns {Promise<number[][]>} One vector per input text, in input order.
 */
export async function embedTexts(texts, taskType = "search_document") {
  const { pipe, layerNorm } = await getEmbedPipeline();
  const BATCH_SIZE = 16;
  const inputs = texts.map((text) => `${taskType}: ${text}`);
  const vectors = [];

  let offset = 0;
  while (offset < inputs.length) {
    const group = inputs.slice(offset, offset + BATCH_SIZE);
    const pooled = await pipe(group, { pooling: "mean" });
    // layer norm -> Matryoshka truncation -> L2 normalize
    const unitVectors = layerNorm(pooled, [pooled.dims[1]])
      .slice(null, [0, MATRYOSHKA_DIM])
      .normalize(2, -1);
    group.forEach((_, row) => {
      vectors.push(Array.from(unitVectors[row].data));
    });
    offset += BATCH_SIZE;
  }

  return vectors;
}
/**
 * Turn a markdown document into embedded rows ready for LanceDB.
 *
 * The document is chunked with `chunkMarkdown`, every chunk's text is embedded
 * as a "search_document", and each chunk is returned with its embedding added
 * as `vector`. A document that yields no chunks returns an empty array without
 * calling the embedding model.
 *
 * @param {string} markdown - Raw markdown document.
 * @param {string} library - Library name recorded on every row.
 * @returns {Promise<object[]>} The chunk rows, each extended with `vector`.
 */
export async function indexDocument(markdown, library) {
  const pieces = chunkMarkdown(markdown, library);
  if (pieces.length === 0) {
    return [];
  }
  const vectors = await embedTexts(pieces.map((piece) => piece.text), "search_document");
  return pieces.map((piece, position) => Object.assign({}, piece, { vector: vectors[position] }));
}
125
+ //# sourceMappingURL=indexer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"indexer.js","sourceRoot":"","sources":["../src/indexer.ts"],"names":[],"mappings":"AAEA,uBAAuB;AACvB,IAAI,aAAa,GAAQ,IAAI,CAAC;AAC9B,IAAI,WAAW,GAAQ,IAAI,CAAC;AAE5B,MAAM,UAAU,GAAG,gCAAgC,CAAC;AACpD,MAAM,cAAc,GAAG,GAAG,CAAC,CAAC,gEAAgE;AAE5F,KAAK,UAAU,gBAAgB;IAC7B,IAAI,aAAa;QAAE,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;IAC1E,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;IAC/D,aAAa,GAAG,MAAM,YAAY,CAAC,QAAQ,CAAC,oBAAoB,EAAE,UAAU,CAAC,CAAC;IAC9E,WAAW,GAAG,YAAY,CAAC,UAAU,CAAC;IACtC,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;AACzD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,aAAa,CAC3B,QAAgB,EAChB,OAAe;IAEf,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,MAAM,GAAoC,EAAE,CAAC;IAEnD,MAAM,YAAY,GAAsC,EAAE,CAAC;IAC3D,IAAI,WAAW,GAAG,EAAE,CAAC;IAErB,SAAS,KAAK;QACZ,MAAM,OAAO,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACpD,MAAM,SAAS,GAAG,gBAAgB,CAAC,OAAO,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;YACvD,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC5B,MAAM,CAAC,IAAI,CAAC;oBACV,OAAO;oBACP,WAAW,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;oBACxC,IAAI,EAAE,GAAG;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QACD,WAAW,GAAG,EAAE,CAAC;IACnB,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACpD,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,EAAE,CAAC;YACR,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACrC,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAEpC,uCAAuC;YACvC,OACE,YAAY,CAAC,MAAM,GAAG,CAAC;gBACvB,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EACpD,CAAC;gBACD,YAAY,CAAC,GAAG,EAAE,CAAC;YACrB,CAAC;YACD,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;YACnC,WAAW,IAAI,IAAI,GAAG,IAAI,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,WAAW,IAAI,IAAI,GAAG,IAAI,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,KAAK,EAAE,CAAC;IAER,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CACvB,IAAY,EACZ,QAAgB,EAChB,YAAoB;IAEpB,IAA
I,IAAI,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAE3C,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,aAAa,GAAG,EAAE,CAAC,CAAC,oCAAoC;IAE5D,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,QAAQ,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5B,oDAAoD;YACpD,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC;YAC7C,OAAO,GAAG,aAAa,GAAG,MAAM,GAAG,IAAI,CAAC;QAC1C,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;QAC5C,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAC9B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,KAAe,EACf,WAA+C,iBAAiB;IAEhE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,GAAG,MAAM,gBAAgB,EAAE,CAAC;IACrD,MAAM,UAAU,GAAe,EAAE,CAAC;IAElC,2DAA2D;IAC3D,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,QAAQ,KAAK,CAAC,EAAE,CAAC,CAAC;IAEvD,qBAAqB;IACrB,MAAM,SAAS,GAAG,EAAE,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAC/C,MAAM,GAAG,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QAEnD,0DAA0D;QAC1D,MAAM,MAAM,GAAG,SAAS,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC,CAAC;QAC1D,MAAM,UAAU,GAAG,SAAS,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAoB,CAAC,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,QAAgB,EAChB,OAAe;IAEf,MAAM,SAAS,GAAG,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACnD,IAAI,SAAS,C
AAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,iBAAiB,CAAC,CAAC;IAE9D,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAClC,GAAG,KAAK;QACR,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC;KACtB,CAAC,CAAC,CAAC;AACN,CAAC"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Cross-encoder reranker using Xenova/ms-marco-MiniLM-L-6-v2.
3
+ * Takes query + candidate passages, returns relevance scores.
4
+ * ~23MB model, runs locally via Transformers.js ONNX runtime.
5
+ */
6
+ export interface RerankCandidate {
7
+ id: number; // chunk id; searchDocs returns it as SearchResult.chunkId
8
+ text: string; // passage text that is paired with the query for scoring
9
+ library: string; // library the chunk belongs to
10
+ headingPath: string; // JSON-encoded heading path; searchDocs decodes it with JSON.parse
11
+ rrfScore: number; // fused score from the Reciprocal Rank Fusion stage
12
+ }
13
+ export interface RerankResult extends RerankCandidate {
14
+ rerankerScore: number; // cross-encoder logit for (query, text); higher = more relevant
15
+ }
16
+ /**
17
+ * Rerank candidates using cross-encoder; resolves to the candidates sorted by rerankerScore, highest first.
18
+ * Processes in batches to manage memory. An empty candidate list resolves to [].
19
+ */
20
+ export declare function rerank(query: string, candidates: RerankCandidate[]): Promise<RerankResult[]>;
@@ -0,0 +1,53 @@
1
+ /**
2
+ * Cross-encoder reranker using Xenova/ms-marco-MiniLM-L-6-v2.
3
+ * Takes query + candidate passages, returns relevance scores.
4
+ * ~23MB model, runs locally via Transformers.js ONNX runtime.
5
+ */
6
+ let rerankerModel = null;
7
+ let rerankerTokenizer = null;
8
+ const RERANKER_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2";
9
/**
 * Lazily load the cross-encoder model and its tokenizer.
 * Both are cached in module scope, so only the first call pays the load cost.
 */
async function loadReranker() {
    const alreadyLoaded = rerankerModel && rerankerTokenizer;
    if (!alreadyLoaded) {
        const transformers = await import("@huggingface/transformers");
        rerankerModel = await transformers.AutoModelForSequenceClassification.from_pretrained(RERANKER_MODEL);
        rerankerTokenizer = await transformers.AutoTokenizer.from_pretrained(RERANKER_MODEL);
    }
    return { model: rerankerModel, tokenizer: rerankerTokenizer };
}
18
/**
 * Score every candidate against the query with the cross-encoder and return
 * the candidates ordered from most to least relevant.
 * Candidates are tokenized and scored 16 at a time to manage memory.
 */
export async function rerank(query, candidates) {
    if (!candidates.length) {
        return [];
    }
    const BATCH = 16;
    const { model, tokenizer } = await loadReranker();
    const scored = [];
    let offset = 0;
    while (offset < candidates.length) {
        const group = candidates.slice(offset, offset + BATCH);
        // The same query is paired with every passage of the batch.
        const repeatedQuery = group.map(() => query);
        const encoded = tokenizer(repeatedQuery, {
            text_pair: group.map((candidate) => candidate.text),
            padding: true,
            truncation: true,
        });
        const { logits } = await model(encoded);
        // logits is a Tensor of shape [batch_size, 1]: entry j scores group[j].
        // Higher score = more relevant.
        for (const [j, candidate] of group.entries()) {
            scored.push({ ...candidate, rerankerScore: logits.data[j] });
        }
        offset += BATCH;
    }
    // Best match first
    return scored.sort((left, right) => right.rerankerScore - left.rerankerScore);
}
53
+ //# sourceMappingURL=reranker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"reranker.js","sourceRoot":"","sources":["../src/reranker.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,IAAI,aAAa,GAAQ,IAAI,CAAC;AAC9B,IAAI,iBAAiB,GAAQ,IAAI,CAAC;AAElC,MAAM,cAAc,GAAG,+BAA+B,CAAC;AAEvD,KAAK,UAAU,YAAY;IACzB,IAAI,aAAa,IAAI,iBAAiB,EAAE,CAAC;QACvC,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,iBAAiB,EAAE,CAAC;IAChE,CAAC;IACD,MAAM,EAAE,kCAAkC,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CACxE,2BAA2B,CAC5B,CAAC;IACF,aAAa,GAAG,MAAM,kCAAkC,CAAC,eAAe,CACtE,cAAc,CACf,CAAC;IACF,iBAAiB,GAAG,MAAM,aAAa,CAAC,eAAe,CAAC,cAAc,CAAC,CAAC;IACxE,OAAO,EAAE,KAAK,EAAE,aAAa,EAAE,SAAS,EAAE,iBAAiB,EAAE,CAAC;AAChE,CAAC;AAcD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,MAAM,CAC1B,KAAa,EACb,UAA6B;IAE7B,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEvC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,MAAM,YAAY,EAAE,CAAC;IAClD,MAAM,OAAO,GAAmB,EAAE,CAAC;IAEnC,2BAA2B;IAC3B,MAAM,SAAS,GAAG,EAAE,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACtD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAEjD,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;QACvC,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAE1C,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,EAAE;YAClC,SAAS,EAAE,QAAQ;YACnB,OAAO,EAAE,IAAI;YACb,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,CAAC;QAErC,qDAAqD;QACrD,+BAA+B;QAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACpC,OAAO,CAAC,IAAI,CAAC;gBACX,GAAG,KAAK,CAAC,CAAC,CAAC;gBACX,aAAa,EAAE,KAAK;aACrB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,oCAAoC;IACpC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,GAAG,CAAC,CAAC,aAAa,CAAC,CAAC;IAC1D,OAAO,OAAO,CAAC;AACjB,CAAC"}
@@ -0,0 +1,13 @@
1
+ /**
2
+ * Advanced RAG search pipeline:
3
+ * 1. Vector search (LanceDB) — semantic similarity
4
+ * 2. BM25 search (MiniSearch) — keyword/exact match
5
+ * 3. Reciprocal Rank Fusion — merge both ranked lists
6
+ * 4. Cross-encoder rerank — rescore top candidates
7
+ */
8
+ import type { SearchResult } from "./types.js";
9
+ import type { DocStore } from "./store.js";
10
+ export declare function searchDocs(query: string, store: DocStore, options?: {
11
+ library?: string; // when set, both the vector and the BM25 stage only consider this library
12
+ topK?: number; // maximum number of results to return; defaults to 10
13
+ }): Promise<SearchResult[]>;
package/dist/search.js ADDED
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Advanced RAG search pipeline:
3
+ * 1. Vector search (LanceDB) — semantic similarity
4
+ * 2. BM25 search (MiniSearch) — keyword/exact match
5
+ * 3. Reciprocal Rank Fusion — merge both ranked lists
6
+ * 4. Cross-encoder rerank — rescore top candidates
7
+ */
8
+ import MiniSearch from "minisearch";
9
+ import { embedTexts } from "./indexer.js";
10
+ import { rerank } from "./reranker.js";
11
// --- BM25 index (lazy-built, cached per session) ---
let bm25Index = null;
// "count:minId:maxId" of the chunk set the cached index was built from.
let bm25Fingerprint = "";
/**
 * Return a MiniSearch (BM25) index over every chunk in the store.
 *
 * The index is cached in module scope and rebuilt whenever the stored chunk
 * set changes. Change detection uses the chunk count plus the id range rather
 * than the count alone: DocStore.addLibrary hands out fresh, increasing ids,
 * so re-indexing a library that happens to yield the same number of chunks
 * still invalidates the cache instead of serving stale text.
 *
 * @param store DocStore to read chunks from.
 * @returns the (possibly cached) MiniSearch index.
 */
async function getBM25Index(store) {
    const allChunks = await store.getChunks();
    let minId = Infinity;
    let maxId = -Infinity;
    for (const chunk of allChunks) {
        if (chunk.id < minId) {
            minId = chunk.id;
        }
        if (chunk.id > maxId) {
            maxId = chunk.id;
        }
    }
    const fingerprint = `${allChunks.length}:${minId}:${maxId}`;
    // Reuse the cached index only if the chunk set is unchanged
    if (bm25Index && bm25Fingerprint === fingerprint) {
        return bm25Index;
    }
    bm25Index = new MiniSearch({
        fields: ["text"],
        storeFields: ["id", "library", "headingPath", "text"],
        searchOptions: {
            boost: { text: 1 },
            fuzzy: 0.2,
            prefix: true,
        },
    });
    // MiniSearch needs objects with a unique `id` field
    const docs = allChunks.map((chunk) => ({
        id: chunk.id,
        library: chunk.library,
        headingPath: chunk.headingPath,
        text: chunk.text,
    }));
    bm25Index.addAll(docs);
    bm25Fingerprint = fingerprint;
    return bm25Index;
}
40
/**
 * Reciprocal Rank Fusion of the vector and BM25 rankings.
 * Every list a document appears in contributes weight / (k + rank) to its
 * score (rank counted from 1); contributions are summed per document id.
 * k=60 is the standard default (Azure, Weaviate, OpenSearch).
 * Returns the merged documents, each carrying its `rrfScore`, best first.
 */
function reciprocalRankFusion(vectorRanked, bm25Ranked, options) {
    const { k, vectorWeight, bm25Weight } = options;
    const fusedById = new Map();
    // Add one ranker's contribution for every document in its list.
    const accumulate = (ranking, weight) => {
        for (const [position, doc] of ranking.entries()) {
            // position is 0-based, so the 1-based rank is position + 1
            const contribution = weight / (k + position + 1);
            const entry = fusedById.get(doc.id);
            if (entry) {
                entry.rrfScore += contribution;
            }
            else {
                fusedById.set(doc.id, { ...doc, rrfScore: contribution });
            }
        }
    };
    accumulate(vectorRanked, vectorWeight);
    accumulate(bm25Ranked, bm25Weight);
    // Highest fused score first
    return [...fusedById.values()].sort((left, right) => right.rrfScore - left.rrfScore);
}
76
// --- Main search pipeline ---
/**
 * Run the hybrid search pipeline for a query:
 *   1. vector search (LanceDB), 2. BM25 search (MiniSearch),
 *   3. Reciprocal Rank Fusion, 4. cross-encoder rerank.
 *
 * @param query   Free-text search query.
 * @param store   DocStore holding the indexed chunks.
 * @param options Optional `library` (restrict both retrieval stages to one
 *                library) and `topK` (maximum number of results, default 10).
 * @returns up to `topK` results, best first; `score` is the reranker score
 *          rounded to three decimals and `headingPath` is the decoded
 *          heading array.
 */
export async function searchDocs(query, store, options) {
    const topK = options?.topK ?? 10;
    // Rerank at least 30 and retrieve at least 50 candidates per ranker, but
    // scale both up when the caller asks for more, so that topK > 30 is not
    // silently capped at 30 results. A non-numeric topK falls back to the minimums.
    const rerankCount = Math.max(30, Math.ceil(topK) || 0);
    const candidateCount = Math.max(50, rerankCount);
    // Step 1: Vector search via LanceDB
    const [queryVector] = await embedTexts([query], "search_query");
    const vectorHits = await store.vectorSearch(queryVector, candidateCount, options?.library);
    const vectorRanked = vectorHits.map((row) => ({
        id: row.id,
        text: row.text,
        library: row.library,
        headingPath: row.headingPath,
    }));
    // Step 2: BM25 search via MiniSearch
    const index = await getBM25Index(store);
    const bm25Options = {};
    if (options?.library) {
        bm25Options.filter = (result) => result.library === options.library;
    }
    const bm25Hits = index.search(query, bm25Options).slice(0, candidateCount);
    const bm25Ranked = bm25Hits.map((hit) => ({
        id: hit.id,
        text: hit.text,
        library: hit.library,
        headingPath: hit.headingPath,
    }));
    // Step 3: RRF fusion (k=60, BM25 weight=1.0, vector weight=0.7)
    const fused = reciprocalRankFusion(vectorRanked, bm25Ranked, {
        k: 60,
        vectorWeight: 0.7,
        bm25Weight: 1.0,
    });
    // Step 4: Cross-encoder rerank of the best fused candidates
    const rerankCandidates = fused
        .slice(0, rerankCount)
        .map((f) => ({
        id: f.id,
        text: f.text,
        library: f.library,
        headingPath: f.headingPath,
        rrfScore: f.rrfScore,
    }));
    const reranked = await rerank(query, rerankCandidates);
    // Step 5: Format and return top-K
    return reranked.slice(0, topK).map((r) => ({
        score: Math.round(r.rerankerScore * 1000) / 1000,
        library: r.library,
        // headingPath is stored as a JSON string; decode it for callers
        headingPath: JSON.parse(r.headingPath),
        content: r.text,
        chunkId: r.id,
    }));
}
128
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,UAAU,MAAM,YAAY,CAAC;AAGpC,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,MAAM,EAAwB,MAAM,eAAe,CAAC;AAE7D,sDAAsD;AAEtD,IAAI,SAAS,GAAsB,IAAI,CAAC;AACxC,IAAI,gBAAgB,GAAG,CAAC,CAAC;AAEzB,KAAK,UAAU,YAAY,CAAC,KAAe;IACzC,MAAM,SAAS,GAAG,MAAM,KAAK,CAAC,SAAS,EAAE,CAAC;IAE1C,oDAAoD;IACpD,IAAI,SAAS,IAAI,gBAAgB,KAAK,SAAS,CAAC,MAAM,EAAE,CAAC;QACvD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,SAAS,GAAG,IAAI,UAAU,CAAC;QACzB,MAAM,EAAE,CAAC,MAAM,CAAC;QAChB,WAAW,EAAE,CAAC,IAAI,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,CAAC;QACrD,aAAa,EAAE;YACb,KAAK,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE;YAClB,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,IAAI;SACb;KACF,CAAC,CAAC;IAEH,oDAAoD;IACpD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,KAAa,EAAE,EAAE,CAAC,CAAC;QAC7C,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,IAAI,EAAE,KAAK,CAAC,IAAI;KACjB,CAAC,CAAC,CAAC;IAEJ,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACvB,gBAAgB,GAAG,SAAS,CAAC,MAAM,CAAC;IACpC,OAAO,SAAS,CAAC;AACnB,CAAC;AAeD;;;GAGG;AACH,SAAS,oBAAoB,CAC3B,YAAyB,EACzB,UAAuB,EACvB,OAAgE;IAEhE,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,OAAO,CAAC;IAChD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAuB,CAAC;IAEhD,6CAA6C;IAC7C,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;QACtD,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,KAAK,GAAG,YAAY,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;QAC5C,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtC,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,QAAQ,IAAI,KAAK,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,GAAG,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED,yBAAyB;IACzB,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,UAAU,CAAC,MAAM,EAAE,IAAI,EAAE,EAAE,CAAC;QACpD,MAAM,GAAG,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAC7B,MAAM,KAAK,GAAG,UAAU,GAAG,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;QAC1C,MAAM,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;
QACtC,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,QAAQ,IAAI,KAAK,CAAC;QAC7B,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,GAAG,GAAG,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED,sBAAsB;IACtB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;IAC9C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAChD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,+BAA+B;AAE/B,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,KAAa,EACb,KAAe,EACf,OAA6C;IAE7C,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC;IACjC,MAAM,cAAc,GAAG,EAAE,CAAC,CAAC,8BAA8B;IAEzD,oCAAoC;IACpC,MAAM,CAAC,WAAW,CAAC,GAAG,MAAM,UAAU,CAAC,CAAC,KAAK,CAAC,EAAE,cAAc,CAAC,CAAC;IAChE,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,YAAY,CACzC,WAAW,EACX,cAAc,EACd,OAAO,EAAE,OAAO,CACjB,CAAC;IACF,MAAM,YAAY,GAAgB,UAAU,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACzD,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EAAE,GAAG,CAAC,WAAW;KAC7B,CAAC,CAAC,CAAC;IAEJ,qCAAqC;IACrC,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,KAAK,CAAC,CAAC;IACxC,MAAM,WAAW,GAAQ,EAAE,CAAC;IAC5B,IAAI,OAAO,EAAE,OAAO,EAAE,CAAC;QACrB,WAAW,CAAC,MAAM,GAAG,CAAC,MAAW,EAAE,EAAE,CAAC,MAAM,CAAC,OAAO,KAAK,OAAO,CAAC,OAAO,CAAC;IAC3E,CAAC;IACD,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IAC3E,MAAM,UAAU,GAAgB,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAQ,EAAE,EAAE,CAAC,CAAC;QAC1D,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,WAAW,EAAE,GAAG,CAAC,WAAW;KAC7B,CAAC,CAAC,CAAC;IAEJ,gEAAgE;IAChE,MAAM,KAAK,GAAG,oBAAoB,CAAC,YAAY,EAAE,UAAU,EAAE;QAC3D,CAAC,EAAE,EAAE;QACL,YAAY,EAAE,GAAG;QACjB,UAAU,EAAE,GAAG;KAChB,CAAC,CAAC;IAEH,iDAAiD;IACjD,MAAM,gBAAgB,GAAsB,KAAK;SAC9C,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACX,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,WAAW,EAAE,CAAC,CAAC,WAAW;QAC1B,QAAQ,EAAE,CAAC,CAAC,QAAQ;KACrB,CAAC,CAAC,CAAC;IAEN,MAAM,QAAQ,GAAG,MAAM,M
AAM,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;IAEvD,kCAAkC;IAClC,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACzC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,IAAI;QAChD,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAa;QAClD,OAAO,EAAE,CAAC,CAAC,IAAI;QACf,OAAO,EAAE,CAAC,CAAC,EAAE;KACd,CAAC,CAAC,CAAC;AACN,CAAC"}
@@ -0,0 +1,31 @@
1
+ import type { DocRow, DocMetadata } from "./types.js";
2
+ export declare class DocStore {
3
+ private docsDir; // <projectRoot>/.claude/docs
4
+ private dbPath; // <docsDir>/lancedb
5
+ private metadataPath; // <docsDir>/.metadata.json
6
+ private rawDir; // <docsDir>/raw
7
+ private metadata; // parsed metadata, cached after the first loadMetadata() call
8
+ private nextId; // next chunk id to assign; starts at 1
9
+ constructor(projectRoot: string);
10
+ private ensureDir;
11
+ private getTable;
12
+ loadMetadata(): Promise<DocMetadata>; // falls back to { libraries: [] } when the metadata file cannot be read or parsed
13
+ private saveMetadata;
14
+ saveRawDoc(library: string, content: string): Promise<void>; // writes <rawDir>/<name>.md with "/" replaced by "__" and "@" removed
15
+ addLibrary(library: string, version: string, sourceUrl: string, chunks: Omit<DocRow, "id">[]): Promise<{ // replaces any rows already stored for this library
16
+ chunkCount: number; // rows written for this library
17
+ indexSize: number; // total rows in the table afterwards
18
+ }>;
19
+ /** Vector search using LanceDB native search. Returns rows with _distance. */
20
+ vectorSearch(queryVector: number[], limit: number, library?: string): Promise<(DocRow & {
21
+ _distance?: number;
22
+ })[]>;
23
+ /** Get chunks by library or all chunks (for list/section tools). */
24
+ getChunks(library?: string): Promise<DocRow[]>;
25
+ getChunkById(chunkId: number): Promise<DocRow | undefined>; // undefined when the table or the row does not exist
26
+ getChunksByHeading(library: string, heading: string): Promise<DocRow[]>; // case-insensitive substring match against any heading in the chunk's heading path
27
+ isEmpty(): Promise<boolean>; // true when the docs table does not exist yet
28
+ getDocsDir(): string;
29
+ }
30
+ /** Resolve the project root: walk up from startDir (default: process.cwd()) until a directory containing package.json is found; returns the starting directory when none is found. */
31
+ export declare function resolveProjectRoot(startDir?: string): string;
package/dist/store.js ADDED
@@ -0,0 +1,177 @@
1
import { existsSync } from "node:fs";
import { readFile, writeFile, mkdir } from "node:fs/promises";
import { join, resolve } from "node:path";
4
+ // Lazy-loaded LanceDB connection
5
+ let dbInstance = null;
6
+ let tableInstance = null;
7
/**
 * Persistent store for indexed documentation chunks.
 *
 * Everything lives under `<projectRoot>/.claude/docs`: a LanceDB database
 * (table "docs"), a `.metadata.json` file describing the indexed libraries,
 * and a `raw/` directory with the fetched markdown.
 * The LanceDB connection and table handle are cached in module scope.
 */
export class DocStore {
    docsDir;
    dbPath;
    metadataPath;
    rawDir;
    // Parsed .metadata.json, cached after the first loadMetadata() call.
    metadata = null;
    // Next chunk id to hand out; re-seeded from the table in getTable().
    nextId = 1;
    /** @param projectRoot directory under which `.claude/docs` is created. */
    constructor(projectRoot) {
        this.docsDir = join(projectRoot, ".claude", "docs");
        this.dbPath = join(this.docsDir, "lancedb");
        this.metadataPath = join(this.docsDir, ".metadata.json");
        this.rawDir = join(this.docsDir, "raw");
    }
    /** Create the docs directory tree (rawDir and all its parents). */
    async ensureDir() {
        await mkdir(this.rawDir, { recursive: true });
    }
    /**
     * Return the cached "docs" table, opening it on first use.
     * Opening also seeds `nextId` with (largest stored id + 1).
     * Resolves to null when the table cannot be opened (e.g. not created yet).
     */
    async getTable() {
        if (tableInstance)
            return tableInstance;
        const lancedb = await import("@lancedb/lancedb");
        dbInstance = await lancedb.connect(this.dbPath);
        try {
            tableInstance = await dbInstance.openTable("docs");
            // Find max existing ID. A plain loop is used instead of
            // Math.max(...ids): spreading a very large array throws a
            // RangeError, which the catch below would misreport as
            // "table does not exist".
            const rows = await tableInstance.query().select(["id"]).toArray();
            let maxId = -Infinity;
            for (const row of rows) {
                if (row.id > maxId) {
                    maxId = row.id;
                }
            }
            if (rows.length > 0) {
                this.nextId = maxId + 1;
            }
        }
        catch {
            // Table doesn't exist yet — will be created on first insert
            tableInstance = null;
        }
        return tableInstance;
    }
    /**
     * Load (and cache) the library metadata.
     * Falls back to `{ libraries: [] }` when the file is missing or unparsable.
     */
    async loadMetadata() {
        if (this.metadata)
            return this.metadata;
        try {
            const data = await readFile(this.metadataPath, "utf-8");
            this.metadata = JSON.parse(data);
        }
        catch {
            this.metadata = { libraries: [] };
        }
        return this.metadata;
    }
    /** Persist metadata to `.metadata.json` and update the in-memory cache. */
    async saveMetadata(metadata) {
        await this.ensureDir();
        this.metadata = metadata;
        await writeFile(this.metadataPath, JSON.stringify(metadata, null, 2));
    }
    /**
     * Save the raw markdown of a library to `<rawDir>/<name>.md`.
     * "/" becomes "__" and "@" is dropped so scoped package names map to a
     * single file name.
     */
    async saveRawDoc(library, content) {
        await this.ensureDir();
        const safeName = library.replace(/\//g, "__").replace(/@/g, "");
        await writeFile(join(this.rawDir, `${safeName}.md`), content, "utf-8");
    }
    /**
     * Store the chunks of one library, replacing any rows previously stored
     * for it, and record the library in the metadata file.
     *
     * @returns `chunkCount` (rows written for this library) and `indexSize`
     *          (total rows in the table afterwards).
     */
    async addLibrary(library, version, sourceUrl, chunks) {
        await this.ensureDir();
        // Open the table BEFORE assigning ids: getTable() seeds nextId from the
        // rows already stored. Assigning first would restart ids at 1 on the
        // first write of a session and collide with other libraries' chunks.
        let table = await this.getTable();
        if (!dbInstance) {
            const lancedb = await import("@lancedb/lancedb");
            dbInstance = await lancedb.connect(this.dbPath);
        }
        // Assign IDs
        const rows = chunks.map((chunk) => ({
            ...chunk,
            id: this.nextId++,
        }));
        if (table) {
            // Delete existing rows for this library, then add new ones
            await table.delete(`library = '${library.replace(/'/g, "''")}'`);
            if (rows.length > 0) {
                await table.add(rows);
            }
        }
        else {
            // Create table with first batch of rows
            if (rows.length > 0) {
                table = await dbInstance.createTable("docs", rows);
                tableInstance = table;
            }
        }
        // Update metadata
        const metadata = await this.loadMetadata();
        const existing = metadata.libraries.findIndex((l) => l.library === library);
        const libMeta = {
            library,
            version,
            sourceUrl,
            fetchedAt: new Date().toISOString(),
            chunkCount: rows.length,
        };
        if (existing >= 0) {
            metadata.libraries[existing] = libMeta;
        }
        else {
            metadata.libraries.push(libMeta);
        }
        await this.saveMetadata(metadata);
        // Count total rows
        const totalRows = table
            ? (await table.countRows())
            : 0;
        return {
            chunkCount: rows.length,
            indexSize: totalRows,
        };
    }
    /** Vector search using LanceDB native search. Returns rows with _distance. */
    async vectorSearch(queryVector, limit, library) {
        const table = await this.getTable();
        if (!table)
            return [];
        let query = table.vectorSearch(queryVector).limit(limit);
        if (library) {
            // Single quotes are doubled to keep the filter literal well-formed
            query = query.where(`library = '${library.replace(/'/g, "''")}'`);
        }
        return await query.toArray();
    }
    /** Get chunks by library or all chunks (for list/section tools). */
    async getChunks(library) {
        const table = await this.getTable();
        if (!table)
            return [];
        let query = table.query();
        if (library) {
            query = query.where(`library = '${library.replace(/'/g, "''")}'`);
        }
        return await query.toArray();
    }
    /**
     * Look up a single chunk by id.
     * Resolves to undefined when the id is not an integer, or when the table
     * or the row does not exist.
     */
    async getChunkById(chunkId) {
        // The id is interpolated into a filter expression, so only a real
        // integer is allowed through.
        const id = Number(chunkId);
        if (!Number.isInteger(id))
            return undefined;
        const table = await this.getTable();
        if (!table)
            return undefined;
        const rows = await table.query().where(`id = ${id}`).toArray();
        return rows[0];
    }
    /**
     * Chunks of `library` whose heading path contains `heading`
     * (case-insensitive substring match on any heading of the path).
     */
    async getChunksByHeading(library, heading) {
        // LanceDB string filtering doesn't support LIKE on JSON, so we filter in JS
        const chunks = await this.getChunks(library);
        const lowerHeading = heading.toLowerCase();
        return chunks.filter((c) => {
            const path = JSON.parse(c.headingPath);
            return path.some((h) => h.toLowerCase().includes(lowerHeading));
        });
    }
    /** True when the docs table does not exist yet. */
    async isEmpty() {
        const table = await this.getTable();
        return !table;
    }
    /** Directory that holds the database, metadata and raw docs. */
    getDocsDir() {
        return this.docsDir;
    }
}
163
/**
 * Resolve the project root: walk up from `startDir` (default: process.cwd())
 * until a directory containing a package.json is found.
 *
 * @param startDir Directory to start from; may be relative to the cwd.
 * @returns the absolute path of the nearest ancestor (or the start directory
 *          itself) that contains a package.json; if none exists, the starting
 *          directory exactly as given.
 */
export function resolveProjectRoot(startDir) {
    const fallback = startDir ?? process.cwd();
    // Walk an absolute path. With a relative start the parent chain
    // (".", "..", "../..", ...) never repeats, so the root check below would
    // never fire and the loop would not terminate when no package.json exists.
    let dir = resolve(fallback);
    while (true) {
        if (existsSync(join(dir, "package.json"))) {
            return dir;
        }
        const parent = join(dir, "..");
        // At the filesystem root the parent of a directory is itself
        if (parent === dir)
            break;
        dir = parent;
    }
    return fallback;
}
177
+ //# sourceMappingURL=store.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"store.js","sourceRoot":"","sources":["../src/store.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AAGrC,iCAAiC;AACjC,IAAI,UAAU,GAAQ,IAAI,CAAC;AAC3B,IAAI,aAAa,GAAQ,IAAI,CAAC;AAE9B,MAAM,OAAO,QAAQ;IACX,OAAO,CAAS;IAChB,MAAM,CAAS;IACf,YAAY,CAAS;IACrB,MAAM,CAAS;IACf,QAAQ,GAAuB,IAAI,CAAC;IACpC,MAAM,GAAW,CAAC,CAAC;IAE3B,YAAY,WAAmB;QAC7B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,WAAW,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC5C,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;QACzD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IAC1C,CAAC;IAEO,KAAK,CAAC,SAAS;QACrB,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAChD,CAAC;IAEO,KAAK,CAAC,QAAQ;QACpB,IAAI,aAAa;YAAE,OAAO,aAAa,CAAC;QACxC,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QACjD,UAAU,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAEhD,IAAI,CAAC;YACH,aAAa,GAAG,MAAM,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YACnD,uBAAuB;YACvB,MAAM,IAAI,GAAG,MAAM,aAAa,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;YAClE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,4DAA4D;YAC5D,aAAa,GAAG,IAAI,CAAC;QACvB,CAAC;QACD,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,YAAY;QAChB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC,QAAQ,CAAC;QACxC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YACxD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAgB,CAAC;QAClD,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,QAAQ,GAAG,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC;QACpC,CAAC;QACD,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAEO,KAAK,CAAC,YAAY,CAAC,QAAqB;QAC9C,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACz
B,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,OAAe,EAAE,OAAe;QAC/C,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAChE,MAAM,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,QAAQ,KAAK,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,UAAU,CACd,OAAe,EACf,OAAe,EACf,SAAiB,EACjB,MAA4B;QAE5B,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QAEjD,IAAI,CAAC,UAAU,EAAE,CAAC;YAChB,UAAU,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAClD,CAAC;QAED,aAAa;QACb,MAAM,IAAI,GAAa,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAC5C,GAAG,KAAK;YACR,EAAE,EAAE,IAAI,CAAC,MAAM,EAAE;SAClB,CAAC,CAAC,CAAC;QAEJ,sBAAsB;QACtB,IAAI,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClC,IAAI,KAAK,EAAE,CAAC;YACV,2DAA2D;YAC3D,MAAM,KAAK,CAAC,MAAM,CAAC,cAAc,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;YACjE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,KAAK,GAAG,MAAM,UAAU,CAAC,WAAW,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;gBACnD,aAAa,GAAG,KAAK,CAAC;YACxB,CAAC;QACH,CAAC;QAED,kBAAkB;QAClB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,QAAQ,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,OAAO,CAAC,CAAC;QAC5E,MAAM,OAAO,GAAoB;YAC/B,OAAO;YACP,OAAO;YACP,SAAS;YACT,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,UAAU,EAAE,IAAI,CAAC,MAAM;SACxB,CAAC;QACF,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;YAClB,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC;QACzC,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,CAAC;QACD,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QAElC,mBAAmB;QACnB,MAAM,SAAS,GAAG,KAAK;YACrB,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,SAAS,EAAE,CAAC;YAC3B,CAAC,CAAC,CAAC,CAAC;QAEN,OAAO;YACL,UAAU,EAAE,IAAI,CAAC,MAAM;YACvB,SAAS,EAAE,SAA
S;SACrB,CAAC;IACJ,CAAC;IAED,8EAA8E;IAC9E,KAAK,CAAC,YAAY,CAChB,WAAqB,EACrB,KAAa,EACb,OAAgB;QAEhB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,CAAC;QAEtB,IAAI,KAAK,GAAG,KAAK,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACzD,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,cAAc,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QACpE,CAAC;QACD,OAAO,MAAM,KAAK,CAAC,OAAO,EAAE,CAAC;IAC/B,CAAC;IAED,oEAAoE;IACpE,KAAK,CAAC,SAAS,CAAC,OAAgB;QAC9B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,CAAC;QAEtB,IAAI,KAAK,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;QAC1B,IAAI,OAAO,EAAE,CAAC;YACZ,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,cAAc,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;QACpE,CAAC;QACD,OAAO,MAAM,KAAK,CAAC,OAAO,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,OAAe;QAChC,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO,SAAS,CAAC;QAE7B,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,QAAQ,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;QACpE,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,kBAAkB,CACtB,OAAe,EACf,OAAe;QAEf,4EAA4E;QAC5E,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAC7C,MAAM,YAAY,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;QAC3C,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAS,EAAE,EAAE;YACjC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,WAAW,CAAa,CAAC;YACnD,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,OAAO;QACX,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,KAAK,CAAC;IAChB,CAAC;IAED,UAAU;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;CACF;AAED,8EAA8E;AAC9E,MAAM,UAAU,kBAAkB,CAAC,QAAiB;IAClD,IAAI,GAAG,GAAG,QAAQ,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACpC,OAAO,IAAI,EAAE,CAAC;QACZ,IAAI,UAAU,CAAC,IAAI,CAAC,GAAG,EAAE,cAAc,CAAC,CAAC,EAAE,CAAC;YAC1C,OAAO,GAAG,CAAC;QACb,CAAC;QACD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAC/B,IAAI,MAAM,KAAK,GAAG;YAAE,MAAM;QAC1B,GAAG,GAAG,MAAM
,CAAC;IACf,CAAC;IACD,OAAO,QAAQ,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;AACnC,CAAC"}
@@ -0,0 +1,29 @@
1
+ export interface Dependency {
2
+ name: string; // NOTE(review): presumably the package name as listed in package.json — confirm against caller
3
+ version: string; // NOTE(review): presumably the version (or range) declared for that package — confirm against caller
4
+ }
5
+ export interface LibraryMetadata {
6
+ library: string; // library name; unique key of the entry (DocStore.addLibrary replaces an entry with the same name)
7
+ version: string; // version passed to DocStore.addLibrary
8
+ sourceUrl: string; // source URL passed to DocStore.addLibrary
9
+ fetchedAt: string; // ISO 8601 timestamp (Date#toISOString) taken when the library was stored
10
+ chunkCount: number; // number of chunks stored for this library
11
+ }
12
+ export interface DocMetadata { // shape of the .metadata.json file maintained by DocStore
13
+ libraries: LibraryMetadata[]; // one entry per indexed library
14
+ }
15
+ export interface SearchResult {
16
+ score: number; // cross-encoder reranker score, rounded to three decimals
17
+ library: string; // library the chunk belongs to
18
+ headingPath: string[]; // heading path decoded from the row's JSON-encoded headingPath
19
+ content: string; // chunk text
20
+ chunkId: number; // id of the underlying DocRow
21
+ }
22
+ /** Shape of a row stored in LanceDB */
23
+ export interface DocRow {
24
+ id: number; // assigned by DocStore.addLibrary
25
+ library: string; // library name; used as the filter column in DocStore queries
26
+ headingPath: string; // JSON-encoded array of heading strings (read back with JSON.parse)
27
+ text: string; // chunk text; the field indexed for BM25 search
28
+ vector: number[]; // embedding of the chunk, as produced by embedTexts
29
+ }
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
package/package.json ADDED
@@ -0,0 +1,53 @@
1
+ {
2
+ "name": "claude-local-docs",
3
+ "version": "1.0.0",
4
+ "description": "Local-first Context7 alternative — indexes JS/TS dependency docs with a 4-stage RAG pipeline. All models run locally via ONNX.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "bin": {
8
+ "claude-local-docs": "dist/index.js"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "commands",
13
+ ".claude-plugin",
14
+ ".mcp.json",
15
+ "README.md",
16
+ "LICENSE"
17
+ ],
18
+ "scripts": {
19
+ "build": "tsc",
20
+ "dev": "tsc --watch",
21
+ "prepublishOnly": "tsc"
22
+ },
23
+ "keywords": [
24
+ "claude-code",
25
+ "mcp",
26
+ "documentation",
27
+ "search",
28
+ "rag",
29
+ "embeddings",
30
+ "context7-alternative",
31
+ "local-first",
32
+ "semantic-search",
33
+ "llms-txt"
34
+ ],
35
+ "author": "matthew",
36
+ "license": "MIT",
37
+ "repository": {
38
+ "type": "git",
39
+ "url": "https://github.com/matteodante/claude-local-docs.git"
40
+ },
41
+ "dependencies": {
42
+ "@huggingface/transformers": "^3.4.1",
43
+ "@lancedb/lancedb": "^0.17.0",
44
+ "@modelcontextprotocol/sdk": "^1.12.1",
45
+ "minisearch": "^7.1.0",
46
+ "zod": "^3.24.0"
47
+ },
48
+ "devDependencies": {
49
+ "@types/node": "^22.0.0",
50
+ "esbuild": "^0.27.3",
51
+ "typescript": "^5.7.0"
52
+ }
53
+ }