tailwindcss-docs-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +159 -0
- package/dist/auto-index.d.ts +21 -0
- package/dist/auto-index.d.ts.map +1 -0
- package/dist/auto-index.js +27 -0
- package/dist/auto-index.js.map +1 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +67 -0
- package/dist/index.js.map +1 -0
- package/dist/pipeline/chunker.d.ts +61 -0
- package/dist/pipeline/chunker.d.ts.map +1 -0
- package/dist/pipeline/chunker.js +162 -0
- package/dist/pipeline/chunker.js.map +1 -0
- package/dist/pipeline/embedder.d.ts +57 -0
- package/dist/pipeline/embedder.d.ts.map +1 -0
- package/dist/pipeline/embedder.js +108 -0
- package/dist/pipeline/embedder.js.map +1 -0
- package/dist/pipeline/fetcher.d.ts +63 -0
- package/dist/pipeline/fetcher.d.ts.map +1 -0
- package/dist/pipeline/fetcher.js +128 -0
- package/dist/pipeline/fetcher.js.map +1 -0
- package/dist/pipeline/parser.d.ts +73 -0
- package/dist/pipeline/parser.d.ts.map +1 -0
- package/dist/pipeline/parser.js +127 -0
- package/dist/pipeline/parser.js.map +1 -0
- package/dist/server.d.ts +47 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +141 -0
- package/dist/server.js.map +1 -0
- package/dist/storage/database.d.ts +90 -0
- package/dist/storage/database.d.ts.map +1 -0
- package/dist/storage/database.js +342 -0
- package/dist/storage/database.js.map +1 -0
- package/dist/storage/search.d.ts +84 -0
- package/dist/storage/search.d.ts.map +1 -0
- package/dist/storage/search.js +165 -0
- package/dist/storage/search.js.map +1 -0
- package/dist/tools/check-status.d.ts +35 -0
- package/dist/tools/check-status.d.ts.map +1 -0
- package/dist/tools/check-status.js +40 -0
- package/dist/tools/check-status.js.map +1 -0
- package/dist/tools/fetch-docs.d.ts +42 -0
- package/dist/tools/fetch-docs.d.ts.map +1 -0
- package/dist/tools/fetch-docs.js +98 -0
- package/dist/tools/fetch-docs.js.map +1 -0
- package/dist/tools/list-utilities.d.ts +49 -0
- package/dist/tools/list-utilities.d.ts.map +1 -0
- package/dist/tools/list-utilities.js +63 -0
- package/dist/tools/list-utilities.js.map +1 -0
- package/dist/tools/search-docs.d.ts +44 -0
- package/dist/tools/search-docs.d.ts.map +1 -0
- package/dist/tools/search-docs.js +53 -0
- package/dist/tools/search-docs.js.map +1 -0
- package/dist/utils/categories.d.ts +25 -0
- package/dist/utils/categories.d.ts.map +1 -0
- package/dist/utils/categories.js +240 -0
- package/dist/utils/categories.js.map +1 -0
- package/dist/utils/config.d.ts +39 -0
- package/dist/utils/config.d.ts.map +1 -0
- package/dist/utils/config.js +37 -0
- package/dist/utils/config.js.map +1 -0
- package/dist/utils/query-expansion.d.ts +28 -0
- package/dist/utils/query-expansion.d.ts.map +1 -0
- package/dist/utils/query-expansion.js +147 -0
- package/dist/utils/query-expansion.js.map +1 -0
- package/dist/utils/similarity.d.ts +12 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +28 -0
- package/dist/utils/similarity.js.map +1 -0
- package/package.json +59 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vitaly Gorodetsky
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# tailwindcss-docs-mcp
|
|
2
|
+
|
|
3
|
+
[![npm version](https://img.shields.io/npm/v/tailwindcss-docs-mcp.svg)](https://www.npmjs.com/package/tailwindcss-docs-mcp)
|
|
4
|
+
[![license: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
5
|
+
|
|
6
|
+
Local semantic search for Tailwind CSS documentation via [Model Context Protocol](https://modelcontextprotocol.io).
|
|
7
|
+
|
|
8
|
+
LLMs hallucinate Tailwind classes. This MCP server downloads the docs once, embeds them locally with ONNX, and gives your AI instant semantic search — no API keys, no external services, no per-query costs.
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```json
|
|
13
|
+
{
|
|
14
|
+
"mcpServers": {
|
|
15
|
+
"tailwindcss-docs-mcp": {
|
|
16
|
+
"command": "npx",
|
|
17
|
+
"args": ["-y", "tailwindcss-docs-mcp"]
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
<details>
|
|
24
|
+
<summary>Claude Code</summary>
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
claude mcp add tailwindcss-docs-mcp -- npx -y tailwindcss-docs-mcp
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
</details>
|
|
31
|
+
|
|
32
|
+
<details>
|
|
33
|
+
<summary>Cursor / Windsurf</summary>
|
|
34
|
+
|
|
35
|
+
Add to your project's `.mcp.json`:
|
|
36
|
+
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"mcpServers": {
|
|
40
|
+
"tailwindcss-docs-mcp": {
|
|
41
|
+
"command": "npx",
|
|
42
|
+
"args": ["-y", "tailwindcss-docs-mcp"]
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
</details>
|
|
49
|
+
|
|
50
|
+
### First run
|
|
51
|
+
|
|
52
|
+
On first boot the embedding model (~27 MB) downloads in the background. The server is usable immediately — tools that need embeddings will return a status message until the model is ready. Use `check_status` to verify.
|
|
53
|
+
|
|
54
|
+
Then ask your assistant: _"Fetch the Tailwind CSS docs"_ — this downloads and indexes the documentation. You only need to do this once.
|
|
55
|
+
|
|
56
|
+
### Example prompts
|
|
57
|
+
|
|
58
|
+
- _"How do I center a div with Tailwind?"_
|
|
59
|
+
- _"Show me the grid layout utilities"_
|
|
60
|
+
- _"What's the dark mode configuration in v4?"_
|
|
61
|
+
- _"Search for responsive padding classes"_
|
|
62
|
+
|
|
63
|
+
## Tools
|
|
64
|
+
|
|
65
|
+
### `fetch_docs`
|
|
66
|
+
|
|
67
|
+
Download and index Tailwind CSS documentation. Run once, re-run with `force: true` to refresh.
|
|
68
|
+
|
|
69
|
+
| Parameter | Type | Default | Description |
|
|
70
|
+
| --------- | --------- | ------- | ---------------------------------- |
|
|
71
|
+
| `version` | `string` | `"v4"` | `"v3"` or `"v4"` |
|
|
72
|
+
| `force` | `boolean` | `false` | Re-download even if already cached |
|
|
73
|
+
|
|
74
|
+
### `search_docs`
|
|
75
|
+
|
|
76
|
+
Semantic + keyword hybrid search. Returns relevant snippets with code examples and deep links.
|
|
77
|
+
|
|
78
|
+
| Parameter | Type | Default | Description |
|
|
79
|
+
| --------- | -------- | ------- | ----------------------------- |
|
|
80
|
+
| `query` | `string` | — | Natural language search query |
|
|
81
|
+
| `version` | `string` | `"v4"` | `"v3"` or `"v4"` |
|
|
82
|
+
| `limit` | `number` | `5` | Results to return (1–20) |
|
|
83
|
+
|
|
84
|
+
### `list_utilities`
|
|
85
|
+
|
|
86
|
+
Browse all utility categories (Layout, Spacing, Typography, etc.).
|
|
87
|
+
|
|
88
|
+
| Parameter | Type | Default | Description |
|
|
89
|
+
| ---------- | -------- | ------- | ----------------------- |
|
|
90
|
+
| `category` | `string` | — | Filter by category name |
|
|
91
|
+
| `version` | `string` | `"v4"` | `"v3"` or `"v4"` |
|
|
92
|
+
|
|
93
|
+
### `check_status`
|
|
94
|
+
|
|
95
|
+
Check index state — doc counts, embedding model status, last indexed time.
|
|
96
|
+
|
|
97
|
+
| Parameter | Type | Default | Description |
|
|
98
|
+
| --------- | -------- | ------- | --------------------------------------- |
|
|
99
|
+
| `version` | `string` | — | Check specific version, or omit for all |
|
|
100
|
+
|
|
101
|
+
## How It Works
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
GitHub → Fetch MDX → Parse → Chunk by heading → Embed (ONNX) → SQLite
|
|
105
|
+
↓
|
|
106
|
+
search_docs → Hybrid Search
|
|
107
|
+
Semantic + FTS5
|
|
108
|
+
Rank Fusion
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
- **Hybrid search** — cosine similarity on embeddings + FTS5 keyword match, fused via reciprocal rank fusion. Semantic handles _"how to center a div"_, keywords handle exact classes like `grid-cols-3`.
|
|
112
|
+
- **Incremental re-indexing** — SHA-256 content hashing per chunk. Unchanged content skips re-embedding.
|
|
113
|
+
- **In-process embeddings** — `snowflake-arctic-embed-xs` (22M params, 384 dims) runs locally via ONNX. No external calls.
|
|
114
|
+
- **Auto model management** — the embedding model (~27 MB) downloads on first boot and caches locally. Server starts immediately; model loads in background.
|
|
115
|
+
|
|
116
|
+
## Configuration
|
|
117
|
+
|
|
118
|
+
| Variable | Default | Description |
|
|
119
|
+
| ----------------------------------- | ------------------------- | ------------------------------------- |
|
|
120
|
+
| `TAILWIND_DOCS_MCP_DEFAULT_VERSION` | `v4` | Set to `v3` for Tailwind 3.x projects |
|
|
121
|
+
| `TAILWIND_DOCS_MCP_PATH` | `~/.tailwindcss-docs-mcp` | Data directory |
|
|
122
|
+
|
|
123
|
+
<details>
|
|
124
|
+
<summary>Using with Tailwind CSS v3</summary>
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"mcpServers": {
|
|
129
|
+
"tailwindcss-docs-mcp": {
|
|
130
|
+
"command": "npx",
|
|
131
|
+
"args": ["-y", "tailwindcss-docs-mcp"],
|
|
132
|
+
"env": {
|
|
133
|
+
"TAILWIND_DOCS_MCP_DEFAULT_VERSION": "v3"
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
</details>
|
|
141
|
+
|
|
142
|
+
## Development
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
git clone https://github.com/vitalis/tailwindcss-docs-mcp.git
|
|
146
|
+
cd tailwindcss-docs-mcp
|
|
147
|
+
bun install
|
|
148
|
+
bun run test # Vitest
|
|
149
|
+
bun run build # TypeScript
|
|
150
|
+
bunx biome check src/ test/ # Lint
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## Acknowledgments
|
|
154
|
+
|
|
155
|
+
Inspired by [HexDocs MCP](https://github.com/bradleygolden/hexdocs-mcp), which does the same for Elixir/Hex documentation.
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
[MIT](LICENSE)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { Embedder } from "./pipeline/embedder.js";
|
|
2
|
+
import type { Database } from "./storage/database.js";
|
|
3
|
+
import type { Config } from "./utils/config.js";
|
|
4
|
+
/**
|
|
5
|
+
* Callbacks for auto-index lifecycle events.
|
|
6
|
+
*/
|
|
7
|
+
export interface AutoIndexCallbacks {
|
|
8
|
+
onStart: () => void;
|
|
9
|
+
onComplete: () => void;
|
|
10
|
+
onError: (error: unknown) => void;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Auto-index the default Tailwind CSS version if not already indexed.
|
|
14
|
+
*
|
|
15
|
+
* Checks the database for an existing index. If found, calls onComplete
|
|
16
|
+
* immediately. Otherwise, triggers handleFetchDocs in the background.
|
|
17
|
+
*
|
|
18
|
+
* Reuses handleFetchDocs which has its own mutex for concurrent-call safety.
|
|
19
|
+
*/
|
|
20
|
+
export declare function maybeAutoIndex(config: Config, db: Database, embedder: Embedder, callbacks: AutoIndexCallbacks): Promise<void>;
|
|
21
|
+
//# sourceMappingURL=auto-index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"auto-index.d.ts","sourceRoot":"","sources":["../src/auto-index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AACvD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEtD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAEhD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,IAAI,CAAC;IACpB,UAAU,EAAE,MAAM,IAAI,CAAC;IACvB,OAAO,EAAE,CAAC,KAAK,EAAE,OAAO,KAAK,IAAI,CAAC;CACnC;AAED;;;;;;;GAOG;AACH,wBAAsB,cAAc,CAClC,MAAM,EAAE,MAAM,EACd,EAAE,EAAE,QAAQ,EACZ,QAAQ,EAAE,QAAQ,EAClB,SAAS,EAAE,kBAAkB,GAC5B,OAAO,CAAC,IAAI,CAAC,CAkBf"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { handleFetchDocs } from "./tools/fetch-docs.js";
|
|
2
|
+
/**
 * Kick off indexing of the default Tailwind CSS version unless the
 * database already contains an index for it.
 *
 * When an index exists, onComplete fires right away. Otherwise the docs
 * are fetched through handleFetchDocs (which serializes concurrent
 * callers via its own mutex) and the matching callback is invoked.
 */
export async function maybeAutoIndex(config, db, embedder, callbacks) {
    const targetVersion = config.defaultVersion;
    // Nothing to do when this version is already indexed.
    const alreadyIndexed = db.getIndexStatus(targetVersion).length > 0;
    if (alreadyIndexed) {
        callbacks.onComplete();
        return;
    }
    callbacks.onStart();
    try {
        await handleFetchDocs({ version: targetVersion }, config, db, embedder);
        callbacks.onComplete();
    }
    catch (error) {
        callbacks.onError(error);
    }
}
|
|
27
|
+
//# sourceMappingURL=auto-index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"auto-index.js","sourceRoot":"","sources":["../src/auto-index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAYxD;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAc,EACd,EAAY,EACZ,QAAkB,EAClB,SAA6B;IAE7B,MAAM,OAAO,GAAG,MAAM,CAAC,cAAc,CAAC;IAEtC,0BAA0B;IAC1B,MAAM,MAAM,GAAG,EAAE,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IAC1C,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,SAAS,CAAC,UAAU,EAAE,CAAC;QACvB,OAAO;IACT,CAAC;IAED,SAAS,CAAC,OAAO,EAAE,CAAC;IAEpB,IAAI,CAAC;QACH,MAAM,eAAe,CAAC,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,CAAC,CAAC;QACzD,SAAS,CAAC,UAAU,EAAE,CAAC;IACzB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;IAC3B,CAAC;AACH,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* tailwindcss-docs-mcp — MCP server entry point.
|
|
4
|
+
*
|
|
5
|
+
* Local semantic search for Tailwind CSS documentation
|
|
6
|
+
* via Model Context Protocol (MCP).
|
|
7
|
+
*
|
|
8
|
+
* Boot workflow:
|
|
9
|
+
* 1. Start MCP server immediately (database only, no embedder)
|
|
10
|
+
* 2. Try to load embedding model from cache (no network, fast)
|
|
11
|
+
* - Success: server is fully operational, check for updates in background
|
|
12
|
+
* - Failure: download model in background, server tools fail until ready
|
|
13
|
+
* 3. Background update/download completes — embedder is hot-swapped immediately
|
|
14
|
+
*/
|
|
15
|
+
export {};
|
|
16
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;GAYG"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* tailwindcss-docs-mcp — MCP server entry point.
|
|
4
|
+
*
|
|
5
|
+
* Local semantic search for Tailwind CSS documentation
|
|
6
|
+
* via Model Context Protocol (MCP).
|
|
7
|
+
*
|
|
8
|
+
* Boot workflow:
|
|
9
|
+
* 1. Start MCP server immediately (database only, no embedder)
|
|
10
|
+
* 2. Try to load embedding model from cache (no network, fast)
|
|
11
|
+
* - Success: server is fully operational, check for updates in background
|
|
12
|
+
* - Failure: download model in background, server tools fail until ready
|
|
13
|
+
* 3. Background update/download completes — embedder is hot-swapped immediately
|
|
14
|
+
*/
|
|
15
|
+
import { mkdirSync } from "node:fs";
|
|
16
|
+
import { createEmbedder, loadCachedEmbedder } from "./pipeline/embedder.js";
|
|
17
|
+
import { createServer } from "./server.js";
|
|
18
|
+
import { createDatabase } from "./storage/database.js";
|
|
19
|
+
import { loadConfig } from "./utils/config.js";
|
|
20
|
+
/**
 * Boot sequence: the MCP server comes up immediately with only the
 * database attached, then the embedding model is resolved in the
 * background (local cache first, network second) and hot-swapped in.
 */
async function main() {
    const config = loadConfig();
    // The data directory must exist before the database file is opened.
    mkdirSync(config.dataDir, { recursive: true });
    const db = await createDatabase(config);
    // Close on exit so WAL checkpoints and locks are released.
    process.on("exit", () => {
        db.close();
    });
    // Serve requests right away; the embedder arrives asynchronously.
    const server = await createServer({ config, db, embedder: null });
    // Phase 1: cache lookup — no network involved.
    const cached = await loadCachedEmbedder(config);
    if (cached) {
        server.setEmbedder(cached);
        console.error("[tailwindcss-docs-mcp] Embedding model loaded from cache.");
        // Phase 2: look for a newer model in the background, hot-swap on success.
        createEmbedder(config)
            .then((updated) => {
            server.setEmbedder(updated);
            console.error("[tailwindcss-docs-mcp] Model update check complete.");
        })
            .catch((error) => {
            console.error("[tailwindcss-docs-mcp] Model update check failed:", error);
        });
        return;
    }
    // Nothing cached: fetch the model without blocking the server.
    server.setEmbedderStatus("downloading");
    console.error("[tailwindcss-docs-mcp] Embedding model not cached. Downloading in background...");
    createEmbedder(config)
        .then((embedder) => {
        server.setEmbedder(embedder);
        console.error("[tailwindcss-docs-mcp] Embedding model downloaded and ready.");
    })
        .catch((error) => {
        server.setEmbedderStatus("failed");
        console.error("[tailwindcss-docs-mcp] Failed to download embedding model:", error);
    });
}
main().catch((error) => {
    console.error("Fatal error:", error);
    process.exit(1);
});
|
|
67
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AACpC,OAAO,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,+BAA+B;IAC/B,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE/C,sBAAsB;IACtB,MAAM,EAAE,GAAG,MAAM,cAAc,CAAC,MAAM,CAAC,CAAC;IAExC,4EAA4E;IAC5E,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;QACtB,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC,CAAC,CAAC;IAEH,8DAA8D;IAC9D,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IAElE,8CAA8C;IAC9C,MAAM,MAAM,GAAG,MAAM,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAEhD,IAAI,MAAM,EAAE,CAAC;QACX,wDAAwD;QACxD,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,CAAC,KAAK,CAAC,2DAA2D,CAAC,CAAC;QAE3E,sEAAsE;QACtE,cAAc,CAAC,MAAM,CAAC;aACnB,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE;YAChB,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAC5B,OAAO,CAAC,KAAK,CAAC,qDAAqD,CAAC,CAAC;QACvE,CAAC,CAAC;aACD,KAAK,CAAC,CAAC,KAAc,EAAE,EAAE;YACxB,OAAO,CAAC,KAAK,CAAC,mDAAmD,EAAE,KAAK,CAAC,CAAC;QAC5E,CAAC,CAAC,CAAC;IACP,CAAC;SAAM,CAAC;QACN,4CAA4C;QAC5C,MAAM,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;QACxC,OAAO,CAAC,KAAK,CACX,iFAAiF,CAClF,CAAC;QACF,cAAc,CAAC,MAAM,CAAC;aACnB,IAAI,CAAC,CAAC,QAAQ,EAAE,EAAE;YACjB,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;YAC7B,OAAO,CAAC,KAAK,CAAC,8DAA8D,CAAC,CAAC;QAChF,CAAC,CAAC;aACD,KAAK,CAAC,CAAC,KAAc,EAAE,EAAE;YACxB,MAAM,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;YACnC,OAAO,CAAC,KAAK,CAAC,4DAA4D,EAAE,KAAK,CAAC,CAAC;QACrF,CAAC,CAAC,CAAC;IACP,CAAC;AACH,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAc,EAAE,EAAE;IAC9B,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;IACrC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import type { CleanDocument } from "./parser.js";
|
|
2
|
+
/**
|
|
3
|
+
* A chunk of documentation content with metadata for embedding and retrieval.
|
|
4
|
+
*/
|
|
5
|
+
export interface Chunk {
|
|
6
|
+
/** SHA-256 hash of content for deduplication and incremental re-indexing */
|
|
7
|
+
id: string;
|
|
8
|
+
/** Parent document slug (e.g., "padding") */
|
|
9
|
+
docSlug: string;
|
|
10
|
+
/** Heading breadcrumb (e.g., "## Basic usage > ### Padding a single side") */
|
|
11
|
+
heading: string;
|
|
12
|
+
/** Chunk text content (clean markdown) */
|
|
13
|
+
content: string;
|
|
14
|
+
/** Deep link URL (e.g., "https://tailwindcss.com/docs/padding#basic-usage") */
|
|
15
|
+
url: string;
|
|
16
|
+
/** Tailwind CSS version */
|
|
17
|
+
version: string;
|
|
18
|
+
/** Approximate token count for context window awareness */
|
|
19
|
+
tokenCount: number;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Options for the chunking process.
|
|
23
|
+
*/
|
|
24
|
+
export interface ChunkOptions {
|
|
25
|
+
/** Maximum tokens per chunk (default: 500) */
|
|
26
|
+
maxTokens?: number;
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Split a clean document into semantically meaningful chunks.
|
|
30
|
+
*
|
|
31
|
+
* Chunking rules:
|
|
32
|
+
* 1. Split on `##` headings — each `##` section becomes one or more chunks
|
|
33
|
+
* 2. Max chunk size: ~500 tokens (configurable)
|
|
34
|
+
* 3. Code blocks stay intact — never split a code block across chunks
|
|
35
|
+
* 4. Heading breadcrumb: include heading hierarchy in each chunk
|
|
36
|
+
* 5. Overlap: include parent `##` heading text in sub-chunks
|
|
37
|
+
*/
|
|
38
|
+
export declare function chunkDocument(doc: CleanDocument, options?: ChunkOptions): Chunk[];
|
|
39
|
+
/**
|
|
40
|
+
* Generate a SHA-256 content hash for a chunk.
|
|
41
|
+
*
|
|
42
|
+
* Used for incremental re-indexing: unchanged content produces
|
|
43
|
+
* the same hash, so we can skip re-embedding.
|
|
44
|
+
*/
|
|
45
|
+
export declare function contentHash(content: string): string;
|
|
46
|
+
/**
|
|
47
|
+
* Estimate the token count for a string.
|
|
48
|
+
*
|
|
49
|
+
* Uses a simple heuristic: ~4 characters per token for English text.
|
|
50
|
+
* This is intentionally approximate — exact tokenization is model-dependent
|
|
51
|
+
* and not worth the overhead for chunk size estimation.
|
|
52
|
+
*/
|
|
53
|
+
export declare function estimateTokens(text: string): number;
|
|
54
|
+
/**
|
|
55
|
+
* Convert a heading string into a URL-safe anchor fragment.
|
|
56
|
+
*
|
|
57
|
+
* "## Basic usage" -> "basic-usage"
|
|
58
|
+
* "### Adding horizontal padding" -> "adding-horizontal-padding"
|
|
59
|
+
*/
|
|
60
|
+
export declare function headingToAnchor(heading: string): string;
|
|
61
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/pipeline/chunker.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,KAAK;IACpB,4EAA4E;IAC5E,EAAE,EAAE,MAAM,CAAC;IACX,6CAA6C;IAC7C,OAAO,EAAE,MAAM,CAAC;IAChB,8EAA8E;IAC9E,OAAO,EAAE,MAAM,CAAC;IAChB,0CAA0C;IAC1C,OAAO,EAAE,MAAM,CAAC;IAChB,+EAA+E;IAC/E,GAAG,EAAE,MAAM,CAAC;IACZ,2BAA2B;IAC3B,OAAO,EAAE,MAAM,CAAC;IAChB,2DAA2D;IAC3D,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8CAA8C;IAC9C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAKD;;;;;;;;;GASG;AACH,wBAAgB,aAAa,CAAC,GAAG,EAAE,aAAa,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,KAAK,EAAE,CAcjF;AA2ID;;;;;GAKG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAQvD"}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
/** Default maximum tokens per chunk */
|
|
3
|
+
const DEFAULT_MAX_TOKENS = 500;
|
|
4
|
+
/**
 * Break a clean document into retrieval-sized chunks.
 *
 * Strategy:
 *  - split on `##` headings, then `###`, then paragraph boundaries
 *  - respect a ~maxTokens budget per chunk (default 500)
 *  - never split a fenced code block
 *  - carry the parent `##` heading into sub-chunks as overlap context
 */
export function chunkDocument(doc, options) {
    const limit = options?.maxTokens ?? DEFAULT_MAX_TOKENS;
    const body = doc.content.trim();
    if (body === "") {
        return [];
    }
    const out = [];
    for (const section of splitOnHeadingLevel(body, 2)) {
        chunkSection(doc, section, limit, out);
    }
    return out;
}
|
|
26
|
+
/** Turn one ## section into chunks, recursing into ### subsections when it is over budget. */
function chunkSection(doc, section, maxTokens, out) {
    const heading = extractHeading(section, 2);
    const label = heading ?? "";
    // Small enough: the whole section becomes a single chunk.
    if (estimateTokens(section) <= maxTokens) {
        out.push(buildChunk(doc, label, section.trim(), label));
        return;
    }
    // Strip the heading line before splitting on ### boundaries.
    const body = heading === null ? section : section.slice(section.indexOf("\n") + 1).trim();
    for (const subsection of splitOnHeadingLevel(body, 3)) {
        chunkSubsection(doc, subsection, heading, maxTokens, out);
    }
}
|
|
39
|
+
/** Emit chunks for one ### subsection, prefixing each with its parent ## heading. */
function chunkSubsection(doc, sub, h2Heading, maxTokens, out) {
    const h3Heading = extractHeading(sub, 3);
    const trail = [h2Heading, h3Heading].filter(Boolean).join(" > ");
    // Overlap rule: every emitted chunk repeats the parent ## heading text.
    const withParent = (text) => (h2Heading ? `${h2Heading}\n\n${text.trim()}` : text.trim());
    const whole = withParent(sub);
    if (estimateTokens(whole) <= maxTokens) {
        out.push(buildChunk(doc, trail, whole, trail));
        return;
    }
    // Too big even as a subsection: fall back to paragraph-level splitting.
    for (const part of splitByParagraphs(sub, maxTokens)) {
        out.push(buildChunk(doc, trail, withParent(part), trail));
    }
}
|
|
54
|
+
/** Assemble a Chunk record, deriving the deep-link anchor from the last breadcrumb segment. */
function buildChunk(doc, breadcrumb, content, headingForAnchor) {
    const segments = headingForAnchor.split(" > ");
    const anchor = headingToAnchor(segments[segments.length - 1] || "");
    return {
        // Content-addressed id: identical text across runs hashes identically.
        id: contentHash(content),
        docSlug: doc.slug,
        heading: breadcrumb,
        content,
        url: anchor ? `${doc.url}#${anchor}` : doc.url,
        version: doc.version,
        tokenCount: estimateTokens(content),
    };
}
|
|
68
|
+
/** Split markdown into segments that each start at a heading of the given level (the lookahead keeps the heading line). */
function splitOnHeadingLevel(content, level) {
    const marker = "#".repeat(level);
    const boundary = new RegExp(`(?=^${marker} )`, "gm");
    const pieces = content.split(boundary);
    return pieces.filter((piece) => piece.trim().length > 0);
}
|
|
73
|
+
/** True when appending `addition` (with a blank-line separator) would push `current` past the token budget. */
function wouldExceedLimit(current, addition, maxTokens) {
    const combined = `${current}\n\n${addition}`;
    return estimateTokens(combined) > maxTokens;
}
|
|
77
|
+
/** Join `piece` onto a non-empty accumulator with a blank line, otherwise begin a new one. */
function appendOrStart(current, piece) {
    return current.length > 0 ? `${current}\n\n${piece}` : piece;
}
|
|
81
|
+
/** Break content into parts of at most maxTokens at paragraph boundaries; fenced code blocks are never split. */
function splitByParagraphs(content, maxTokens) {
    // The capturing group makes each fenced block its own segment.
    const segments = content.split(/(```[\s\S]*?```)/);
    const parts = [];
    let pending = "";
    for (const segment of segments) {
        pending = segment.startsWith("```")
            ? processCodeBlock(segment, pending, maxTokens, parts)
            : processTextBlock(segment, pending, maxTokens, parts);
    }
    const leftover = pending.trim();
    if (leftover) {
        parts.push(leftover);
    }
    return parts;
}
|
|
98
|
+
/** Fold a fenced code block into the accumulator; flush first when it would not fit. */
function processCodeBlock(block, current, maxTokens, parts) {
    if (wouldExceedLimit(current, block, maxTokens)) {
        // Flush accumulated text, then let the (possibly oversized) block start fresh.
        if (current.trim()) {
            parts.push(current.trim());
        }
        return block;
    }
    return appendOrStart(current, block);
}
|
|
106
|
+
/** Fold plain text into the accumulator paragraph by paragraph, flushing whenever the budget is hit. */
function processTextBlock(block, current, maxTokens, parts) {
    let acc = current;
    for (const para of block.split(/\n\n+/)) {
        if (para.trim() === "") {
            continue;
        }
        if (wouldExceedLimit(acc, para, maxTokens)) {
            const flushed = acc.trim();
            if (flushed) {
                parts.push(flushed);
            }
            acc = para;
        }
        else {
            acc = appendOrStart(acc, para);
        }
    }
    return acc;
}
|
|
123
|
+
/** Return the first line when it is a heading of exactly the requested level, else null. */
function extractHeading(text, level) {
    const newlineAt = text.indexOf("\n");
    const firstLine = (newlineAt === -1 ? text : text.slice(0, newlineAt)).trim();
    const wanted = `${"#".repeat(level)} `;
    return firstLine.startsWith(wanted) ? firstLine : null;
}
|
|
128
|
+
/**
 * SHA-256 hex digest of chunk content.
 *
 * Drives incremental re-indexing: identical text always hashes to the
 * same id, so unchanged chunks can skip re-embedding.
 */
export function contentHash(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    return hasher.digest("hex");
}
|
|
137
|
+
/**
 * Rough token count: ~4 characters per token for English prose.
 *
 * Deliberately approximate — exact tokenization is model-specific and
 * not worth the overhead for chunk-size estimation.
 */
export function estimateTokens(text) {
    const CHARS_PER_TOKEN = 4;
    return Math.ceil(text.length / CHARS_PER_TOKEN);
}
|
|
147
|
+
/**
 * Derive a URL-safe anchor fragment from a markdown heading.
 *
 * "## Basic usage" -> "basic-usage"
 * "### Adding horizontal padding" -> "adding-horizontal-padding"
 */
export function headingToAnchor(heading) {
    // Drop the leading #'s, then normalize to lowercase kebab-case.
    const bare = heading.replace(/^#+\s*/, "").toLowerCase();
    const safe = bare.replace(/[^a-z0-9\s-]/g, "");
    const dashed = safe.replace(/\s+/g, "-").replace(/-+/g, "-");
    return dashed.replace(/^-|-$/g, "");
}
|
|
162
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/pipeline/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA+BzC,uCAAuC;AACvC,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAE/B;;;;;;;;;GASG;AACH,MAAM,UAAU,aAAa,CAAC,GAAkB,EAAE,OAAsB;IACtE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,kBAAkB,CAAC;IAC3D,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IAEnC,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IACjD,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,YAAY,CAAC,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,gEAAgE;AAChE,SAAS,YAAY,CAAC,GAAkB,EAAE,OAAe,EAAE,SAAiB,EAAE,GAAY;IACxF,MAAM,SAAS,GAAG,cAAc,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAE7C,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,SAAS,EAAE,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,SAAS,IAAI,EAAE,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC,CAAC;QAC5E,OAAO;IACT,CAAC;IAED,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAC1F,MAAM,WAAW,GAAG,mBAAmB,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAExD,KAAK,MAAM,GAAG,IAAI,WAAW,EAAE,CAAC;QAC9B,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,SAAS,EAAE,SAAS,EAAE,GAAG,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AAED,2DAA2D;AAC3D,SAAS,eAAe,CACtB,GAAkB,EAClB,GAAW,EACX,SAAwB,EACxB,SAAiB,EACjB,GAAY;IAEZ,MAAM,SAAS,GAAG,cAAc,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;IACzC,MAAM,UAAU,GAAG,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtE,MAAM,YAAY,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,OAAO,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;IAE9E,IAAI,cAAc,CAAC,YAAY,CAAC,IAAI,SAAS,EAAE,CAAC;QAC9C,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,UAAU,EAAE,YAAY,EAAE,UAAU,CAAC,CAAC,CAAC;QAChE,OAAO;IACT,CAAC;IAED,MAAM,KAAK,GAAG,iBAAiB,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IAChD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,OAAO,IAAI,CAAC,IAAI,EAAE,EAAE
,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAC/E,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,EAAE,UAAU,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC,CAAC;IACjE,CAAC;AACH,CAAC;AAED,SAAS,UAAU,CACjB,GAAkB,EAClB,UAAkB,EAClB,OAAe,EACf,gBAAwB;IAExB,MAAM,WAAW,GAAG,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC;IAEtD,OAAO;QACL,EAAE,EAAE,WAAW,CAAC,OAAO,CAAC;QACxB,OAAO,EAAE,GAAG,CAAC,IAAI;QACjB,OAAO,EAAE,UAAU;QACnB,OAAO;QACP,GAAG;QACH,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,UAAU,EAAE,cAAc,CAAC,OAAO,CAAC;KACpC,CAAC;AACJ,CAAC;AAED,SAAS,mBAAmB,CAAC,OAAe,EAAE,KAAa;IACzD,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,MAAM,IAAI,EAAE,IAAI,CAAC,CAAC;IAClD,OAAO,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;AACtD,CAAC;AAED,+EAA+E;AAC/E,SAAS,gBAAgB,CAAC,OAAe,EAAE,QAAgB,EAAE,SAAiB;IAC5E,OAAO,cAAc,CAAC,GAAG,OAAO,OAAO,QAAQ,EAAE,CAAC,GAAG,SAAS,CAAC;AACjE,CAAC;AAED,yDAAyD;AACzD,SAAS,aAAa,CAAC,OAAe,EAAE,KAAa;IACnD,OAAO,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;AACpD,CAAC;AAED,4EAA4E;AAC5E,SAAS,iBAAiB,CAAC,OAAe,EAAE,SAAiB;IAC3D,MAAM,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IACnD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAC9B,OAAO,GAAG,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC;QACjE,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,gBAAgB,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,IAAI,EAAE;QAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAE/C,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,gBAAgB,CACvB,KAAa,EACb,OAAe,EACf,SAAiB,EACjB,KAAe;IAEf,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE,KAAK,EAAE,SAAS,CAAC,EAAE,CAAC;QACjD,OAAO,aAAa,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IACvC,CAAC;IACD,IAAI,OAAO,CAAC,IAAI,EAAE
;QAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IAC/C,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,gBAAgB,CACvB,KAAa,EACb,OAAe,EACf,SAAiB,EACjB,KAAe;IAEf,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACxC,IAAI,GAAG,GAAG,OAAO,CAAC;IAElB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,SAAS;QAC3B,IAAI,CAAC,gBAAgB,CAAC,GAAG,EAAE,IAAI,EAAE,SAAS,CAAC,EAAE,CAAC;YAC5C,GAAG,GAAG,aAAa,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QACjC,CAAC;aAAM,CAAC;YACN,IAAI,GAAG,CAAC,IAAI,EAAE;gBAAE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;YACvC,GAAG,GAAG,IAAI,CAAC;QACb,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa;IACjD,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IACjC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7C,OAAO,SAAS,CAAC,UAAU,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC;AAC/D,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe;IAC7C,OAAO,OAAO;SACX,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;SACrB,WAAW,EAAE;SACb,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;SAC5B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC;SACnB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;AAC3B,CAAC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { Config } from "../utils/config.js";
|
|
2
|
+
/**
|
|
3
|
+
* Options for embedding operations.
|
|
4
|
+
*/
|
|
5
|
+
export interface EmbedOptions {
|
|
6
|
+
/** Whether to prepend the query prefix (for search queries, not documents) */
|
|
7
|
+
isQuery: boolean;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* The embedder instance, initialized with the ONNX model.
|
|
11
|
+
*/
|
|
12
|
+
export interface Embedder {
|
|
13
|
+
/** Embed a single text string */
|
|
14
|
+
embed(text: string, options?: EmbedOptions): Promise<Float32Array>;
|
|
15
|
+
/** Embed multiple texts in a batch */
|
|
16
|
+
embedBatch(texts: string[], options?: EmbedOptions): Promise<Float32Array[]>;
|
|
17
|
+
/** Check if the model is loaded */
|
|
18
|
+
isReady(): boolean;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Build the embedding input string for a chunk.
|
|
22
|
+
*
|
|
23
|
+
* Prepends metadata to improve retrieval quality:
|
|
24
|
+
* - Library context ("Tailwind CSS: {docTitle}")
|
|
25
|
+
* - Section context (heading breadcrumb)
|
|
26
|
+
* - Actual content
|
|
27
|
+
*/
|
|
28
|
+
export declare function buildEmbeddingInput(docTitle: string, heading: string, content: string): string;
|
|
29
|
+
/**
|
|
30
|
+
* Normalize a vector to unit length (L2 normalization).
|
|
31
|
+
*/
|
|
32
|
+
export declare function normalize(vector: Float32Array): Float32Array;
|
|
33
|
+
/**
|
|
34
|
+
* Try to load the embedder from local cache only (no network).
|
|
35
|
+
* Returns null if the model is not cached or cache is corrupted.
|
|
36
|
+
*
|
|
37
|
+
* CONCURRENCY: This function and `createEmbedder` both mutate the shared
|
|
38
|
+
* `env` singleton from @huggingface/transformers. They must NOT be called
|
|
39
|
+
* concurrently — always await one before starting the other.
|
|
40
|
+
*/
|
|
41
|
+
export declare function loadCachedEmbedder(config: Config): Promise<Embedder | null>;
|
|
42
|
+
/**
|
|
43
|
+
* Create an embedder, downloading the model if needed.
|
|
44
|
+
*
|
|
45
|
+
* The model (snowflake-arctic-embed-xs) is downloaded on first use and cached locally.
|
|
46
|
+
* Subsequent calls load from cache (~27 MB model file).
|
|
47
|
+
*
|
|
48
|
+
* For search queries, the model requires prepending a specific prefix:
|
|
49
|
+
* "Represent this sentence for searching relevant passages: "
|
|
50
|
+
* This is handled automatically when `isQuery: true` is passed to embed().
|
|
51
|
+
*
|
|
52
|
+
* CONCURRENCY: This function and `loadCachedEmbedder` both mutate the shared
|
|
53
|
+
* `env` singleton from @huggingface/transformers. They must NOT be called
|
|
54
|
+
* concurrently — always await one before starting the other.
|
|
55
|
+
*/
|
|
56
|
+
export declare function createEmbedder(config: Config): Promise<Embedder>;
|
|
57
|
+
//# sourceMappingURL=embedder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../src/pipeline/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,8EAA8E;IAC9E,OAAO,EAAE,OAAO,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,iCAAiC;IACjC,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IACnE,sCAAsC;IACtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;IAC7E,mCAAmC;IACnC,OAAO,IAAI,OAAO,CAAC;CACpB;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAE9F;AAED;;GAEG;AACH,wBAAgB,SAAS,CAAC,MAAM,EAAE,YAAY,GAAG,YAAY,CAa5D;AAgDD;;;;;;;GAOG;AACH,wBAAsB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,CAgBjF;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAsB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,CAAC,CAQtE"}
|