mcp-docs-scraper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +357 -0
  3. package/dist/index.d.ts +3 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +20 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/server.d.ts +6 -0
  8. package/dist/server.d.ts.map +1 -0
  9. package/dist/server.js +231 -0
  10. package/dist/server.js.map +1 -0
  11. package/dist/services/cache-manager.d.ts +100 -0
  12. package/dist/services/cache-manager.d.ts.map +1 -0
  13. package/dist/services/cache-manager.js +212 -0
  14. package/dist/services/cache-manager.js.map +1 -0
  15. package/dist/services/content-cleaner.d.ts +48 -0
  16. package/dist/services/content-cleaner.d.ts.map +1 -0
  17. package/dist/services/content-cleaner.js +295 -0
  18. package/dist/services/content-cleaner.js.map +1 -0
  19. package/dist/services/github-detector.d.ts +49 -0
  20. package/dist/services/github-detector.d.ts.map +1 -0
  21. package/dist/services/github-detector.js +276 -0
  22. package/dist/services/github-detector.js.map +1 -0
  23. package/dist/services/github-fetcher.d.ts +94 -0
  24. package/dist/services/github-fetcher.d.ts.map +1 -0
  25. package/dist/services/github-fetcher.js +393 -0
  26. package/dist/services/github-fetcher.js.map +1 -0
  27. package/dist/services/search-index.d.ts +106 -0
  28. package/dist/services/search-index.d.ts.map +1 -0
  29. package/dist/services/search-index.js +210 -0
  30. package/dist/services/search-index.js.map +1 -0
  31. package/dist/services/web-scraper.d.ts +88 -0
  32. package/dist/services/web-scraper.d.ts.map +1 -0
  33. package/dist/services/web-scraper.js +244 -0
  34. package/dist/services/web-scraper.js.map +1 -0
  35. package/dist/tools/clear-cache.d.ts +24 -0
  36. package/dist/tools/clear-cache.d.ts.map +1 -0
  37. package/dist/tools/clear-cache.js +29 -0
  38. package/dist/tools/clear-cache.js.map +1 -0
  39. package/dist/tools/detect-github.d.ts +21 -0
  40. package/dist/tools/detect-github.d.ts.map +1 -0
  41. package/dist/tools/detect-github.js +18 -0
  42. package/dist/tools/detect-github.js.map +1 -0
  43. package/dist/tools/get-content.d.ts +43 -0
  44. package/dist/tools/get-content.d.ts.map +1 -0
  45. package/dist/tools/get-content.js +84 -0
  46. package/dist/tools/get-content.js.map +1 -0
  47. package/dist/tools/get-tree.d.ts +31 -0
  48. package/dist/tools/get-tree.d.ts.map +1 -0
  49. package/dist/tools/get-tree.js +102 -0
  50. package/dist/tools/get-tree.js.map +1 -0
  51. package/dist/tools/index-docs.d.ts +63 -0
  52. package/dist/tools/index-docs.d.ts.map +1 -0
  53. package/dist/tools/index-docs.js +371 -0
  54. package/dist/tools/index-docs.js.map +1 -0
  55. package/dist/tools/index.d.ts +11 -0
  56. package/dist/tools/index.d.ts.map +1 -0
  57. package/dist/tools/index.js +11 -0
  58. package/dist/tools/index.js.map +1 -0
  59. package/dist/tools/list-cached.d.ts +19 -0
  60. package/dist/tools/list-cached.d.ts.map +1 -0
  61. package/dist/tools/list-cached.js +20 -0
  62. package/dist/tools/list-cached.js.map +1 -0
  63. package/dist/tools/search-docs.d.ts +31 -0
  64. package/dist/tools/search-docs.d.ts.map +1 -0
  65. package/dist/tools/search-docs.js +64 -0
  66. package/dist/tools/search-docs.js.map +1 -0
  67. package/dist/types/cache.d.ts +53 -0
  68. package/dist/types/cache.d.ts.map +1 -0
  69. package/dist/types/cache.js +2 -0
  70. package/dist/types/cache.js.map +1 -0
  71. package/dist/types/errors.d.ts +102 -0
  72. package/dist/types/errors.d.ts.map +1 -0
  73. package/dist/types/errors.js +216 -0
  74. package/dist/types/errors.js.map +1 -0
  75. package/dist/types/index.d.ts +6 -0
  76. package/dist/types/index.d.ts.map +1 -0
  77. package/dist/types/index.js +5 -0
  78. package/dist/types/index.js.map +1 -0
  79. package/dist/utils/fs.d.ts +45 -0
  80. package/dist/utils/fs.d.ts.map +1 -0
  81. package/dist/utils/fs.js +113 -0
  82. package/dist/utils/fs.js.map +1 -0
  83. package/dist/utils/rate-limit.d.ts +55 -0
  84. package/dist/utils/rate-limit.d.ts.map +1 -0
  85. package/dist/utils/rate-limit.js +89 -0
  86. package/dist/utils/rate-limit.js.map +1 -0
  87. package/dist/utils/url.d.ts +69 -0
  88. package/dist/utils/url.d.ts.map +1 -0
  89. package/dist/utils/url.js +251 -0
  90. package/dist/utils/url.js.map +1 -0
  91. package/package.json +58 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,357 @@
1
+ # MCP Docs Scraper
2
+
3
+ An MCP (Model Context Protocol) server that gives coding agents fast, efficient access to documentation.
4
+
5
+ ## Features
6
+
7
+ - **GitHub-first fetching** - Pulls docs directly from GitHub repos when possible (cleaner, faster)
8
+ - **Smart web scraping fallback** - Crawls and cleans docs sites when no repo is available
9
+ - **Auto-detection** - Automatically detects GitHub repos from documentation URLs
10
+ - **Full-text search** - Search within cached documentation with snippets
11
+ - **Local caching** - No duplicate fetches, works offline after initial index
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ # Clone the repository
17
+ git clone https://github.com/kwiscion/mcp-docs-scraper.git
18
+ cd mcp-docs-scraper
19
+
20
+ # Install dependencies
21
+ pnpm install
22
+
23
+ # Build
24
+ pnpm build
25
+ ```
26
+
27
+ ## Configuration
28
+
29
+ ### Using npx (Recommended)
30
+
31
+ **Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%\Claude\claude_desktop_config.json` on Windows):
32
+
33
+ ```json
34
+ {
35
+ "mcpServers": {
36
+ "docs-scraper": {
37
+ "command": "npx",
38
+ "args": ["-y", "mcp-docs-scraper"]
39
+ }
40
+ }
41
+ }
42
+ ```
43
+
44
+ **Cursor** (`.cursor/mcp.json`):
45
+
46
+ ```json
47
+ {
48
+ "mcpServers": {
49
+ "docs-scraper": {
50
+ "command": "npx",
51
+ "args": ["-y", "mcp-docs-scraper"]
52
+ }
53
+ }
54
+ }
55
+ ```
56
+
57
+ ### Using Local Installation
58
+
59
+ If you've cloned and built the repo locally:
60
+
61
+ ```json
62
+ {
63
+ "mcpServers": {
64
+ "docs-scraper": {
65
+ "command": "node",
66
+ "args": ["/absolute/path/to/mcp-docs-scraper/dist/index.js"]
67
+ }
68
+ }
69
+ }
70
+ ```
71
+
72
+ ### With GitHub Token (Optional)
73
+
74
+ For higher API rate limits (5000/hour vs 60/hour), set a GitHub token:
75
+
76
+ ```json
77
+ {
78
+ "mcpServers": {
79
+ "docs-scraper": {
80
+ "command": "npx",
81
+ "args": ["-y", "mcp-docs-scraper"],
82
+ "env": {
83
+ "GITHUB_TOKEN": "ghp_your_token_here"
84
+ }
85
+ }
86
+ }
87
+ }
88
+ ```
89
+
90
+ ## Available Tools
91
+
92
+ | Tool | Description |
93
+ | -------------------- | ------------------------------------------------ |
94
+ | `ping` | Health check - returns pong |
95
+ | `index_docs` | Fetch and cache documentation from GitHub or web |
96
+ | `get_docs_tree` | Get hierarchical structure of cached docs |
97
+ | `get_docs_content` | Retrieve content of specific doc files |
98
+ | `search_docs` | Full-text search within cached docs |
99
+ | `detect_github_repo` | Find GitHub repo from a docs website URL |
100
+ | `list_cached_docs` | List all cached documentation |
101
+ | `clear_cache` | Remove cached documentation |
102
+
103
+ ### `index_docs`
104
+
105
+ Fetch and cache documentation from a GitHub repository or website.
106
+
107
+ ```typescript
108
+ // Index from GitHub (auto-detected)
109
+ index_docs({ url: "https://github.com/colinhacks/zod" });
110
+
111
+ // Index from GitHub via docs site (auto-detection)
112
+ index_docs({ url: "https://zod.dev" });
113
+
114
+ // Force web scraping
115
+ index_docs({ url: "https://docs.example.com", type: "scrape", depth: 2 });
116
+
117
+ // Re-index (ignore cache)
118
+ index_docs({ url: "https://github.com/owner/repo", force_refresh: true });
119
+ ```
120
+
121
+ **Returns:**
122
+
123
+ ```json
124
+ {
125
+ "id": "colinhacks_zod",
126
+ "source": "github",
127
+ "repo": "colinhacks/zod",
128
+ "tree": [...],
129
+ "stats": {
130
+ "pages": 15,
131
+ "total_size_bytes": 245000,
132
+ "indexed_at": "2025-01-07T..."
133
+ }
134
+ }
135
+ ```
136
+
137
+ ### `get_docs_tree`
138
+
139
+ Get the file tree for cached documentation.
140
+
141
+ ```typescript
142
+ // Full tree
143
+ get_docs_tree({ docs_id: "colinhacks_zod" });
144
+
145
+ // Subtree only
146
+ get_docs_tree({ docs_id: "colinhacks_zod", path: "docs/", max_depth: 2 });
147
+ ```
148
+
149
+ ### `search_docs`
150
+
151
+ Full-text search within cached documentation.
152
+
153
+ ```typescript
154
+ search_docs({
155
+ docs_id: "colinhacks_zod",
156
+ query: "custom validation",
157
+ limit: 10,
158
+ });
159
+ ```
160
+
161
+ **Returns:**
162
+
163
+ ```json
164
+ {
165
+ "docs_id": "colinhacks_zod",
166
+ "query": "custom validation",
167
+ "results": [
168
+ {
169
+ "path": "README.md",
170
+ "title": "Zod",
171
+ "snippet": "...you can create custom validators using...",
172
+ "score": 12.5
173
+ }
174
+ ]
175
+ }
176
+ ```
177
+
178
+ ### `get_docs_content`
179
+
180
+ Retrieve actual content of specific files from cache.
181
+
182
+ ```typescript
183
+ get_docs_content({
184
+ docs_id: "colinhacks_zod",
185
+ paths: ["README.md", "docs/guide.md"],
186
+ });
187
+ ```
188
+
189
+ **Returns:**
190
+
191
+ ```json
192
+ {
193
+ "docs_id": "colinhacks_zod",
194
+ "contents": {
195
+ "README.md": {
196
+ "content": "# Zod\n\nTypeScript-first schema validation...",
197
+ "title": "Zod",
198
+ "headings": ["# Zod", "## Installation", "## Basic Usage"],
199
+ "size_bytes": 15234
200
+ }
201
+ },
202
+ "not_found": []
203
+ }
204
+ ```
205
+
206
+ ### `detect_github_repo`
207
+
208
+ Find GitHub repository from a documentation website URL.
209
+
210
+ ```typescript
211
+ detect_github_repo({ url: "https://zod.dev" });
212
+ ```
213
+
214
+ **Returns:**
215
+
216
+ ```json
217
+ {
218
+ "found": true,
219
+ "repo": "colinhacks/zod",
220
+ "confidence": "high",
221
+ "detection_method": "github_links"
222
+ }
223
+ ```
224
+
225
+ ### `list_cached_docs`
226
+
227
+ List all documentation sets in the local cache.
228
+
229
+ ```typescript
230
+ list_cached_docs();
231
+ ```
232
+
233
+ ### `clear_cache`
234
+
235
+ Remove cached documentation.
236
+
237
+ ```typescript
238
+ // Clear specific entry
239
+ clear_cache({ docs_id: "colinhacks_zod" });
240
+
241
+ // Clear all
242
+ clear_cache({ all: true });
243
+ ```
244
+
245
+ ## Usage Example
246
+
247
+ Here's a typical workflow for an AI coding agent:
248
+
249
+ 1. **Find the docs source:**
250
+
251
+ ```
252
+ detect_github_repo({ url: "https://zod.dev" })
253
+ → { found: true, repo: "colinhacks/zod" }
254
+ ```
255
+
256
+ 2. **Index the documentation:**
257
+
258
+ ```
259
+ index_docs({ url: "https://github.com/colinhacks/zod" })
260
+ → { id: "colinhacks_zod", tree: [...] }
261
+ ```
262
+
263
+ 3. **Browse the structure:**
264
+
265
+ ```
266
+ get_docs_tree({ docs_id: "colinhacks_zod" })
267
+ → Returns hierarchical file tree
268
+ ```
269
+
270
+ 4. **Search for specific topics:**
271
+
272
+ ```
273
+ search_docs({ docs_id: "colinhacks_zod", query: "transform" })
274
+ → Returns matching files with snippets
275
+ ```
276
+
277
+ 5. **Get the content you need:**
278
+ ```
279
+ get_docs_content({ docs_id: "colinhacks_zod", paths: ["README.md"] })
280
+ → Returns full markdown content
281
+ ```
282
+
283
+ ## Cache Location
284
+
285
+ Documentation is cached locally at:
286
+
287
+ - **macOS/Linux:** `~/.mcp-docs-cache/`
288
+ - **Windows:** `%USERPROFILE%\.mcp-docs-cache\`
289
+
290
+ Structure:
291
+
292
+ ```
293
+ .mcp-docs-cache/
294
+ ├── github/
295
+ │ └── owner_repo/
296
+ │ ├── meta.json
297
+ │ ├── search-index.json
298
+ │ └── content/
299
+ │ └── *.md
300
+ └── scraped/
301
+ └── domain_path/
302
+ ├── meta.json
303
+ ├── search-index.json
304
+ └── content/
305
+ └── *.md
306
+ ```
307
+
308
+ ## Troubleshooting
309
+
310
+ ### "GitHub API rate limit exceeded"
311
+
312
+ **Solution:** Set a `GITHUB_TOKEN` environment variable for higher rate limits (see Configuration).
313
+
314
+ ### "Documentation not found in cache"
315
+
316
+ **Solution:** Run `index_docs` first to fetch and cache the documentation.
317
+
318
+ ### "No content found" when scraping
319
+
320
+ **Possible causes:**
321
+
322
+ - The site blocks automated access
323
+ - The URL doesn't contain documentation content
324
+ - The URL is too broad (try a more specific one, e.g., `/docs` instead of the homepage)
325
+
326
+ **Solution:** Try using `detect_github_repo` to find a GitHub source instead.
327
+
328
+ ### "Website blocked automated access"
329
+
330
+ **Solution:** The site's robots.txt or security settings prevent scraping. Try:
331
+
332
+ 1. Use `detect_github_repo` to find a GitHub alternative
333
+ 2. Try a different starting URL
334
+ 3. If a GitHub repository is found, index it directly with `index_docs`
335
+
336
+ ## Development
337
+
338
+ ```bash
339
+ # Install dependencies
340
+ pnpm install
341
+
342
+ # Build TypeScript
343
+ pnpm build
344
+
345
+ # Run the server
346
+ pnpm start
347
+
348
+ # Development mode (with tsx)
349
+ pnpm dev
350
+ ```
351
+
352
+ - Requires Node.js 22.0.0 or higher
353
+ - Requires pnpm (recommended) or npm as the package manager
354
+
355
+ ## License
356
+
357
+ MIT License - see [LICENSE](LICENSE) file for details.
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
package/dist/index.js ADDED
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env node
2
+ import { createServer } from "./server.js";
3
/**
 * Process entry point: builds the MCP server, installs signal handlers for
 * graceful shutdown, and then runs the server.
 *
 * @returns {Promise<void>} Resolves once `server.run()` resolves.
 */
async function main() {
    const server = createServer();

    /**
     * Close the server and terminate the process.
     *
     * A failing `server.close()` is logged and turned into a non-zero exit
     * code instead of surfacing as an unhandled promise rejection inside the
     * signal listener.
     *
     * @param {string} signal - Name of the signal that triggered the shutdown.
     * @returns {Promise<void>}
     */
    const shutdown = async (signal) => {
        let exitCode = 0;
        try {
            await server.close();
        }
        catch (error) {
            console.error(`Error while closing server on ${signal}:`, error);
            exitCode = 1;
        }
        process.exit(exitCode);
    };

    // Handle graceful shutdown: both signals share the same handler.
    for (const signal of ["SIGINT", "SIGTERM"]) {
        process.on(signal, () => {
            // `shutdown` handles its own errors, so the promise cannot reject.
            void shutdown(signal);
        });
    }

    await server.run();
}

// Any startup failure is fatal: report it on stderr and exit non-zero.
main().catch((error) => {
    console.error("Fatal error:", error);
    process.exit(1);
});
20
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,KAAK,UAAU,IAAI;IACjB,MAAM,MAAM,GAAG,YAAY,EAAE,CAAC;IAE9B,2BAA2B;IAC3B,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,KAAK,IAAI,EAAE;QAC9B,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,KAAK,IAAI,EAAE;QAC/B,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC,CAAC,CAAC;IAEH,MAAM,MAAM,CAAC,GAAG,EAAE,CAAC;AACrB,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,CAAC;IACrC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,6 @@
1
+ export interface DocsScraperServer {
2
+ run(): Promise<void>;
3
+ close(): Promise<void>;
4
+ }
5
+ export declare function createServer(): DocsScraperServer;
6
+ //# sourceMappingURL=server.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAcA,MAAM,WAAW,iBAAiB;IAChC,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACxB;AAED,wBAAgB,YAAY,IAAI,iBAAiB,CAgRhD"}
package/dist/server.js ADDED
@@ -0,0 +1,231 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { z } from "zod";
4
+ import { listCachedDocs, clearCache, indexDocs, getDocsTree, getDocsContent, searchDocs, detectGitHub, } from "./tools/index.js";
5
+ import { createErrorResponse } from "./types/errors.js";
6
/**
 * Wrap a tool result in the text-content envelope used by every tool in this
 * server: a single "text" item holding the pretty-printed JSON of `payload`.
 *
 * @param {unknown} payload - JSON-serializable tool result.
 * @returns {{ content: Array<{ type: "text", text: string }> }}
 */
function toJsonResponse(payload) {
    return {
        content: [
            {
                type: "text",
                text: JSON.stringify(payload, null, 2),
            },
        ],
    };
}

/**
 * Build a tool callback that runs `run`, returns its result as a JSON text
 * response, and converts any thrown error through `createErrorResponse`.
 *
 * @param {(args: any) => Promise<unknown>} run - Tool implementation.
 * @returns {(args: any) => Promise<object>} Callback suitable for `registerTool`.
 */
function withErrorResponse(run) {
    return async (args) => {
        try {
            return toJsonResponse(await run(args));
        }
        catch (error) {
            return createErrorResponse(error);
        }
    };
}

/**
 * Create the MCP docs-scraper server with all tools registered.
 *
 * Registers: ping, list_cached_docs, clear_cache, index_docs, get_docs_tree,
 * get_docs_content, search_docs and detect_github_repo.
 *
 * @returns {{ run(): Promise<void>, close(): Promise<void> }} `run` connects
 *   the server to a stdio transport; `close` closes the server.
 */
export function createServer() {
    const server = new McpServer({
        name: "mcp-docs-scraper",
        version: "0.1.0",
    });

    // Register the ping tool - a simple health check
    server.registerTool("ping", {
        title: "Ping",
        description: "Health check tool - returns pong",
        inputSchema: {},
    }, async () => toJsonResponse({ message: "pong" }));

    // Register list_cached_docs tool.
    // Errors go through createErrorResponse, like the other tools below.
    server.registerTool("list_cached_docs", {
        title: "List Cached Docs",
        description: "List all documentation sets in the local cache",
        inputSchema: {},
    }, withErrorResponse(() => listCachedDocs()));

    // Register clear_cache tool.
    // Errors go through createErrorResponse, like the other tools below.
    server.registerTool("clear_cache", {
        title: "Clear Cache",
        description: "Remove cached documentation. Pass docs_id to clear specific entry, or all:true to clear everything.",
        inputSchema: {
            docs_id: z
                .string()
                .optional()
                .describe("Specific docs ID to clear (optional)"),
            all: z
                .boolean()
                .optional()
                .describe("Clear all cached docs (default: false)"),
        },
    }, withErrorResponse(({ docs_id, all }) => clearCache({ docs_id, all })));

    // Register index_docs tool.
    // NOTE(review): the package README shows `index_docs({ ..., depth: 2 })`,
    // but no `depth` field is declared in this schema and the handler does not
    // forward one to indexDocs - confirm whether it should be exposed here.
    // NOTE(review): the description and the `url` hint mention only GitHub,
    // although `type` also accepts "scrape" and "auto" - confirm the wording.
    server.registerTool("index_docs", {
        title: "Index Docs",
        description: "Fetch and cache documentation from a GitHub repository. Downloads markdown files and stores them locally for fast access.",
        inputSchema: {
            url: z
                .string()
                .describe("GitHub repository URL (e.g., https://github.com/owner/repo)"),
            type: z
                .enum(["github", "scrape", "auto"])
                .optional()
                .describe('Source type: "github", "scrape", or "auto" (default: auto)'),
            force_refresh: z
                .boolean()
                .optional()
                .describe("Ignore cache and re-fetch (default: false)"),
        },
    }, withErrorResponse(({ url, type, force_refresh }) => indexDocs({ url, type, force_refresh })));

    // Register get_docs_tree tool
    server.registerTool("get_docs_tree", {
        title: "Get Docs Tree",
        description: "Get the hierarchical file tree for cached documentation. Use after index_docs to browse available files.",
        inputSchema: {
            docs_id: z
                .string()
                .describe("The docs ID from index_docs response (required)"),
            path: z
                .string()
                .optional()
                .describe("Subtree path to filter (optional, default: root)"),
            max_depth: z
                .number()
                .optional()
                .describe("Maximum depth to return (optional, default: unlimited)"),
        },
    }, withErrorResponse(({ docs_id, path, max_depth }) => getDocsTree({ docs_id, path, max_depth })));

    // Register get_docs_content tool
    server.registerTool("get_docs_content", {
        title: "Get Docs Content",
        description: "Retrieve actual content of specific doc files from cache. Returns content with extracted headings for navigation.",
        inputSchema: {
            docs_id: z
                .string()
                .describe("The docs ID from index_docs response (required)"),
            paths: z
                .array(z.string())
                .describe("Array of file paths to fetch (required)"),
            format: z
                .enum(["markdown", "raw"])
                .optional()
                .describe('Output format: "markdown" or "raw" (default: markdown)'),
        },
    }, withErrorResponse(({ docs_id, paths, format }) => getDocsContent({ docs_id, paths, format })));

    // Register search_docs tool
    server.registerTool("search_docs", {
        title: "Search Docs",
        description: "Full-text search within cached documentation. Returns relevant results with matching snippets.",
        inputSchema: {
            docs_id: z
                .string()
                .describe("The docs ID from index_docs response (required)"),
            query: z.string().describe("Search query (required)"),
            limit: z
                .number()
                .optional()
                .describe("Max results to return (default: 10, max: 50)"),
        },
    }, withErrorResponse(({ docs_id, query, limit }) => searchDocs({ docs_id, query, limit })));

    // Register detect_github_repo tool
    server.registerTool("detect_github_repo", {
        title: "Detect GitHub Repo",
        description: "Find GitHub repository from a documentation website URL. Use before index_docs to check if a site has a GitHub repo.",
        inputSchema: {
            url: z
                .string()
                .describe("Docs website URL to analyze (e.g., https://zod.dev)"),
        },
    }, withErrorResponse(({ url }) => detectGitHub({ url })));

    const transport = new StdioServerTransport();
    return {
        async run() {
            await server.connect(transport);
            // Status goes to stderr via console.error, not stdout.
            console.error("MCP Docs Scraper server running on stdio");
        },
        async close() {
            await server.close();
        },
    };
}
231
+ //# sourceMappingURL=server.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"server.js","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EACL,cAAc,EACd,UAAU,EACV,SAAS,EACT,WAAW,EACX,cAAc,EACd,UAAU,EACV,YAAY,GACb,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAOxD,MAAM,UAAU,YAAY;IAC1B,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QAC3B,IAAI,EAAE,kBAAkB;QACxB,OAAO,EAAE,OAAO;KACjB,CAAC,CAAC;IAEH,iDAAiD;IACjD,MAAM,CAAC,YAAY,CACjB,MAAM,EACN;QACE,KAAK,EAAE,MAAM;QACb,WAAW,EAAE,kCAAkC;QAC/C,WAAW,EAAE,EAAE;KAChB,EACD,KAAK,IAAI,EAAE;QACT,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;iBACnD;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,iCAAiC;IACjC,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,gDAAgD;QAC7D,WAAW,EAAE,EAAE;KAChB,EACD,KAAK,IAAI,EAAE;QACT,MAAM,MAAM,GAAG,MAAM,cAAc,EAAE,CAAC;QACtC,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;iBACtC;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,4BAA4B;IAC5B,MAAM,CAAC,YAAY,CACjB,aAAa,EACb;QACE,KAAK,EAAE,aAAa;QACpB,WAAW,EACT,qGAAqG;QACvG,WAAW,EAAE;YACX,OAAO,EAAE,CAAC;iBACP,MAAM,EAAE;iBACR,QAAQ,EAAE;iBACV,QAAQ,CAAC,sCAAsC,CAAC;YACnD,GAAG,EAAE,CAAC;iBACH,OAAO,EAAE;iBACT,QAAQ,EAAE;iBACV,QAAQ,CAAC,wCAAwC,CAAC;SACtD;KACF,EACD,KAAK,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,EAAE,EAAE;QACzB,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,EAAE,OAAO,EAAE,GAAG,EAAE,CAAC,CAAC;QAClD,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;iBACtC;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,2BAA2B;IAC3B,MAAM,CAAC,YAAY,CACjB,YAAY,EACZ;QACE,KAAK,EAAE,YAAY;QACnB,WAAW,EACT,2HAA2H;QAC7H,WAAW,EAAE;YACX,GAAG,EAAE,CAAC;iBACH,MAAM,EAAE;iBACR,QAAQ,CACP,6DAA6D,CAC9D;YACH,IAAI,EAAE,CAAC;iBACJ,IAAI,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,MAAM,CAAC,CAAC;iBAClC,QAAQ,EAAE;iBACV,QAAQ,C
ACP,4DAA4D,CAC7D;YACH,aAAa,EAAE,CAAC;iBACb,OAAO,EAAE;iBACT,QAAQ,EAAE;iBACV,QAAQ,CAAC,4CAA4C,CAAC;SAC1D;KACF,EACD,KAAK,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,aAAa,EAAE,EAAE,EAAE;QACrC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,CAAC;YAC7D,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;qBACtC;iBACF;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CACF,CAAC;IAEF,8BAA8B;IAC9B,MAAM,CAAC,YAAY,CACjB,eAAe,EACf;QACE,KAAK,EAAE,eAAe;QACtB,WAAW,EACT,0GAA0G;QAC5G,WAAW,EAAE;YACX,OAAO,EAAE,CAAC;iBACP,MAAM,EAAE;iBACR,QAAQ,CAAC,iDAAiD,CAAC;YAC9D,IAAI,EAAE,CAAC;iBACJ,MAAM,EAAE;iBACR,QAAQ,EAAE;iBACV,QAAQ,CAAC,kDAAkD,CAAC;YAC/D,SAAS,EAAE,CAAC;iBACT,MAAM,EAAE;iBACR,QAAQ,EAAE;iBACV,QAAQ,CAAC,wDAAwD,CAAC;SACtE;KACF,EACD,KAAK,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,EAAE,EAAE;QACrC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,CAAC;YAC/D,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;qBACtC;iBACF;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CACF,CAAC;IAEF,iCAAiC;IACjC,MAAM,CAAC,YAAY,CACjB,kBAAkB,EAClB;QACE,KAAK,EAAE,kBAAkB;QACzB,WAAW,EACT,mHAAmH;QACrH,WAAW,EAAE;YACX,OAAO,EAAE,CAAC;iBACP,MAAM,EAAE;iBACR,QAAQ,CAAC,iDAAiD,CAAC;YAC9D,KAAK,EAAE,CAAC;iBACL,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC;iBACjB,QAAQ,CAAC,yCAAyC,CAAC;YACtD,MAAM,EAAE,CAAC;iBACN,IAAI,CAAC,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;iBACzB,QAAQ,EAAE;iBACV,QAAQ,CAAC,wDAAwD,CAAC;SACtE;KACF,EACD,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE;QACnC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,cAAc,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YAChE,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;qBACtC;iBACF;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,mBAAmB,C
AAC,KAAK,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CACF,CAAC;IAEF,4BAA4B;IAC5B,MAAM,CAAC,YAAY,CACjB,aAAa,EACb;QACE,KAAK,EAAE,aAAa;QACpB,WAAW,EACT,gGAAgG;QAClG,WAAW,EAAE;YACX,OAAO,EAAE,CAAC;iBACP,MAAM,EAAE;iBACR,QAAQ,CAAC,iDAAiD,CAAC;YAC9D,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC;YACrD,KAAK,EAAE,CAAC;iBACL,MAAM,EAAE;iBACR,QAAQ,EAAE;iBACV,QAAQ,CAAC,8CAA8C,CAAC;SAC5D;KACF,EACD,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;QAClC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YAC3D,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;qBACtC;iBACF;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CACF,CAAC;IAEF,mCAAmC;IACnC,MAAM,CAAC,YAAY,CACjB,oBAAoB,EACpB;QACE,KAAK,EAAE,oBAAoB;QAC3B,WAAW,EACT,sHAAsH;QACxH,WAAW,EAAE;YACX,GAAG,EAAE,CAAC;iBACH,MAAM,EAAE;iBACR,QAAQ,CAAC,qDAAqD,CAAC;SACnE;KACF,EACD,KAAK,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;QAChB,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC;YAC3C,OAAO;gBACL,OAAO,EAAE;oBACP;wBACE,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;qBACtC;iBACF;aACF,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CACF,CAAC;IAEF,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAE7C,OAAO;QACL,KAAK,CAAC,GAAG;YACP,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;YAChC,OAAO,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QAC5D,CAAC;QAED,KAAK,CAAC,KAAK;YACT,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;QACvB,CAAC;KACF,CAAC;AACJ,CAAC"}