@cosmocoder/mcp-web-docs 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +368 -0
- package/build/__mocks__/embeddings.d.ts +17 -0
- package/build/__mocks__/embeddings.js +66 -0
- package/build/__mocks__/embeddings.js.map +1 -0
- package/build/config.d.ts +44 -0
- package/build/config.js +158 -0
- package/build/config.js.map +1 -0
- package/build/config.test.d.ts +1 -0
- package/build/config.test.js +165 -0
- package/build/config.test.js.map +1 -0
- package/build/crawler/auth.d.ts +128 -0
- package/build/crawler/auth.js +546 -0
- package/build/crawler/auth.js.map +1 -0
- package/build/crawler/auth.test.d.ts +1 -0
- package/build/crawler/auth.test.js +174 -0
- package/build/crawler/auth.test.js.map +1 -0
- package/build/crawler/base.d.ts +24 -0
- package/build/crawler/base.js +149 -0
- package/build/crawler/base.js.map +1 -0
- package/build/crawler/base.test.d.ts +1 -0
- package/build/crawler/base.test.js +234 -0
- package/build/crawler/base.test.js.map +1 -0
- package/build/crawler/browser-config.d.ts +2 -0
- package/build/crawler/browser-config.js +29 -0
- package/build/crawler/browser-config.js.map +1 -0
- package/build/crawler/browser-config.test.d.ts +1 -0
- package/build/crawler/browser-config.test.js +56 -0
- package/build/crawler/browser-config.test.js.map +1 -0
- package/build/crawler/cheerio.d.ts +11 -0
- package/build/crawler/cheerio.js +134 -0
- package/build/crawler/cheerio.js.map +1 -0
- package/build/crawler/chromium.d.ts +21 -0
- package/build/crawler/chromium.js +596 -0
- package/build/crawler/chromium.js.map +1 -0
- package/build/crawler/content-extractor-types.d.ts +25 -0
- package/build/crawler/content-extractor-types.js +2 -0
- package/build/crawler/content-extractor-types.js.map +1 -0
- package/build/crawler/content-extractors.d.ts +9 -0
- package/build/crawler/content-extractors.js +9 -0
- package/build/crawler/content-extractors.js.map +1 -0
- package/build/crawler/content-utils.d.ts +2 -0
- package/build/crawler/content-utils.js +22 -0
- package/build/crawler/content-utils.js.map +1 -0
- package/build/crawler/content-utils.test.d.ts +1 -0
- package/build/crawler/content-utils.test.js +99 -0
- package/build/crawler/content-utils.test.js.map +1 -0
- package/build/crawler/crawlee-crawler.d.ts +63 -0
- package/build/crawler/crawlee-crawler.js +342 -0
- package/build/crawler/crawlee-crawler.js.map +1 -0
- package/build/crawler/crawlee-crawler.test.d.ts +1 -0
- package/build/crawler/crawlee-crawler.test.js +280 -0
- package/build/crawler/crawlee-crawler.test.js.map +1 -0
- package/build/crawler/default-extractor.d.ts +4 -0
- package/build/crawler/default-extractor.js +26 -0
- package/build/crawler/default-extractor.js.map +1 -0
- package/build/crawler/default-extractor.test.d.ts +1 -0
- package/build/crawler/default-extractor.test.js +200 -0
- package/build/crawler/default-extractor.test.js.map +1 -0
- package/build/crawler/default.d.ts +11 -0
- package/build/crawler/default.js +138 -0
- package/build/crawler/default.js.map +1 -0
- package/build/crawler/docs-crawler.d.ts +26 -0
- package/build/crawler/docs-crawler.js +97 -0
- package/build/crawler/docs-crawler.js.map +1 -0
- package/build/crawler/docs-crawler.test.d.ts +1 -0
- package/build/crawler/docs-crawler.test.js +185 -0
- package/build/crawler/docs-crawler.test.js.map +1 -0
- package/build/crawler/factory.d.ts +6 -0
- package/build/crawler/factory.js +83 -0
- package/build/crawler/factory.js.map +1 -0
- package/build/crawler/github-pages-extractor.d.ts +4 -0
- package/build/crawler/github-pages-extractor.js +33 -0
- package/build/crawler/github-pages-extractor.js.map +1 -0
- package/build/crawler/github-pages-extractor.test.d.ts +1 -0
- package/build/crawler/github-pages-extractor.test.js +184 -0
- package/build/crawler/github-pages-extractor.test.js.map +1 -0
- package/build/crawler/github.d.ts +20 -0
- package/build/crawler/github.js +181 -0
- package/build/crawler/github.js.map +1 -0
- package/build/crawler/github.test.d.ts +1 -0
- package/build/crawler/github.test.js +326 -0
- package/build/crawler/github.test.js.map +1 -0
- package/build/crawler/puppeteer.d.ts +16 -0
- package/build/crawler/puppeteer.js +191 -0
- package/build/crawler/puppeteer.js.map +1 -0
- package/build/crawler/queue-manager.d.ts +43 -0
- package/build/crawler/queue-manager.js +169 -0
- package/build/crawler/queue-manager.js.map +1 -0
- package/build/crawler/queue-manager.test.d.ts +1 -0
- package/build/crawler/queue-manager.test.js +509 -0
- package/build/crawler/queue-manager.test.js.map +1 -0
- package/build/crawler/site-rules.d.ts +11 -0
- package/build/crawler/site-rules.js +104 -0
- package/build/crawler/site-rules.js.map +1 -0
- package/build/crawler/site-rules.test.d.ts +1 -0
- package/build/crawler/site-rules.test.js +139 -0
- package/build/crawler/site-rules.test.js.map +1 -0
- package/build/crawler/storybook-extractor.d.ts +34 -0
- package/build/crawler/storybook-extractor.js +767 -0
- package/build/crawler/storybook-extractor.js.map +1 -0
- package/build/crawler/storybook-extractor.test.d.ts +1 -0
- package/build/crawler/storybook-extractor.test.js +491 -0
- package/build/crawler/storybook-extractor.test.js.map +1 -0
- package/build/embeddings/fastembed.d.ts +25 -0
- package/build/embeddings/fastembed.js +188 -0
- package/build/embeddings/fastembed.js.map +1 -0
- package/build/embeddings/fastembed.test.d.ts +1 -0
- package/build/embeddings/fastembed.test.js +307 -0
- package/build/embeddings/fastembed.test.js.map +1 -0
- package/build/embeddings/openai.d.ts +8 -0
- package/build/embeddings/openai.js +56 -0
- package/build/embeddings/openai.js.map +1 -0
- package/build/embeddings/types.d.ts +4 -0
- package/build/embeddings/types.js +2 -0
- package/build/embeddings/types.js.map +1 -0
- package/build/index.d.ts +2 -0
- package/build/index.js +1007 -0
- package/build/index.js.map +1 -0
- package/build/index.test.d.ts +1 -0
- package/build/index.test.js +364 -0
- package/build/index.test.js.map +1 -0
- package/build/indexing/queue-manager.d.ts +36 -0
- package/build/indexing/queue-manager.js +86 -0
- package/build/indexing/queue-manager.js.map +1 -0
- package/build/indexing/queue-manager.test.d.ts +1 -0
- package/build/indexing/queue-manager.test.js +257 -0
- package/build/indexing/queue-manager.test.js.map +1 -0
- package/build/indexing/status.d.ts +39 -0
- package/build/indexing/status.js +207 -0
- package/build/indexing/status.js.map +1 -0
- package/build/indexing/status.test.d.ts +1 -0
- package/build/indexing/status.test.js +246 -0
- package/build/indexing/status.test.js.map +1 -0
- package/build/processor/content.d.ts +16 -0
- package/build/processor/content.js +286 -0
- package/build/processor/content.js.map +1 -0
- package/build/processor/content.test.d.ts +1 -0
- package/build/processor/content.test.js +369 -0
- package/build/processor/content.test.js.map +1 -0
- package/build/processor/markdown.d.ts +11 -0
- package/build/processor/markdown.js +256 -0
- package/build/processor/markdown.js.map +1 -0
- package/build/processor/markdown.test.d.ts +1 -0
- package/build/processor/markdown.test.js +312 -0
- package/build/processor/markdown.test.js.map +1 -0
- package/build/processor/metadata-parser.d.ts +37 -0
- package/build/processor/metadata-parser.js +245 -0
- package/build/processor/metadata-parser.js.map +1 -0
- package/build/processor/metadata-parser.test.d.ts +1 -0
- package/build/processor/metadata-parser.test.js +357 -0
- package/build/processor/metadata-parser.test.js.map +1 -0
- package/build/processor/processor.d.ts +8 -0
- package/build/processor/processor.js +190 -0
- package/build/processor/processor.js.map +1 -0
- package/build/processor/processor.test.d.ts +1 -0
- package/build/processor/processor.test.js +357 -0
- package/build/processor/processor.test.js.map +1 -0
- package/build/rag/cache.d.ts +10 -0
- package/build/rag/cache.js +10 -0
- package/build/rag/cache.js.map +1 -0
- package/build/rag/code-generator.d.ts +11 -0
- package/build/rag/code-generator.js +30 -0
- package/build/rag/code-generator.js.map +1 -0
- package/build/rag/context-assembler.d.ts +23 -0
- package/build/rag/context-assembler.js +113 -0
- package/build/rag/context-assembler.js.map +1 -0
- package/build/rag/docs-search.d.ts +55 -0
- package/build/rag/docs-search.js +380 -0
- package/build/rag/docs-search.js.map +1 -0
- package/build/rag/pipeline.d.ts +26 -0
- package/build/rag/pipeline.js +91 -0
- package/build/rag/pipeline.js.map +1 -0
- package/build/rag/query-processor.d.ts +14 -0
- package/build/rag/query-processor.js +57 -0
- package/build/rag/query-processor.js.map +1 -0
- package/build/rag/reranker.d.ts +55 -0
- package/build/rag/reranker.js +210 -0
- package/build/rag/reranker.js.map +1 -0
- package/build/rag/response-generator.d.ts +20 -0
- package/build/rag/response-generator.js +101 -0
- package/build/rag/response-generator.js.map +1 -0
- package/build/rag/retriever.d.ts +19 -0
- package/build/rag/retriever.js +111 -0
- package/build/rag/retriever.js.map +1 -0
- package/build/rag/validator.d.ts +22 -0
- package/build/rag/validator.js +128 -0
- package/build/rag/validator.js.map +1 -0
- package/build/rag/version-manager.d.ts +23 -0
- package/build/rag/version-manager.js +98 -0
- package/build/rag/version-manager.js.map +1 -0
- package/build/setupTests.d.ts +4 -0
- package/build/setupTests.js +50 -0
- package/build/setupTests.js.map +1 -0
- package/build/storage/storage.d.ts +38 -0
- package/build/storage/storage.js +700 -0
- package/build/storage/storage.js.map +1 -0
- package/build/storage/storage.test.d.ts +1 -0
- package/build/storage/storage.test.js +338 -0
- package/build/storage/storage.test.js.map +1 -0
- package/build/types/rag.d.ts +27 -0
- package/build/types/rag.js +2 -0
- package/build/types/rag.js.map +1 -0
- package/build/types.d.ts +120 -0
- package/build/types.js +2 -0
- package/build/types.js.map +1 -0
- package/build/util/content-utils.d.ts +31 -0
- package/build/util/content-utils.js +120 -0
- package/build/util/content-utils.js.map +1 -0
- package/build/util/content.d.ts +1 -0
- package/build/util/content.js +16 -0
- package/build/util/content.js.map +1 -0
- package/build/util/docs.d.ts +1 -0
- package/build/util/docs.js +26 -0
- package/build/util/docs.js.map +1 -0
- package/build/util/docs.test.d.ts +1 -0
- package/build/util/docs.test.js +49 -0
- package/build/util/docs.test.js.map +1 -0
- package/build/util/favicon.d.ts +6 -0
- package/build/util/favicon.js +88 -0
- package/build/util/favicon.js.map +1 -0
- package/build/util/favicon.test.d.ts +1 -0
- package/build/util/favicon.test.js +140 -0
- package/build/util/favicon.test.js.map +1 -0
- package/build/util/logger.d.ts +17 -0
- package/build/util/logger.js +72 -0
- package/build/util/logger.js.map +1 -0
- package/build/util/logger.test.d.ts +1 -0
- package/build/util/logger.test.js +46 -0
- package/build/util/logger.test.js.map +1 -0
- package/build/util/security.d.ts +312 -0
- package/build/util/security.js +719 -0
- package/build/util/security.js.map +1 -0
- package/build/util/security.test.d.ts +1 -0
- package/build/util/security.test.js +524 -0
- package/build/util/security.test.js.map +1 -0
- package/build/util/site-detector.d.ts +22 -0
- package/build/util/site-detector.js +42 -0
- package/build/util/site-detector.js.map +1 -0
- package/package.json +112 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 cosmocoder
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
package/README.md
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
# MCP Web Docs
|
|
2
|
+
|
|
3
|
+
**Index Any Documentation. Search Locally. Stay Private.**
|
|
4
|
+
|
|
5
|
+
A self-hosted Model Context Protocol (MCP) server that crawls, indexes, and searches documentation from *any* website. Unlike remote MCP servers limited to GitHub repos or pre-indexed libraries, web-docs gives you full control over what gets indexed — including private documentation behind authentication.
|
|
6
|
+
|
|
7
|
+
[Features](#-features) • [Installation](#-installation) • [Quick Start](#-quick-start) • [Tools](#-available-tools) • [Tips](#-tips) • [Troubleshooting](#-troubleshooting) • [Contributing](#-contributing)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## ❌ The Problem
|
|
12
|
+
|
|
13
|
+
AI assistants struggle with documentation:
|
|
14
|
+
|
|
15
|
+
- ❌ **Remote MCP servers** only work with GitHub or pre-indexed libraries
|
|
16
|
+
- ❌ **Private docs** behind authentication can't be accessed
|
|
17
|
+
- ❌ **Outdated indexes** don't reflect your team's latest documentation
|
|
18
|
+
- ❌ **No control** over what gets indexed or when
|
|
19
|
+
|
|
20
|
+
## ✅ The Solution
|
|
21
|
+
|
|
22
|
+
**MCP Web Docs** crawls and indexes documentation from ANY website locally:
|
|
23
|
+
|
|
24
|
+
- ✅ **Any website** - Docusaurus, Storybook, GitBook, custom sites, internal wikis
|
|
25
|
+
- ✅ **Private docs** - Interactive browser login for authenticated sites
|
|
26
|
+
- ✅ **Always fresh** - Re-index anytime with one command
|
|
27
|
+
- ✅ **Your data, your machine** - No API keys, no cloud, full privacy
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## ✨ Features
|
|
32
|
+
|
|
33
|
+
- **🌐 Universal Crawler** - Works with any documentation site, not just GitHub
|
|
34
|
+
- **🔍 Hybrid Search** - Combines full-text search (FTS) with semantic vector search
|
|
35
|
+
- **🔐 Authentication Support** - Crawl private/protected docs with interactive browser login (auto-detects your default browser)
|
|
36
|
+
- **📊 Smart Extraction** - Automatically extracts code blocks, props tables, and structured content
|
|
37
|
+
- **⚡ Local Embeddings** - Uses FastEmbed for fast, private embedding generation (no API keys)
|
|
38
|
+
- **🗄️ Persistent Storage** - LanceDB for vectors, SQLite for metadata
|
|
39
|
+
- **🔄 Real-time Progress** - Track indexing status with progress updates
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 🚀 Installation
|
|
44
|
+
|
|
45
|
+
### Prerequisites
|
|
46
|
+
|
|
47
|
+
- Node.js >= 22.19.0
|
|
48
|
+
|
|
49
|
+
### Setup
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Clone the repository
|
|
53
|
+
git clone https://github.com/cosmocoder/mcp-web-docs.git
|
|
54
|
+
cd mcp-web-docs
|
|
55
|
+
|
|
56
|
+
# Install dependencies (automatically installs Playwright browsers)
|
|
57
|
+
npm install
|
|
58
|
+
|
|
59
|
+
# Build
|
|
60
|
+
npm run build
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Configure Your MCP Client
|
|
64
|
+
|
|
65
|
+
<details>
|
|
66
|
+
<summary><b>Cursor</b></summary>
|
|
67
|
+
|
|
68
|
+
Add to your Cursor MCP settings (`~/.cursor/mcp.json`):
|
|
69
|
+
|
|
70
|
+
```json
|
|
71
|
+
{
|
|
72
|
+
"mcpServers": {
|
|
73
|
+
"web-docs": {
|
|
74
|
+
"command": "node",
|
|
75
|
+
"args": ["/path/to/mcp-web-docs/build/index.js"]
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
</details>
|
|
82
|
+
|
|
83
|
+
<details>
|
|
84
|
+
<summary><b>Claude Desktop</b></summary>
|
|
85
|
+
|
|
86
|
+
Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS):
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"mcpServers": {
|
|
91
|
+
"web-docs": {
|
|
92
|
+
"command": "node",
|
|
93
|
+
"args": ["/path/to/mcp-web-docs/build/index.js"]
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
</details>
|
|
100
|
+
|
|
101
|
+
<details>
|
|
102
|
+
<summary><b>VS Code</b></summary>
|
|
103
|
+
|
|
104
|
+
Add to `.vscode/mcp.json` in your workspace:
|
|
105
|
+
|
|
106
|
+
```json
|
|
107
|
+
{
|
|
108
|
+
"servers": {
|
|
109
|
+
"web-docs": {
|
|
110
|
+
"command": "node",
|
|
111
|
+
"args": ["/path/to/mcp-web-docs/build/index.js"]
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
</details>
|
|
118
|
+
|
|
119
|
+
<details>
|
|
120
|
+
<summary><b>Windsurf</b></summary>
|
|
121
|
+
|
|
122
|
+
Add to `~/.codeium/windsurf/mcp_config.json`:
|
|
123
|
+
|
|
124
|
+
```json
|
|
125
|
+
{
|
|
126
|
+
"mcpServers": {
|
|
127
|
+
"web-docs": {
|
|
128
|
+
"command": "node",
|
|
129
|
+
"args": ["/path/to/mcp-web-docs/build/index.js"]
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
</details>
|
|
136
|
+
|
|
137
|
+
<details>
|
|
138
|
+
<summary><b>Cline</b></summary>
|
|
139
|
+
|
|
140
|
+
Add to `~/Library/Application Support/Code/User/globalStorage/saoudrizwan.claude-dev/settings/cline_mcp_settings.json`:
|
|
141
|
+
|
|
142
|
+
```json
|
|
143
|
+
{
|
|
144
|
+
"mcpServers": {
|
|
145
|
+
"web-docs": {
|
|
146
|
+
"command": "node",
|
|
147
|
+
"args": ["/path/to/mcp-web-docs/build/index.js"],
|
|
148
|
+
"disabled": false,
|
|
149
|
+
"autoApprove": []
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
</details>
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## ⚡ Quick Start
|
|
160
|
+
|
|
161
|
+
### 1. Index public documentation
|
|
162
|
+
|
|
163
|
+
```
|
|
164
|
+
Index the LanceDB documentation from https://lancedb.com/docs/
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
The AI assistant will call `add_documentation` and begin crawling.
|
|
168
|
+
|
|
169
|
+
### 2. Search for information
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
How do I create a table in LanceDB?
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
The AI will use `search_documentation` to find relevant content.
|
|
176
|
+
|
|
177
|
+
### 3. For private docs, authenticate first
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
I need to index private documentation at https://internal.company.com/docs/
|
|
181
|
+
It requires authentication.
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
A browser window will open for you to log in. The session is saved for future crawls.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## 🔨 Available Tools
|
|
189
|
+
|
|
190
|
+
### `add_documentation`
|
|
191
|
+
|
|
192
|
+
Add a new documentation site for indexing.
|
|
193
|
+
|
|
194
|
+
```typescript
|
|
195
|
+
add_documentation({
|
|
196
|
+
url: "https://docs.example.com/",
|
|
197
|
+
title: "Example Docs", // Optional
|
|
198
|
+
id: "example-docs", // Optional custom ID
|
|
199
|
+
auth: { // Optional authentication
|
|
200
|
+
requiresAuth: true,
|
|
201
|
+
// browser auto-detected from OS settings if omitted
|
|
202
|
+
loginTimeoutSecs: 300
|
|
203
|
+
}
|
|
204
|
+
})
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### `search_documentation`
|
|
208
|
+
|
|
209
|
+
Search through indexed documentation using hybrid search (FTS + semantic).
|
|
210
|
+
|
|
211
|
+
```typescript
|
|
212
|
+
search_documentation({
|
|
213
|
+
query: "how to configure authentication",
|
|
214
|
+
url: "https://docs.example.com/", // Optional: filter to specific site
|
|
215
|
+
limit: 10 // Optional: max results
|
|
216
|
+
})
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### `authenticate`
|
|
220
|
+
|
|
221
|
+
Open a browser window for interactive login to protected sites. Your default browser is automatically detected from OS settings.
|
|
222
|
+
|
|
223
|
+
```typescript
|
|
224
|
+
authenticate({
|
|
225
|
+
url: "https://private-docs.example.com/",
|
|
226
|
+
// browser auto-detected from OS settings - only specify to override
|
|
227
|
+
loginTimeoutSecs: 300 // Optional: timeout in seconds
|
|
228
|
+
})
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### `list_documentation`
|
|
232
|
+
|
|
233
|
+
List all indexed documentation sites.
|
|
234
|
+
|
|
235
|
+
### `reindex_documentation`
|
|
236
|
+
|
|
237
|
+
Re-crawl and re-index a specific documentation site.
|
|
238
|
+
|
|
239
|
+
### `get_indexing_status`
|
|
240
|
+
|
|
241
|
+
Get the current status of indexing operations.
|
|
242
|
+
|
|
243
|
+
### `delete_documentation`
|
|
244
|
+
|
|
245
|
+
Delete an indexed documentation site and all its data.
|
|
246
|
+
|
|
247
|
+
### `clear_auth`
|
|
248
|
+
|
|
249
|
+
Clear saved authentication session for a domain.
|
|
250
|
+
|
|
251
|
+
---
|
|
252
|
+
|
|
253
|
+
## 💡 Tips
|
|
254
|
+
|
|
255
|
+
### Crafting Better Search Queries
|
|
256
|
+
|
|
257
|
+
The search uses hybrid full-text and semantic search. For best results:
|
|
258
|
+
|
|
259
|
+
1. **Be specific** - Include unique terms from what you're looking for
|
|
260
|
+
- Instead of: `"Button props"`
|
|
261
|
+
- Try: `"Button props onClick disabled loading"`
|
|
262
|
+
|
|
263
|
+
2. **Use exact phrases** - Wrap in quotes for exact matching
|
|
264
|
+
- `"authentication middleware"` finds that exact phrase
|
|
265
|
+
|
|
266
|
+
3. **Include context** - Add related terms to narrow results
|
|
267
|
+
- API docs: `"GET /users endpoint authentication headers"`
|
|
268
|
+
- Config: `"webpack config entry output plugins"`
|
|
269
|
+
|
|
270
|
+
### Auto-Invoke with Rules
|
|
271
|
+
|
|
272
|
+
To avoid typing search instructions in every prompt, add a rule to your MCP client:
|
|
273
|
+
|
|
274
|
+
**Cursor** (`Cursor Settings > Rules`):
|
|
275
|
+
```
|
|
276
|
+
When I ask about library documentation or need code examples,
|
|
277
|
+
use the web-docs MCP server to search indexed documentation.
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**Windsurf** (`.windsurfrules`):
|
|
281
|
+
```
|
|
282
|
+
Always use web-docs search_documentation when I ask about
|
|
283
|
+
API references, configuration, or library usage.
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Scoping Searches
|
|
287
|
+
|
|
288
|
+
If you have multiple sites indexed, filter by URL to search within a specific site:
|
|
289
|
+
|
|
290
|
+
```typescript
|
|
291
|
+
search_documentation({
|
|
292
|
+
query: "routing",
|
|
293
|
+
url: "https://nextjs.org/docs/" // Only search Next.js docs
|
|
294
|
+
})
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## 🚨 Troubleshooting
|
|
300
|
+
|
|
301
|
+
<details>
|
|
302
|
+
<summary><b>"Failed to parse document content"</b></summary>
|
|
303
|
+
|
|
304
|
+
The content extractor couldn't process the page. Try:
|
|
305
|
+
- Re-indexing the documentation
|
|
306
|
+
- Checking whether the site relies on JavaScript rendering (the Playwright-based crawler should handle this)
|
|
307
|
+
- Looking at the crawled data in `~/.mcp-web-docs/crawlee/datasets/`
|
|
308
|
+
|
|
309
|
+
</details>
|
|
310
|
+
|
|
311
|
+
<details>
|
|
312
|
+
<summary><b>Authentication not working</b></summary>
|
|
313
|
+
|
|
314
|
+
- Make sure you call `authenticate` before `add_documentation`
|
|
315
|
+
- The browser window needs to stay open until login is detected
|
|
316
|
+
- For OAuth sites, complete the full flow manually
|
|
317
|
+
- Your default browser is auto-detected; to use a different one, pass the `browser` option (for example, `browser: "firefox"`)
|
|
318
|
+
|
|
319
|
+
</details>
|
|
320
|
+
|
|
321
|
+
<details>
|
|
322
|
+
<summary><b>Search not returning expected results</b></summary>
|
|
323
|
+
|
|
324
|
+
- Try more specific queries with unique terms
|
|
325
|
+
- Use quotes for exact phrase matching
|
|
326
|
+
- Filter by URL to search within a specific documentation site
|
|
327
|
+
- Re-index if the documentation has been updated
|
|
328
|
+
|
|
329
|
+
</details>
|
|
330
|
+
|
|
331
|
+
<details>
|
|
332
|
+
<summary><b>Playwright browser issues</b></summary>
|
|
333
|
+
|
|
334
|
+
If browsers aren't installed, run:
|
|
335
|
+
```bash
|
|
336
|
+
npx playwright install
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
</details>
|
|
340
|
+
|
|
341
|
+
---
|
|
342
|
+
|
|
343
|
+
### Data Storage
|
|
344
|
+
|
|
345
|
+
All data is stored locally in `~/.mcp-web-docs/`:
|
|
346
|
+
|
|
347
|
+
```
|
|
348
|
+
~/.mcp-web-docs/
|
|
349
|
+
├── docs.db # SQLite database for document metadata
|
|
350
|
+
├── vectors/ # LanceDB vector database
|
|
351
|
+
├── sessions/ # Saved authentication sessions
|
|
352
|
+
└── crawlee/ # Crawlee datasets (cached crawl data)
|
|
353
|
+
```
|
|
354
|
+
---
|
|
355
|
+
|
|
356
|
+
## 📄 License
|
|
357
|
+
|
|
358
|
+
MIT License - see [LICENSE](LICENSE) for details.
|
|
359
|
+
|
|
360
|
+
---
|
|
361
|
+
|
|
362
|
+
## 🙏 Acknowledgments
|
|
363
|
+
|
|
364
|
+
- [Model Context Protocol](https://modelcontextprotocol.io/) - The protocol specification
|
|
365
|
+
- [Crawlee](https://crawlee.dev/) - Web scraping and browser automation
|
|
366
|
+
- [LanceDB](https://lancedb.com/) - Vector database
|
|
367
|
+
- [FastEmbed](https://github.com/Anush008/fastembed) - Local embedding generation
|
|
368
|
+
- [Playwright](https://playwright.dev/) - Browser automation
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock embeddings provider for testing
|
|
3
|
+
*/
|
|
4
|
+
import type { EmbeddingsProvider } from '../embeddings/types.js';
|
|
5
|
+
/**
|
|
6
|
+
* Creates a mock embeddings provider for testing
|
|
7
|
+
* @param dimensions - Number of dimensions in each embedding vector (defaults to 384, matching FastEmbed)
|
|
8
|
+
*/
|
|
9
|
+
export declare function createMockEmbeddings(dimensions?: number): EmbeddingsProvider;
|
|
10
|
+
/**
|
|
11
|
+
* Mock embeddings provider that returns zero vectors (for error testing)
|
|
12
|
+
*/
|
|
13
|
+
export declare function createZeroEmbeddings(dimensions?: number): EmbeddingsProvider;
|
|
14
|
+
/**
|
|
15
|
+
* Mock embeddings provider that throws errors (for error handling testing)
|
|
16
|
+
*/
|
|
17
|
+
export declare function createFailingEmbeddings(dimensions?: number): EmbeddingsProvider;
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock embeddings provider for testing
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Build a mock embeddings provider for tests.
 *
 * The provider hashes the input text and expands that hash into a vector, so
 * the same text always yields the same embedding.
 *
 * @param dimensions - Length of each generated embedding vector (defaults to 384, like FastEmbed)
 * @returns Provider object exposing `dimensions` and an async `embed(text)` function
 */
export function createMockEmbeddings(dimensions = 384) {
  // Hash the text, then expand the hash into a vector of the requested size.
  const embed = async (text) => generateDeterministicVector(simpleHash(text), dimensions);
  return { dimensions, embed };
}
|
|
18
|
+
/**
 * Fold a string into a non-negative integer hash.
 *
 * Each UTF-16 code unit is mixed in as `hash * 31 + codeUnit`, with the running
 * value truncated to a signed 32-bit integer after every step.
 *
 * @param str - Text to hash
 * @returns Absolute value of the final 32-bit hash
 */
function simpleHash(str) {
  let acc = 0;
  for (let idx = 0; idx < str.length; idx += 1) {
    // (acc << 5) - acc is acc * 31; `| 0` truncates the sum to a 32-bit integer
    acc = ((acc << 5) - acc + str.charCodeAt(idx)) | 0;
  }
  return Math.abs(acc);
}
|
|
30
|
+
/**
 * Generate a deterministic unit-length vector from an integer seed.
 *
 * Components are drawn from a 31-bit linear congruential generator
 * (multiplier 1103515245, increment 12345), mapped onto [-1, 1], and the
 * resulting vector is then scaled to unit length.
 *
 * @param seed - Integer seed; it is handled as a 32-bit integer
 * @param dimensions - Number of components to generate
 * @returns Array of `dimensions` numbers scaled to unit length (empty when `dimensions` is 0)
 */
function generateDeterministicVector(seed, dimensions) {
  const vector = [];
  let value = seed;
  for (let i = 0; i < dimensions; i++) {
    // Linear congruential generator. Math.imul keeps the multiplication exact in
    // 32 bits; a plain `value * 1103515245` can exceed 2^53, where doubles round
    // away the low bits before the mask is applied.
    value = (Math.imul(value, 1103515245) + 12345) & 0x7fffffff;
    // Normalize to [-1, 1]
    vector.push((value / 0x7fffffff) * 2 - 1);
  }
  // Normalize vector to unit length
  const magnitude = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
  return vector.map((v) => v / magnitude);
}
|
|
46
|
+
/**
 * Build a mock embeddings provider whose embeddings are all zeros
 * (for error testing).
 *
 * @param dimensions - Length of each zero vector (defaults to 384)
 * @returns Provider object exposing `dimensions` and an async `embed()` function
 */
export function createZeroEmbeddings(dimensions = 384) {
  // A fresh array is allocated on every call, so callers never share a vector.
  async function embed() {
    const zeros = new Array(dimensions);
    zeros.fill(0);
    return zeros;
  }
  return { dimensions, embed };
}
|
|
55
|
+
/**
 * Build a mock embeddings provider whose `embed` always rejects
 * (for error-handling tests).
 *
 * @param dimensions - Value reported as the provider's `dimensions` (defaults to 384)
 * @returns Provider object exposing `dimensions` and an async `embed()` that rejects
 */
export function createFailingEmbeddings(dimensions = 384) {
  // Being async, the throw surfaces as a rejected promise rather than a sync exception.
  const embed = async () => {
    throw new Error('Embeddings service unavailable');
  };
  return { dimensions, embed };
}
|
|
66
|
+
//# sourceMappingURL=embeddings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/__mocks__/embeddings.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,aAAqB,GAAG;IAC3D,OAAO;QACL,UAAU;QACV,KAAK,EAAE,KAAK,EAAE,IAAY,EAAqB,EAAE;YAC/C,8DAA8D;YAC9D,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;YAC9B,OAAO,2BAA2B,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC;QACvD,CAAC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;QACjC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,2BAA2B;IACjD,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,2BAA2B,CAAC,IAAY,EAAE,UAAkB;IACnE,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,KAAK,GAAG,IAAI,CAAC;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,gCAAgC;QAChC,KAAK,GAAG,CAAC,KAAK,GAAG,UAAU,GAAG,KAAK,CAAC,GAAG,UAAU,CAAC;QAClD,uBAAuB;QACvB,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;IAC5C,CAAC;IACD,kCAAkC;IAClC,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACvE,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,aAAqB,GAAG;IAC3D,OAAO;QACL,UAAU;QACV,KAAK,EAAE,KAAK,IAAuB,EAAE,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;KACpE,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,aAAqB,GAAG;IAC9D,OAAO;QACL,UAAU;QACV,KAAK,EAAE,KAAK,IAAuB,EAAE;YACnC,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACpD,CAAC;KACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
export interface DocsConfig {
|
|
2
|
+
githubToken?: string;
|
|
3
|
+
maxDepth: number;
|
|
4
|
+
maxRequestsPerCrawl: number;
|
|
5
|
+
maxChunkSize: number;
|
|
6
|
+
maxConcurrentRequests: number;
|
|
7
|
+
cacheSize: number;
|
|
8
|
+
dataDir: string;
|
|
9
|
+
dbPath: string;
|
|
10
|
+
vectorDbPath: string;
|
|
11
|
+
}
|
|
12
|
+
export declare function loadConfig(): Promise<DocsConfig>;
|
|
13
|
+
export declare const IGNORED_PATHS: string[];
|
|
14
|
+
export declare const RATE_LIMIT: {
|
|
15
|
+
maxRequests: number;
|
|
16
|
+
timeWindow: number;
|
|
17
|
+
minDelay: number;
|
|
18
|
+
};
|
|
19
|
+
export declare const QUEUE_OPTIONS: {
|
|
20
|
+
maxRequestRetries: number;
|
|
21
|
+
retryDelay: number;
|
|
22
|
+
maxRequestsPerCrawl: number;
|
|
23
|
+
};
|
|
24
|
+
/**
 * GitHub API rate limits, split by whether a token is available.
 * config.js uses a one-hour window (60 * 60 * 1000) for both tiers.
 */
export declare const GITHUB_RATE_LIMIT: {
    /** Tier without a token: 60 requests per window in config.js. */
    unauthenticated: {
        maxRequests: number;
        timeWindow: number;
    };
    /** Tier with a token: 5000 requests per window in config.js. */
    authenticated: {
        maxRequests: number;
        timeWindow: number;
    };
};
|
|
34
|
+
/**
 * Check that a string parses as a URL with the `http:` or `https:` protocol.
 * @param urlString - URL to validate
 * @returns true for parseable http(s) URLs; false otherwise (never throws on unparseable input)
 */
export declare function isValidUrl(urlString: string): boolean;
|
|
35
|
+
/**
 * Validate URL and check for SSRF attacks (blocks private/internal networks)
 *
 * The URL must first pass `isValidUrl` (parseable, http/https). A URL rejected
 * by the SSRF check yields `false` rather than an exception.
 *
 * @param urlString - URL to validate
 * @param allowPrivate - If true, skips SSRF checks (for trusted internal use). Defaults to false.
 * @returns true if URL is valid and safe
 */
export declare function isValidPublicUrl(urlString: string, allowPrivate?: boolean): boolean;
|
|
42
|
+
/**
 * Parse a URL, serialize it back to a string and drop one trailing slash from
 * the end of the serialized form.
 * @param urlString - URL to normalize
 * @returns the normalized URL string
 * @throws Error with message `Invalid URL: <input>` when the input cannot be parsed
 */
export declare function normalizeUrl(urlString: string): string;
|
|
43
|
+
/**
 * Check whether a URL's hostname is exactly `github.com`.
 * @param urlString - URL to inspect
 * @returns true for github.com URLs; false for other hosts and for unparseable input
 */
export declare function isGitHubUrl(urlString: string): boolean;
|
|
44
|
+
/**
 * Check whether a path ends with `.md`, `.mdx` or `.markdown` (case-insensitive).
 * @param path - file path or name to inspect
 */
export declare function isMarkdownPath(path: string): boolean;
|
package/build/config.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { join } from 'path';
|
|
2
|
+
import { homedir } from 'os';
|
|
3
|
+
import { mkdir } from 'fs/promises';
|
|
4
|
+
import { logger } from './util/logger.js';
|
|
5
|
+
import { validatePublicUrl } from './util/security.js';
|
|
6
|
+
/** Root folder for everything this package persists: `.mcp-web-docs` in the user's home directory. */
const DATA_DIR = join(homedir(), '.mcp-web-docs');

/** Resolve a file or folder name inside DATA_DIR. */
const resolveInDataDir = (entryName) => join(DATA_DIR, entryName);

/** Baseline settings that loadConfig() spreads into the configuration it returns. */
const DEFAULT_CONFIG = {
    maxDepth: 4,
    // Match DocsCrawler default for better coverage
    maxRequestsPerCrawl: 1000,
    maxChunkSize: 1000,
    // Allow concurrent requests for better performance while maintaining stability
    maxConcurrentRequests: 3,
    cacheSize: 1000,
    dataDir: DATA_DIR,
    dbPath: resolveInDataDir('docs.db'),
    vectorDbPath: resolveInDataDir('vectors'),
};
|
|
17
|
+
/**
 * Build the runtime configuration.
 *
 * Side effects: creates DATA_DIR and a `crawlee` sub-directory (recursively)
 * and points `process.env.CRAWLEE_STORAGE_DIR` at the latter.
 *
 * @returns {Promise<object>} DEFAULT_CONFIG plus `githubToken` (undefined when
 *   the GITHUB_TOKEN environment variable is not set)
 * @throws rethrows the error raised while creating the directories
 */
export async function loadConfig() {
    logger.debug('[Config] Loading configuration');

    // Optional GitHub token for higher rate limits
    const { GITHUB_TOKEN: githubToken } = process.env;
    if (githubToken) {
        logger.debug('[Config] GitHub token found');
    }

    // Ensure data directory exists and set up Crawlee storage
    try {
        logger.debug(`[Config] Creating data directory: ${DATA_DIR}`);
        await mkdir(DATA_DIR, { recursive: true });

        // The environment variable is set before the directory is created,
        // so it stays set even when the second mkdir fails.
        const crawleeStorageDir = join(DATA_DIR, 'crawlee');
        process.env.CRAWLEE_STORAGE_DIR = crawleeStorageDir;
        await mkdir(crawleeStorageDir, { recursive: true });
    }
    catch (error) {
        logger.debug('[Config] Error creating data directory:', error);
        throw error;
    }

    const config = { ...DEFAULT_CONFIG, githubToken };

    // Never log the real token: replace it with a placeholder when present.
    const loggable = { ...config, githubToken: githubToken ? '***' : undefined };
    logger.debug('[Config] Configuration loaded:', loggable);

    return config;
}
|
|
47
|
+
// Constants for indexing

/** Site artefacts: icons, crawler hints, generated index/search pages and asset folders. */
const IGNORED_SITE_PATHS = [
    'favicon.ico',
    'robots.txt',
    '.rst.txt',
    'genindex',
    'py-modindex',
    'search.html',
    'search',
    'genindex.html',
    'changelog',
    'changelog.html',
    'assets/',
    'static/',
    'images/',
    'img/',
    'css/',
    'js/',
    'fonts/',
];

/** Common repository paths to ignore. */
const IGNORED_REPOSITORY_PATHS = [
    'node_modules/',
    'vendor/',
    'test/',
    'tests/',
    'example/',
    'examples/',
    'build/',
    'dist/',
    '.git/',
];

/** Path fragments to ignore while indexing (site entries first, then repository entries). */
export const IGNORED_PATHS = [...IGNORED_SITE_PATHS, ...IGNORED_REPOSITORY_PATHS];
|
|
77
|
+
// Rate limiting constants

/** Length of the rate-limit window: 1 minute. */
const RATE_LIMIT_WINDOW = 60 * 1000;

/** Request budget per window, window length, and delay between requests. */
export const RATE_LIMIT = {
    // Increased for better throughput
    maxRequests: 60,
    timeWindow: RATE_LIMIT_WINDOW,
    // Reduced delay between requests
    minDelay: 250,
};
|
|
83
|
+
// Queue configuration
// NOTE(review): maxRequestsPerCrawl here (2000) differs from the 1000 used in
// DEFAULT_CONFIG; which of the two a crawler applies is not visible in this file.
export const QUEUE_OPTIONS = {
    maxRequestRetries: 2,
    retryDelay: 1000, // NOTE(review): presumably milliseconds; confirm against the consumer
    maxRequestsPerCrawl: 2000, // Increased from default 1000
};
|
|
89
|
+
// GitHub API rate limits

/** Both tiers use the same window: 1 hour. */
const GITHUB_RATE_LIMIT_WINDOW = 60 * 60 * 1000;

/** Request budgets for GitHub, split by whether a token is available. */
export const GITHUB_RATE_LIMIT = {
    unauthenticated: {
        maxRequests: 60,
        timeWindow: GITHUB_RATE_LIMIT_WINDOW,
    },
    authenticated: {
        maxRequests: 5000,
        timeWindow: GITHUB_RATE_LIMIT_WINDOW,
    },
};
|
|
100
|
+
/**
 * Check that a string parses as a URL with the `http:` or `https:` protocol.
 *
 * @param urlString - URL to validate
 * @returns true for parseable http(s) URLs; false for any other protocol or
 *   for input that `new URL()` rejects
 */
export function isValidUrl(urlString) {
    let protocol;
    try {
        ({ protocol } = new URL(urlString));
    }
    catch {
        // Unparseable input is simply "not valid"; no error is surfaced.
        return false;
    }
    return ['http:', 'https:'].includes(protocol);
}
|
|
110
|
+
/**
 * Validate URL and check for SSRF attacks (blocks private/internal networks)
 *
 * Checks run in this order: `isValidUrl` must pass, then `allowPrivate`
 * short-circuits to true, and only then is `validatePublicUrl` consulted.
 *
 * @param urlString - URL to validate
 * @param allowPrivate - If true, skips SSRF checks (for trusted internal use)
 * @returns true if URL is valid and safe
 */
export function isValidPublicUrl(urlString, allowPrivate = false) {
    if (!isValidUrl(urlString)) {
        return false;
    }
    if (allowPrivate) {
        return true;
    }
    try {
        validatePublicUrl(urlString);
    }
    catch (error) {
        // Any exception from the SSRF check is treated as "blocked".
        logger.debug(`[Config] URL blocked by SSRF protection: ${urlString}`, error);
        return false;
    }
    return true;
}
|
|
132
|
+
/**
 * Normalize a URL by parsing it, serializing it again and dropping one
 * trailing slash from the end of the serialized string.
 *
 * The slash is removed from the end of the whole string, so a URL that ends
 * in a query or fragment is only affected if that part itself ends with `/`.
 *
 * @param urlString - URL to normalize
 * @returns the normalized URL string
 * @throws {Error} `Invalid URL: <input>` when the input cannot be parsed
 */
export function normalizeUrl(urlString) {
    let serialized;
    try {
        serialized = new URL(urlString).toString();
    }
    catch {
        throw new Error(`Invalid URL: ${urlString}`);
    }
    // Remove trailing slash
    return serialized.endsWith('/') ? serialized.slice(0, -1) : serialized;
}
|
|
143
|
+
/**
 * Check whether a URL's hostname is exactly `github.com`.
 *
 * Subdomains (for example `www.github.com`) do not match.
 *
 * @param urlString - URL to inspect
 * @returns true for github.com URLs; false for other hosts and for input
 *   that `new URL()` rejects
 */
export function isGitHubUrl(urlString) {
    let hostname;
    try {
        ({ hostname } = new URL(urlString));
    }
    catch {
        return false;
    }
    return hostname === 'github.com';
}
|
|
153
|
+
/**
 * Check whether a path ends with a markdown extension (`.md`, `.mdx` or
 * `.markdown`), ignoring case.
 *
 * @param path - file path or name to inspect
 * @returns true when the lower-cased path ends with one of the extensions
 */
export function isMarkdownPath(path) {
    const markdownExtensions = ['.md', '.mdx', '.markdown'];
    const lowered = path.toLowerCase();
    return markdownExtensions.some((extension) => lowered.endsWith(extension));
}
|
|
158
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAC7B,OAAO,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAC1C,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAcvD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,eAAe,CAAC,CAAC;AAElD,MAAM,cAAc,GAAe;IACjC,QAAQ,EAAE,CAAC;IACX,mBAAmB,EAAE,IAAI,EAAE,gDAAgD;IAC3E,YAAY,EAAE,IAAI;IAClB,qBAAqB,EAAE,CAAC,EAAE,+EAA+E;IACzG,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,QAAQ;IACjB,MAAM,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;IACjC,YAAY,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;CACxC,CAAC;AAEF,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,MAAM,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAE/C,+CAA+C;IAC/C,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC;IAC7C,IAAI,WAAW,EAAE,CAAC;QAChB,MAAM,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;IAC9C,CAAC;IAED,0DAA0D;IAC1D,IAAI,CAAC;QACH,MAAM,CAAC,KAAK,CAAC,qCAAqC,QAAQ,EAAE,CAAC,CAAC;QAC9D,MAAM,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE3C,gCAAgC;QAChC,MAAM,iBAAiB,GAAG,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QACpD,OAAO,CAAC,GAAG,CAAC,mBAAmB,GAAG,iBAAiB,CAAC;QACpD,MAAM,KAAK,CAAC,iBAAiB,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACtD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,KAAK,CAAC,yCAAyC,EAAE,KAAK,CAAC,CAAC;QAC/D,MAAM,KAAK,CAAC;IACd,CAAC;IAED,MAAM,MAAM,GAAe;QACzB,GAAG,cAAc;QACjB,WAAW;KACZ,CAAC;IAEF,MAAM,CAAC,KAAK,CAAC,gCAAgC,EAAE;QAC7C,GAAG,MAAM;QACT,WAAW,EAAE,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;KAC7C,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,yBAAyB;AACzB,MAAM,CAAC,MAAM,aAAa,GAAG;IAC3B,aAAa;IACb,YAAY;IACZ,UAAU;IACV,UAAU;IACV,aAAa;IACb,aAAa;IACb,QAAQ;IACR,eAAe;IACf,WAAW;IACX,gBAAgB;IAChB,SAAS;IACT,SAAS;IACT,SAAS;IACT,MAAM;IACN,MAAM;IACN,KAAK;IACL,QAAQ;IACR,oCAAoC;IACpC,eAAe;IACf,SAAS;IACT,OAAO;IACP,QAAQ;IACR,UAAU;IACV,WAAW;IACX,QAAQ;IACR,OAAO;IACP,OAAO;CACR,CAAC;AAEF,0BAA0B;AAC1B,MAAM,CAAC,MAAM,UAAU,GAAG;IACxB,WAAW,EAAE,EAAE,EAAE,kCAAkC;IACnD,UAAU,EAAE,EAAE,GAAG,IAAI,EAAE,WAAW;IAClC,QAAQ,EAAE,
GAAG,EAAE,iCAAiC;CACjD,CAAC;AAEF,sBAAsB;AACtB,MAAM,CAAC,MAAM,aAAa,GAAG;IAC3B,iBAAiB,EAAE,CAAC;IACpB,UAAU,EAAE,IAAI;IAChB,mBAAmB,EAAE,IAAI,EAAE,8BAA8B;CAC1D,CAAC;AAEF,yBAAyB;AACzB,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,eAAe,EAAE;QACf,WAAW,EAAE,EAAE;QACf,UAAU,EAAE,EAAE,GAAG,EAAE,GAAG,IAAI,EAAE,SAAS;KACtC;IACD,aAAa,EAAE;QACb,WAAW,EAAE,IAAI;QACjB,UAAU,EAAE,EAAE,GAAG,EAAE,GAAG,IAAI,EAAE,SAAS;KACtC;CACF,CAAC;AAEF,oCAAoC;AACpC,MAAM,UAAU,UAAU,CAAC,SAAiB;IAC1C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;QAC/B,OAAO,GAAG,CAAC,QAAQ,KAAK,OAAO,IAAI,GAAG,CAAC,QAAQ,KAAK,QAAQ,CAAC;IAC/D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,SAAiB,EAAE,YAAY,GAAG,KAAK;IACtE,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3B,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,CAAC;QACH,iBAAiB,CAAC,SAAS,CAAC,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,KAAK,CAAC,4CAA4C,SAAS,EAAE,EAAE,KAAK,CAAC,CAAC;QAC7E,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,qCAAqC;AACrC,MAAM,UAAU,YAAY,CAAC,SAAiB;IAC5C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;QAC/B,wBAAwB;QACxB,OAAO,GAAG,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,gBAAgB,SAAS,EAAE,CAAC,CAAC;IAC/C,CAAC;AACH,CAAC;AAED,4DAA4D;AAC5D,MAAM,UAAU,WAAW,CAAC,SAAiB;IAC3C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC;QAC/B,OAAO,GAAG,CAAC,QAAQ,KAAK,YAAY,CAAC;IACvC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,kDAAkD;AAClD,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACzC,OAAO,aAAa,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,aAAa,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;AAChH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|