@arabold/docs-mcp-server 1.17.0 → 1.18.0
This diff reflects the changes between package versions as published to one of the supported public registries. It is provided for informational purposes only.
- package/README.md +267 -209
- package/db/migrations/002-normalize-library-table.sql +50 -0
- package/db/migrations/003-normalize-vector-table.sql +33 -0
- package/db/migrations/004-complete-normalization.sql +67 -0
- package/db/migrations/005-add-status-tracking.sql +42 -0
- package/db/migrations/006-add-scraper-options.sql +16 -0
- package/dist/EmbeddingFactory-CElwVk3X.js.map +1 -1
- package/dist/assets/main.css +1 -1
- package/dist/assets/main.js +5670 -5138
- package/dist/assets/main.js.map +1 -1
- package/dist/index.js +5651 -4115
- package/dist/index.js.map +1 -1
- package/package.json +35 -35
- package/public/assets/main.css +1 -1
- package/public/assets/main.js +5670 -5138
- package/public/assets/main.js.map +1 -1
package/README.md
CHANGED
@@ -38,62 +38,96 @@ LLM-assisted coding promises speed and efficiency, but often falls short due to:
 
 ## How to Run the Docs MCP Server
 
-
+Choose your deployment method:
 
-- [
-- [
-- [
+- [Standalone Server (Recommended)](#standalone-server-recommended)
+- [Embedded Server](#embedded-server)
+- [Advanced: Docker Compose (Scaling)](#advanced-docker-compose-scaling)
 
-##
+## Standalone Server (Recommended)
 
-Run
+Run a standalone server that includes both MCP endpoints and web interface in a single process. This is the easiest way to get started.
+
+### Option 1: Docker
+
+1. **Install Docker.**
+2. **Start the server:**
 
-1. **Install Docker and Docker Compose.**
-2. **Clone the repository:**
-   ```bash
-   git clone https://github.com/arabold/docs-mcp-server.git
-   cd docs-mcp-server
-   ```
-3. **Set up your environment:**
-   Copy the example environment file and add your OpenAI API key:
    ```bash
-
-
+   docker run --rm \
+     -e OPENAI_API_KEY="your-openai-api-key" \
+     -v docs-mcp-data:/data \
+     -p 6280:6280 \
+     ghcr.io/arabold/docs-mcp-server:latest \
+     --protocol http --port 6280
    ```
-
+
+   Replace `your-openai-api-key` with your actual OpenAI API key.
+
+### Option 2: npx
+
+1. **Install Node.js 22.x or later.**
+2. **Start the server:**
+
    ```bash
-
+   OPENAI_API_KEY="your-openai-api-key" npx @arabold/docs-mcp-server@latest
    ```
-   - Use `-d` for detached mode. Omit to see logs in your terminal.
-   - To rebuild after updates: `docker compose up -d --build`.
-5. **Configure your MCP client:**
-   Add this to your MCP settings:
-   ```json
-   {
-     "mcpServers": {
-       "docs-mcp-server": {
-         "url": "http://localhost:6280/sse",
-         "disabled": false,
-         "autoApprove": []
-       }
-     }
-   }
-   ```
-   Restart your AI assistant after updating the config.
-6. **Access the Web Interface:**
-   Open `http://localhost:6281` in your browser.
 
-
+   Replace `your-openai-api-key` with your actual OpenAI API key.
+
+   This will run the server on port 6280 by default.
+
+### Configure Your MCP Client
 
-
-- Persistent data storage via Docker volume
-- Easy config via `.env`
+Add this to your MCP settings (VS Code, Claude Desktop, etc.):
 
-
+```json
+{
+  "mcpServers": {
+    "docs-mcp-server": {
+      "type": "sse",
+      "url": "http://localhost:6280/sse",
+      "disabled": false,
+      "autoApprove": []
+    }
+  }
+}
+```
+
+**Alternative connection types:**
+
+```json
+// SSE (Server-Sent Events)
+"type": "sse", "url": "http://localhost:6280/sse"
+
+// HTTP (Streamable)
+"type": "http", "url": "http://localhost:6280/mcp"
+```
+
+Restart your AI assistant after updating the config.
+
+### Access the Web Interface
+
+Open `http://localhost:6280` in your browser to manage documentation and monitor jobs.
+
+### CLI Usage with Standalone Server
+
+You can also use CLI commands to interact with the local database:
+
+```bash
+# List indexed libraries
+OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest list
+
+# Search documentation
+OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest search react "useState hook"
+
+# Scrape new documentation (connects to running server's worker)
+OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest scrape react https://react.dev/reference/react --server-url http://localhost:6280/api
+```
 
 ### Adding Library Documentation
 
-1. Open the Web Interface at `http://localhost:
+1. Open the Web Interface at `http://localhost:6280`.
 2. Use the "Queue New Scrape Job" form.
 3. Enter the documentation URL, library name, and (optionally) version.
 4. Click "Queue Job". Monitor progress in the Job Queue.
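The Docker invocation above passes `--protocol http --port 6280` through to the server, while the npx quick start relies on defaults. Assuming the CLI accepts the same flags regardless of how it is launched (the hunk itself does not show this), an explicit npx start would be a sketch like:

```bash
# Sketch, not from the README: assumes the npx entrypoint accepts the same
# --protocol/--port flags that the Docker example passes to the container.
OPENAI_API_KEY="your-openai-api-key" \
  npx @arabold/docs-mcp-server@latest --protocol http --port 6280
```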
@@ -103,6 +137,85 @@ Once a job completes, the docs are searchable via your AI assistant or the Web UI.
 
 
 
+**Benefits:**
+
+- Single command setup with both web UI and MCP server
+- Persistent data storage (Docker volume or local directory)
+- No repository cloning required
+- Full feature access including web interface
+
+To stop the server, press `Ctrl+C`.
+
+## Embedded Server
+
+Run the MCP server directly embedded in your AI assistant without a separate process or web interface. This method provides MCP integration only.
+
+### Configure Your MCP Client
+
+Add this to your MCP settings (VS Code, Claude Desktop, etc.):
+
+```json
+{
+  "mcpServers": {
+    "docs-mcp-server": {
+      "command": "npx",
+      "args": ["@arabold/docs-mcp-server@latest"],
+      "env": {
+        "OPENAI_API_KEY": "sk-proj-..." // Your OpenAI API key
+      },
+      "disabled": false,
+      "autoApprove": []
+    }
+  }
+}
+```
+
+Replace `sk-proj-...` with your OpenAI API key and restart your application.
+
+### Adding Library Documentation
+
+**Option 1: Use MCP Tools**
+
+Your AI assistant can index new documentation using the built-in `scrape_docs` tool:
+
+```
+Please scrape the React documentation from https://react.dev/reference/react for library "react" version "18.x"
+```
+
+**Option 2: Launch Web Interface**
+
+Start a temporary web interface that shares the same database:
+
+```bash
+OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest web --port 6281
+```
+
+Then open `http://localhost:6281` to manage documentation. Stop the web interface when done (`Ctrl+C`).
+
+**Option 3: CLI Commands**
+
+Use CLI commands directly (avoid running scrape jobs concurrently with the embedded server):
+
+```bash
+# List libraries
+OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest list
+
+# Search documentation
+OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest search react "useState hook"
+```
+
+**Benefits:**
+
+- Direct integration with AI assistant
+- No separate server process required
+- Persistent data storage in user's home directory
+- Shared database with standalone server and CLI
+
+**Limitations:**
+
+- No web interface (unless launched separately)
+- Documentation indexing requires MCP tools or separate commands
+
 ## Scraping Local Files and Folders
 
 You can index documentation from your local filesystem by using a `file://` URL as the source. This works in both the Web UI and CLI.
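Option 3 above warns against running scrape jobs concurrently with the embedded server, which implies the `scrape` command can also run in-process against the shared local database. A sketch of such a local scrape, assuming the same `scrape <library> <url>` argument order as the standalone example but without `--server-url`:

```bash
# Sketch: local in-process scrape (assumption: omitting --server-url runs the
# job directly against the shared database rather than a running server).
OPENAI_API_KEY="your-key" \
  npx @arabold/docs-mcp-server@latest scrape react https://react.dev/reference/react
```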
@@ -118,7 +231,7 @@ You can index documentation from your local filesystem by using a `file://` URL
 - All files with a MIME type of `text/*` are processed. This includes HTML, Markdown, plain text, and source code files such as `.js`, `.ts`, `.tsx`, `.css`, etc. Binary files, PDFs, images, and other non-text formats are ignored.
 - You must use the `file://` prefix for local files/folders.
 - The path must be accessible to the server process.
-- **If running in Docker
+- **If running in Docker:**
   - You must mount the local folder into the container and use the container path in your `file://` URL.
   - Example Docker run:
     ```bash
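The example `docker run` opened by the fence above falls outside this hunk's context, but based on the mount requirement just described it presumably resembles the following sketch; the host path `/path/to/local/docs`, the container path `/docs`, and the library name `my-library` are all placeholders:

```bash
# Illustrative sketch: mount the folder read-only, then reference the
# *container* path in the file:// URL.
docker run --rm \
  -e OPENAI_API_KEY="your-openai-api-key" \
  -v docs-mcp-data:/data \
  -v /path/to/local/docs:/docs:ro \
  ghcr.io/arabold/docs-mcp-server:latest \
  scrape my-library file:///docs
```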
@@ -133,187 +246,67 @@ You can index documentation from your local filesystem by using a `file://` URL
 
 See the tooltips in the Web UI and CLI help for more details.
 
-##
-
-> **Note:** The published Docker images support both x86_64 (amd64) and Mac Silicon (arm64).
-
-This method is simple and doesn't require cloning the repository.
-
-1. **Install and start Docker.**
-2. **Configure your MCP client:**
-   Add this block to your MCP settings (adjust as needed):
-   ```json
-   {
-     "mcpServers": {
-       "docs-mcp-server": {
-         "command": "docker",
-         "args": [
-           "run",
-           "-i",
-           "--rm",
-           "-e",
-           "OPENAI_API_KEY",
-           "-v",
-           "docs-mcp-data:/data",
-           "ghcr.io/arabold/docs-mcp-server:latest"
-         ],
-         "env": {
-           "OPENAI_API_KEY": "sk-proj-..." // Your OpenAI API key
-         },
-         "disabled": false,
-         "autoApprove": []
-       }
-     }
-   }
-   ```
-   Replace `sk-proj-...` with your OpenAI API key. Restart your application.
-3. **Done!** The server is now available to your AI assistant.
-
-**Docker Container Settings:**
+## Advanced: Docker Compose (Scaling)
 
-
-- `--rm`: Removes the container on exit.
-- `-e OPENAI_API_KEY`: **Required.**
-- `-v docs-mcp-data:/data`: **Required for persistence.**
+For production deployments or when you need to scale processing, use Docker Compose to run separate services.
 
|
-
|
|
253
|
+
> **Note:** This feature is work in progress and will still be improved in future releases. Currently, it still requires all services to share the same database volume, defeating its original purpose.
|
|
180
254
|
|
|
181
|
-
**
|
|
255
|
+
**Start the services:**
|
|
182
256
|
|
|
183
257
|
```bash
|
|
184
|
-
#
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
-e DOCS_MCP_EMBEDDING_MODEL="text-embedding-3-small" \
|
|
188
|
-
-v docs-mcp-data:/data \
|
|
189
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
190
|
-
|
|
191
|
-
# OpenAI-compatible API (Ollama)
|
|
192
|
-
docker run -i --rm \
|
|
193
|
-
-e OPENAI_API_KEY="your-key" \
|
|
194
|
-
-e OPENAI_API_BASE="http://localhost:11434/v1" \
|
|
195
|
-
-e DOCS_MCP_EMBEDDING_MODEL="embeddings" \
|
|
196
|
-
-v docs-mcp-data:/data \
|
|
197
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
198
|
-
|
|
199
|
-
# Google Vertex AI
|
|
200
|
-
docker run -i --rm \
|
|
201
|
-
-e DOCS_MCP_EMBEDDING_MODEL="vertex:text-embedding-004" \
|
|
202
|
-
-e GOOGLE_APPLICATION_CREDENTIALS="/app/gcp-key.json" \
|
|
203
|
-
-v docs-mcp-data:/data \
|
|
204
|
-
-v /path/to/gcp-key.json:/app/gcp-key.json:ro \
|
|
205
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
206
|
-
|
|
207
|
-
# Google Gemini
|
|
208
|
-
docker run -i --rm \
|
|
209
|
-
-e DOCS_MCP_EMBEDDING_MODEL="gemini:embedding-001" \
|
|
210
|
-
-e GOOGLE_API_KEY="your-google-api-key" \
|
|
211
|
-
-v docs-mcp-data:/data \
|
|
212
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
213
|
-
|
|
214
|
-
# AWS Bedrock
|
|
215
|
-
docker run -i --rm \
|
|
216
|
-
-e AWS_ACCESS_KEY_ID="your-aws-key" \
|
|
217
|
-
-e AWS_SECRET_ACCESS_KEY="your-aws-secret" \
|
|
218
|
-
-e AWS_REGION="us-east-1" \
|
|
219
|
-
-e DOCS_MCP_EMBEDDING_MODEL="aws:amazon.titan-embed-text-v1" \
|
|
220
|
-
-v docs-mcp-data:/data \
|
|
221
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
222
|
-
|
|
223
|
-
# Azure OpenAI
|
|
224
|
-
docker run -i --rm \
|
|
225
|
-
-e AZURE_OPENAI_API_KEY="your-azure-key" \
|
|
226
|
-
-e AZURE_OPENAI_API_INSTANCE_NAME="your-instance" \
|
|
227
|
-
-e AZURE_OPENAI_API_DEPLOYMENT_NAME="your-deployment" \
|
|
228
|
-
-e AZURE_OPENAI_API_VERSION="2024-02-01" \
|
|
229
|
-
-e DOCS_MCP_EMBEDDING_MODEL="microsoft:text-embedding-ada-002" \
|
|
230
|
-
-v docs-mcp-data:/data \
|
|
231
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
232
|
-
```
|
|
233
|
-
|
|
234
|
-
### Web Interface via Docker
|
|
258
|
+
# Clone the repository (to get docker-compose.yml)
|
|
259
|
+
git clone https://github.com/arabold/docs-mcp-server.git
|
|
260
|
+
cd docs-mcp-server
|
|
235
261
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
```bash
|
|
239
|
-
docker run --rm \
|
|
240
|
-
-e OPENAI_API_KEY="your-openai-api-key" \
|
|
241
|
-
-v docs-mcp-data:/data \
|
|
242
|
-
-p 6281:6281 \
|
|
243
|
-
ghcr.io/arabold/docs-mcp-server:latest \
|
|
244
|
-
web --port 6281
|
|
245
|
-
```
|
|
246
|
-
|
|
247
|
-
- Use the same volume name as your server.
|
|
248
|
-
- Map port 6281 with `-p 6281:6281`.
|
|
249
|
-
- Pass config variables with `-e` as needed.
|
|
250
|
-
|
|
251
|
-
### CLI via Docker
|
|
252
|
-
|
|
253
|
-
Run CLI commands by appending them after the image name:
|
|
254
|
-
|
|
255
|
-
```bash
|
|
256
|
-
docker run --rm \
|
|
257
|
-
-e OPENAI_API_KEY="your-openai-api-key" \
|
|
258
|
-
-v docs-mcp-data:/data \
|
|
259
|
-
ghcr.io/arabold/docs-mcp-server:latest \
|
|
260
|
-
<command> [options]
|
|
261
|
-
```
|
|
262
|
+
# Set your environment variables
|
|
263
|
+
export OPENAI_API_KEY="your-key-here"
|
|
262
264
|
|
|
263
|
-
|
|
265
|
+
# Start all services
|
|
266
|
+
docker compose up -d
|
|
264
267
|
|
|
265
|
-
|
|
266
|
-
docker
|
|
267
|
-
-e OPENAI_API_KEY="your-openai-api-key" \
|
|
268
|
-
-v docs-mcp-data:/data \
|
|
269
|
-
ghcr.io/arabold/docs-mcp-server:latest \
|
|
270
|
-
list
|
|
268
|
+
# Scale workers if needed
|
|
269
|
+
docker compose up -d --scale worker=3
|
|
271
270
|
```
|
|
272
271
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
272
|
+
**Service architecture:**
|
|
273
|
+
|
|
274
|
+
- **Worker** (port 8080): Handles documentation processing jobs
|
|
275
|
+
- **MCP Server** (port 6280): Provides `/sse` endpoint for AI tools
|
|
276
|
+
- **Web Interface** (port 6281): Browser-based management interface
|
|
277
|
+
|
|
278
|
+
**Configure your MCP client:**
|
|
279
|
+
|
|
280
|
+
```json
|
|
281
|
+
{
|
|
282
|
+
"mcpServers": {
|
|
283
|
+
"docs-mcp-server": {
|
|
284
|
+
"type": "sse",
|
|
285
|
+
"url": "http://localhost:6280/sse",
|
|
286
|
+
"disabled": false,
|
|
287
|
+
"autoApprove": []
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
277
291
|
```
|
|
278
292
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
You can run the Docs MCP Server without installing or cloning the repo:
|
|
282
|
-
|
|
283
|
-
1. **Run the server:**
|
|
284
|
-
```bash
|
|
285
|
-
npx @arabold/docs-mcp-server@latest
|
|
286
|
-
```
|
|
287
|
-
2. **Set your OpenAI API key:**
|
|
288
|
-
- Use the `OPENAI_API_KEY` environment variable.
|
|
289
|
-
- Example:
|
|
290
|
-
```bash
|
|
291
|
-
OPENAI_API_KEY="sk-proj-..." npx @arabold/docs-mcp-server@latest
|
|
292
|
-
```
|
|
293
|
-
3. **Configure your MCP client:**
|
|
294
|
-
- Use the same settings as in the Docker example, but replace the `command` and `args` with the `npx` command above.
|
|
295
|
-
|
|
296
|
-
**Note:** Data is stored in a temporary directory and will not persist between runs. For persistent storage, use Docker or a local install.
|
|
293
|
+
**Alternative connection types:**
|
|
297
294
|
|
|
298
|
-
|
|
295
|
+
```json
|
|
296
|
+
// SSE (Server-Sent Events)
|
|
297
|
+
"type": "sse", "url": "http://localhost:6280/sse"
|
|
299
298
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
```bash
|
|
303
|
-
npx @arabold/docs-mcp-server@latest <command> [options]
|
|
299
|
+
// HTTP (Streamable)
|
|
300
|
+
"type": "http", "url": "http://localhost:6280/mcp"
|
|
304
301
|
```
|
|
305
302
|
|
|
306
|
-
|
|
303
|
+
**Access interfaces:**
|
|
307
304
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
305
|
+
- Web Interface: `http://localhost:6281`
|
|
306
|
+
- MCP Endpoint (HTTP): `http://localhost:6280/mcp`
|
|
307
|
+
- MCP Endpoint (SSE): `http://localhost:6280/sse`
|
|
311
308
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
```bash
|
|
315
|
-
npx @arabold/docs-mcp-server@latest --help
|
|
316
|
-
```
|
|
309
|
+
This architecture allows independent scaling of processing (workers) and user interfaces.
|
|
317
310
|
|
|
318
311
|
## Configuration
|
|
319
312
|
|
|
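Given the ports listed under "Service architecture", a quick smoke test after `docker compose up -d` could look like this; the endpoints are the documented ones, but the exact responses (and whether HEAD requests are supported) are not specified in the README:

```bash
# Probe each documented endpoint; --max-time guards against the SSE
# endpoint holding the connection open.
curl -sI --max-time 5 http://localhost:6281/      # web interface
curl -sI --max-time 5 http://localhost:6280/mcp   # MCP endpoint (HTTP)
curl -sI --max-time 5 http://localhost:6280/sse   # MCP endpoint (SSE)
```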
@@ -333,8 +326,6 @@ The Docs MCP Server is configured via environment variables. Set these in your shell
 | `AZURE_OPENAI_API_INSTANCE_NAME` | Azure OpenAI instance name. |
 | `AZURE_OPENAI_API_DEPLOYMENT_NAME` | Azure OpenAI deployment name. |
 | `AZURE_OPENAI_API_VERSION` | Azure OpenAI API version. |
-| `DOCS_MCP_DATA_DIR` | Data directory (default: `./data`). |
-| `DOCS_MCP_PORT` | Server port (default: `6281`). |
 
 See [examples above](#alternative-using-docker) for usage.
 
@@ -350,7 +341,74 @@ Set `DOCS_MCP_EMBEDDING_MODEL` to one of:
 - `microsoft:text-embedding-ada-002` (Azure OpenAI)
 - Or any OpenAI-compatible model name
 
-
+### Provider-Specific Configuration Examples
+
+Here are complete configuration examples for different embedding providers:
+
+**OpenAI (Default):**
+
+```bash
+OPENAI_API_KEY="sk-proj-your-openai-api-key" \
+DOCS_MCP_EMBEDDING_MODEL="text-embedding-3-small" \
+npx @arabold/docs-mcp-server@latest
+```
+
+**Ollama (Local):**
+
+```bash
+OPENAI_API_KEY="ollama" \
+OPENAI_API_BASE="http://localhost:11434/v1" \
+DOCS_MCP_EMBEDDING_MODEL="nomic-embed-text" \
+npx @arabold/docs-mcp-server@latest
+```
+
+**LM Studio (Local):**
+
+```bash
+OPENAI_API_KEY="lmstudio" \
+OPENAI_API_BASE="http://localhost:1234/v1" \
+DOCS_MCP_EMBEDDING_MODEL="text-embedding-qwen3-embedding-4b" \
+npx @arabold/docs-mcp-server@latest
+```
+
+**Google Gemini:**
+
+```bash
+GOOGLE_API_KEY="your-google-api-key" \
+DOCS_MCP_EMBEDDING_MODEL="gemini:embedding-001" \
+npx @arabold/docs-mcp-server@latest
+```
+
+**Google Vertex AI:**
+
+```bash
+GOOGLE_APPLICATION_CREDENTIALS="/path/to/your/gcp-service-account.json" \
+DOCS_MCP_EMBEDDING_MODEL="vertex:text-embedding-004" \
+npx @arabold/docs-mcp-server@latest
+```
+
+**AWS Bedrock:**
+
+```bash
+AWS_ACCESS_KEY_ID="your-aws-access-key-id" \
+AWS_SECRET_ACCESS_KEY="your-aws-secret-access-key" \
+AWS_REGION="us-east-1" \
+DOCS_MCP_EMBEDDING_MODEL="aws:amazon.titan-embed-text-v1" \
+npx @arabold/docs-mcp-server@latest
+```
+
+**Azure OpenAI:**
+
+```bash
+AZURE_OPENAI_API_KEY="your-azure-openai-api-key" \
+AZURE_OPENAI_API_INSTANCE_NAME="your-instance-name" \
+AZURE_OPENAI_API_DEPLOYMENT_NAME="your-deployment-name" \
+AZURE_OPENAI_API_VERSION="2024-02-01" \
+DOCS_MCP_EMBEDDING_MODEL="microsoft:text-embedding-ada-002" \
+npx @arabold/docs-mcp-server@latest
+```
+
+For more architectural details, see the [ARCHITECTURE.md](ARCHITECTURE.md).
 
 ## Development
 
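The provider examples above all launch via npx; the standalone Docker instructions earlier in this README take the same variables with `-e`, as the Docker examples removed in this diff did. For instance, the Ollama configuration under the new standalone Docker invocation would presumably look like the sketch below. Note `host.docker.internal` replaces `localhost` so the container can reach an Ollama instance on the host; this is an assumption about the deployment, and on Linux it may additionally need `--add-host=host.docker.internal:host-gateway`:

```bash
# Sketch combining the standalone docker run from this README with the
# Ollama variables from the npx example above.
docker run --rm \
  -e OPENAI_API_KEY="ollama" \
  -e OPENAI_API_BASE="http://host.docker.internal:11434/v1" \
  -e DOCS_MCP_EMBEDDING_MODEL="nomic-embed-text" \
  -v docs-mcp-data:/data \
  -p 6280:6280 \
  ghcr.io/arabold/docs-mcp-server:latest \
  --protocol http --port 6280
```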
package/db/migrations/002-normalize-library-table.sql
ADDED
@@ -0,0 +1,50 @@
+-- Migration: Normalize schema by introducing libraries and versions tables
+
+-- 1. Create libraries table
+CREATE TABLE IF NOT EXISTS libraries (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  name TEXT NOT NULL UNIQUE,
+  created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+
+-- 2. Create versions table
+CREATE TABLE IF NOT EXISTS versions (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  library_id INTEGER NOT NULL REFERENCES libraries(id),
+  name TEXT, -- NULL for unversioned content
+  created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+  UNIQUE(library_id, name) -- Allows one NULL version per library
+);
+
+-- 3. Add foreign key columns to documents
+ALTER TABLE documents ADD COLUMN library_id INTEGER REFERENCES libraries(id);
+ALTER TABLE documents ADD COLUMN version_id INTEGER REFERENCES versions(id);
+
+-- 4. Populate libraries table from existing documents
+INSERT OR IGNORE INTO libraries (name)
+SELECT DISTINCT library FROM documents;
+
+-- 5. Populate versions table (convert empty string to NULL for unversioned)
+INSERT OR IGNORE INTO versions (library_id, name)
+SELECT DISTINCT
+  l.id,
+  CASE WHEN d.version = '' THEN NULL ELSE d.version END
+FROM documents d
+JOIN libraries l ON l.name = d.library;
+
+-- 6. Update documents with foreign key references
+UPDATE documents
+SET library_id = (SELECT id FROM libraries WHERE libraries.name = documents.library),
+    version_id = (
+      SELECT v.id FROM versions v
+      JOIN libraries l ON v.library_id = l.id
+      WHERE l.name = documents.library
+        AND COALESCE(v.name, '') = COALESCE(documents.version, '')
+    );
+
+-- 7. Add indexes for performance
+CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id);
+CREATE INDEX IF NOT EXISTS idx_documents_version_id ON documents(version_id);
+CREATE INDEX IF NOT EXISTS idx_versions_library_id ON versions(library_id);
+
+-- Note: documents_vec table and FTS triggers will be updated in subsequent migrations.
package/db/migrations/003-normalize-vector-table.sql
ADDED
@@ -0,0 +1,33 @@
+-- Migration: Normalize documents_vec table to use library_id and version_id
+-- Optimized for large datasets (1GB+)
+
+-- 1. Ensure optimal indexes for the migration JOIN
+CREATE INDEX IF NOT EXISTS idx_documents_id_lib_ver ON documents(id, library_id, version_id);
+
+-- 2. Create temporary table to store vector data with foreign key IDs
+CREATE TEMPORARY TABLE temp_vector_migration AS
+SELECT
+  dv.rowid,
+  d.library_id,
+  d.version_id,
+  dv.embedding
+FROM documents_vec dv
+JOIN documents d ON dv.rowid = d.id;
+
+-- 3. Drop the old virtual table
+DROP TABLE documents_vec;
+
+-- 4. Create new virtual table with normalized schema
+CREATE VIRTUAL TABLE documents_vec USING vec0(
+  library_id INTEGER NOT NULL,
+  version_id INTEGER NOT NULL,
+  embedding FLOAT[1536]
+);
+
+-- 5. Restore vector data using foreign key IDs
+INSERT INTO documents_vec (rowid, library_id, version_id, embedding)
+SELECT rowid, library_id, version_id, embedding
+FROM temp_vector_migration;
+
+-- 6. Clean up temporary table
+DROP TABLE temp_vector_migration;
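Because step 2 joins `documents_vec` to `documents` before the rebuild, only vectors with a matching document survive; otherwise the row count of `documents_vec` should be unchanged by the migration. Counting before and after (again with a placeholder database path) verifies the round-trip:

```bash
# Run before and after the migration; the two counts should match unless
# orphaned vectors (no matching documents row) were dropped by the JOIN.
sqlite3 docs.db "SELECT COUNT(*) FROM documents_vec;"
```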