@arabold/docs-mcp-server 1.16.1 → 1.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +269 -209
- package/db/migrations/002-normalize-library-table.sql +50 -0
- package/db/migrations/003-normalize-vector-table.sql +33 -0
- package/db/migrations/004-complete-normalization.sql +67 -0
- package/db/migrations/005-add-status-tracking.sql +42 -0
- package/db/migrations/006-add-scraper-options.sql +16 -0
- package/dist/EmbeddingFactory-CElwVk3X.js.map +1 -1
- package/dist/assets/main.css +1 -1
- package/dist/assets/main.js +8209 -7646
- package/dist/assets/main.js.map +1 -1
- package/dist/index.js +5961 -4062
- package/dist/index.js.map +1 -1
- package/package.json +35 -35
- package/public/assets/main.css +1 -1
- package/public/assets/main.js +8209 -7646
- package/public/assets/main.js.map +1 -1
package/README.md
CHANGED
|
@@ -38,62 +38,96 @@ LLM-assisted coding promises speed and efficiency, but often falls short due to:
|
|
|
38
38
|
|
|
39
39
|
## How to Run the Docs MCP Server
|
|
40
40
|
|
|
41
|
-
|
|
41
|
+
Choose your deployment method:
|
|
42
42
|
|
|
43
|
-
- [
|
|
44
|
-
- [
|
|
45
|
-
- [
|
|
43
|
+
- [Standalone Server (Recommended)](#standalone-server-recommended)
|
|
44
|
+
- [Embedded Server](#embedded-server)
|
|
45
|
+
- [Advanced: Docker Compose (Scaling)](#advanced-docker-compose-scaling)
|
|
46
46
|
|
|
47
|
-
##
|
|
47
|
+
## Standalone Server (Recommended)
|
|
48
48
|
|
|
49
|
-
Run
|
|
49
|
+
Run a standalone server that includes both MCP endpoints and web interface in a single process. This is the easiest way to get started.
|
|
50
|
+
|
|
51
|
+
### Option 1: Docker
|
|
52
|
+
|
|
53
|
+
1. **Install Docker.**
|
|
54
|
+
2. **Start the server:**
|
|
50
55
|
|
|
51
|
-
1. **Install Docker and Docker Compose.**
|
|
52
|
-
2. **Clone the repository:**
|
|
53
|
-
```bash
|
|
54
|
-
git clone https://github.com/arabold/docs-mcp-server.git
|
|
55
|
-
cd docs-mcp-server
|
|
56
|
-
```
|
|
57
|
-
3. **Set up your environment:**
|
|
58
|
-
Copy the example environment file and add your OpenAI API key:
|
|
59
56
|
```bash
|
|
60
|
-
|
|
61
|
-
|
|
57
|
+
docker run --rm \
|
|
58
|
+
-e OPENAI_API_KEY="your-openai-api-key" \
|
|
59
|
+
-v docs-mcp-data:/data \
|
|
60
|
+
-p 6280:6280 \
|
|
61
|
+
ghcr.io/arabold/docs-mcp-server:latest \
|
|
62
|
+
--protocol http --port 6280
|
|
62
63
|
```
|
|
63
|
-
|
|
64
|
+
|
|
65
|
+
Replace `your-openai-api-key` with your actual OpenAI API key.
|
|
66
|
+
|
|
67
|
+
### Option 2: npx
|
|
68
|
+
|
|
69
|
+
1. **Install Node.js 22.x or later.**
|
|
70
|
+
2. **Start the server:**
|
|
71
|
+
|
|
64
72
|
```bash
|
|
65
|
-
|
|
73
|
+
OPENAI_API_KEY="your-openai-api-key" npx @arabold/docs-mcp-server@latest
|
|
66
74
|
```
|
|
67
|
-
- Use `-d` for detached mode. Omit to see logs in your terminal.
|
|
68
|
-
- To rebuild after updates: `docker compose up -d --build`.
|
|
69
|
-
5. **Configure your MCP client:**
|
|
70
|
-
Add this to your MCP settings:
|
|
71
|
-
```json
|
|
72
|
-
{
|
|
73
|
-
"mcpServers": {
|
|
74
|
-
"docs-mcp-server": {
|
|
75
|
-
"url": "http://localhost:6280/sse",
|
|
76
|
-
"disabled": false,
|
|
77
|
-
"autoApprove": []
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
```
|
|
82
|
-
Restart your AI assistant after updating the config.
|
|
83
|
-
6. **Access the Web Interface:**
|
|
84
|
-
Open `http://localhost:6281` in your browser.
|
|
85
75
|
|
|
86
|
-
|
|
76
|
+
Replace `your-openai-api-key` with your actual OpenAI API key.
|
|
77
|
+
|
|
78
|
+
This will run the server on port 6280 by default.
|
|
79
|
+
|
|
80
|
+
### Configure Your MCP Client
|
|
87
81
|
|
|
88
|
-
|
|
89
|
-
- Persistent data storage via Docker volume
|
|
90
|
-
- Easy config via `.env`
|
|
82
|
+
Add this to your MCP settings (VS Code, Claude Desktop, etc.):
|
|
91
83
|
|
|
92
|
-
|
|
84
|
+
```json
|
|
85
|
+
{
|
|
86
|
+
"mcpServers": {
|
|
87
|
+
"docs-mcp-server": {
|
|
88
|
+
"type": "sse",
|
|
89
|
+
"url": "http://localhost:6280/sse",
|
|
90
|
+
"disabled": false,
|
|
91
|
+
"autoApprove": []
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**Alternative connection types:**
|
|
98
|
+
|
|
99
|
+
```json
|
|
100
|
+
// SSE (Server-Sent Events)
|
|
101
|
+
"type": "sse", "url": "http://localhost:6280/sse"
|
|
102
|
+
|
|
103
|
+
// HTTP (Streamable)
|
|
104
|
+
"type": "http", "url": "http://localhost:6280/mcp"
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Restart your AI assistant after updating the config.
|
|
108
|
+
|
|
109
|
+
### Access the Web Interface
|
|
110
|
+
|
|
111
|
+
Open `http://localhost:6280` in your browser to manage documentation and monitor jobs.
|
|
112
|
+
|
|
113
|
+
### CLI Usage with Standalone Server
|
|
114
|
+
|
|
115
|
+
You can also use CLI commands to interact with the local database:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
# List indexed libraries
|
|
119
|
+
OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest list
|
|
120
|
+
|
|
121
|
+
# Search documentation
|
|
122
|
+
OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest search react "useState hook"
|
|
123
|
+
|
|
124
|
+
# Scrape new documentation (connects to running server's worker)
|
|
125
|
+
OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest scrape react https://react.dev/reference/react --server-url http://localhost:6280/api
|
|
126
|
+
```
|
|
93
127
|
|
|
94
128
|
### Adding Library Documentation
|
|
95
129
|
|
|
96
|
-
1. Open the Web Interface at `http://localhost:
|
|
130
|
+
1. Open the Web Interface at `http://localhost:6280`.
|
|
97
131
|
2. Use the "Queue New Scrape Job" form.
|
|
98
132
|
3. Enter the documentation URL, library name, and (optionally) version.
|
|
99
133
|
4. Click "Queue Job". Monitor progress in the Job Queue.
|
|
@@ -101,6 +135,87 @@ To stop, run `docker compose down`.
|
|
|
101
135
|
|
|
102
136
|
Once a job completes, the docs are searchable via your AI assistant or the Web UI.
|
|
103
137
|
|
|
138
|
+

|
|
139
|
+
|
|
140
|
+
**Benefits:**
|
|
141
|
+
|
|
142
|
+
- Single command setup with both web UI and MCP server
|
|
143
|
+
- Persistent data storage (Docker volume or local directory)
|
|
144
|
+
- No repository cloning required
|
|
145
|
+
- Full feature access including web interface
|
|
146
|
+
|
|
147
|
+
To stop the server, press `Ctrl+C`.
|
|
148
|
+
|
|
149
|
+
## Embedded Server
|
|
150
|
+
|
|
151
|
+
Run the MCP server directly embedded in your AI assistant without a separate process or web interface. This method provides MCP integration only.
|
|
152
|
+
|
|
153
|
+
### Configure Your MCP Client
|
|
154
|
+
|
|
155
|
+
Add this to your MCP settings (VS Code, Claude Desktop, etc.):
|
|
156
|
+
|
|
157
|
+
```json
|
|
158
|
+
{
|
|
159
|
+
"mcpServers": {
|
|
160
|
+
"docs-mcp-server": {
|
|
161
|
+
"command": "npx",
|
|
162
|
+
"args": ["@arabold/docs-mcp-server@latest"],
|
|
163
|
+
"env": {
|
|
164
|
+
"OPENAI_API_KEY": "sk-proj-..." // Your OpenAI API key
|
|
165
|
+
},
|
|
166
|
+
"disabled": false,
|
|
167
|
+
"autoApprove": []
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Replace `sk-proj-...` with your OpenAI API key and restart your application.
|
|
174
|
+
|
|
175
|
+
### Adding Library Documentation
|
|
176
|
+
|
|
177
|
+
**Option 1: Use MCP Tools**
|
|
178
|
+
|
|
179
|
+
Your AI assistant can index new documentation using the built-in `scrape_docs` tool:
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
Please scrape the React documentation from https://react.dev/reference/react for library "react" version "18.x"
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Option 2: Launch Web Interface**
|
|
186
|
+
|
|
187
|
+
Start a temporary web interface that shares the same database:
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest web --port 6281
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Then open `http://localhost:6281` to manage documentation. Stop the web interface when done (`Ctrl+C`).
|
|
194
|
+
|
|
195
|
+
**Option 3: CLI Commands**
|
|
196
|
+
|
|
197
|
+
Use CLI commands directly (avoid running scrape jobs concurrently with the embedded server):
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
# List libraries
|
|
201
|
+
OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest list
|
|
202
|
+
|
|
203
|
+
# Search documentation
|
|
204
|
+
OPENAI_API_KEY="your-key" npx @arabold/docs-mcp-server@latest search react "useState hook"
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
**Benefits:**
|
|
208
|
+
|
|
209
|
+
- Direct integration with AI assistant
|
|
210
|
+
- No separate server process required
|
|
211
|
+
- Persistent data storage in user's home directory
|
|
212
|
+
- Shared database with standalone server and CLI
|
|
213
|
+
|
|
214
|
+
**Limitations:**
|
|
215
|
+
|
|
216
|
+
- No web interface (unless launched separately)
|
|
217
|
+
- Documentation indexing requires MCP tools or separate commands
|
|
218
|
+
|
|
104
219
|
## Scraping Local Files and Folders
|
|
105
220
|
|
|
106
221
|
You can index documentation from your local filesystem by using a `file://` URL as the source. This works in both the Web UI and CLI.
|
|
@@ -116,7 +231,7 @@ You can index documentation from your local filesystem by using a `file://` URL
|
|
|
116
231
|
- All files with a MIME type of `text/*` are processed. This includes HTML, Markdown, plain text, and source code files such as `.js`, `.ts`, `.tsx`, `.css`, etc. Binary files, PDFs, images, and other non-text formats are ignored.
|
|
117
232
|
- You must use the `file://` prefix for local files/folders.
|
|
118
233
|
- The path must be accessible to the server process.
|
|
119
|
-
- **If running in Docker
|
|
234
|
+
- **If running in Docker:**
|
|
120
235
|
- You must mount the local folder into the container and use the container path in your `file://` URL.
|
|
121
236
|
- Example Docker run:
|
|
122
237
|
```bash
|
|
@@ -131,187 +246,67 @@ You can index documentation from your local filesystem by using a `file://` URL
|
|
|
131
246
|
|
|
132
247
|
See the tooltips in the Web UI and CLI help for more details.
|
|
133
248
|
|
|
134
|
-
##
|
|
135
|
-
|
|
136
|
-
> **Note:** The published Docker images support both x86_64 (amd64) and Mac Silicon (arm64).
|
|
137
|
-
|
|
138
|
-
This method is simple and doesn't require cloning the repository.
|
|
139
|
-
|
|
140
|
-
1. **Install and start Docker.**
|
|
141
|
-
2. **Configure your MCP client:**
|
|
142
|
-
Add this block to your MCP settings (adjust as needed):
|
|
143
|
-
```json
|
|
144
|
-
{
|
|
145
|
-
"mcpServers": {
|
|
146
|
-
"docs-mcp-server": {
|
|
147
|
-
"command": "docker",
|
|
148
|
-
"args": [
|
|
149
|
-
"run",
|
|
150
|
-
"-i",
|
|
151
|
-
"--rm",
|
|
152
|
-
"-e",
|
|
153
|
-
"OPENAI_API_KEY",
|
|
154
|
-
"-v",
|
|
155
|
-
"docs-mcp-data:/data",
|
|
156
|
-
"ghcr.io/arabold/docs-mcp-server:latest"
|
|
157
|
-
],
|
|
158
|
-
"env": {
|
|
159
|
-
"OPENAI_API_KEY": "sk-proj-..." // Your OpenAI API key
|
|
160
|
-
},
|
|
161
|
-
"disabled": false,
|
|
162
|
-
"autoApprove": []
|
|
163
|
-
}
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
```
|
|
167
|
-
Replace `sk-proj-...` with your OpenAI API key. Restart your application.
|
|
168
|
-
3. **Done!** The server is now available to your AI assistant.
|
|
169
|
-
|
|
170
|
-
**Docker Container Settings:**
|
|
249
|
+
## Advanced: Docker Compose (Scaling)
|
|
171
250
|
|
|
172
|
-
|
|
173
|
-
- `--rm`: Removes the container on exit.
|
|
174
|
-
- `-e OPENAI_API_KEY`: **Required.**
|
|
175
|
-
- `-v docs-mcp-data:/data`: **Required for persistence.**
|
|
251
|
+
For production deployments or when you need to scale processing, use Docker Compose to run separate services.
|
|
176
252
|
|
|
177
|
-
|
|
253
|
+
> **Note:** This feature is a work in progress and will be improved in future releases. Currently, all services must still share the same database volume, which defeats its original purpose.
|
|
178
254
|
|
|
179
|
-
**
|
|
255
|
+
**Start the services:**
|
|
180
256
|
|
|
181
257
|
```bash
|
|
182
|
-
#
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
-e DOCS_MCP_EMBEDDING_MODEL="text-embedding-3-small" \
|
|
186
|
-
-v docs-mcp-data:/data \
|
|
187
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
188
|
-
|
|
189
|
-
# OpenAI-compatible API (Ollama)
|
|
190
|
-
docker run -i --rm \
|
|
191
|
-
-e OPENAI_API_KEY="your-key" \
|
|
192
|
-
-e OPENAI_API_BASE="http://localhost:11434/v1" \
|
|
193
|
-
-e DOCS_MCP_EMBEDDING_MODEL="embeddings" \
|
|
194
|
-
-v docs-mcp-data:/data \
|
|
195
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
196
|
-
|
|
197
|
-
# Google Vertex AI
|
|
198
|
-
docker run -i --rm \
|
|
199
|
-
-e DOCS_MCP_EMBEDDING_MODEL="vertex:text-embedding-004" \
|
|
200
|
-
-e GOOGLE_APPLICATION_CREDENTIALS="/app/gcp-key.json" \
|
|
201
|
-
-v docs-mcp-data:/data \
|
|
202
|
-
-v /path/to/gcp-key.json:/app/gcp-key.json:ro \
|
|
203
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
204
|
-
|
|
205
|
-
# Google Gemini
|
|
206
|
-
docker run -i --rm \
|
|
207
|
-
-e DOCS_MCP_EMBEDDING_MODEL="gemini:embedding-001" \
|
|
208
|
-
-e GOOGLE_API_KEY="your-google-api-key" \
|
|
209
|
-
-v docs-mcp-data:/data \
|
|
210
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
211
|
-
|
|
212
|
-
# AWS Bedrock
|
|
213
|
-
docker run -i --rm \
|
|
214
|
-
-e AWS_ACCESS_KEY_ID="your-aws-key" \
|
|
215
|
-
-e AWS_SECRET_ACCESS_KEY="your-aws-secret" \
|
|
216
|
-
-e AWS_REGION="us-east-1" \
|
|
217
|
-
-e DOCS_MCP_EMBEDDING_MODEL="aws:amazon.titan-embed-text-v1" \
|
|
218
|
-
-v docs-mcp-data:/data \
|
|
219
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
220
|
-
|
|
221
|
-
# Azure OpenAI
|
|
222
|
-
docker run -i --rm \
|
|
223
|
-
-e AZURE_OPENAI_API_KEY="your-azure-key" \
|
|
224
|
-
-e AZURE_OPENAI_API_INSTANCE_NAME="your-instance" \
|
|
225
|
-
-e AZURE_OPENAI_API_DEPLOYMENT_NAME="your-deployment" \
|
|
226
|
-
-e AZURE_OPENAI_API_VERSION="2024-02-01" \
|
|
227
|
-
-e DOCS_MCP_EMBEDDING_MODEL="microsoft:text-embedding-ada-002" \
|
|
228
|
-
-v docs-mcp-data:/data \
|
|
229
|
-
ghcr.io/arabold/docs-mcp-server:latest
|
|
230
|
-
```
|
|
258
|
+
# Clone the repository (to get docker-compose.yml)
|
|
259
|
+
git clone https://github.com/arabold/docs-mcp-server.git
|
|
260
|
+
cd docs-mcp-server
|
|
231
261
|
|
|
232
|
-
|
|
262
|
+
# Set your environment variables
|
|
263
|
+
export OPENAI_API_KEY="your-key-here"
|
|
233
264
|
|
|
234
|
-
|
|
265
|
+
# Start all services
|
|
266
|
+
docker compose up -d
|
|
235
267
|
|
|
236
|
-
|
|
237
|
-
docker
|
|
238
|
-
-e OPENAI_API_KEY="your-openai-api-key" \
|
|
239
|
-
-v docs-mcp-data:/data \
|
|
240
|
-
-p 6281:6281 \
|
|
241
|
-
ghcr.io/arabold/docs-mcp-server:latest \
|
|
242
|
-
web --port 6281
|
|
268
|
+
# Scale workers if needed
|
|
269
|
+
docker compose up -d --scale worker=3
|
|
243
270
|
```
|
|
244
271
|
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
```
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
272
|
+
**Service architecture:**
|
|
273
|
+
|
|
274
|
+
- **Worker** (port 8080): Handles documentation processing jobs
|
|
275
|
+
- **MCP Server** (port 6280): Provides the `/sse` and `/mcp` endpoints for AI tools
|
|
276
|
+
- **Web Interface** (port 6281): Browser-based management interface
|
|
277
|
+
|
|
278
|
+
**Configure your MCP client:**
|
|
279
|
+
|
|
280
|
+
```json
|
|
281
|
+
{
|
|
282
|
+
"mcpServers": {
|
|
283
|
+
"docs-mcp-server": {
|
|
284
|
+
"type": "sse",
|
|
285
|
+
"url": "http://localhost:6280/sse",
|
|
286
|
+
"disabled": false,
|
|
287
|
+
"autoApprove": []
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
259
291
|
```
|
|
260
292
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
```bash
|
|
264
|
-
docker run --rm \
|
|
265
|
-
-e OPENAI_API_KEY="your-openai-api-key" \
|
|
266
|
-
-v docs-mcp-data:/data \
|
|
267
|
-
ghcr.io/arabold/docs-mcp-server:latest \
|
|
268
|
-
list
|
|
269
|
-
```
|
|
293
|
+
**Alternative connection types:**
|
|
270
294
|
|
|
271
|
-
|
|
295
|
+
```json
|
|
296
|
+
// SSE (Server-Sent Events)
|
|
297
|
+
"type": "sse", "url": "http://localhost:6280/sse"
|
|
272
298
|
|
|
273
|
-
|
|
274
|
-
|
|
299
|
+
// HTTP (Streamable)
|
|
300
|
+
"type": "http", "url": "http://localhost:6280/mcp"
|
|
275
301
|
```
|
|
276
302
|
|
|
277
|
-
|
|
303
|
+
**Access interfaces:**
|
|
278
304
|
|
|
279
|
-
|
|
305
|
+
- Web Interface: `http://localhost:6281`
|
|
306
|
+
- MCP Endpoint (HTTP): `http://localhost:6280/mcp`
|
|
307
|
+
- MCP Endpoint (SSE): `http://localhost:6280/sse`
|
|
280
308
|
|
|
281
|
-
|
|
282
|
-
```bash
|
|
283
|
-
npx @arabold/docs-mcp-server@latest
|
|
284
|
-
```
|
|
285
|
-
2. **Set your OpenAI API key:**
|
|
286
|
-
- Use the `OPENAI_API_KEY` environment variable.
|
|
287
|
-
- Example:
|
|
288
|
-
```bash
|
|
289
|
-
OPENAI_API_KEY="sk-proj-..." npx @arabold/docs-mcp-server@latest
|
|
290
|
-
```
|
|
291
|
-
3. **Configure your MCP client:**
|
|
292
|
-
- Use the same settings as in the Docker example, but replace the `command` and `args` with the `npx` command above.
|
|
293
|
-
|
|
294
|
-
**Note:** Data is stored in a temporary directory and will not persist between runs. For persistent storage, use Docker or a local install.
|
|
295
|
-
|
|
296
|
-
### CLI via npx
|
|
297
|
-
|
|
298
|
-
You can run CLI commands directly with npx, without installing the package globally:
|
|
299
|
-
|
|
300
|
-
```bash
|
|
301
|
-
npx @arabold/docs-mcp-server@latest <command> [options]
|
|
302
|
-
```
|
|
303
|
-
|
|
304
|
-
Example:
|
|
305
|
-
|
|
306
|
-
```bash
|
|
307
|
-
npx @arabold/docs-mcp-server@latest list
|
|
308
|
-
```
|
|
309
|
-
|
|
310
|
-
For command help, run:
|
|
311
|
-
|
|
312
|
-
```bash
|
|
313
|
-
npx @arabold/docs-mcp-server@latest --help
|
|
314
|
-
```
|
|
309
|
+
This architecture allows independent scaling of processing (workers) and user interfaces.
|
|
315
310
|
|
|
316
311
|
## Configuration
|
|
317
312
|
|
|
@@ -331,8 +326,6 @@ The Docs MCP Server is configured via environment variables. Set these in your s
|
|
|
331
326
|
| `AZURE_OPENAI_API_INSTANCE_NAME` | Azure OpenAI instance name. |
|
|
332
327
|
| `AZURE_OPENAI_API_DEPLOYMENT_NAME` | Azure OpenAI deployment name. |
|
|
333
328
|
| `AZURE_OPENAI_API_VERSION` | Azure OpenAI API version. |
|
|
334
|
-
| `DOCS_MCP_DATA_DIR` | Data directory (default: `./data`). |
|
|
335
|
-
| `DOCS_MCP_PORT` | Server port (default: `6281`). |
|
|
336
329
|
|
|
337
330
|
See the [provider-specific configuration examples](#provider-specific-configuration-examples) below for usage.
|
|
338
331
|
|
|
@@ -348,7 +341,74 @@ Set `DOCS_MCP_EMBEDDING_MODEL` to one of:
|
|
|
348
341
|
- `microsoft:text-embedding-ada-002` (Azure OpenAI)
|
|
349
342
|
- Or any OpenAI-compatible model name
|
|
350
343
|
|
|
351
|
-
|
|
344
|
+
### Provider-Specific Configuration Examples
|
|
345
|
+
|
|
346
|
+
Here are complete configuration examples for different embedding providers:
|
|
347
|
+
|
|
348
|
+
**OpenAI (Default):**
|
|
349
|
+
|
|
350
|
+
```bash
|
|
351
|
+
OPENAI_API_KEY="sk-proj-your-openai-api-key" \
|
|
352
|
+
DOCS_MCP_EMBEDDING_MODEL="text-embedding-3-small" \
|
|
353
|
+
npx @arabold/docs-mcp-server@latest
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
**Ollama (Local):**
|
|
357
|
+
|
|
358
|
+
```bash
|
|
359
|
+
OPENAI_API_KEY="ollama" \
|
|
360
|
+
OPENAI_API_BASE="http://localhost:11434/v1" \
|
|
361
|
+
DOCS_MCP_EMBEDDING_MODEL="nomic-embed-text" \
|
|
362
|
+
npx @arabold/docs-mcp-server@latest
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
**LM Studio (Local):**
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
OPENAI_API_KEY="lmstudio" \
|
|
369
|
+
OPENAI_API_BASE="http://localhost:1234/v1" \
|
|
370
|
+
DOCS_MCP_EMBEDDING_MODEL="text-embedding-qwen3-embedding-4b" \
|
|
371
|
+
npx @arabold/docs-mcp-server@latest
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
**Google Gemini:**
|
|
375
|
+
|
|
376
|
+
```bash
|
|
377
|
+
GOOGLE_API_KEY="your-google-api-key" \
|
|
378
|
+
DOCS_MCP_EMBEDDING_MODEL="gemini:embedding-001" \
|
|
379
|
+
npx @arabold/docs-mcp-server@latest
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
**Google Vertex AI:**
|
|
383
|
+
|
|
384
|
+
```bash
|
|
385
|
+
GOOGLE_APPLICATION_CREDENTIALS="/path/to/your/gcp-service-account.json" \
|
|
386
|
+
DOCS_MCP_EMBEDDING_MODEL="vertex:text-embedding-004" \
|
|
387
|
+
npx @arabold/docs-mcp-server@latest
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
**AWS Bedrock:**
|
|
391
|
+
|
|
392
|
+
```bash
|
|
393
|
+
AWS_ACCESS_KEY_ID="your-aws-access-key-id" \
|
|
394
|
+
AWS_SECRET_ACCESS_KEY="your-aws-secret-access-key" \
|
|
395
|
+
AWS_REGION="us-east-1" \
|
|
396
|
+
DOCS_MCP_EMBEDDING_MODEL="aws:amazon.titan-embed-text-v1" \
|
|
397
|
+
npx @arabold/docs-mcp-server@latest
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
**Azure OpenAI:**
|
|
401
|
+
|
|
402
|
+
```bash
|
|
403
|
+
AZURE_OPENAI_API_KEY="your-azure-openai-api-key" \
|
|
404
|
+
AZURE_OPENAI_API_INSTANCE_NAME="your-instance-name" \
|
|
405
|
+
AZURE_OPENAI_API_DEPLOYMENT_NAME="your-deployment-name" \
|
|
406
|
+
AZURE_OPENAI_API_VERSION="2024-02-01" \
|
|
407
|
+
DOCS_MCP_EMBEDDING_MODEL="microsoft:text-embedding-ada-002" \
|
|
408
|
+
npx @arabold/docs-mcp-server@latest
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
For more architectural details, see [ARCHITECTURE.md](ARCHITECTURE.md).
|
|
352
412
|
|
|
353
413
|
## Development
|
|
354
414
|
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
-- Migration: Normalize schema by introducing libraries and versions tables
|
|
2
|
+
|
|
3
|
+
-- 1. Create libraries table
|
|
4
|
+
CREATE TABLE IF NOT EXISTS libraries (
|
|
5
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
6
|
+
name TEXT NOT NULL UNIQUE,
|
|
7
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
8
|
+
);
|
|
9
|
+
|
|
10
|
+
-- 2. Create versions table
|
|
11
|
+
CREATE TABLE IF NOT EXISTS versions (
|
|
12
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
13
|
+
library_id INTEGER NOT NULL REFERENCES libraries(id),
|
|
14
|
+
name TEXT, -- NULL for unversioned content
|
|
15
|
+
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
|
16
|
+
UNIQUE(library_id, name) -- NOTE(review): SQLite treats NULLs as distinct in UNIQUE constraints, so this alone does not limit a library to one NULL-named version; confirm uniqueness of the unversioned row is enforced by the inserting code
|
|
17
|
+
);
|
|
18
|
+
|
|
19
|
+
-- 3. Add foreign key columns to documents
|
|
20
|
+
ALTER TABLE documents ADD COLUMN library_id INTEGER REFERENCES libraries(id);
|
|
21
|
+
ALTER TABLE documents ADD COLUMN version_id INTEGER REFERENCES versions(id);
|
|
22
|
+
|
|
23
|
+
-- 4. Populate libraries table from existing documents
|
|
24
|
+
INSERT OR IGNORE INTO libraries (name)
|
|
25
|
+
SELECT DISTINCT library FROM documents;
|
|
26
|
+
|
|
27
|
+
-- 5. Populate versions table (convert empty string to NULL for unversioned)
|
|
28
|
+
INSERT OR IGNORE INTO versions (library_id, name)
|
|
29
|
+
SELECT DISTINCT
|
|
30
|
+
l.id,
|
|
31
|
+
CASE WHEN d.version = '' THEN NULL ELSE d.version END
|
|
32
|
+
FROM documents d
|
|
33
|
+
JOIN libraries l ON l.name = d.library;
|
|
34
|
+
|
|
35
|
+
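-- 6. Backfill foreign key references on every documents row (no WHERE clause on purpose: library_id and version_id were just added, so all rows need them; '' and NULL versions both resolve to the NULL-named version row)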
|
|
36
|
+
UPDATE documents
|
|
37
|
+
SET library_id = (SELECT id FROM libraries WHERE libraries.name = documents.library),
|
|
38
|
+
version_id = (
|
|
39
|
+
SELECT v.id FROM versions v
|
|
40
|
+
JOIN libraries l ON v.library_id = l.id
|
|
41
|
+
WHERE l.name = documents.library
|
|
42
|
+
AND COALESCE(v.name, '') = COALESCE(documents.version, '')
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
-- 7. Add indexes for performance
|
|
46
|
+
CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id);
|
|
47
|
+
CREATE INDEX IF NOT EXISTS idx_documents_version_id ON documents(version_id);
|
|
48
|
+
CREATE INDEX IF NOT EXISTS idx_versions_library_id ON versions(library_id);
|
|
49
|
+
|
|
50
|
+
-- Note: documents_vec table and FTS triggers will be updated in subsequent migrations.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
-- Migration: Normalize documents_vec table to use library_id and version_id
|
|
2
|
+
-- Optimized for large datasets (1GB+)
|
|
3
|
+
|
|
4
|
+
-- 1. Ensure optimal indexes for the migration JOIN
|
|
5
|
+
CREATE INDEX IF NOT EXISTS idx_documents_id_lib_ver ON documents(id, library_id, version_id);
|
|
6
|
+
|
|
7
|
+
-- 2. Create temporary table to store vector data with foreign key IDs
|
|
8
|
+
CREATE TEMPORARY TABLE temp_vector_migration AS
|
|
9
|
+
SELECT
|
|
10
|
+
dv.rowid,
|
|
11
|
+
d.library_id,
|
|
12
|
+
d.version_id,
|
|
13
|
+
dv.embedding
|
|
14
|
+
FROM documents_vec dv
|
|
15
|
+
JOIN documents d ON dv.rowid = d.id;
|
|
16
|
+
|
|
17
|
+
-- 3. Drop the old virtual table
|
|
18
|
+
DROP TABLE documents_vec;
|
|
19
|
+
|
|
20
|
+
-- 4. Create new virtual table with normalized schema
|
|
21
|
+
CREATE VIRTUAL TABLE documents_vec USING vec0(
|
|
22
|
+
library_id INTEGER NOT NULL,
|
|
23
|
+
version_id INTEGER NOT NULL,
|
|
24
|
+
embedding FLOAT[1536]
|
|
25
|
+
);
|
|
26
|
+
|
|
27
|
+
-- 5. Restore vector data using foreign key IDs
|
|
28
|
+
INSERT INTO documents_vec (rowid, library_id, version_id, embedding)
|
|
29
|
+
SELECT rowid, library_id, version_id, embedding
|
|
30
|
+
FROM temp_vector_migration;
|
|
31
|
+
|
|
32
|
+
-- 6. Clean up temporary table
|
|
33
|
+
DROP TABLE temp_vector_migration;
|