distill-mcp-server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +171 -0
- package/docs/CLAUDE.md-snippet.md +46 -0
- package/docs/full-setup.md +149 -0
- package/docs/lightweight-setup.md +113 -0
- package/package.json +39 -0
- package/src/cache.js +28 -0
- package/src/config.js +52 -0
- package/src/errors.js +59 -0
- package/src/index.js +52 -0
- package/src/modes/full.js +68 -0
- package/src/modes/lightweight.js +47 -0
- package/src/tools/convert_and_save.js +154 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Lakshman GK
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# distill-mcp
|
|
2
|
+
|
|
3
|
+
An MCP server that connects any MCP-compatible client to Distill, converting
|
|
4
|
+
local documents to clean, token-efficient Markdown before the LLM reads them.
|
|
5
|
+
Typical token reduction is 40–80% compared to raw document text, letting the
|
|
6
|
+
model fit more content into its context window and reason over it faster.
|
|
7
|
+
|
|
8
|
+
Works with Claude Desktop, Claude Code, Cursor, Windsurf, and any other
|
|
9
|
+
MCP client that supports tool calling.
|
|
10
|
+
|
|
11
|
+
## Two Modes
|
|
12
|
+
|
|
13
|
+
| | Lightweight | Full (Docker) |
|
|
14
|
+
|---|---|---|
|
|
15
|
+
| Requires Docker | No | Yes |
|
|
16
|
+
| DOCX, XLSX, PPTX, native PDF, HTML | Yes | Yes |
|
|
17
|
+
| Scanned PDF (OCR) | No | Yes |
|
|
18
|
+
| Audio (MP3, WAV, etc.) | No | Yes |
|
|
19
|
+
| Quality score | No | Yes |
|
|
20
|
+
| Install time | ~2 min | ~10 min |
|
|
21
|
+
|
|
22
|
+
**Lightweight** needs only Node.js, Python, and pip. **Full** needs Node.js and Docker with
|
|
23
|
+
the Distill service running. See the setup guides for details.
|
|
24
|
+
|
|
25
|
+
## Quick install
|
|
26
|
+
|
|
27
|
+
No global install required:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npx -y distill-mcp-server
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Then configure Claude Desktop — see
|
|
34
|
+
[Lightweight setup](docs/lightweight-setup.md) or
|
|
35
|
+
[Full setup](docs/full-setup.md) for step-by-step instructions.
|
|
36
|
+
|
|
37
|
+
## Configuration
|
|
38
|
+
|
|
39
|
+
Add the server to your `claude_desktop_config.json`.
|
|
40
|
+
|
|
41
|
+
**Lightweight mode — macOS / Linux:**
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"mcpServers": {
|
|
46
|
+
"distill-mcp": {
|
|
47
|
+
"command": "npx",
|
|
48
|
+
"args": ["-y", "distill-mcp-server"],
|
|
49
|
+
"env": {
|
|
50
|
+
"DISTILL_MCP_CONFIG": "{\"mode\":\"lightweight\",\"python_path\":\"python3\"}"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Lightweight mode — Windows:**
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"mcpServers": {
|
|
62
|
+
"distill-mcp": {
|
|
63
|
+
"command": "npx",
|
|
64
|
+
"args": ["-y", "distill-mcp-server"],
|
|
65
|
+
"env": {
|
|
66
|
+
"DISTILL_MCP_CONFIG": "{\"mode\":\"lightweight\",\"python_path\":\"py\"}"
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Full mode — all platforms:**
|
|
74
|
+
|
|
75
|
+
```json
|
|
76
|
+
{
|
|
77
|
+
"mcpServers": {
|
|
78
|
+
"distill-mcp": {
|
|
79
|
+
"command": "npx",
|
|
80
|
+
"args": ["-y", "distill-mcp-server"],
|
|
81
|
+
"env": {
|
|
82
|
+
"DISTILL_MCP_CONFIG": "{\"mode\":\"full\",\"distill_url\":\"http://localhost:7860\"}"
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Full config key reference is in the setup guides:
|
|
90
|
+
[Lightweight](docs/lightweight-setup.md#config-reference) |
|
|
91
|
+
[Full](docs/full-setup.md#config-reference)
|
|
92
|
+
|
|
93
|
+
## CLAUDE.md snippet
|
|
94
|
+
|
|
95
|
+
Paste this into your project's `CLAUDE.md` so Claude knows how to use the
|
|
96
|
+
tool automatically. Copy the block below as-is:
|
|
97
|
+
|
|
98
|
+
```markdown
|
|
99
|
+
## Document Conversion — distill-mcp
|
|
100
|
+
|
|
101
|
+
When the user references a local file path (e.g. a PDF, DOCX, PPTX, XLSX,
|
|
102
|
+
HTML, or audio file), ALWAYS call the `convert_and_save` tool before reading
|
|
103
|
+
or reasoning about the document. Do not read the original file directly via
|
|
104
|
+
filesystem tools — use only the Markdown returned by `convert_and_save` as
|
|
105
|
+
the document content.
|
|
106
|
+
|
|
107
|
+
### Rules
|
|
108
|
+
|
|
109
|
+
1. Call `convert_and_save` with the absolute file path before doing anything
|
|
110
|
+
else with the document.
|
|
111
|
+
2. Use ONLY the Markdown output from `convert_and_save` as the document
|
|
112
|
+
content. Never read the original file with filesystem tools.
|
|
113
|
+
3. If the response includes `"overwritten": true`, tell the user that a
|
|
114
|
+
previous cached version was replaced before proceeding.
|
|
115
|
+
4. If the response includes any `warnings`, surface them to the user before
|
|
116
|
+
proceeding with the document content.
|
|
117
|
+
5. If `convert_and_save` returns an unsupported format error, tell the user
|
|
118
|
+
which formats are supported and suggest switching modes if applicable.
|
|
119
|
+
|
|
120
|
+
### Supported formats
|
|
121
|
+
|
|
122
|
+
| Category | Lightweight | Full (Docker) |
|
|
123
|
+
|---|---|---|
|
|
124
|
+
| Word | .docx, .doc, .odt | .docx, .doc, .odt |
|
|
125
|
+
| Excel | .xlsx, .xlsm, .csv | .xlsx, .xlsm, .csv |
|
|
126
|
+
| PowerPoint | .pptx, .ppt | .pptx, .ppt |
|
|
127
|
+
| PDF | .pdf (native text) | .pdf (native + scanned OCR) |
|
|
128
|
+
| HTML | .html, .htm | .html, .htm |
|
|
129
|
+
| Audio | — | .mp3, .wav, .m4a, .flac, .ogg |
|
|
130
|
+
| Other | — | .epub, .json, .sql, .wsdl, .wsd |
|
|
131
|
+
|
|
132
|
+
### Usage
|
|
133
|
+
|
|
134
|
+
Say "convert using distill" followed by the file path:
|
|
135
|
+
> Convert using distill C:\Users\me\Documents\report.pdf to markdown
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
The snippet is also available in
|
|
139
|
+
[docs/CLAUDE.md-snippet.md](docs/CLAUDE.md-snippet.md).
|
|
140
|
+
|
|
141
|
+
## Supported formats
|
|
142
|
+
|
|
143
|
+
| Category | Extensions | Lightweight | Full |
|
|
144
|
+
|---|---|---|---|
|
|
145
|
+
| Microsoft Word | `.docx`, `.doc`, `.odt` | Yes | Yes |
|
|
146
|
+
| Microsoft Excel | `.xlsx`, `.xlsm`, `.csv` | Yes | Yes |
|
|
147
|
+
| Microsoft PowerPoint | `.pptx`, `.ppt` | Yes | Yes |
|
|
148
|
+
| PDF (native text) | `.pdf` | Yes | Yes |
|
|
149
|
+
| PDF (scanned/OCR) | `.pdf` | No | Yes |
|
|
150
|
+
| HTML | `.html`, `.htm` | Yes | Yes |
|
|
151
|
+
| Audio | `.mp3`, `.wav`, `.m4a`, `.flac`, `.ogg` | No | Yes |
|
|
152
|
+
| EPUB | `.epub` | No | Yes |
|
|
153
|
+
| JSON | `.json` | No | Yes |
|
|
154
|
+
| SQL | `.sql` | No | Yes |
|
|
155
|
+
| WSDL | `.wsdl`, `.wsd` | No | Yes |
|
|
156
|
+
|
|
157
|
+
## Privacy
|
|
158
|
+
|
|
159
|
+
All processing happens locally. In lightweight mode, documents are converted
|
|
160
|
+
by the distill-core Python library on your machine — no data leaves your
|
|
161
|
+
computer. In full mode, documents are sent to the Distill Docker service
|
|
162
|
+
running locally on your machine — no data is sent to external services.
|
|
163
|
+
|
|
164
|
+
## Built on
|
|
165
|
+
|
|
166
|
+
[Distill](https://github.com/nicholasgasior/distill) — document-to-Markdown
|
|
167
|
+
conversion engine.
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
MIT
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# CLAUDE.md Snippet for distill-mcp
|
|
2
|
+
|
|
3
|
+
Paste the block below into your project's `CLAUDE.md` file. This tells Claude
|
|
4
|
+
how to use the `convert_and_save` tool when it encounters document file paths.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
```markdown
|
|
9
|
+
## Document Conversion — distill-mcp
|
|
10
|
+
|
|
11
|
+
When the user references a local file path (e.g. a PDF, DOCX, PPTX, XLSX,
|
|
12
|
+
HTML, or audio file), ALWAYS call the `convert_and_save` tool before reading
|
|
13
|
+
or reasoning about the document. Do not read the original file directly via
|
|
14
|
+
filesystem tools — use only the Markdown returned by `convert_and_save` as
|
|
15
|
+
the document content.
|
|
16
|
+
|
|
17
|
+
### Rules
|
|
18
|
+
|
|
19
|
+
1. Call `convert_and_save` with the absolute file path before doing anything
|
|
20
|
+
else with the document.
|
|
21
|
+
2. Use ONLY the Markdown output from `convert_and_save` as the document
|
|
22
|
+
content. Never read the original file with filesystem tools.
|
|
23
|
+
3. If the response includes `"overwritten": true`, tell the user that a
|
|
24
|
+
previous cached version was replaced before proceeding.
|
|
25
|
+
4. If the response includes any `warnings`, surface them to the user before
|
|
26
|
+
proceeding with the document content.
|
|
27
|
+
5. If `convert_and_save` returns an unsupported format error, tell the user
|
|
28
|
+
which formats are supported and suggest switching modes if applicable.
|
|
29
|
+
|
|
30
|
+
### Supported formats
|
|
31
|
+
|
|
32
|
+
| Category | Lightweight | Full (Docker) |
|
|
33
|
+
|---|---|---|
|
|
34
|
+
| Word | .docx, .doc, .odt | .docx, .doc, .odt |
|
|
35
|
+
| Excel | .xlsx, .xlsm, .csv | .xlsx, .xlsm, .csv |
|
|
36
|
+
| PowerPoint | .pptx, .ppt | .pptx, .ppt |
|
|
37
|
+
| PDF | .pdf (native text) | .pdf (native + scanned OCR) |
|
|
38
|
+
| HTML | .html, .htm | .html, .htm |
|
|
39
|
+
| Audio | — | .mp3, .wav, .m4a, .flac, .ogg |
|
|
40
|
+
| Other | — | .epub, .json, .sql, .wsdl, .wsd |
|
|
41
|
+
|
|
42
|
+
### Usage
|
|
43
|
+
|
|
44
|
+
Say "convert using distill" followed by the file path:
|
|
45
|
+
> Convert using distill C:\Users\me\Documents\report.pdf to markdown
|
|
46
|
+
```
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Full Mode Setup
|
|
2
|
+
|
|
3
|
+
Full mode sends documents to the Distill REST API running in Docker. It supports
|
|
4
|
+
all lightweight formats plus scanned PDFs (via OCR), audio transcription, and several additional formats (EPUB, JSON, SQL, WSDL).
|
|
5
|
+
|
|
6
|
+
## Prerequisites
|
|
7
|
+
|
|
8
|
+
- **Node.js 22 or later**
|
|
9
|
+
Download from [nodejs.org](https://nodejs.org/) if not already installed.
|
|
10
|
+
Verify: `node --version`
|
|
11
|
+
|
|
12
|
+
- **Docker Desktop installed and running**
|
|
13
|
+
If Docker is not installed, download it from
|
|
14
|
+
<https://docs.docker.com/get-docker/>
|
|
15
|
+
|
|
16
|
+
After installing, verify Docker is running:
|
|
17
|
+
```bash
|
|
18
|
+
docker info
|
|
19
|
+
```
|
|
20
|
+
This should print system information without errors.
|
|
21
|
+
|
|
22
|
+
- **Distill service running via Docker**
|
|
23
|
+
From your Distill project directory:
|
|
24
|
+
```bash
|
|
25
|
+
docker compose up -d
|
|
26
|
+
```
|
|
27
|
+
Verify the service is up:
|
|
28
|
+
```bash
|
|
29
|
+
curl http://localhost:7860/docs
|
|
30
|
+
```
|
|
31
|
+
You should see an HTML page (Swagger UI). If the command fails, check that
|
|
32
|
+
Docker is running and the container started without errors
|
|
33
|
+
(`docker compose logs`).
|
|
34
|
+
|
|
35
|
+
> **If you skip this step** and launch the MCP server, the first conversion
|
|
36
|
+
> will fail with:
|
|
37
|
+
> ```
|
|
38
|
+
> Distill service is not running. Start it with: docker compose up -d
|
|
39
|
+
> Then restart Claude Desktop.
|
|
40
|
+
> ```
|
|
41
|
+
> This same error appears whether Docker is not installed, Docker is installed
|
|
42
|
+
> but not running, or the Distill container has not been started. If you see
|
|
43
|
+
> this error, start by confirming Docker is installed and running, then start
|
|
44
|
+
> the Distill service with `docker compose up -d`.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
No global install required. The server runs via `npx`:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
npx -y distill-mcp-server
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Configuration
|
|
55
|
+
|
|
56
|
+
Open your `claude_desktop_config.json`:
|
|
57
|
+
|
|
58
|
+
- **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
|
|
59
|
+
- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
60
|
+
|
|
61
|
+
Add the distill server to the `mcpServers` section.
|
|
62
|
+
|
|
63
|
+
**macOS / Linux / Windows** (config is identical on all platforms):
|
|
64
|
+
|
|
65
|
+
```json
|
|
66
|
+
{
|
|
67
|
+
"mcpServers": {
|
|
68
|
+
"distill-mcp": {
|
|
69
|
+
"command": "npx",
|
|
70
|
+
"args": ["-y", "distill-mcp-server"],
|
|
71
|
+
"env": {
|
|
72
|
+
"DISTILL_MCP_CONFIG": "{\"mode\":\"full\",\"distill_url\":\"http://localhost:7860\"}"
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
If Distill is running on a non-default host or port, change `distill_url`:
|
|
80
|
+
|
|
81
|
+
```json
|
|
82
|
+
"DISTILL_MCP_CONFIG": "{\"mode\":\"full\",\"distill_url\":\"http://192.168.1.50:7860\"}"
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Config reference
|
|
86
|
+
|
|
87
|
+
| Key | Type | Required | Default | Description |
|
|
88
|
+
|---|---|---|---|---|
|
|
89
|
+
| `mode` | string | Yes | — | Must be `"full"` |
|
|
90
|
+
| `distill_url` | string | No | `http://localhost:7860` | Distill service base URL |
|
|
91
|
+
| `cache_dir` | string | No | `~/Documents/distill-cache` | Directory where converted `.md` files are saved |
|
|
92
|
+
|
|
93
|
+
## Supported formats
|
|
94
|
+
|
|
95
|
+
Full mode supports everything in lightweight mode, plus additional formats.
|
|
96
|
+
|
|
97
|
+
| Category | Extensions |
|
|
98
|
+
|---|---|
|
|
99
|
+
| Microsoft Word | `.docx`, `.doc`, `.odt` |
|
|
100
|
+
| Microsoft Excel | `.xlsx`, `.xlsm`, `.csv` |
|
|
101
|
+
| Microsoft PowerPoint | `.pptx`, `.ppt` |
|
|
102
|
+
| PDF (native text and scanned/OCR) | `.pdf` |
|
|
103
|
+
| HTML | `.html`, `.htm` |
|
|
104
|
+
| Audio | `.mp3`, `.wav`, `.m4a`, `.flac`, `.ogg` |
|
|
105
|
+
| EPUB | `.epub` |
|
|
106
|
+
| JSON | `.json` |
|
|
107
|
+
| SQL | `.sql` |
|
|
108
|
+
| WSDL | `.wsdl`, `.wsd` |
|
|
109
|
+
|
|
110
|
+
## Quality score
|
|
111
|
+
|
|
112
|
+
Full mode returns a `quality_score` (0.0–1.0) with every conversion. It
|
|
113
|
+
measures heading, table, and list preservation plus token efficiency.
|
|
114
|
+
|
|
115
|
+
- **0.70 and above:** Good conversion. No warning.
|
|
116
|
+
- **Below 0.70:** A warning is included in the response:
|
|
117
|
+
```
|
|
118
|
+
Quality score 0.65 is below threshold (0.70). The conversion completed
|
|
119
|
+
but some structure may be lost. Check the document for complex tables
|
|
120
|
+
or non-standard formatting.
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
The conversion is always returned regardless of the score — the warning is
|
|
124
|
+
informational, not a failure. The 0.70 threshold is fixed and not configurable.
|
|
125
|
+
|
|
126
|
+
## Test it
|
|
127
|
+
|
|
128
|
+
Restart Claude Desktop (fully quit and reopen, including the system tray icon
|
|
129
|
+
on Windows). The distill connector should appear in the connectors list.
|
|
130
|
+
|
|
131
|
+
In a new conversation, type:
|
|
132
|
+
|
|
133
|
+
> Convert using distill C:\Users\me\Documents\report.pdf to markdown
|
|
134
|
+
|
|
135
|
+
Claude will call the `convert_and_save` tool, send the file to Distill, and
|
|
136
|
+
return the Markdown content. The converted file is also saved to your cache
|
|
137
|
+
directory.
|
|
138
|
+
|
|
139
|
+
## Troubleshooting
|
|
140
|
+
|
|
141
|
+
| Symptom | Cause | Fix |
|
|
142
|
+
|---|---|---|
|
|
143
|
+
| `Distill service is not running` | Docker not installed, not running, or container not started | Install Docker from [docs.docker.com/get-docker](https://docs.docker.com/get-docker/), start it, then run `docker compose up -d` |
|
|
144
|
+
| Health check timeout | Docker Desktop not running, or wrong `distill_url` | Verify Docker is running (`docker info`), check the URL matches your Distill service |
|
|
145
|
+
| Quality score warning | Document has complex tables or non-standard formatting | Review the converted Markdown for accuracy. The conversion is still usable |
|
|
146
|
+
| `File not found` | Path must be absolute | Provide the full path, e.g. `C:\Users\me\Documents\report.pdf`, not `report.pdf` |
|
|
147
|
+
| Tool does not appear in Claude Desktop | Config JSON is malformed or server failed to start | Check `%APPDATA%\Claude\logs\mcp-server-distill.log` (Windows) or `~/Library/Logs/Claude/mcp-server-distill.log` (macOS) for errors |
|
|
148
|
+
| Claude asks to upload instead of using the tool | Model did not auto-select the tool | Include "distill" in your prompt: "Convert using distill ..." |
|
|
149
|
+
| "Taking longer than usual" during conversion | Large file — Distill needs more time | This is normal for large documents. The conversion will complete |
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Lightweight Mode Setup
|
|
2
|
+
|
|
3
|
+
Lightweight mode shells out to the distill-core Python library to convert
|
|
4
|
+
documents. No Docker required.
|
|
5
|
+
|
|
6
|
+
## Prerequisites
|
|
7
|
+
|
|
8
|
+
- **Node.js 22 or later**
|
|
9
|
+
Download from [nodejs.org](https://nodejs.org/) if not already installed.
|
|
10
|
+
Verify: `node --version`
|
|
11
|
+
|
|
12
|
+
- **Python 3**
|
|
13
|
+
The Python command differs by operating system:
|
|
14
|
+
- **Windows:** Install from [python.org](https://www.python.org/downloads/).
|
|
15
|
+
The standard launcher is invoked as `py`.
|
|
16
|
+
- **macOS/Linux:** Typically pre-installed. Invoked as `python3`.
|
|
17
|
+
Verify: `py --version` (Windows) or `python3 --version` (macOS/Linux)
|
|
18
|
+
|
|
19
|
+
- **distill-core Python package**
|
|
20
|
+
Install via pip:
|
|
21
|
+
- **Windows:** `py -m pip install distill-core`
|
|
22
|
+
- **macOS/Linux:** `python3 -m pip install distill-core`
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
No global install required. The server runs via `npx`:
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npx -y distill-mcp-server
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Configuration
|
|
33
|
+
|
|
34
|
+
Open your `claude_desktop_config.json`:
|
|
35
|
+
|
|
36
|
+
- **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
|
|
37
|
+
- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
38
|
+
|
|
39
|
+
Add the distill server to the `mcpServers` section.
|
|
40
|
+
|
|
41
|
+
**macOS / Linux:**
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"mcpServers": {
|
|
46
|
+
"distill-mcp": {
|
|
47
|
+
"command": "npx",
|
|
48
|
+
"args": ["-y", "distill-mcp-server"],
|
|
49
|
+
"env": {
|
|
50
|
+
"DISTILL_MCP_CONFIG": "{\"mode\":\"lightweight\",\"python_path\":\"python3\"}"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Windows:**
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
{
|
|
61
|
+
"mcpServers": {
|
|
62
|
+
"distill-mcp": {
|
|
63
|
+
"command": "npx",
|
|
64
|
+
"args": ["-y", "distill-mcp-server"],
|
|
65
|
+
"env": {
|
|
66
|
+
"DISTILL_MCP_CONFIG": "{\"mode\":\"lightweight\",\"python_path\":\"py\"}"
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
The only difference is `python_path` — `"python3"` on macOS/Linux, `"py"` on
|
|
74
|
+
Windows.
|
|
75
|
+
|
|
76
|
+
## Config reference
|
|
77
|
+
|
|
78
|
+
| Key | Type | Required | Default | Description |
|
|
79
|
+
|---|---|---|---|---|
|
|
80
|
+
| `mode` | string | Yes | — | Must be `"lightweight"` |
|
|
81
|
+
| `python_path` | string | No | `"py"` (Windows) / `"python3"` (macOS/Linux) | Python command name or full path to the interpreter (e.g. `py`, `python3`) |
|
|
82
|
+
| `cache_dir` | string | No | `~/Documents/distill-cache` | Directory where converted `.md` files are saved |
|
|
83
|
+
|
|
84
|
+
## Supported formats
|
|
85
|
+
|
|
86
|
+
| Category | Extensions |
|
|
87
|
+
|---|---|
|
|
88
|
+
| Microsoft Word | `.docx`, `.doc`, `.odt` |
|
|
89
|
+
| Microsoft Excel | `.xlsx`, `.xlsm`, `.csv` |
|
|
90
|
+
| Microsoft PowerPoint | `.pptx`, `.ppt` |
|
|
91
|
+
| PDF (native text only) | `.pdf` |
|
|
92
|
+
| HTML | `.html`, `.htm` |
|
|
93
|
+
|
|
94
|
+
Scanned/image PDFs are not supported in lightweight mode. Use
|
|
95
|
+
[full mode](full-setup.md) for OCR and audio transcription.
|
|
96
|
+
|
|
97
|
+
## Test it
|
|
98
|
+
|
|
99
|
+
Restart Claude Desktop (fully quit and reopen). In a new conversation, type:
|
|
100
|
+
|
|
101
|
+
> Convert using distill C:\Users\me\Documents\report.pdf to markdown
|
|
102
|
+
|
|
103
|
+
Claude will call the `convert_and_save` tool and return the Markdown content.
|
|
104
|
+
|
|
105
|
+
## Troubleshooting
|
|
106
|
+
|
|
107
|
+
| Symptom | Cause | Fix |
|
|
108
|
+
|---|---|---|
|
|
109
|
+
| `ModuleNotFoundError: No module named 'distill'` | distill-core is not installed | Run `py -m pip install distill-core` (Windows) or `python3 -m pip install distill-core` (macOS/Linux) |
|
|
110
|
+
| `No module named distill.__main__` | Wrong invocation — distill-core has no CLI entry point | Ensure `python_path` is set correctly (`py` on Windows, `python3` on macOS/Linux). The server uses the Python API, not `python -m distill` |
|
|
111
|
+
| `File not found` | Path must be absolute | Provide the full path, e.g. `C:\Users\me\Documents\report.pdf`, not `report.pdf` |
|
|
112
|
+
| `Unsupported format` | File extension is not in the lightweight supported list | Check the supported formats table above. For scanned PDFs and audio, switch to [full mode](full-setup.md) |
|
|
113
|
+
| Claude asks to upload instead of using the tool | Model did not auto-select the tool | Include "distill" in your prompt: "Convert using distill ..." |
|
package/package.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "distill-mcp-server",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "MCP server for Distill — convert documents to token-efficient Markdown in any MCP-compatible client",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"engines": { "node": ">=22.0.0" },
|
|
7
|
+
"bin": {
|
|
8
|
+
"distill-mcp-server": "./src/index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"src/",
|
|
12
|
+
"docs/lightweight-setup.md",
|
|
13
|
+
"docs/full-setup.md",
|
|
14
|
+
"docs/CLAUDE.md-snippet.md",
|
|
15
|
+
"README.md",
|
|
16
|
+
"LICENSE"
|
|
17
|
+
],
|
|
18
|
+
"scripts": {
|
|
19
|
+
"start": "node src/index.js",
|
|
20
|
+
"test": "node --test"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [
|
|
23
|
+
"mcp",
|
|
24
|
+
"claude",
|
|
25
|
+
"distill",
|
|
26
|
+
"markdown",
|
|
27
|
+
"document-conversion",
|
|
28
|
+
"model-context-protocol"
|
|
29
|
+
],
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"@modelcontextprotocol/sdk": "^1.29.0"
|
|
32
|
+
},
|
|
33
|
+
"license": "MIT",
|
|
34
|
+
"repository": {
|
|
35
|
+
"type": "git",
|
|
36
|
+
"url": "https://github.com/lakshgk/distill-mcp"
|
|
37
|
+
},
|
|
38
|
+
"homepage": "https://github.com/lakshgk/distill-mcp#readme"
|
|
39
|
+
}
|
package/src/cache.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { mkdir, writeFile, access } from 'node:fs/promises';
|
|
2
|
+
import { join, basename, extname } from 'node:path';
|
|
3
|
+
|
|
4
|
+
/**
 * Writes converted Markdown into the cache directory.
 *
 * The cache file is named after the source file with its final extension
 * replaced by `.md` (a source without an extension just gets `.md` appended).
 * The cache directory is created on demand.
 *
 * This function never rejects because of a filesystem failure: any error from
 * creating the directory or writing the file is reported through `saveError`.
 *
 * @param {string} markdown - Markdown text to store.
 * @param {string} sourcePath - Path of the document the Markdown came from.
 * @param {string} cacheDir - Directory that receives the `.md` file.
 * @returns {Promise<{cachedAt: string|null, overwritten: boolean, saveError?: string}>}
 *   On success, the written path and whether a file already existed there;
 *   on failure, `cachedAt: null`, `overwritten: false` and the error message.
 */
export async function save(markdown, sourcePath, cacheDir) {
  const fileName = basename(sourcePath);
  const suffix = extname(fileName);
  // Cutting by length keeps the whole name when there is no extension.
  const stem = fileName.slice(0, fileName.length - suffix.length);
  const destination = join(cacheDir, `${stem}.md`);

  // A failed access() only means "nothing to overwrite"; it is not an error.
  const existedBefore = await access(destination).then(
    () => true,
    () => false,
  );

  try {
    await mkdir(cacheDir, { recursive: true });
    await writeFile(destination, markdown, 'utf-8');
  } catch (failure) {
    return { cachedAt: null, overwritten: false, saveError: failure.message };
  }

  return { cachedAt: destination, overwritten: existedBefore };
}
|
package/src/config.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { homedir, platform } from 'node:os';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { ConfigError } from './errors.js';
|
|
4
|
+
|
|
5
|
+
// Values accepted for the "mode" key of the configuration.
const VALID_MODES = ['lightweight', 'full'];

// On Windows the standard launcher is "py"; on macOS/Linux the command is "python3".
const DEFAULT_PYTHON_PATH = platform() === 'win32' ? 'py' : 'python3';

/**
 * Builds the cache location used when the configuration gives no `cache_dir`.
 *
 * @returns {string} `<home directory>/Documents/distill-cache`.
 */
function getDefaultCacheDir() {
  return join(homedir(), 'Documents', 'distill-cache');
}
|
|
14
|
+
|
|
15
|
+
/**
 * Reads and validates the server configuration from the `DISTILL_MCP_CONFIG`
 * environment variable, which must contain a JSON object.
 *
 * Recognised keys: `mode` (required, one of VALID_MODES), `cache_dir`,
 * `distill_url` and `python_path` (optional strings). Missing or empty
 * optional keys fall back to their defaults.
 *
 * @returns {{mode: string, cache_dir: string, distill_url: string, python_path: string}}
 *   The resolved configuration.
 * @throws {ConfigError} If the variable is unset, is not valid JSON, is not a
 *   JSON object, has a missing/unknown `mode`, or has a non-string optional key.
 */
export function loadConfig() {
  const raw = process.env.DISTILL_MCP_CONFIG;

  if (!raw) {
    throw new ConfigError(
      'DISTILL_MCP_CONFIG environment variable is not set. See README for configuration instructions.'
    );
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw new ConfigError(
      'DISTILL_MCP_CONFIG is not valid JSON. See README for configuration instructions.'
    );
  }

  // JSON.parse also accepts `null`, arrays and primitives. Destructuring
  // `null` would throw a TypeError, so reject anything that is not an object
  // with a proper ConfigError.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new ConfigError(
      'DISTILL_MCP_CONFIG must be a JSON object. See README for configuration instructions.'
    );
  }

  const { mode, cache_dir, distill_url, python_path } = parsed;

  if (!mode || !VALID_MODES.includes(mode)) {
    throw new ConfigError(
      `Invalid or missing "mode". Must be one of: ${VALID_MODES.join(', ')}. See README for configuration instructions.`
    );
  }

  // Falsy values keep meaning "use the default"; anything else must be a string.
  for (const [key, value] of Object.entries({ cache_dir, distill_url, python_path })) {
    if (value && typeof value !== 'string') {
      throw new ConfigError(
        `"${key}" must be a string. See README for configuration instructions.`
      );
    }
  }

  let resolvedCacheDir = cache_dir || getDefaultCacheDir();
  // Expand only a leading "~" that stands for the home directory ("~", "~/x",
  // "~\x"). A name that merely begins with a tilde, such as "~cache", is kept.
  const isHomeRelative =
    resolvedCacheDir === '~' ||
    resolvedCacheDir.startsWith('~/') ||
    resolvedCacheDir.startsWith('~\\');
  if (isHomeRelative) {
    resolvedCacheDir = join(homedir(), resolvedCacheDir.slice(1));
  }

  return {
    mode,
    cache_dir: resolvedCacheDir,
    distill_url: distill_url || 'http://localhost:7860',
    python_path: python_path || DEFAULT_PYTHON_PATH,
  };
}
|
package/src/errors.js
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
 * Base class for every error raised by the server.
 *
 * @property {string} code - Machine-readable error code.
 */
class DistillMcpError extends Error {
  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   * @param {{cause?: unknown}} [options] - Standard `Error` options; pass
   *   `{ cause }` to keep the underlying error attached.
   */
  constructor(code, message, options) {
    super(message, options);
    this.name = 'DistillMcpError';
    this.code = code;
  }
}

/** The configuration is missing or invalid. Code: `CONFIG_ERROR`. */
class ConfigError extends DistillMcpError {
  /**
   * @param {string} message - Human-readable description.
   * @param {{cause?: unknown}} [options] - Standard `Error` options.
   */
  constructor(message, options) {
    super('CONFIG_ERROR', message, options);
    this.name = 'ConfigError';
  }
}

/** A file could not be found. Code: `FILE_NOT_FOUND_ERROR`. */
class FileNotFoundError extends DistillMcpError {
  /**
   * @param {string} message - Human-readable description.
   * @param {{cause?: unknown}} [options] - Standard `Error` options.
   */
  constructor(message, options) {
    super('FILE_NOT_FOUND_ERROR', message, options);
    this.name = 'FileNotFoundError';
  }
}

/** The file format is not supported. Code: `UNSUPPORTED_FORMAT_ERROR`. */
class UnsupportedFormatError extends DistillMcpError {
  /**
   * @param {string} message - Human-readable description.
   * @param {{cause?: unknown}} [options] - Standard `Error` options.
   */
  constructor(message, options) {
    super('UNSUPPORTED_FORMAT_ERROR', message, options);
    this.name = 'UnsupportedFormatError';
  }
}

/** The Distill service is unavailable. Code: `DISTILL_UNAVAILABLE_ERROR`. */
class DistillUnavailableError extends DistillMcpError {
  /**
   * @param {string} message - Human-readable description.
   * @param {{cause?: unknown}} [options] - Standard `Error` options.
   */
  constructor(message, options) {
    super('DISTILL_UNAVAILABLE_ERROR', message, options);
    this.name = 'DistillUnavailableError';
  }
}

/** Writing to the cache failed. Code: `CACHE_WRITE_ERROR`. */
class CacheWriteError extends DistillMcpError {
  /**
   * @param {string} message - Human-readable description.
   * @param {{cause?: unknown}} [options] - Standard `Error` options.
   */
  constructor(message, options) {
    super('CACHE_WRITE_ERROR', message, options);
    this.name = 'CacheWriteError';
  }
}

/** A conversion failed. Code: `CONVERSION_ERROR`. */
class ConversionError extends DistillMcpError {
  /**
   * @param {string} message - Human-readable description.
   * @param {{cause?: unknown}} [options] - Standard `Error` options.
   */
  constructor(message, options) {
    super('CONVERSION_ERROR', message, options);
    this.name = 'ConversionError';
  }
}

export {
  DistillMcpError,
  ConfigError,
  FileNotFoundError,
  UnsupportedFormatError,
  DistillUnavailableError,
  CacheWriteError,
  ConversionError,
};
|
package/src/index.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
5
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
6
|
+
import { loadConfig } from './config.js';
|
|
7
|
+
import { ConfigError } from './errors.js';
|
|
8
|
+
import { handle as handleConvertAndSave } from './tools/convert_and_save.js';
|
|
9
|
+
|
|
10
|
+
// Shown to the MCP client as the description of the convert_and_save tool.
const CONVERT_AND_SAVE_DESCRIPTION =
  'YOU HAVE ACCESS TO THE LOCAL FILESYSTEM THROUGH THIS TOOL. When a user asks to convert, read, or process a document and provides a file path, ALWAYS use this tool — do NOT ask them to upload. This tool accepts an absolute file path (e.g. C:/Users/name/Documents/report.pdf) and converts the file to clean Markdown using the Distill document processing engine running locally. Supports DOCX, PDF, XLSX, PPTX, HTML, audio files, and more. Returns high-quality Markdown and saves a cached copy.';

/**
 * Loads the configuration. A ConfigError is reported on stderr and ends the
 * process with exit code 1; any other error is rethrown to the caller.
 */
function loadConfigOrExit() {
  try {
    return loadConfig();
  } catch (err) {
    if (!(err instanceof ConfigError)) {
      throw err;
    }
    process.stderr.write(`Configuration error: ${err.message}\n`);
    process.exit(1);
  }
}

/**
 * Entry point: loads the configuration, registers the `convert_and_save`
 * tool on an MCP server and connects the server over stdio.
 */
async function main() {
  const config = loadConfigOrExit();

  const server = new McpServer({
    name: 'distill-mcp',
    version: '0.1.0',
  });

  // Anything the tool handler throws is turned into an MCP error result
  // instead of propagating out of the callback.
  const runConvertAndSave = async (args, extra) => {
    try {
      return await handleConvertAndSave(args, config, extra);
    } catch (err) {
      const text = `Unexpected error: ${err.message}`;
      return { isError: true, content: [{ type: 'text', text }] };
    }
  };

  const inputSchema = {
    file_path: z.string().describe('Absolute path to the document to convert'),
  };

  server.tool('convert_and_save', CONVERT_AND_SAVE_DESCRIPTION, inputSchema, runConvertAndSave);

  await server.connect(new StdioServerTransport());
}

// Last-resort handler: report the failure on stderr and exit non-zero.
function reportFatal(err) {
  process.stderr.write(`Fatal error: ${err.message}\n`);
  process.exit(1);
}

main().catch(reportFatal);
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
import { basename } from 'node:path';
|
|
3
|
+
import { ConversionError, DistillUnavailableError } from '../errors.js';
|
|
4
|
+
|
|
5
|
+
const SERVICE_DOWN_MESSAGE =
  'Distill service is not running. Start it with: docker compose up -d\nThen restart Claude Desktop.';

// Turns the `detail` field of an error response into a readable message,
// falling back to the HTTP status when no detail is present.
function describeErrorDetail(detail, status) {
  if (!detail) {
    return `HTTP ${status}`;
  }
  // A structured detail (object/array) would otherwise be rendered as
  // "[object Object]" once it becomes an Error message.
  return typeof detail === 'string' ? detail : JSON.stringify(detail);
}

/**
 * Converts a document by uploading it to the Distill service.
 *
 * The file is read from disk and POSTed as multipart form data (fields `file`
 * and `output_format=markdown`) to `<distill_url>/api/convert`.
 *
 * @param {string} filePath - Path of the document to convert.
 * @param {{distill_url: string}} config - Configuration holding the service base URL.
 * @returns {Promise<{markdown: string, quality_score: number|null, warnings: Array}>}
 *   The Markdown, `quality.overall` from the response (or `null` when absent),
 *   and the response's warnings (or an empty array).
 * @throws {ConversionError} If the file cannot be read, the service answers
 *   with a non-2xx status, or the success response is not JSON containing a
 *   `markdown` string.
 * @throws {DistillUnavailableError} If the request itself fails.
 */
export async function convert(filePath, config) {
  // Tolerate a trailing slash in the base URL ("http://host:7860/").
  const baseUrl = config.distill_url.replace(/\/+$/, '');
  const url = `${baseUrl}/api/convert`;

  let fileBuffer;
  try {
    fileBuffer = await readFile(filePath);
  } catch (err) {
    throw new ConversionError(`Failed to read file: ${err.message}`);
  }

  const form = new FormData();
  form.append('file', new Blob([fileBuffer]), basename(filePath));
  form.append('output_format', 'markdown');

  let response;
  try {
    response = await fetch(url, { method: 'POST', body: form });
  } catch {
    // Any failure to complete the request is reported as "service not running".
    throw new DistillUnavailableError(SERVICE_DOWN_MESSAGE);
  }

  if (!response.ok) {
    let detail;
    try {
      const body = await response.json();
      detail = describeErrorDetail(body?.detail, response.status);
    } catch {
      // The error body was not JSON; the status code is all we have.
      detail = `HTTP ${response.status}`;
    }
    throw new ConversionError(detail);
  }

  let data;
  try {
    data = await response.json();
  } catch (err) {
    throw new ConversionError(`Distill returned an invalid JSON response: ${err.message}`);
  }

  if (typeof data?.markdown !== 'string') {
    throw new ConversionError('Distill response did not include Markdown content.');
  }

  return {
    markdown: data.markdown,
    quality_score: data.quality?.overall ?? null,
    warnings: data.warnings || [],
  };
}
|
|
48
|
+
|
|
49
|
+
/**
 * Probes the Distill service root URL to confirm it is reachable.
 *
 * The request is aborted after 3 seconds.
 *
 * @param {{ distill_url: string }} config - Configuration holding the service base URL.
 * @returns {Promise<true>} Resolves to `true` when the service answers with a 2xx status.
 * @throws {DistillUnavailableError} When the request fails, times out, or the
 *   service answers with a non-2xx status.
 */
export async function checkHealth(config) {
  // Normalise the base URL so a configured trailing slash does not turn the
  // probe into "host//", which a healthy service may answer with 404.
  const url = `${String(config.distill_url).replace(/\/+$/, '')}/`;
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 3000);

  let response;
  try {
    response = await fetch(url, { signal: controller.signal });
  } catch (err) {
    // Network failure or timeout abort: same user-facing message, with the
    // underlying reason kept for debugging.
    const unavailable = new DistillUnavailableError(SERVICE_DOWN_MESSAGE);
    unavailable.cause = err;
    throw unavailable;
  } finally {
    clearTimeout(timeout);
  }

  if (!response.ok) {
    throw new DistillUnavailableError(SERVICE_DOWN_MESSAGE);
  }
  return true;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { ConversionError } from '../errors.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Converts a file to Markdown by running the `distill` Python package in a
 * child process and capturing what it prints on stdout.
 *
 * @param {string} filePath - Path handed to the Python snippet as `sys.argv[1]`.
 * @param {{ python_path: string }} config - Configuration holding the Python executable to run.
 * @returns {Promise<{ markdown: string }>} The captured stdout of the Python process.
 * @throws {ConversionError} When the interpreter cannot be started or the
 *   process finishes with an error.
 */
export async function convert(filePath, config) {
  const pythonPath = config.python_path;

  // stdout is re-wrapped as UTF-8 inside the snippet so the output encoding
  // does not depend on the platform default.
  const script = `import distill, sys, io; sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8'); result = distill.convert(sys.argv[1]); print(result.markdown)`;

  return new Promise((resolve, reject) => {
    try {
      execFile(
        pythonPath,
        ['-c', script, filePath],
        {
          maxBuffer: 50 * 1024 * 1024,
          encoding: 'utf-8',
          // Avoid flashing a console window when launched from a GUI host on Windows.
          windowsHide: true,
        },
        (error, stdout, stderr) => {
          if (!error) {
            resolve({ markdown: stdout });
            return;
          }

          // Spawn failures (e.g. interpreter not found) are delivered through
          // this callback too and carry a `syscall` of the form "spawn <file>".
          if (typeof error.syscall === 'string' && error.syscall.startsWith('spawn')) {
            reject(new ConversionError(`Failed to spawn subprocess: ${error.message}`));
            return;
          }

          // Prefer the Python traceback, but never surface an empty message
          // when stderr is blank or whitespace-only.
          const stderrText = typeof stderr === 'string' ? stderr.trim() : '';
          reject(new ConversionError(stderrText || error.message));
        }
      );
    } catch (err) {
      // execFile throws synchronously on invalid arguments (e.g. a missing python_path).
      reject(new ConversionError(`Failed to spawn subprocess: ${err.message}`));
    }
  });
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { access } from 'node:fs/promises';
|
|
2
|
+
import { resolve, extname } from 'node:path';
|
|
3
|
+
import { convert as lightweightConvert } from '../modes/lightweight.js';
|
|
4
|
+
import { convert as fullConvert, checkHealth } from '../modes/full.js';
|
|
5
|
+
import { save } from '../cache.js';
|
|
6
|
+
|
|
7
|
+
// Set to true after the first successful full-mode health check so later
// calls skip the probe.
let healthChecked = false;

// Extensions accepted in lightweight mode. Insertion order is significant:
// it is the order shown in the "Unsupported format" error message.
const LIGHTWEIGHT_EXTENSIONS = new Set(
  [
    ['.docx', '.doc', '.odt'],
    ['.xlsx', '.xlsm', '.csv'],
    ['.pptx', '.ppt'],
    ['.pdf'],
    ['.html', '.htm'],
  ].flat()
);

// Full mode accepts everything lightweight mode does, plus the extensions
// appended below.
const FULL_EXTENSIONS = new Set(LIGHTWEIGHT_EXTENSIONS);
for (const extension of [
  '.mp3', '.wav', '.m4a', '.flac', '.ogg',
  '.epub', '.json', '.sql', '.wsdl', '.wsd',
]) {
  FULL_EXTENSIONS.add(extension);
}

// Quality scores strictly below this value produce a warning in the tool result.
const QUALITY_THRESHOLD = 0.70;
|
|
24
|
+
|
|
25
|
+
/**
 * Builds a tool result that marks the call as failed.
 *
 * @param {unknown} text - Human-readable failure description.
 * @returns {{ isError: true, content: Array<{ type: 'text', text: string }> }}
 *   Result object with a single text content item.
 */
function errorResponse(text) {
  // Callers forward `err.message`, which is undefined when something other
  // than an Error was thrown; the text content item must still carry a string.
  const message = typeof text === 'string' ? text : String(text ?? 'Unknown error');
  return {
    isError: true,
    content: [{ type: 'text', text: message }],
  };
}
|
|
31
|
+
|
|
32
|
+
/**
 * Emits a `notifications/progress` message for the current request, if the
 * client asked for progress updates. Failures are ignored on purpose:
 * progress reporting must never break the conversion itself.
 *
 * @param {object | undefined} extra - Request context; read for
 *   `_meta.progressToken` and `sendNotification`.
 * @param {number} progress - Current step.
 * @param {number} total - Total number of steps.
 * @param {string} message - Short description of the current step.
 * @returns {Promise<void>}
 */
async function sendProgress(extra, progress, total, message) {
  const token = extra?._meta?.progressToken;
  // `== null` treats both undefined and null as "no token supplied", while
  // still allowing falsy-but-valid tokens such as 0.
  if (token == null || typeof extra?.sendNotification !== 'function') {
    return;
  }

  try {
    await extra.sendNotification({
      method: 'notifications/progress',
      params: { progressToken: token, progress, total, message },
    });
  } catch {
    // Progress notifications are best-effort
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Reports whether a caller-supplied path contains a `..` segment.
 *
 * The raw input must be inspected: `path.resolve()` collapses `..` segments,
 * so a resolved path can no longer reveal traversal, and a plain substring
 * test would wrongly reject names such as `report..v2.pdf`. Both `/` and `\`
 * are treated as separators.
 */
function hasParentSegment(rawPath) {
  return rawPath.split(/[\\/]+/).includes('..');
}

/**
 * Tool handler: validates the requested file, converts it to Markdown using
 * the configured mode, saves the result to the cache and returns a tool result.
 *
 * @param {{ file_path: string }} args - Tool arguments.
 * @param {object} config - Read for `mode` ('full' selects the full converter,
 *   anything else the lightweight one) and `cache_dir`; also passed through to
 *   the converters and the health check.
 * @param {object} [extra] - Request context used for best-effort progress notifications.
 * @returns {Promise<object>} On success, a result whose single text item is a
 *   JSON document with `markdown`, `cached_at`, `overwritten`, `source_file`,
 *   `format_detected`, `mode`, optional `quality_score`, and `warnings`.
 *   On failure, an error result (`isError: true`). This function does not throw.
 */
export async function handle(args, config, extra) {
  try {
    const filePath = args?.file_path;

    if (typeof filePath !== 'string' || filePath.trim() === '') {
      return errorResponse('file_path is required and must be a non-empty string.');
    }

    // Checked on the raw input, before resolve() normalises `..` away.
    if (hasParentSegment(filePath)) {
      return errorResponse('Path traversal is not allowed.');
    }

    const resolvedPath = resolve(filePath);

    try {
      await access(resolvedPath);
    } catch {
      return errorResponse(`File not found: ${resolvedPath}`);
    }

    const isFullMode = config.mode === 'full';
    // Full mode has one extra step (service health check); use the same
    // total for every notification so reported progress is consistent.
    const totalSteps = isFullMode ? 5 : 4;

    const ext = extname(resolvedPath).toLowerCase();
    await sendProgress(extra, 1, totalSteps, 'Validating file');

    const supportedExtensions = isFullMode ? FULL_EXTENSIONS : LIGHTWEIGHT_EXTENSIONS;

    if (!supportedExtensions.has(ext)) {
      const modeLabel = isFullMode ? 'full' : 'lightweight';
      const extList = [...supportedExtensions].join(' ');
      const hint = isFullMode ? '' : ' Switch to full mode for scanned PDF and audio.';
      return errorResponse(
        `Unsupported format: ${ext}. Supported formats in ${modeLabel} mode: ${extList}.${hint}`
      );
    }

    // The health probe runs until it succeeds once; afterwards it is skipped.
    if (isFullMode && !healthChecked) {
      await sendProgress(extra, 2, totalSteps, 'Checking Distill service');
      try {
        await checkHealth(config);
        healthChecked = true;
      } catch (err) {
        return errorResponse(err?.message ?? String(err));
      }
    }

    await sendProgress(extra, isFullMode ? 3 : 2, totalSteps, 'Converting document with Distill');

    let result;
    try {
      result = isFullMode
        ? await fullConvert(resolvedPath, config)
        : await lightweightConvert(resolvedPath, config);
    } catch (err) {
      const reason = err?.message ?? String(err);
      // Only suggest switching modes when the caller is not already in full mode.
      if (!isFullMode && /scanned/i.test(reason)) {
        return errorResponse(
          'This PDF appears to be scanned. Switch to full mode for OCR support.'
        );
      }
      return errorResponse(reason);
    }

    const { markdown } = result;
    await sendProgress(extra, isFullMode ? 4 : 3, totalSteps, 'Saving to cache');

    const cacheResult = await save(markdown, resolvedPath, config.cache_dir);

    const warnings = [];

    if (isFullMode) {
      // Warnings may be plain strings or objects; reduce each to display text.
      for (const w of result.warnings ?? []) {
        warnings.push(typeof w === 'string' ? w : w.message || JSON.stringify(w));
      }
      if (result.quality_score != null && result.quality_score < QUALITY_THRESHOLD) {
        warnings.push(
          `Quality score ${result.quality_score} is below threshold (${QUALITY_THRESHOLD.toFixed(2)}). The conversion ` +
            'completed but some structure may be lost. Check the document for complex tables ' +
            'or non-standard formatting.'
        );
      }
    }

    // A failed cache write is reported as a warning, not a failure.
    if (cacheResult.saveError) {
      warnings.push(`Cache save failed: ${cacheResult.saveError}`);
    }

    await sendProgress(extra, totalSteps, totalSteps, 'Done');

    const response = {
      markdown,
      cached_at: cacheResult.cachedAt,
      overwritten: cacheResult.overwritten,
      source_file: resolvedPath,
      format_detected: ext.slice(1),
      mode: config.mode,
      ...(isFullMode && result.quality_score != null
        ? { quality_score: result.quality_score }
        : {}),
      warnings,
    };

    return {
      content: [{ type: 'text', text: JSON.stringify(response, null, 2) }],
    };
  } catch (err) {
    return errorResponse(`Unexpected error: ${err?.message ?? String(err)}`);
  }
}
|