@sylphx/pdf-reader-mcp 1.3.2 → 1.4.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +99 -17
- package/dist/index.js +255 -163
- package/package.json +39 -43
package/README.md
CHANGED
|
@@ -93,35 +93,63 @@ Real-world performance from production testing:
|
|
|
93
93
|
|
|
94
94
|
## 📦 Installation
|
|
95
95
|
|
|
96
|
+
### VS Code
|
|
97
|
+
|
|
98
|
+
Install with one click using the VS Code extension buttons:
|
|
99
|
+
|
|
100
|
+
[Install in VS Code](https://insiders.vscode.dev/redirect?url=vscode://ms-vscode.vscode-mcp/install?mcpId=sylphx-pdf-reader-mcp)
|
|
101
|
+
[Install in VS Code Insiders](https://insiders.vscode.dev/redirect?url=vscode-insiders://ms-vscode.vscode-mcp/install?mcpId=sylphx-pdf-reader-mcp)
|
|
102
|
+
|
|
103
|
+
Or via CLI:
|
|
104
|
+
|
|
96
105
|
```bash
|
|
97
|
-
|
|
98
|
-
|
|
106
|
+
code --add-mcp '{"name":"pdf-reader","command":"npx","args":["@sylphx/pdf-reader-mcp"]}'
|
|
107
|
+
```
|
|
99
108
|
|
|
100
|
-
|
|
101
|
-
pnpm add @sylphx/pdf-reader-mcp
|
|
109
|
+
### Claude Code
|
|
102
110
|
|
|
103
|
-
|
|
104
|
-
|
|
111
|
+
```bash
|
|
112
|
+
claude mcp add pdf-reader -- npx @sylphx/pdf-reader-mcp
|
|
113
|
+
```
|
|
105
114
|
|
|
106
|
-
|
|
107
|
-
yarn add @sylphx/pdf-reader-mcp
|
|
115
|
+
### Claude Desktop
|
|
108
116
|
|
|
109
|
-
|
|
110
|
-
|
|
117
|
+
Add to `claude_desktop_config.json`:
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"mcpServers": {
|
|
122
|
+
"pdf-reader": {
|
|
123
|
+
"command": "npx",
|
|
124
|
+
"args": ["@sylphx/pdf-reader-mcp"]
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
111
128
|
```
|
|
112
129
|
|
|
113
|
-
|
|
130
|
+
<details>
|
|
131
|
+
<summary><strong>📍 Config file locations</strong></summary>
|
|
114
132
|
|
|
115
|
-
|
|
133
|
+
- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
134
|
+
- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
|
|
135
|
+
- **Linux**: `~/.config/Claude/claude_desktop_config.json`
|
|
136
|
+
|
|
137
|
+
</details>
|
|
138
|
+
|
|
139
|
+
### Cursor
|
|
116
140
|
|
|
117
|
-
|
|
141
|
+
1. Open **Settings** → **MCP** → **Add new MCP Server**
|
|
142
|
+
2. Select **Command** type
|
|
143
|
+
3. Enter: `npx @sylphx/pdf-reader-mcp`
|
|
118
144
|
|
|
119
|
-
|
|
145
|
+
### Windsurf
|
|
146
|
+
|
|
147
|
+
Add to your Windsurf MCP config:
|
|
120
148
|
|
|
121
149
|
```json
|
|
122
150
|
{
|
|
123
151
|
"mcpServers": {
|
|
124
|
-
"pdf-reader
|
|
152
|
+
"pdf-reader": {
|
|
125
153
|
"command": "npx",
|
|
126
154
|
"args": ["@sylphx/pdf-reader-mcp"]
|
|
127
155
|
}
|
|
@@ -129,6 +157,49 @@ Add to your MCP client (`claude_desktop_config.json`, Cursor, Cline):
|
|
|
129
157
|
}
|
|
130
158
|
```
|
|
131
159
|
|
|
160
|
+
### Cline
|
|
161
|
+
|
|
162
|
+
Add to Cline's MCP settings:
|
|
163
|
+
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"mcpServers": {
|
|
167
|
+
"pdf-reader": {
|
|
168
|
+
"command": "npx",
|
|
169
|
+
"args": ["@sylphx/pdf-reader-mcp"]
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Warp
|
|
176
|
+
|
|
177
|
+
1. Go to **Settings** → **AI** → **Manage MCP Servers** → **Add**
|
|
178
|
+
2. Or use the `/add-mcp` slash command with the standard config
|
|
179
|
+
|
|
180
|
+
### Smithery (One-click)
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
npx -y @smithery/cli install @sylphx/pdf-reader-mcp --client claude
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Manual Installation
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
# Quick start - zero installation
|
|
190
|
+
npx @sylphx/pdf-reader-mcp
|
|
191
|
+
|
|
192
|
+
# Using bun (recommended)
|
|
193
|
+
bun add @sylphx/pdf-reader-mcp
|
|
194
|
+
|
|
195
|
+
# Using npm
|
|
196
|
+
npm install @sylphx/pdf-reader-mcp
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## 🎯 Quick Start
|
|
202
|
+
|
|
132
203
|
### Basic Usage
|
|
133
204
|
|
|
134
205
|
```json
|
|
@@ -701,11 +772,22 @@ MIT © [Sylphx](https://sylphx.com)
|
|
|
701
772
|
|
|
702
773
|
Built with:
|
|
703
774
|
- [PDF.js](https://mozilla.github.io/pdf.js/) - Mozilla PDF engine
|
|
704
|
-
- [
|
|
705
|
-
- [Vitest](https://vitest.dev) - Fast testing framework
|
|
775
|
+
- [Bun](https://bun.sh) - Fast JavaScript runtime
|
|
706
776
|
|
|
707
777
|
Special thanks to the open source community ❤️
|
|
708
778
|
|
|
779
|
+
## Powered by Sylphx
|
|
780
|
+
|
|
781
|
+
This project uses the following [@sylphx](https://github.com/SylphxAI) packages:
|
|
782
|
+
|
|
783
|
+
- [@sylphx/mcp-server-sdk](https://github.com/SylphxAI/mcp-server-sdk) - MCP server framework
|
|
784
|
+
- [@sylphx/biome-config](https://github.com/SylphxAI/biome-config) - Biome configuration
|
|
785
|
+
- [@sylphx/tsconfig](https://github.com/SylphxAI/tsconfig) - TypeScript configuration
|
|
786
|
+
- [@sylphx/bump](https://github.com/SylphxAI/bump) - Version management
|
|
787
|
+
- [@sylphx/doctor](https://github.com/SylphxAI/doctor) - Project health checker
|
|
788
|
+
- [@sylphx/leaf](https://github.com/SylphxAI/leaf) - Documentation framework
|
|
789
|
+
- [@sylphx/leaf-theme-default](https://github.com/SylphxAI/leaf-theme-default) - Documentation theme
|
|
790
|
+
|
|
709
791
|
---
|
|
710
792
|
|
|
711
793
|
<p align="center">
|
package/dist/index.js
CHANGED
|
@@ -1,23 +1,94 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// src/index.ts
|
|
4
|
-
import {
|
|
5
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
6
|
-
import {
|
|
7
|
-
CallToolRequestSchema,
|
|
8
|
-
ErrorCode as ErrorCode5,
|
|
9
|
-
ListToolsRequestSchema,
|
|
10
|
-
McpError as McpError5
|
|
11
|
-
} from "@modelcontextprotocol/sdk/types.js";
|
|
12
|
-
import { zodToJsonSchema } from "zod-to-json-schema";
|
|
4
|
+
import { createServer, stdio } from "@sylphx/mcp-server-sdk";
|
|
13
5
|
|
|
14
6
|
// src/handlers/readPdf.ts
|
|
15
|
-
import {
|
|
16
|
-
import { z as z2 } from "zod";
|
|
7
|
+
import { image, text, tool, toolError } from "@sylphx/mcp-server-sdk";
|
|
17
8
|
|
|
18
9
|
// src/pdf/extractor.ts
|
|
19
10
|
import { OPS } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
20
11
|
import { PNG } from "pngjs";
|
|
12
|
+
|
|
13
|
+
// src/utils/logger.ts
|
|
14
|
+
class Logger {
|
|
15
|
+
prefix;
|
|
16
|
+
minLevel;
|
|
17
|
+
constructor(component, minLevel = 1 /* INFO */) {
|
|
18
|
+
this.prefix = `[PDF Reader MCP${component ? ` - ${component}` : ""}]`;
|
|
19
|
+
this.minLevel = minLevel;
|
|
20
|
+
}
|
|
21
|
+
setLevel(level) {
|
|
22
|
+
this.minLevel = level;
|
|
23
|
+
}
|
|
24
|
+
debug(message, context) {
|
|
25
|
+
if (this.minLevel <= 0 /* DEBUG */) {
|
|
26
|
+
this.log("debug", message, context);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
info(message, context) {
|
|
30
|
+
if (this.minLevel <= 1 /* INFO */) {
|
|
31
|
+
this.log("info", message, context);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
warn(message, context) {
|
|
35
|
+
if (this.minLevel <= 2 /* WARN */) {
|
|
36
|
+
this.log("warn", message, context);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
error(message, context) {
|
|
40
|
+
if (this.minLevel <= 3 /* ERROR */) {
|
|
41
|
+
this.log("error", message, context);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
logWithContext(level, logMessage, structuredLog) {
|
|
45
|
+
if (level === "error") {
|
|
46
|
+
console.error(logMessage);
|
|
47
|
+
console.error(JSON.stringify(structuredLog));
|
|
48
|
+
} else if (level === "warn") {
|
|
49
|
+
console.warn(logMessage);
|
|
50
|
+
console.warn(JSON.stringify(structuredLog));
|
|
51
|
+
} else if (level === "info") {
|
|
52
|
+
console.info(logMessage);
|
|
53
|
+
} else {
|
|
54
|
+
console.log(logMessage);
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
logSimple(level, logMessage) {
|
|
58
|
+
if (level === "error") {
|
|
59
|
+
console.error(logMessage);
|
|
60
|
+
} else if (level === "warn") {
|
|
61
|
+
console.warn(logMessage);
|
|
62
|
+
} else if (level === "info") {
|
|
63
|
+
console.info(logMessage);
|
|
64
|
+
} else {
|
|
65
|
+
console.log(logMessage);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
log(level, message, context) {
|
|
69
|
+
const logMessage = `${this.prefix} ${message}`;
|
|
70
|
+
if (context && Object.keys(context).length > 0) {
|
|
71
|
+
const timestamp = new Date().toISOString();
|
|
72
|
+
const structuredLog = {
|
|
73
|
+
timestamp,
|
|
74
|
+
level,
|
|
75
|
+
component: this.prefix,
|
|
76
|
+
message,
|
|
77
|
+
...context
|
|
78
|
+
};
|
|
79
|
+
this.logWithContext(level, logMessage, structuredLog);
|
|
80
|
+
} else {
|
|
81
|
+
this.logSimple(level, logMessage);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
var createLogger = (component, minLevel) => {
|
|
86
|
+
return new Logger(component, minLevel);
|
|
87
|
+
};
|
|
88
|
+
var logger = new Logger("", 2 /* WARN */);
|
|
89
|
+
|
|
90
|
+
// src/pdf/extractor.ts
|
|
91
|
+
var logger2 = createLogger("Extractor");
|
|
21
92
|
var encodePixelsToPNG = (pixelData, width, height, channels) => {
|
|
22
93
|
const png = new PNG({ width, height });
|
|
23
94
|
if (channels === 4) {
|
|
@@ -44,6 +115,83 @@ var encodePixelsToPNG = (pixelData, width, height, channels) => {
|
|
|
44
115
|
const pngBuffer = PNG.sync.write(png);
|
|
45
116
|
return pngBuffer.toString("base64");
|
|
46
117
|
};
|
|
118
|
+
var processImageData = (imageData, pageNum, arrayIndex) => {
|
|
119
|
+
if (!imageData || typeof imageData !== "object") {
|
|
120
|
+
return null;
|
|
121
|
+
}
|
|
122
|
+
const img = imageData;
|
|
123
|
+
if (!img.data || !img.width || !img.height) {
|
|
124
|
+
return null;
|
|
125
|
+
}
|
|
126
|
+
const channels = img.kind === 1 ? 1 : img.kind === 3 ? 4 : 3;
|
|
127
|
+
const format = img.kind === 1 ? "grayscale" : img.kind === 3 ? "rgba" : "rgb";
|
|
128
|
+
const pngBase64 = encodePixelsToPNG(img.data, img.width, img.height, channels);
|
|
129
|
+
return {
|
|
130
|
+
page: pageNum,
|
|
131
|
+
index: arrayIndex,
|
|
132
|
+
width: img.width,
|
|
133
|
+
height: img.height,
|
|
134
|
+
format,
|
|
135
|
+
data: pngBase64
|
|
136
|
+
};
|
|
137
|
+
};
|
|
138
|
+
var retrieveImageData = async (page, imageName, pageNum) => {
|
|
139
|
+
if (imageName.startsWith("g_")) {
|
|
140
|
+
try {
|
|
141
|
+
const imageData = page.commonObjs.get(imageName);
|
|
142
|
+
if (imageData) {
|
|
143
|
+
return imageData;
|
|
144
|
+
}
|
|
145
|
+
} catch (error) {
|
|
146
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
147
|
+
logger2.warn("Error getting image from commonObjs", { imageName, error: message });
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
try {
|
|
151
|
+
const imageData = page.objs.get(imageName);
|
|
152
|
+
if (imageData !== undefined) {
|
|
153
|
+
return imageData;
|
|
154
|
+
}
|
|
155
|
+
} catch (error) {
|
|
156
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
157
|
+
logger2.warn("Sync image get failed, trying async", { imageName, error: message });
|
|
158
|
+
}
|
|
159
|
+
return new Promise((resolve) => {
|
|
160
|
+
let resolved = false;
|
|
161
|
+
let timeoutId = null;
|
|
162
|
+
const cleanup = () => {
|
|
163
|
+
if (timeoutId !== null) {
|
|
164
|
+
clearTimeout(timeoutId);
|
|
165
|
+
timeoutId = null;
|
|
166
|
+
}
|
|
167
|
+
};
|
|
168
|
+
timeoutId = setTimeout(() => {
|
|
169
|
+
if (!resolved) {
|
|
170
|
+
resolved = true;
|
|
171
|
+
cleanup();
|
|
172
|
+
logger2.warn("Image extraction timeout", { imageName, pageNum });
|
|
173
|
+
resolve(null);
|
|
174
|
+
}
|
|
175
|
+
}, 1e4);
|
|
176
|
+
try {
|
|
177
|
+
page.objs.get(imageName, (imageData) => {
|
|
178
|
+
if (!resolved) {
|
|
179
|
+
resolved = true;
|
|
180
|
+
cleanup();
|
|
181
|
+
resolve(imageData);
|
|
182
|
+
}
|
|
183
|
+
});
|
|
184
|
+
} catch (error) {
|
|
185
|
+
if (!resolved) {
|
|
186
|
+
resolved = true;
|
|
187
|
+
cleanup();
|
|
188
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
189
|
+
logger2.warn("Error in async image get", { imageName, error: message });
|
|
190
|
+
resolve(null);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
});
|
|
194
|
+
};
|
|
47
195
|
var extractMetadataAndPageCount = async (pdfDocument, includeMetadata, includePageCount) => {
|
|
48
196
|
const output = {};
|
|
49
197
|
if (includePageCount) {
|
|
@@ -69,7 +217,8 @@ var extractMetadataAndPageCount = async (pdfDocument, includeMetadata, includePa
|
|
|
69
217
|
output.metadata = metadataRecord;
|
|
70
218
|
}
|
|
71
219
|
} catch (metaError) {
|
|
72
|
-
|
|
220
|
+
const message = metaError instanceof Error ? metaError.message : String(metaError);
|
|
221
|
+
logger2.warn("Error extracting metadata", { error: message });
|
|
73
222
|
}
|
|
74
223
|
}
|
|
75
224
|
return output;
|
|
@@ -118,11 +267,10 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
|
|
|
118
267
|
imageIndices.push(i);
|
|
119
268
|
}
|
|
120
269
|
}
|
|
121
|
-
const imagePromises = imageIndices.map((imgIndex, arrayIndex) =>
|
|
270
|
+
const imagePromises = imageIndices.map(async (imgIndex, arrayIndex) => {
|
|
122
271
|
const argsArray = operatorList.argsArray[imgIndex];
|
|
123
272
|
if (!argsArray || argsArray.length === 0) {
|
|
124
|
-
|
|
125
|
-
return;
|
|
273
|
+
return null;
|
|
126
274
|
}
|
|
127
275
|
const imageName = argsArray[0];
|
|
128
276
|
let yPosition = 0;
|
|
@@ -133,77 +281,28 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
|
|
|
133
281
|
yPosition = Math.round(yCoord);
|
|
134
282
|
}
|
|
135
283
|
}
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
}
|
|
140
|
-
const img = imageData;
|
|
141
|
-
if (!img.data || !img.width || !img.height) {
|
|
142
|
-
return null;
|
|
143
|
-
}
|
|
144
|
-
const channels = img.kind === 1 ? 1 : img.kind === 3 ? 4 : 3;
|
|
145
|
-
const format = img.kind === 1 ? "grayscale" : img.kind === 3 ? "rgba" : "rgb";
|
|
146
|
-
const pngBase64 = encodePixelsToPNG(img.data, img.width, img.height, channels);
|
|
284
|
+
const imageData = await retrieveImageData(page, imageName, pageNum);
|
|
285
|
+
const extractedImage = processImageData(imageData, pageNum, arrayIndex);
|
|
286
|
+
if (extractedImage) {
|
|
147
287
|
return {
|
|
148
288
|
type: "image",
|
|
149
289
|
yPosition,
|
|
150
|
-
imageData:
|
|
151
|
-
page: pageNum,
|
|
152
|
-
index: arrayIndex,
|
|
153
|
-
width: img.width,
|
|
154
|
-
height: img.height,
|
|
155
|
-
format,
|
|
156
|
-
data: pngBase64
|
|
157
|
-
}
|
|
290
|
+
imageData: extractedImage
|
|
158
291
|
};
|
|
159
|
-
};
|
|
160
|
-
if (imageName.startsWith("g_")) {
|
|
161
|
-
try {
|
|
162
|
-
const imageData = page.commonObjs.get(imageName);
|
|
163
|
-
if (imageData) {
|
|
164
|
-
const result = processImageData(imageData);
|
|
165
|
-
resolve(result);
|
|
166
|
-
return;
|
|
167
|
-
}
|
|
168
|
-
} catch (error) {
|
|
169
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
170
|
-
console.warn(`[PDF Reader MCP] Error getting image from commonObjs ${imageName}: ${message}`);
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
try {
|
|
174
|
-
const imageData = page.objs.get(imageName);
|
|
175
|
-
if (imageData !== undefined) {
|
|
176
|
-
const result = processImageData(imageData);
|
|
177
|
-
resolve(result);
|
|
178
|
-
return;
|
|
179
|
-
}
|
|
180
|
-
} catch (error) {
|
|
181
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
182
|
-
console.warn(`[PDF Reader MCP] Sync image get failed for ${imageName}, trying async: ${message}`);
|
|
183
292
|
}
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
if (!resolved) {
|
|
187
|
-
resolved = true;
|
|
188
|
-
console.warn(`[PDF Reader MCP] Image extraction timeout for ${imageName} on page ${String(pageNum)}`);
|
|
189
|
-
resolve(null);
|
|
190
|
-
}
|
|
191
|
-
}, 1e4);
|
|
192
|
-
page.objs.get(imageName, (imageData) => {
|
|
193
|
-
if (!resolved) {
|
|
194
|
-
resolved = true;
|
|
195
|
-
clearTimeout(timeout);
|
|
196
|
-
const result = processImageData(imageData);
|
|
197
|
-
resolve(result);
|
|
198
|
-
}
|
|
199
|
-
});
|
|
200
|
-
}));
|
|
293
|
+
return null;
|
|
294
|
+
});
|
|
201
295
|
const resolvedImages = await Promise.all(imagePromises);
|
|
202
|
-
|
|
296
|
+
const validImages = resolvedImages.filter((item) => item !== null);
|
|
297
|
+
contentItems.push(...validImages);
|
|
203
298
|
}
|
|
204
299
|
} catch (error) {
|
|
205
300
|
const message = error instanceof Error ? error.message : String(error);
|
|
206
|
-
|
|
301
|
+
logger2.warn("Error extracting page content", {
|
|
302
|
+
pageNum,
|
|
303
|
+
sourceDescription,
|
|
304
|
+
error: message
|
|
305
|
+
});
|
|
207
306
|
return [
|
|
208
307
|
{
|
|
209
308
|
type: "text",
|
|
@@ -217,62 +316,82 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
|
|
|
217
316
|
|
|
218
317
|
// src/pdf/loader.ts
|
|
219
318
|
import fs from "node:fs/promises";
|
|
220
|
-
import { ErrorCode as ErrorCode2, McpError as McpError2 } from "@modelcontextprotocol/sdk/types.js";
|
|
221
319
|
import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
222
320
|
|
|
321
|
+
// src/utils/errors.ts
|
|
322
|
+
class PdfError extends Error {
|
|
323
|
+
code;
|
|
324
|
+
constructor(code, message, options) {
|
|
325
|
+
super(message, options?.cause ? { cause: options.cause } : undefined);
|
|
326
|
+
this.code = code;
|
|
327
|
+
this.name = "PdfError";
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
|
|
223
331
|
// src/utils/pathUtils.ts
|
|
332
|
+
import os from "node:os";
|
|
224
333
|
import path from "node:path";
|
|
225
|
-
import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js";
|
|
226
334
|
var PROJECT_ROOT = process.cwd();
|
|
335
|
+
var ALLOWED_ROOTS = [PROJECT_ROOT, os.homedir()];
|
|
227
336
|
var resolvePath = (userPath) => {
|
|
228
337
|
if (typeof userPath !== "string") {
|
|
229
|
-
throw new
|
|
338
|
+
throw new PdfError(-32602 /* InvalidParams */, "Path must be a string.");
|
|
230
339
|
}
|
|
231
340
|
const normalizedUserPath = path.normalize(userPath);
|
|
232
|
-
|
|
233
|
-
|
|
341
|
+
const resolvedPath = path.isAbsolute(normalizedUserPath) ? normalizedUserPath : path.resolve(PROJECT_ROOT, normalizedUserPath);
|
|
342
|
+
const isWithinAllowedRoot = ALLOWED_ROOTS.some((allowedRoot) => {
|
|
343
|
+
const relativePath = path.relative(allowedRoot, resolvedPath);
|
|
344
|
+
return relativePath !== "" && !relativePath.startsWith("..") && !path.isAbsolute(relativePath);
|
|
345
|
+
});
|
|
346
|
+
if (!isWithinAllowedRoot) {
|
|
347
|
+
throw new PdfError(-32602 /* InvalidParams */, "Access denied: Path resolves outside allowed directories.");
|
|
234
348
|
}
|
|
235
|
-
return
|
|
349
|
+
return resolvedPath;
|
|
236
350
|
};
|
|
237
351
|
|
|
238
352
|
// src/pdf/loader.ts
|
|
353
|
+
var logger3 = createLogger("Loader");
|
|
354
|
+
var MAX_PDF_SIZE = 100 * 1024 * 1024;
|
|
239
355
|
var loadPdfDocument = async (source, sourceDescription) => {
|
|
240
356
|
let pdfDataSource;
|
|
241
357
|
try {
|
|
242
358
|
if (source.path) {
|
|
243
359
|
const safePath = resolvePath(source.path);
|
|
244
360
|
const buffer = await fs.readFile(safePath);
|
|
361
|
+
if (buffer.length > MAX_PDF_SIZE) {
|
|
362
|
+
throw new PdfError(-32600 /* InvalidRequest */, `PDF file exceeds maximum size of ${MAX_PDF_SIZE} bytes (${(MAX_PDF_SIZE / 1024 / 1024).toFixed(0)}MB). File size: ${buffer.length} bytes.`);
|
|
363
|
+
}
|
|
245
364
|
pdfDataSource = new Uint8Array(buffer);
|
|
246
365
|
} else if (source.url) {
|
|
247
366
|
pdfDataSource = { url: source.url };
|
|
248
367
|
} else {
|
|
249
|
-
throw new
|
|
368
|
+
throw new PdfError(-32602 /* InvalidParams */, `Source ${sourceDescription} missing 'path' or 'url'.`);
|
|
250
369
|
}
|
|
251
370
|
} catch (err) {
|
|
252
|
-
if (err instanceof
|
|
371
|
+
if (err instanceof PdfError) {
|
|
253
372
|
throw err;
|
|
254
373
|
}
|
|
255
374
|
const message = err instanceof Error ? err.message : String(err);
|
|
256
|
-
const errorCode =
|
|
375
|
+
const errorCode = -32600 /* InvalidRequest */;
|
|
257
376
|
if (typeof err === "object" && err !== null && "code" in err && err.code === "ENOENT" && source.path) {
|
|
258
|
-
throw new
|
|
377
|
+
throw new PdfError(errorCode, `File not found at '${source.path}'.`, {
|
|
259
378
|
cause: err instanceof Error ? err : undefined
|
|
260
379
|
});
|
|
261
380
|
}
|
|
262
|
-
throw new
|
|
381
|
+
throw new PdfError(errorCode, `Failed to prepare PDF source ${sourceDescription}. Reason: ${message}`, { cause: err instanceof Error ? err : undefined });
|
|
263
382
|
}
|
|
264
383
|
const loadingTask = getDocument(pdfDataSource);
|
|
265
384
|
try {
|
|
266
385
|
return await loadingTask.promise;
|
|
267
386
|
} catch (err) {
|
|
268
|
-
console.error(`[PDF Reader MCP] PDF.js loading error for ${sourceDescription}:`, err);
|
|
269
387
|
const message = err instanceof Error ? err.message : String(err);
|
|
270
|
-
|
|
388
|
+
logger3.error("PDF.js loading error", { sourceDescription, error: message });
|
|
389
|
+
throw new PdfError(-32600 /* InvalidRequest */, `Failed to load PDF document from ${sourceDescription}. Reason: ${message || "Unknown loading error"}`, { cause: err instanceof Error ? err : undefined });
|
|
271
390
|
}
|
|
272
391
|
};
|
|
273
392
|
|
|
274
393
|
// src/pdf/parser.ts
|
|
275
|
-
|
|
394
|
+
var logger4 = createLogger("Parser");
|
|
276
395
|
var MAX_RANGE_SIZE = 1e4;
|
|
277
396
|
var parseRangePart = (part, pages) => {
|
|
278
397
|
const trimmedPart = part.trim();
|
|
@@ -290,7 +409,7 @@ var parseRangePart = (part, pages) => {
|
|
|
290
409
|
pages.add(i);
|
|
291
410
|
}
|
|
292
411
|
if (end === Infinity && practicalEnd === start + MAX_RANGE_SIZE) {
|
|
293
|
-
|
|
412
|
+
logger4.warn("Open-ended range truncated", { start, practicalEnd });
|
|
294
413
|
}
|
|
295
414
|
} else {
|
|
296
415
|
const page = parseInt(trimmedPart, 10);
|
|
@@ -329,7 +448,7 @@ var getTargetPages = (sourcePages, sourceDescription) => {
|
|
|
329
448
|
return uniquePages;
|
|
330
449
|
} catch (error) {
|
|
331
450
|
const message = error instanceof Error ? error.message : String(error);
|
|
332
|
-
throw new
|
|
451
|
+
throw new PdfError(-32602 /* InvalidParams */, `Invalid page specification for source ${sourceDescription}: ${message}`);
|
|
333
452
|
}
|
|
334
453
|
};
|
|
335
454
|
var determinePagesToProcess = (targetPages, totalPages, includeFullText) => {
|
|
@@ -369,13 +488,15 @@ var readPdfArgsSchema = z.object({
|
|
|
369
488
|
}).strict();
|
|
370
489
|
|
|
371
490
|
// src/handlers/readPdf.ts
|
|
491
|
+
var logger5 = createLogger("ReadPdf");
|
|
372
492
|
var processSingleSource = async (source, options) => {
|
|
373
493
|
const sourceDescription = source.path ?? source.url ?? "unknown source";
|
|
374
494
|
let individualResult = { source: sourceDescription, success: false };
|
|
495
|
+
let pdfDocument = null;
|
|
375
496
|
try {
|
|
376
497
|
const targetPages = getTargetPages(source.pages, sourceDescription);
|
|
377
498
|
const { pages: _pages, ...loadArgs } = source;
|
|
378
|
-
|
|
499
|
+
pdfDocument = await loadPdfDocument(loadArgs, sourceDescription);
|
|
379
500
|
const totalPages = pdfDocument.numPages;
|
|
380
501
|
const metadataOutput = await extractMetadataAndPageCount(pdfDocument, options.includeMetadata, options.includePageCount);
|
|
381
502
|
const output = { ...metadataOutput };
|
|
@@ -411,9 +532,7 @@ var processSingleSource = async (source, options) => {
|
|
|
411
532
|
individualResult = { ...individualResult, data: output, success: true };
|
|
412
533
|
} catch (error) {
|
|
413
534
|
let errorMessage = `Failed to process PDF from ${sourceDescription}.`;
|
|
414
|
-
if (error instanceof
|
|
415
|
-
errorMessage = error.message;
|
|
416
|
-
} else if (error instanceof Error) {
|
|
535
|
+
if (error instanceof Error) {
|
|
417
536
|
errorMessage += ` Reason: ${error.message}`;
|
|
418
537
|
} else {
|
|
419
538
|
errorMessage += ` Unknown error: ${JSON.stringify(error)}`;
|
|
@@ -421,27 +540,38 @@ var processSingleSource = async (source, options) => {
|
|
|
421
540
|
individualResult.error = errorMessage;
|
|
422
541
|
individualResult.success = false;
|
|
423
542
|
individualResult.data = undefined;
|
|
543
|
+
} finally {
|
|
544
|
+
if (pdfDocument && typeof pdfDocument.destroy === "function") {
|
|
545
|
+
try {
|
|
546
|
+
await pdfDocument.destroy();
|
|
547
|
+
} catch (destroyError) {
|
|
548
|
+
const message = destroyError instanceof Error ? destroyError.message : String(destroyError);
|
|
549
|
+
logger5.warn("Error destroying PDF document", { sourceDescription, error: message });
|
|
550
|
+
}
|
|
551
|
+
}
|
|
424
552
|
}
|
|
425
553
|
return individualResult;
|
|
426
554
|
};
|
|
427
|
-
var
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
555
|
+
var readPdf = tool().description("Reads content/metadata/images from one or more PDFs (local/URL). Each source can specify pages to extract.").input(readPdfArgsSchema).handler(async ({ input }) => {
|
|
556
|
+
const { sources, include_full_text, include_metadata, include_page_count, include_images } = input;
|
|
557
|
+
const MAX_CONCURRENT_SOURCES = 3;
|
|
558
|
+
const results = [];
|
|
559
|
+
const options = {
|
|
560
|
+
includeFullText: include_full_text ?? false,
|
|
561
|
+
includeMetadata: include_metadata ?? true,
|
|
562
|
+
includePageCount: include_page_count ?? true,
|
|
563
|
+
includeImages: include_images ?? false
|
|
564
|
+
};
|
|
565
|
+
for (let i = 0;i < sources.length; i += MAX_CONCURRENT_SOURCES) {
|
|
566
|
+
const batch = sources.slice(i, i + MAX_CONCURRENT_SOURCES);
|
|
567
|
+
const batchResults = await Promise.all(batch.map((source) => processSingleSource(source, options)));
|
|
568
|
+
results.push(...batchResults);
|
|
569
|
+
}
|
|
570
|
+
const allFailed = results.every((r) => !r.success);
|
|
571
|
+
if (allFailed) {
|
|
572
|
+
const errorMessages = results.map((r) => r.error).join("; ");
|
|
573
|
+
return toolError(`All PDF sources failed to process: ${errorMessages}`);
|
|
574
|
+
}
|
|
445
575
|
const content = [];
|
|
446
576
|
const resultsForJson = results.map((result) => {
|
|
447
577
|
if (result.data) {
|
|
@@ -460,71 +590,33 @@ var handleReadPdfFunc = async (args) => {
|
|
|
460
590
|
}
|
|
461
591
|
return result;
|
|
462
592
|
});
|
|
463
|
-
content.push({
|
|
464
|
-
type: "text",
|
|
465
|
-
text: JSON.stringify({ results: resultsForJson }, null, 2)
|
|
466
|
-
});
|
|
593
|
+
content.push(text(JSON.stringify({ results: resultsForJson }, null, 2)));
|
|
467
594
|
for (const result of results) {
|
|
468
595
|
if (!result.success || !result.data?.page_contents)
|
|
469
596
|
continue;
|
|
470
597
|
for (const pageContent of result.data.page_contents) {
|
|
471
598
|
for (const item of pageContent.items) {
|
|
472
599
|
if (item.type === "text" && item.textContent) {
|
|
473
|
-
content.push(
|
|
474
|
-
type: "text",
|
|
475
|
-
text: item.textContent
|
|
476
|
-
});
|
|
600
|
+
content.push(text(item.textContent));
|
|
477
601
|
} else if (item.type === "image" && item.imageData) {
|
|
478
|
-
content.push(
|
|
479
|
-
type: "image",
|
|
480
|
-
data: item.imageData.data,
|
|
481
|
-
mimeType: "image/png"
|
|
482
|
-
});
|
|
602
|
+
content.push(image(item.imageData.data, "image/png"));
|
|
483
603
|
}
|
|
484
604
|
}
|
|
485
605
|
}
|
|
486
606
|
}
|
|
487
|
-
return
|
|
488
|
-
};
|
|
489
|
-
var readPdfToolDefinition = {
|
|
490
|
-
name: "read_pdf",
|
|
491
|
-
description: "Reads content/metadata/images from one or more PDFs (local/URL). Each source can specify pages to extract.",
|
|
492
|
-
schema: readPdfArgsSchema,
|
|
493
|
-
handler: handleReadPdfFunc
|
|
494
|
-
};
|
|
495
|
-
|
|
496
|
-
// src/handlers/index.ts
|
|
497
|
-
var allToolDefinitions = [readPdfToolDefinition];
|
|
607
|
+
return content;
|
|
608
|
+
});
|
|
498
609
|
|
|
499
610
|
// src/index.ts
|
|
500
|
-
var server =
|
|
611
|
+
var server = createServer({
|
|
501
612
|
name: "pdf-reader-mcp",
|
|
502
613
|
version: "1.3.0",
|
|
503
|
-
|
|
504
|
-
},
|
|
505
|
-
|
|
506
|
-
});
|
|
507
|
-
var generateInputSchema = (schema) => {
|
|
508
|
-
return zodToJsonSchema(schema, { target: "openApi3" });
|
|
509
|
-
};
|
|
510
|
-
server.setRequestHandler(ListToolsRequestSchema, () => {
|
|
511
|
-
const availableTools = allToolDefinitions.map((def) => ({
|
|
512
|
-
name: def.name,
|
|
513
|
-
description: def.description,
|
|
514
|
-
inputSchema: generateInputSchema(def.schema)
|
|
515
|
-
}));
|
|
516
|
-
return { tools: availableTools };
|
|
517
|
-
});
|
|
518
|
-
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
519
|
-
const toolDefinition = allToolDefinitions.find((def) => def.name === request.params.name);
|
|
520
|
-
if (!toolDefinition) {
|
|
521
|
-
throw new McpError5(ErrorCode5.MethodNotFound, `Unknown tool: ${request.params.name}`);
|
|
522
|
-
}
|
|
523
|
-
return toolDefinition.handler(request.params.arguments);
|
|
614
|
+
instructions: "MCP Server for reading PDF files and extracting text, metadata, images, and page information.",
|
|
615
|
+
tools: { read_pdf: readPdf },
|
|
616
|
+
transport: stdio()
|
|
524
617
|
});
|
|
525
618
|
async function main() {
|
|
526
|
-
|
|
527
|
-
await server.connect(transport);
|
|
619
|
+
await server.start();
|
|
528
620
|
if (process.env.DEBUG_MCP) {
|
|
529
621
|
console.error("[PDF Reader MCP] Server running on stdio");
|
|
530
622
|
console.error("[PDF Reader MCP] Project root:", process.cwd());
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sylphx/pdf-reader-mcp",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "An MCP server providing tools to read PDF files.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -11,6 +11,12 @@
|
|
|
11
11
|
"README.md",
|
|
12
12
|
"LICENSE"
|
|
13
13
|
],
|
|
14
|
+
"exports": {
|
|
15
|
+
".": {
|
|
16
|
+
"import": "./dist/index.js",
|
|
17
|
+
"types": "./dist/index.d.ts"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
14
20
|
"publishConfig": {
|
|
15
21
|
"access": "public"
|
|
16
22
|
},
|
|
@@ -43,9 +49,9 @@
|
|
|
43
49
|
"build": "bunup",
|
|
44
50
|
"watch": "tsc --watch",
|
|
45
51
|
"inspector": "npx @modelcontextprotocol/inspector dist/index.js",
|
|
46
|
-
"test": "
|
|
47
|
-
"test:watch": "
|
|
48
|
-
"test:cov": "
|
|
52
|
+
"test": "bun test",
|
|
53
|
+
"test:watch": "bun test --watch",
|
|
54
|
+
"test:cov": "bun test --coverage",
|
|
49
55
|
"lint": "biome lint .",
|
|
50
56
|
"lint:fix": "biome lint --write .",
|
|
51
57
|
"format": "biome format --write .",
|
|
@@ -53,58 +59,48 @@
|
|
|
53
59
|
"check": "biome check .",
|
|
54
60
|
"check:fix": "biome check --write .",
|
|
55
61
|
"validate": "bun run check && bun run test",
|
|
56
|
-
"docs:dev": "
|
|
57
|
-
"docs:build": "
|
|
58
|
-
"docs:preview": "
|
|
62
|
+
"docs:dev": "leaf dev docs",
|
|
63
|
+
"docs:build": "leaf build docs",
|
|
64
|
+
"docs:preview": "leaf preview docs",
|
|
59
65
|
"start": "node dist/index.js",
|
|
60
66
|
"typecheck": "tsc --noEmit",
|
|
61
|
-
"benchmark": "
|
|
67
|
+
"benchmark": "bun bench",
|
|
62
68
|
"clean": "rm -rf dist coverage",
|
|
63
69
|
"docs:api": "typedoc --entryPoints src/index.ts --tsconfig tsconfig.json --plugin typedoc-plugin-markdown --out docs/api --readme none",
|
|
64
|
-
"prepublishOnly": "bun run clean && bun run build",
|
|
70
|
+
"prepublishOnly": "bunx @sylphx/doctor prepublish && bun run clean && bun run build",
|
|
65
71
|
"release": "standard-version",
|
|
66
|
-
"prepare": "
|
|
67
|
-
"changeset": "changeset",
|
|
68
|
-
"version-packages": "changeset version",
|
|
69
|
-
"release:new": "bun run build && changeset publish"
|
|
72
|
+
"prepare": "node_modules/.bin/lefthook install || true"
|
|
70
73
|
},
|
|
71
74
|
"dependencies": {
|
|
72
|
-
"@
|
|
73
|
-
"glob": "^11.0
|
|
74
|
-
"pdfjs-dist": "^5.4.
|
|
75
|
+
"@sylphx/mcp-server-sdk": "1.0.0",
|
|
76
|
+
"glob": "^11.1.0",
|
|
77
|
+
"pdfjs-dist": "^5.4.394",
|
|
75
78
|
"pngjs": "^7.0.0",
|
|
76
|
-
"zod": "
|
|
77
|
-
"zod-to-json-schema": "^3.
|
|
79
|
+
"zod": "4.2.0-canary.20251124T022609",
|
|
80
|
+
"zod-to-json-schema": "^3.25.0"
|
|
78
81
|
},
|
|
79
82
|
"devDependencies": {
|
|
80
|
-
"@biomejs/biome": "^2.3.
|
|
81
|
-
"@
|
|
82
|
-
"@
|
|
83
|
-
"@
|
|
83
|
+
"@biomejs/biome": "^2.3.8",
|
|
84
|
+
"@solidjs/router": "^0.15.4",
|
|
85
|
+
"@sylphx/biome-config": "^0.4.0",
|
|
86
|
+
"@sylphx/bump": "^0.12.1",
|
|
87
|
+
"@sylphx/doctor": "^1.23.3",
|
|
88
|
+
"@sylphx/leaf": "^1.0.0",
|
|
89
|
+
"@sylphx/leaf-theme-default": "^1.0.0",
|
|
90
|
+
"@sylphx/tsconfig": "^0.3.0",
|
|
84
91
|
"@types/glob": "^8.1.0",
|
|
85
|
-
"@types/node": "^24.
|
|
92
|
+
"@types/node": "^24.10.1",
|
|
86
93
|
"@types/pngjs": "^6.0.5",
|
|
87
|
-
"@vitest/coverage-v8": "^4.0.8",
|
|
88
94
|
"bunup": "^0.16.10",
|
|
89
|
-
"
|
|
90
|
-
"
|
|
91
|
-
"
|
|
92
|
-
"typedoc": "^0.28.2",
|
|
95
|
+
"lefthook": "^2.0.4",
|
|
96
|
+
"solid-js": "^1.9.10",
|
|
97
|
+
"typedoc": "^0.28.14",
|
|
93
98
|
"typedoc-plugin-markdown": "^4.9.0",
|
|
94
|
-
"typescript": "^5.
|
|
95
|
-
"
|
|
96
|
-
"vitest": "^4.0.7",
|
|
97
|
-
"vue": "^3.5.13"
|
|
98
|
-
},
|
|
99
|
-
"commitlint": {
|
|
100
|
-
"extends": [
|
|
101
|
-
"@commitlint/config-conventional"
|
|
102
|
-
]
|
|
103
|
-
},
|
|
104
|
-
"lint-staged": {
|
|
105
|
-
"*.{ts,tsx,js,cjs,json}": [
|
|
106
|
-
"biome check --write --no-errors-on-unmatched --files-ignore-unknown=true"
|
|
107
|
-
]
|
|
99
|
+
"typescript": "^5.9.3",
|
|
100
|
+
"vite": "^7.2.4"
|
|
108
101
|
},
|
|
109
|
-
"packageManager": "bun@1.3.1"
|
|
102
|
+
"packageManager": "bun@1.3.1",
|
|
103
|
+
"overrides": {
|
|
104
|
+
"js-yaml": "^4.1.0"
|
|
105
|
+
}
|
|
110
106
|
}
|