@sylphx/pdf-reader-mcp 1.3.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +89 -17
  2. package/dist/index.js +255 -163
  3. package/package.json +104 -108
package/README.md CHANGED
@@ -93,35 +93,56 @@ Real-world performance from production testing:
93
93
 
94
94
  ## 📦 Installation
95
95
 
96
+ ### Claude Code
97
+
96
98
  ```bash
97
- # Quick start - zero installation
98
- npx @sylphx/pdf-reader-mcp
99
+ claude mcp add pdf-reader -- npx @sylphx/pdf-reader-mcp
100
+ ```
99
101
 
100
- # Using pnpm (recommended)
101
- pnpm add @sylphx/pdf-reader-mcp
102
+ ### Claude Desktop
102
103
 
103
- # Using npm
104
- npm install @sylphx/pdf-reader-mcp
104
+ Add to `claude_desktop_config.json`:
105
105
 
106
- # Using yarn
107
- yarn add @sylphx/pdf-reader-mcp
106
+ ```json
107
+ {
108
+ "mcpServers": {
109
+ "pdf-reader": {
110
+ "command": "npx",
111
+ "args": ["@sylphx/pdf-reader-mcp"]
112
+ }
113
+ }
114
+ }
115
+ ```
108
116
 
109
- # For Claude Desktop (easiest)
110
- npx -y @smithery/cli install @sylphx/pdf-reader-mcp --client claude
117
+ <details>
118
+ <summary><strong>📍 Config file locations</strong></summary>
119
+
120
+ - **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
121
+ - **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
122
+ - **Linux**: `~/.config/Claude/claude_desktop_config.json`
123
+
124
+ </details>
125
+
126
+ ### VS Code
127
+
128
+ ```bash
129
+ code --add-mcp '{"name":"pdf-reader","command":"npx","args":["@sylphx/pdf-reader-mcp"]}'
111
130
  ```
112
131
 
113
- ---
132
+ ### Cursor
114
133
 
115
- ## 🎯 Quick Start
134
+ 1. Open **Settings** → **MCP** → **Add new MCP Server**
135
+ 2. Select **Command** type
136
+ 3. Enter: `npx @sylphx/pdf-reader-mcp`
116
137
 
117
- ### Configuration
138
+ ### Windsurf
118
139
 
119
- Add to your MCP client (`claude_desktop_config.json`, Cursor, Cline):
140
+ Add to your Windsurf MCP config:
120
141
 
121
142
  ```json
122
143
  {
123
144
  "mcpServers": {
124
- "pdf-reader-mcp": {
145
+ "pdf-reader": {
125
146
  "command": "npx",
126
147
  "args": ["@sylphx/pdf-reader-mcp"]
127
148
  }
@@ -129,6 +150,46 @@ Add to your MCP client (`claude_desktop_config.json`, Cursor, Cline):
129
150
  }
130
151
  ```
131
152
 
153
+ ### Cline
154
+
155
+ Add to Cline's MCP settings:
156
+
157
+ ```json
158
+ {
159
+ "mcpServers": {
160
+ "pdf-reader": {
161
+ "command": "npx",
162
+ "args": ["@sylphx/pdf-reader-mcp"]
163
+ }
164
+ }
165
+ }
166
+ ```
167
+
168
+ ### Warp
169
+
170
+ 1. Go to **Settings** → **AI** → **Manage MCP Servers** → **Add**
171
+ 2. Command: `npx`, Args: `@sylphx/pdf-reader-mcp`
172
+
173
+ ### Smithery (One-click)
174
+
175
+ ```bash
176
+ npx -y @smithery/cli install @sylphx/pdf-reader-mcp --client claude
177
+ ```
178
+
179
+ ### Manual Installation
180
+
181
+ ```bash
182
+ # Quick start - zero installation
183
+ npx @sylphx/pdf-reader-mcp
184
+
185
+ # Or install globally
186
+ npm install -g @sylphx/pdf-reader-mcp
187
+ ```
188
+
189
+ ---
190
+
191
+ ## 🎯 Quick Start
192
+
132
193
  ### Basic Usage
133
194
 
134
195
  ```json
@@ -701,11 +762,22 @@ MIT © [Sylphx](https://sylphx.com)
701
762
 
702
763
  Built with:
703
764
  - [PDF.js](https://mozilla.github.io/pdf.js/) - Mozilla PDF engine
704
- - [MCP SDK](https://modelcontextprotocol.io) - Model Context Protocol
705
- - [Vitest](https://vitest.dev) - Fast testing framework
765
+ - [Bun](https://bun.sh) - Fast JavaScript runtime
706
766
 
707
767
  Special thanks to the open source community ❤️
708
768
 
769
+ ## Powered by Sylphx
770
+
771
+ This project uses the following [@sylphx](https://github.com/SylphxAI) packages:
772
+
773
+ - [@sylphx/mcp-server-sdk](https://github.com/SylphxAI/mcp-server-sdk) - MCP server framework
774
+ - [@sylphx/biome-config](https://github.com/SylphxAI/biome-config) - Biome configuration
775
+ - [@sylphx/tsconfig](https://github.com/SylphxAI/tsconfig) - TypeScript configuration
776
+ - [@sylphx/bump](https://github.com/SylphxAI/bump) - Version management
777
+ - [@sylphx/doctor](https://github.com/SylphxAI/doctor) - Project health checker
778
+ - [@sylphx/leaf](https://github.com/SylphxAI/leaf) - Documentation framework
779
+ - [@sylphx/leaf-theme-default](https://github.com/SylphxAI/leaf-theme-default) - Documentation theme
780
+
709
781
  ---
710
782
 
711
783
  <p align="center">
package/dist/index.js CHANGED
@@ -1,23 +1,94 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // src/index.ts
4
- import { Server } from "@modelcontextprotocol/sdk/server/index.js";
5
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6
- import {
7
- CallToolRequestSchema,
8
- ErrorCode as ErrorCode5,
9
- ListToolsRequestSchema,
10
- McpError as McpError5
11
- } from "@modelcontextprotocol/sdk/types.js";
12
- import { zodToJsonSchema } from "zod-to-json-schema";
4
+ import { createServer, stdio } from "@sylphx/mcp-server-sdk";
13
5
 
14
6
  // src/handlers/readPdf.ts
15
- import { ErrorCode as ErrorCode4, McpError as McpError4 } from "@modelcontextprotocol/sdk/types.js";
16
- import { z as z2 } from "zod";
7
+ import { image, text, tool, toolError } from "@sylphx/mcp-server-sdk";
17
8
 
18
9
  // src/pdf/extractor.ts
19
10
  import { OPS } from "pdfjs-dist/legacy/build/pdf.mjs";
20
11
  import { PNG } from "pngjs";
12
+
13
+ // src/utils/logger.ts
14
+ class Logger {
15
+ prefix;
16
+ minLevel;
17
+ constructor(component, minLevel = 1 /* INFO */) {
18
+ this.prefix = `[PDF Reader MCP${component ? ` - ${component}` : ""}]`;
19
+ this.minLevel = minLevel;
20
+ }
21
+ setLevel(level) {
22
+ this.minLevel = level;
23
+ }
24
+ debug(message, context) {
25
+ if (this.minLevel <= 0 /* DEBUG */) {
26
+ this.log("debug", message, context);
27
+ }
28
+ }
29
+ info(message, context) {
30
+ if (this.minLevel <= 1 /* INFO */) {
31
+ this.log("info", message, context);
32
+ }
33
+ }
34
+ warn(message, context) {
35
+ if (this.minLevel <= 2 /* WARN */) {
36
+ this.log("warn", message, context);
37
+ }
38
+ }
39
+ error(message, context) {
40
+ if (this.minLevel <= 3 /* ERROR */) {
41
+ this.log("error", message, context);
42
+ }
43
+ }
44
+ logWithContext(level, logMessage, structuredLog) {
45
+ if (level === "error") {
46
+ console.error(logMessage);
47
+ console.error(JSON.stringify(structuredLog));
48
+ } else if (level === "warn") {
49
+ console.warn(logMessage);
50
+ console.warn(JSON.stringify(structuredLog));
51
+ } else if (level === "info") {
52
+ console.info(logMessage);
53
+ } else {
54
+ console.log(logMessage);
55
+ }
56
+ }
57
+ logSimple(level, logMessage) {
58
+ if (level === "error") {
59
+ console.error(logMessage);
60
+ } else if (level === "warn") {
61
+ console.warn(logMessage);
62
+ } else if (level === "info") {
63
+ console.info(logMessage);
64
+ } else {
65
+ console.log(logMessage);
66
+ }
67
+ }
68
+ log(level, message, context) {
69
+ const logMessage = `${this.prefix} ${message}`;
70
+ if (context && Object.keys(context).length > 0) {
71
+ const timestamp = new Date().toISOString();
72
+ const structuredLog = {
73
+ timestamp,
74
+ level,
75
+ component: this.prefix,
76
+ message,
77
+ ...context
78
+ };
79
+ this.logWithContext(level, logMessage, structuredLog);
80
+ } else {
81
+ this.logSimple(level, logMessage);
82
+ }
83
+ }
84
+ }
85
+ var createLogger = (component, minLevel) => {
86
+ return new Logger(component, minLevel);
87
+ };
88
+ var logger = new Logger("", 2 /* WARN */);
89
+
90
+ // src/pdf/extractor.ts
91
+ var logger2 = createLogger("Extractor");
21
92
  var encodePixelsToPNG = (pixelData, width, height, channels) => {
22
93
  const png = new PNG({ width, height });
23
94
  if (channels === 4) {
@@ -44,6 +115,83 @@ var encodePixelsToPNG = (pixelData, width, height, channels) => {
44
115
  const pngBuffer = PNG.sync.write(png);
45
116
  return pngBuffer.toString("base64");
46
117
  };
118
+ var processImageData = (imageData, pageNum, arrayIndex) => {
119
+ if (!imageData || typeof imageData !== "object") {
120
+ return null;
121
+ }
122
+ const img = imageData;
123
+ if (!img.data || !img.width || !img.height) {
124
+ return null;
125
+ }
126
+ const channels = img.kind === 1 ? 1 : img.kind === 3 ? 4 : 3;
127
+ const format = img.kind === 1 ? "grayscale" : img.kind === 3 ? "rgba" : "rgb";
128
+ const pngBase64 = encodePixelsToPNG(img.data, img.width, img.height, channels);
129
+ return {
130
+ page: pageNum,
131
+ index: arrayIndex,
132
+ width: img.width,
133
+ height: img.height,
134
+ format,
135
+ data: pngBase64
136
+ };
137
+ };
138
+ var retrieveImageData = async (page, imageName, pageNum) => {
139
+ if (imageName.startsWith("g_")) {
140
+ try {
141
+ const imageData = page.commonObjs.get(imageName);
142
+ if (imageData) {
143
+ return imageData;
144
+ }
145
+ } catch (error) {
146
+ const message = error instanceof Error ? error.message : String(error);
147
+ logger2.warn("Error getting image from commonObjs", { imageName, error: message });
148
+ }
149
+ }
150
+ try {
151
+ const imageData = page.objs.get(imageName);
152
+ if (imageData !== undefined) {
153
+ return imageData;
154
+ }
155
+ } catch (error) {
156
+ const message = error instanceof Error ? error.message : String(error);
157
+ logger2.warn("Sync image get failed, trying async", { imageName, error: message });
158
+ }
159
+ return new Promise((resolve) => {
160
+ let resolved = false;
161
+ let timeoutId = null;
162
+ const cleanup = () => {
163
+ if (timeoutId !== null) {
164
+ clearTimeout(timeoutId);
165
+ timeoutId = null;
166
+ }
167
+ };
168
+ timeoutId = setTimeout(() => {
169
+ if (!resolved) {
170
+ resolved = true;
171
+ cleanup();
172
+ logger2.warn("Image extraction timeout", { imageName, pageNum });
173
+ resolve(null);
174
+ }
175
+ }, 1e4);
176
+ try {
177
+ page.objs.get(imageName, (imageData) => {
178
+ if (!resolved) {
179
+ resolved = true;
180
+ cleanup();
181
+ resolve(imageData);
182
+ }
183
+ });
184
+ } catch (error) {
185
+ if (!resolved) {
186
+ resolved = true;
187
+ cleanup();
188
+ const message = error instanceof Error ? error.message : String(error);
189
+ logger2.warn("Error in async image get", { imageName, error: message });
190
+ resolve(null);
191
+ }
192
+ }
193
+ });
194
+ };
47
195
  var extractMetadataAndPageCount = async (pdfDocument, includeMetadata, includePageCount) => {
48
196
  const output = {};
49
197
  if (includePageCount) {
@@ -69,7 +217,8 @@ var extractMetadataAndPageCount = async (pdfDocument, includeMetadata, includePa
69
217
  output.metadata = metadataRecord;
70
218
  }
71
219
  } catch (metaError) {
72
- console.warn(`[PDF Reader MCP] Error extracting metadata: ${metaError instanceof Error ? metaError.message : String(metaError)}`);
220
+ const message = metaError instanceof Error ? metaError.message : String(metaError);
221
+ logger2.warn("Error extracting metadata", { error: message });
73
222
  }
74
223
  }
75
224
  return output;
@@ -118,11 +267,10 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
118
267
  imageIndices.push(i);
119
268
  }
120
269
  }
121
- const imagePromises = imageIndices.map((imgIndex, arrayIndex) => new Promise((resolve) => {
270
+ const imagePromises = imageIndices.map(async (imgIndex, arrayIndex) => {
122
271
  const argsArray = operatorList.argsArray[imgIndex];
123
272
  if (!argsArray || argsArray.length === 0) {
124
- resolve(null);
125
- return;
273
+ return null;
126
274
  }
127
275
  const imageName = argsArray[0];
128
276
  let yPosition = 0;
@@ -133,77 +281,28 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
133
281
  yPosition = Math.round(yCoord);
134
282
  }
135
283
  }
136
- const processImageData = (imageData) => {
137
- if (!imageData || typeof imageData !== "object") {
138
- return null;
139
- }
140
- const img = imageData;
141
- if (!img.data || !img.width || !img.height) {
142
- return null;
143
- }
144
- const channels = img.kind === 1 ? 1 : img.kind === 3 ? 4 : 3;
145
- const format = img.kind === 1 ? "grayscale" : img.kind === 3 ? "rgba" : "rgb";
146
- const pngBase64 = encodePixelsToPNG(img.data, img.width, img.height, channels);
284
+ const imageData = await retrieveImageData(page, imageName, pageNum);
285
+ const extractedImage = processImageData(imageData, pageNum, arrayIndex);
286
+ if (extractedImage) {
147
287
  return {
148
288
  type: "image",
149
289
  yPosition,
150
- imageData: {
151
- page: pageNum,
152
- index: arrayIndex,
153
- width: img.width,
154
- height: img.height,
155
- format,
156
- data: pngBase64
157
- }
290
+ imageData: extractedImage
158
291
  };
159
- };
160
- if (imageName.startsWith("g_")) {
161
- try {
162
- const imageData = page.commonObjs.get(imageName);
163
- if (imageData) {
164
- const result = processImageData(imageData);
165
- resolve(result);
166
- return;
167
- }
168
- } catch (error) {
169
- const message = error instanceof Error ? error.message : String(error);
170
- console.warn(`[PDF Reader MCP] Error getting image from commonObjs ${imageName}: ${message}`);
171
- }
172
- }
173
- try {
174
- const imageData = page.objs.get(imageName);
175
- if (imageData !== undefined) {
176
- const result = processImageData(imageData);
177
- resolve(result);
178
- return;
179
- }
180
- } catch (error) {
181
- const message = error instanceof Error ? error.message : String(error);
182
- console.warn(`[PDF Reader MCP] Sync image get failed for ${imageName}, trying async: ${message}`);
183
292
  }
184
- let resolved = false;
185
- const timeout = setTimeout(() => {
186
- if (!resolved) {
187
- resolved = true;
188
- console.warn(`[PDF Reader MCP] Image extraction timeout for ${imageName} on page ${String(pageNum)}`);
189
- resolve(null);
190
- }
191
- }, 1e4);
192
- page.objs.get(imageName, (imageData) => {
193
- if (!resolved) {
194
- resolved = true;
195
- clearTimeout(timeout);
196
- const result = processImageData(imageData);
197
- resolve(result);
198
- }
199
- });
200
- }));
293
+ return null;
294
+ });
201
295
  const resolvedImages = await Promise.all(imagePromises);
202
- contentItems.push(...resolvedImages.filter((item) => item !== null));
296
+ const validImages = resolvedImages.filter((item) => item !== null);
297
+ contentItems.push(...validImages);
203
298
  }
204
299
  } catch (error) {
205
300
  const message = error instanceof Error ? error.message : String(error);
206
- console.warn(`[PDF Reader MCP] Error extracting page content for page ${String(pageNum)} in ${sourceDescription}: ${message}`);
301
+ logger2.warn("Error extracting page content", {
302
+ pageNum,
303
+ sourceDescription,
304
+ error: message
305
+ });
207
306
  return [
208
307
  {
209
308
  type: "text",
@@ -217,62 +316,82 @@ var extractPageContent = async (pdfDocument, pageNum, includeImages, sourceDescr
217
316
 
218
317
  // src/pdf/loader.ts
219
318
  import fs from "node:fs/promises";
220
- import { ErrorCode as ErrorCode2, McpError as McpError2 } from "@modelcontextprotocol/sdk/types.js";
221
319
  import { getDocument } from "pdfjs-dist/legacy/build/pdf.mjs";
222
320
 
321
+ // src/utils/errors.ts
322
+ class PdfError extends Error {
323
+ code;
324
+ constructor(code, message, options) {
325
+ super(message, options?.cause ? { cause: options.cause } : undefined);
326
+ this.code = code;
327
+ this.name = "PdfError";
328
+ }
329
+ }
330
+
223
331
  // src/utils/pathUtils.ts
332
+ import os from "node:os";
224
333
  import path from "node:path";
225
- import { ErrorCode, McpError } from "@modelcontextprotocol/sdk/types.js";
226
334
  var PROJECT_ROOT = process.cwd();
335
+ var ALLOWED_ROOTS = [PROJECT_ROOT, os.homedir()];
227
336
  var resolvePath = (userPath) => {
228
337
  if (typeof userPath !== "string") {
229
- throw new McpError(ErrorCode.InvalidParams, "Path must be a string.");
338
+ throw new PdfError(-32602 /* InvalidParams */, "Path must be a string.");
230
339
  }
231
340
  const normalizedUserPath = path.normalize(userPath);
232
- if (path.isAbsolute(normalizedUserPath)) {
233
- return normalizedUserPath;
341
+ const resolvedPath = path.isAbsolute(normalizedUserPath) ? normalizedUserPath : path.resolve(PROJECT_ROOT, normalizedUserPath);
342
+ const isWithinAllowedRoot = ALLOWED_ROOTS.some((allowedRoot) => {
343
+ const relativePath = path.relative(allowedRoot, resolvedPath);
344
+ return relativePath !== "" && !relativePath.startsWith("..") && !path.isAbsolute(relativePath);
345
+ });
346
+ if (!isWithinAllowedRoot) {
347
+ throw new PdfError(-32602 /* InvalidParams */, "Access denied: Path resolves outside allowed directories.");
234
348
  }
235
- return path.resolve(PROJECT_ROOT, normalizedUserPath);
349
+ return resolvedPath;
236
350
  };
237
351
 
238
352
  // src/pdf/loader.ts
353
+ var logger3 = createLogger("Loader");
354
+ var MAX_PDF_SIZE = 100 * 1024 * 1024;
239
355
  var loadPdfDocument = async (source, sourceDescription) => {
240
356
  let pdfDataSource;
241
357
  try {
242
358
  if (source.path) {
243
359
  const safePath = resolvePath(source.path);
244
360
  const buffer = await fs.readFile(safePath);
361
+ if (buffer.length > MAX_PDF_SIZE) {
362
+ throw new PdfError(-32600 /* InvalidRequest */, `PDF file exceeds maximum size of ${MAX_PDF_SIZE} bytes (${(MAX_PDF_SIZE / 1024 / 1024).toFixed(0)}MB). File size: ${buffer.length} bytes.`);
363
+ }
245
364
  pdfDataSource = new Uint8Array(buffer);
246
365
  } else if (source.url) {
247
366
  pdfDataSource = { url: source.url };
248
367
  } else {
249
- throw new McpError2(ErrorCode2.InvalidParams, `Source ${sourceDescription} missing 'path' or 'url'.`);
368
+ throw new PdfError(-32602 /* InvalidParams */, `Source ${sourceDescription} missing 'path' or 'url'.`);
250
369
  }
251
370
  } catch (err) {
252
- if (err instanceof McpError2) {
371
+ if (err instanceof PdfError) {
253
372
  throw err;
254
373
  }
255
374
  const message = err instanceof Error ? err.message : String(err);
256
- const errorCode = ErrorCode2.InvalidRequest;
375
+ const errorCode = -32600 /* InvalidRequest */;
257
376
  if (typeof err === "object" && err !== null && "code" in err && err.code === "ENOENT" && source.path) {
258
- throw new McpError2(errorCode, `File not found at '${source.path}'.`, {
377
+ throw new PdfError(errorCode, `File not found at '${source.path}'.`, {
259
378
  cause: err instanceof Error ? err : undefined
260
379
  });
261
380
  }
262
- throw new McpError2(errorCode, `Failed to prepare PDF source ${sourceDescription}. Reason: ${message}`, { cause: err instanceof Error ? err : undefined });
381
+ throw new PdfError(errorCode, `Failed to prepare PDF source ${sourceDescription}. Reason: ${message}`, { cause: err instanceof Error ? err : undefined });
263
382
  }
264
383
  const loadingTask = getDocument(pdfDataSource);
265
384
  try {
266
385
  return await loadingTask.promise;
267
386
  } catch (err) {
268
- console.error(`[PDF Reader MCP] PDF.js loading error for ${sourceDescription}:`, err);
269
387
  const message = err instanceof Error ? err.message : String(err);
270
- throw new McpError2(ErrorCode2.InvalidRequest, `Failed to load PDF document from ${sourceDescription}. Reason: ${message || "Unknown loading error"}`, { cause: err instanceof Error ? err : undefined });
388
+ logger3.error("PDF.js loading error", { sourceDescription, error: message });
389
+ throw new PdfError(-32600 /* InvalidRequest */, `Failed to load PDF document from ${sourceDescription}. Reason: ${message || "Unknown loading error"}`, { cause: err instanceof Error ? err : undefined });
271
390
  }
272
391
  };
273
392
 
274
393
  // src/pdf/parser.ts
275
- import { ErrorCode as ErrorCode3, McpError as McpError3 } from "@modelcontextprotocol/sdk/types.js";
394
+ var logger4 = createLogger("Parser");
276
395
  var MAX_RANGE_SIZE = 1e4;
277
396
  var parseRangePart = (part, pages) => {
278
397
  const trimmedPart = part.trim();
@@ -290,7 +409,7 @@ var parseRangePart = (part, pages) => {
290
409
  pages.add(i);
291
410
  }
292
411
  if (end === Infinity && practicalEnd === start + MAX_RANGE_SIZE) {
293
- console.warn(`[PDF Reader MCP] Open-ended range starting at ${String(start)} was truncated at page ${String(practicalEnd)}.`);
412
+ logger4.warn("Open-ended range truncated", { start, practicalEnd });
294
413
  }
295
414
  } else {
296
415
  const page = parseInt(trimmedPart, 10);
@@ -329,7 +448,7 @@ var getTargetPages = (sourcePages, sourceDescription) => {
329
448
  return uniquePages;
330
449
  } catch (error) {
331
450
  const message = error instanceof Error ? error.message : String(error);
332
- throw new McpError3(ErrorCode3.InvalidParams, `Invalid page specification for source ${sourceDescription}: ${message}`);
451
+ throw new PdfError(-32602 /* InvalidParams */, `Invalid page specification for source ${sourceDescription}: ${message}`);
333
452
  }
334
453
  };
335
454
  var determinePagesToProcess = (targetPages, totalPages, includeFullText) => {
@@ -369,13 +488,15 @@ var readPdfArgsSchema = z.object({
369
488
  }).strict();
370
489
 
371
490
  // src/handlers/readPdf.ts
491
+ var logger5 = createLogger("ReadPdf");
372
492
  var processSingleSource = async (source, options) => {
373
493
  const sourceDescription = source.path ?? source.url ?? "unknown source";
374
494
  let individualResult = { source: sourceDescription, success: false };
495
+ let pdfDocument = null;
375
496
  try {
376
497
  const targetPages = getTargetPages(source.pages, sourceDescription);
377
498
  const { pages: _pages, ...loadArgs } = source;
378
- const pdfDocument = await loadPdfDocument(loadArgs, sourceDescription);
499
+ pdfDocument = await loadPdfDocument(loadArgs, sourceDescription);
379
500
  const totalPages = pdfDocument.numPages;
380
501
  const metadataOutput = await extractMetadataAndPageCount(pdfDocument, options.includeMetadata, options.includePageCount);
381
502
  const output = { ...metadataOutput };
@@ -411,9 +532,7 @@ var processSingleSource = async (source, options) => {
411
532
  individualResult = { ...individualResult, data: output, success: true };
412
533
  } catch (error) {
413
534
  let errorMessage = `Failed to process PDF from ${sourceDescription}.`;
414
- if (error instanceof McpError4) {
415
- errorMessage = error.message;
416
- } else if (error instanceof Error) {
535
+ if (error instanceof Error) {
417
536
  errorMessage += ` Reason: ${error.message}`;
418
537
  } else {
419
538
  errorMessage += ` Unknown error: ${JSON.stringify(error)}`;
@@ -421,27 +540,38 @@ var processSingleSource = async (source, options) => {
421
540
  individualResult.error = errorMessage;
422
541
  individualResult.success = false;
423
542
  individualResult.data = undefined;
543
+ } finally {
544
+ if (pdfDocument && typeof pdfDocument.destroy === "function") {
545
+ try {
546
+ await pdfDocument.destroy();
547
+ } catch (destroyError) {
548
+ const message = destroyError instanceof Error ? destroyError.message : String(destroyError);
549
+ logger5.warn("Error destroying PDF document", { sourceDescription, error: message });
550
+ }
551
+ }
424
552
  }
425
553
  return individualResult;
426
554
  };
427
- var handleReadPdfFunc = async (args) => {
428
- let parsedArgs;
429
- try {
430
- parsedArgs = readPdfArgsSchema.parse(args);
431
- } catch (error) {
432
- if (error instanceof z2.ZodError) {
433
- throw new McpError4(ErrorCode4.InvalidParams, `Invalid arguments: ${error.issues.map((e) => `${e.path.join(".")} (${e.message})`).join(", ")}`);
434
- }
435
- const message = error instanceof Error ? error.message : String(error);
436
- throw new McpError4(ErrorCode4.InvalidParams, `Argument validation failed: ${message}`);
437
- }
438
- const { sources, include_full_text, include_metadata, include_page_count, include_images } = parsedArgs;
439
- const results = await Promise.all(sources.map((source) => processSingleSource(source, {
440
- includeFullText: include_full_text,
441
- includeMetadata: include_metadata,
442
- includePageCount: include_page_count,
443
- includeImages: include_images
444
- })));
555
+ var readPdf = tool().description("Reads content/metadata/images from one or more PDFs (local/URL). Each source can specify pages to extract.").input(readPdfArgsSchema).handler(async ({ input }) => {
556
+ const { sources, include_full_text, include_metadata, include_page_count, include_images } = input;
557
+ const MAX_CONCURRENT_SOURCES = 3;
558
+ const results = [];
559
+ const options = {
560
+ includeFullText: include_full_text ?? false,
561
+ includeMetadata: include_metadata ?? true,
562
+ includePageCount: include_page_count ?? true,
563
+ includeImages: include_images ?? false
564
+ };
565
+ for (let i = 0;i < sources.length; i += MAX_CONCURRENT_SOURCES) {
566
+ const batch = sources.slice(i, i + MAX_CONCURRENT_SOURCES);
567
+ const batchResults = await Promise.all(batch.map((source) => processSingleSource(source, options)));
568
+ results.push(...batchResults);
569
+ }
570
+ const allFailed = results.every((r) => !r.success);
571
+ if (allFailed) {
572
+ const errorMessages = results.map((r) => r.error).join("; ");
573
+ return toolError(`All PDF sources failed to process: ${errorMessages}`);
574
+ }
445
575
  const content = [];
446
576
  const resultsForJson = results.map((result) => {
447
577
  if (result.data) {
@@ -460,71 +590,33 @@ var handleReadPdfFunc = async (args) => {
460
590
  }
461
591
  return result;
462
592
  });
463
- content.push({
464
- type: "text",
465
- text: JSON.stringify({ results: resultsForJson }, null, 2)
466
- });
593
+ content.push(text(JSON.stringify({ results: resultsForJson }, null, 2)));
467
594
  for (const result of results) {
468
595
  if (!result.success || !result.data?.page_contents)
469
596
  continue;
470
597
  for (const pageContent of result.data.page_contents) {
471
598
  for (const item of pageContent.items) {
472
599
  if (item.type === "text" && item.textContent) {
473
- content.push({
474
- type: "text",
475
- text: item.textContent
476
- });
600
+ content.push(text(item.textContent));
477
601
  } else if (item.type === "image" && item.imageData) {
478
- content.push({
479
- type: "image",
480
- data: item.imageData.data,
481
- mimeType: "image/png"
482
- });
602
+ content.push(image(item.imageData.data, "image/png"));
483
603
  }
484
604
  }
485
605
  }
486
606
  }
487
- return { content };
488
- };
489
- var readPdfToolDefinition = {
490
- name: "read_pdf",
491
- description: "Reads content/metadata/images from one or more PDFs (local/URL). Each source can specify pages to extract.",
492
- schema: readPdfArgsSchema,
493
- handler: handleReadPdfFunc
494
- };
495
-
496
- // src/handlers/index.ts
497
- var allToolDefinitions = [readPdfToolDefinition];
607
+ return content;
608
+ });
498
609
 
499
610
  // src/index.ts
500
- var server = new Server({
611
+ var server = createServer({
501
612
  name: "pdf-reader-mcp",
502
613
  version: "1.3.0",
503
- description: "MCP Server for reading PDF files and extracting text, metadata, images, and page information."
504
- }, {
505
- capabilities: { tools: {} }
506
- });
507
- var generateInputSchema = (schema) => {
508
- return zodToJsonSchema(schema, { target: "openApi3" });
509
- };
510
- server.setRequestHandler(ListToolsRequestSchema, () => {
511
- const availableTools = allToolDefinitions.map((def) => ({
512
- name: def.name,
513
- description: def.description,
514
- inputSchema: generateInputSchema(def.schema)
515
- }));
516
- return { tools: availableTools };
517
- });
518
- server.setRequestHandler(CallToolRequestSchema, async (request) => {
519
- const toolDefinition = allToolDefinitions.find((def) => def.name === request.params.name);
520
- if (!toolDefinition) {
521
- throw new McpError5(ErrorCode5.MethodNotFound, `Unknown tool: ${request.params.name}`);
522
- }
523
- return toolDefinition.handler(request.params.arguments);
614
+ instructions: "MCP Server for reading PDF files and extracting text, metadata, images, and page information.",
615
+ tools: { read_pdf: readPdf },
616
+ transport: stdio()
524
617
  });
525
618
  async function main() {
526
- const transport = new StdioServerTransport;
527
- await server.connect(transport);
619
+ await server.start();
528
620
  if (process.env.DEBUG_MCP) {
529
621
  console.error("[PDF Reader MCP] Server running on stdio");
530
622
  console.error("[PDF Reader MCP] Project root:", process.cwd());
package/package.json CHANGED
@@ -1,110 +1,106 @@
1
1
  {
2
- "name": "@sylphx/pdf-reader-mcp",
3
- "version": "1.3.2",
4
- "description": "An MCP server providing tools to read PDF files.",
5
- "type": "module",
6
- "bin": {
7
- "pdf-reader-mcp": "./dist/index.js"
8
- },
9
- "files": [
10
- "dist/",
11
- "README.md",
12
- "LICENSE"
13
- ],
14
- "publishConfig": {
15
- "access": "public"
16
- },
17
- "engines": {
18
- "node": ">=22.0.0"
19
- },
20
- "repository": {
21
- "type": "git",
22
- "url": "git+https://github.com/SylphxAI/pdf-reader-mcp.git"
23
- },
24
- "bugs": {
25
- "url": "https://github.com/SylphxAI/pdf-reader-mcp/issues"
26
- },
27
- "homepage": "https://github.com/SylphxAI/pdf-reader-mcp#readme",
28
- "author": "Sylphx <contact@sylphx.com> (https://sylphx.com)",
29
- "license": "MIT",
30
- "keywords": [
31
- "mcp",
32
- "model-context-protocol",
33
- "pdf",
34
- "reader",
35
- "parser",
36
- "typescript",
37
- "node",
38
- "ai",
39
- "agent",
40
- "tool"
41
- ],
42
- "scripts": {
43
- "build": "bunup",
44
- "watch": "tsc --watch",
45
- "inspector": "npx @modelcontextprotocol/inspector dist/index.js",
46
- "test": "vitest run",
47
- "test:watch": "vitest watch",
48
- "test:cov": "vitest run --coverage",
49
- "lint": "biome lint .",
50
- "lint:fix": "biome lint --write .",
51
- "format": "biome format --write .",
52
- "check-format": "biome format .",
53
- "check": "biome check .",
54
- "check:fix": "biome check --write .",
55
- "validate": "bun run check && bun run test",
56
- "docs:dev": "vitepress dev docs",
57
- "docs:build": "vitepress build docs",
58
- "docs:preview": "vitepress preview docs",
59
- "start": "node dist/index.js",
60
- "typecheck": "tsc --noEmit",
61
- "benchmark": "vitest bench",
62
- "clean": "rm -rf dist coverage",
63
- "docs:api": "typedoc --entryPoints src/index.ts --tsconfig tsconfig.json --plugin typedoc-plugin-markdown --out docs/api --readme none",
64
- "prepublishOnly": "bun run clean && bun run build",
65
- "release": "standard-version",
66
- "prepare": "husky",
67
- "changeset": "changeset",
68
- "version-packages": "changeset version",
69
- "release:new": "bun run build && changeset publish"
70
- },
71
- "dependencies": {
72
- "@modelcontextprotocol/sdk": "^1.21.0",
73
- "glob": "^11.0.1",
74
- "pdfjs-dist": "^5.4.296",
75
- "pngjs": "^7.0.0",
76
- "zod": "^3.25.76",
77
- "zod-to-json-schema": "^3.24.6"
78
- },
79
- "devDependencies": {
80
- "@biomejs/biome": "^2.3.2",
81
- "@changesets/cli": "^2.29.7",
82
- "@commitlint/cli": "^20.1.0",
83
- "@commitlint/config-conventional": "^20.0.0",
84
- "@types/glob": "^8.1.0",
85
- "@types/node": "^24.0.7",
86
- "@types/pngjs": "^6.0.5",
87
- "@vitest/coverage-v8": "^4.0.8",
88
- "bunup": "^0.16.10",
89
- "husky": "^9.1.7",
90
- "lint-staged": "^16.2.6",
91
- "standard-version": "^9.5.0",
92
- "typedoc": "^0.28.2",
93
- "typedoc-plugin-markdown": "^4.9.0",
94
- "typescript": "^5.8.3",
95
- "vitepress": "^1.6.3",
96
- "vitest": "^4.0.7",
97
- "vue": "^3.5.13"
98
- },
99
- "commitlint": {
100
- "extends": [
101
- "@commitlint/config-conventional"
102
- ]
103
- },
104
- "lint-staged": {
105
- "*.{ts,tsx,js,cjs,json}": [
106
- "biome check --write --no-errors-on-unmatched --files-ignore-unknown=true"
107
- ]
108
- },
109
- "packageManager": "bun@1.3.1"
2
+ "name": "@sylphx/pdf-reader-mcp",
3
+ "version": "2.0.0",
4
+ "description": "An MCP server providing tools to read PDF files.",
5
+ "type": "module",
6
+ "bin": {
7
+ "pdf-reader-mcp": "./dist/index.js"
8
+ },
9
+ "files": [
10
+ "dist/",
11
+ "README.md",
12
+ "LICENSE"
13
+ ],
14
+ "exports": {
15
+ ".": {
16
+ "import": "./dist/index.js",
17
+ "types": "./dist/index.d.ts"
18
+ }
19
+ },
20
+ "publishConfig": {
21
+ "access": "public"
22
+ },
23
+ "engines": {
24
+ "node": ">=22.0.0"
25
+ },
26
+ "repository": {
27
+ "type": "git",
28
+ "url": "git+https://github.com/SylphxAI/pdf-reader-mcp.git"
29
+ },
30
+ "bugs": {
31
+ "url": "https://github.com/SylphxAI/pdf-reader-mcp/issues"
32
+ },
33
+ "homepage": "https://github.com/SylphxAI/pdf-reader-mcp#readme",
34
+ "author": "Sylphx <contact@sylphx.com> (https://sylphx.com)",
35
+ "license": "MIT",
36
+ "keywords": [
37
+ "mcp",
38
+ "model-context-protocol",
39
+ "pdf",
40
+ "reader",
41
+ "parser",
42
+ "typescript",
43
+ "node",
44
+ "ai",
45
+ "agent",
46
+ "tool"
47
+ ],
48
+ "scripts": {
49
+ "build": "bunup",
50
+ "watch": "tsc --watch",
51
+ "inspector": "npx @modelcontextprotocol/inspector dist/index.js",
52
+ "test": "bun test",
53
+ "test:watch": "bun test --watch",
54
+ "test:cov": "bun test --coverage",
55
+ "lint": "biome lint .",
56
+ "lint:fix": "biome lint --write .",
57
+ "format": "biome format --write .",
58
+ "check-format": "biome format .",
59
+ "check": "biome check .",
60
+ "check:fix": "biome check --write .",
61
+ "validate": "bun run check && bun run test",
62
+ "docs:dev": "leaf dev docs",
63
+ "docs:build": "leaf build docs",
64
+ "docs:preview": "leaf preview docs",
65
+ "start": "node dist/index.js",
66
+ "typecheck": "tsc --noEmit",
67
+ "benchmark": "bun bench",
68
+ "clean": "rm -rf dist coverage",
69
+ "docs:api": "typedoc --entryPoints src/index.ts --tsconfig tsconfig.json --plugin typedoc-plugin-markdown --out docs/api --readme none",
70
+ "prepublishOnly": "bunx @sylphx/doctor prepublish && bun run clean && bun run build",
71
+ "release": "standard-version",
72
+ "prepare": "node_modules/.bin/lefthook install || true"
73
+ },
74
+ "dependencies": {
75
+ "@sylphx/mcp-server-sdk": "1.1.1",
76
+ "glob": "^11.1.0",
77
+ "pdfjs-dist": "^5.4.394",
78
+ "pngjs": "^7.0.0",
79
+ "zod": "4.2.0-canary.20251124T022609",
80
+ "zod-to-json-schema": "^3.25.0"
81
+ },
82
+ "devDependencies": {
83
+ "@biomejs/biome": "^2.3.8",
84
+ "@solidjs/router": "^0.15.4",
85
+ "@sylphx/biome-config": "^0.4.0",
86
+ "@sylphx/bump": "^0.12.1",
87
+ "@sylphx/doctor": "^1.23.3",
88
+ "@sylphx/leaf": "^1.0.0",
89
+ "@sylphx/leaf-theme-default": "^1.0.0",
90
+ "@sylphx/tsconfig": "^0.3.0",
91
+ "@types/glob": "^8.1.0",
92
+ "@types/node": "^24.10.1",
93
+ "@types/pngjs": "^6.0.5",
94
+ "bunup": "^0.16.10",
95
+ "lefthook": "^2.0.4",
96
+ "solid-js": "^1.9.10",
97
+ "typedoc": "^0.28.14",
98
+ "typedoc-plugin-markdown": "^4.9.0",
99
+ "typescript": "^5.9.3",
100
+ "vite": "^7.2.4"
101
+ },
102
+ "packageManager": "bun@1.3.1",
103
+ "overrides": {
104
+ "js-yaml": "^4.1.0"
105
+ }
110
106
  }