@sylphx/pdf-reader-mcp 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sylphx/pdf-reader-mcp",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "description": "An MCP server providing tools to read PDF files.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -11,6 +11,12 @@
11
11
  "README.md",
12
12
  "LICENSE"
13
13
  ],
14
+ "exports": {
15
+ ".": {
16
+ "import": "./dist/index.js",
17
+ "types": "./dist/index.d.ts"
18
+ }
19
+ },
14
20
  "publishConfig": {
15
21
  "access": "public"
16
22
  },
@@ -19,12 +25,12 @@
19
25
  },
20
26
  "repository": {
21
27
  "type": "git",
22
- "url": "git+https://github.com/sylphlab/pdf-reader-mcp.git"
28
+ "url": "git+https://github.com/SylphxAI/pdf-reader-mcp.git"
23
29
  },
24
30
  "bugs": {
25
- "url": "https://github.com/sylphlab/pdf-reader-mcp/issues"
31
+ "url": "https://github.com/SylphxAI/pdf-reader-mcp/issues"
26
32
  },
27
- "homepage": "https://github.com/sylphlab/pdf-reader-mcp#readme",
33
+ "homepage": "https://github.com/SylphxAI/pdf-reader-mcp#readme",
28
34
  "author": "Sylphx <contact@sylphx.com> (https://sylphx.com)",
29
35
  "license": "MIT",
30
36
  "keywords": [
@@ -40,65 +46,61 @@
40
46
  "tool"
41
47
  ],
42
48
  "scripts": {
43
- "build": "tsc",
49
+ "build": "bunup",
44
50
  "watch": "tsc --watch",
45
51
  "inspector": "npx @modelcontextprotocol/inspector dist/index.js",
46
- "test": "vitest run",
47
- "test:watch": "vitest watch",
48
- "test:cov": "vitest run --coverage --reporter=junit --outputFile=test-report.junit.xml",
52
+ "test": "bun test",
53
+ "test:watch": "bun test --watch",
54
+ "test:cov": "bun test --coverage",
49
55
  "lint": "biome lint .",
50
56
  "lint:fix": "biome lint --write .",
51
57
  "format": "biome format --write .",
52
58
  "check-format": "biome format .",
53
59
  "check": "biome check .",
54
60
  "check:fix": "biome check --write .",
55
- "validate": "npm run check && npm run test",
56
- "docs:dev": "vitepress dev docs",
57
- "docs:build": "vitepress build docs",
58
- "docs:preview": "vitepress preview docs",
61
+ "validate": "bun run check && bun run test",
62
+ "docs:dev": "leaf dev docs",
63
+ "docs:build": "leaf build docs",
64
+ "docs:preview": "leaf preview docs",
59
65
  "start": "node dist/index.js",
60
66
  "typecheck": "tsc --noEmit",
61
- "benchmark": "vitest bench",
67
+ "benchmark": "bun bench",
62
68
  "clean": "rm -rf dist coverage",
63
69
  "docs:api": "typedoc --entryPoints src/index.ts --tsconfig tsconfig.json --plugin typedoc-plugin-markdown --out docs/api --readme none",
64
- "prepublishOnly": "pnpm run clean && pnpm run build",
70
+ "prepublishOnly": "bunx @sylphx/doctor prepublish && bun run clean && bun run build",
65
71
  "release": "standard-version",
66
- "prepare": "husky"
72
+ "prepare": "node_modules/.bin/lefthook install || true"
67
73
  },
68
74
  "dependencies": {
69
- "@modelcontextprotocol/sdk": "^1.21.0",
70
- "glob": "^11.0.1",
71
- "pdfjs-dist": "^5.4.296",
75
+ "@sylphx/mcp-server-sdk": "1.0.0",
76
+ "glob": "^11.1.0",
77
+ "pdfjs-dist": "^5.4.394",
72
78
  "pngjs": "^7.0.0",
73
- "zod": "^3.25.76",
74
- "zod-to-json-schema": "^3.24.6"
79
+ "zod": "4.2.0-canary.20251124T022609",
80
+ "zod-to-json-schema": "^3.25.0"
75
81
  },
76
82
  "devDependencies": {
77
- "@biomejs/biome": "^2.3.2",
78
- "@commitlint/cli": "^20.1.0",
79
- "@commitlint/config-conventional": "^20.0.0",
83
+ "@biomejs/biome": "^2.3.8",
84
+ "@solidjs/router": "^0.15.4",
85
+ "@sylphx/biome-config": "^0.4.0",
86
+ "@sylphx/bump": "^0.12.1",
87
+ "@sylphx/doctor": "^1.23.3",
88
+ "@sylphx/leaf": "^1.0.0",
89
+ "@sylphx/leaf-theme-default": "^1.0.0",
90
+ "@sylphx/tsconfig": "^0.3.0",
80
91
  "@types/glob": "^8.1.0",
81
- "@types/node": "^24.0.7",
92
+ "@types/node": "^24.10.1",
82
93
  "@types/pngjs": "^6.0.5",
83
- "@vitest/coverage-v8": "^4.0.7",
84
- "husky": "^9.1.7",
85
- "lint-staged": "^16.2.6",
86
- "standard-version": "^9.5.0",
87
- "typedoc": "^0.28.2",
94
+ "bunup": "^0.16.10",
95
+ "lefthook": "^2.0.4",
96
+ "solid-js": "^1.9.10",
97
+ "typedoc": "^0.28.14",
88
98
  "typedoc-plugin-markdown": "^4.9.0",
89
- "typescript": "^5.8.3",
90
- "vitepress": "^1.6.3",
91
- "vitest": "^4.0.7",
92
- "vue": "^3.5.13"
93
- },
94
- "commitlint": {
95
- "extends": [
96
- "@commitlint/config-conventional"
97
- ]
99
+ "typescript": "^5.9.3",
100
+ "vite": "^7.2.4"
98
101
  },
99
- "lint-staged": {
100
- "*.{ts,tsx,js,cjs,json}": [
101
- "biome check --write --no-errors-on-unmatched --files-ignore-unknown=true"
102
- ]
102
+ "packageManager": "bun@1.3.1",
103
+ "overrides": {
104
+ "js-yaml": "^4.1.0"
103
105
  }
104
106
  }
@@ -1,4 +0,0 @@
1
- // Import only the consolidated PDF tool definition
2
- import { readPdfToolDefinition } from './readPdf.js';
3
- // Aggregate only the consolidated PDF tool definition
4
- export const allToolDefinitions = [readPdfToolDefinition];
@@ -1,170 +0,0 @@
1
- // PDF reading handler - orchestrates PDF processing workflow
2
- import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
3
- import { z } from 'zod';
4
- import { buildWarnings, extractMetadataAndPageCount, extractPageContent, } from '../pdf/extractor.js';
5
- import { loadPdfDocument } from '../pdf/loader.js';
6
- import { determinePagesToProcess, getTargetPages } from '../pdf/parser.js';
7
- import { readPdfArgsSchema } from '../schemas/readPdf.js';
8
- /**
9
- * Process a single PDF source
10
- */
11
- const processSingleSource = async (source, options) => {
12
- const sourceDescription = source.path ?? source.url ?? 'unknown source';
13
- let individualResult = { source: sourceDescription, success: false };
14
- try {
15
- // Parse target pages
16
- const targetPages = getTargetPages(source.pages, sourceDescription);
17
- // Load PDF document
18
- const { pages: _pages, ...loadArgs } = source;
19
- const pdfDocument = await loadPdfDocument(loadArgs, sourceDescription);
20
- const totalPages = pdfDocument.numPages;
21
- // Extract metadata and page count
22
- const metadataOutput = await extractMetadataAndPageCount(pdfDocument, options.includeMetadata, options.includePageCount);
23
- const output = { ...metadataOutput };
24
- // Determine pages to process
25
- const { pagesToProcess, invalidPages } = determinePagesToProcess(targetPages, totalPages, options.includeFullText);
26
- // Add warnings for invalid pages
27
- const warnings = buildWarnings(invalidPages, totalPages);
28
- if (warnings.length > 0) {
29
- output.warnings = warnings;
30
- }
31
- // Extract content with ordering preserved
32
- if (pagesToProcess.length > 0) {
33
- // Use new extractPageContent to preserve Y-coordinate ordering
34
- const pageContents = await Promise.all(pagesToProcess.map((pageNum) => extractPageContent(pdfDocument, pageNum, options.includeImages, sourceDescription)));
35
- // Store page contents for ordered retrieval
36
- output.page_contents = pageContents.map((items, idx) => ({
37
- page: pagesToProcess[idx],
38
- items,
39
- }));
40
- // For backward compatibility, also provide text-only outputs
41
- const extractedPageTexts = pageContents.map((items, idx) => ({
42
- page: pagesToProcess[idx],
43
- text: items
44
- .filter((item) => item.type === 'text')
45
- .map((item) => item.textContent)
46
- .join(''),
47
- }));
48
- if (targetPages) {
49
- // Specific pages requested
50
- output.page_texts = extractedPageTexts;
51
- }
52
- else {
53
- // Full text requested
54
- output.full_text = extractedPageTexts.map((p) => p.text).join('\n\n');
55
- }
56
- // Extract image metadata for JSON response
57
- if (options.includeImages) {
58
- const extractedImages = pageContents
59
- .flatMap((items) => items.filter((item) => item.type === 'image' && item.imageData))
60
- .map((item) => item.imageData)
61
- .filter((img) => img !== undefined);
62
- if (extractedImages.length > 0) {
63
- output.images = extractedImages;
64
- }
65
- }
66
- }
67
- individualResult = { ...individualResult, data: output, success: true };
68
- }
69
- catch (error) {
70
- let errorMessage = `Failed to process PDF from ${sourceDescription}.`;
71
- if (error instanceof McpError) {
72
- errorMessage = error.message;
73
- } /* c8 ignore next */
74
- else if (error instanceof Error) {
75
- errorMessage += ` Reason: ${error.message}`;
76
- }
77
- else {
78
- errorMessage += ` Unknown error: ${JSON.stringify(error)}`;
79
- }
80
- individualResult.error = errorMessage;
81
- individualResult.success = false;
82
- individualResult.data = undefined;
83
- }
84
- return individualResult;
85
- };
86
- /**
87
- * Main handler function for read_pdf tool
88
- */
89
- export const handleReadPdfFunc = async (args) => {
90
- let parsedArgs;
91
- try {
92
- parsedArgs = readPdfArgsSchema.parse(args);
93
- }
94
- catch (error) {
95
- if (error instanceof z.ZodError) {
96
- throw new McpError(ErrorCode.InvalidParams, `Invalid arguments: ${error.issues.map((e) => `${e.path.join('.')} (${e.message})`).join(', ')}`);
97
- }
98
- /* c8 ignore next */
99
- const message = error instanceof Error ? error.message : String(error);
100
- /* c8 ignore next */
101
- throw new McpError(ErrorCode.InvalidParams, `Argument validation failed: ${message}`);
102
- }
103
- const { sources, include_full_text, include_metadata, include_page_count, include_images } = parsedArgs;
104
- // Process all sources concurrently
105
- const results = await Promise.all(sources.map((source) => processSingleSource(source, {
106
- includeFullText: include_full_text,
107
- includeMetadata: include_metadata,
108
- includePageCount: include_page_count,
109
- includeImages: include_images,
110
- })));
111
- // Build content parts - start with structured JSON for backward compatibility
112
- const content = [];
113
- // Strip image data and page_contents from JSON to keep it manageable
114
- const resultsForJson = results.map((result) => {
115
- if (result.data) {
116
- const { images, page_contents, ...dataWithoutBinaryContent } = result.data;
117
- // Include image count and metadata in JSON, but not the base64 data
118
- if (images) {
119
- const imageInfo = images.map((img) => ({
120
- page: img.page,
121
- index: img.index,
122
- width: img.width,
123
- height: img.height,
124
- format: img.format,
125
- }));
126
- return { ...result, data: { ...dataWithoutBinaryContent, image_info: imageInfo } };
127
- }
128
- return { ...result, data: dataWithoutBinaryContent };
129
- }
130
- return result;
131
- });
132
- // First content part: Structured JSON results
133
- content.push({
134
- type: 'text',
135
- text: JSON.stringify({ results: resultsForJson }, null, 2),
136
- });
137
- // Add page content in exact Y-coordinate order
138
- for (const result of results) {
139
- if (!result.success || !result.data?.page_contents)
140
- continue;
141
- // Process each page's content items in order
142
- for (const pageContent of result.data.page_contents) {
143
- for (const item of pageContent.items) {
144
- if (item.type === 'text' && item.textContent) {
145
- // Add text content part
146
- content.push({
147
- type: 'text',
148
- text: item.textContent,
149
- });
150
- }
151
- else if (item.type === 'image' && item.imageData) {
152
- // Add image content part (all images are now encoded as PNG)
153
- content.push({
154
- type: 'image',
155
- data: item.imageData.data,
156
- mimeType: 'image/png',
157
- });
158
- }
159
- }
160
- }
161
- }
162
- return { content };
163
- };
164
- // Export the tool definition
165
- export const readPdfToolDefinition = {
166
- name: 'read_pdf',
167
- description: 'Reads content/metadata/images from one or more PDFs (local/URL). Each source can specify pages to extract.',
168
- schema: readPdfArgsSchema,
169
- handler: handleReadPdfFunc,
170
- };