mcp-server-docpipe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.js +35 -0
- package/dist/index.js.map +1 -0
- package/dist/tools/docx-convert.d.ts +2 -0
- package/dist/tools/docx-convert.js +54 -0
- package/dist/tools/docx-convert.js.map +1 -0
- package/dist/tools/image-process.d.ts +2 -0
- package/dist/tools/image-process.js +107 -0
- package/dist/tools/image-process.js.map +1 -0
- package/dist/tools/pdf-extract.d.ts +2 -0
- package/dist/tools/pdf-extract.js +36 -0
- package/dist/tools/pdf-extract.js.map +1 -0
- package/dist/tools/pdf-merge.d.ts +2 -0
- package/dist/tools/pdf-merge.js +35 -0
- package/dist/tools/pdf-merge.js.map +1 -0
- package/dist/tools/pdf-split.d.ts +2 -0
- package/dist/tools/pdf-split.js +64 -0
- package/dist/tools/pdf-split.js.map +1 -0
- package/package.json +29 -0
- package/src/index.ts +39 -0
- package/src/tools/docx-convert.ts +72 -0
- package/src/tools/image-process.ts +133 -0
- package/src/tools/pdf-extract.ts +45 -0
- package/src/tools/pdf-merge.ts +44 -0
- package/src/tools/pdf-split.ts +78 -0
- package/src/types.d.ts +41 -0
- package/tsconfig.json +16 -0
package/README.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# mcp-server-docpipe
|
|
2
|
+
|
|
3
|
+
MCP server for document processing — PDF, DOCX, and image manipulation powered by AI.
|
|
4
|
+
|
|
5
|
+
## Tools
|
|
6
|
+
|
|
7
|
+
| Tool | Description |
|
|
8
|
+
|------|-------------|
|
|
9
|
+
| `pdf-extract-text` | Extract text and metadata from PDF files |
|
|
10
|
+
| `pdf-merge` | Merge multiple PDFs into one |
|
|
11
|
+
| `pdf-split` | Extract specific pages from a PDF |
|
|
12
|
+
| `docx-to-markdown` | Convert Word documents to Markdown |
|
|
13
|
+
| `docx-to-html` | Convert Word documents to HTML |
|
|
14
|
+
| `image-resize` | Resize images with flexible fit options |
|
|
15
|
+
| `image-compress` | Compress/convert images (WebP, AVIF, JPEG, PNG) |
|
|
16
|
+
| `image-info` | Get image metadata without modification |
|
|
17
|
+
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npx mcp-server-docpipe
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Claude Desktop Configuration
|
|
25
|
+
|
|
26
|
+
```json
|
|
27
|
+
{
|
|
28
|
+
"mcpServers": {
|
|
29
|
+
"docpipe": {
|
|
30
|
+
"command": "npx",
|
|
31
|
+
"args": ["-y", "mcp-server-docpipe"]
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Claude Code Configuration
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
claude mcp add-json docpipe '{"type":"stdio","command":"npx","args":["-y","mcp-server-docpipe"]}'
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Examples
|
|
44
|
+
|
|
45
|
+
**Extract text from a PDF:**
|
|
46
|
+
> "Extract all text from C:/Documents/report.pdf"
|
|
47
|
+
|
|
48
|
+
**Merge PDFs:**
|
|
49
|
+
> "Merge invoice1.pdf and invoice2.pdf into combined.pdf"
|
|
50
|
+
|
|
51
|
+
**Convert Word to Markdown:**
|
|
52
|
+
> "Convert my-document.docx to markdown format"
|
|
53
|
+
|
|
54
|
+
**Compress an image:**
|
|
55
|
+
> "Compress photo.png to WebP format with 70% quality"
|
|
56
|
+
|
|
57
|
+
## Development
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
npm install
|
|
61
|
+
npm run build
|
|
62
|
+
npm start
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## License
|
|
66
|
+
|
|
67
|
+
MIT
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* mcp-server-docpipe
|
|
4
|
+
* MCP server for document processing:
|
|
5
|
+
* - PDF text extraction, merge, split
|
|
6
|
+
* - DOCX to Markdown conversion
|
|
7
|
+
* - Image resize and compression
|
|
8
|
+
*/
|
|
9
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
10
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
11
|
+
import { registerPdfTools } from './tools/pdf-extract.js';
|
|
12
|
+
import { registerPdfMerge } from './tools/pdf-merge.js';
|
|
13
|
+
import { registerPdfSplit } from './tools/pdf-split.js';
|
|
14
|
+
import { registerDocxConvert } from './tools/docx-convert.js';
|
|
15
|
+
import { registerImageTools } from './tools/image-process.js';
|
|
16
|
+
const server = new McpServer({
|
|
17
|
+
name: 'mcp-server-docpipe',
|
|
18
|
+
version: '1.0.0',
|
|
19
|
+
});
|
|
20
|
+
// Register all tools
|
|
21
|
+
registerPdfTools(server);
|
|
22
|
+
registerPdfMerge(server);
|
|
23
|
+
registerPdfSplit(server);
|
|
24
|
+
registerDocxConvert(server);
|
|
25
|
+
registerImageTools(server);
|
|
26
|
+
async function main() {
|
|
27
|
+
const transport = new StdioServerTransport();
|
|
28
|
+
await server.connect(transport);
|
|
29
|
+
console.error('mcp-server-docpipe started on stdio');
|
|
30
|
+
}
|
|
31
|
+
main().catch((error) => {
|
|
32
|
+
console.error('Server failed to start:', error);
|
|
33
|
+
process.exit(1);
|
|
34
|
+
});
|
|
35
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA;;;;;;GAMG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAC1D,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAC9D,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAE9D,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;IAC3B,IAAI,EAAE,oBAAoB;IAC1B,OAAO,EAAE,OAAO;CACjB,CAAC,CAAC;AAEH,qBAAqB;AACrB,gBAAgB,CAAC,MAAM,CAAC,CAAC;AACzB,gBAAgB,CAAC,MAAM,CAAC,CAAC;AACzB,gBAAgB,CAAC,MAAM,CAAC,CAAC;AACzB,mBAAmB,CAAC,MAAM,CAAC,CAAC;AAC5B,kBAAkB,CAAC,MAAM,CAAC,CAAC;AAE3B,KAAK,UAAU,IAAI;IACjB,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAC;AACvD,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,yBAAyB,EAAE,KAAK,CAAC,CAAC;IAChD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import * as mammoth from 'mammoth';
|
|
4
|
+
export function registerDocxConvert(server) {
|
|
5
|
+
server.tool('docx-to-markdown', 'Convert a DOCX (Word) file to Markdown format. Preserves headings, lists, bold/italic, tables, and links.', {
|
|
6
|
+
filePath: z.string().describe('Absolute path to the DOCX file'),
|
|
7
|
+
outputPath: z.string().optional().describe('Optional: save Markdown to this path. If omitted, returns content directly.'),
|
|
8
|
+
}, async ({ filePath, outputPath }) => {
|
|
9
|
+
const buffer = await fs.readFile(filePath);
|
|
10
|
+
const result = await mammoth.convertToMarkdown({ buffer });
|
|
11
|
+
const markdown = result.value;
|
|
12
|
+
const warnings = result.messages
|
|
13
|
+
.filter((m) => m.type === 'warning')
|
|
14
|
+
.map((m) => m.message);
|
|
15
|
+
if (outputPath) {
|
|
16
|
+
await fs.writeFile(outputPath, markdown, 'utf-8');
|
|
17
|
+
}
|
|
18
|
+
return {
|
|
19
|
+
content: [{
|
|
20
|
+
type: 'text',
|
|
21
|
+
text: JSON.stringify({
|
|
22
|
+
success: true,
|
|
23
|
+
markdown: outputPath ? `(saved to ${outputPath})` : markdown,
|
|
24
|
+
characterCount: markdown.length,
|
|
25
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
26
|
+
...(outputPath ? { outputPath } : {}),
|
|
27
|
+
}, null, 2),
|
|
28
|
+
}],
|
|
29
|
+
};
|
|
30
|
+
});
|
|
31
|
+
server.tool('docx-to-html', 'Convert a DOCX (Word) file to HTML format.', {
|
|
32
|
+
filePath: z.string().describe('Absolute path to the DOCX file'),
|
|
33
|
+
outputPath: z.string().optional().describe('Optional: save HTML to this path'),
|
|
34
|
+
}, async ({ filePath, outputPath }) => {
|
|
35
|
+
const buffer = await fs.readFile(filePath);
|
|
36
|
+
const result = await mammoth.convertToHtml({ buffer });
|
|
37
|
+
const html = result.value;
|
|
38
|
+
if (outputPath) {
|
|
39
|
+
await fs.writeFile(outputPath, html, 'utf-8');
|
|
40
|
+
}
|
|
41
|
+
return {
|
|
42
|
+
content: [{
|
|
43
|
+
type: 'text',
|
|
44
|
+
text: JSON.stringify({
|
|
45
|
+
success: true,
|
|
46
|
+
html: outputPath ? `(saved to ${outputPath})` : html,
|
|
47
|
+
characterCount: html.length,
|
|
48
|
+
...(outputPath ? { outputPath } : {}),
|
|
49
|
+
}, null, 2),
|
|
50
|
+
}],
|
|
51
|
+
};
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
//# sourceMappingURL=docx-convert.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docx-convert.js","sourceRoot":"","sources":["../../src/tools/docx-convert.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AAEnC,MAAM,UAAU,mBAAmB,CAAC,MAAiB;IACnD,MAAM,CAAC,IAAI,CACT,kBAAkB,EAClB,2GAA2G,EAC3G;QACE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;QAC/D,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,6EAA6E,CAAC;KAC1H,EACD,KAAK,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,EAAE,EAAE;QACjC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QAE3D,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC;QAC9B,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ;aAC7B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,SAAS,CAAC;aACnC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAEzB,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC;QACpD,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,OAAO,EAAE,IAAI;wBACb,QAAQ,EAAE,UAAU,CAAC,CAAC,CAAC,aAAa,UAAU,GAAG,CAAC,CAAC,CAAC,QAAQ;wBAC5D,cAAc,EAAE,QAAQ,CAAC,MAAM;wBAC/B,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;wBACpD,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;qBACtC,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,MAAM,CAAC,IAAI,CACT,cAAc,EACd,4CAA4C,EAC5C;QACE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;QAC/D,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;KAC/E,EACD,KAAK,EAAE,EAAE,QAAQ,EAAE,UAAU,EAAE,EAAE,EAAE;QACjC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC3C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QAEvD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC;QAE1B,IAAI,UAAU,EAAE,CAAC;YACf,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAChD,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,C
AAC,SAAS,CAAC;wBACnB,OAAO,EAAE,IAAI;wBACb,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC,aAAa,UAAU,GAAG,CAAC,CAAC,CAAC,IAAI;wBACpD,cAAc,EAAE,IAAI,CAAC,MAAM;wBAC3B,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;qBACtC,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IACJ,CAAC,CACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import sharp from 'sharp';
|
|
4
|
+
export function registerImageTools(server) {
|
|
5
|
+
server.tool('image-resize', 'Resize an image to specified dimensions. Supports JPEG, PNG, WebP, AVIF, TIFF, GIF.', {
|
|
6
|
+
inputFile: z.string().describe('Absolute path to the source image'),
|
|
7
|
+
outputPath: z.string().describe('Absolute path for the resized output image'),
|
|
8
|
+
width: z.number().int().positive().optional().describe('Target width in pixels'),
|
|
9
|
+
height: z.number().int().positive().optional().describe('Target height in pixels'),
|
|
10
|
+
fit: z.enum(['cover', 'contain', 'fill', 'inside', 'outside']).optional().default('inside')
|
|
11
|
+
.describe('How to fit: cover (crop), contain (letterbox), fill (stretch), inside (max dimensions), outside (min dimensions)'),
|
|
12
|
+
}, async ({ inputFile, outputPath, width, height, fit }) => {
|
|
13
|
+
if (!width && !height) {
|
|
14
|
+
return {
|
|
15
|
+
content: [{
|
|
16
|
+
type: 'text',
|
|
17
|
+
text: JSON.stringify({ error: 'At least one of width or height must be specified' }),
|
|
18
|
+
}],
|
|
19
|
+
};
|
|
20
|
+
}
|
|
21
|
+
const info = await sharp(inputFile)
|
|
22
|
+
.resize({ width: width ?? undefined, height: height ?? undefined, fit })
|
|
23
|
+
.toFile(outputPath);
|
|
24
|
+
return {
|
|
25
|
+
content: [{
|
|
26
|
+
type: 'text',
|
|
27
|
+
text: JSON.stringify({
|
|
28
|
+
success: true,
|
|
29
|
+
outputPath,
|
|
30
|
+
output: {
|
|
31
|
+
width: info.width,
|
|
32
|
+
height: info.height,
|
|
33
|
+
format: info.format,
|
|
34
|
+
size: info.size,
|
|
35
|
+
},
|
|
36
|
+
}, null, 2),
|
|
37
|
+
}],
|
|
38
|
+
};
|
|
39
|
+
});
|
|
40
|
+
server.tool('image-compress', 'Compress and/or convert an image to a different format (WebP, AVIF, JPEG, PNG). Great for reducing file size.', {
|
|
41
|
+
inputFile: z.string().describe('Absolute path to the source image'),
|
|
42
|
+
outputPath: z.string().describe('Absolute path for the compressed output image'),
|
|
43
|
+
format: z.enum(['webp', 'avif', 'jpeg', 'png']).describe('Output format'),
|
|
44
|
+
quality: z.number().int().min(1).max(100).optional().default(80)
|
|
45
|
+
.describe('Compression quality 1-100 (lower = smaller file, default 80)'),
|
|
46
|
+
}, async ({ inputFile, outputPath, format, quality }) => {
|
|
47
|
+
const inputStat = await fs.stat(inputFile);
|
|
48
|
+
let pipeline = sharp(inputFile);
|
|
49
|
+
switch (format) {
|
|
50
|
+
case 'webp':
|
|
51
|
+
pipeline = pipeline.webp({ quality });
|
|
52
|
+
break;
|
|
53
|
+
case 'avif':
|
|
54
|
+
pipeline = pipeline.avif({ quality });
|
|
55
|
+
break;
|
|
56
|
+
case 'jpeg':
|
|
57
|
+
pipeline = pipeline.jpeg({ quality });
|
|
58
|
+
break;
|
|
59
|
+
case 'png':
|
|
60
|
+
pipeline = pipeline.png({ quality });
|
|
61
|
+
break;
|
|
62
|
+
}
|
|
63
|
+
const info = await pipeline.toFile(outputPath);
|
|
64
|
+
const outputStat = await fs.stat(outputPath);
|
|
65
|
+
const compressionRatio = ((1 - outputStat.size / inputStat.size) * 100).toFixed(1);
|
|
66
|
+
return {
|
|
67
|
+
content: [{
|
|
68
|
+
type: 'text',
|
|
69
|
+
text: JSON.stringify({
|
|
70
|
+
success: true,
|
|
71
|
+
outputPath,
|
|
72
|
+
inputSize: inputStat.size,
|
|
73
|
+
outputSize: outputStat.size,
|
|
74
|
+
compressionRatio: `${compressionRatio}%`,
|
|
75
|
+
output: {
|
|
76
|
+
width: info.width,
|
|
77
|
+
height: info.height,
|
|
78
|
+
format: info.format,
|
|
79
|
+
},
|
|
80
|
+
}, null, 2),
|
|
81
|
+
}],
|
|
82
|
+
};
|
|
83
|
+
});
|
|
84
|
+
server.tool('image-info', 'Get metadata and dimensions of an image file without modifying it.', {
|
|
85
|
+
filePath: z.string().describe('Absolute path to the image file'),
|
|
86
|
+
}, async ({ filePath }) => {
|
|
87
|
+
const metadata = await sharp(filePath).metadata();
|
|
88
|
+
const stat = await fs.stat(filePath);
|
|
89
|
+
return {
|
|
90
|
+
content: [{
|
|
91
|
+
type: 'text',
|
|
92
|
+
text: JSON.stringify({
|
|
93
|
+
filePath,
|
|
94
|
+
width: metadata.width,
|
|
95
|
+
height: metadata.height,
|
|
96
|
+
format: metadata.format,
|
|
97
|
+
space: metadata.space,
|
|
98
|
+
channels: metadata.channels,
|
|
99
|
+
hasAlpha: metadata.hasAlpha,
|
|
100
|
+
fileSize: stat.size,
|
|
101
|
+
density: metadata.density,
|
|
102
|
+
}, null, 2),
|
|
103
|
+
}],
|
|
104
|
+
};
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
//# sourceMappingURL=image-process.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"image-process.js","sourceRoot":"","sources":["../../src/tools/image-process.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,KAAK,MAAM,OAAO,CAAC;AAG1B,MAAM,UAAU,kBAAkB,CAAC,MAAiB;IAClD,MAAM,CAAC,IAAI,CACT,cAAc,EACd,qFAAqF,EACrF;QACE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;QACnE,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,4CAA4C,CAAC;QAC7E,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC;QAChF,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC;QAClF,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC;aACxF,QAAQ,CAAC,kHAAkH,CAAC;KAChI,EACD,KAAK,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,EAAE;QACtD,IAAI,CAAC,KAAK,IAAI,CAAC,MAAM,EAAE,CAAC;YACtB,OAAO;gBACL,OAAO,EAAE,CAAC;wBACR,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,mDAAmD,EAAE,CAAC;qBACrF,CAAC;aACH,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC;aAChC,MAAM,CAAC,EAAE,KAAK,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,EAAE,MAAM,IAAI,SAAS,EAAE,GAAG,EAAE,CAAC;aACvE,MAAM,CAAC,UAAU,CAAC,CAAC;QAEtB,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,OAAO,EAAE,IAAI;wBACb,UAAU;wBACV,MAAM,EAAE;4BACN,KAAK,EAAE,IAAI,CAAC,KAAK;4BACjB,MAAM,EAAE,IAAI,CAAC,MAAM;4BACnB,MAAM,EAAE,IAAI,CAAC,MAAM;4BACnB,IAAI,EAAE,IAAI,CAAC,IAAI;yBAChB;qBACF,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,MAAM,CAAC,IAAI,CACT,gBAAgB,EAChB,+GAA+G,EAC/G;QACE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;QACnE,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;QAChF,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,eAAe,CAAC;QACzE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,
CAAC,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;aAC7D,QAAQ,CAAC,8DAA8D,CAAC;KAC5E,EACD,KAAK,EAAE,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,EAAE,EAAE;QACnD,MAAM,SAAS,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC3C,IAAI,QAAQ,GAAG,KAAK,CAAC,SAAS,CAAC,CAAC;QAEhC,QAAQ,MAAM,EAAE,CAAC;YACf,KAAK,MAAM;gBACT,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC;gBACtC,MAAM;YACR,KAAK,MAAM;gBACT,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC;gBACtC,MAAM;YACR,KAAK,MAAM;gBACT,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC;gBACtC,MAAM;YACR,KAAK,KAAK;gBACR,QAAQ,GAAG,QAAQ,CAAC,GAAG,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC;gBACrC,MAAM;QACV,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAC/C,MAAM,UAAU,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAE7C,MAAM,gBAAgB,GAAG,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAEnF,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,OAAO,EAAE,IAAI;wBACb,UAAU;wBACV,SAAS,EAAE,SAAS,CAAC,IAAI;wBACzB,UAAU,EAAE,UAAU,CAAC,IAAI;wBAC3B,gBAAgB,EAAE,GAAG,gBAAgB,GAAG;wBACxC,MAAM,EAAE;4BACN,KAAK,EAAE,IAAI,CAAC,KAAK;4BACjB,MAAM,EAAE,IAAI,CAAC,MAAM;4BACnB,MAAM,EAAE,IAAI,CAAC,MAAM;yBACpB;qBACF,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IACJ,CAAC,CACF,CAAC;IAEF,MAAM,CAAC,IAAI,CACT,YAAY,EACZ,oEAAoE,EACpE;QACE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;KACjE,EACD,KAAK,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE;QACrB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,CAAC,QAAQ,EAAE,CAAC;QAClD,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAErC,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,QAAQ;wBACR,KAAK,EAAE,QAAQ,CAAC,KAAK;wBACrB,MAAM,EAAE,QAAQ,CAAC,MAAM;wBACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;wBACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;wBACrB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;wBAC3B,QAAQ,EAAE,QAAQ,CAAC,QAAQ;wBAC3B,QAAQ,EAAE,IAAI,CAAC,IAAI;wBACnB,OAAO,EAAE,QAAQ,CAAC,OAAO;qBAC1B,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IAC
J,CAAC,CACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import pdfParse from 'pdf-parse';
|
|
4
|
+
export function registerPdfTools(server) {
|
|
5
|
+
server.tool('pdf-extract-text', 'Extract text content from a PDF file. Returns the full text and metadata (page count, info).', {
|
|
6
|
+
filePath: z.string().describe('Absolute path to the PDF file'),
|
|
7
|
+
pageRange: z.string().optional().describe('Page range to extract, e.g. "1-5" or "3" (optional, defaults to all pages)'),
|
|
8
|
+
}, async ({ filePath, pageRange }) => {
|
|
9
|
+
const buffer = await fs.readFile(filePath);
|
|
10
|
+
const data = await pdfParse(buffer);
|
|
11
|
+
let text = data.text;
|
|
12
|
+
// If pageRange is specified, we extract from the full text
|
|
13
|
+
// pdf-parse doesn't support per-page extraction natively,
|
|
14
|
+
// so we provide the full text with metadata
|
|
15
|
+
const result = {
|
|
16
|
+
text: text.trim(),
|
|
17
|
+
pageCount: data.numpages,
|
|
18
|
+
info: {
|
|
19
|
+
title: data.info?.Title || null,
|
|
20
|
+
author: data.info?.Author || null,
|
|
21
|
+
subject: data.info?.Subject || null,
|
|
22
|
+
creator: data.info?.Creator || null,
|
|
23
|
+
},
|
|
24
|
+
...(pageRange ? { note: `pageRange "${pageRange}" requested; full text returned as pdf-parse extracts all pages. Use pdf-split first to isolate specific pages.` } : {}),
|
|
25
|
+
};
|
|
26
|
+
return {
|
|
27
|
+
content: [
|
|
28
|
+
{
|
|
29
|
+
type: 'text',
|
|
30
|
+
text: JSON.stringify(result, null, 2),
|
|
31
|
+
},
|
|
32
|
+
],
|
|
33
|
+
};
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
//# sourceMappingURL=pdf-extract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-extract.js","sourceRoot":"","sources":["../../src/tools/pdf-extract.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,QAAQ,MAAM,WAAW,CAAC;AAEjC,MAAM,UAAU,gBAAgB,CAAC,MAAiB;IAChD,MAAM,CAAC,IAAI,CACT,kBAAkB,EAClB,8FAA8F,EAC9F;QACE,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;QAC9D,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,4EAA4E,CAAC;KACxH,EACD,KAAK,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,EAAE,EAAE;QAChC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC3C,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpC,IAAI,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC;QAErB,2DAA2D;QAC3D,0DAA0D;QAC1D,4CAA4C;QAC5C,MAAM,MAAM,GAAG;YACb,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE;YACjB,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,IAAI,EAAE;gBACJ,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,KAAK,IAAI,IAAI;gBAC/B,MAAM,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,IAAI,IAAI;gBACjC,OAAO,EAAE,IAAI,CAAC,IAAI,EAAE,OAAO,IAAI,IAAI;gBACnC,OAAO,EAAE,IAAI,CAAC,IAAI,EAAE,OAAO,IAAI,IAAI;aACpC;YACD,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,cAAc,SAAS,iHAAiH,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzK,CAAC;QAEF,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC;iBACtC;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import { PDFDocument } from 'pdf-lib';
|
|
4
|
+
export function registerPdfMerge(server) {
|
|
5
|
+
server.tool('pdf-merge', 'Merge multiple PDF files into a single PDF. Provide an array of file paths and an output path.', {
|
|
6
|
+
inputFiles: z.array(z.string()).min(2).describe('Array of absolute paths to PDF files to merge (minimum 2)'),
|
|
7
|
+
outputPath: z.string().describe('Absolute path for the merged output PDF'),
|
|
8
|
+
}, async ({ inputFiles, outputPath }) => {
|
|
9
|
+
const mergedPdf = await PDFDocument.create();
|
|
10
|
+
for (const filePath of inputFiles) {
|
|
11
|
+
const bytes = await fs.readFile(filePath);
|
|
12
|
+
const sourcePdf = await PDFDocument.load(bytes);
|
|
13
|
+
const pages = await mergedPdf.copyPages(sourcePdf, sourcePdf.getPageIndices());
|
|
14
|
+
for (const page of pages) {
|
|
15
|
+
mergedPdf.addPage(page);
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
const mergedBytes = await mergedPdf.save();
|
|
19
|
+
await fs.writeFile(outputPath, mergedBytes);
|
|
20
|
+
return {
|
|
21
|
+
content: [
|
|
22
|
+
{
|
|
23
|
+
type: 'text',
|
|
24
|
+
text: JSON.stringify({
|
|
25
|
+
success: true,
|
|
26
|
+
outputPath,
|
|
27
|
+
totalPages: mergedPdf.getPageCount(),
|
|
28
|
+
mergedFiles: inputFiles.length,
|
|
29
|
+
}, null, 2),
|
|
30
|
+
},
|
|
31
|
+
],
|
|
32
|
+
};
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
//# sourceMappingURL=pdf-merge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-merge.js","sourceRoot":"","sources":["../../src/tools/pdf-merge.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEtC,MAAM,UAAU,gBAAgB,CAAC,MAAiB;IAChD,MAAM,CAAC,IAAI,CACT,WAAW,EACX,gGAAgG,EAChG;QACE,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,2DAA2D,CAAC;QAC5G,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,yCAAyC,CAAC;KAC3E,EACD,KAAK,EAAE,EAAE,UAAU,EAAE,UAAU,EAAE,EAAE,EAAE;QACnC,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,CAAC;QAE7C,KAAK,MAAM,QAAQ,IAAI,UAAU,EAAE,CAAC;YAClC,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC1C,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YAChD,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,SAAS,CAAC,SAAS,EAAE,SAAS,CAAC,cAAc,EAAE,CAAC,CAAC;YAC/E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;gBACzB,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,CAAC;QAC3C,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,WAAW,CAAC,CAAC;QAE5C,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,OAAO,EAAE,IAAI;wBACb,UAAU;wBACV,UAAU,EAAE,SAAS,CAAC,YAAY,EAAE;wBACpC,WAAW,EAAE,UAAU,CAAC,MAAM;qBAC/B,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ;aACF;SACF,CAAC;IACJ,CAAC,CACF,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import fs from 'fs/promises';
|
|
3
|
+
import { PDFDocument } from 'pdf-lib';
|
|
4
|
+
function parsePageRange(range, totalPages) {
|
|
5
|
+
const pages = [];
|
|
6
|
+
const parts = range.split(',').map((s) => s.trim());
|
|
7
|
+
for (const part of parts) {
|
|
8
|
+
if (part.includes('-')) {
|
|
9
|
+
const [startStr, endStr] = part.split('-');
|
|
10
|
+
const start = Math.max(1, parseInt(startStr, 10));
|
|
11
|
+
const end = Math.min(totalPages, parseInt(endStr, 10));
|
|
12
|
+
for (let i = start; i <= end; i++) {
|
|
13
|
+
pages.push(i - 1); // 0-indexed
|
|
14
|
+
}
|
|
15
|
+
}
|
|
16
|
+
else {
|
|
17
|
+
const pageNum = parseInt(part, 10);
|
|
18
|
+
if (pageNum >= 1 && pageNum <= totalPages) {
|
|
19
|
+
pages.push(pageNum - 1);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
return [...new Set(pages)].sort((a, b) => a - b);
|
|
24
|
+
}
|
|
25
|
+
export function registerPdfSplit(server) {
|
|
26
|
+
server.tool('pdf-split', 'Split a PDF file by extracting specific pages into a new PDF. Supports page ranges like "1-3", "5", or "1,3,5-7".', {
|
|
27
|
+
inputFile: z.string().describe('Absolute path to the source PDF file'),
|
|
28
|
+
pageRange: z.string().describe('Pages to extract: e.g. "1-3", "5", "1,3,5-7"'),
|
|
29
|
+
outputPath: z.string().describe('Absolute path for the output PDF'),
|
|
30
|
+
}, async ({ inputFile, pageRange, outputPath }) => {
|
|
31
|
+
const bytes = await fs.readFile(inputFile);
|
|
32
|
+
const sourcePdf = await PDFDocument.load(bytes);
|
|
33
|
+
const totalPages = sourcePdf.getPageCount();
|
|
34
|
+
const pageIndices = parsePageRange(pageRange, totalPages);
|
|
35
|
+
if (pageIndices.length === 0) {
|
|
36
|
+
return {
|
|
37
|
+
content: [{
|
|
38
|
+
type: 'text',
|
|
39
|
+
text: JSON.stringify({ error: `No valid pages in range "${pageRange}". PDF has ${totalPages} pages.` }),
|
|
40
|
+
}],
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
const newPdf = await PDFDocument.create();
|
|
44
|
+
const copiedPages = await newPdf.copyPages(sourcePdf, pageIndices);
|
|
45
|
+
for (const page of copiedPages) {
|
|
46
|
+
newPdf.addPage(page);
|
|
47
|
+
}
|
|
48
|
+
const newBytes = await newPdf.save();
|
|
49
|
+
await fs.writeFile(outputPath, newBytes);
|
|
50
|
+
return {
|
|
51
|
+
content: [{
|
|
52
|
+
type: 'text',
|
|
53
|
+
text: JSON.stringify({
|
|
54
|
+
success: true,
|
|
55
|
+
outputPath,
|
|
56
|
+
extractedPages: pageIndices.map((i) => i + 1),
|
|
57
|
+
totalSourcePages: totalPages,
|
|
58
|
+
outputPageCount: newPdf.getPageCount(),
|
|
59
|
+
}, null, 2),
|
|
60
|
+
}],
|
|
61
|
+
};
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
//# sourceMappingURL=pdf-split.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-split.js","sourceRoot":"","sources":["../../src/tools/pdf-split.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,MAAM,aAAa,CAAC;AAE7B,OAAO,EAAE,WAAW,EAAE,MAAM,SAAS,CAAC;AAEtC,SAAS,cAAc,CAAC,KAAa,EAAE,UAAkB;IACvD,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAEpD,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,QAAQ,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,CAAC;YAClD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;gBAClC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY;YACjC,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YACnC,IAAI,OAAO,IAAI,CAAC,IAAI,OAAO,IAAI,UAAU,EAAE,CAAC;gBAC1C,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;AACnD,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,MAAiB;IAChD,MAAM,CAAC,IAAI,CACT,WAAW,EACX,mHAAmH,EACnH;QACE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;QACtE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,8CAA8C,CAAC;QAC9E,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,kCAAkC,CAAC;KACpE,EACD,KAAK,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,UAAU,EAAE,EAAE,EAAE;QAC7C,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;QAC3C,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAChD,MAAM,UAAU,GAAG,SAAS,CAAC,YAAY,EAAE,CAAC;QAE5C,MAAM,WAAW,GAAG,cAAc,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;QAE1D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC7B,OAAO;gBACL,OAAO,EAAE,CAAC;wBACR,IAAI,EAAE,MAAe;wBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,4BAA4B,SAAS,cAAc,UAAU,
SAAS,EAAE,CAAC;qBACxG,CAAC;aACH,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,CAAC;QAC1C,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,SAAS,CAAC,SAAS,EAAE,WAAW,CAAC,CAAC;QACnE,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACvB,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;QACrC,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAEzC,OAAO;YACL,OAAO,EAAE,CAAC;oBACR,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;wBACnB,OAAO,EAAE,IAAI;wBACb,UAAU;wBACV,cAAc,EAAE,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;wBAC7C,gBAAgB,EAAE,UAAU;wBAC5B,eAAe,EAAE,MAAM,CAAC,YAAY,EAAE;qBACvC,EAAE,IAAI,EAAE,CAAC,CAAC;iBACZ,CAAC;SACH,CAAC;IACJ,CAAC,CACF,CAAC;AACJ,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mcp-server-docpipe",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "MCP server for document processing - PDF extract/merge/split, DOCX to Markdown, image resize/compress",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"mcp-server-docpipe": "dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "tsc",
|
|
11
|
+
"start": "node dist/index.js",
|
|
12
|
+
"dev": "ts-node src/index.ts"
|
|
13
|
+
},
|
|
14
|
+
"keywords": ["mcp", "claude", "pdf", "docx", "image", "document-processing", "ai-tools"],
|
|
15
|
+
"license": "MIT",
|
|
16
|
+
"type": "module",
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
19
|
+
"mammoth": "^1.12.0",
|
|
20
|
+
"pdf-lib": "^1.17.1",
|
|
21
|
+
"pdf-parse": "^1.1.1",
|
|
22
|
+
"sharp": "^0.33.0",
|
|
23
|
+
"zod": "^3.23.0"
|
|
24
|
+
},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"@types/node": "^22.0.0",
|
|
27
|
+
"typescript": "^5.6.0"
|
|
28
|
+
}
|
|
29
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* mcp-server-docpipe
|
|
4
|
+
* MCP server for document processing:
|
|
5
|
+
* - PDF text extraction, merge, split
|
|
6
|
+
* - DOCX to Markdown conversion
|
|
7
|
+
* - Image resize and compression
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
11
|
+
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
12
|
+
import { registerPdfTools } from './tools/pdf-extract.js';
|
|
13
|
+
import { registerPdfMerge } from './tools/pdf-merge.js';
|
|
14
|
+
import { registerPdfSplit } from './tools/pdf-split.js';
|
|
15
|
+
import { registerDocxConvert } from './tools/docx-convert.js';
|
|
16
|
+
import { registerImageTools } from './tools/image-process.js';
|
|
17
|
+
|
|
18
|
+
const server = new McpServer({
|
|
19
|
+
name: 'mcp-server-docpipe',
|
|
20
|
+
version: '1.0.0',
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
// Register all tools
|
|
24
|
+
registerPdfTools(server);
|
|
25
|
+
registerPdfMerge(server);
|
|
26
|
+
registerPdfSplit(server);
|
|
27
|
+
registerDocxConvert(server);
|
|
28
|
+
registerImageTools(server);
|
|
29
|
+
|
|
30
|
+
async function main() {
|
|
31
|
+
const transport = new StdioServerTransport();
|
|
32
|
+
await server.connect(transport);
|
|
33
|
+
console.error('mcp-server-docpipe started on stdio');
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
main().catch((error) => {
|
|
37
|
+
console.error('Server failed to start:', error);
|
|
38
|
+
process.exit(1);
|
|
39
|
+
});
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import fs from 'fs/promises';
|
|
4
|
+
import * as mammoth from 'mammoth';
|
|
5
|
+
|
|
6
|
+
export function registerDocxConvert(server: McpServer) {
|
|
7
|
+
server.tool(
|
|
8
|
+
'docx-to-markdown',
|
|
9
|
+
'Convert a DOCX (Word) file to Markdown format. Preserves headings, lists, bold/italic, tables, and links.',
|
|
10
|
+
{
|
|
11
|
+
filePath: z.string().describe('Absolute path to the DOCX file'),
|
|
12
|
+
outputPath: z.string().optional().describe('Optional: save Markdown to this path. If omitted, returns content directly.'),
|
|
13
|
+
},
|
|
14
|
+
async ({ filePath, outputPath }) => {
|
|
15
|
+
const buffer = await fs.readFile(filePath);
|
|
16
|
+
const result = await mammoth.convertToMarkdown({ buffer });
|
|
17
|
+
|
|
18
|
+
const markdown = result.value;
|
|
19
|
+
const warnings = result.messages
|
|
20
|
+
.filter((m) => m.type === 'warning')
|
|
21
|
+
.map((m) => m.message);
|
|
22
|
+
|
|
23
|
+
if (outputPath) {
|
|
24
|
+
await fs.writeFile(outputPath, markdown, 'utf-8');
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return {
|
|
28
|
+
content: [{
|
|
29
|
+
type: 'text' as const,
|
|
30
|
+
text: JSON.stringify({
|
|
31
|
+
success: true,
|
|
32
|
+
markdown: outputPath ? `(saved to ${outputPath})` : markdown,
|
|
33
|
+
characterCount: markdown.length,
|
|
34
|
+
warnings: warnings.length > 0 ? warnings : undefined,
|
|
35
|
+
...(outputPath ? { outputPath } : {}),
|
|
36
|
+
}, null, 2),
|
|
37
|
+
}],
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
);
|
|
41
|
+
|
|
42
|
+
server.tool(
|
|
43
|
+
'docx-to-html',
|
|
44
|
+
'Convert a DOCX (Word) file to HTML format.',
|
|
45
|
+
{
|
|
46
|
+
filePath: z.string().describe('Absolute path to the DOCX file'),
|
|
47
|
+
outputPath: z.string().optional().describe('Optional: save HTML to this path'),
|
|
48
|
+
},
|
|
49
|
+
async ({ filePath, outputPath }) => {
|
|
50
|
+
const buffer = await fs.readFile(filePath);
|
|
51
|
+
const result = await mammoth.convertToHtml({ buffer });
|
|
52
|
+
|
|
53
|
+
const html = result.value;
|
|
54
|
+
|
|
55
|
+
if (outputPath) {
|
|
56
|
+
await fs.writeFile(outputPath, html, 'utf-8');
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
return {
|
|
60
|
+
content: [{
|
|
61
|
+
type: 'text' as const,
|
|
62
|
+
text: JSON.stringify({
|
|
63
|
+
success: true,
|
|
64
|
+
html: outputPath ? `(saved to ${outputPath})` : html,
|
|
65
|
+
characterCount: html.length,
|
|
66
|
+
...(outputPath ? { outputPath } : {}),
|
|
67
|
+
}, null, 2),
|
|
68
|
+
}],
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
);
|
|
72
|
+
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import fs from 'fs/promises';
|
|
4
|
+
import sharp from 'sharp';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
|
|
7
|
+
export function registerImageTools(server: McpServer) {
|
|
8
|
+
server.tool(
|
|
9
|
+
'image-resize',
|
|
10
|
+
'Resize an image to specified dimensions. Supports JPEG, PNG, WebP, AVIF, TIFF, GIF.',
|
|
11
|
+
{
|
|
12
|
+
inputFile: z.string().describe('Absolute path to the source image'),
|
|
13
|
+
outputPath: z.string().describe('Absolute path for the resized output image'),
|
|
14
|
+
width: z.number().int().positive().optional().describe('Target width in pixels'),
|
|
15
|
+
height: z.number().int().positive().optional().describe('Target height in pixels'),
|
|
16
|
+
fit: z.enum(['cover', 'contain', 'fill', 'inside', 'outside']).optional().default('inside')
|
|
17
|
+
.describe('How to fit: cover (crop), contain (letterbox), fill (stretch), inside (max dimensions), outside (min dimensions)'),
|
|
18
|
+
},
|
|
19
|
+
async ({ inputFile, outputPath, width, height, fit }) => {
|
|
20
|
+
if (!width && !height) {
|
|
21
|
+
return {
|
|
22
|
+
content: [{
|
|
23
|
+
type: 'text' as const,
|
|
24
|
+
text: JSON.stringify({ error: 'At least one of width or height must be specified' }),
|
|
25
|
+
}],
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
const info = await sharp(inputFile)
|
|
30
|
+
.resize({ width: width ?? undefined, height: height ?? undefined, fit })
|
|
31
|
+
.toFile(outputPath);
|
|
32
|
+
|
|
33
|
+
return {
|
|
34
|
+
content: [{
|
|
35
|
+
type: 'text' as const,
|
|
36
|
+
text: JSON.stringify({
|
|
37
|
+
success: true,
|
|
38
|
+
outputPath,
|
|
39
|
+
output: {
|
|
40
|
+
width: info.width,
|
|
41
|
+
height: info.height,
|
|
42
|
+
format: info.format,
|
|
43
|
+
size: info.size,
|
|
44
|
+
},
|
|
45
|
+
}, null, 2),
|
|
46
|
+
}],
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
);
|
|
50
|
+
|
|
51
|
+
server.tool(
|
|
52
|
+
'image-compress',
|
|
53
|
+
'Compress and/or convert an image to a different format (WebP, AVIF, JPEG, PNG). Great for reducing file size.',
|
|
54
|
+
{
|
|
55
|
+
inputFile: z.string().describe('Absolute path to the source image'),
|
|
56
|
+
outputPath: z.string().describe('Absolute path for the compressed output image'),
|
|
57
|
+
format: z.enum(['webp', 'avif', 'jpeg', 'png']).describe('Output format'),
|
|
58
|
+
quality: z.number().int().min(1).max(100).optional().default(80)
|
|
59
|
+
.describe('Compression quality 1-100 (lower = smaller file, default 80)'),
|
|
60
|
+
},
|
|
61
|
+
async ({ inputFile, outputPath, format, quality }) => {
|
|
62
|
+
const inputStat = await fs.stat(inputFile);
|
|
63
|
+
let pipeline = sharp(inputFile);
|
|
64
|
+
|
|
65
|
+
switch (format) {
|
|
66
|
+
case 'webp':
|
|
67
|
+
pipeline = pipeline.webp({ quality });
|
|
68
|
+
break;
|
|
69
|
+
case 'avif':
|
|
70
|
+
pipeline = pipeline.avif({ quality });
|
|
71
|
+
break;
|
|
72
|
+
case 'jpeg':
|
|
73
|
+
pipeline = pipeline.jpeg({ quality });
|
|
74
|
+
break;
|
|
75
|
+
case 'png':
|
|
76
|
+
pipeline = pipeline.png({ quality });
|
|
77
|
+
break;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const info = await pipeline.toFile(outputPath);
|
|
81
|
+
const outputStat = await fs.stat(outputPath);
|
|
82
|
+
|
|
83
|
+
const compressionRatio = ((1 - outputStat.size / inputStat.size) * 100).toFixed(1);
|
|
84
|
+
|
|
85
|
+
return {
|
|
86
|
+
content: [{
|
|
87
|
+
type: 'text' as const,
|
|
88
|
+
text: JSON.stringify({
|
|
89
|
+
success: true,
|
|
90
|
+
outputPath,
|
|
91
|
+
inputSize: inputStat.size,
|
|
92
|
+
outputSize: outputStat.size,
|
|
93
|
+
compressionRatio: `${compressionRatio}%`,
|
|
94
|
+
output: {
|
|
95
|
+
width: info.width,
|
|
96
|
+
height: info.height,
|
|
97
|
+
format: info.format,
|
|
98
|
+
},
|
|
99
|
+
}, null, 2),
|
|
100
|
+
}],
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
);
|
|
104
|
+
|
|
105
|
+
server.tool(
|
|
106
|
+
'image-info',
|
|
107
|
+
'Get metadata and dimensions of an image file without modifying it.',
|
|
108
|
+
{
|
|
109
|
+
filePath: z.string().describe('Absolute path to the image file'),
|
|
110
|
+
},
|
|
111
|
+
async ({ filePath }) => {
|
|
112
|
+
const metadata = await sharp(filePath).metadata();
|
|
113
|
+
const stat = await fs.stat(filePath);
|
|
114
|
+
|
|
115
|
+
return {
|
|
116
|
+
content: [{
|
|
117
|
+
type: 'text' as const,
|
|
118
|
+
text: JSON.stringify({
|
|
119
|
+
filePath,
|
|
120
|
+
width: metadata.width,
|
|
121
|
+
height: metadata.height,
|
|
122
|
+
format: metadata.format,
|
|
123
|
+
space: metadata.space,
|
|
124
|
+
channels: metadata.channels,
|
|
125
|
+
hasAlpha: metadata.hasAlpha,
|
|
126
|
+
fileSize: stat.size,
|
|
127
|
+
density: metadata.density,
|
|
128
|
+
}, null, 2),
|
|
129
|
+
}],
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
);
|
|
133
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import fs from 'fs/promises';
|
|
4
|
+
import pdfParse from 'pdf-parse';
|
|
5
|
+
|
|
6
|
+
export function registerPdfTools(server: McpServer) {
|
|
7
|
+
server.tool(
|
|
8
|
+
'pdf-extract-text',
|
|
9
|
+
'Extract text content from a PDF file. Returns the full text and metadata (page count, info).',
|
|
10
|
+
{
|
|
11
|
+
filePath: z.string().describe('Absolute path to the PDF file'),
|
|
12
|
+
pageRange: z.string().optional().describe('Page range to extract, e.g. "1-5" or "3" (optional, defaults to all pages)'),
|
|
13
|
+
},
|
|
14
|
+
async ({ filePath, pageRange }) => {
|
|
15
|
+
const buffer = await fs.readFile(filePath);
|
|
16
|
+
const data = await pdfParse(buffer);
|
|
17
|
+
|
|
18
|
+
let text = data.text;
|
|
19
|
+
|
|
20
|
+
// If pageRange is specified, we extract from the full text
|
|
21
|
+
// pdf-parse doesn't support per-page extraction natively,
|
|
22
|
+
// so we provide the full text with metadata
|
|
23
|
+
const result = {
|
|
24
|
+
text: text.trim(),
|
|
25
|
+
pageCount: data.numpages,
|
|
26
|
+
info: {
|
|
27
|
+
title: data.info?.Title || null,
|
|
28
|
+
author: data.info?.Author || null,
|
|
29
|
+
subject: data.info?.Subject || null,
|
|
30
|
+
creator: data.info?.Creator || null,
|
|
31
|
+
},
|
|
32
|
+
...(pageRange ? { note: `pageRange "${pageRange}" requested; full text returned as pdf-parse extracts all pages. Use pdf-split first to isolate specific pages.` } : {}),
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
return {
|
|
36
|
+
content: [
|
|
37
|
+
{
|
|
38
|
+
type: 'text' as const,
|
|
39
|
+
text: JSON.stringify(result, null, 2),
|
|
40
|
+
},
|
|
41
|
+
],
|
|
42
|
+
};
|
|
43
|
+
}
|
|
44
|
+
);
|
|
45
|
+
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import fs from 'fs/promises';
|
|
4
|
+
import { PDFDocument } from 'pdf-lib';
|
|
5
|
+
|
|
6
|
+
export function registerPdfMerge(server: McpServer) {
|
|
7
|
+
server.tool(
|
|
8
|
+
'pdf-merge',
|
|
9
|
+
'Merge multiple PDF files into a single PDF. Provide an array of file paths and an output path.',
|
|
10
|
+
{
|
|
11
|
+
inputFiles: z.array(z.string()).min(2).describe('Array of absolute paths to PDF files to merge (minimum 2)'),
|
|
12
|
+
outputPath: z.string().describe('Absolute path for the merged output PDF'),
|
|
13
|
+
},
|
|
14
|
+
async ({ inputFiles, outputPath }) => {
|
|
15
|
+
const mergedPdf = await PDFDocument.create();
|
|
16
|
+
|
|
17
|
+
for (const filePath of inputFiles) {
|
|
18
|
+
const bytes = await fs.readFile(filePath);
|
|
19
|
+
const sourcePdf = await PDFDocument.load(bytes);
|
|
20
|
+
const pages = await mergedPdf.copyPages(sourcePdf, sourcePdf.getPageIndices());
|
|
21
|
+
for (const page of pages) {
|
|
22
|
+
mergedPdf.addPage(page);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const mergedBytes = await mergedPdf.save();
|
|
27
|
+
await fs.writeFile(outputPath, mergedBytes);
|
|
28
|
+
|
|
29
|
+
return {
|
|
30
|
+
content: [
|
|
31
|
+
{
|
|
32
|
+
type: 'text' as const,
|
|
33
|
+
text: JSON.stringify({
|
|
34
|
+
success: true,
|
|
35
|
+
outputPath,
|
|
36
|
+
totalPages: mergedPdf.getPageCount(),
|
|
37
|
+
mergedFiles: inputFiles.length,
|
|
38
|
+
}, null, 2),
|
|
39
|
+
},
|
|
40
|
+
],
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
);
|
|
44
|
+
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import fs from 'fs/promises';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
import { PDFDocument } from 'pdf-lib';
|
|
6
|
+
|
|
7
|
+
function parsePageRange(range: string, totalPages: number): number[] {
|
|
8
|
+
const pages: number[] = [];
|
|
9
|
+
const parts = range.split(',').map((s) => s.trim());
|
|
10
|
+
|
|
11
|
+
for (const part of parts) {
|
|
12
|
+
if (part.includes('-')) {
|
|
13
|
+
const [startStr, endStr] = part.split('-');
|
|
14
|
+
const start = Math.max(1, parseInt(startStr, 10));
|
|
15
|
+
const end = Math.min(totalPages, parseInt(endStr, 10));
|
|
16
|
+
for (let i = start; i <= end; i++) {
|
|
17
|
+
pages.push(i - 1); // 0-indexed
|
|
18
|
+
}
|
|
19
|
+
} else {
|
|
20
|
+
const pageNum = parseInt(part, 10);
|
|
21
|
+
if (pageNum >= 1 && pageNum <= totalPages) {
|
|
22
|
+
pages.push(pageNum - 1);
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return [...new Set(pages)].sort((a, b) => a - b);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export function registerPdfSplit(server: McpServer) {
|
|
31
|
+
server.tool(
|
|
32
|
+
'pdf-split',
|
|
33
|
+
'Split a PDF file by extracting specific pages into a new PDF. Supports page ranges like "1-3", "5", or "1,3,5-7".',
|
|
34
|
+
{
|
|
35
|
+
inputFile: z.string().describe('Absolute path to the source PDF file'),
|
|
36
|
+
pageRange: z.string().describe('Pages to extract: e.g. "1-3", "5", "1,3,5-7"'),
|
|
37
|
+
outputPath: z.string().describe('Absolute path for the output PDF'),
|
|
38
|
+
},
|
|
39
|
+
async ({ inputFile, pageRange, outputPath }) => {
|
|
40
|
+
const bytes = await fs.readFile(inputFile);
|
|
41
|
+
const sourcePdf = await PDFDocument.load(bytes);
|
|
42
|
+
const totalPages = sourcePdf.getPageCount();
|
|
43
|
+
|
|
44
|
+
const pageIndices = parsePageRange(pageRange, totalPages);
|
|
45
|
+
|
|
46
|
+
if (pageIndices.length === 0) {
|
|
47
|
+
return {
|
|
48
|
+
content: [{
|
|
49
|
+
type: 'text' as const,
|
|
50
|
+
text: JSON.stringify({ error: `No valid pages in range "${pageRange}". PDF has ${totalPages} pages.` }),
|
|
51
|
+
}],
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const newPdf = await PDFDocument.create();
|
|
56
|
+
const copiedPages = await newPdf.copyPages(sourcePdf, pageIndices);
|
|
57
|
+
for (const page of copiedPages) {
|
|
58
|
+
newPdf.addPage(page);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
const newBytes = await newPdf.save();
|
|
62
|
+
await fs.writeFile(outputPath, newBytes);
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
content: [{
|
|
66
|
+
type: 'text' as const,
|
|
67
|
+
text: JSON.stringify({
|
|
68
|
+
success: true,
|
|
69
|
+
outputPath,
|
|
70
|
+
extractedPages: pageIndices.map((i) => i + 1),
|
|
71
|
+
totalSourcePages: totalPages,
|
|
72
|
+
outputPageCount: newPdf.getPageCount(),
|
|
73
|
+
}, null, 2),
|
|
74
|
+
}],
|
|
75
|
+
};
|
|
76
|
+
}
|
|
77
|
+
);
|
|
78
|
+
}
|
package/src/types.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// Minimal ambient typings for pdf-parse, which ships no type definitions.
// Only the fields this project actually reads (see pdf-extract.ts) are typed.
declare module 'pdf-parse' {
  /** Document info dictionary; all fields are optional in real PDFs. */
  interface PDFInfo {
    Title?: string;
    Author?: string;
    Subject?: string;
    Creator?: string;
    // Other info-dictionary entries exist but are untyped here.
    [key: string]: unknown;
  }

  /** Result of parsing a PDF buffer. */
  interface PDFData {
    numpages: number;
    numrender: number;
    info: PDFInfo;
    metadata: unknown;
    /** Extracted text of the document (all pages concatenated). */
    text: string;
    version: string;
  }

  // pdf-parse is a CommonJS module whose export is a single function,
  // hence `export =` rather than a default/named export.
  function pdfParse(buffer: Buffer): Promise<PDFData>;
  export = pdfParse;
}
|
|
22
|
+
|
|
23
|
+
// Minimal ambient typings for mammoth, covering only the APIs this
// project calls (see docx-convert.ts).
declare module 'mammoth' {
  /** A diagnostic emitted during conversion; `type` is e.g. "warning". */
  interface ConversionMessage {
    type: string;
    message: string;
  }

  /** Conversion output: the converted text plus any diagnostics. */
  interface ConversionResult {
    value: string;
    messages: ConversionMessage[];
  }

  /** Conversion input; this project always passes an in-memory buffer. */
  interface ConvertOptions {
    buffer: Buffer;
  }

  export function convertToHtml(options: ConvertOptions): Promise<ConversionResult>;
  export function convertToMarkdown(options: ConvertOptions): Promise<ConversionResult>;
  export function extractRawText(options: ConvertOptions): Promise<ConversionResult>;
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "Node16",
|
|
5
|
+
"moduleResolution": "Node16",
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"rootDir": "./src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"declaration": true,
|
|
12
|
+
"sourceMap": true
|
|
13
|
+
},
|
|
14
|
+
"include": ["src/**/*"],
|
|
15
|
+
"exclude": ["node_modules", "dist"]
|
|
16
|
+
}
|