@promptx/mcp-office 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +5 -0
- package/LICENSE +21 -0
- package/build.mjs +12 -0
- package/dist/index.js +318288 -0
- package/package.json +25 -0
- package/src/index.ts +270 -0
- package/tsconfig.json +15 -0
package/package.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@promptx/mcp-office",
|
|
3
|
+
"version": "2.0.0",
|
|
4
|
+
"description": "MCP server for reading Office documents and PDFs (docx, xlsx, pptx, pdf)",
|
|
5
|
+
"main": "./dist/index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"mcp-office": "./dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"dependencies": {
|
|
10
|
+
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
11
|
+
"mammoth": "^1.8.0",
|
|
12
|
+
"pdf-parse": "^1.1.1",
|
|
13
|
+
"pptx-parser": "^1.0.3",
|
|
14
|
+
"xlsx": "^0.18.5"
|
|
15
|
+
},
|
|
16
|
+
"devDependencies": {
|
|
17
|
+
"@types/node": "^20.0.0",
|
|
18
|
+
"esbuild": "^0.24.2",
|
|
19
|
+
"typescript": "^5.0.0"
|
|
20
|
+
},
|
|
21
|
+
"scripts": {
|
|
22
|
+
"build": "node build.mjs",
|
|
23
|
+
"dev": "tsc --watch"
|
|
24
|
+
}
|
|
25
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* MCP Office Server
|
|
4
|
+
*
|
|
5
|
+
* Provides tools to read Office documents (docx, xlsx, pptx, pdf)
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
9
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
10
|
+
import {
|
|
11
|
+
CallToolRequestSchema,
|
|
12
|
+
ListToolsRequestSchema,
|
|
13
|
+
} from "@modelcontextprotocol/sdk/types.js";
|
|
14
|
+
import * as fs from "node:fs";
|
|
15
|
+
import * as path from "node:path";
|
|
16
|
+
import mammoth from "mammoth";
|
|
17
|
+
import XLSX from "xlsx";
|
|
18
|
+
|
|
19
|
+
// Encoding description for tool schemas
|
|
20
|
+
/** Help text shown to clients for the string `encoding` argument of `read_pptx`. */
const ENCODING_DESC = "Character encoding (optional). Common values: utf8 (default), gbk (Chinese Simplified), big5 (Chinese Traditional), utf16le, utf16be, latin1";
|
|
21
|
+
/** Help text shown to clients for the numeric `codepage` argument of `read_xlsx` and `list_xlsx_sheets`. */
const CODEPAGE_DESC = "Character encoding codepage (optional). Common values: 65001 (UTF-8, default), 936 (GBK/Chinese Simplified), 950 (Big5/Chinese Traditional), 1200 (UTF-16LE), 1201 (UTF-16BE)";
|
|
22
|
+
|
|
23
|
+
// Map encoding names to Node.js buffer encodings
|
|
24
|
+
/**
 * User-facing encoding names mapped to the Node.js buffer encoding used to
 * read the bytes.
 *
 * Kept in a Map rather than an object literal so that names such as
 * "constructor" or "__proto__" cannot resolve to inherited Object.prototype
 * members.
 *
 * Limitations of this table (Node's Buffer has no native decoder for these):
 * - gbk / gb2312 / big5 map to "latin1", which preserves the raw bytes but
 *   does not decode them into the intended characters.
 * - utf16be / utf-16be map to "utf16le"; no byte swap is performed here.
 */
const BUFFER_ENCODING_ALIASES: ReadonlyMap<string, BufferEncoding> = new Map<string, BufferEncoding>([
  ["utf8", "utf8"],
  ["utf-8", "utf8"],
  ["gbk", "latin1"],
  ["gb2312", "latin1"],
  ["big5", "latin1"],
  ["utf16le", "utf16le"],
  ["utf-16le", "utf16le"],
  ["utf16be", "utf16le"],
  ["utf-16be", "utf16le"],
  ["latin1", "latin1"],
  ["ascii", "ascii"],
]);

/**
 * Maps an encoding name to a Node.js buffer encoding.
 *
 * @param encoding - Encoding name, matched case-insensitively and ignoring
 *   surrounding whitespace. May be omitted.
 * @returns The mapped buffer encoding, or "utf8" when the name is missing,
 *   empty, or not in the alias table.
 */
function getBufferEncoding(encoding?: string): BufferEncoding {
  if (!encoding) return "utf8";
  return BUFFER_ENCODING_ALIASES.get(encoding.trim().toLowerCase()) ?? "utf8";
}
|
|
41
|
+
|
|
42
|
+
// MCP server instance. The first argument identifies the server to clients;
// the version is kept in step with the "version" field of package.json.
// The second argument declares that this server offers tools.
const server = new Server(
  {
    name: "mcp-office",
    version: "2.0.0",
  },
  {
    capabilities: {
      tools: {},
    },
  }
);
|
|
53
|
+
|
|
54
|
+
// List available tools
|
|
55
|
+
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Small builders for the schema fragments that repeat across tools.
  const stringProp = (description: string) => ({ type: "string", description });
  const codepageProp = () => ({ type: "number", description: CODEPAGE_DESC });
  // Every tool takes an object whose only required key is `path`.
  const pathRequired = (properties: Record<string, unknown>) => ({
    type: "object" as const,
    properties,
    required: ["path"],
  });

  const readDocx = {
    name: "read_docx",
    description: "Read content from a Microsoft Word document (.docx file). Returns the text content of the document.",
    inputSchema: pathRequired({
      path: stringProp("Absolute path to the .docx file"),
    }),
  };

  const readXlsx = {
    name: "read_xlsx",
    description: "Read content from a Microsoft Excel spreadsheet (.xlsx or .xls file). Returns the data as formatted text or JSON.",
    inputSchema: pathRequired({
      path: stringProp("Absolute path to the Excel file"),
      sheet: stringProp("Sheet name to read (optional, defaults to first sheet)"),
      format: {
        type: "string",
        enum: ["text", "json", "csv"],
        description: "Output format: json (default), text, or csv",
      },
      codepage: codepageProp(),
    }),
  };

  const readPptx = {
    name: "read_pptx",
    description: "Read content from a Microsoft PowerPoint presentation (.pptx file). Returns the text content from all slides.",
    inputSchema: pathRequired({
      path: stringProp("Absolute path to the .pptx file"),
      encoding: stringProp(ENCODING_DESC),
    }),
  };

  const listXlsxSheets = {
    name: "list_xlsx_sheets",
    description: "List all sheet names in an Excel file",
    inputSchema: pathRequired({
      path: stringProp("Absolute path to the Excel file"),
      codepage: codepageProp(),
    }),
  };

  const readPdf = {
    name: "read_pdf",
    description: "Read text content from a PDF file. Returns the extracted text from all pages.",
    inputSchema: pathRequired({
      path: stringProp("Absolute path to the PDF file"),
    }),
  };

  // Order matches the order in which the tools are advertised to clients.
  return { tools: [readDocx, readXlsx, readPptx, listXlsxSheets, readPdf] };
});
|
|
152
|
+
|
|
153
|
+
// Handle tool calls
|
|
154
|
+
/**
 * Result payload returned to the MCP client for every tool call.
 * Declared as a type alias (not an interface) so it stays assignable to
 * result types that carry an index signature.
 */
type ToolResult = {
  content: Array<{ type: "text"; text: string }>;
  isError?: boolean;
};

/** Wraps plain text in a successful tool result. */
function textResult(text: string): ToolResult {
  return { content: [{ type: "text", text }] };
}

/** Wraps a message in a tool result that is flagged as an error. */
function errorResult(message: string): ToolResult {
  return { content: [{ type: "text", text: message }], isError: true };
}

/**
 * Reads the required `path` argument and checks that the file exists.
 *
 * @throws Error when `path` is missing or not a non-empty string, or when
 *   nothing exists at that path.
 */
function requireExistingFile(params: Record<string, unknown>): string {
  const filePath = params.path;
  if (typeof filePath !== "string" || filePath.length === 0) {
    throw new Error('Missing required string argument "path"');
  }
  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }
  return filePath;
}

/** The five entities predefined by XML. */
const NAMED_XML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["lt", "<"],
  ["gt", ">"],
  ["amp", "&"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Replaces predefined XML entities and numeric character references with the
 * characters they stand for. Done in a single pass so that `&amp;lt;` becomes
 * `&lt;` rather than `<`. Unrecognised or out-of-range references are left
 * untouched.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-fA-F]+|#\d+|lt|gt|amp|quot|apos);/g, (whole: string, body: string) => {
    if (body.startsWith("#")) {
      const codePoint = body[1] === "x" ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      // String.fromCodePoint throws on values outside the Unicode range.
      return codePoint >= 0 && codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    }
    return NAMED_XML_ENTITIES.get(body) ?? whole;
  });
}

/**
 * Decodes raw slide XML bytes using the requested encoding.
 *
 * Encodings Node's Buffer understands are handled exactly as before, through
 * getBufferEncoding. Anything else (gbk, big5, utf16be, ...) is attempted with
 * TextDecoder; if the runtime does not know the label, the previous
 * getBufferEncoding fallback is used.
 */
function decodeSlideXml(data: Buffer, encoding: string): string {
  const label = encoding.trim().toLowerCase();
  if (!Buffer.isEncoding(label)) {
    try {
      // TextDecoder labels use a hyphen ("utf-16be"), which callers may omit.
      return new TextDecoder(label.replace(/^utf(?=\d)/, "utf-")).decode(data);
    } catch {
      // Unknown label for this runtime: fall through to the fallback below.
    }
  }
  return data.toString(getBufferEncoding(label));
}

/**
 * Collects the contents of every `<a:t>` element in a slide's XML, decodes
 * entities, drops whitespace-only runs and joins the rest with single spaces.
 * This is a regex scan, not a full XML parse.
 */
function extractSlideText(xml: string): string {
  return Array.from(xml.matchAll(/<a:t(?:\s[^>]*)?>([^<]*)<\/a:t>/g), (match) => decodeXmlEntities(match[1]))
    .filter((run) => run.trim())
    .join(" ");
}

/** Returns the number embedded in a `slideN` entry name, or 0 if there is none. */
function slideNumber(entryName: string): number {
  return parseInt(entryName.match(/slide(\d+)/)?.[1] ?? "0", 10);
}

/**
 * Dispatches a tool call by name.
 *
 * Every failure, including invalid arguments, missing files, parser errors and
 * unknown tool names, is reported as a text result with `isError: true`
 * rather than thrown.
 */
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  // `arguments` may be absent; treat that as an empty argument object so
  // validation produces a clear message.
  const params: Record<string, unknown> = args ?? {};

  try {
    switch (name) {
      case "read_docx": {
        const filePath = requireExistingFile(params);
        const buffer = fs.readFileSync(filePath);
        const result = await mammoth.extractRawText({ buffer });
        return textResult(result.value);
      }

      case "read_xlsx": {
        const filePath = requireExistingFile(params);
        const { sheet, format = "json", codepage = 65001 } = params as { sheet?: string; format?: string; codepage?: number };
        // Read the file as a buffer so the codepage option is applied by the parser.
        const buffer = fs.readFileSync(filePath);
        const workbook = XLSX.read(buffer, { type: "buffer", codepage, raw: false });
        // `||` on purpose: an empty sheet name also selects the first sheet.
        const sheetName = sheet || workbook.SheetNames[0];
        const worksheet = workbook.Sheets[sheetName];

        if (!worksheet) {
          return errorResult(`Error: Sheet "${sheetName}" not found. Available sheets: ${workbook.SheetNames.join(", ")}`);
        }

        // "text" (and any other value) is rendered as CSV.
        const output =
          format === "json"
            ? JSON.stringify(XLSX.utils.sheet_to_json(worksheet, { defval: "", raw: false }), null, 2)
            : XLSX.utils.sheet_to_csv(worksheet);
        return textResult(`Sheet: ${sheetName}\n\n${output}`);
      }

      case "read_pptx": {
        const filePath = requireExistingFile(params);
        const { encoding = "utf8" } = params as { encoding?: string };
        // PPTX is a ZIP file containing one XML part per slide.
        const AdmZip = (await import("adm-zip")).default;
        const zip = new AdmZip(filePath);

        const slideEntries = zip
          .getEntries()
          .filter((e) => /ppt\/slides\/slide\d+\.xml/.test(e.entryName))
          .sort((a, b) => slideNumber(a.entryName) - slideNumber(b.entryName));

        const slideTexts: string[] = [];
        for (const entry of slideEntries) {
          const slideText = extractSlideText(decodeSlideXml(entry.getData(), String(encoding)));
          if (slideText) {
            const slideNum = entry.entryName.match(/slide(\d+)/)?.[1];
            slideTexts.push(`--- Slide ${slideNum} ---\n${slideText}`);
          }
        }

        return textResult(slideTexts.join("\n\n") || "No text content found in presentation");
      }

      case "list_xlsx_sheets": {
        const filePath = requireExistingFile(params);
        const { codepage = 65001 } = params as { codepage?: number };
        const buffer = fs.readFileSync(filePath);
        const workbook = XLSX.read(buffer, { type: "buffer", codepage });
        const listing = workbook.SheetNames.map((n, i) => `${i + 1}. ${n}`).join("\n");
        return textResult(`Sheets in ${path.basename(filePath)}:\n${listing}`);
      }

      case "read_pdf": {
        const filePath = requireExistingFile(params);
        const pdfParse = (await import("pdf-parse")).default;
        const buffer = fs.readFileSync(filePath);
        const data = await pdfParse(buffer);
        return textResult(`PDF: ${path.basename(filePath)}\nPages: ${data.numpages}\n\n${data.text}`);
      }

      default:
        return errorResult(`Unknown tool: ${name}`);
    }
  } catch (error) {
    return errorResult(`Error: ${error instanceof Error ? error.message : String(error)}`);
  }
});
|
|
262
|
+
|
|
263
|
+
// Start server
|
|
264
|
+
/**
 * Connects the server to a stdio transport and logs a startup line.
 *
 * @throws Whatever `server.connect` rejects with; the caller below handles it.
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  // Logged to stderr; presumably stdout is reserved for the stdio transport's
  // protocol traffic (confirm against the SDK).
  console.error("MCP Office Server running on stdio");
}

// Report a startup failure and make the process exit with a non-zero status,
// so that whatever launched the server can tell it did not start.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"rootDir": "./src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"declaration": true,
|
|
12
|
+
"noImplicitAny": false
|
|
13
|
+
},
|
|
14
|
+
"include": ["src/**/*"]
|
|
15
|
+
}
|