@promptx/mcp-office 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "name": "@promptx/mcp-office",
3
+ "version": "2.0.0",
4
+ "description": "MCP server for reading Office documents (docx, xlsx, pptx)",
5
+ "main": "./dist/index.js",
6
+ "bin": {
7
+ "mcp-office": "./dist/index.js"
8
+ },
9
+ "dependencies": {
10
+ "@modelcontextprotocol/sdk": "^1.0.0",
11
+ "mammoth": "^1.8.0",
12
+ "pdf-parse": "^1.1.1",
13
+ "pptx-parser": "^1.0.3",
14
+ "xlsx": "^0.18.5"
15
+ },
16
+ "devDependencies": {
17
+ "@types/node": "^20.0.0",
18
+ "esbuild": "^0.24.2",
19
+ "typescript": "^5.0.0"
20
+ },
21
+ "scripts": {
22
+ "build": "node build.mjs",
23
+ "dev": "tsc --watch"
24
+ }
25
+ }
package/src/index.ts ADDED
@@ -0,0 +1,270 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * MCP Office Server
4
+ *
5
+ * Provides tools to read Office documents (docx, xlsx, pptx, pdf)
6
+ */
7
+
8
+ import { Server } from "@modelcontextprotocol/sdk/server/index.js";
9
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
10
+ import {
11
+ CallToolRequestSchema,
12
+ ListToolsRequestSchema,
13
+ } from "@modelcontextprotocol/sdk/types.js";
14
+ import * as fs from "node:fs";
15
+ import * as path from "node:path";
16
+ import mammoth from "mammoth";
17
+ import XLSX from "xlsx";
18
+
19
// Human-readable help strings shared by the tool input schemas below.

/** Help text for the free-form `encoding` argument (used by read_pptx). */
const ENCODING_DESC =
  "Character encoding (optional). Common values: " +
  [
    "utf8 (default)",
    "gbk (Chinese Simplified)",
    "big5 (Chinese Traditional)",
    "utf16le",
    "utf16be",
    "latin1",
  ].join(", ");

/** Help text for the numeric `codepage` argument (used by the Excel tools). */
const CODEPAGE_DESC =
  "Character encoding codepage (optional). Common values: " +
  [
    "65001 (UTF-8, default)",
    "936 (GBK/Chinese Simplified)",
    "950 (Big5/Chinese Traditional)",
    "1200 (UTF-16LE)",
    "1201 (UTF-16BE)",
  ].join(", ");
22
+
23
/**
 * Lookup table from user-facing encoding names (lower-case) to the encoding
 * names accepted by `Buffer.prototype.toString`.
 *
 * A `Map` is used instead of a plain object so that names such as
 * "constructor" or "__proto__" cannot resolve to inherited
 * `Object.prototype` members.
 */
const BUFFER_ENCODINGS: ReadonlyMap<string, BufferEncoding> = new Map<string, BufferEncoding>([
  ["utf8", "utf8"],
  ["utf-8", "utf8"],
  // Buffer has no GBK/GB2312/Big5 decoder; latin1 only keeps the bytes
  // one-to-one so a caller can re-decode them, it does not decode the text.
  ["gbk", "latin1"],
  ["gb2312", "latin1"],
  ["big5", "latin1"],
  ["utf16le", "utf16le"],
  ["utf-16le", "utf16le"],
  // Buffer has no big-endian UTF-16 decoder; the caller must swap byte
  // pairs before (or decode differently instead of) using this value.
  ["utf16be", "utf16le"],
  ["utf-16be", "utf16le"],
  ["latin1", "latin1"],
  ["ascii", "ascii"],
]);

/**
 * Map an encoding name to a Node.js buffer encoding.
 *
 * The lookup is case-insensitive and ignores surrounding whitespace.
 *
 * @param encoding - Encoding name supplied by the caller (optional).
 * @returns The matching buffer encoding, or "utf8" when the name is missing,
 *          empty, or not in the table.
 */
function getBufferEncoding(encoding?: string): BufferEncoding {
  if (!encoding) return "utf8";
  return BUFFER_ENCODINGS.get(encoding.trim().toLowerCase()) ?? "utf8";
}
41
+
42
/**
 * The MCP server instance shared by the request handlers and `main()`.
 *
 * It advertises only the `tools` capability. The reported version is kept in
 * step with the "version" field of package.json (2.0.0); update both together.
 */
const server = new Server(
  {
    name: "mcp-office",
    version: "2.0.0",
  },
  {
    capabilities: {
      tools: {},
    },
  }
);
53
+
54
// Advertise the tools this server implements, together with the JSON Schema
// of their arguments. Every tool takes a mandatory `path`; the Excel tools
// additionally accept `codepage`, and read_pptx accepts `encoding`.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  // Schema fragment for a string-valued argument.
  const stringArg = (description: string) => ({ type: "string", description });

  // Schema fragment for the Excel `codepage` argument.
  const codepageArg = () => ({ type: "number", description: CODEPAGE_DESC });

  // Assemble one tool entry; `path` is the only required argument everywhere.
  const describeTool = (
    name: string,
    description: string,
    properties: Record<string, unknown>,
  ) => ({
    name,
    description,
    inputSchema: {
      type: "object" as const,
      properties,
      required: ["path"],
    },
  });

  const docxTool = describeTool(
    "read_docx",
    "Read content from a Microsoft Word document (.docx file). Returns the text content of the document.",
    {
      path: stringArg("Absolute path to the .docx file"),
    },
  );

  const xlsxTool = describeTool(
    "read_xlsx",
    "Read content from a Microsoft Excel spreadsheet (.xlsx or .xls file). Returns the data as formatted text or JSON.",
    {
      path: stringArg("Absolute path to the Excel file"),
      sheet: stringArg("Sheet name to read (optional, defaults to first sheet)"),
      format: {
        type: "string",
        enum: ["text", "json", "csv"],
        description: "Output format: json (default), text, or csv",
      },
      codepage: codepageArg(),
    },
  );

  const pptxTool = describeTool(
    "read_pptx",
    "Read content from a Microsoft PowerPoint presentation (.pptx file). Returns the text content from all slides.",
    {
      path: stringArg("Absolute path to the .pptx file"),
      encoding: stringArg(ENCODING_DESC),
    },
  );

  const sheetListTool = describeTool(
    "list_xlsx_sheets",
    "List all sheet names in an Excel file",
    {
      path: stringArg("Absolute path to the Excel file"),
      codepage: codepageArg(),
    },
  );

  const pdfTool = describeTool(
    "read_pdf",
    "Read text content from a PDF file. Returns the extracted text from all pages.",
    {
      path: stringArg("Absolute path to the PDF file"),
    },
  );

  return { tools: [docxTool, xlsxTool, pptxTool, sheetListTool, pdfTool] };
});
152
+
153
// Handle tool calls

/** Result payload sent back to the MCP client for a tool call. */
type ToolResult = {
  content: Array<{ type: "text"; text: string }>;
  isError?: boolean;
};

/**
 * Minimal view of the zip-entry members used by read_pptx. Declared locally
 * because no typings for adm-zip are listed in package.json.
 */
type ZipEntryLike = {
  entryName: string;
  getData(): Buffer;
};

/** Matches slide parts inside a .pptx archive and captures the slide number. */
const SLIDE_ENTRY_PATTERN = /(?:^|\/)ppt\/slides\/slide(\d+)\.xml$/;

/** Matches DrawingML text runs (`<a:t>`), with or without attributes. */
const TEXT_RUN_PATTERN = /<a:t(?:\s[^>]*)?>([^<]*)<\/a:t>/g;

/** Matches one XML character or entity reference. */
const XML_ENTITY_PATTERN = /&(#x[0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g;

/** The five entities predefined by XML. */
const NAMED_XML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Encodings that `Buffer.prototype.toString` cannot decode, mapped to the
 * label to try with `TextDecoder` instead.
 */
const TEXT_DECODER_LABELS: ReadonlyMap<string, string> = new Map([
  ["gbk", "gbk"],
  ["gb2312", "gbk"],
  ["big5", "big5"],
  ["utf16be", "utf-16be"],
  ["utf-16be", "utf-16be"],
]);

/** Wrap plain text as a successful tool result. */
function textResult(text: string): ToolResult {
  return { content: [{ type: "text", text }] };
}

/** Wrap a message as a failed tool result (flagged with `isError`). */
function errorResult(text: string): ToolResult {
  return { content: [{ type: "text", text }], isError: true };
}

/**
 * Validate the `path` argument of a tool call.
 *
 * @returns The path, once it is known to be a non-empty string naming an
 *          existing file system entry.
 * @throws Error when `path` is missing, not a string, empty, or does not exist.
 */
function requireExistingFile(args: unknown): string {
  const filePath = (args as { path?: unknown } | null | undefined)?.path;
  if (typeof filePath !== "string" || filePath === "") {
    throw new Error('Missing required "path" argument (expected a non-empty string)');
  }
  if (!fs.existsSync(filePath)) {
    throw new Error(`File not found: ${filePath}`);
  }
  return filePath;
}

/**
 * Replace XML entity and numeric character references with their characters.
 * Unknown or out-of-range references are left untouched. The text is scanned
 * once, so `&amp;lt;` becomes `&lt;` rather than `<`.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(XML_ENTITY_PATTERN, (reference: string, body: string) => {
    if (body.startsWith("#")) {
      const codePoint =
        body[1] === "x" ? Number.parseInt(body.slice(2), 16) : Number.parseInt(body.slice(1), 10);
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
    }
    return NAMED_XML_ENTITIES.get(body) ?? reference;
  });
}

/** Collect the non-blank text runs of one slide's XML, separated by spaces. */
function extractSlideText(xml: string): string {
  const runs: string[] = [];
  for (const match of xml.matchAll(TEXT_RUN_PATTERN)) {
    const text = decodeXmlEntities(match[1]);
    if (text.trim()) runs.push(text);
  }
  return runs.join(" ");
}

/**
 * Decode raw slide bytes using the caller-requested encoding.
 *
 * Encodings that Buffer cannot decode (GBK, GB2312, Big5, UTF-16BE) go through
 * `TextDecoder`; if the runtime does not know the label, decoding falls back
 * to the buffer encoding chosen by `getBufferEncoding`.
 */
function decodeSlideXml(data: Buffer, encoding: string): string {
  const label = TEXT_DECODER_LABELS.get(encoding.trim().toLowerCase());
  if (label !== undefined) {
    try {
      return new TextDecoder(label).decode(data);
    } catch {
      // Label not supported by this runtime; use the Buffer fallback below.
    }
  }
  return data.toString(getBufferEncoding(encoding));
}

/**
 * Dispatch a tool call by name.
 *
 * Every failure (bad arguments, missing file, unknown sheet or tool, parser
 * exception) is reported as a text result with `isError: true` instead of
 * being thrown, so the client always receives a readable message.
 */
server.setRequestHandler(CallToolRequestSchema, async (request): Promise<ToolResult> => {
  const { name, arguments: args } = request.params;

  try {
    switch (name) {
      case "read_docx": {
        const filePath = requireExistingFile(args);
        const buffer = fs.readFileSync(filePath);
        const result = await mammoth.extractRawText({ buffer });
        return textResult(result.value);
      }

      case "read_xlsx": {
        const filePath = requireExistingFile(args);
        const { sheet, format = "json", codepage = 65001 } = args as {
          sheet?: string;
          format?: string;
          codepage?: number;
        };
        // Read the file as a buffer so the codepage option is applied by the parser.
        const buffer = fs.readFileSync(filePath);
        const workbook = XLSX.read(buffer, { type: "buffer", codepage, raw: false });
        // An empty or missing sheet name selects the first sheet.
        const sheetName = sheet || workbook.SheetNames[0];
        const worksheet = workbook.Sheets[sheetName];

        if (!worksheet) {
          return errorResult(
            `Error: Sheet "${sheetName}" not found. Available sheets: ${workbook.SheetNames.join(", ")}`,
          );
        }

        // "text" (and any unrecognised format) is rendered as CSV.
        const output =
          format === "json"
            ? JSON.stringify(XLSX.utils.sheet_to_json(worksheet, { defval: "", raw: false }), null, 2)
            : XLSX.utils.sheet_to_csv(worksheet);
        return textResult(`Sheet: ${sheetName}\n\n${output}`);
      }

      case "read_pptx": {
        const filePath = requireExistingFile(args);
        const { encoding = "utf8" } = args as { encoding?: string };
        // A .pptx file is a ZIP archive with one XML part per slide.
        // NOTE(review): adm-zip is not listed in package.json dependencies
        // (pptx-parser is, but is unused here) - confirm it is installed or bundled.
        const AdmZip = (await import("adm-zip")).default;
        const entries: ZipEntryLike[] = new AdmZip(filePath).getEntries();

        // Keep only slide parts and order them by slide number, not by name
        // (so slide10 sorts after slide9).
        const slides = entries
          .flatMap((entry) => {
            const match = SLIDE_ENTRY_PATTERN.exec(entry.entryName);
            return match ? [{ entry, slideNumber: Number.parseInt(match[1], 10) }] : [];
          })
          .sort((a, b) => a.slideNumber - b.slideNumber);

        const slideTexts: string[] = [];
        for (const { entry, slideNumber } of slides) {
          const slideText = extractSlideText(decodeSlideXml(entry.getData(), encoding));
          if (slideText) {
            slideTexts.push(`--- Slide ${slideNumber} ---\n${slideText}`);
          }
        }

        return textResult(slideTexts.join("\n\n") || "No text content found in presentation");
      }

      case "list_xlsx_sheets": {
        const filePath = requireExistingFile(args);
        const { codepage = 65001 } = args as { codepage?: number };
        const buffer = fs.readFileSync(filePath);
        const workbook = XLSX.read(buffer, { type: "buffer", codepage });
        const listing = workbook.SheetNames.map((sheetName, index) => `${index + 1}. ${sheetName}`).join("\n");
        return textResult(`Sheets in ${path.basename(filePath)}:\n${listing}`);
      }

      case "read_pdf": {
        const filePath = requireExistingFile(args);
        const pdfParse = (await import("pdf-parse")).default;
        const buffer = fs.readFileSync(filePath);
        const data = await pdfParse(buffer);
        return textResult(`PDF: ${path.basename(filePath)}\nPages: ${data.numpages}\n\n${data.text}`);
      }

      default:
        return errorResult(`Unknown tool: ${name}`);
    }
  } catch (error) {
    return errorResult(`Error: ${error instanceof Error ? error.message : String(error)}`);
  }
});
262
+
263
/**
 * Start the server: connect it to a stdio transport and announce readiness.
 *
 * The status line goes to stderr via `console.error`, keeping stdout free for
 * the transport.
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("MCP Office Server running on stdio");
}

// A failed start-up must be visible to whoever launched the process, so log
// the cause and exit with a non-zero status instead of ending with status 0.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});
package/tsconfig.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "NodeNext",
5
+ "moduleResolution": "NodeNext",
6
+ "outDir": "./dist",
7
+ "rootDir": "./src",
8
+ "strict": true,
9
+ "esModuleInterop": true,
10
+ "skipLibCheck": true,
11
+ "declaration": true,
12
+ "noImplicitAny": false
13
+ },
14
+ "include": ["src/**/*"]
15
+ }