docsmith-mcp 0.0.1-beta.1 → 0.0.1-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +750 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +1 -0
- package/dist/index.js +150 -103
- package/dist/index.js.map +1 -1
- package/package.json +24 -2
- package/python/pptx_handler.py +169 -0
- package/scripts/preload-packages.mjs +2 -0
- package/.github/workflows/test.yml +0 -35
- package/dist/python/excel_handler.py +0 -97
- package/dist/python/pdf_handler.py +0 -81
- package/dist/python/text_handler.py +0 -331
- package/dist/python/word_handler.py +0 -98
- package/examples/sample_data.csv +0 -6
- package/examples/sample_data.json +0 -9
- package/examples/sample_document.pdf +0 -80
- package/examples/sample_report.docx +0 -0
- package/examples/sample_sales_data.xlsx +0 -0
- package/examples/sample_text.txt +0 -10
- package/src/code-runner.ts +0 -136
- package/src/index.ts +0 -496
- package/src/utils.ts +0 -45
- package/tests/document-processing.test.ts +0 -230
- package/tsconfig.json +0 -20
- package/tsdown.config.ts +0 -21
- package/vitest.config.ts +0 -15
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,750 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
//#region rolldown:runtime
|
|
4
|
+
// Cached references to the Object reflection primitives used by the interop helpers below.
const __create = Object.create;
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __getProtoOf = Object.getPrototypeOf;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Define a getter on `to` for every own property of `from` that `to` does not
 * already own, skipping the key named by `except`.
 *
 * Each getter reads `from[key]` at access time, so later changes on `from` are
 * visible through `to`. A property is enumerable on `to` unless its descriptor
 * on `from` says otherwise. `desc` is only used as scratch storage.
 *
 * @returns `to`
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (isCopyable) {
    for (const key of __getOwnPropNames(from)) {
      if (__hasOwnProp.call(to, key) || key === except) continue;
      desc = __getOwnPropDesc(from, key);
      __defProp(to, key, {
        get: () => from[key],
        enumerable: !desc || desc.enumerable
      });
    }
  }
  return to;
};

/**
 * Wrap a required module in a namespace-like object.
 *
 * The wrapper inherits from the module's prototype (or is a plain object when
 * `mod` is null/undefined). A `default` property holding `mod` itself is added
 * when `isNodeMode` is truthy, `mod` is falsy, or `mod.__esModule` is not set;
 * the module's own properties are then exposed through getters.
 */
const __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", {
      value: mod,
      enumerable: true
    });
  }
  return __copyProps(target, mod);
};
|
|
24
|
+
|
|
25
|
+
//#endregion
|
|
26
|
+
const __modelcontextprotocol_sdk_server_index_js = __toESM(require("@modelcontextprotocol/sdk/server/index.js"));
|
|
27
|
+
const __modelcontextprotocol_sdk_server_stdio_js = __toESM(require("@modelcontextprotocol/sdk/server/stdio.js"));
|
|
28
|
+
const __modelcontextprotocol_sdk_types_js = __toESM(require("@modelcontextprotocol/sdk/types.js"));
|
|
29
|
+
const zod = __toESM(require("zod"));
|
|
30
|
+
const __mcpc_tech_code_runner_mcp = __toESM(require("@mcpc-tech/code-runner-mcp"));
|
|
31
|
+
const fs = __toESM(require("fs"));
|
|
32
|
+
const url = __toESM(require("url"));
|
|
33
|
+
const path = __toESM(require("path"));
|
|
34
|
+
|
|
35
|
+
//#region src/code-runner.ts
|
|
36
|
+
// Path of this bundle file: the CommonJS `__filename` is converted to a file URL
// and back to a path. `__dirname$1` is the directory containing it and is used
// to locate the bundled Python scripts.
const __filename$1 = (0, url.fileURLToPath)(require("url").pathToFileURL(__filename).href);
const __dirname$1 = (0, path.dirname)(__filename$1);
|
|
38
|
+
/**
 * Derive the Pyodide mount information for a host file.
 *
 * The path is resolved to an absolute path. Its containing directory becomes
 * the mount root, and the absolute path itself is reused unchanged as the
 * virtual path.
 *
 * @param filePath - Path to the file (resolved against the working directory when relative)
 * @returns Object with mountRoot (host directory) and virtualPath (absolute file path)
 */
function getFileSystemMapping(filePath) {
  const virtualPath = (0, path.resolve)(filePath);
  return {
    mountRoot: (0, path.dirname)(virtualPath),
    virtualPath
  };
}
|
|
54
|
+
/**
 * Run a bundled Python script through code-runner-mcp and collect its output.
 *
 * The script source is read from `<bundle dir>/../<baseDir>/<scriptPath>` and
 * prefixed with a small wrapper that sets `sys.argv`, so the script sees
 * `args` as its command line arguments.
 *
 * @param scriptPath - Path to the Python script (relative to baseDir)
 * @param options - Execution options
 * @param options.args - Values placed in `sys.argv[1:]` (default `[]`)
 * @param options.packages - Package map forwarded to `runPy` (default `{}`)
 * @param options.baseDir - Directory holding the scripts (default `"python"`)
 * @param options.filePaths - Document paths; the directory of the FIRST entry
 *   is mounted into the runner, otherwise the package root is mounted
 * @returns The parsed JSON of the last stdout line when it is valid JSON;
 *   `{ error }` when the runner reported an error or the stream failed;
 *   otherwise the raw `{ stdout, stderr }`
 * @throws If the script file cannot be read or `runPy` itself rejects
 */
async function runPythonFile(scriptPath, options = {}) {
  const { args = [], packages = {}, baseDir = "python", filePaths = [] } = options;
  const fullPath = (0, path.join)(__dirname$1, "..", baseDir, scriptPath);
  const scriptContent = (0, fs.readFileSync)(fullPath, "utf-8");
  // Both the script name and the arguments are emitted as JSON literals, which
  // Python reads as string/list literals. Encoding the script name this way
  // (instead of wrapping it in bare quotes) keeps a quote or backslash in the
  // name from breaking the generated source.
  const wrapperCode = `
import sys
import json

# Set command line arguments
sys.argv = [${JSON.stringify(scriptPath)}] + ${JSON.stringify(args)}

# Execute the script
${scriptContent}
`;
  let mountRoot = (0, path.join)(__dirname$1, "..");
  if (filePaths.length > 0) {
    const mapping = getFileSystemMapping(filePaths[0]);
    mountRoot = mapping.mountRoot;
  }
  // The host directory is mounted at the same path inside the runner, so the
  // absolute document path passed in `args` stays valid there.
  const runPyOptions = {
    packages,
    nodeFSMountPoint: mountRoot,
    nodeFSRoot: mountRoot
  };
  const stream = await (0, __mcpc_tech_code_runner_mcp.runPy)(wrapperCode, runPyOptions);
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let stdout = "";
  let stderr = "";
  let error = "";
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      const chunk = decoder.decode(value, { stream: true });
      // Chunks are classified by their prefix.
      // NOTE(review): this assumes each chunk starts at a message boundary — confirm against runPy.
      if (chunk.startsWith("[stderr] ")) stderr += chunk.slice(9);
      else if (chunk.startsWith("[err]")) error += chunk;
      else stdout += chunk;
    }
  } catch (streamError) {
    return { error: String(streamError) };
  } finally {
    // Always give the stream back, whether reading finished or failed.
    reader.releaseLock();
  }
  if (error) return { error: error.replace(/\[err\]\[py\]\s*/g, "").trim() };
  // The handlers are expected to print their JSON result as the final stdout line.
  const lines = stdout.trim().split("\n");
  const lastLine = lines[lines.length - 1];
  try {
    return JSON.parse(lastLine);
  } catch {
    // Not JSON: hand back the raw output so the caller can still inspect it.
    return {
      stdout,
      stderr
    };
  }
}
|
|
115
|
+
|
|
116
|
+
//#endregion
|
|
117
|
+
//#region src/utils.ts
|
|
118
|
+
/**
|
|
119
|
+
* Utility functions for document processing
|
|
120
|
+
*/
|
|
121
|
+
// Lower-case file extension -> document category understood by the handlers.
const FILE_TYPE_BY_EXTENSION = new Map([
  ["xlsx", "excel"],
  ["xls", "excel"],
  ["docx", "word"],
  ["pptx", "pptx"],
  ["ppt", "pptx"],
  ["pdf", "pdf"],
  ["txt", "text"],
  ["csv", "text"],
  ["md", "text"],
  ["json", "text"],
  ["yaml", "text"],
  ["yml", "text"]
]);

/**
 * Detect file type from file extension
 *
 * Only the final path segment is inspected (both `/` and `\` are treated as
 * separators) and matching is case-insensitive. A name with no dot has no
 * extension, so a bare name such as "pdf" is NOT reported as a PDF.
 *
 * @param filePath - Path or file name to classify
 * @returns "excel", "word", "pptx", "pdf" or "text"; null when the extension
 *   is missing or not recognised
 */
function detectFileType(filePath) {
  const fileName = filePath.split(/[\\/]/).pop();
  const dotIndex = fileName.lastIndexOf(".");
  if (dotIndex === -1) return null;
  const ext = fileName.slice(dotIndex + 1).toLowerCase();
  return FILE_TYPE_BY_EXTENSION.has(ext) ? FILE_TYPE_BY_EXTENSION.get(ext) : null;
}
|
|
133
|
+
// Packages each handler needs, keyed by file type.
// NOTE(review): entries look like { import name: package name } — confirm against runPy's `packages` option.
const PACKAGES_BY_FILE_TYPE = {
  excel: { openpyxl: "openpyxl" },
  word: { docx: "python-docx" },
  pptx: { pptx: "python-pptx" },
  pdf: { PyPDF2: "PyPDF2" },
  text: {}
};

/**
 * Get required packages for each file type
 *
 * Only the table's own keys are consulted, so names inherited from
 * Object.prototype (e.g. "constructor") are treated as unknown.
 *
 * @param fileType - "excel", "word", "pptx", "pdf" or "text"
 * @returns A fresh package map for the type; an empty object when the type is unknown
 */
function getPackages(fileType) {
  if (!Object.prototype.hasOwnProperty.call(PACKAGES_BY_FILE_TYPE, fileType)) return {};
  // Copy so callers cannot mutate the shared table.
  return { ...PACKAGES_BY_FILE_TYPE[fileType] };
}
|
|
146
|
+
/**
 * Get environment configuration
 *
 * Reads, on every call:
 * - DOC_RAW_FULL_READ: "true" enables raw mode by default (anything else is false)
 * - DOC_PAGE_SIZE: default items per page (default 100)
 * - DOC_MAX_FILE_SIZE: size limit in MiB (default 50)
 *
 * Numeric values that are missing, non-numeric, zero or negative fall back to
 * the default instead of propagating NaN or a non-positive size.
 *
 * @returns `{ rawFullRead, pageSize, maxFileSize }`, with maxFileSize in bytes
 */
function getConfig() {
  const readPositiveInt = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };
  return {
    rawFullRead: process.env.DOC_RAW_FULL_READ === "true",
    pageSize: readPositiveInt(process.env.DOC_PAGE_SIZE, 100),
    maxFileSize: readPositiveInt(process.env.DOC_MAX_FILE_SIZE, 50) * 1024 * 1024
  };
}
|
|
156
|
+
|
|
157
|
+
//#endregion
|
|
158
|
+
//#region src/index.ts
|
|
159
|
+
// Field schemas shared by the tool argument schemas below.
const existingDocumentPathField = zod.z.string().describe("Absolute path to the document file");
const fileTypeOverrideField = zod.z
  .enum(["excel", "word", "pptx", "pdf", "text"])
  .optional()
  .describe("Override file type detection. Use this to explicitly specify the format instead of relying on file extension");

// Arguments of the `read_document` tool.
const ReadDocumentSchema = zod.z.object({
  file_path: existingDocumentPathField,
  file_type: fileTypeOverrideField,
  mode: zod.z
    .enum(["raw", "paginated"])
    .optional()
    .describe("Read mode: 'raw' for full content, 'paginated' for chunked reading"),
  page: zod.z.number().optional().describe("Page number for paginated mode (1-based)"),
  page_size: zod.z.number().optional().describe("Items per page for paginated mode"),
  sheet_name: zod.z.string().optional().describe("Sheet name for Excel files")
});

// Arguments of the `write_document` tool (PDF is not a writable format).
const WriteDocumentSchema = zod.z.object({
  file_path: zod.z.string().describe("Absolute path to save the document"),
  format: zod.z.enum(["excel", "word", "pptx", "text"]).describe("Document format"),
  data: zod.z.any().describe("Document data structure")
});

// Arguments of the `get_document_info` tool.
const GetDocumentInfoSchema = zod.z.object({
  file_path: existingDocumentPathField,
  file_type: fileTypeOverrideField
});
|
|
193
|
+
// MCP server instance; it advertises only the `tools` capability.
// NOTE(review): the reported version "0.1.0" differs from the package version
// of this release (0.0.1-beta.3) — confirm which one is intended.
const server = new __modelcontextprotocol_sdk_server_index_js.Server({
  name: "docsmith-mcp",
  version: "0.1.0"
}, { capabilities: { tools: {} } });
|
|
197
|
+
// Small builders for the JSON-Schema fragments below. `description` is only
// emitted when one is supplied, so undescribed fields stay `{ type }`.
const schemaField = (type, description) =>
  description === undefined ? { type } : { type, description };
const schemaArray = (items, description) =>
  description === undefined ? { type: "array", items } : { type: "array", items, description };
const schemaObject = (properties) => ({ type: "object", properties });

// Fields common to every tool result.
const BaseOutputSchema = schemaObject({
  success: schemaField("boolean", "Operation success status"),
  error: schemaField("string", "Error message if failed")
});

// Pagination fields, spread into the paginated read schemas (not a schema by itself).
const PaginationSchema = {
  current_page: schemaField("number", "Current page number"),
  page_size: schemaField("number", "Items per page"),
  total_pages: schemaField("number", "Total number of pages"),
  page: schemaField("number", "Current page number (alternative)"),
  has_more: schemaField("boolean", "Whether more pages exist")
};

// Result of reading an Excel workbook.
const ExcelReadOutputSchema = schemaObject({
  sheet_name: schemaField("string", "Active sheet name"),
  sheets: schemaArray(schemaField("string"), "All sheet names"),
  total_rows: schemaField("number", "Total rows in sheet"),
  total_cols: schemaField("number", "Total columns in sheet"),
  data: schemaArray(schemaArray({}), "Sheet data as array of rows"),
  ...PaginationSchema
});

// Result of reading a Word document.
const WordReadOutputSchema = schemaObject({
  paragraphs: schemaArray(schemaField("string"), "Document paragraphs"),
  tables: schemaArray(schemaArray(schemaArray(schemaField("string"))), "Tables data"),
  total_paragraphs: schemaField("number", "Total paragraph count"),
  total_tables: schemaField("number", "Total table count"),
  ...PaginationSchema
});

// Result of reading a plain text file.
const TextReadOutputSchema = schemaObject({
  ...BaseOutputSchema.properties,
  content: schemaField("string", "Text content"),
  total_lines: schemaField("number", "Total line count"),
  encoding: schemaField("string", "File encoding"),
  ...PaginationSchema
});

// Result of reading a CSV file.
const CSVReadOutputSchema = schemaObject({
  ...BaseOutputSchema.properties,
  headers: schemaArray(schemaField("string"), "CSV headers"),
  data: schemaArray(schemaField("object"), "Structured data as array of objects"),
  total_rows: schemaField("number", "Total data rows"),
  encoding: schemaField("string", "File encoding"),
  ...PaginationSchema
});

// Result of reading a JSON file.
const JSONReadOutputSchema = schemaObject({
  ...BaseOutputSchema.properties,
  data: schemaField("object", "Parsed JSON data"),
  encoding: schemaField("string", "File encoding")
});

// Result of the `write_document` tool.
const WriteOutputSchema = schemaObject({
  success: schemaField("boolean", "Write operation success"),
  file_path: schemaField("string", "Written file path"),
  message: schemaField("string", "Success message"),
  error: schemaField("string", "Error message if failed")
});

// Metadata reported for text-like files.
const TextInfoOutputSchema = schemaObject({
  ...BaseOutputSchema.properties,
  file_size: schemaField("number", "File size in bytes"),
  line_count: schemaField("number", "Line count"),
  encoding: schemaField("string", "File encoding"),
  file_type: schemaField("string", "File extension"),
  headers: schemaArray(schemaField("string")),
  total_rows: schemaField("number"),
  total_cols: schemaField("number"),
  item_count: schemaField("number"),
  key_count: schemaField("number")
});
|
|
402
|
+
// Advertise the three document tools together with their input/output schemas.
server.setRequestHandler(__modelcontextprotocol_sdk_types_js.ListToolsRequestSchema, async () => {
  // `file_type` is declared identically for read_document and get_document_info.
  const fileTypeOverride = {
    type: "string",
    enum: ["excel", "word", "pptx", "pdf", "text"],
    description: "Override file type detection (optional). Specify format explicitly instead of relying on extension"
  };
  const stringArray = () => ({ type: "array", items: { type: "string" } });
  // When a script's last stdout line is not JSON, runPythonFile returns the raw
  // `{ stdout, stderr }` and the call handler forwards it as structuredContent.
  // The closed (`additionalProperties: false`) schemas must therefore list
  // these two fields, or that fallback result violates the declared schema.
  const rawOutputFallback = () => ({
    stdout: { type: "string" },
    stderr: { type: "string" }
  });
  return { tools: [
    {
      name: "read_document",
      description: "Read document content (Excel, Word, PowerPoint, PDF, TXT, CSV, Markdown, JSON, YAML). Supports raw full read or paginated mode.",
      inputSchema: {
        type: "object",
        properties: {
          file_path: { type: "string", description: "Absolute path to the document file" },
          file_type: fileTypeOverride,
          mode: { type: "string", enum: ["raw", "paginated"], description: "Read mode" },
          page: { type: "number", description: "Page number for paginated mode" },
          page_size: { type: "number", description: "Items per page" },
          sheet_name: { type: "string", description: "Sheet name for Excel files" }
        },
        required: ["file_path"]
      },
      outputSchema: {
        type: "object",
        description: "Document content with format-specific structure. Common fields: success (boolean), error (string, on failure).",
        properties: {
          success: { type: "boolean" },
          error: { type: "string" },
          encoding: { type: "string" },
          sheet_name: { type: "string" },
          sheets: stringArray(),
          total_rows: { type: "number" },
          total_cols: { type: "number" },
          current_page: { type: ["number", "null"] },
          total_pages: { type: "number" },
          paragraphs: { type: "array" },
          tables: { type: "array" },
          total_paragraphs: { type: "number" },
          total_tables: { type: "number" },
          total_slides: { type: "number" },
          slides: {
            type: "array",
            items: {
              type: "object",
              properties: {
                slide_number: { type: "number" },
                title: { type: "string" },
                content: { type: "array" },
                notes: { type: "string" }
              }
            }
          },
          current_page_group: { type: ["number", "null"] },
          total_page_groups: { type: "number" },
          content: {},
          total_lines: { type: "number" },
          headers: stringArray(),
          data: {},
          page: { type: "number" },
          page_size: { type: ["number", "null"] },
          has_more: { type: "boolean" },
          ...rawOutputFallback()
        },
        additionalProperties: false
      }
    },
    {
      name: "write_document",
      description: "Write document content (Excel, Word, PowerPoint, Text)",
      inputSchema: {
        type: "object",
        properties: {
          file_path: { type: "string", description: "Absolute path to save the document" },
          format: {
            type: "string",
            enum: ["excel", "word", "pptx", "text"],
            description: "Document format"
          },
          data: { description: "Document data structure. Excel: array of rows [[cell1, cell2], ...]. Word: {paragraphs: string[], tables?: [[[cell]]]}. Text/CSV/JSON: string or object" }
        },
        required: ["file_path", "format", "data"]
      },
      outputSchema: WriteOutputSchema
    },
    {
      name: "get_document_info",
      description: "Get document metadata (page count, sheet count, slide count, file size, etc.)",
      inputSchema: {
        type: "object",
        properties: {
          file_path: { type: "string", description: "Absolute path to the document file" },
          file_type: fileTypeOverride
        },
        required: ["file_path"]
      },
      outputSchema: {
        type: "object",
        description: "Document metadata with format-specific fields",
        properties: {
          success: { type: "boolean" },
          error: { type: "string" },
          file_size: { type: "number" },
          sheets: {
            type: "array",
            items: {
              type: "object",
              properties: {
                name: { type: "string" },
                rows: { type: "number" },
                cols: { type: "number" }
              }
            }
          },
          paragraphs: { type: "number" },
          tables: { type: "number" },
          slides: { type: "number" },
          pages: { type: "number" },
          total_words: { type: "number" },
          metadata: { type: "object" },
          line_count: { type: "number" },
          encoding: { type: "string" },
          file_type: { type: "string" },
          headers: stringArray(),
          total_rows: { type: "number" },
          total_cols: { type: "number" },
          item_count: { type: "number" },
          key_count: { type: "number" },
          ...rawOutputFallback()
        },
        additionalProperties: false
      }
    }
  ] };
});
|
|
589
|
+
// Python handler script for each supported document type.
const HANDLER_SCRIPTS = {
  excel: "excel_handler.py",
  word: "word_handler.py",
  pptx: "pptx_handler.py",
  pdf: "pdf_handler.py",
  text: "text_handler.py"
};

/** Pick the handler script for a file type; unrecognised types use the text handler. */
function getHandlerScript(fileType) {
  return Object.prototype.hasOwnProperty.call(HANDLER_SCRIPTS, fileType) ? HANDLER_SCRIPTS[fileType] : "text_handler.py";
}

/** Return the explicit `file_type` override or the type detected from the path; throws when neither yields one. */
function resolveFileType(params) {
  const fileType = params.file_type || detectFileType(params.file_path);
  if (!fileType) throw new Error(`Unsupported file type: ${params.file_path}`);
  return fileType;
}

/** Build the `read` argument list: path, optional Excel sheet name, then page and page size when paginating. */
function buildReadArgs(fileType, params, page, pageSize) {
  const scriptArgs = ["read", params.file_path];
  if (fileType === "excel" && params.sheet_name) scriptArgs.push(params.sheet_name);
  if (page) {
    // PDF reads are capped at 10 items per page.
    const effectivePageSize = fileType === "pdf" ? Math.min(pageSize, 10) : pageSize;
    scriptArgs.push(String(page), String(effectivePageSize));
  }
  return scriptArgs;
}

/** Build the `write` argument list for the requested format; throws on missing data or an unknown format. */
function buildWriteArgs(params) {
  const { format, data, file_path: filePath } = params;
  // `z.any()` accepts a missing value, so the required `data` argument is enforced here.
  if (data === undefined) throw new Error("Missing required parameter: data");
  switch (format) {
    case "excel":
      return ["write", filePath, JSON.stringify(data)];
    case "word": {
      if (data === null) throw new Error("Word data must be an object with a 'paragraphs' array");
      const scriptArgs = ["write", filePath, JSON.stringify(data.paragraphs || [])];
      const tables = data.tables || null;
      if (tables) scriptArgs.push(JSON.stringify(tables));
      return scriptArgs;
    }
    case "pptx": {
      if (data === null) throw new Error("PowerPoint data must be an array of slides or an object with a 'slides' array");
      // Accept either `{ slides: [...] }` or the slide list itself.
      return ["write", filePath, JSON.stringify(data.slides || data || [])];
    }
    case "text":
      return ["write", filePath, typeof data === "string" ? data : JSON.stringify(data)];
    default:
      throw new Error(`Unsupported write format: ${format}`);
  }
}

/** Wrap a script result as a tool response carrying both pretty-printed text and structured content. */
function toToolResult(result) {
  return {
    content: [{
      type: "text",
      text: JSON.stringify(result, null, 2)
    }],
    structuredContent: result
  };
}

// Dispatch a tool call to the matching Python handler script. Any failure
// (validation, unsupported type, script read error) is reported as an
// `isError` text result rather than thrown.
// NOTE(review): getConfig().maxFileSize is not consulted anywhere in this handler — confirm whether a size check is intended.
server.setRequestHandler(__modelcontextprotocol_sdk_types_js.CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  try {
    if (name === "read_document") {
      const params = ReadDocumentSchema.parse(args);
      const fileType = resolveFileType(params);
      const config = getConfig();
      // An explicit mode wins; otherwise the environment decides raw vs. paginated.
      const mode = params.mode || (config.rawFullRead ? "raw" : "paginated");
      const page = mode === "paginated" ? params.page || 1 : void 0;
      const pageSize = params.page_size || config.pageSize;
      const result = await runPythonFile(getHandlerScript(fileType), {
        args: buildReadArgs(fileType, params, page, pageSize),
        packages: getPackages(fileType),
        filePaths: [params.file_path]
      });
      return toToolResult(result);
    }
    if (name === "write_document") {
      const params = WriteDocumentSchema.parse(args);
      const scriptArgs = buildWriteArgs(params);
      const result = await runPythonFile(getHandlerScript(params.format), {
        args: scriptArgs,
        packages: getPackages(params.format),
        filePaths: [params.file_path]
      });
      return toToolResult(result);
    }
    if (name === "get_document_info") {
      const params = GetDocumentInfoSchema.parse(args);
      const fileType = resolveFileType(params);
      const result = await runPythonFile(getHandlerScript(fileType), {
        args: ["info", params.file_path],
        packages: getPackages(fileType),
        filePaths: [params.file_path]
      });
      return toToolResult(result);
    }
    throw new Error(`Unknown tool: ${name}`);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [{
        type: "text",
        text: `Error: ${errorMessage}`
      }],
      isError: true
    };
  }
});
|
|
739
|
+
/**
 * Connect the server to a stdio transport and announce that it is running.
 * The status line goes to stderr (presumably because stdout carries the
 * stdio transport's traffic — confirm).
 */
async function main() {
  await server.connect(new __modelcontextprotocol_sdk_server_stdio_js.StdioServerTransport());
  console.error("Docsmith MCP server running on stdio");
}

// A startup failure is logged and ends the process with exit code 1.
main().catch((error) => {
  console.error("Server error:", error);
  process.exit(1);
});
|
|
748
|
+
|
|
749
|
+
//#endregion
|
|
750
|
+
//# sourceMappingURL=index.cjs.map
|