docsmith-mcp 0.0.1-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/test.yml +35 -0
- package/LICENSE +21 -0
- package/README.md +109 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +679 -0
- package/dist/index.js.map +1 -0
- package/dist/python/excel_handler.py +97 -0
- package/dist/python/pdf_handler.py +81 -0
- package/dist/python/text_handler.py +331 -0
- package/dist/python/word_handler.py +98 -0
- package/examples/sample_data.csv +6 -0
- package/examples/sample_data.json +9 -0
- package/examples/sample_document.pdf +80 -0
- package/examples/sample_report.docx +0 -0
- package/examples/sample_sales_data.xlsx +0 -0
- package/examples/sample_text.txt +10 -0
- package/package.json +36 -0
- package/python/excel_handler.py +97 -0
- package/python/pdf_handler.py +81 -0
- package/python/text_handler.py +331 -0
- package/python/word_handler.py +98 -0
- package/scripts/preload-packages.mjs +64 -0
- package/src/code-runner.ts +136 -0
- package/src/index.ts +496 -0
- package/src/utils.ts +45 -0
- package/tests/document-processing.test.ts +230 -0
- package/tsconfig.json +20 -0
- package/tsdown.config.ts +21 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { runPythonFile } from "../src/code-runner.js";
|
|
3
|
+
import { fileURLToPath } from "url";
|
|
4
|
+
import { dirname, join } from "path";
|
|
5
|
+
|
|
6
|
+
// ES modules do not define __filename/__dirname; derive them from import.meta.url so the example files can be located relative to this test file.
const __filename = fileURLToPath(import.meta.url);
|
|
7
|
+
const __dirname = dirname(__filename);
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Document processing tests using real files from the examples/ directory.
|
|
11
|
+
* These tests exercise each handler's "read" and "info" commands against actual documents.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
 * Excel handler suite, run against examples/sample_sales_data.xlsx.
 * Exercises the "read" command (with and without sheet/page arguments)
 * and the "info" command of excel_handler.py.
 */
describe("Excel Document Processing", () => {
  const workbookPath = join(__dirname, "..", "examples", "sample_sales_data.xlsx");

  // Options common to every excel_handler.py call; rebuilt on each call so
  // no object is shared between invocations.
  const excelOptions = () => ({
    packages: { openpyxl: "openpyxl" },
    filePaths: [workbookPath],
  });

  it("should read Excel file content", async () => {
    const workbook = await runPythonFile("excel_handler.py", {
      args: ["read", workbookPath],
      ...excelOptions(),
    });

    expect(workbook.sheet_name).toBe("Sales Report");
    expect(workbook.sheets).toContain("Sales Report");
    expect(workbook.total_rows).toBeGreaterThan(0);
    expect(workbook.total_cols).toBeGreaterThan(0);
    expect(workbook.data).toBeDefined();
    expect(Array.isArray(workbook.data)).toBe(true);
  });

  it("should read Excel file with pagination", async () => {
    // Extra positional args: sheet name, then "1" and "3", which the
    // assertions below expect back as current_page and page_size.
    const firstPage = await runPythonFile("excel_handler.py", {
      args: ["read", workbookPath, "Sales Report", "1", "3"],
      ...excelOptions(),
    });

    expect(firstPage.current_page).toBe(1);
    expect(firstPage.page_size).toBe(3);
    expect(firstPage.data.length).toBeLessThanOrEqual(3);
  });

  it("should get Excel file info", async () => {
    const info = await runPythonFile("excel_handler.py", {
      args: ["info", workbookPath],
      ...excelOptions(),
    });

    // In the info response each entry of `sheets` is asserted to be an
    // object with name/rows/cols.
    expect(info.sheets).toBeDefined();
    expect(Array.isArray(info.sheets)).toBe(true);
    expect(info.file_size).toBeGreaterThan(0);
    expect(info.sheets[0].name).toBe("Sales Report");
    expect(info.sheets[0].rows).toBeGreaterThan(0);
    expect(info.sheets[0].cols).toBeGreaterThan(0);
  });
});
|
|
59
|
+
|
|
60
|
+
/**
 * Word handler suite, run against examples/sample_report.docx.
 * Exercises the "read" command (with and without page arguments) and the
 * "info" command of word_handler.py.
 */
describe("Word Document Processing", () => {
  const reportPath = join(__dirname, "..", "examples", "sample_report.docx");

  // Options common to every word_handler.py call; rebuilt on each call so
  // no object is shared between invocations.
  const wordOptions = () => ({
    packages: { docx: "python-docx" },
    filePaths: [reportPath],
  });

  it("should read Word document content", async () => {
    const report = await runPythonFile("word_handler.py", {
      args: ["read", reportPath],
      ...wordOptions(),
    });

    expect(report.paragraphs).toBeDefined();
    expect(Array.isArray(report.paragraphs)).toBe(true);
    expect(report.paragraphs.length).toBeGreaterThan(0);
    expect(report.total_paragraphs).toBeGreaterThan(0);
    expect(report.total_tables).toBeGreaterThan(0);
    expect(report.tables).toBeDefined();
    expect(Array.isArray(report.tables)).toBe(true);
  });

  it("should read Word document with pagination", async () => {
    // "1" and "5" are expected back as current_page and page_size.
    const firstPage = await runPythonFile("word_handler.py", {
      args: ["read", reportPath, "1", "5"],
      ...wordOptions(),
    });

    expect(firstPage.current_page).toBe(1);
    expect(firstPage.page_size).toBe(5);
    expect(firstPage.paragraphs.length).toBeLessThanOrEqual(5);
  });

  it("should get Word document info", async () => {
    const info = await runPythonFile("word_handler.py", {
      args: ["info", reportPath],
      ...wordOptions(),
    });

    // Unlike the "read" response, `paragraphs` and `tables` are asserted
    // here as numbers greater than zero rather than as arrays.
    expect(info.paragraphs).toBeGreaterThan(0);
    expect(info.tables).toBeGreaterThan(0);
    expect(info.file_size).toBeGreaterThan(0);
  });
});
|
|
103
|
+
|
|
104
|
+
/**
 * PDF handler suite, run against examples/sample_document.pdf.
 * Exercises the "read" command (with and without page arguments) and the
 * "info" command of pdf_handler.py.
 */
describe("PDF Document Processing", () => {
  const pdfPath = join(__dirname, "..", "examples", "sample_document.pdf");

  // Options common to every pdf_handler.py call; rebuilt on each call so
  // no object is shared between invocations.
  const pdfOptions = () => ({
    packages: { PyPDF2: "PyPDF2" },
    filePaths: [pdfPath],
  });

  it("should read PDF content", async () => {
    const document = await runPythonFile("pdf_handler.py", {
      args: ["read", pdfPath],
      ...pdfOptions(),
    });

    expect(document.total_pages).toBeGreaterThan(0);
    expect(document.content).toBeDefined();
    expect(Array.isArray(document.content)).toBe(true);
    expect(document.content.length).toBeGreaterThan(0);
    // The first content entry is expected to carry page_number 1.
    expect(document.content[0].page_number).toBe(1);
    expect(document.content[0].text).toBeDefined();
  });

  it("should read PDF with pagination", async () => {
    // "1" and "1" are expected back as current_page_group and page_size,
    // and exactly one content entry is expected in the result.
    const firstGroup = await runPythonFile("pdf_handler.py", {
      args: ["read", pdfPath, "1", "1"],
      ...pdfOptions(),
    });

    expect(firstGroup.current_page_group).toBe(1);
    expect(firstGroup.page_size).toBe(1);
    expect(firstGroup.content.length).toBe(1);
  });

  it("should get PDF info", async () => {
    const info = await runPythonFile("pdf_handler.py", {
      args: ["info", pdfPath],
      ...pdfOptions(),
    });

    expect(info.pages).toBeGreaterThan(0);
    expect(info.file_size).toBeGreaterThan(0);
    expect(info.total_words).toBeGreaterThan(0);
  });
});
|
|
146
|
+
|
|
147
|
+
/**
 * Text handler suite, run against examples/sample_text.txt and
 * examples/sample_data.csv. Exercises the "read" and "info" commands of
 * text_handler.py for plain text and for CSV input. No `packages` option
 * is passed for this handler.
 */
describe("Text File Processing", () => {
  const examplesDir = join(__dirname, "..", "examples");
  const textPath = join(examplesDir, "sample_text.txt");
  const csvPath = join(examplesDir, "sample_data.csv");

  it("should read text file content", async () => {
    const {
      success,
      content,
      total_lines: totalLines,
      encoding,
    } = await runPythonFile("text_handler.py", {
      args: ["read", textPath],
      filePaths: [textPath],
    });

    expect(success).toBe(true);
    expect(content).toBeDefined();
    expect(totalLines).toBeGreaterThan(0);
    expect(encoding).toBe("utf-8");
  });

  it("should read text file with pagination", async () => {
    // "1" and "3" are expected back as page and page_size; has_more must be
    // true, so the sample file is expected to hold more than one page.
    const {
      success,
      page,
      page_size: pageSize,
      has_more: hasMore,
    } = await runPythonFile("text_handler.py", {
      args: ["read", textPath, "1", "3"],
      filePaths: [textPath],
    });

    expect(success).toBe(true);
    expect(page).toBe(1);
    expect(pageSize).toBe(3);
    expect(hasMore).toBe(true);
  });

  it("should read CSV as structured data", async () => {
    const { success, headers, data: rows } = await runPythonFile("text_handler.py", {
      args: ["read", csvPath],
      filePaths: [csvPath],
    });

    expect(success).toBe(true);
    expect(headers).toBeDefined();
    expect(headers).toEqual(["Name", "Age", "City"]);
    expect(rows).toBeDefined();
    expect(Array.isArray(rows)).toBe(true);
    expect(rows.length).toBe(5);
    // Each row is expected to be keyed by the header names.
    expect(rows[0]).toHaveProperty("Name");
    expect(rows[0]).toHaveProperty("Age");
    expect(rows[0]).toHaveProperty("City");
  });

  it("should read CSV with pagination", async () => {
    const {
      success,
      page,
      page_size: pageSize,
      data: rows,
      has_more: hasMore,
    } = await runPythonFile("text_handler.py", {
      args: ["read", csvPath, "1", "2"],
      filePaths: [csvPath],
    });

    expect(success).toBe(true);
    expect(page).toBe(1);
    expect(pageSize).toBe(2);
    expect(rows.length).toBe(2);
    expect(hasMore).toBe(true);
  });

  it("should get text file info", async () => {
    const {
      success,
      file_size: fileSize,
      line_count: lineCount,
      file_type: fileType,
    } = await runPythonFile("text_handler.py", {
      args: ["info", textPath],
      filePaths: [textPath],
    });

    expect(success).toBe(true);
    expect(fileSize).toBeGreaterThan(0);
    expect(lineCount).toBeGreaterThan(0);
    expect(fileType).toBe("txt");
  });

  it("should get CSV file info with headers", async () => {
    const {
      success,
      file_type: fileType,
      headers,
      total_rows: totalRows,
      total_cols: totalCols,
    } = await runPythonFile("text_handler.py", {
      args: ["info", csvPath],
      filePaths: [csvPath],
    });

    expect(success).toBe(true);
    expect(fileType).toBe("csv");
    expect(headers).toBeDefined();
    expect(headers).toEqual(["Name", "Age", "City"]);
    expect(totalRows).toBe(5);
    expect(totalCols).toBe(3);
  });
});
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"lib": ["ES2022"],
|
|
7
|
+
"outDir": "./dist",
|
|
8
|
+
"rootDir": "./src",
|
|
9
|
+
"strict": true,
|
|
10
|
+
"esModuleInterop": true,
|
|
11
|
+
"skipLibCheck": true,
|
|
12
|
+
"forceConsistentCasingInFileNames": true,
|
|
13
|
+
"declaration": true,
|
|
14
|
+
"declarationMap": true,
|
|
15
|
+
"sourceMap": true,
|
|
16
|
+
"resolveJsonModule": true
|
|
17
|
+
},
|
|
18
|
+
"include": ["src/**/*"],
|
|
19
|
+
"exclude": ["node_modules", "dist"]
|
|
20
|
+
}
|
package/tsdown.config.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { defineConfig } from "tsdown";
|
|
2
|
+
import { cp } from "fs/promises";
|
|
3
|
+
import { join } from "path";
|
|
4
|
+
|
|
5
|
+
/**
 * tsdown build configuration: bundles src/index.ts to ESM in dist/ with
 * type declarations and source maps, cleaning the output directory first.
 */
export default defineConfig({
  entry: ["./src/index.ts"],
  outDir: "dist",
  format: "esm",
  clean: true,
  dts: true,
  sourcemap: true,
  hooks: {
    /**
     * After the build finishes, copy the python/ directory (resolved from
     * the current working directory) into <outDir>/python.
     */
    async "build:done"(ctx) {
      await cp(
        join(process.cwd(), "python"),
        join(ctx.options.outDir, "python"),
        { recursive: true },
      );
      console.log("[tsdown] ✓ Copied python/ to dist/python/");
    },
  },
});
|
package/vitest.config.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { defineConfig } from "vitest/config";
|
|
2
|
+
|
|
3
|
+
// 60 seconds per test, to leave room for Pyodide package installation.
const TEST_TIMEOUT_MS = 60 * 1000;

// V8 coverage with text, JSON and HTML reports; dependencies, build output
// and the tests themselves are left out of the report.
const coverageSettings = {
  provider: "v8",
  reporter: ["text", "json", "html"],
  exclude: ["node_modules/", "dist/", "tests/"],
};

/**
 * Vitest configuration: runs tests/**\/*.test.ts in a Node environment with
 * global test APIs enabled.
 */
export default defineConfig({
  test: {
    globals: true,
    environment: "node",
    include: ["tests/**/*.test.ts"],
    testTimeout: TEST_TIMEOUT_MS,
    coverage: coverageSettings,
  },
});
|