docsmith-mcp 0.0.1-beta.1 → 0.0.1-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,9 +1,31 @@
1
1
  {
2
2
  "name": "docsmith-mcp",
3
- "version": "0.0.1-beta.1",
3
+ "version": "0.0.1-beta.3",
4
4
  "description": "Python-powered document processing MCP for Excel, Word, PDF",
5
5
  "type": "module",
6
- "main": "dist/index.js",
6
+ "main": "./dist/index.js",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "import": {
12
+ "types": "./dist/index.d.ts",
13
+ "default": "./dist/index.js"
14
+ },
15
+ "require": {
16
+ "types": "./dist/index.d.cts",
17
+ "default": "./dist/index.cjs"
18
+ }
19
+ }
20
+ },
21
+ "bin": {
22
+ "docsmith-mcp": "./dist/index.js"
23
+ },
24
+ "files": [
25
+ "dist",
26
+ "python",
27
+ "scripts"
28
+ ],
7
29
  "scripts": {
8
30
  "build": "tsdown",
9
31
  "dev": "tsdown --watch",
@@ -0,0 +1,169 @@
1
+ """
2
+ PowerPoint document handler - read/write PPTX files
3
+ """
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
def read_pptx(file_path: str, page: int = None, page_size: int = 100):
    """Read a PowerPoint presentation, optionally paginated by slide.

    Args:
        file_path: Path of the .pptx file to open.
        page: 1-based page number; ``None`` returns every slide.
        page_size: Number of slides per page (only used when ``page`` is set).

    Returns:
        A dict with ``total_slides``, ``slides`` (one dict per slide holding
        ``slide_number``, ``title``, ``content`` and ``notes``),
        ``current_page``, ``page_size`` (``None`` when not paginating) and
        ``total_pages``. Table shapes appear in ``content`` as
        ``{"table": [[cell_text, ...], ...]}``.

    Raises:
        ValueError: If ``page`` or ``page_size`` is below 1 while paginating.
    """
    # Reject impossible pages up front: a page below 1 would turn into a
    # negative slice start and silently return the wrong slides.
    if page is not None and (page < 1 or page_size < 1):
        raise ValueError("page and page_size must be >= 1")

    from pptx import Presentation

    # PP_PLACEHOLDER values: 1 = TITLE, 3 = CENTER_TITLE (title-slide layout).
    title_types = (1, 3)

    prs = Presentation(file_path)

    slides = []
    for number, slide in enumerate(prs.slides, start=1):
        slide_data = {
            "slide_number": number,
            "title": "",
            "content": [],
            "notes": "",
        }

        for shape in slide.shapes:
            if hasattr(shape, "text") and shape.text.strip():
                is_title = (
                    getattr(shape, "is_placeholder", False)
                    and shape.placeholder_format.type in title_types
                )
                if is_title:
                    slide_data["title"] = shape.text
                    continue
                slide_data["content"].append(shape.text)

            # Ask has_table instead of probing the ``table`` attribute: on a
            # graphic frame holding a chart the ``table`` property raises
            # ValueError, which hasattr() does not swallow.
            if getattr(shape, "has_table", False):
                table_data = [
                    [cell.text for cell in row.cells]
                    for row in shape.table.rows
                ]
                slide_data["content"].append({"table": table_data})

        if slide.has_notes_slide:
            notes_frame = slide.notes_slide.notes_text_frame
            if notes_frame:
                slide_data["notes"] = notes_frame.text

        slides.append(slide_data)

    total_slides = len(slides)

    if page is None:
        total_pages = 1
    else:
        start = (page - 1) * page_size
        slides = slides[start:start + page_size]
        # Ceiling division; an empty deck still counts as one (empty) page.
        total_pages = (
            (total_slides + page_size - 1) // page_size if total_slides else 1
        )

    return {
        "total_slides": total_slides,
        "slides": slides,
        "current_page": page,
        "page_size": page_size if page is not None else None,
        "total_pages": total_pages,
    }
71
+
72
+
73
def get_pptx_info(file_path: str):
    """Collect basic facts about a PowerPoint file.

    Returns a dict with the slide count (``slides``) and the on-disk size in
    bytes (``file_size``). When any of the core properties title, author,
    subject, created or modified is set, they are added under ``metadata``;
    the two timestamps are converted to strings.
    """
    from pptx import Presentation

    presentation = Presentation(file_path)

    summary = {
        "slides": len(presentation.slides),
        "file_size": Path(file_path).stat().st_size,
    }

    core = presentation.core_properties
    if not core:
        return summary

    stringified = ("created", "modified")
    collected = {}
    for field in ("title", "author", "subject", "created", "modified"):
        value = getattr(core, field)
        if not value:
            continue  # unset properties are left out entirely
        collected[field] = str(value) if field in stringified else value

    if collected:
        summary["metadata"] = collected

    return summary
103
+
104
+
105
def write_pptx(file_path: str, slides_data: list):
    """Create a PowerPoint presentation from a list of slide descriptions.

    Args:
        file_path: Destination path of the .pptx file.
        slides_data: One dict per slide. ``title`` (str) becomes a bold 32pt
            heading text box; string entries of ``content`` (list) become the
            paragraphs of a body text box. Non-string content entries are
            skipped.

    Returns:
        ``{"success": True, "file_path": file_path}`` once the file is saved.
    """
    from pptx import Presentation
    from pptx.util import Inches, Pt

    prs = Presentation()
    prs.slide_width = Inches(10)
    prs.slide_height = Inches(7.5)

    # Blank layout; looked up once because it is the same for every slide.
    blank_layout = prs.slide_layouts[6]

    for slide_info in slides_data:
        slide = prs.slides.add_slide(blank_layout)

        title = slide_info.get("title", "")
        if title:
            title_box = slide.shapes.add_textbox(
                Inches(0.5), Inches(0.5), Inches(9), Inches(1)
            )
            title_frame = title_box.text_frame
            title_frame.text = title
            heading = title_frame.paragraphs[0]
            heading.font.size = Pt(32)
            heading.font.bold = True

        content = slide_info.get("content", [])
        if content:
            content_box = slide.shapes.add_textbox(
                Inches(0.5), Inches(2), Inches(9), Inches(5)
            )
            text_frame = content_box.text_frame

            lines = [item for item in content if isinstance(item, str)]
            for index, line in enumerate(lines):
                # A fresh text frame already contains one empty paragraph;
                # reuse it for the first line so the box does not start with
                # a blank line.
                if index == 0:
                    p = text_frame.paragraphs[0]
                else:
                    p = text_frame.add_paragraph()
                p.text = line
                p.level = 0

    prs.save(file_path)
    return {"success": True, "file_path": file_path}
150
+
151
+
152
if __name__ == "__main__":
    # CLI: <command> <file_path> [extra args...]; the result is printed as JSON.
    argv = sys.argv
    command = argv[1]
    file_path = argv[2]
    extra = argv[3:]

    if command == "read":
        # Optional positional arguments: page number, then page size.
        page = int(extra[0]) if extra else None
        page_size = int(extra[1]) if len(extra) > 1 else 100
        result = read_pptx(file_path, page, page_size)
    elif command == "info":
        result = get_pptx_info(file_path)
    elif command == "write":
        # The slide descriptions arrive as a JSON string.
        result = write_pptx(file_path, json.loads(extra[0]))
    else:
        result = {"error": f"Unknown command: {command}"}

    print(json.dumps(result, default=str))
@@ -10,6 +10,7 @@ import { runPy } from "@mcpc-tech/code-runner-mcp";
10
10
  const PACKAGES = [
11
11
  "openpyxl",
12
12
  "python-docx",
13
+ "python-pptx",
13
14
  "PyPDF2",
14
15
  ];
15
16
 
@@ -39,6 +40,7 @@ asyncio.run(main())
39
40
  packages: {
40
41
  openpyxl: "openpyxl",
41
42
  "python-docx": "python-docx",
43
+ "python-pptx": "python-pptx",
42
44
  PyPDF2: "PyPDF2",
43
45
  },
44
46
  });
@@ -1,35 +0,0 @@
1
- name: Test
2
-
3
- on:
4
- push:
5
- branches: [main, master]
6
- pull_request:
7
- branches: [main, master]
8
-
9
- jobs:
10
- test:
11
- runs-on: ubuntu-latest
12
-
13
- steps:
14
- - name: Checkout code
15
- uses: actions/checkout@v4
16
-
17
- - name: Setup pnpm
18
- uses: pnpm/action-setup@v2
19
- with:
20
- version: 9
21
-
22
- - name: Setup Node.js
23
- uses: actions/setup-node@v4
24
- with:
25
- node-version: 24
26
- cache: "pnpm"
27
-
28
- - name: Install dependencies
29
- run: pnpm install --frozen-lockfile
30
-
31
- - name: Run tests
32
- run: pnpm test:run
33
-
34
- - name: Build
35
- run: pnpm build
@@ -1,97 +0,0 @@
1
- """
2
- Excel document handler - read/write Excel files
3
- """
4
- import json
5
- import sys
6
- from pathlib import Path
7
-
8
def read_excel(file_path: str, sheet_name: str = None, page: int = None, page_size: int = 100):
    """Read one worksheet of an Excel file, optionally paginated by row.

    Args:
        file_path: Path of the workbook to open.
        sheet_name: Worksheet to read; defaults to the first sheet.
        page: 1-based page number; ``None`` returns every row.
        page_size: Number of rows per page (only used when ``page`` is set).

    Returns:
        A dict with ``sheet_name``, ``sheets`` (all sheet names),
        ``total_rows``, ``total_cols``, ``current_page``, ``page_size``
        (``None`` when not paginating), ``total_pages`` and ``data`` (the
        selected rows as tuples of cell values).

    Raises:
        ValueError: If ``page`` or ``page_size`` is below 1 while paginating.
    """
    # A page below 1 would become a negative slice start and return the
    # wrong rows; a page size below 1 cannot be paginated at all.
    if page is not None and (page < 1 or page_size < 1):
        raise ValueError("page and page_size must be >= 1")

    import openpyxl

    wb = openpyxl.load_workbook(file_path, data_only=True)

    if sheet_name is None:
        sheet_name = wb.sheetnames[0]

    ws = wb[sheet_name]

    data = list(ws.iter_rows(values_only=True))

    if page is None:
        total_pages = 1
    else:
        # Count pages from the full row list BEFORE slicing; counting after
        # the slice would only ever describe the current page.
        row_count = len(data)
        total_pages = (row_count + page_size - 1) // page_size if row_count else 1
        start = (page - 1) * page_size
        data = data[start:start + page_size]

    return {
        "sheet_name": sheet_name,
        "sheets": wb.sheetnames,
        "total_rows": ws.max_row,
        "total_cols": ws.max_column,
        "current_page": page,
        "page_size": page_size if page is not None else None,
        "total_pages": total_pages,
        "data": data,
    }
43
-
44
def get_excel_info(file_path: str):
    """Summarise an Excel workbook.

    Returns a dict with ``sheets`` (one entry per worksheet giving its
    ``name``, ``rows`` and ``cols``) and ``file_size`` in bytes.
    """
    import openpyxl

    workbook = openpyxl.load_workbook(file_path, data_only=True)
    size_in_bytes = Path(file_path).stat().st_size

    def describe(title):
        # Row and column counts are whatever openpyxl reports as the maxima.
        sheet = workbook[title]
        return {"name": title, "rows": sheet.max_row, "cols": sheet.max_column}

    return {
        "sheets": [describe(title) for title in workbook.sheetnames],
        "file_size": size_in_bytes,
    }
63
-
64
def write_excel(file_path: str, data: list, sheet_name: str = "Sheet1"):
    """Save ``data`` as a new single-sheet Excel workbook.

    Each element of ``data`` is appended as one row of the sheet named
    ``sheet_name``. Returns a dict reporting success and the saved path.
    """
    from openpyxl import Workbook

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = sheet_name

    for values in data:
        sheet.append(values)

    workbook.save(file_path)

    outcome = {"success": True}
    outcome["file_path"] = file_path
    return outcome
77
-
78
if __name__ == "__main__":
    # CLI: <command> <file_path> [extra args...]; the result is printed as JSON.
    argv = sys.argv
    command = argv[1]
    file_path = argv[2]
    extra = argv[3:]

    if command == "read":
        # Optional positional arguments: sheet name, page number, page size.
        sheet = extra[0] if extra else None
        page = int(extra[1]) if len(extra) > 1 else None
        page_size = int(extra[2]) if len(extra) > 2 else 100
        result = read_excel(file_path, sheet, page, page_size)
    elif command == "info":
        result = get_excel_info(file_path)
    elif command == "write":
        # The rows arrive as a JSON string, optionally followed by a sheet name.
        data = json.loads(extra[0])
        sheet = extra[1] if len(extra) > 1 else "Sheet1"
        result = write_excel(file_path, data, sheet)
    else:
        result = {"error": f"Unknown command: {command}"}

    print(json.dumps(result, default=str))
@@ -1,81 +0,0 @@
1
- """
2
- PDF document handler - read PDF files using PyPDF2
3
- """
4
- import json
5
- import sys
6
- from pathlib import Path
7
-
8
def read_pdf(file_path: str, page: int = None, page_size: int = 100):
    """Extract text from a PDF, optionally paginated by groups of pages.

    Args:
        file_path: Path of the PDF to open.
        page: 1-based number of the page group to read; ``None`` reads all
            pages.
        page_size: Number of PDF pages per group (only used when ``page`` is
            set).

    Returns:
        A dict with ``total_pages``, ``current_page_group``, ``page_size``
        (``None`` when not paginating), ``total_page_groups`` and ``content``
        (one dict per PDF page read, holding ``page_number``, ``text`` and
        ``words``).

    Raises:
        ValueError: If ``page`` or ``page_size`` is below 1 while paginating.
    """
    # A group number below 1 would yield negative page indexes, which index
    # from the END of the document and report non-positive page numbers.
    if page is not None and (page < 1 or page_size < 1):
        raise ValueError("page and page_size must be >= 1")

    from PyPDF2 import PdfReader

    reader = PdfReader(file_path)
    total_pages = len(reader.pages)

    if page is None:
        pages_to_read = range(total_pages)
        total_page_groups = 1
    else:
        start_page = (page - 1) * page_size
        end_page = min(start_page + page_size, total_pages)
        pages_to_read = range(start_page, end_page)
        total_page_groups = (total_pages + page_size - 1) // page_size

    content = []
    for i in pages_to_read:
        # extract_text() may yield nothing; normalise to "".
        text = reader.pages[i].extract_text() or ""
        content.append({
            "page_number": i + 1,
            "text": text,
            "words": len(text.split()),
        })

    return {
        "total_pages": total_pages,
        "current_page_group": page,
        "page_size": page_size if page is not None else None,
        "total_page_groups": total_page_groups,
        "content": content,
    }
44
-
45
def get_pdf_info(file_path: str):
    """Summarise a PDF file.

    Returns a dict with the page count (``pages``), the size on disk in bytes
    (``file_size``), the document metadata with every value stringified
    (``metadata``, only when the reader exposes any) and the number of
    whitespace-separated words across all pages (``total_words``).
    """
    from PyPDF2 import PdfReader

    pdf = PdfReader(file_path)

    details = {
        "pages": len(pdf.pages),
        "file_size": Path(file_path).stat().st_size,
    }

    document_meta = pdf.metadata
    if document_meta:
        details["metadata"] = {
            key: str(value) for key, value in document_meta.items()
        }

    # Pages without extractable text contribute zero words.
    details["total_words"] = sum(
        len((sheet.extract_text() or "").split()) for sheet in pdf.pages
    )

    return details
67
-
68
if __name__ == "__main__":
    # CLI: <command> <file_path> [extra args...]; the result is printed as JSON.
    argv = sys.argv
    command = argv[1]
    file_path = argv[2]
    extra = argv[3:]

    if command == "read":
        # Optional positional arguments: page group number, then pages per
        # group (the CLI default is 10).
        page = int(extra[0]) if extra else None
        page_size = int(extra[1]) if len(extra) > 1 else 10
        result = read_pdf(file_path, page, page_size)
    elif command == "info":
        result = get_pdf_info(file_path)
    else:
        result = {"error": f"Unknown command: {command}"}

    print(json.dumps(result, default=str))