claude-code-hwp-mcp 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/python/hwp_service.py +9 -1
- package/python/ref_reader.py +167 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-hwp-mcp",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.5.1",
|
|
4
4
|
"description": "MCP server for HWP (한글) document automation via pyhwpx COM API. 94 tools for document editing, analysis, table formatting, and AI-powered filling.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
package/python/hwp_service.py
CHANGED
|
@@ -1659,11 +1659,19 @@ def main():
|
|
|
1659
1659
|
if hwp is None:
|
|
1660
1660
|
from pyhwpx import Hwp
|
|
1661
1661
|
hwp = Hwp()
|
|
1662
|
-
#
|
|
1662
|
+
# 모든 대화상자 자동 수락 — COM 무한 대기 방지
|
|
1663
1663
|
try:
|
|
1664
1664
|
hwp.XHwpMessageBoxMode = 1 # 0=표시, 1=자동OK
|
|
1665
1665
|
except Exception:
|
|
1666
1666
|
pass
|
|
1667
|
+
try:
|
|
1668
|
+
hwp.SetMessageBoxMode(0x10000) # 모든 대화상자 자동 OK
|
|
1669
|
+
except Exception:
|
|
1670
|
+
pass
|
|
1671
|
+
try:
|
|
1672
|
+
hwp.RegisterModule('FilePathCheckDLL', 'FilePathCheckerModule')
|
|
1673
|
+
except Exception:
|
|
1674
|
+
pass
|
|
1667
1675
|
|
|
1668
1676
|
result = dispatch(hwp, method, params)
|
|
1669
1677
|
respond(req_id, True, result)
|
package/python/ref_reader.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
"""참고자료 텍스트 추출기.
|
|
2
|
-
지원: .txt, .csv, .xlsx, .json, .md
|
|
2
|
+
지원: .txt, .csv, .xlsx, .json, .md, .pdf
|
|
3
|
+
추가: .docx, .pptx, .doc, .ppt, .rtf 등 → PDF 변환 후 텍스트 추출
|
|
3
4
|
HWP/HWPX는 hwp_analyzer.analyze_document 사용 (이 모듈에서는 다루지 않음)
|
|
4
5
|
"""
|
|
5
6
|
import os
|
|
7
|
+
import sys
|
|
6
8
|
import json
|
|
9
|
+
import subprocess
|
|
10
|
+
import tempfile
|
|
7
11
|
|
|
8
12
|
|
|
9
13
|
def read_reference(file_path, max_chars=30000):
|
|
@@ -22,8 +26,15 @@ def read_reference(file_path, max_chars=30000):
|
|
|
22
26
|
return _read_excel(file_path, max_chars)
|
|
23
27
|
elif ext == '.json':
|
|
24
28
|
return _read_json(file_path, max_chars)
|
|
29
|
+
elif ext == '.pdf':
|
|
30
|
+
return _read_pdf(file_path, max_chars)
|
|
31
|
+
elif ext in ('.docx', '.doc', '.pptx', '.ppt', '.rtf', '.odt', '.odp'):
|
|
32
|
+
return _read_via_pdf_conversion(file_path, max_chars)
|
|
25
33
|
else:
|
|
26
|
-
raise ValueError(
|
|
34
|
+
raise ValueError(
|
|
35
|
+
f"지원하지 않는 파일 형식: {ext}. "
|
|
36
|
+
f"지원: .txt, .md, .csv, .xlsx, .json, .pdf, .docx, .pptx, .rtf"
|
|
37
|
+
)
|
|
27
38
|
|
|
28
39
|
|
|
29
40
|
def _read_text(path, max_chars):
|
|
@@ -113,3 +124,157 @@ def _read_json(path, max_chars):
|
|
|
113
124
|
"file_name": os.path.basename(path),
|
|
114
125
|
"data": data,
|
|
115
126
|
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _read_pdf(path, max_chars):
    """Extract plain text from a PDF using PyMuPDF.

    Pages are read in order. Reading stops as soon as the accumulated text
    length exceeds ``max_chars``; the joined text is then cut to
    ``max_chars`` characters.

    Args:
        path: Path of the PDF file to open.
        max_chars: Maximum number of characters returned in ``content``.

    Returns:
        dict with ``format`` ("pdf"), ``file_name`` (base name of ``path``),
        ``content`` (page texts joined by a blank line, truncated),
        ``page_count`` (number of pages actually read, which can be smaller
        than the number of pages in the document) and ``char_count``
        (length of ``content``).

    Raises:
        ImportError: If PyMuPDF is not installed.
    """
    try:
        import fitz  # PyMuPDF
    except ImportError:
        raise ImportError("PyMuPDF가 필요합니다. pip install PyMuPDF")

    doc = fitz.open(path)
    page_texts = []
    total_chars = 0
    try:
        for page in doc:
            text = page.get_text("text")
            page_texts.append(text)
            total_chars += len(text)
            if total_chars > max_chars:
                break
    finally:
        # Release the file handle even when text extraction fails midway.
        doc.close()

    content = "\n\n".join(page_texts)[:max_chars]
    return {
        "format": "pdf",
        "file_name": os.path.basename(path),
        "content": content,
        "page_count": len(page_texts),
        "char_count": len(content),
    }
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _read_via_pdf_conversion(path, max_chars):
    """Read an office document by converting it to PDF, with direct fallbacks.

    Strategies are tried in this order:

    1. Convert to PDF with ``_convert_to_pdf_libreoffice`` and read the
       result with ``_read_pdf``.
    2. For ``.docx`` only: ``_read_docx_direct``.
    3. For ``.pptx`` only: ``_read_pptx_direct``.

    Args:
        path: Path of the source document.
        max_chars: Maximum number of characters of text to return.

    Returns:
        The dict produced by whichever reader succeeded. On the PDF route
        the dict also carries ``original_format`` (extension without the
        dot) and ``conversion_method`` ("libreoffice"), and ``file_name`` is
        the base name of ``path`` rather than of the temporary PDF.

    Raises:
        ImportError: If the PDF was produced but PyMuPDF is missing and no
            direct reader could handle the file.
        ValueError: If no strategy could read the file.
    """
    ext = os.path.splitext(path)[1].lower()
    pdf_error = None

    # 1st choice: convert to PDF with the LibreOffice CLI.
    pdf_path = _convert_to_pdf_libreoffice(path)
    if pdf_path:
        try:
            result = _read_pdf(pdf_path, max_chars)
        except ImportError as exc:
            # PyMuPDF is missing: remember why, but still try the direct
            # readers below instead of giving up immediately.
            pdf_error = exc
        else:
            # Report the file the caller asked for, not the temporary PDF.
            result["file_name"] = os.path.basename(path)
            result["original_format"] = ext.lstrip('.')
            result["conversion_method"] = "libreoffice"
            return result
        finally:
            # Always drop the temporary PDF, including when reading failed.
            try:
                os.remove(pdf_path)
            except OSError:
                pass

    # 2nd choice: direct text extraction with python-docx (DOCX only).
    if ext == '.docx':
        result = _read_docx_direct(path, max_chars)
        if result:
            return result

    # 3rd choice: direct text extraction with python-pptx (PPTX only).
    if ext == '.pptx':
        result = _read_pptx_direct(path, max_chars)
        if result:
            return result

    if pdf_error is not None:
        # Conversion worked; the real blocker is the missing PDF reader.
        raise pdf_error

    raise ValueError(
        f"{ext} 파일을 읽을 수 없습니다. "
        f"LibreOffice를 설치하면 자동 변환됩니다: https://www.libreoffice.org/download/"
    )
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _convert_to_pdf_libreoffice(path):
    """Convert a document to PDF with the LibreOffice command-line tool.

    The conversion runs inside a private temporary directory, so a
    same-named PDF that already exists in the shared temp directory (or a
    concurrent conversion of a file with the same stem) can never be
    mistaken for this call's output. On success the PDF is moved to a
    uniquely named file in the system temp directory.

    Args:
        path: Path of the document to convert.

    Returns:
        Path of the generated PDF (the caller is responsible for deleting
        it), or ``None`` when LibreOffice is not found or the conversion
        fails.
    """
    import shutil  # local import: only this helper needs it

    # Candidate executables: whatever is on PATH first, then the default
    # Windows install locations.
    candidates = (
        "soffice",
        r"C:\Program Files\LibreOffice\program\soffice.exe",
        r"C:\Program Files (x86)\LibreOffice\program\soffice.exe",
    )
    # Resolve without spawning a process: a slow LibreOffice start-up must
    # not be mistaken for "not installed", and a non-executable file must
    # not raise out of the probe.
    soffice = next(
        (found for found in map(shutil.which, candidates) if found), None
    )
    if not soffice:
        print("[INFO] LibreOffice 미설치 — PDF 변환 불가, 대체 방법 시도", file=sys.stderr)
        return None

    stem = os.path.splitext(os.path.basename(path))[0]
    try:
        with tempfile.TemporaryDirectory() as workdir:
            completed = subprocess.run(
                [soffice, "--headless", "--convert-to", "pdf",
                 "--outdir", workdir, path],
                capture_output=True, timeout=60,
            )
            produced = os.path.join(workdir, f"{stem}.pdf")
            if not os.path.exists(produced):
                print(
                    f"[WARN] LibreOffice 변환 실패: exit code {completed.returncode}",
                    file=sys.stderr,
                )
                return None

            # Move the result out before the work directory is removed.
            fd, pdf_path = tempfile.mkstemp(prefix=f"{stem}_", suffix=".pdf")
            os.close(fd)
            try:
                os.replace(produced, pdf_path)
            except OSError:
                os.remove(pdf_path)
                raise
            return pdf_path
    except Exception as e:
        print(f"[WARN] LibreOffice 변환 실패: {e}", file=sys.stderr)

    return None
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _read_docx_direct(path, max_chars):
    """Extract paragraph text from a DOCX file with python-docx.

    Paragraphs whose text is empty or whitespace-only are skipped; the
    remaining paragraph texts are joined with newlines and cut to
    ``max_chars`` characters.

    Args:
        path: Path of the DOCX file.
        max_chars: Maximum number of characters returned in ``content``.

    Returns:
        dict with ``format``, ``file_name``, ``content``,
        ``paragraph_count`` (non-blank paragraphs found) and ``char_count``,
        or ``None`` when python-docx is not installed.
    """
    try:
        from docx import Document
    except ImportError:
        return None

    kept = []
    for para in Document(path).paragraphs:
        if para.text.strip():
            kept.append(para.text)

    body = "\n".join(kept)[:max_chars]
    return dict(
        format="docx",
        file_name=os.path.basename(path),
        content=body,
        paragraph_count=len(kept),
        char_count=len(body),
    )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _read_pptx_direct(path, max_chars):
    """Extract slide text from a PPTX file with python-pptx.

    For each slide, the stripped non-empty paragraph texts of every shape
    that has a text frame are joined with newlines. Slides are read in
    order until the accumulated length exceeds ``max_chars``; slide texts
    are then joined by a blank line and cut to ``max_chars`` characters.

    Args:
        path: Path of the PPTX file.
        max_chars: Maximum number of characters returned in ``content``.

    Returns:
        dict with ``format``, ``file_name``, ``content``, ``slide_count``
        (slides actually read) and ``char_count``, or ``None`` when
        python-pptx is not installed.
    """
    try:
        from pptx import Presentation
    except ImportError:
        return None

    collected = []
    running = 0
    for number, slide in enumerate(Presentation(path).slides, start=1):
        lines = [
            stripped
            for shape in slide.shapes
            if shape.has_text_frame
            for stripped in (
                para.text.strip() for para in shape.text_frame.paragraphs
            )
            if stripped
        ]
        joined = "\n".join(lines)
        running += len(joined)
        collected.append({"slide": number, "text": joined})
        if running > max_chars:
            break

    clipped = "\n\n".join(entry["text"] for entry in collected)[:max_chars]
    return {
        "format": "pptx",
        "file_name": os.path.basename(path),
        "content": clipped,
        "slide_count": len(collected),
        "char_count": len(clipped),
    }
|