npm - claude-code-hwp-mcp - Versions diffs - 0.5.0 → 0.5.2 - Mend

claude-code-hwp-mcp 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/tools/analysis-tools.js +3 -0
package/dist/tools/editing-tools.js +10 -0
package/package.json +1 -1
package/python/hwp_service.py +36 -1
package/python/ref_reader.py +167 -2

package/dist/tools/analysis-tools.js CHANGED Viewed

@@ -151,6 +151,9 @@ export function registerAnalysisTools(server, bridge, toolset = 'standard') {
             // HWPX → XML 직접 검색 시도. EBUSY 시 COM 폴백.
             if (bridge.getCurrentDocumentFormat() === 'HWPX') {
                 try {
+                    // COM 메모리 변경사항을 파일에 반영 (XML 엔진이 최신 내용을 읽도록)
+                    await bridge.ensureRunning();
+                    await bridge.send('save_document', {});
                     const doc = await readHwpxXml(filePath, 'Contents/section0.xml');
                     const result = searchTextInSection(doc, search);
                     const limited = max_results ? result.results.slice(0, max_results) : result.results.slice(0, 50);

package/dist/tools/editing-tools.js CHANGED Viewed

@@ -135,6 +135,9 @@ export function registerEditingTools(server, bridge, toolset = 'standard') {
             // HWPX → XML 직접 치환 시도 (COM 우회). EBUSY 시 COM 폴백.
             if (bridge.getCurrentDocumentFormat() === 'HWPX' && !use_regex) {
                 try {
+                    // COM 메모리 변경사항을 파일에 반영 (XML 엔진이 최신 내용을 읽도록)
+                    await bridge.ensureRunning();
+                    await bridge.send('save_document', {});
                     const doc = await readHwpxXml(filePath, 'Contents/section0.xml');
                     const count = replaceTextInSection(doc, find, replace);
                     await writeHwpxXml(filePath, filePath, 'Contents/section0.xml', doc);
@@ -183,6 +186,9 @@ export function registerEditingTools(server, bridge, toolset = 'standard') {
                 // HWPX → XML 직접 다건 치환 시도. EBUSY 시 COM 폴백.
                 if (bridge.getCurrentDocumentFormat() === 'HWPX' && !use_regex) {
                     try {
+                        // COM 메모리 변경사항을 파일에 반영
+                        await bridge.ensureRunning();
+                        await bridge.send('save_document', {});
                         const doc = await readHwpxXml(filePath, 'Contents/section0.xml');
                         const results = [];
                         let totalCount = 0;
@@ -235,6 +241,8 @@ export function registerEditingTools(server, bridge, toolset = 'standard') {
                 // HWPX → XML 직접 조작 시도. EBUSY 시 COM 폴백.
                 if (bridge.getCurrentDocumentFormat() === 'HWPX' && !color) {
                     try {
+                        await bridge.ensureRunning();
+                        await bridge.send('save_document', {});
                         const doc = await readHwpxXml(filePath, 'Contents/section0.xml');
                         const found = findAndAppendInSection(doc, find, append_text);
                         if (!found) {
@@ -418,6 +426,8 @@ export function registerEditingTools(server, bridge, toolset = 'standard') {
                 // HWPX → XML 직접 N번째 치환 시도. EBUSY 시 COM 폴백.
                 if (bridge.getCurrentDocumentFormat() === 'HWPX') {
                     try {
+                        await bridge.ensureRunning();
+                        await bridge.send('save_document', {});
                         const doc = await readHwpxXml(filePath, 'Contents/section0.xml');
                         const replaced = replaceTextNthInSection(doc, find, replace, nth);
                         if (replaced) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-hwp-mcp",
-  "version": "0.5.0",
+  "version": "0.5.2",
   "description": "MCP server for HWP (한글) document automation via pyhwpx COM API. 94 tools for document editing, analysis, table formatting, and AI-powered filling.",
   "type": "module",
   "main": "dist/index.js",

package/python/hwp_service.py CHANGED Viewed

@@ -201,6 +201,12 @@ def dispatch(hwp, method, params):
         validate_params(params, ["path"], method)
         save_path = validate_file_path(params["path"], must_exist=False)
         fmt = params.get("format", "HWP").upper()  # pyhwpx는 대문자 포맷 필요 (HWP, HWPX, PDF 등)
+        # 내보내기 전 현재 문서 저장 (COM 메모리 → 파일 반영, 빈 PDF 방지)
+        if _current_doc_path and fmt in ("PDF", "DOCX", "HTML"):
+            try:
+                hwp.save()
+            except Exception:
+                pass
         hwp.save_as(save_path, fmt)
         # 파일 실제 생성 확인
         if not os.path.exists(save_path):
@@ -654,6 +660,21 @@ def dispatch(hwp, method, params):
         validate_params(params, ["path", "format"], method)
         save_path = validate_file_path(params["path"], must_exist=False)
         fmt = params["format"].upper()  # HWP, HWPX, PDF, HTML, TXT 등
+        # DOCX/HTML은 HWP COM에서 미지원 — 타임아웃 방지
+        if fmt in ("DOCX", "DOC"):
+            return {"status": "not_supported",
+                    "message": "DOCX 직접 내보내기는 한/글 COM에서 지원되지 않습니다. PDF로 내보내기를 권장합니다.",
+                    "alternative": "hwp_export_pdf"}
+        if fmt == "HTML":
+            return {"status": "not_supported",
+                    "message": "HTML 직접 내보내기는 한/글 COM에서 지원되지 않습니다. hwp_get_as_markdown으로 마크다운 변환 후 HTML로 변환하세요.",
+                    "alternative": "hwp_get_as_markdown"}
+        # PDF/내보내기 전 현재 문서 저장 (COM 메모리 → 파일 반영, 빈 PDF 방지)
+        if _current_doc_path:
+            try:
+                hwp.save()
+            except Exception:
+                pass
         result = hwp.save_as(save_path, fmt)
         # 파일 실제 생성 확인
         file_exists = os.path.exists(save_path)
@@ -665,6 +686,12 @@ def dispatch(hwp, method, params):
     if method == "verify_layout":
         # PDF로 내보내고 PNG 이미지로 변환 → Claude Code의 Read로 시각적 검증
         import tempfile
+        # 먼저 현재 문서 저장 (COM 메모리 → 파일 반영, 빈 PDF 방지)
+        if _current_doc_path:
+            try:
+                hwp.save()
+            except Exception:
+                pass
         tmp_pdf = os.path.join(tempfile.gettempdir(), "hwp_verify_layout.pdf")
         try:
             hwp.save_as(tmp_pdf, "PDF")
@@ -1659,11 +1686,19 @@ def main():
                 if hwp is None:
                     from pyhwpx import Hwp
                     hwp = Hwp()
-                    # 메시지박스(얼럿/다이얼로그) 자동 확인 — COM 무한 대기 방지
+                    # 모든 대화상자 자동 수락 — COM 무한 대기 방지
                     try:
                         hwp.XHwpMessageBoxMode = 1  # 0=표시, 1=자동OK
                     except Exception:
                         pass
+                    try:
+                        hwp.SetMessageBoxMode(0x10000)  # 모든 대화상자 자동 OK
+                    except Exception:
+                        pass
+                    try:
+                        hwp.RegisterModule('FilePathCheckDLL', 'FilePathCheckerModule')
+                    except Exception:
+                        pass
                 result = dispatch(hwp, method, params)
                 respond(req_id, True, result)

package/python/ref_reader.py CHANGED Viewed

@@ -1,9 +1,13 @@
 """참고자료 텍스트 추출기.
-지원: .txt, .csv, .xlsx, .json, .md
+지원: .txt, .csv, .xlsx, .json, .md, .pdf
+추가: .docx, .pptx, .doc, .ppt, .rtf 등 → PDF 변환 후 텍스트 추출
 HWP/HWPX는 hwp_analyzer.analyze_document 사용 (이 모듈에서는 다루지 않음)
 """
 import os
+import sys
 import json
+import subprocess
+import tempfile
 def read_reference(file_path, max_chars=30000):
@@ -22,8 +26,15 @@ def read_reference(file_path, max_chars=30000):
         return _read_excel(file_path, max_chars)
     elif ext == '.json':
         return _read_json(file_path, max_chars)
+    elif ext == '.pdf':
+        return _read_pdf(file_path, max_chars)
+    elif ext in ('.docx', '.doc', '.pptx', '.ppt', '.rtf', '.odt', '.odp'):
+        return _read_via_pdf_conversion(file_path, max_chars)
     else:
-        raise ValueError(f"지원하지 않는 파일 형식: {ext}. 지원: .txt, .md, .csv, .xlsx, .json")
+        raise ValueError(
+            f"지원하지 않는 파일 형식: {ext}. "
+            f"지원: .txt, .md, .csv, .xlsx, .json, .pdf, .docx, .pptx, .rtf"
+        )
 def _read_text(path, max_chars):
@@ -113,3 +124,157 @@ def _read_json(path, max_chars):
         "file_name": os.path.basename(path),
         "data": data,
     }
def _read_pdf(path, max_chars):
    """Extract plain text from a PDF with PyMuPDF.

    Pages are read in order; reading stops after the first page that pushes
    the accumulated text length past ``max_chars``.

    Args:
        path: Path of the PDF file to read.
        max_chars: Maximum number of characters kept in ``content``.

    Returns:
        A dict with ``format`` ("pdf"), ``file_name``, ``content`` (page texts
        joined by blank lines, truncated to ``max_chars``), ``page_count``
        (number of pages actually read, which can be fewer than the document
        holds when the limit is hit) and ``char_count``.

    Raises:
        ImportError: If PyMuPDF is not installed.
    """
    try:
        import fitz  # PyMuPDF
    except ImportError as exc:
        raise ImportError("PyMuPDF가 필요합니다. pip install PyMuPDF") from exc

    doc = fitz.open(path)
    pages = []
    total_chars = 0
    try:
        for i, page in enumerate(doc):
            text = page.get_text("text")
            total_chars += len(text)
            pages.append({"page": i + 1, "text": text})
            if total_chars > max_chars:
                break
    finally:
        # Always release the document handle, even if a page fails to parse.
        doc.close()

    content = "\n\n".join(p["text"] for p in pages)[:max_chars]
    return {
        "format": "pdf",
        "file_name": os.path.basename(path),
        "content": content,
        "page_count": len(pages),
        "char_count": len(content),
    }
def _read_via_pdf_conversion(path, max_chars):
    """Read an office document by converting it to PDF, with direct fallbacks.

    Order of attempts:
      1. Convert to PDF with the LibreOffice CLI and extract text from the PDF.
      2. For ``.docx`` only: read the text directly with python-docx.
      3. For ``.pptx`` only: read the text directly with python-pptx.

    Args:
        path: Path of the source document.
        max_chars: Maximum number of characters of extracted text to return.

    Returns:
        The dict produced by the reader that succeeded. When the LibreOffice
        route is used, ``original_format`` and ``conversion_method`` are added.

    Raises:
        ValueError: If no conversion or direct reader could handle the file.
    """
    ext = os.path.splitext(path)[1].lower()

    # First choice: convert to PDF with the LibreOffice CLI.
    pdf_path = _convert_to_pdf_libreoffice(path)
    if pdf_path:
        try:
            result = _read_pdf(pdf_path, max_chars)
        finally:
            # Remove the temporary PDF even when text extraction fails
            # (for example when PyMuPDF is not installed).
            try:
                os.remove(pdf_path)
            except OSError:
                pass
        result["original_format"] = ext.lstrip('.')
        result["conversion_method"] = "libreoffice"
        return result

    # Second choice: direct text extraction with python-docx (DOCX only).
    if ext == '.docx':
        result = _read_docx_direct(path, max_chars)
        if result:
            return result

    # Third choice: direct text extraction with python-pptx (PPTX only).
    if ext == '.pptx':
        result = _read_pptx_direct(path, max_chars)
        if result:
            return result

    raise ValueError(
        f"{ext} 파일을 읽을 수 없습니다. "
        f"LibreOffice를 설치하면 자동 변환됩니다: https://www.libreoffice.org/download/"
    )
def _convert_to_pdf_libreoffice(path):
    """Convert a document to PDF with the LibreOffice CLI.

    The PDF is written to the system temp directory as ``<basename>.pdf``.
    The caller is responsible for deleting it.

    Args:
        path: Path of the document to convert.

    Returns:
        The path of the generated PDF, or ``None`` when LibreOffice is not
        available or the conversion did not produce a file.
    """
    # Candidate locations of the LibreOffice executable.
    soffice_paths = [
        "soffice",  # found via PATH
        r"C:\Program Files\LibreOffice\program\soffice.exe",
        r"C:\Program Files (x86)\LibreOffice\program\soffice.exe",
    ]
    soffice = None
    for candidate in soffice_paths:
        try:
            subprocess.run([candidate, "--version"], capture_output=True, timeout=5)
        except (OSError, subprocess.TimeoutExpired):
            # OSError also covers PermissionError / non-executable files, so
            # one unusable candidate does not abort the whole search.
            continue
        soffice = candidate
        break
    if not soffice:
        print("[INFO] LibreOffice 미설치 — PDF 변환 불가, 대체 방법 시도", file=sys.stderr)
        return None

    outdir = tempfile.gettempdir()
    basename = os.path.splitext(os.path.basename(path))[0]
    pdf_path = os.path.join(outdir, f"{basename}.pdf")
    try:
        # Drop any leftover PDF with the same name so that a failed conversion
        # cannot be mistaken for a successful one.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        subprocess.run(
            [soffice, "--headless", "--convert-to", "pdf", "--outdir", outdir, path],
            capture_output=True, timeout=60
        )
        if os.path.exists(pdf_path):
            return pdf_path
    except Exception as e:
        # Best effort: any failure here just means "no PDF available".
        print(f"[WARN] LibreOffice 변환 실패: {e}", file=sys.stderr)
    return None
def _read_docx_direct(path, max_chars):
    """Extract text from a DOCX file directly with python-docx.

    Body paragraphs come first, followed by one line per non-empty table row
    (cell texts joined by tabs), so text that lives only in tables is not
    lost.

    Args:
        path: Path of the DOCX file.
        max_chars: Maximum number of characters kept in ``content``.

    Returns:
        A dict with ``format`` ("docx"), ``file_name``, ``content``,
        ``paragraph_count`` (non-blank body paragraphs only) and
        ``char_count``; or ``None`` when python-docx is not installed.
    """
    try:
        from docx import Document
    except ImportError:
        return None

    doc = Document(path)
    paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]

    # NOTE(review): merged cells may be reported once per spanned column by
    # python-docx, so their text can repeat within a row — confirm if this
    # matters for callers.
    table_lines = []
    for table in doc.tables:
        for row in table.rows:
            cells = [cell.text.strip() for cell in row.cells]
            if any(cells):
                table_lines.append("\t".join(cells))

    content = "\n".join(paragraphs + table_lines)[:max_chars]
    return {
        "format": "docx",
        "file_name": os.path.basename(path),
        "content": content,
        "paragraph_count": len(paragraphs),
        "char_count": len(content),
    }
def _read_pptx_direct(path, max_chars):
    """Extract text from a PPTX file directly with python-pptx.

    For every slide the text of text frames, table cells and shapes nested in
    groups is collected. Reading stops after the first slide that pushes the
    accumulated text length past ``max_chars``.

    Args:
        path: Path of the PPTX file.
        max_chars: Maximum number of characters kept in ``content``.

    Returns:
        A dict with ``format`` ("pptx"), ``file_name``, ``content`` (slide
        texts joined by blank lines, truncated to ``max_chars``),
        ``slide_count`` (number of slides actually read) and ``char_count``;
        or ``None`` when python-pptx is not installed.
    """
    try:
        from pptx import Presentation
    except ImportError:
        return None

    def shape_lines(shape):
        """Return the non-empty text lines held by one shape, recursively."""
        lines = []
        if getattr(shape, "has_text_frame", False):
            for para in shape.text_frame.paragraphs:
                text = para.text.strip()
                if text:
                    lines.append(text)
        if getattr(shape, "has_table", False):
            # One line per non-empty table row, cells separated by tabs.
            for row in shape.table.rows:
                cells = [cell.text.strip() for cell in row.cells]
                if any(cells):
                    lines.append("\t".join(cells))
        # Group shapes expose their members through ``.shapes``.
        for child in getattr(shape, "shapes", None) or ():
            lines.extend(shape_lines(child))
        return lines

    prs = Presentation(path)
    slides = []
    total_chars = 0
    for i, slide in enumerate(prs.slides):
        texts = []
        for shape in slide.shapes:
            texts.extend(shape_lines(shape))
        slide_text = "\n".join(texts)
        total_chars += len(slide_text)
        slides.append({"slide": i + 1, "text": slide_text})
        if total_chars > max_chars:
            break

    content = "\n\n".join(s["text"] for s in slides)[:max_chars]
    return {
        "format": "pptx",
        "file_name": os.path.basename(path),
        "content": content,
        "slide_count": len(slides),
        "char_count": len(content),
    }