claude-code-hwp-mcp 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-hwp-mcp",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "description": "MCP server for HWP (한글) document automation via pyhwpx COM API. 94 tools for document editing, analysis, table formatting, and AI-powered filling.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -1659,11 +1659,19 @@ def main():
1659
1659
  if hwp is None:
1660
1660
  from pyhwpx import Hwp
1661
1661
  hwp = Hwp()
1662
- # 메시지박스(얼럿/다이얼로그) 자동 확인 — COM 무한 대기 방지
1662
+ # 모든 대화상자 자동 수락 — COM 무한 대기 방지
1663
1663
  try:
1664
1664
  hwp.XHwpMessageBoxMode = 1 # 0=표시, 1=자동OK
1665
1665
  except Exception:
1666
1666
  pass
1667
+ try:
1668
+ hwp.SetMessageBoxMode(0x10000) # 모든 대화상자 자동 OK
1669
+ except Exception:
1670
+ pass
1671
+ try:
1672
+ hwp.RegisterModule('FilePathCheckDLL', 'FilePathCheckerModule')
1673
+ except Exception:
1674
+ pass
1667
1675
 
1668
1676
  result = dispatch(hwp, method, params)
1669
1677
  respond(req_id, True, result)
@@ -1,9 +1,13 @@
1
1
  """참고자료 텍스트 추출기.
2
- 지원: .txt, .csv, .xlsx, .json, .md
2
+ 지원: .txt, .csv, .xlsx, .json, .md, .pdf
3
+ 추가: .docx, .pptx, .doc, .ppt, .rtf 등 → PDF 변환 후 텍스트 추출
3
4
  HWP/HWPX는 hwp_analyzer.analyze_document 사용 (이 모듈에서는 다루지 않음)
4
5
  """
5
6
  import os
7
+ import sys
6
8
  import json
9
+ import subprocess
10
+ import tempfile
7
11
 
8
12
 
9
13
  def read_reference(file_path, max_chars=30000):
@@ -22,8 +26,15 @@ def read_reference(file_path, max_chars=30000):
22
26
  return _read_excel(file_path, max_chars)
23
27
  elif ext == '.json':
24
28
  return _read_json(file_path, max_chars)
29
+ elif ext == '.pdf':
30
+ return _read_pdf(file_path, max_chars)
31
+ elif ext in ('.docx', '.doc', '.pptx', '.ppt', '.rtf', '.odt', '.odp'):
32
+ return _read_via_pdf_conversion(file_path, max_chars)
25
33
  else:
26
- raise ValueError(f"지원하지 않는 파일 형식: {ext}. 지원: .txt, .md, .csv, .xlsx, .json")
34
+ raise ValueError(
35
+ f"지원하지 않는 파일 형식: {ext}. "
36
+ f"지원: .txt, .md, .csv, .xlsx, .json, .pdf, .docx, .pptx, .rtf"
37
+ )
27
38
 
28
39
 
29
40
  def _read_text(path, max_chars):
@@ -113,3 +124,157 @@ def _read_json(path, max_chars):
113
124
  "file_name": os.path.basename(path),
114
125
  "data": data,
115
126
  }
127
+
128
+
129
+ def _read_pdf(path, max_chars):
130
+ """PDF에서 텍스트 추출 (PyMuPDF 사용)."""
131
+ try:
132
+ import fitz # PyMuPDF
133
+ except ImportError:
134
+ raise ImportError("PyMuPDF가 필요합니다. pip install PyMuPDF")
135
+
136
+ doc = fitz.open(path)
137
+ pages = []
138
+ total_chars = 0
139
+ for i, page in enumerate(doc):
140
+ text = page.get_text("text")
141
+ total_chars += len(text)
142
+ pages.append({"page": i + 1, "text": text})
143
+ if total_chars > max_chars:
144
+ break
145
+ doc.close()
146
+
147
+ full_text = "\n\n".join(p["text"] for p in pages)
148
+ return {
149
+ "format": "pdf",
150
+ "file_name": os.path.basename(path),
151
+ "content": full_text[:max_chars],
152
+ "page_count": len(pages),
153
+ "char_count": len(full_text[:max_chars]),
154
+ }
155
+
156
+
157
def _read_via_pdf_conversion(path, max_chars):
    """Extract text from an office document by converting it to PDF first.

    Strategies are tried in this order:

    1. Convert to PDF with the LibreOffice CLI and read the result with
       ``_read_pdf``.
    2. For ``.docx`` only: read the text directly with ``_read_docx_direct``.
    3. For ``.pptx`` only: read the text directly with ``_read_pptx_direct``.

    Args:
        path: Path of the document to read.
        max_chars: Maximum number of characters of text to return.

    Returns:
        The result dict of whichever reader succeeded. When the LibreOffice
        route is used, the dict is the ``_read_pdf`` result with two extra
        keys: ``original_format`` (source extension without the dot) and
        ``conversion_method`` ("libreoffice").

    Raises:
        ValueError: If none of the strategies produced a result.
    """
    ext = os.path.splitext(path)[1].lower()

    # First choice: convert to PDF with the LibreOffice CLI.
    pdf_path = _convert_to_pdf_libreoffice(path)
    if pdf_path:
        try:
            result = _read_pdf(pdf_path, max_chars)
        finally:
            # Remove the temporary PDF even when reading it fails, so a
            # failed extraction does not leave files behind in the temp dir.
            try:
                os.remove(pdf_path)
            except OSError:
                pass
        # NOTE(review): "file_name" here is the converted PDF's name, whereas
        # the direct readers report the original file's name — confirm intent.
        result["original_format"] = ext.lstrip('.')
        result["conversion_method"] = "libreoffice"
        return result

    # Second choice: direct text extraction with python-docx (DOCX only).
    if ext == '.docx':
        result = _read_docx_direct(path, max_chars)
        if result:
            return result

    # Third choice: direct text extraction with python-pptx (PPTX only).
    if ext == '.pptx':
        result = _read_pptx_direct(path, max_chars)
        if result:
            return result

    raise ValueError(
        f"{ext} 파일을 읽을 수 없습니다. "
        f"LibreOffice를 설치하면 자동 변환됩니다: https://www.libreoffice.org/download/"
    )
190
+
191
+
192
+ def _convert_to_pdf_libreoffice(path):
193
+ """LibreOffice CLI로 PDF 변환. 성공 시 PDF 경로 반환, 실패 시 None."""
194
+ # LibreOffice 경로 탐색
195
+ soffice_paths = [
196
+ "soffice", # PATH에 있으면
197
+ r"C:\Program Files\LibreOffice\program\soffice.exe",
198
+ r"C:\Program Files (x86)\LibreOffice\program\soffice.exe",
199
+ ]
200
+
201
+ soffice = None
202
+ for p in soffice_paths:
203
+ try:
204
+ subprocess.run([p, "--version"], capture_output=True, timeout=5)
205
+ soffice = p
206
+ break
207
+ except (FileNotFoundError, subprocess.TimeoutExpired):
208
+ continue
209
+
210
+ if not soffice:
211
+ print("[INFO] LibreOffice 미설치 — PDF 변환 불가, 대체 방법 시도", file=sys.stderr)
212
+ return None
213
+
214
+ try:
215
+ outdir = tempfile.gettempdir()
216
+ subprocess.run(
217
+ [soffice, "--headless", "--convert-to", "pdf", "--outdir", outdir, path],
218
+ capture_output=True, timeout=60
219
+ )
220
+ basename = os.path.splitext(os.path.basename(path))[0]
221
+ pdf_path = os.path.join(outdir, f"{basename}.pdf")
222
+ if os.path.exists(pdf_path):
223
+ return pdf_path
224
+ except Exception as e:
225
+ print(f"[WARN] LibreOffice 변환 실패: {e}", file=sys.stderr)
226
+
227
+ return None
228
+
229
+
230
+ def _read_docx_direct(path, max_chars):
231
+ """python-docx로 DOCX 텍스트 직접 추출."""
232
+ try:
233
+ from docx import Document
234
+ except ImportError:
235
+ return None
236
+
237
+ doc = Document(path)
238
+ paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
239
+ content = "\n".join(paragraphs)[:max_chars]
240
+ return {
241
+ "format": "docx",
242
+ "file_name": os.path.basename(path),
243
+ "content": content,
244
+ "paragraph_count": len(paragraphs),
245
+ "char_count": len(content),
246
+ }
247
+
248
+
249
+ def _read_pptx_direct(path, max_chars):
250
+ """python-pptx로 PPTX 텍스트 직접 추출."""
251
+ try:
252
+ from pptx import Presentation
253
+ except ImportError:
254
+ return None
255
+
256
+ prs = Presentation(path)
257
+ slides = []
258
+ total_chars = 0
259
+ for i, slide in enumerate(prs.slides):
260
+ texts = []
261
+ for shape in slide.shapes:
262
+ if shape.has_text_frame:
263
+ for para in shape.text_frame.paragraphs:
264
+ text = para.text.strip()
265
+ if text:
266
+ texts.append(text)
267
+ slide_text = "\n".join(texts)
268
+ total_chars += len(slide_text)
269
+ slides.append({"slide": i + 1, "text": slide_text})
270
+ if total_chars > max_chars:
271
+ break
272
+
273
+ full_text = "\n\n".join(s["text"] for s in slides)
274
+ return {
275
+ "format": "pptx",
276
+ "file_name": os.path.basename(path),
277
+ "content": full_text[:max_chars],
278
+ "slide_count": len(slides),
279
+ "char_count": len(full_text[:max_chars]),
280
+ }