claude-code-hwp-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/README.md +409 -0
  2. package/dist/hwp-bridge.d.ts +67 -0
  3. package/dist/hwp-bridge.js +320 -0
  4. package/dist/hwpx-engine.d.ts +39 -0
  5. package/dist/hwpx-engine.js +187 -0
  6. package/dist/index.d.ts +2 -0
  7. package/dist/index.js +54 -0
  8. package/dist/prompts/hwp-prompts.d.ts +2 -0
  9. package/dist/prompts/hwp-prompts.js +368 -0
  10. package/dist/resources/document-resources.d.ts +3 -0
  11. package/dist/resources/document-resources.js +109 -0
  12. package/dist/server.d.ts +12 -0
  13. package/dist/server.js +29 -0
  14. package/dist/tools/analysis-tools.d.ts +4 -0
  15. package/dist/tools/analysis-tools.js +414 -0
  16. package/dist/tools/composite-tools.d.ts +3 -0
  17. package/dist/tools/composite-tools.js +664 -0
  18. package/dist/tools/document-tools.d.ts +3 -0
  19. package/dist/tools/document-tools.js +264 -0
  20. package/dist/tools/editing-tools.d.ts +4 -0
  21. package/dist/tools/editing-tools.js +916 -0
  22. package/package.json +31 -0
  23. package/python/__pycache__/hwp_analyzer.cpython-313.pyc +0 -0
  24. package/python/__pycache__/hwp_editor.cpython-313.pyc +0 -0
  25. package/python/__pycache__/hwp_service.cpython-313.pyc +0 -0
  26. package/python/__pycache__/privacy_scanner.cpython-313.pyc +0 -0
  27. package/python/__pycache__/ref_reader.cpython-313.pyc +0 -0
  28. package/python/__pycache__/test_integration.cpython-313.pyc +0 -0
  29. package/python/hwp_analyzer.py +544 -0
  30. package/python/hwp_editor.py +933 -0
  31. package/python/hwp_service.py +1291 -0
  32. package/python/privacy_scanner.py +115 -0
  33. package/python/ref_reader.py +115 -0
  34. package/python/requirements.txt +2 -0
@@ -0,0 +1,115 @@
1
+ """개인정보 스캔 — 문서 텍스트에서 민감 정보를 정규식으로 감지.
2
+
3
+ 지원 감지 항목:
4
+ - 주민등록번호 (risk: critical)
5
+ - 전화번호 (risk: high)
6
+ - 이메일 (risk: medium)
7
+ - 계좌번호 (risk: high)
8
+ - 여권번호 (risk: high)
9
+
10
+ 패턴 매칭 순서가 중요: 주민번호 → 전화번호 → 이메일 → 계좌번호 → 여권번호
11
+ 이미 매칭된 위치는 후속 패턴에서 제외하여 오탐 방지.
12
+ """
13
+ import re
14
+
15
+
16
def _mask_resident_id(m):
    """Keep the first six digits and the leading digit of the second group."""
    front, back = m.group(1), m.group(2)
    return front + "-" + back[0] + "******"


def _mask_phone(m):
    """Hide the middle digit group; keep the prefix and the last four digits."""
    return m.group(1) + "-" + "****" + "-" + m.group(3)


def _mask_email(m):
    """Keep at most the first two characters of the local part, plus the domain."""
    local_part, domain = m.group(1), m.group(2)
    return local_part[:2] + "***@" + domain


def _mask_account(m):
    """Keep the first group and the last two digits of the final group."""
    return m.group(1) + "-****-" + m.group(3)[-2:]


def _mask_passport(m):
    """Keep the letter prefix and replace every digit with an asterisk."""
    return m.group(1) + "*" * len(m.group(2))


# Detection rules, listed in priority order: a rule that appears earlier is
# matched first, and later rules skip any position it already claimed.
# Each entry carries the label ("type"), the regex source ("pattern"), the
# risk level ("risk") and a callable ("mask") that turns a match object into
# its masked display string.
_PATTERNS = [
    {"type": label, "pattern": regex, "risk": level, "mask": masker}
    for label, regex, level, masker in (
        (
            "주민등록번호",
            r"\b(\d{6})\s*[-–]\s*([1-4]\d{6})\b",
            "critical",
            _mask_resident_id,
        ),
        (
            "전화번호",
            r"\b(0\d{1,2})[-.\s]?(\d{3,4})[-.\s]?(\d{4})\b",
            "high",
            _mask_phone,
        ),
        (
            "이메일",
            r"\b([a-zA-Z0-9._%+-]+)@([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})\b",
            "medium",
            _mask_email,
        ),
        (
            "계좌번호",
            r"\b(\d{3,6})[-](\d{2,6})[-](\d{4,8})\b",
            "high",
            _mask_account,
        ),
        (
            "여권번호",
            r"\b([A-Z]{1,2})(\d{7,8})\b",
            "high",
            _mask_passport,
        ),
    )
]
49
+
50
+
51
def _ranges_overlap(start1, end1, start2, end2):
    """Return True when the two (start, end) ranges overlap.

    Ranges that merely touch (one ends exactly where the other starts) are
    not considered overlapping.
    """
    first_ends_before_second = end1 <= start2
    second_ends_before_first = end2 <= start1
    return not (first_ends_before_second or second_ends_before_first)
54
+
55
+
56
def scan_privacy(text):
    """Scan text for personal information and return masked findings.

    Rules from ``_PATTERNS`` are applied in list order. A match is dropped
    when it overlaps a position already claimed by an earlier match.

    Args:
        text: The text to inspect. Anything that is not a non-empty ``str``
            yields the empty result.

    Returns:
        A dict with the keys:
            "found": True when at least one finding was recorded.
            "total_findings": number of findings.
            "findings": list of dicts with "type", "value", "masked_value",
                "risk" and "position" (match start offset), sorted by
                position. "value" holds the masked text, not the original.
            "risk_summary": count of findings per risk level
                (critical / high / medium / low).
            "recommendation": a message chosen from the highest risk seen.
    """
    counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}

    if not (isinstance(text, str) and text):
        return {
            "found": False,
            "total_findings": 0,
            "findings": [],
            "risk_summary": counts,
            "recommendation": "검사할 텍스트가 없습니다.",
        }

    claimed = []  # (start, end) spans already assigned to a finding
    hits = []

    for rule in _PATTERNS:
        for match in re.finditer(rule["pattern"], text):
            begin, finish = match.span()

            # A higher-priority rule already owns this position.
            if any(_ranges_overlap(begin, finish, lo, hi) for lo, hi in claimed):
                continue

            claimed.append((begin, finish))
            hidden = rule["mask"](match)
            level = rule["risk"]
            hits.append({
                "type": rule["type"],
                "value": hidden,  # expose the masked text only, never the original
                "masked_value": hidden,
                "risk": level,
                "position": begin,
            })
            counts[level] = counts.get(level, 0) + 1

    ordered = sorted(hits, key=lambda item: item["position"])

    # Start from the mildest message and let each more severe level override it.
    advice = "개인정보가 감지되지 않았습니다."
    if ordered:
        advice = "개인정보가 일부 포함되어 있습니다. 확인이 필요합니다."
    if counts["high"] > 0:
        advice = "민감 개인정보가 포함되어 있습니다. 마스킹을 권장합니다."
    if counts["critical"] > 0:
        advice = "주민등록번호가 포함되어 있습니다. 즉시 마스킹 처리가 필요합니다."

    return {
        "found": len(ordered) > 0,
        "total_findings": len(ordered),
        "findings": ordered,
        "risk_summary": counts,
        "recommendation": advice,
    }
@@ -0,0 +1,115 @@
1
+ """참고자료 텍스트 추출기.
2
+ 지원: .txt, .csv, .xlsx, .json, .md
3
+ HWP/HWPX는 hwp_analyzer.analyze_document 사용 (이 모듈에서는 다루지 않음)
4
+ """
5
+ import os
6
+ import json
7
+
8
+
9
def read_reference(file_path, max_chars=30000):
    """Extract text or tabular data from a reference file.

    The reader is chosen from the lower-cased file extension.

    Args:
        file_path: Path to the file; it is converted to an absolute path.
        max_chars: Character budget handed to the format-specific reader.

    Returns:
        The dict produced by the reader selected for the extension.

    Raises:
        FileNotFoundError: The path does not exist.
        ValueError: The extension is not one of the handled formats.
    """
    resolved = os.path.abspath(file_path)
    if not os.path.exists(resolved):
        raise FileNotFoundError(f"파일을 찾을 수 없습니다: {resolved}")

    _, suffix = os.path.splitext(resolved)
    ext = suffix.lower()

    if ext == '.csv':
        return _read_csv(resolved, max_chars)
    if ext == '.json':
        return _read_json(resolved, max_chars)
    if ext in ('.xlsx', '.xls'):
        return _read_excel(resolved, max_chars)
    if ext in ('.txt', '.md', '.log'):
        return _read_text(resolved, max_chars)

    raise ValueError(f"지원하지 않는 파일 형식: {ext}. 지원: .txt, .md, .csv, .xlsx, .json")
27
+
28
+
29
def _read_text(path, max_chars):
    """Read at most ``max_chars`` characters from a plain-text file.

    Args:
        path: Path of the file to read.
        max_chars: Maximum number of characters to read.

    Returns:
        A dict with "format" ("text"), "file_name" (base name of ``path``),
        "content" (the text read) and "char_count" (length of "content").
    """
    # 'utf-8-sig' decodes like 'utf-8' but drops a leading byte-order mark,
    # so the BOM no longer shows up as '\ufeff' at the start of the content
    # or inflates char_count. Undecodable bytes are still replaced.
    with open(path, 'r', encoding='utf-8-sig', errors='replace') as f:
        content = f.read(max_chars)
    return {
        "format": "text",
        "file_name": os.path.basename(path),
        "content": content,
        "char_count": len(content),
    }
38
+
39
+
40
def _read_csv(path, max_chars):
    """Read a CSV file into a header row plus data rows.

    Rows are collected until the running total of their comma-joined lengths
    would exceed ``max_chars``; the row that crosses the limit is dropped.

    Args:
        path: Path of the CSV file.
        max_chars: Character budget across all collected rows.

    Returns:
        A dict with "format" ("csv"), "file_name" (base name of ``path``),
        "headers" (first row, or [] when nothing was read), "data"
        (remaining rows) and "row_count" (number of data rows).
    """
    import csv

    rows = []
    total_chars = 0
    # newline='' lets the csv module do its own line splitting, so line
    # breaks inside quoted fields are returned unchanged. 'utf-8-sig' drops
    # a leading byte-order mark that would otherwise be glued to the first
    # header cell.
    with open(path, 'r', encoding='utf-8-sig', errors='replace',
              newline='') as f:
        for row in csv.reader(f):
            total_chars += len(','.join(row))
            if total_chars > max_chars:
                break
            rows.append(row)

    headers = rows[0] if rows else []
    data = rows[1:]
    return {
        "format": "csv",
        "file_name": os.path.basename(path),
        "headers": headers,
        "data": data,
        "row_count": len(data),
    }
62
+
63
+
64
def _read_excel(path, max_chars):
    """Read every sheet of a workbook as a header row plus data rows.

    Cell values are converted with ``str``; empty cells become "". For each
    sheet, rows are collected until the running total of cell text lengths
    would exceed ``max_chars``. The counter restarts for every sheet.

    Args:
        path: Path of the workbook file.
        max_chars: Per-sheet character budget.

    Returns:
        A dict with "format" ("excel"), "file_name" (base name of ``path``),
        "sheets" (one dict per sheet with "sheet_name", "headers", "data"
        and "row_count") and "sheet_count".

    Raises:
        ImportError: openpyxl is not installed.
    """
    try:
        import openpyxl
    except ImportError:
        raise ImportError("openpyxl이 필요합니다. pip install openpyxl")

    wb = None
    try:
        wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
        collected = []

        for title in wb.sheetnames:
            kept = []
            used = 0
            for raw in wb[title].iter_rows(values_only=True):
                cells = ["" if value is None else str(value) for value in raw]
                used += sum(map(len, cells))
                if used > max_chars:
                    break
                kept.append(cells)

            body = kept[1:]
            collected.append({
                "sheet_name": title,
                "headers": kept[0] if kept else [],
                "data": body,
                "row_count": len(body),
            })

        return {
            "format": "excel",
            "file_name": os.path.basename(path),
            "sheets": collected,
            "sheet_count": len(collected),
        }
    finally:
        # Close the workbook on both the success and the error path.
        if wb:
            wb.close()
104
+
105
+
106
def _read_json(path, max_chars):
    """Parse a JSON file, reading at most ``max_chars`` characters of it.

    Args:
        path: Path of the JSON file.
        max_chars: Maximum number of characters to parse. ``None`` or a
            negative value reads the whole file.

    Returns:
        A dict with "format" ("json"), "file_name" (base name of ``path``)
        and "data" (the parsed JSON value).

    Raises:
        json.JSONDecodeError: The content is not valid JSON. When the cause
            is that the file is longer than ``max_chars`` and was cut off,
            the message names the limit.
    """
    unlimited = max_chars is None or max_chars < 0
    # Read one extra character so a cut-off file can be told apart from a
    # file that is simply malformed.
    read_size = -1 if unlimited else max_chars + 1

    # 'utf-8-sig' drops a leading byte-order mark, which json.loads rejects.
    with open(path, 'r', encoding='utf-8-sig', errors='replace') as f:
        content = f.read(read_size)

    truncated = not unlimited and len(content) > max_chars
    if truncated:
        content = content[:max_chars]

    try:
        data = json.loads(content)
    except json.JSONDecodeError as exc:
        if truncated:
            # Same exception type as before, but the message points at the
            # real cause instead of a syntax error in the cut-off text.
            raise json.JSONDecodeError(
                f"JSON file exceeds max_chars={max_chars} and was truncated "
                f"before parsing ({exc.msg})",
                exc.doc,
                exc.pos,
            ) from exc
        raise

    return {
        "format": "json",
        "file_name": os.path.basename(path),
        "data": data,
    }
@@ -0,0 +1,2 @@
1
+ pyhwpx>=0.8.0
2
+ pywin32>=306