claude-code-hwp-mcp 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +409 -0
  2. package/dist/hwp-bridge.d.ts +67 -0
  3. package/dist/hwp-bridge.js +320 -0
  4. package/dist/hwpx-engine.d.ts +39 -0
  5. package/dist/hwpx-engine.js +187 -0
  6. package/dist/index.d.ts +2 -0
  7. package/dist/index.js +54 -0
  8. package/dist/prompts/hwp-prompts.d.ts +2 -0
  9. package/dist/prompts/hwp-prompts.js +368 -0
  10. package/dist/resources/document-resources.d.ts +3 -0
  11. package/dist/resources/document-resources.js +109 -0
  12. package/dist/server.d.ts +12 -0
  13. package/dist/server.js +29 -0
  14. package/dist/tools/analysis-tools.d.ts +4 -0
  15. package/dist/tools/analysis-tools.js +414 -0
  16. package/dist/tools/composite-tools.d.ts +3 -0
  17. package/dist/tools/composite-tools.js +664 -0
  18. package/dist/tools/document-tools.d.ts +3 -0
  19. package/dist/tools/document-tools.js +264 -0
  20. package/dist/tools/editing-tools.d.ts +4 -0
  21. package/dist/tools/editing-tools.js +916 -0
  22. package/package.json +31 -0
  23. package/python/__pycache__/hwp_analyzer.cpython-313.pyc +0 -0
  24. package/python/__pycache__/hwp_editor.cpython-313.pyc +0 -0
  25. package/python/__pycache__/hwp_service.cpython-313.pyc +0 -0
  26. package/python/__pycache__/privacy_scanner.cpython-313.pyc +0 -0
  27. package/python/__pycache__/ref_reader.cpython-313.pyc +0 -0
  28. package/python/__pycache__/test_integration.cpython-313.pyc +0 -0
  29. package/python/hwp_analyzer.py +544 -0
  30. package/python/hwp_editor.py +933 -0
  31. package/python/hwp_service.py +1291 -0
  32. package/python/privacy_scanner.py +115 -0
  33. package/python/ref_reader.py +115 -0
  34. package/python/requirements.txt +2 -0
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "claude-code-hwp-mcp",
3
+ "version": "0.2.0",
4
+ "description": "MCP server for HWP (한글) document automation via pyhwpx COM API. 85+ tools for document editing, analysis, and AI-powered filling.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "bin": {
8
+ "hwp-mcp": "dist/index.js"
9
+ },
10
+ "files": [
11
+ "dist/",
12
+ "python/"
13
+ ],
14
+ "scripts": {
15
+ "build": "tsc",
16
+ "prepublishOnly": "npm run build",
17
+ "start": "node dist/index.js",
18
+ "dev": "tsx src/index.ts"
19
+ },
20
+ "dependencies": {
21
+ "@modelcontextprotocol/sdk": "^1.12.0",
22
+ "@xmldom/xmldom": "^0.8.11",
23
+ "jszip": "^3.10.1",
24
+ "zod": "^3.23.0"
25
+ },
26
+ "devDependencies": {
27
+ "@types/node": "^22.0.0",
28
+ "tsx": "^4.0.0",
29
+ "typescript": "~5.7.0"
30
+ }
31
+ }
package/python/hwp_analyzer.py ADDED
@@ -0,0 +1,544 @@
1
+ """HWP Document Analyzer - Extract structure and content from HWP documents.
2
+ Uses pyhwpx Hwp() only. Raw win32com is forbidden.
3
+ """
4
+ import sys
5
+ import os
6
+ import re
7
+
8
+
9
+ MAX_TABLES = 50 # 표 스캔 상한 (통장사본 등 반복 표 방지)
10
+
11
+
12
+ # ── 공백 정규화 ──
13
def _normalize(text):
    """Return *text* with every run of whitespace removed, for comparison.

    The regex whitespace class is Unicode-aware for ``str`` patterns, so tabs,
    newlines and NBSP are removed along with plain spaces.
    """
    return re.sub(r"\s+", "", text)


# ── Label alias dictionary ──
# key: canonical label name, value: list of synonyms for that label.
_LABEL_ALIASES = {
    "기업명": ["기업이름", "회사명", "상호명", "상호", "법인명", "업체명", "회사이름"],
    "사업자등록번호": ["사업자번호", "사업자등록NO", "사업자No"],
    "법인등록번호": ["법인번호", "법인등록No", "법인No"],
    "사업장주소": ["주소", "소재지", "본점소재지", "사업장소재지", "회사주소", "기업주소"],
    "대표자성명": ["대표자", "대표자명", "대표이사", "대표자이름", "대표이사명", "성명"],
    "대표전화번호": ["대표전화", "전화번호", "연락처", "대표번호", "전화", "TEL"],
    "홈페이지URL": ["홈페이지", "웹사이트", "URL", "홈페이지주소", "웹주소"],
    "이메일": ["이메일주소", "EMAIL", "E-MAIL"],
    "팩스번호": ["팩스", "FAX", "FAX번호"],
    "설립일": ["설립일자", "설립년월일", "법인설립일"],
    "업종": ["업종명", "주업종"],
    "업태": ["업태명", "주업태"],
    "종업원수": ["직원수", "임직원수", "종업원"],
    "자본금": ["납입자본금", "자본금액"],
    "매출액": ["연매출", "연매출액", "매출"],
}


def _build_alias_lookup(alias_table):
    """Build the reverse lookup table: normalized synonym -> canonical name.

    Every name is registered twice: as written and upper-cased. The
    upper-cased key is what makes the lookup case-insensitive; without it a
    mixed-case alias such as "사업자No" could only be found by an input with
    exactly that casing. The as-written key is kept so that existing direct
    users of the table keep working.
    """
    lookup = {}
    for canonical, aliases in alias_table.items():
        norm_canonical = _normalize(canonical)
        for name in (canonical, *aliases):
            key = _normalize(name)
            lookup[key] = norm_canonical
            lookup[key.upper()] = norm_canonical
    return lookup


# Reverse lookup table: synonym -> canonical name.
_ALIAS_LOOKUP = _build_alias_lookup(_LABEL_ALIASES)


def _canonical_label(label):
    """Normalize *label* and map it to its canonical name.

    The lookup ignores whitespace and letter case. A label with no known
    alias is returned in its normalized (whitespace-free) form, with its
    original casing.
    """
    norm = _normalize(label)
    return _ALIAS_LOOKUP.get(norm.upper(), _ALIAS_LOOKUP.get(norm, norm))
51
+
52
+
53
def _match_label(cell_text, search_label):
    """Decide whether a cell's text and a search label mean the same thing.

    Three stages are tried, strictest first: whitespace-insensitive equality,
    alias equivalence (both sides map to the same canonical label), and
    finally substring containment in either direction.

    Returns:
        A ``(is_match, is_exact, ratio)`` tuple. The first two stages give
        ``(True, True, 1.0)``. A containment match gives ``(True, False, r)``
        where ``r`` is the length of the shorter normalized string divided by
        the length of the longer one. No match, or an input that is empty
        after normalization, gives ``(False, False, 0.0)``.
    """
    no_match = (False, False, 0.0)
    exact = (True, True, 1.0)

    cell = _normalize(cell_text)
    wanted = _normalize(search_label)
    if not (cell and wanted):
        return no_match

    # Stage 1: the texts differed only in whitespace.
    if cell == wanted:
        return exact

    # Stage 2: both texts are aliases of the same canonical label.
    if _canonical_label(cell_text) == _canonical_label(search_label):
        return exact

    # Stage 3: one normalized text contains the other (partial match).
    if wanted in cell:
        return True, False, len(wanted) / len(cell)
    if cell in wanted:
        return True, False, len(cell) / len(wanted)

    return no_match
84
+
85
+
86
def _scan_tables(hwp):
    """Read headers and cell data of up to MAX_TABLES tables.

    Returns:
        ``(tables, truncated)``: the list of table-info dicts collected so
        far, and whether the MAX_TABLES cap was reached.
    """
    tables = []
    while len(tables) < MAX_TABLES:
        index = len(tables)
        try:
            # NOTE(review): pyhwpx appears to return False (rather than raise)
            # when the requested table does not exist; without this check the
            # cursor stays put and the same table could be read again until
            # the cap is hit. Confirm against the installed pyhwpx version.
            if hwp.get_into_nth_table(index) is False:
                break
            df = hwp.table_to_df()
            info = {
                "index": index,
                "rows": len(df) + 1,  # +1 for the header row
                # A header-only table has zero data rows but still has columns.
                "cols": len(df.columns),
                "headers": [str(c) for c in df.columns],
                "data": df.values.tolist(),
            }
        except Exception:
            # Any failure is treated as "no more tables".
            break
        tables.append(info)
        try:
            hwp.Cancel()
        except Exception:
            pass
    return tables, len(tables) >= MAX_TABLES


def _scan_fields(hwp):
    """Return ``[{"name": ..., "value": ...}]`` for every field in the document."""
    fields = []
    field_list = hwp.GetFieldList()
    if not field_list:
        return fields
    # Field names are separated by "\x02"; split() returns the whole string
    # as a single item when the separator is absent.
    for raw_name in field_list.split("\x02"):
        name = raw_name.strip()
        if not name:
            continue
        try:
            value = hwp.GetFieldText(name) or ""
        except Exception:
            value = ""
        fields.append({"name": name, "value": value})
    return fields


def _scan_text(hwp, max_chars=15000, max_reads=5000):
    """Collect document text through the InitScan/GetText/ReleaseScan cycle.

    Reading stops when GetText reports a state <= 0, when roughly
    ``max_chars`` characters have been read, after ``max_reads`` calls, or on
    the first error. ReleaseScan is always called once InitScan succeeded.

    Returns:
        The non-blank text chunks, stripped and joined with newlines.
    """
    parts = []
    hwp.InitScan(0x0077)
    try:
        total_len = 0
        for _ in range(max_reads):
            if total_len >= max_chars:
                break
            try:
                state, text = hwp.GetText()
                if state <= 0:
                    break
                if text and text.strip():
                    parts.append(text.strip())
                    total_len += len(text)
            except Exception:
                break
    finally:
        try:
            hwp.ReleaseScan()
        except Exception as e:
            print(f"[WARN] ReleaseScan failed: {e}", file=sys.stderr)
    return "\n".join(parts)


def analyze_document(hwp, file_path, already_open=False):
    """Analyze an HWP document: pages, tables, fields, text.

    Args:
        hwp: The pyhwpx ``Hwp`` instance driving the editor.
        file_path: Path of the document; converted to an absolute path.
        already_open: Accepted for compatibility but currently ignored: the
            document is always opened so that it is the active one.

    Returns:
        A dict with ``file_path``, ``file_name``, ``file_format`` ("HWPX" or
        "HWP", by extension), ``pages``, ``tables``, ``fields``,
        ``text_preview`` (first 500 chars) and ``full_text`` (first 15,000
        chars). ``tables_truncated`` is added when the table scan hit
        MAX_TABLES.

    Each extraction step is best-effort: a failure is reported on stderr and
    leaves that part of the result at its default value. A failure of
    ``hwp.open`` itself is not caught.
    """
    file_path = os.path.abspath(file_path)
    # Always open the document so that it is guaranteed to be the active one.
    hwp.open(file_path)
    # Move the cursor to the start of the document.
    try:
        hwp.MovePos(2)
    except Exception:
        pass

    result = {
        "file_path": file_path,
        "file_name": os.path.basename(file_path),
        "file_format": "HWPX" if file_path.lower().endswith(".hwpx") else "HWP",
        "pages": 0,
        "tables": [],
        "fields": [],
        "text_preview": "",
        "full_text": "",
    }

    # Page count
    try:
        result["pages"] = hwp.PageCount
    except Exception as e:
        print(f"[WARN] PageCount failed: {e}", file=sys.stderr)

    # Tables (with data for AI context, at most MAX_TABLES)
    try:
        result["tables"], truncated = _scan_tables(hwp)
        if truncated:
            result["tables_truncated"] = True
            print(f"[WARN] Table scan capped at {MAX_TABLES}", file=sys.stderr)
    except Exception as e:
        print(f"[WARN] Table extraction failed: {e}", file=sys.stderr)

    # Fields
    try:
        result["fields"] = _scan_fields(hwp)
    except Exception as e:
        print(f"[WARN] Field extraction failed: {e}", file=sys.stderr)

    # Full text (up to 15,000 chars for AI context)
    try:
        full = _scan_text(hwp)
        result["full_text"] = full[:15000]
        result["text_preview"] = full[:500]
    except Exception as e:
        print(f"[WARN] Text extraction failed: {e}", file=sys.stderr)

    return result
201
+
202
+
203
def _read_current_cell_text(hwp):
    """Return the stripped text of the cell under the cursor ("" on failure).

    The cell content is selected, read as a text block, and the selection is
    cancelled again whether or not the read succeeded.
    """
    try:
        hwp.HAction.Run("SelectAll")
        return hwp.GetTextFile("TEXT", "saveblock").strip()
    except Exception:
        return ""
    finally:
        try:
            hwp.HAction.Run("Cancel")
        except Exception:
            pass


def map_table_cells(hwp, table_idx, max_cells=200):
    """Map all navigable cells in a table by Tab traversal.

    Starting from the position reached by ``get_into_nth_table``, the cursor
    is advanced one cell at a time with ``TableRightCell``. This helps
    identify which tab index corresponds to which cell in tables with merged
    cells.

    Args:
        hwp: The pyhwpx ``Hwp`` instance driving the editor.
        table_idx: Index of the table to map.
        max_cells: Upper bound on the number of cells visited.

    Returns:
        On success ``{"table_index", "total_cells", "cell_map"}``, where each
        ``cell_map`` entry is ``{"tab": i, "text": first 100 chars,
        "pos": [a, b, c] or None}`` taken from ``GetPos``. If the table cannot
        be entered, ``{"error": message, "cell_map": []}``.
    """
    try:
        entered = hwp.get_into_nth_table(table_idx)
    except Exception as e:
        return {"error": f"Cannot enter table {table_idx}: {e}", "cell_map": []}
    # NOTE(review): pyhwpx appears to return False instead of raising when the
    # table does not exist; mapping would then start from whatever cell the
    # cursor happened to be in. Confirm against the installed pyhwpx version.
    if entered is False:
        return {
            "error": f"Cannot enter table {table_idx}: table not found",
            "cell_map": [],
        }

    cell_map = []
    prev_pos = None

    for i in range(max_cells):
        try:
            cur = hwp.GetPos()
            pos = (cur[0], cur[1], cur[2]) if cur else None

            # The cursor did not move since the previous step, so the
            # traversal has run out of cells.
            if i > 0 and pos == prev_pos:
                break

            cell_map.append({
                "tab": i,
                "text": _read_current_cell_text(hwp)[:100],  # Truncate long text
                "pos": list(pos) if pos else None,
            })

            prev_pos = pos
            hwp.TableRightCell()
        except Exception:
            # Keep the cells mapped so far.
            break

    try:
        hwp.Cancel()
    except Exception:
        pass

    return {
        "table_index": table_idx,
        "total_cells": len(cell_map),
        "cell_map": cell_map,
    }
262
+
263
+
264
def _group_cells_into_rows(cell_map):
    """Split the flat, tab-ordered cell map into rows.

    The first component of each cell's ``pos`` is compared with that of the
    previous cell (-1 is used when a cell has no position). A value that does
    not increase closes the current row; the cell then opens the next row.

    Returns:
        A list of rows, each a non-empty list of the original cell dicts.
    """
    grouped = []
    last_id = -1

    for cell in cell_map:
        position = cell.get("pos")
        cell_id = position[0] if position else -1

        starts_new_row = not grouped or cell_id <= last_id
        if starts_new_row:
            grouped.append([])
        grouped[-1].append(cell)

        last_id = cell_id

    return grouped
285
+
286
+
287
def _find_label_column(rows, label):
    """Locate the cell whose text matches *label* (used as a column header).

    Matching ignores whitespace and honours the alias dictionary. The first
    exact match in row-major order wins. Otherwise, provided the normalized
    label has at least two characters, the best partial match is chosen by
    its ratio weighted in favour of upper rows.

    Returns:
        ``(col_index, row_index, is_partial)``, or ``(None, None, False)``
        when nothing matches.
    """
    located = [
        (row_idx, col_idx, cell)
        for row_idx, row in enumerate(rows)
        for col_idx, cell in enumerate(row)
    ]

    # Pass 1: exact match (normalization + aliases).
    for row_idx, col_idx, cell in located:
        matched, exact, _ = _match_label(cell["text"], label)
        if matched and exact:
            return col_idx, row_idx, False

    # A one-character label is too ambiguous for substring matching.
    if len(_normalize(label)) < 2:
        return None, None, False

    # Pass 2: partial match; each row further down loses 5% weight, with a
    # floor of 10%.
    winner = None
    top_score = 0
    for row_idx, col_idx, cell in located:
        matched, exact, ratio = _match_label(cell["text"], label)
        if not matched or exact or ratio <= 0:
            continue
        weighted = ratio * max(0.1, 1.0 - row_idx * 0.05)
        if weighted > top_score:
            winner = (col_idx, row_idx)
            top_score = weighted

    if winner is None:
        return None, None, False
    return winner[0], winner[1], True
312
+
313
+
314
def _find_label_row(rows, row_label):
    """Locate the row containing a cell whose text matches *row_label*.

    Matching ignores whitespace and honours the alias dictionary. The first
    exact match in row-major order wins. Otherwise, provided the normalized
    label has at least two characters, the partial match with the highest
    ratio is used.

    Returns:
        ``(row_index, is_partial, matched_text)``, or ``(None, False, "")``
        when nothing matches. ``matched_text`` is the stripped cell text.
    """
    # Pass 1: exact match (normalization + aliases).
    for row_idx, row in enumerate(rows):
        for cell in row:
            matched, exact, _ = _match_label(cell["text"], row_label)
            if matched and exact:
                return row_idx, False, cell["text"].strip()

    # A one-character label is too ambiguous for substring matching.
    if len(_normalize(row_label)) < 2:
        return None, False, ""

    # Pass 2: keep the first cell that achieves the highest partial ratio.
    winner = None
    top_ratio = 0
    for row_idx, row in enumerate(rows):
        for cell in row:
            matched, exact, ratio = _match_label(cell["text"], row_label)
            if not matched or exact or ratio <= 0:
                continue
            if ratio > top_ratio:
                winner = (row_idx, cell["text"].strip())
                top_ratio = ratio

    if winner is None:
        return None, False, ""
    return winner[0], True, winner[1]
338
+
339
+
340
def _find_cell_position_in_rows(rows, flat_idx):
    """Convert a flat cell_map index into ``(row_idx, col_idx_in_row)``.

    Rows are consumed in order. An index outside the grid (including a
    negative one) yields ``(None, None)``.
    """
    cells_before = 0
    for row_idx, row in enumerate(rows):
        within_row = flat_idx - cells_before
        if 0 <= within_row < len(row):
            return row_idx, within_row
        cells_before += len(row)
    return None, None
349
+
350
+
351
def _find_cell_in_flat(cell_map, label):
    """Find the index of the cell in the flat cell_map that matches *label*.

    Matching ignores whitespace and honours the alias dictionary. The first
    exact match wins. Otherwise, provided the normalized label has at least
    two characters, the partial match with the highest ratio is used.

    Returns:
        ``(matched_idx, is_partial)``, or ``(None, False)`` when nothing
        matches or the label is empty.
    """
    # Defensive guard: callers check this too, but keep the safety net.
    if not label:
        return None, False

    # Pass 1: exact match (normalization + aliases).
    for idx, cell in enumerate(cell_map):
        matched, exact, _ = _match_label(cell["text"], label)
        if matched and exact:
            return idx, False

    # A one-character label is too ambiguous for substring matching.
    if len(_normalize(label)) < 2:
        return None, False

    # Pass 2: keep the first cell that achieves the highest partial ratio.
    winner = None
    top_ratio = 0
    for idx, cell in enumerate(cell_map):
        matched, exact, ratio = _match_label(cell["text"], label)
        if not matched or exact or ratio <= 0:
            continue
        if ratio > top_ratio:
            winner = idx
            top_ratio = ratio

    if winner is None:
        return None, False
    return winner, True
376
+
377
+
378
def _cell_preview(cell_map, limit=10):
    """Return the first 20 chars of the first *limit* cells, for error messages."""
    return [c["text"][:20] for c in cell_map[:limit]]


def _label_entry(tab, text, matched_label, partial):
    """Build one resolved entry; ``partial_match`` is only set when true."""
    entry = {
        "tab": tab,
        "text": text,
        "matched_label": matched_label,
    }
    if partial:
        entry["partial_match"] = True
    return entry


def _resolve_cross(rows, cell_map, label, row_label, text):
    """Cross-match mode: column header *label* x row label *row_label*.

    Returns ``(entry, None)`` on success or ``(None, error_message)``.
    """
    if len(rows) <= 1:
        return None, (
            f"라벨 '{label}'+'{row_label}': 행 경계를 감지할 수 없습니다. "
            "tab 인덱스를 직접 지정하세요."
        )

    col_idx, header_row_idx, col_partial = _find_label_column(rows, label)
    if col_idx is None:
        return None, (
            f"열 라벨 '{label}'을(를) 표에서 찾을 수 없습니다. "
            f"표 내 셀: {_cell_preview(cell_map)}"
        )

    target_row_idx, row_partial, row_matched_text = _find_label_row(rows, row_label)
    if target_row_idx is None:
        return None, (
            f"행 라벨 '{row_label}'을(를) 표에서 찾을 수 없습니다. "
            f"표 내 셀: {_cell_preview(cell_map)}"
        )

    if col_partial:
        matched_cell = rows[header_row_idx][col_idx]
        print(f"[WARN] 열 라벨 '{label}' partial match: '{matched_cell['text'].strip()}'", file=sys.stderr)
    if row_partial:
        print(f"[WARN] 행 라벨 '{row_label}' partial match: '{row_matched_text}'", file=sys.stderr)

    # The header's column index is reused as-is in the target row, so the
    # target row must have at least that many cells.
    target_row = rows[target_row_idx]
    if col_idx >= len(target_row):
        return None, (
            f"라벨 '{label}'+'{row_label}': 열 인덱스({col_idx})가 "
            f"해당 행의 셀 수({len(target_row)})를 초과합니다."
        )

    return _label_entry(
        target_row[col_idx]["tab"],
        text,
        f"{label}×{row_label}",
        col_partial or row_partial,
    ), None


def _resolve_below(rows, cell_map, label, text):
    """Below mode: the cell in the same column of the row after the label.

    Returns ``(entry, None)`` on success or ``(None, error_message)``.
    """
    matched_idx, is_partial = _find_cell_in_flat(cell_map, label)
    if matched_idx is None:
        return None, (
            f"라벨 '{label}'을(를) 표에서 찾을 수 없습니다. "
            f"표 내 셀: {_cell_preview(cell_map)}"
        )
    if is_partial:
        print(
            f"[WARN] below 라벨 '{label}' partial match: "
            f"'{cell_map[matched_idx]['text'].strip()}'",
            file=sys.stderr,
        )

    if len(rows) <= 1:
        return None, (
            f"라벨 '{label}' (direction=below): 행 경계를 감지할 수 없어 "
            "정확한 아래 셀을 찾을 수 없습니다. tab 인덱스를 직접 지정하세요."
        )

    label_row_idx, col_idx = _find_cell_position_in_rows(rows, matched_idx)
    if label_row_idx is None:
        return None, f"라벨 '{label}'의 행 위치를 결정할 수 없습니다."
    if label_row_idx + 1 >= len(rows):
        return None, f"라벨 '{label}'의 아래 행이 없습니다."

    next_row = rows[label_row_idx + 1]
    if col_idx >= len(next_row):
        return None, (
            f"라벨 '{label}': 아래 행의 셀 수({len(next_row)})가 "
            f"열 인덱스({col_idx})보다 적습니다."
        )

    return _label_entry(next_row[col_idx]["tab"], text, label, is_partial), None


def _resolve_right(cell_map, label, text):
    """Right mode: the cell that follows the label in tab order (tab+1).

    Returns ``(entry, None)`` on success or ``(None, error_message)``.
    """
    matched_idx, is_partial = _find_cell_in_flat(cell_map, label)
    if matched_idx is None:
        return None, (
            f"라벨 '{label}'을(를) 표에서 찾을 수 없습니다. "
            f"표 내 셀: {_cell_preview(cell_map)}"
        )
    if is_partial:
        print(
            f"[WARN] right 라벨 '{label}' partial match: "
            f"'{cell_map[matched_idx]['text'].strip()}'",
            file=sys.stderr,
        )

    target_idx = matched_idx + 1
    if target_idx >= len(cell_map):
        return None, f"라벨 '{label}'의 오른쪽 셀이 없습니다 (표 범위 밖)."

    return _label_entry(cell_map[target_idx]["tab"], text, label, is_partial), None


def resolve_labels_to_tabs(hwp, table_idx, labels):
    """Find the tab index of each target cell from its label text.

    Args:
        hwp: The pyhwpx ``Hwp`` instance driving the editor.
        table_idx: Index of the table to search.
        labels: A list of dicts such as
            ``{"label": "계약금액", "text": "value",
            "direction": "right" | "below", "row_label": "전체기간"}``
            where ``direction`` and ``row_label`` are optional.

    Logic:
        1. Collect the full cell map with ``map_table_cells()``.
        2. With a ``row_label``: 2D cross matching (column header x row label).
        3. With ``direction == "below"``: the cell below, based on row groups.
        4. Otherwise (right): the next cell in tab order (tab+1).

    Returns:
        ``{"resolved": [...], "errors": [...]}``. Each resolved entry holds
        ``tab``, ``text`` and ``matched_label``, plus ``partial_match: True``
        when a label was only matched partially. Items that cannot be
        resolved add a message to ``errors`` and are skipped.
    """
    cell_data = map_table_cells(hwp, table_idx)
    cell_map = cell_data.get("cell_map", [])

    if not cell_map:
        errors = ["표에서 셀을 찾을 수 없습니다."]
        # Surface the underlying reason instead of dropping it.
        if cell_data.get("error"):
            errors.append(str(cell_data["error"]))
        return {
            "resolved": [],
            "errors": errors,
        }

    rows = _group_cells_into_rows(cell_map)
    resolved = []
    errors = []

    for item in labels or []:
        # A missing, null or non-string label must become an error entry
        # rather than crash the whole batch.
        label = str(item.get("label") or "").strip()
        text = item.get("text", "")
        direction = item.get("direction", "right")
        row_label = str(item.get("row_label") or "").strip()

        if not label:
            errors.append("빈 라벨이 전달되었습니다.")
            continue

        if row_label:
            entry, error = _resolve_cross(rows, cell_map, label, row_label, text)
        elif direction == "below":
            entry, error = _resolve_below(rows, cell_map, label, text)
        else:
            entry, error = _resolve_right(cell_map, label, text)

        if error is not None:
            errors.append(error)
        else:
            resolved.append(entry)

    return {"resolved": resolved, "errors": errors}