@simplysm/sd-claude 14.0.77 → 14.0.79

This diff compares the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (30):
  1. package/claude/output-styles/sd-tone.md +26 -2
  2. package/claude/references/sd-simplysm14/manuals/logging.md +1 -1
  3. package/claude/rules/sd-base-rules.md +109 -90
  4. package/claude/skills/sd-dev/SKILL.md +1 -1
  5. package/claude/skills/sd-impl/SKILL.md +15 -14
  6. package/claude/skills/sd-impl/references/spec-cross-check.md +2 -2
  7. package/claude/skills/sd-spec/SKILL.md +746 -192
  8. package/claude/skills/sd-spec/references/example-spec.md +107 -35
  9. package/claude/skills/sd-unpack/SKILL.md +39 -14
  10. package/claude/skills/sd-unpack/scripts/handlers/__pycache__/_common.cpython-314.pyc +0 -0
  11. package/claude/skills/sd-unpack/scripts/handlers/__pycache__/eml_handler.cpython-314.pyc +0 -0
  12. package/claude/skills/sd-unpack/scripts/handlers/__pycache__/office_com.cpython-314.pyc +0 -0
  13. package/claude/skills/sd-unpack/scripts/handlers/__pycache__/pdf_handler.cpython-314.pyc +0 -0
  14. package/claude/skills/sd-unpack/scripts/handlers/_common.py +59 -0
  15. package/claude/skills/sd-unpack/scripts/handlers/eml_handler.py +7 -0
  16. package/claude/skills/sd-unpack/scripts/handlers/msg_handler.py +11 -0
  17. package/claude/skills/sd-unpack/scripts/handlers/office_com.py +288 -79
  18. package/claude/skills/sd-unpack/scripts/handlers/office_worker.py +3 -2
  19. package/claude/skills/sd-unpack/scripts/handlers/pdf_handler.py +78 -10
  20. package/package.json +1 -1
  21. package/claude/skills/sd-spec/references/spec-authoring.md +0 -298
  22. package/claude/skills/sd-spec/references/spec-md-template.md +0 -29
  23. package/claude/skills/sd-wip/SKILL.md +0 -38
  24. package/claude/skills/sd-wip/evals/fixtures/empty/.gitkeep +0 -0
  25. package/claude/skills/sd-wip/evals/fixtures/with-artifact/projects/acct/_wip.md +0 -3
  26. package/claude/skills/sd-wip/evals/fixtures/with-artifact/projects/acct/spec.md +0 -15
  27. package/claude/skills/sd-wip/evals/fixtures/with-existing-wip/.wips/260101120000_acct.md +0 -6
  28. package/claude/skills/sd-wip/evals/fixtures/with-existing-wip-for-compact/.wips/260101120000_acct.md +0 -14
  29. package/claude/skills/sd-wip/evals/golden.jsonl +0 -4
  30. package/claude/skills/sd-wip/references/compact.md +0 -79
@@ -224,7 +224,7 @@ def _docx_extract_nodes(input_path: Path) -> tuple[list[dict], dict[str, int]]:
224
224
  if vm == "continue":
225
225
  # vMerge continue cell — origin 의 rowspan 영역. skip.
226
226
  continue
227
- cell_text = (cell.text or "").strip()
227
+ cell_text = cell.text or "" # 원본 그대로 (strip X)
228
228
  colspan = _docx_cell_colspan(cell)
229
229
  cell_node = {
230
230
  "node": node_idx,
@@ -414,13 +414,13 @@ def _run_pptx(
414
414
  slide_summaries: list[str] = []
415
415
  slide_has_notes: dict[str, bool] = {}
416
416
  slide_charts: dict[str, list[str]] = {} # idx -> chart filenames
417
- slide_cores: dict[str, str] = {} # idx -> 핵심 텍스트 (title 또는 첫 텍스트)
418
417
 
419
418
  _common.mkdir(slides_dir)
420
419
  for i, slide in enumerate(prs.slides, start=1):
421
420
  idx = f"{i:02d}"
422
421
  title = _pptx_slide_title(slide)
423
- safe_title = _common.slugify_filename(title or f"슬라이드{i}", max_len=40)
422
+ # title 없으면 idx 만 (자체 한국어 라벨 부착 X)
423
+ safe_title = _common.slugify_filename(title, max_len=40) if title else ""
424
424
  slide_titles.append((idx, safe_title))
425
425
 
426
426
  nodes, chart_refs = _pptx_extract_slide_nodes(
@@ -440,41 +440,37 @@ def _run_pptx(
440
440
  lines = [json.dumps(meta, ensure_ascii=False, default=_json_default)]
441
441
  for n in nodes:
442
442
  lines.append(json.dumps(n, ensure_ascii=False, default=_json_default))
443
- _common.write_text(slides_dir / f"{idx}_{safe_title}.jsonl", "\n".join(lines))
443
+ stem = _pptx_slide_stem(idx, safe_title)
444
+ _common.write_text(slides_dir / f"{stem}.jsonl", "\n".join(lines))
444
445
 
445
446
  if chart_refs:
446
447
  slide_charts[idx] = chart_refs
447
448
 
448
449
  if slide.has_notes_slide:
449
450
  notes_text = slide.notes_slide.notes_text_frame.text or ""
450
- if notes_text.strip():
451
+ if notes_text:
451
452
  _common.write_text(
452
- slides_dir / f"{idx}_{safe_title}.notes.md",
453
+ slides_dir / f"{stem}.notes.md",
453
454
  notes_text,
454
455
  )
455
456
  slide_has_notes[idx] = True
456
457
 
457
- core = title or _pptx_first_text(nodes)
458
- if core:
459
- slide_cores[idx] = core[:60]
460
-
461
- parts = [f"`slides/{idx}_{safe_title}.png`", "`.jsonl`"]
458
+ parts = [f"`slides/{stem}.png`", "`.jsonl`"]
462
459
  if slide_has_notes.get(idx):
463
460
  parts.append("`.notes.md`")
464
461
  if chart_refs:
465
462
  chart_str = ", ".join(f"`charts/{c}`" for c in chart_refs)
466
463
  parts.append(f"(차트: {chart_str})")
467
- if slide_cores.get(idx):
468
- parts.append(f"— {slide_cores[idx]}")
469
464
  slide_summaries.append(" ".join(parts))
470
465
 
471
466
  # COM PowerPoint 의 Slide.Export 로 슬라이드별 PNG 직접 출력. 임시 폴더에서 만든 후 long-path-safe copy.
472
467
  with _common.com_lock(), _common.temp_workdir() as tmp:
473
468
  _powerpoint_export_slides(input_path, tmp, slide_titles)
474
469
  for idx, safe_title in slide_titles:
475
- tmp_png = tmp / f"{idx}_{safe_title}.png"
470
+ stem = _pptx_slide_stem(idx, safe_title)
471
+ tmp_png = tmp / f"{stem}.png"
476
472
  if tmp_png.exists():
477
- _common.copy(tmp_png, slides_dir / f"{idx}_{safe_title}.png")
473
+ _common.copy(tmp_png, slides_dir / f"{stem}.png")
478
474
 
479
475
  # pptx 의 시각은 슬라이드 PNG 에 모두 포함 → ZIP media 전체 복제 skip
480
476
  # (개별 picture shape 은 _pptx_extract_slide_nodes 에서 image ref 와 함께 저장됨).
@@ -509,26 +505,22 @@ def _run_pptx(
509
505
  )
510
506
 
511
507
 
508
+ def _pptx_slide_stem(idx: str, safe_title: str) -> str:
509
+ """슬라이드 파일 stem. safe_title 빈 문자열이면 idx 만 (자체 라벨 부착 X)."""
510
+ return f"{idx}_{safe_title}" if safe_title else idx
511
+
512
+
512
513
  def _pptx_slide_title(slide) -> str:
513
- """슬라이드 title placeholder 텍스트. 없으면 빈 문자열."""
514
+ """슬라이드 title placeholder 텍스트. 없으면 빈 문자열. 원본 그대로 (strip X)."""
514
515
  try:
515
516
  title_shape = slide.shapes.title
516
517
  if title_shape is not None and title_shape.text:
517
- return title_shape.text.strip()
518
+ return title_shape.text
518
519
  except (AttributeError, ValueError):
519
520
  pass
520
521
  return ""
521
522
 
522
523
 
523
- def _pptx_first_text(nodes: list[dict]) -> str:
524
- """노드 리스트 중 첫 비어있지 않은 text. 없으면 빈 문자열."""
525
- for n in nodes:
526
- t = (n.get("text") or "").strip()
527
- if t:
528
- return t
529
- return ""
530
-
531
-
532
524
  def _pptx_extract_slide_nodes(
533
525
  slide,
534
526
  slide_num: int,
@@ -568,7 +560,7 @@ def _pptx_extract_slide_nodes(
568
560
  table_idx = shape_idx + 1
569
561
  for r_idx, row in enumerate(table.rows, start=1):
570
562
  for c_idx, cell in enumerate(row.cells, start=1):
571
- cell_text = (cell.text or "").strip()
563
+ cell_text = cell.text or "" # 원본 그대로 (strip X)
572
564
  nodes.append({
573
565
  **common,
574
566
  "type": "table_cell",
@@ -724,7 +716,8 @@ def _run_xlsx(
724
716
  sheets_dir = out_dir / "sheets"
725
717
  charts_dir = out_dir / "charts"
726
718
  sheet_summaries: list[str] = []
727
- sheet_names: list[tuple[str, str, str]] = [] # (idx, safe_name, raw_name)
719
+ sheet_names: list[tuple[str, str, str]] = [] # (idx, safe_name, raw_name) — 일반 Worksheet
720
+ chart_sheet_names: list[tuple[str, str, str]] = [] # (idx, safe_name, raw_name) — Chartsheet
728
721
  sheet_charts: dict[str, list[str]] = {} # idx -> chart filenames
729
722
  sheet_formula_count: dict[str, int] = {}
730
723
  sheet_dims: dict[str, tuple[int, int]] = {}
@@ -733,16 +726,20 @@ def _run_xlsx(
733
726
  wb_formulas = load_workbook(_common.long_str(input_path), data_only=False)
734
727
  try:
735
728
  _common.mkdir(sheets_dir)
736
- # openpyxl 의 sheetnames 는 chart sheet 포함하지만 COM Worksheets() 는 일반 시트만.
737
- # 양쪽 인덱스 mismatch 피하려면 일반 Worksheet 처리하고, lookup 은 raw name 으로.
729
+ # openpyxl 의 sheetnames 는 일반 Worksheet Chartsheet 포함.
730
+ # 시트 순서 그대로 idx 통합 부여 (사용자 워크북 순서 보존).
731
+ # 일반 Worksheet 만 COM Excel PNG export 대상, Chartsheet 는 차트 데이터만 추출.
738
732
  idx_counter = 0
739
733
  for name in wb_values.sheetnames:
740
- if not isinstance(wb_values[name], Worksheet):
741
- continue
734
+ obj = wb_values[name]
742
735
  idx_counter += 1
743
736
  idx = f"{idx_counter:02d}"
744
737
  safe_name = _common.slugify_filename(name, max_len=40)
745
- sheet_names.append((idx, safe_name, name))
738
+ if isinstance(obj, Worksheet):
739
+ sheet_names.append((idx, safe_name, name))
740
+ else:
741
+ # Chartsheet 등 비-worksheet
742
+ chart_sheet_names.append((idx, safe_name, name))
746
743
 
747
744
  # COM Excel 호출: 데이터 영역 → ChartObject + Range.CopyPicture → 시트별 PNG.
748
745
  # 시트별 (last_row, last_col) 도 같이 반환되어 .jsonl 이 같은 데이터 영역으로 통일됨.
@@ -772,20 +769,56 @@ def _run_xlsx(
772
769
  )
773
770
  sheet_charts.setdefault(idx, []).append(chart_filename)
774
771
 
775
- # 워크북 단위 메타 (defined names 등) — 시트 jsonl 외부 분리.
776
- wb_meta = _workbook_meta(wb_formulas)
777
- if wb_meta:
778
- _common.write_text(
779
- out_dir / "workbook.meta.json",
780
- json.dumps(wb_meta, ensure_ascii=False, indent=2),
781
- )
782
-
772
+ # Chartsheet 처리: 차트 데이터를 charts/sheet<idx>_chart.data.json 으로 저장
773
+ chart_sheet_chart_files: dict[str, str] = {} # idx -> chart filename
774
+ for idx, safe_name, raw_name in chart_sheet_names:
775
+ cs = wb_formulas[raw_name]
776
+ chart = None
777
+ # Chartsheet.charts 또는 _charts 속성 (openpyxl 버전 따라 다름)
778
+ for attr in ("charts", "_charts"):
779
+ v = getattr(cs, attr, None)
780
+ if v:
781
+ if hasattr(v, "__iter__"):
782
+ try:
783
+ chart = next(iter(v), None)
784
+ except Exception:
785
+ chart = None
786
+ else:
787
+ chart = v
788
+ if chart is not None:
789
+ break
790
+ if chart is None:
791
+ # 단일 chart 속성 fallback
792
+ chart = getattr(cs, "chart", None)
793
+ if chart is not None:
794
+ try:
795
+ data = _extract_openpyxl_chart_data(chart)
796
+ except Exception:
797
+ data = None
798
+ if data is not None:
799
+ _common.mkdir(charts_dir)
800
+ chart_filename = f"sheet{idx}_chart.data.json"
801
+ _common.write_text(
802
+ charts_dir / chart_filename,
803
+ json.dumps(data, ensure_ascii=False, indent=2),
804
+ )
805
+ chart_sheet_chart_files[idx] = chart_filename
806
+
807
+ # 워크북 단위 메타 (defined names·pivots·sheet codeName 등) — 시트 jsonl 외부 분리.
808
+ wb_meta = _workbook_meta(wb_formulas, input_path)
783
809
  # VBA 시트 객체명 ↔ raw 시트명 매핑 (시트 codeName 기반)
784
810
  sheet_code_map: dict[str, str] = {}
785
811
  for ws in wb_formulas.worksheets:
786
812
  code = getattr(ws.sheet_properties, "codeName", None)
787
813
  if code:
788
814
  sheet_code_map[code] = ws.title
815
+ if sheet_code_map:
816
+ wb_meta["sheet_code_map"] = sheet_code_map
817
+ if wb_meta:
818
+ _common.write_text(
819
+ out_dir / "workbook.meta.json",
820
+ json.dumps(wb_meta, ensure_ascii=False, indent=2),
821
+ )
789
822
  finally:
790
823
  wb_values.close()
791
824
  wb_formulas.close()
@@ -800,7 +833,8 @@ def _run_xlsx(
800
833
  embed_zip_prefix="xl/embeddings/",
801
834
  )
802
835
 
803
- # 시트별 산출물 풀목록 (모든 시트 처리 끝난 sheet_images 매핑까지 합쳐서)
836
+ # 시트별 산출물 풀목록 일반 시트 + chart sheet 통합, 시트 순서 (idx) 대로
837
+ sheet_summary_map: dict[str, str] = {}
804
838
  for idx, safe_name, raw_name in sheet_names:
805
839
  last_row, last_col = sheet_dims.get(idx, (0, 0))
806
840
  formula_n = sheet_formula_count.get(idx, 0)
@@ -821,12 +855,21 @@ def _run_xlsx(
821
855
  if formula_n:
822
856
  meta += f", 수식 {formula_n}개"
823
857
  meta += ")"
824
- sheet_summaries.append(" ".join(parts) + " " + meta)
858
+ sheet_summary_map[idx] = " ".join(parts) + " " + meta
859
+
860
+ for idx, safe_name, raw_name in chart_sheet_names:
861
+ chart_filename = chart_sheet_chart_files.get(idx)
862
+ if chart_filename:
863
+ sheet_summary_map[idx] = f"`charts/{chart_filename}` (chart sheet — \"{raw_name}\")"
864
+ else:
865
+ sheet_summary_map[idx] = f"(chart sheet — \"{raw_name}\", 차트 데이터 추출 실패)"
866
+
867
+ # idx 순서대로 통합
868
+ for idx in sorted(sheet_summary_map.keys()):
869
+ sheet_summaries.append(sheet_summary_map[idx])
825
870
 
826
871
  source_name, source_size = _source_meta(input_path, out_dir, source_name_override)
827
- macro_modules = _extract_macros(
828
- _source_path(out_dir, source_name), out_dir, sheet_code_map=sheet_code_map,
829
- )
872
+ macro_modules = _extract_macros(_source_path(out_dir, source_name), out_dir)
830
873
 
831
874
  sections: dict[str, list[str]] = {}
832
875
  if sheet_summaries:
@@ -840,9 +883,9 @@ def _run_xlsx(
840
883
  source_size=source_size,
841
884
  tool=("openpyxl + COM Excel + ZIP " + tool_extra).strip(),
842
885
  loss_notes=(
843
- "셀 서식·조건부 서식·데이터 검증 규칙은 미보존. "
844
- "시각은 시트별 PNG, 데이터·수식·시트 메타는 시트별 .jsonl 한 줄=한 행(좌표 명시), "
845
- "워크북 단위 메타(defined names 등)는 workbook.meta.json."
886
+ "셀 서식(바탕색·border·폰트)·frozen·dims 미보존 (필요 _source.xlsx 직접 추출). "
887
+ "시각은 시트별 PNG, 분석 데이터(셀값·number_format·수식·merges·hyperlinks·comments) "
888
+ "시트별 .jsonl 한 줄=한 행(좌표 명시), 워크북 단위 메타(defined names 등) 는 workbook.meta.json."
846
889
  ),
847
890
  sections=sections or None,
848
891
  attachments=attachment_links,
@@ -1002,18 +1045,13 @@ def _source_path(out_dir: Path, source_name: str) -> Path:
1002
1045
  return out_dir / f"_source.{ext}"
1003
1046
 
1004
1047
 
1005
- def _extract_macros(
1006
- input_path: Path,
1007
- out_dir: Path,
1008
- sheet_code_map: Optional[dict[str, str]] = None,
1009
- ) -> list[str]:
1010
- """OLE/OOXML 파일에서 VBA 매크로 추출. macros/<모듈명>.vba 로 저장.
1048
+ def _extract_macros(input_path: Path, out_dir: Path) -> list[str]:
1049
+ """OLE/OOXML 파일에서 VBA 매크로 추출. macros/<모듈명>.vba 로 저장 (원본 코드 그대로).
1011
1050
 
1012
1051
  추출된 모듈 파일명 list 반환 (예: ["Module1.vba", "ThisWorkbook.vba"]).
1013
1052
  매크로 없으면 빈 list.
1014
1053
 
1015
- sheet_code_map: VBA 시트 객체 codeName → raw 시트명 (예: {"Sheet1": "BOA"}).
1016
- 매크로 파일 첫 줄에 코멘트로 매핑 정보 prepend (시트 모듈만).
1054
+ 시트 객체명↔raw 시트명 매핑은 호출자(_run_xlsx)가 workbook.meta.json 에 별도 보관.
1017
1055
  """
1018
1056
  _common.ensure_pip("oletools")
1019
1057
  from oletools.olevba import VBA_Parser
@@ -1028,11 +1066,8 @@ def _extract_macros(
1028
1066
  for (_filename, stream_path, vba_filename, vba_code) in parser.extract_macros():
1029
1067
  module_name = vba_filename or stream_path or "module"
1030
1068
  stem = Path(module_name).stem or "module"
1031
- prefix = ""
1032
- if sheet_code_map and stem in sheet_code_map:
1033
- prefix = f'\' (object: {stem}, sheet: "{sheet_code_map[stem]}")\n\n'
1034
1069
  dst = _common.unique_path(macros_dir, f"{stem}.vba")
1035
- _common.write_text(dst, prefix + (vba_code or ""))
1070
+ _common.write_text(dst, vba_code or "")
1036
1071
  module_files.append(dst.name)
1037
1072
  return module_files
1038
1073
  finally:
@@ -1153,34 +1188,38 @@ def _json_default(obj: Any) -> str:
1153
1188
  raise TypeError(f"not JSON serializable: {type(obj).__name__}")
1154
1189
 
1155
1190
 
1156
- def _sheet_to_jsonl(ws_v, ws_f, last_row: int, last_col: int) -> tuple[list[str], int]:
1157
- """openpyxl Worksheet (1,1)~(last_row,last_col) 범위를 행 단위 JSONL 라인으로.
1191
+ def _sheet_to_jsonl(
1192
+ ws_v, ws_f, last_row: int, last_col: int,
1193
+ ) -> tuple[list[str], int]:
1194
+ """openpyxl Worksheet 의 (1,1)~(last_row,last_col) → 행 단위 JSONL.
1195
+
1196
+ 분석 핵심: 데이터·number_format·수식. 시각 표시(바탕색·border·폰트·frozen)·dims 는 미보존
1197
+ (필요 시 Claude 가 _source.xlsx 직접 추출).
1158
1198
 
1159
- = 한 행. 빈 셀 키 생략. 좌표는 `r`(1-based 행번호) + 열문자 키(`A`·`B`·...·`AA`·...).
1160
- 같은 수식은 `_f` (열문자 → 수식문자열). 빈 행도 `{"r":N}` 한 줄 유지 → Read offset = 행번호.
1161
- 줄은 `{"_meta":{...}}` (시트 dims·merges·frozen·hyperlinks·comments).
1162
- 타입은 JSON 네이티브(int·float·bool) + datetime ISO 8601.
1199
+ 데이터 jsonl (줄=한 행. 빈 셀 키 생략):
1200
+ - 줄: `{"_meta":{"merges":[...], "number_formats":{...}, "hyperlinks":{...}, "comments":{...}}}`
1201
+ - merges: 셀 좌표 해석 필수 (머지 영역 안 빈 셀 오해 차단)
1202
+ - number_formats: Date·통화·% 의미 단서
1203
+ - hyperlinks·comments: 셀 부가 정보
1204
+ - 비어있는 키는 생략
1205
+ - 데이터 줄: `{"r":N, "<col>":value, ..., "_f":{<col>:formula}}`
1206
+ - 빈 행도 `{"r":N}` 한 줄 유지
1163
1207
 
1164
1208
  반환: (lines, formula_count)
1165
1209
  """
1166
1210
  from openpyxl.utils import get_column_letter
1167
1211
 
1168
1212
  if last_row < 1 or last_col < 1:
1169
- meta = {"_meta": {"dims": [0, 0]}}
1170
- return [json.dumps(meta, ensure_ascii=False)], 0
1213
+ return [json.dumps({"_meta": {}}, ensure_ascii=False)], 0
1171
1214
 
1172
- # 메타 수집: 머지·frozen·hyperlinks·comments
1173
- meta: dict[str, Any] = {"dims": [last_row, last_col]}
1215
+ meta: dict[str, Any] = {}
1174
1216
  merges = [str(r) for r in ws_v.merged_cells.ranges]
1175
1217
  if merges:
1176
1218
  meta["merges"] = merges
1177
- frozen = ws_v.freeze_panes
1178
- if frozen:
1179
- meta["frozen"] = frozen
1180
1219
 
1181
1220
  hyperlinks: dict[str, str] = {}
1182
1221
  comments: dict[str, str] = {}
1183
- number_formats: dict[str, str] = {} # General(기본) 외 셀의 표시 형식
1222
+ number_formats: dict[str, str] = {}
1184
1223
  for row in ws_v.iter_rows(min_row=1, max_row=last_row, min_col=1, max_col=last_col):
1185
1224
  for cell in row:
1186
1225
  hl = getattr(cell, "hyperlink", None)
@@ -1192,12 +1231,12 @@ def _sheet_to_jsonl(ws_v, ws_f, last_row: int, last_col: int) -> tuple[list[str]
1192
1231
  nf = getattr(cell, "number_format", None)
1193
1232
  if nf and nf != "General":
1194
1233
  number_formats[cell.coordinate] = nf
1234
+ if number_formats:
1235
+ meta["number_formats"] = number_formats
1195
1236
  if hyperlinks:
1196
1237
  meta["hyperlinks"] = hyperlinks
1197
1238
  if comments:
1198
1239
  meta["comments"] = comments
1199
- if number_formats:
1200
- meta["number_formats"] = number_formats
1201
1240
 
1202
1241
  lines: list[str] = [json.dumps({"_meta": meta}, ensure_ascii=False, default=_json_default)]
1203
1242
  formula_count = 0
@@ -1223,8 +1262,8 @@ def _sheet_to_jsonl(ws_v, ws_f, last_row: int, last_col: int) -> tuple[list[str]
1223
1262
  return lines, formula_count
1224
1263
 
1225
1264
 
1226
- def _workbook_meta(wb) -> dict[str, Any]:
1227
- """워크북 단위 메타 (defined names 등). 비어있으면 빈 dict 반환."""
1265
+ def _workbook_meta(wb, input_path: Path) -> dict[str, Any]:
1266
+ """워크북 단위 메타 (defined names·pivot tables 등). 비어있으면 빈 dict 반환."""
1228
1267
  meta: dict[str, Any] = {}
1229
1268
  defined_names: dict[str, list[str]] = {}
1230
1269
  # openpyxl 3.x: wb.defined_names 는 DefinedNameDict (dict-like)
@@ -1241,9 +1280,179 @@ def _workbook_meta(wb) -> dict[str, Any]:
1241
1280
  pass
1242
1281
  if defined_names:
1243
1282
  meta["defined_names"] = defined_names
1283
+
1284
+ pivots = _extract_pivots(input_path)
1285
+ if pivots:
1286
+ meta["pivots"] = pivots
1287
+
1244
1288
  return meta
1245
1289
 
1246
1290
 
1291
+ _XLSX_NS = "{http://schemas.openxmlformats.org/spreadsheetml/2006/main}"
1292
+ _XLSX_REL_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
1293
+ _PKG_REL_NS = "{http://schemas.openxmlformats.org/package/2006/relationships}"
1294
+
1295
+
1296
+ def _extract_pivots(input_path: Path) -> list[dict]:
1297
+ """xlsx 의 pivot table 정의 list. ZIP 안 `xl/pivotTables/*.xml` + `xl/pivotCache/*.xml` 파싱.
1298
+
1299
+ cacheId 매핑은 workbook.xml 의 pivotCaches + workbook.xml.rels 통해 정확히 해결.
1300
+ - workbook.xml 의 pivotCaches: cacheId → r:id
1301
+ - workbook.xml.rels: Id → Target (cache xml 파일)
1302
+
1303
+ 각 pivot 의 정보:
1304
+ - name: pivot table 이름
1305
+ - location: 펼쳐진 위치 (예: "A1:E20")
1306
+ - source: 원본 데이터 위치 (예: "'Sheet1'!A1:D100")
1307
+ - rowFields/colFields/pageFields: 행·열·필터 필드명 list
1308
+ - dataFields: 값 필드 [{name, field, subtotal}, ...] (subtotal = sum/count/average/...)
1309
+ """
1310
+ import xml.etree.ElementTree as ET
1311
+
1312
+ pivots: list[dict] = []
1313
+ try:
1314
+ with zipfile.ZipFile(_common.long_str(input_path), "r") as zf:
1315
+ namelist = zf.namelist()
1316
+ pivot_files = sorted(
1317
+ n for n in namelist
1318
+ if n.startswith("xl/pivotTables/pivotTable") and n.endswith(".xml")
1319
+ )
1320
+ if not pivot_files:
1321
+ return pivots
1322
+
1323
+ # 1. workbook.xml.rels 에서 Id → Target 매핑
1324
+ rid_to_target: dict[str, str] = {}
1325
+ try:
1326
+ rels_root = ET.fromstring(zf.read("xl/_rels/workbook.xml.rels"))
1327
+ for rel in rels_root.findall(f"{_PKG_REL_NS}Relationship"):
1328
+ rid_to_target[rel.get("Id", "")] = rel.get("Target", "")
1329
+ except Exception:
1330
+ pass
1331
+
1332
+ # 2. workbook.xml 의 pivotCaches 에서 cacheId → cache 파일 경로 매핑
1333
+ cache_id_to_file: dict[str, str] = {}
1334
+ try:
1335
+ wb_root = ET.fromstring(zf.read("xl/workbook.xml"))
1336
+ pcs = wb_root.find(f"{_XLSX_NS}pivotCaches")
1337
+ if pcs is not None:
1338
+ for pc in pcs:
1339
+ cid = pc.get("cacheId")
1340
+ rid = pc.get(f"{_XLSX_REL_NS}id")
1341
+ if not cid or not rid:
1342
+ continue
1343
+ target = rid_to_target.get(rid, "")
1344
+ if not target:
1345
+ continue
1346
+ # target 의 상대 경로 → ZIP 안 절대 경로
1347
+ if target.startswith("/"):
1348
+ cache_path = target.lstrip("/")
1349
+ else:
1350
+ cache_path = "xl/" + target
1351
+ cache_id_to_file[cid] = cache_path
1352
+ except Exception:
1353
+ pass
1354
+
1355
+ # 3. cache 파일 파싱: cacheId → {source, field_names}
1356
+ cache_info: dict[str, dict] = {}
1357
+ for cid, cf in cache_id_to_file.items():
1358
+ try:
1359
+ root = ET.fromstring(zf.read(cf))
1360
+ except Exception:
1361
+ continue
1362
+ info: dict = {}
1363
+ cs = root.find(f"{_XLSX_NS}cacheSource")
1364
+ if cs is not None:
1365
+ ws = cs.find(f"{_XLSX_NS}worksheetSource")
1366
+ if ws is not None:
1367
+ sheet = ws.get("sheet", "")
1368
+ ref = ws.get("ref", "")
1369
+ named = ws.get("name", "")
1370
+ if sheet and ref:
1371
+ info["source"] = f"'{sheet}'!{ref}"
1372
+ elif named:
1373
+ info["source"] = named
1374
+ fields_elem = root.find(f"{_XLSX_NS}cacheFields")
1375
+ if fields_elem is not None:
1376
+ field_names: list[str] = []
1377
+ for f in fields_elem:
1378
+ if f.tag == f"{_XLSX_NS}cacheField":
1379
+ field_names.append(f.get("name", ""))
1380
+ info["field_names"] = field_names
1381
+ cache_info[cid] = info
1382
+
1383
+ # pivot table 파일 파싱
1384
+ for pf in pivot_files:
1385
+ try:
1386
+ root = ET.fromstring(zf.read(pf))
1387
+ except Exception:
1388
+ continue
1389
+ pivot: dict = {"name": root.get("name", "")}
1390
+ cache_id = root.get("cacheId", "")
1391
+ field_names: list[str] = []
1392
+ if cache_id and cache_id in cache_info:
1393
+ ci = cache_info[cache_id]
1394
+ if "source" in ci:
1395
+ pivot["source"] = ci["source"]
1396
+ field_names = ci.get("field_names", [])
1397
+
1398
+ loc = root.find(f"{_XLSX_NS}location")
1399
+ if loc is not None:
1400
+ pivot["location"] = loc.get("ref", "")
1401
+
1402
+ # row·col·page fields (인덱스 → 이름)
1403
+ for tag, key in (
1404
+ ("rowFields", "rowFields"),
1405
+ ("colFields", "colFields"),
1406
+ ("pageFields", "pageFields"),
1407
+ ):
1408
+ elem = root.find(f"{_XLSX_NS}{tag}")
1409
+ if elem is None:
1410
+ continue
1411
+ names: list[str] = []
1412
+ for child in elem:
1413
+ x = child.get("x") or child.get("fld")
1414
+ if x is None:
1415
+ continue
1416
+ try:
1417
+ idx = int(x)
1418
+ except (TypeError, ValueError):
1419
+ continue
1420
+ if 0 <= idx < len(field_names) and field_names[idx]:
1421
+ names.append(field_names[idx])
1422
+ else:
1423
+ names.append(f"field_{idx}")
1424
+ if names:
1425
+ pivot[key] = names
1426
+
1427
+ # dataFields (값 필드 + 집계 함수)
1428
+ df_elem = root.find(f"{_XLSX_NS}dataFields")
1429
+ if df_elem is not None:
1430
+ df_list: list[dict] = []
1431
+ for df in df_elem:
1432
+ if df.tag != f"{_XLSX_NS}dataField":
1433
+ continue
1434
+ fld = df.get("fld", "")
1435
+ field_name = ""
1436
+ try:
1437
+ idx = int(fld)
1438
+ if 0 <= idx < len(field_names):
1439
+ field_name = field_names[idx]
1440
+ except (TypeError, ValueError):
1441
+ pass
1442
+ df_list.append({
1443
+ "name": df.get("name", ""),
1444
+ "field": field_name,
1445
+ "subtotal": df.get("subtotal", "sum"),
1446
+ })
1447
+ if df_list:
1448
+ pivot["dataFields"] = df_list
1449
+
1450
+ pivots.append(pivot)
1451
+ except (zipfile.BadZipFile, Exception):
1452
+ pass
1453
+ return pivots
1454
+
1455
+
1247
1456
  def _extract_pptx_chart_data(chart) -> dict:
1248
1457
  data: dict = {"type": str(getattr(chart, "chart_type", "")), "categories": [], "series": []}
1249
1458
  if chart.plots:
@@ -106,8 +106,9 @@ def cmd_ppt_png(args) -> None:
106
106
  if i - 1 < len(slide_titles):
107
107
  idx, safe_title = slide_titles[i - 1]
108
108
  else:
109
- idx, safe_title = f"{i:02d}", f"슬라이드{i}"
110
- out_png = out_dir / f"{idx}_{safe_title}.png"
109
+ idx, safe_title = f"{i:02d}", ""
110
+ stem = f"{idx}_{safe_title}" if safe_title else idx
111
+ out_png = out_dir / f"{stem}.png"
111
112
  slide.Export(short_str(out_png), "PNG", width_px, height_px)
112
113
  finally:
113
114
  pres.Close()