@simplysm/sd-claude 14.0.78 → 14.0.79
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/claude/output-styles/sd-tone.md +26 -2
- package/claude/references/sd-simplysm14/manuals/logging.md +1 -1
- package/claude/rules/sd-base-rules.md +109 -87
- package/claude/skills/sd-dev/SKILL.md +1 -1
- package/claude/skills/sd-impl/SKILL.md +15 -14
- package/claude/skills/sd-impl/references/spec-cross-check.md +2 -2
- package/claude/skills/sd-spec/SKILL.md +746 -192
- package/claude/skills/sd-spec/references/example-spec.md +107 -35
- package/claude/skills/sd-unpack/SKILL.md +39 -14
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/_common.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/eml_handler.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/office_com.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/pdf_handler.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/_common.py +59 -0
- package/claude/skills/sd-unpack/scripts/handlers/eml_handler.py +7 -0
- package/claude/skills/sd-unpack/scripts/handlers/msg_handler.py +11 -0
- package/claude/skills/sd-unpack/scripts/handlers/office_com.py +288 -79
- package/claude/skills/sd-unpack/scripts/handlers/office_worker.py +3 -2
- package/claude/skills/sd-unpack/scripts/handlers/pdf_handler.py +78 -10
- package/package.json +1 -1
- package/claude/skills/sd-spec/references/spec-authoring.md +0 -298
- package/claude/skills/sd-spec/references/spec-md-template.md +0 -29
- package/claude/skills/sd-wip/SKILL.md +0 -38
- package/claude/skills/sd-wip/evals/fixtures/empty/.gitkeep +0 -0
- package/claude/skills/sd-wip/evals/fixtures/with-artifact/projects/acct/_wip.md +0 -3
- package/claude/skills/sd-wip/evals/fixtures/with-artifact/projects/acct/spec.md +0 -15
- package/claude/skills/sd-wip/evals/fixtures/with-existing-wip/.wips/260101120000_acct.md +0 -6
- package/claude/skills/sd-wip/evals/fixtures/with-existing-wip-for-compact/.wips/260101120000_acct.md +0 -14
- package/claude/skills/sd-wip/evals/golden.jsonl +0 -4
- package/claude/skills/sd-wip/references/compact.md +0 -79
|
@@ -224,7 +224,7 @@ def _docx_extract_nodes(input_path: Path) -> tuple[list[dict], dict[str, int]]:
|
|
|
224
224
|
if vm == "continue":
|
|
225
225
|
# vMerge continue cell — origin 의 rowspan 영역. skip.
|
|
226
226
|
continue
|
|
227
|
-
cell_text =
|
|
227
|
+
cell_text = cell.text or "" # 원본 그대로 (strip X)
|
|
228
228
|
colspan = _docx_cell_colspan(cell)
|
|
229
229
|
cell_node = {
|
|
230
230
|
"node": node_idx,
|
|
@@ -414,13 +414,13 @@ def _run_pptx(
|
|
|
414
414
|
slide_summaries: list[str] = []
|
|
415
415
|
slide_has_notes: dict[str, bool] = {}
|
|
416
416
|
slide_charts: dict[str, list[str]] = {} # idx -> chart filenames
|
|
417
|
-
slide_cores: dict[str, str] = {} # idx -> 핵심 텍스트 (title 또는 첫 텍스트)
|
|
418
417
|
|
|
419
418
|
_common.mkdir(slides_dir)
|
|
420
419
|
for i, slide in enumerate(prs.slides, start=1):
|
|
421
420
|
idx = f"{i:02d}"
|
|
422
421
|
title = _pptx_slide_title(slide)
|
|
423
|
-
|
|
422
|
+
# title 없으면 idx 만 (자체 한국어 라벨 부착 X)
|
|
423
|
+
safe_title = _common.slugify_filename(title, max_len=40) if title else ""
|
|
424
424
|
slide_titles.append((idx, safe_title))
|
|
425
425
|
|
|
426
426
|
nodes, chart_refs = _pptx_extract_slide_nodes(
|
|
@@ -440,41 +440,37 @@ def _run_pptx(
|
|
|
440
440
|
lines = [json.dumps(meta, ensure_ascii=False, default=_json_default)]
|
|
441
441
|
for n in nodes:
|
|
442
442
|
lines.append(json.dumps(n, ensure_ascii=False, default=_json_default))
|
|
443
|
-
|
|
443
|
+
stem = _pptx_slide_stem(idx, safe_title)
|
|
444
|
+
_common.write_text(slides_dir / f"{stem}.jsonl", "\n".join(lines))
|
|
444
445
|
|
|
445
446
|
if chart_refs:
|
|
446
447
|
slide_charts[idx] = chart_refs
|
|
447
448
|
|
|
448
449
|
if slide.has_notes_slide:
|
|
449
450
|
notes_text = slide.notes_slide.notes_text_frame.text or ""
|
|
450
|
-
if notes_text
|
|
451
|
+
if notes_text:
|
|
451
452
|
_common.write_text(
|
|
452
|
-
slides_dir / f"{
|
|
453
|
+
slides_dir / f"{stem}.notes.md",
|
|
453
454
|
notes_text,
|
|
454
455
|
)
|
|
455
456
|
slide_has_notes[idx] = True
|
|
456
457
|
|
|
457
|
-
|
|
458
|
-
if core:
|
|
459
|
-
slide_cores[idx] = core[:60]
|
|
460
|
-
|
|
461
|
-
parts = [f"`slides/{idx}_{safe_title}.png`", "`.jsonl`"]
|
|
458
|
+
parts = [f"`slides/{stem}.png`", "`.jsonl`"]
|
|
462
459
|
if slide_has_notes.get(idx):
|
|
463
460
|
parts.append("`.notes.md`")
|
|
464
461
|
if chart_refs:
|
|
465
462
|
chart_str = ", ".join(f"`charts/{c}`" for c in chart_refs)
|
|
466
463
|
parts.append(f"(차트: {chart_str})")
|
|
467
|
-
if slide_cores.get(idx):
|
|
468
|
-
parts.append(f"— {slide_cores[idx]}")
|
|
469
464
|
slide_summaries.append(" ".join(parts))
|
|
470
465
|
|
|
471
466
|
# COM PowerPoint 의 Slide.Export 로 슬라이드별 PNG 직접 출력. 임시 폴더에서 만든 후 long-path-safe copy.
|
|
472
467
|
with _common.com_lock(), _common.temp_workdir() as tmp:
|
|
473
468
|
_powerpoint_export_slides(input_path, tmp, slide_titles)
|
|
474
469
|
for idx, safe_title in slide_titles:
|
|
475
|
-
|
|
470
|
+
stem = _pptx_slide_stem(idx, safe_title)
|
|
471
|
+
tmp_png = tmp / f"{stem}.png"
|
|
476
472
|
if tmp_png.exists():
|
|
477
|
-
_common.copy(tmp_png, slides_dir / f"{
|
|
473
|
+
_common.copy(tmp_png, slides_dir / f"{stem}.png")
|
|
478
474
|
|
|
479
475
|
# pptx 의 시각은 슬라이드 PNG 에 모두 포함 → ZIP media 전체 복제 skip
|
|
480
476
|
# (개별 picture shape 은 _pptx_extract_slide_nodes 에서 image ref 와 함께 저장됨).
|
|
@@ -509,26 +505,22 @@ def _run_pptx(
|
|
|
509
505
|
)
|
|
510
506
|
|
|
511
507
|
|
|
508
|
+
def _pptx_slide_stem(idx: str, safe_title: str) -> str:
|
|
509
|
+
"""슬라이드 파일 stem. safe_title 빈 문자열이면 idx 만 (자체 라벨 부착 X)."""
|
|
510
|
+
return f"{idx}_{safe_title}" if safe_title else idx
|
|
511
|
+
|
|
512
|
+
|
|
512
513
|
def _pptx_slide_title(slide) -> str:
|
|
513
|
-
"""슬라이드 title placeholder 텍스트. 없으면 빈 문자열."""
|
|
514
|
+
"""슬라이드 title placeholder 텍스트. 없으면 빈 문자열. 원본 그대로 (strip X)."""
|
|
514
515
|
try:
|
|
515
516
|
title_shape = slide.shapes.title
|
|
516
517
|
if title_shape is not None and title_shape.text:
|
|
517
|
-
return title_shape.text
|
|
518
|
+
return title_shape.text
|
|
518
519
|
except (AttributeError, ValueError):
|
|
519
520
|
pass
|
|
520
521
|
return ""
|
|
521
522
|
|
|
522
523
|
|
|
523
|
-
def _pptx_first_text(nodes: list[dict]) -> str:
|
|
524
|
-
"""노드 리스트 중 첫 비어있지 않은 text. 없으면 빈 문자열."""
|
|
525
|
-
for n in nodes:
|
|
526
|
-
t = (n.get("text") or "").strip()
|
|
527
|
-
if t:
|
|
528
|
-
return t
|
|
529
|
-
return ""
|
|
530
|
-
|
|
531
|
-
|
|
532
524
|
def _pptx_extract_slide_nodes(
|
|
533
525
|
slide,
|
|
534
526
|
slide_num: int,
|
|
@@ -568,7 +560,7 @@ def _pptx_extract_slide_nodes(
|
|
|
568
560
|
table_idx = shape_idx + 1
|
|
569
561
|
for r_idx, row in enumerate(table.rows, start=1):
|
|
570
562
|
for c_idx, cell in enumerate(row.cells, start=1):
|
|
571
|
-
cell_text =
|
|
563
|
+
cell_text = cell.text or "" # 원본 그대로 (strip X)
|
|
572
564
|
nodes.append({
|
|
573
565
|
**common,
|
|
574
566
|
"type": "table_cell",
|
|
@@ -724,7 +716,8 @@ def _run_xlsx(
|
|
|
724
716
|
sheets_dir = out_dir / "sheets"
|
|
725
717
|
charts_dir = out_dir / "charts"
|
|
726
718
|
sheet_summaries: list[str] = []
|
|
727
|
-
sheet_names: list[tuple[str, str, str]] = [] # (idx, safe_name, raw_name)
|
|
719
|
+
sheet_names: list[tuple[str, str, str]] = [] # (idx, safe_name, raw_name) — 일반 Worksheet
|
|
720
|
+
chart_sheet_names: list[tuple[str, str, str]] = [] # (idx, safe_name, raw_name) — Chartsheet
|
|
728
721
|
sheet_charts: dict[str, list[str]] = {} # idx -> chart filenames
|
|
729
722
|
sheet_formula_count: dict[str, int] = {}
|
|
730
723
|
sheet_dims: dict[str, tuple[int, int]] = {}
|
|
@@ -733,16 +726,20 @@ def _run_xlsx(
|
|
|
733
726
|
wb_formulas = load_workbook(_common.long_str(input_path), data_only=False)
|
|
734
727
|
try:
|
|
735
728
|
_common.mkdir(sheets_dir)
|
|
736
|
-
# openpyxl 의 sheetnames 는
|
|
737
|
-
#
|
|
729
|
+
# openpyxl 의 sheetnames 는 일반 Worksheet 와 Chartsheet 둘 다 포함.
|
|
730
|
+
# 시트 순서 그대로 idx 통합 부여 (사용자 워크북 순서 보존).
|
|
731
|
+
# 일반 Worksheet 만 COM Excel PNG export 대상, Chartsheet 는 차트 데이터만 추출.
|
|
738
732
|
idx_counter = 0
|
|
739
733
|
for name in wb_values.sheetnames:
|
|
740
|
-
|
|
741
|
-
continue
|
|
734
|
+
obj = wb_values[name]
|
|
742
735
|
idx_counter += 1
|
|
743
736
|
idx = f"{idx_counter:02d}"
|
|
744
737
|
safe_name = _common.slugify_filename(name, max_len=40)
|
|
745
|
-
|
|
738
|
+
if isinstance(obj, Worksheet):
|
|
739
|
+
sheet_names.append((idx, safe_name, name))
|
|
740
|
+
else:
|
|
741
|
+
# Chartsheet 등 비-worksheet
|
|
742
|
+
chart_sheet_names.append((idx, safe_name, name))
|
|
746
743
|
|
|
747
744
|
# COM Excel 호출: 데이터 영역 → ChartObject + Range.CopyPicture → 시트별 PNG.
|
|
748
745
|
# 시트별 (last_row, last_col) 도 같이 반환되어 .jsonl 이 같은 데이터 영역으로 통일됨.
|
|
@@ -772,20 +769,56 @@ def _run_xlsx(
|
|
|
772
769
|
)
|
|
773
770
|
sheet_charts.setdefault(idx, []).append(chart_filename)
|
|
774
771
|
|
|
775
|
-
#
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
)
|
|
782
|
-
|
|
772
|
+
# Chartsheet 처리: 차트 데이터를 charts/sheet<idx>_chart.data.json 으로 저장
|
|
773
|
+
chart_sheet_chart_files: dict[str, str] = {} # idx -> chart filename
|
|
774
|
+
for idx, safe_name, raw_name in chart_sheet_names:
|
|
775
|
+
cs = wb_formulas[raw_name]
|
|
776
|
+
chart = None
|
|
777
|
+
# Chartsheet.charts 또는 _charts 속성 (openpyxl 버전 따라 다름)
|
|
778
|
+
for attr in ("charts", "_charts"):
|
|
779
|
+
v = getattr(cs, attr, None)
|
|
780
|
+
if v:
|
|
781
|
+
if hasattr(v, "__iter__"):
|
|
782
|
+
try:
|
|
783
|
+
chart = next(iter(v), None)
|
|
784
|
+
except Exception:
|
|
785
|
+
chart = None
|
|
786
|
+
else:
|
|
787
|
+
chart = v
|
|
788
|
+
if chart is not None:
|
|
789
|
+
break
|
|
790
|
+
if chart is None:
|
|
791
|
+
# 단일 chart 속성 fallback
|
|
792
|
+
chart = getattr(cs, "chart", None)
|
|
793
|
+
if chart is not None:
|
|
794
|
+
try:
|
|
795
|
+
data = _extract_openpyxl_chart_data(chart)
|
|
796
|
+
except Exception:
|
|
797
|
+
data = None
|
|
798
|
+
if data is not None:
|
|
799
|
+
_common.mkdir(charts_dir)
|
|
800
|
+
chart_filename = f"sheet{idx}_chart.data.json"
|
|
801
|
+
_common.write_text(
|
|
802
|
+
charts_dir / chart_filename,
|
|
803
|
+
json.dumps(data, ensure_ascii=False, indent=2),
|
|
804
|
+
)
|
|
805
|
+
chart_sheet_chart_files[idx] = chart_filename
|
|
806
|
+
|
|
807
|
+
# 워크북 단위 메타 (defined names·pivots·sheet codeName 등) — 시트 jsonl 외부 분리.
|
|
808
|
+
wb_meta = _workbook_meta(wb_formulas, input_path)
|
|
783
809
|
# VBA 시트 객체명 ↔ raw 시트명 매핑 (시트 codeName 기반)
|
|
784
810
|
sheet_code_map: dict[str, str] = {}
|
|
785
811
|
for ws in wb_formulas.worksheets:
|
|
786
812
|
code = getattr(ws.sheet_properties, "codeName", None)
|
|
787
813
|
if code:
|
|
788
814
|
sheet_code_map[code] = ws.title
|
|
815
|
+
if sheet_code_map:
|
|
816
|
+
wb_meta["sheet_code_map"] = sheet_code_map
|
|
817
|
+
if wb_meta:
|
|
818
|
+
_common.write_text(
|
|
819
|
+
out_dir / "workbook.meta.json",
|
|
820
|
+
json.dumps(wb_meta, ensure_ascii=False, indent=2),
|
|
821
|
+
)
|
|
789
822
|
finally:
|
|
790
823
|
wb_values.close()
|
|
791
824
|
wb_formulas.close()
|
|
@@ -800,7 +833,8 @@ def _run_xlsx(
|
|
|
800
833
|
embed_zip_prefix="xl/embeddings/",
|
|
801
834
|
)
|
|
802
835
|
|
|
803
|
-
# 시트별 산출물 풀목록
|
|
836
|
+
# 시트별 산출물 풀목록 — 일반 시트 + chart sheet 통합, 시트 순서 (idx) 대로
|
|
837
|
+
sheet_summary_map: dict[str, str] = {}
|
|
804
838
|
for idx, safe_name, raw_name in sheet_names:
|
|
805
839
|
last_row, last_col = sheet_dims.get(idx, (0, 0))
|
|
806
840
|
formula_n = sheet_formula_count.get(idx, 0)
|
|
@@ -821,12 +855,21 @@ def _run_xlsx(
|
|
|
821
855
|
if formula_n:
|
|
822
856
|
meta += f", 수식 {formula_n}개"
|
|
823
857
|
meta += ")"
|
|
824
|
-
|
|
858
|
+
sheet_summary_map[idx] = " ".join(parts) + " " + meta
|
|
859
|
+
|
|
860
|
+
for idx, safe_name, raw_name in chart_sheet_names:
|
|
861
|
+
chart_filename = chart_sheet_chart_files.get(idx)
|
|
862
|
+
if chart_filename:
|
|
863
|
+
sheet_summary_map[idx] = f"`charts/{chart_filename}` (chart sheet — \"{raw_name}\")"
|
|
864
|
+
else:
|
|
865
|
+
sheet_summary_map[idx] = f"(chart sheet — \"{raw_name}\", 차트 데이터 추출 실패)"
|
|
866
|
+
|
|
867
|
+
# idx 순서대로 통합
|
|
868
|
+
for idx in sorted(sheet_summary_map.keys()):
|
|
869
|
+
sheet_summaries.append(sheet_summary_map[idx])
|
|
825
870
|
|
|
826
871
|
source_name, source_size = _source_meta(input_path, out_dir, source_name_override)
|
|
827
|
-
macro_modules = _extract_macros(
|
|
828
|
-
_source_path(out_dir, source_name), out_dir, sheet_code_map=sheet_code_map,
|
|
829
|
-
)
|
|
872
|
+
macro_modules = _extract_macros(_source_path(out_dir, source_name), out_dir)
|
|
830
873
|
|
|
831
874
|
sections: dict[str, list[str]] = {}
|
|
832
875
|
if sheet_summaries:
|
|
@@ -840,9 +883,9 @@ def _run_xlsx(
|
|
|
840
883
|
source_size=source_size,
|
|
841
884
|
tool=("openpyxl + COM Excel + ZIP " + tool_extra).strip(),
|
|
842
885
|
loss_notes=(
|
|
843
|
-
"셀
|
|
844
|
-
"시각은 시트별 PNG,
|
|
845
|
-
"워크북 단위 메타(defined names 등)는 workbook.meta.json."
|
|
886
|
+
"셀 서식(바탕색·border·폰트)·frozen·dims 미보존 (필요 시 _source.xlsx 직접 추출). "
|
|
887
|
+
"시각은 시트별 PNG, 분석 데이터(셀값·number_format·수식·merges·hyperlinks·comments) 는 "
|
|
888
|
+
"시트별 .jsonl 한 줄=한 행(좌표 명시), 워크북 단위 메타(defined names 등) 는 workbook.meta.json."
|
|
846
889
|
),
|
|
847
890
|
sections=sections or None,
|
|
848
891
|
attachments=attachment_links,
|
|
@@ -1002,18 +1045,13 @@ def _source_path(out_dir: Path, source_name: str) -> Path:
|
|
|
1002
1045
|
return out_dir / f"_source.{ext}"
|
|
1003
1046
|
|
|
1004
1047
|
|
|
1005
|
-
def _extract_macros(
|
|
1006
|
-
|
|
1007
|
-
out_dir: Path,
|
|
1008
|
-
sheet_code_map: Optional[dict[str, str]] = None,
|
|
1009
|
-
) -> list[str]:
|
|
1010
|
-
"""OLE/OOXML 파일에서 VBA 매크로 추출. macros/<모듈명>.vba 로 저장.
|
|
1048
|
+
def _extract_macros(input_path: Path, out_dir: Path) -> list[str]:
|
|
1049
|
+
"""OLE/OOXML 파일에서 VBA 매크로 추출. macros/<모듈명>.vba 로 저장 (원본 코드 그대로).
|
|
1011
1050
|
|
|
1012
1051
|
추출된 모듈 파일명 list 반환 (예: ["Module1.vba", "ThisWorkbook.vba"]).
|
|
1013
1052
|
매크로 없으면 빈 list.
|
|
1014
1053
|
|
|
1015
|
-
|
|
1016
|
-
매크로 파일 첫 줄에 코멘트로 매핑 정보 prepend (시트 모듈만).
|
|
1054
|
+
시트 객체명↔raw 시트명 매핑은 호출자(_run_xlsx)가 workbook.meta.json 에 별도 보관.
|
|
1017
1055
|
"""
|
|
1018
1056
|
_common.ensure_pip("oletools")
|
|
1019
1057
|
from oletools.olevba import VBA_Parser
|
|
@@ -1028,11 +1066,8 @@ def _extract_macros(
|
|
|
1028
1066
|
for (_filename, stream_path, vba_filename, vba_code) in parser.extract_macros():
|
|
1029
1067
|
module_name = vba_filename or stream_path or "module"
|
|
1030
1068
|
stem = Path(module_name).stem or "module"
|
|
1031
|
-
prefix = ""
|
|
1032
|
-
if sheet_code_map and stem in sheet_code_map:
|
|
1033
|
-
prefix = f'\' (object: {stem}, sheet: "{sheet_code_map[stem]}")\n\n'
|
|
1034
1069
|
dst = _common.unique_path(macros_dir, f"{stem}.vba")
|
|
1035
|
-
_common.write_text(dst,
|
|
1070
|
+
_common.write_text(dst, vba_code or "")
|
|
1036
1071
|
module_files.append(dst.name)
|
|
1037
1072
|
return module_files
|
|
1038
1073
|
finally:
|
|
@@ -1153,34 +1188,38 @@ def _json_default(obj: Any) -> str:
|
|
|
1153
1188
|
raise TypeError(f"not JSON serializable: {type(obj).__name__}")
|
|
1154
1189
|
|
|
1155
1190
|
|
|
1156
|
-
def _sheet_to_jsonl(
|
|
1157
|
-
|
|
1191
|
+
def _sheet_to_jsonl(
|
|
1192
|
+
ws_v, ws_f, last_row: int, last_col: int,
|
|
1193
|
+
) -> tuple[list[str], int]:
|
|
1194
|
+
"""openpyxl Worksheet 의 (1,1)~(last_row,last_col) → 행 단위 JSONL.
|
|
1195
|
+
|
|
1196
|
+
분석 핵심: 데이터·number_format·수식. 시각 표시(바탕색·border·폰트·frozen)·dims 는 미보존
|
|
1197
|
+
(필요 시 Claude 가 _source.xlsx 직접 추출).
|
|
1158
1198
|
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1199
|
+
데이터 jsonl (한 줄=한 행. 빈 셀 키 생략):
|
|
1200
|
+
- 첫 줄: `{"_meta":{"merges":[...], "number_formats":{...}, "hyperlinks":{...}, "comments":{...}}}`
|
|
1201
|
+
- merges: 셀 좌표 해석 필수 (머지 영역 안 빈 셀 오해 차단)
|
|
1202
|
+
- number_formats: Date·통화·% 등 셀 값 의미 단서
|
|
1203
|
+
- hyperlinks·comments: 셀 부가 정보
|
|
1204
|
+
- 비어있는 키는 생략
|
|
1205
|
+
- 데이터 줄: `{"r":N, "<col>":value, ..., "_f":{<col>:formula}}`
|
|
1206
|
+
- 빈 행도 `{"r":N}` 한 줄 유지
|
|
1163
1207
|
|
|
1164
1208
|
반환: (lines, formula_count)
|
|
1165
1209
|
"""
|
|
1166
1210
|
from openpyxl.utils import get_column_letter
|
|
1167
1211
|
|
|
1168
1212
|
if last_row < 1 or last_col < 1:
|
|
1169
|
-
|
|
1170
|
-
return [json.dumps(meta, ensure_ascii=False)], 0
|
|
1213
|
+
return [json.dumps({"_meta": {}}, ensure_ascii=False)], 0
|
|
1171
1214
|
|
|
1172
|
-
|
|
1173
|
-
meta: dict[str, Any] = {"dims": [last_row, last_col]}
|
|
1215
|
+
meta: dict[str, Any] = {}
|
|
1174
1216
|
merges = [str(r) for r in ws_v.merged_cells.ranges]
|
|
1175
1217
|
if merges:
|
|
1176
1218
|
meta["merges"] = merges
|
|
1177
|
-
frozen = ws_v.freeze_panes
|
|
1178
|
-
if frozen:
|
|
1179
|
-
meta["frozen"] = frozen
|
|
1180
1219
|
|
|
1181
1220
|
hyperlinks: dict[str, str] = {}
|
|
1182
1221
|
comments: dict[str, str] = {}
|
|
1183
|
-
number_formats: dict[str, str] = {}
|
|
1222
|
+
number_formats: dict[str, str] = {}
|
|
1184
1223
|
for row in ws_v.iter_rows(min_row=1, max_row=last_row, min_col=1, max_col=last_col):
|
|
1185
1224
|
for cell in row:
|
|
1186
1225
|
hl = getattr(cell, "hyperlink", None)
|
|
@@ -1192,12 +1231,12 @@ def _sheet_to_jsonl(ws_v, ws_f, last_row: int, last_col: int) -> tuple[list[str]
|
|
|
1192
1231
|
nf = getattr(cell, "number_format", None)
|
|
1193
1232
|
if nf and nf != "General":
|
|
1194
1233
|
number_formats[cell.coordinate] = nf
|
|
1234
|
+
if number_formats:
|
|
1235
|
+
meta["number_formats"] = number_formats
|
|
1195
1236
|
if hyperlinks:
|
|
1196
1237
|
meta["hyperlinks"] = hyperlinks
|
|
1197
1238
|
if comments:
|
|
1198
1239
|
meta["comments"] = comments
|
|
1199
|
-
if number_formats:
|
|
1200
|
-
meta["number_formats"] = number_formats
|
|
1201
1240
|
|
|
1202
1241
|
lines: list[str] = [json.dumps({"_meta": meta}, ensure_ascii=False, default=_json_default)]
|
|
1203
1242
|
formula_count = 0
|
|
@@ -1223,8 +1262,8 @@ def _sheet_to_jsonl(ws_v, ws_f, last_row: int, last_col: int) -> tuple[list[str]
|
|
|
1223
1262
|
return lines, formula_count
|
|
1224
1263
|
|
|
1225
1264
|
|
|
1226
|
-
def _workbook_meta(wb) -> dict[str, Any]:
|
|
1227
|
-
"""워크북 단위 메타 (defined names 등). 비어있으면 빈 dict 반환."""
|
|
1265
|
+
def _workbook_meta(wb, input_path: Path) -> dict[str, Any]:
|
|
1266
|
+
"""워크북 단위 메타 (defined names·pivot tables 등). 비어있으면 빈 dict 반환."""
|
|
1228
1267
|
meta: dict[str, Any] = {}
|
|
1229
1268
|
defined_names: dict[str, list[str]] = {}
|
|
1230
1269
|
# openpyxl 3.x: wb.defined_names 는 DefinedNameDict (dict-like)
|
|
@@ -1241,9 +1280,179 @@ def _workbook_meta(wb) -> dict[str, Any]:
|
|
|
1241
1280
|
pass
|
|
1242
1281
|
if defined_names:
|
|
1243
1282
|
meta["defined_names"] = defined_names
|
|
1283
|
+
|
|
1284
|
+
pivots = _extract_pivots(input_path)
|
|
1285
|
+
if pivots:
|
|
1286
|
+
meta["pivots"] = pivots
|
|
1287
|
+
|
|
1244
1288
|
return meta
|
|
1245
1289
|
|
|
1246
1290
|
|
|
1291
|
+
_XLSX_NS = "{http://schemas.openxmlformats.org/spreadsheetml/2006/main}"
|
|
1292
|
+
_XLSX_REL_NS = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}"
|
|
1293
|
+
_PKG_REL_NS = "{http://schemas.openxmlformats.org/package/2006/relationships}"
|
|
1294
|
+
|
|
1295
|
+
|
|
1296
|
+
def _extract_pivots(input_path: Path) -> list[dict]:
    """Return the pivot table definitions found in an xlsx package.

    Reads every ``xl/pivotTables/pivotTable*.xml`` part and resolves each
    table's ``cacheId`` to its cache definition part through
    ``xl/workbook.xml`` (``pivotCaches``: cacheId -> r:id) and
    ``xl/_rels/workbook.xml.rels`` (Id -> Target).

    Each returned dict always has ``name`` and, when available:
      - ``source``: ``"'<sheet>'!<ref>"``, or the source's ``name`` attribute
        when sheet/ref are not both present
      - ``location``: the ``ref`` attribute of the table's ``location`` element
      - ``rowFields`` / ``colFields`` / ``pageFields``: lists of field names
      - ``dataFields``: ``[{"name", "field", "subtotal"}, ...]``

    Extraction is best effort: parts that cannot be read or parsed are
    skipped, and any other failure ends the scan and returns whatever was
    collected up to that point (possibly an empty list). Nothing is raised.
    """
    pivots: list[dict] = []
    try:
        with zipfile.ZipFile(_common.long_str(input_path), "r") as zf:
            pivot_files = sorted(
                name for name in zf.namelist()
                if name.startswith("xl/pivotTables/pivotTable") and name.endswith(".xml")
            )
            if not pivot_files:
                return pivots

            # cacheId -> {"source": ..., "field_names": [...]}
            cache_info: dict[str, dict] = {}
            for cache_id, member in _pivot_cache_files(zf).items():
                cache_root = _pivot_xml_root(zf, member)
                if cache_root is not None:
                    cache_info[cache_id] = _pivot_cache_info(cache_root)

            for member in pivot_files:
                table_root = _pivot_xml_root(zf, member)
                if table_root is not None:
                    pivots.append(_pivot_table_info(table_root, cache_info))
    except Exception:
        # Deliberate catch-all: pivot metadata is auxiliary, so an unreadable
        # or malformed package must not abort the caller.
        pass
    return pivots


def _pivot_xml_root(zf: zipfile.ZipFile, member: str) -> Any:
    """Parse ZIP member ``member`` as XML; return None if it cannot be read or parsed."""
    import xml.etree.ElementTree as ET

    try:
        return ET.fromstring(zf.read(member))
    except Exception:
        return None


def _pivot_cache_files(zf: zipfile.ZipFile) -> dict[str, str]:
    """Map each workbook-level pivot cacheId to the ZIP path of its cache part."""
    import posixpath

    rid_to_target: dict[str, str] = {}
    rels_root = _pivot_xml_root(zf, "xl/_rels/workbook.xml.rels")
    if rels_root is not None:
        for rel in rels_root.findall(f"{_PKG_REL_NS}Relationship"):
            rid_to_target[rel.get("Id", "")] = rel.get("Target", "")

    cache_files: dict[str, str] = {}
    wb_root = _pivot_xml_root(zf, "xl/workbook.xml")
    caches = wb_root.find(f"{_XLSX_NS}pivotCaches") if wb_root is not None else None
    if caches is None:
        return cache_files

    for cache in caches:
        cache_id = cache.get("cacheId")
        rid = cache.get(f"{_XLSX_REL_NS}id")
        target = rid_to_target.get(rid, "") if cache_id and rid else ""
        if not target:
            continue
        # Targets are resolved against the "xl/" folder of workbook.xml; a
        # leading "/" makes them package-absolute. Normalising collapses any
        # "./" or "../" segments so the result matches the ZIP member name.
        cache_files[cache_id] = posixpath.normpath(posixpath.join("xl", target)).lstrip("/")
    return cache_files


def _pivot_cache_info(cache_root: Any) -> dict:
    """Extract the source reference and field names from a pivot cache definition root."""
    info: dict = {}
    ws_source = cache_root.find(f"{_XLSX_NS}cacheSource/{_XLSX_NS}worksheetSource")
    if ws_source is not None:
        sheet = ws_source.get("sheet", "")
        ref = ws_source.get("ref", "")
        named = ws_source.get("name", "")
        if sheet and ref:
            info["source"] = f"'{sheet}'!{ref}"
        elif named:
            info["source"] = named

    fields = cache_root.find(f"{_XLSX_NS}cacheFields")
    if fields is not None:
        # Position in this list is the field index that pivot tables refer to.
        info["field_names"] = [
            field.get("name", "")
            for field in fields
            if field.tag == f"{_XLSX_NS}cacheField"
        ]
    return info


def _pivot_axis_names(axis: Any, field_names: list[str]) -> list[str]:
    """Translate the field indexes under a row/col/page axis element into names."""
    names: list[str] = []
    for child in axis:
        raw = child.get("x") or child.get("fld")
        if raw is None:
            continue
        try:
            idx = int(raw)
        except (TypeError, ValueError):
            continue
        if 0 <= idx < len(field_names) and field_names[idx]:
            names.append(field_names[idx])
        else:
            # Unresolvable index: keep a positional placeholder.
            # NOTE(review): negative indexes (e.g. x="-2") also land here and
            # surface as "field_-2" — presumably the values pseudo-field;
            # confirm whether that deserves a dedicated representation.
            names.append(f"field_{idx}")
    return names


def _pivot_data_fields(data_fields: Any, field_names: list[str]) -> list[dict]:
    """Build the ``{"name", "field", "subtotal"}`` entries for a dataFields element."""
    entries: list[dict] = []
    for data_field in data_fields:
        if data_field.tag != f"{_XLSX_NS}dataField":
            continue
        try:
            idx = int(data_field.get("fld", ""))
        except (TypeError, ValueError):
            idx = -1  # no usable index -> "field" stays empty
        entries.append({
            "name": data_field.get("name", ""),
            "field": field_names[idx] if 0 <= idx < len(field_names) else "",
            "subtotal": data_field.get("subtotal", "sum"),
        })
    return entries


def _pivot_table_info(table_root: Any, cache_info: dict[str, dict]) -> dict:
    """Build the result dict for one pivot table definition root."""
    pivot: dict = {"name": table_root.get("name", "")}

    # An unknown or missing cacheId simply yields no source and no field names.
    cache = cache_info.get(table_root.get("cacheId", ""), {})
    if "source" in cache:
        pivot["source"] = cache["source"]
    field_names = cache.get("field_names", [])

    location = table_root.find(f"{_XLSX_NS}location")
    if location is not None:
        pivot["location"] = location.get("ref", "")

    for key in ("rowFields", "colFields", "pageFields"):
        axis = table_root.find(f"{_XLSX_NS}{key}")
        if axis is None:
            continue
        names = _pivot_axis_names(axis, field_names)
        if names:
            pivot[key] = names

    data_fields = table_root.find(f"{_XLSX_NS}dataFields")
    if data_fields is not None:
        entries = _pivot_data_fields(data_fields, field_names)
        if entries:
            pivot["dataFields"] = entries
    return pivot
|
|
1454
|
+
|
|
1455
|
+
|
|
1247
1456
|
def _extract_pptx_chart_data(chart) -> dict:
|
|
1248
1457
|
data: dict = {"type": str(getattr(chart, "chart_type", "")), "categories": [], "series": []}
|
|
1249
1458
|
if chart.plots:
|
|
@@ -106,8 +106,9 @@ def cmd_ppt_png(args) -> None:
|
|
|
106
106
|
if i - 1 < len(slide_titles):
|
|
107
107
|
idx, safe_title = slide_titles[i - 1]
|
|
108
108
|
else:
|
|
109
|
-
idx, safe_title = f"{i:02d}",
|
|
110
|
-
|
|
109
|
+
idx, safe_title = f"{i:02d}", ""
|
|
110
|
+
stem = f"{idx}_{safe_title}" if safe_title else idx
|
|
111
|
+
out_png = out_dir / f"{stem}.png"
|
|
111
112
|
slide.Export(short_str(out_png), "PNG", width_px, height_px)
|
|
112
113
|
finally:
|
|
113
114
|
pres.Close()
|