skdconv 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skdconv/__init__.py +105 -0
- skdconv/__main__.py +3 -0
- skdconv/cli.py +152 -0
- skdconv/detect.py +96 -0
- skdconv/docx/__init__.py +0 -0
- skdconv/docx/equation.py +119 -0
- skdconv/docx/parser.py +579 -0
- skdconv/hwp3/__init__.py +0 -0
- skdconv/hwp3/johab.py +101 -0
- skdconv/hwp3/johab_symbols.py +745 -0
- skdconv/hwp3/parser.py +240 -0
- skdconv/hwp3/reader.py +63 -0
- skdconv/hwp3/records.py +57 -0
- skdconv/hwp5/__init__.py +0 -0
- skdconv/hwp5/aes.py +136 -0
- skdconv/hwp5/cfb_lenient.py +313 -0
- skdconv/hwp5/crypto.py +116 -0
- skdconv/hwp5/parser.py +836 -0
- skdconv/hwp5/record.py +254 -0
- skdconv/hwpml/__init__.py +0 -0
- skdconv/hwpml/parser.py +326 -0
- skdconv/hwpx/__init__.py +0 -0
- skdconv/hwpx/equation.py +270 -0
- skdconv/hwpx/parser.py +972 -0
- skdconv/page_range.py +50 -0
- skdconv/table/__init__.py +0 -0
- skdconv/table/builder.py +484 -0
- skdconv/types.py +256 -0
- skdconv/utils.py +139 -0
- skdconv/xls/__init__.py +0 -0
- skdconv/xls/cell.py +210 -0
- skdconv/xls/encoding.py +34 -0
- skdconv/xls/parser.py +301 -0
- skdconv/xls/record.py +153 -0
- skdconv/xls/sst.py +114 -0
- skdconv/xlsx/__init__.py +0 -0
- skdconv/xlsx/parser.py +419 -0
- skdconv-1.0.2.dist-info/METADATA +109 -0
- skdconv-1.0.2.dist-info/RECORD +42 -0
- skdconv-1.0.2.dist-info/WHEEL +4 -0
- skdconv-1.0.2.dist-info/entry_points.txt +2 -0
- skdconv-1.0.2.dist-info/licenses/LICENSE +21 -0
skdconv/__init__.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""skdconv — 한국 공문서를 마크다운으로 변환하는 파서 라이브러리."""
|
|
2
|
+
|
|
3
|
+
from .types import (
|
|
4
|
+
IRBlock,
|
|
5
|
+
IRCell,
|
|
6
|
+
IRTable,
|
|
7
|
+
CellContext,
|
|
8
|
+
ParseResult,
|
|
9
|
+
ParseSuccess,
|
|
10
|
+
ParseFailure,
|
|
11
|
+
ParseOptions,
|
|
12
|
+
DocumentMetadata,
|
|
13
|
+
ParseWarning,
|
|
14
|
+
OutlineItem,
|
|
15
|
+
ExtractedImage,
|
|
16
|
+
FileType,
|
|
17
|
+
InternalParseResult,
|
|
18
|
+
)
|
|
19
|
+
from .utils import KordocError, VERSION
|
|
20
|
+
from .detect import detect_format, detect_zip_format, detect_ole2_format
|
|
21
|
+
|
|
22
|
+
# Expose the package version (imported from .utils) under the conventional dunder name.
__version__ = VERSION
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse(data: bytes, options: "ParseOptions | None" = None) -> ParseResult:
    """Detect the format of *data*, run the matching parser and wrap the outcome.

    Args:
        data: Raw bytes of the document.
        options: Optional parse options, passed through unchanged to the
            format-specific parser.

    Returns:
        A ParseSuccess built from the parser's result, or a ParseFailure when
        the input is empty, the format has no parser, or the parser raised.
        Exceptions are never propagated; they are classified with
        ``classify_error`` and reported through ParseFailure.
    """
    from .utils import classify_error

    if not data:
        return ParseFailure(file_type="unknown", error="입력 데이터가 비어 있습니다", code="EMPTY_INPUT")

    fmt = detect_format(data)
    # The magic bytes only identify the container (ZIP / OLE2); look inside to
    # refine. An inconclusive refinement keeps the container-level guess.
    if fmt == "hwpx":
        zip_kind = detect_zip_format(data)
        if zip_kind in ("xlsx", "docx", "hwpx"):
            fmt = zip_kind
    elif fmt == "hwp":
        ole_kind = detect_ole2_format(data)
        if ole_kind in ("hwp", "xls"):
            fmt = ole_kind

    try:
        # Parsers are imported lazily, inside the try, so that an import
        # failure is reported as a ParseFailure like any other parser error.
        if fmt == "hwpx":
            from .hwpx.parser import parse_hwpx_document as run_parser
        elif fmt == "hwp":
            from .hwp5.parser import parse_hwp5_document as run_parser
        elif fmt == "hwp3":
            from .hwp3.parser import parse_hwp3_document as run_parser
        elif fmt == "hwpml":
            from .hwpml.parser import parse_hwpml_document as run_parser
        elif fmt == "xlsx":
            from .xlsx.parser import parse_xlsx_document as run_parser
        elif fmt == "xls":
            from .xls.parser import parse_xls_document as run_parser
        elif fmt == "docx":
            from .docx.parser import parse_docx_document as run_parser
        else:
            return ParseFailure(file_type=fmt, error=f"지원하지 않는 포맷: {fmt}", code="UNSUPPORTED_FORMAT")

        result: InternalParseResult = run_parser(data, options)

        meta = result.metadata
        pages = meta.page_count if meta else None
        return ParseSuccess(
            file_type=fmt,
            markdown=result.markdown,
            blocks=result.blocks,
            page_count=pages,
            metadata=meta,
            outline=result.outline,
            warnings=result.warnings,
            images=result.images,
        )
    except Exception as exc:
        return ParseFailure(file_type=fmt, error=str(exc), code=classify_error(exc))
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Public API of the package: the names exported by ``from skdconv import *``.
__all__ = [
    "IRBlock",
    "IRCell",
    "IRTable",
    "CellContext",
    "ParseResult",
    "ParseSuccess",
    "ParseFailure",
    "ParseOptions",
    "DocumentMetadata",
    "ParseWarning",
    "OutlineItem",
    "ExtractedImage",
    "FileType",
    "KordocError",
    "VERSION",
    "detect_format",
    "detect_zip_format",
    "detect_ole2_format",
    "parse",
]
|
skdconv/__main__.py
ADDED
skdconv/cli.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""skdconv CLI — 문서를 마크다운으로 변환."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .utils import VERSION
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_fields(fields_str: str) -> dict[str, str]:
    """Turn a ``key=value,key2=value2`` string, or a JSON object string, into a dict.

    Input that starts with ``{`` (after trimming) is handed to ``json.loads``
    and returned as-is. Otherwise the text is split at every comma that is
    directly followed by a Hangul syllable or an ASCII letter, so commas
    inside a value survive unless the next character starts a new key.
    Pieces without ``=`` or with nothing before it are ignored; keys and
    values are whitespace-trimmed.
    """
    text = fields_str.strip()
    if text.startswith("{"):
        return json.loads(text)

    import re

    parsed: dict[str, str] = {}
    # Split on a comma followed by a Hangul/ASCII letter (the start of a key).
    for chunk in re.split(r",(?=[가-힣A-Za-z])", text):
        key, separator, value = chunk.partition("=")
        if separator and key:
            parsed[key.strip()] = value.strip()
    return parsed
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _cmd_parse(args: argparse.Namespace) -> int:
    """Convert every path in ``args.files`` and return a process exit code.

    Each file is read fully into memory, parsed, rendered as markdown or JSON
    and then written to ``args.output`` (only when exactly one input was
    given), into ``args.out_dir``, or to stdout. Progress and error messages
    go to stderr. A problem with one file does not stop the remaining files.

    Returns:
        0 when every file was converted; 1 when the output format is invalid
        or at least one file was skipped, failed to parse, or raised.
    """
    from . import parse
    from .detect import detect_format
    from .types import ParseOptions

    if args.format not in {"markdown", "json"}:
        sys.stderr.write(f"[skdconv] 지원하지 않는 형식: {args.format} (markdown 또는 json)\n")
        return 1

    size_limit = 500 * 1024 * 1024  # bytes; larger inputs are skipped
    targets = args.files
    status = 0

    for index, raw_path in enumerate(targets):
        resolved = os.path.realpath(raw_path)
        base_name = os.path.basename(resolved)
        # The "[i/n] " counter is only shown for multi-file runs.
        if len(targets) > 1:
            counter = f"[{index + 1}/{len(targets)}] "
        else:
            counter = ""

        try:
            byte_count = os.path.getsize(resolved)
            if byte_count > size_limit:
                sys.stderr.write(
                    f"\n[skdconv] SKIP: {base_name} — 파일이 너무 큽니다 "
                    f"({byte_count / 1024 / 1024:.1f}MB)\n"
                )
                status = 1
                continue

            with open(resolved, "rb") as handle:
                payload = handle.read()

            detected = detect_format(payload)
            if not args.silent:
                sys.stderr.write(f"[skdconv] {counter}{base_name} ({detected}) ...")

            options = ParseOptions(file_path=resolved)
            if args.pages:
                options.pages = args.pages
            if args.no_header_footer:
                options.remove_header_footer = True

            outcome = parse(payload, options)

            if not outcome.success:
                # Failures are written even when --silent is set.
                sys.stderr.write(" FAIL\n")
                sys.stderr.write(f" → {outcome.error}\n")
                status = 1
                continue

            if not args.silent:
                sys.stderr.write(" OK\n")

            if args.format == "json":
                document = {
                    "file_type": outcome.file_type,
                    "markdown": outcome.markdown,
                    "page_count": outcome.page_count,
                }
                document["warnings"] = [
                    {"message": w.message, "code": w.code, "page": w.page}
                    for w in (outcome.warnings or [])
                ]
                rendered = json.dumps(document, ensure_ascii=False, indent=2)
            else:
                rendered = outcome.markdown

            if args.output and len(targets) == 1:
                # -o is honoured only for a single input file.
                Path(args.output).write_text(rendered, encoding="utf-8")
                if not args.silent:
                    sys.stderr.write(f" → {args.output}\n")
            elif args.out_dir:
                os.makedirs(args.out_dir, exist_ok=True)
                suffix = ".json" if args.format == "json" else ".md"
                stem = os.path.splitext(base_name)[0]
                destination = os.path.join(args.out_dir, stem + suffix)
                Path(destination).write_text(rendered, encoding="utf-8")
                if not args.silent:
                    sys.stderr.write(f" → {destination}\n")
            else:
                sys.stdout.write(rendered + "\n")

        except Exception as exc:
            # Any error while handling this file is reported and the loop continues.
            sys.stderr.write(f"\n[skdconv] ERROR: {base_name} — {exc}\n")
            status = 1

    return status
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def main(argv: Optional[list[str]] = None) -> None:
    """Command-line entry point: parse arguments, convert the files, then exit.

    Args:
        argv: Argument list without the program name; ``sys.argv[1:]`` is
            used when it is None.

    Always terminates through ``sys.exit``: with the result of ``_cmd_parse``
    when at least one file was given, otherwise with 0 after printing help.
    """
    cli_args = sys.argv[1:] if argv is None else argv

    parser = argparse.ArgumentParser(
        prog="skdconv",
        description="모두 파싱해버리겠다 - HWP, HWPX, XLSX, XLS, DOCX -> Markdown",
    )

    # (flags, keyword arguments) in the order they are registered.
    option_specs = [
        (("--version",), {"action": "version", "version": f"skdconv {VERSION}"}),
        (("files",), {"nargs": "*", "metavar": "FILE", "help": "변환할 파일 경로"}),
        (("-o", "--output"), {"metavar": "PATH", "help": "출력 파일 경로 (단일 파일 시)"}),
        (("-d", "--out-dir"), {"metavar": "DIR", "help": "출력 디렉토리 (다중 파일 시)"}),
        (("-p", "--pages"), {"metavar": "RANGE", "help": "페이지/섹션 범위 (예: 1-3, 1,3,5)"}),
        (("--format",), {"choices": ["markdown", "json"], "default": "markdown", "help": "출력 형식"}),
        (("--no-header-footer",), {"action": "store_true", "help": "PDF 머리글/바닥글 자동 제거"}),
        (("--silent",), {"action": "store_true", "help": "진행 메시지 숨기기"}),
    ]
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args(cli_args)

    if args.files:
        exit_code = _cmd_parse(args)
    else:
        parser.print_help()
        exit_code = 0
    sys.exit(exit_code)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# Run the CLI when this module is executed as a script.
if __name__ == "__main__":
    main()
|
skdconv/detect.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""매직 바이트 기반 파일 포맷 감지"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import io
|
|
6
|
+
import zipfile
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
from .types import FileType
|
|
10
|
+
|
|
11
|
+
# Byte signature that is_hwp3_file() expects at the very start of the data.
_HWP3_PREFIX = b"HWP Document File V3.00"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def is_zip_file(data: bytes) -> bool:
    """Return True when *data* begins with the ZIP local-file-header signature."""
    # A slice shorter than four bytes can never equal the signature, so no
    # separate length check is required.
    signature = b"PK\x03\x04"
    return data[:4] == signature
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_hwpx_file(data: bytes) -> bool:
    """Return True when *data* starts with the ZIP signature.

    This simply delegates to is_zip_file(), so every ZIP archive passes, not
    only HWPX packages.
    """
    return is_zip_file(data)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def is_old_hwp_file(data: bytes) -> bool:
    """Return True when *data* begins with the bytes ``D0 CF 11 E0``.

    detect_format() reports such data as "hwp"; detect_ole2_format() can then
    tell HWP and XLS apart.
    """
    signature = b"\xd0\xcf\x11\xe0"
    # A slice shorter than four bytes can never equal the signature.
    return data[:4] == signature
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def is_hwp3_file(data: bytes) -> bool:
    """Return True when *data* begins with the ``_HWP3_PREFIX`` signature."""
    prefix_length = len(_HWP3_PREFIX)
    # Data shorter than the prefix yields a shorter slice, which cannot match.
    return data[:prefix_length] == _HWP3_PREFIX
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def is_pdf_file(data: bytes) -> bool:
    """Return True when *data* begins with the ``%PDF`` marker."""
    marker = b"%PDF"
    # A slice shorter than four bytes can never equal the marker.
    return data[:4] == marker
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def is_hwpml_file(data: bytes) -> bool:
    """Return True when *data* looks like an HWPML (XML) document.

    Only the first 512 bytes are inspected: after an optional byte-order mark
    and leading whitespace they must start with ``<?xml`` and contain
    ``<HWPML``. Undecodable bytes are replaced rather than raising.
    """
    head = data[:512].decode("utf-8", errors="replace")
    # U+FEFF is not whitespace for str.lstrip(), so the BOM has to be removed
    # explicitly or BOM-prefixed files would never match "<?xml".
    head = head.lstrip("\ufeff").lstrip()
    return head.startswith("<?xml") and "<HWPML" in head
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def detect_format(data: bytes) -> FileType:
    """Guess the file format from the leading bytes of *data*.

    Every ZIP container is reported as "hwpx" and every ``D0 CF 11 E0``
    container as "hwp"; use detect_zip_format() / detect_ole2_format() to
    refine those. Data shorter than four bytes, or matching no probe, is
    "unknown".
    """
    if len(data) < 4:
        return "unknown"

    # Probes are tried in this order; the first match wins.
    probes = (
        (is_hwp3_file, "hwp3"),
        (is_zip_file, "hwpx"),
        (is_old_hwp_file, "hwp"),
        (is_pdf_file, "pdf"),
        (is_hwpml_file, "hwpml"),
    )
    for matches, label in probes:
        if matches(data):
            return label
    return "unknown"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def detect_ole2_format(data: bytes) -> Literal["hwp", "xls", "unknown"]:
    """Refine an OLE2 container into "hwp" or "xls" by its top-level entry names.

    HWP 5.x and XLS both use the OLE2 container, so they are told apart by
    entry names: ``Workbook``/``Book`` means XLS; ``FileHeader``, ``DocInfo``
    or a name starting with ``Section`` means HWP. Any error, including a
    failure to import ``olefile``, yields "unknown".
    """
    detected = "unknown"
    try:
        import olefile  # type: ignore

        with olefile.OleFileIO(io.BytesIO(data)) as container:
            # Only the first path component of each entry is considered.
            roots = {path[0] for path in container.listdir()}
            if roots & {"Workbook", "Book"}:
                detected = "xls"
            elif "FileHeader" in roots or any(
                name == "DocInfo" or name.startswith("Section") for name in roots
            ):
                detected = "hwp"
    except Exception:
        return "unknown"
    return detected
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def detect_zip_format(data: bytes) -> Literal["hwpx", "xlsx", "docx", "unknown"]:
    """Refine a ZIP container into "hwpx", "xlsx" or "docx" by its contents.

    HWPX, XLSX and DOCX are all ZIP archives, so they are told apart by
    well-known member names:

    * ``xl/workbook.xml``      -> "xlsx"
    * ``word/document.xml``    -> "docx"
    * ``Contents/content.hpf`` or any member under ``Contents/`` -> "hwpx"
    * a ``mimetype`` member whose content mentions ``hwp`` -> "hwpx"

    A bare ``mimetype`` member is not enough on its own, because EPUB and
    OpenDocument archives carry one too. Anything else, and any error while
    reading the archive, yields "unknown".
    """
    try:
        with zipfile.ZipFile(io.BytesIO(data)) as zf:
            names = zf.namelist()
            name_set = set(names)
            if "xl/workbook.xml" in name_set:
                return "xlsx"
            if "word/document.xml" in name_set:
                return "docx"
            if "Contents/content.hpf" in name_set:
                return "hwpx"
            if any(n.startswith("Contents/") for n in names):
                return "hwpx"
            if "mimetype" in name_set:
                # Read a bounded prefix only; the declared type is short and
                # the member size is untrusted.
                with zf.open("mimetype") as fh:
                    declared = fh.read(256)
                if b"hwp" in declared.lower():
                    return "hwpx"
            return "unknown"
    except Exception:
        # Best effort: a corrupt or non-ZIP input is simply not recognised.
        return "unknown"
|
skdconv/docx/__init__.py
ADDED
|
File without changes
|
skdconv/docx/equation.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""DOCX OMML (Office Math ML) → LaTeX 변환 — 간략 구현."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import lxml.etree as ET
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _tag(el: ET._Element) -> str:
    """Return the local (namespace-stripped) tag name of *el*.

    ``{namespace}name`` becomes ``name``; a tag without a namespace is
    returned unchanged. Nodes whose ``tag`` is not a string yield "" so that
    name comparisons simply do not match instead of raising TypeError. lxml
    reports comments and processing instructions that way, and they do show
    up when iterating an element's children.
    """
    t = el.tag
    if not isinstance(t, str):
        return ""
    return t.split("}", 1)[1] if "}" in t else t
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _kids(parent: ET._Element, name: str) -> list[ET._Element]:
    """Return the direct children of *parent* whose local tag name is *name*, in document order."""
    matches: list[ET._Element] = []
    for node in parent:
        if _tag(node) == name:
            matches.append(node)
    return matches
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _first_kid(parent: ET._Element, name: str) -> ET._Element | None:
    """Return the first direct child of *parent* whose local tag name is *name*, or None."""
    return next((node for node in parent if _tag(node) == name), None)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _run_to_latex(r: ET._Element) -> str:
    """Concatenate the text of every ``t`` child of the run *r* (missing text counts as "")."""
    pieces = []
    for text_el in _kids(r, "t"):
        pieces.append(text_el.text or "")
    return "".join(pieces)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Names of common mathematical functions and operators.
# NOTE(review): not referenced in the visible part of this module; presumably
# meant for emitting \sin-style commands. Confirm before relying on it.
_FUNC_NAMES = {
    "sin", "cos", "tan", "cot", "sec", "csc",
    "sinh", "cosh", "tanh", "coth",
    "arcsin", "arccos", "arctan",
    "log", "ln", "lg", "exp",
    "det", "dim", "gcd", "inf", "sup", "lim", "max", "min",
    "Pr", "arg",
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# LaTeX commands for the operator characters found in <m:chr m:val="…"/>.
_NARY_LATEX = {
    "∑": "\\sum",
    "∏": "\\prod",
    "∐": "\\coprod",
    "∫": "\\int",
    "∬": "\\iint",
    "∭": "\\iiint",
    "∮": "\\oint",
    "⋃": "\\bigcup",
    "⋂": "\\bigcap",
    "⋁": "\\bigvee",
    "⋀": "\\bigwedge",
}


def _attr_local(el: ET._Element, name: str) -> str | None:
    """Return the value of the attribute whose namespace-stripped name is *name*, or None."""
    for key, value in el.attrib.items():
        if isinstance(key, str) and key.rsplit("}", 1)[-1] == name:
            return value
    return None


def _children_to_latex(node: ET._Element | None) -> str:
    """Concatenate the LaTeX of every child of *node*; "" when *node* is None."""
    if node is None:
        return ""
    return "".join(_el_to_latex(child) for child in node)


def _el_to_latex(el: ET._Element) -> str:
    """Convert one OMML element (and its subtree) to a LaTeX fragment.

    Handled elements, by local tag name:

    * ``oMath`` / ``oMathPara``: children concatenated
    * ``r``: run text
    * ``f``: ``\\frac{num}{den}``
    * ``rad``: ``\\sqrt{e}`` or ``\\sqrt[deg]{e}`` when a degree is present
    * ``sSup`` / ``sSub`` / ``sSubSup``: scripts on a braced base
    * ``nary``: the operator named by ``naryPr/chr`` with its ``sub`` / ``sup``
      limits, followed by the operand
    * ``d``: operands joined by a space inside ``\\left( … \\right)``
    * ``m``: ``matrix`` environment, cells joined by `` & ``, rows by `` \\\\ ``

    Any other element contributes only the concatenation of its children.
    """
    tag = _tag(el)

    if tag in ("oMath", "oMathPara"):
        return _children_to_latex(el)

    if tag == "r":
        return _run_to_latex(el)

    if tag == "f":  # fraction
        n = _children_to_latex(_first_kid(el, "num"))
        d = _children_to_latex(_first_kid(el, "den"))
        return f"\\frac{{{n}}}{{{d}}}"

    if tag == "rad":  # radical
        body = _children_to_latex(_first_kid(el, "e"))
        degree = _children_to_latex(_first_kid(el, "deg"))
        if degree:
            return f"\\sqrt[{degree}]{{{body}}}"
        return f"\\sqrt{{{body}}}"

    if tag == "sSup":  # superscript
        base = _children_to_latex(_first_kid(el, "e"))
        exp = _children_to_latex(_first_kid(el, "sup"))
        return f"{{{base}}}^{{{exp}}}"

    if tag == "sSub":  # subscript
        base = _children_to_latex(_first_kid(el, "e"))
        s = _children_to_latex(_first_kid(el, "sub"))
        return f"{{{base}}}_{{{s}}}"

    if tag == "sSubSup":  # subscript and superscript
        base = _children_to_latex(_first_kid(el, "e"))
        s = _children_to_latex(_first_kid(el, "sub"))
        p = _children_to_latex(_first_kid(el, "sup"))
        return f"{{{base}}}_{{{s}}}^{{{p}}}"

    if tag == "nary":  # n-ary operator (∑, ∏, ∫, …)
        # OMML omits <m:chr> for the integral, so that is the default.
        operator = "\\int"
        props = _first_kid(el, "naryPr")
        chr_el = _first_kid(props, "chr") if props is not None else None
        if chr_el is not None:
            symbol = _attr_local(chr_el, "val") or ""
            # Unknown operator characters are passed through verbatim.
            operator = _NARY_LATEX.get(symbol, symbol)
        lower = _children_to_latex(_first_kid(el, "sub"))
        upper = _children_to_latex(_first_kid(el, "sup"))
        if lower:
            operator += f"_{{{lower}}}"
        if upper:
            operator += f"^{{{upper}}}"
        body = _children_to_latex(_first_kid(el, "e"))
        return f"{operator} {body}"

    if tag == "d":  # delimiters
        content = " ".join(_children_to_latex(e) for e in _kids(el, "e"))
        return f"\\left({content}\\right)"

    if tag == "m":  # matrix
        rows = [
            " & ".join(_children_to_latex(e) for e in _kids(mr, "e"))
            for mr in _kids(el, "mr")
        ]
        return "\\begin{matrix}" + " \\\\ ".join(rows) + "\\end{matrix}"

    # Anything else: recurse into the children.
    return _children_to_latex(el)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def omml_element_to_latex(el: ET._Element) -> str:
    """Convert an ``<m:oMath>`` or ``<m:oMathPara>`` element to a LaTeX string.

    The result is whitespace-trimmed. Conversion is best effort: any
    exception raised along the way results in an empty string.
    """
    try:
        latex = _el_to_latex(el)
        return latex.strip()
    except Exception:
        return ""
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def is_display_math(el: ET._Element) -> bool:
    """Return True when *el* is an ``oMathPara`` element, i.e. display math (``$$...$$``)."""
    local_name = _tag(el)
    return local_name == "oMathPara"
|