@simplysm/sd-claude 14.0.76 → 14.0.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/claude/output-styles/sd-tone.md +128 -0
- package/claude/references/sd-simplysm14/apis/angular/README.md +28 -89
- package/claude/references/sd-simplysm14/apis/angular/app-structure.md +75 -32
- package/claude/references/sd-simplysm14/apis/angular/buttons.md +65 -29
- package/claude/references/sd-simplysm14/apis/angular/crud.md +86 -21
- package/claude/references/sd-simplysm14/apis/angular/forms.md +168 -42
- package/claude/references/sd-simplysm14/apis/angular/infrastructure.md +200 -49
- package/claude/references/sd-simplysm14/apis/angular/kanban.md +64 -20
- package/claude/references/sd-simplysm14/apis/angular/layout.md +75 -30
- package/claude/references/sd-simplysm14/apis/angular/modal.md +92 -40
- package/claude/references/sd-simplysm14/apis/angular/routing.md +86 -25
- package/claude/references/sd-simplysm14/apis/angular/selection-managers.md +72 -41
- package/claude/references/sd-simplysm14/apis/angular/shared-data.md +113 -21
- package/claude/references/sd-simplysm14/apis/angular/sheet.md +108 -33
- package/claude/references/sd-simplysm14/apis/angular/toast.md +81 -30
- package/claude/references/sd-simplysm14/apis/angular/visual.md +140 -32
- package/claude/references/sd-simplysm14/apis/capacitor-plugin-auto-update/README.md +46 -43
- package/claude/references/sd-simplysm14/apis/capacitor-plugin-intent/README.md +59 -48
- package/claude/references/sd-simplysm14/apis/capacitor-plugin-usb-storage/README.md +17 -7
- package/claude/references/sd-simplysm14/apis/core-common/README.md +43 -116
- package/claude/references/sd-simplysm14/apis/core-common/extensions.md +74 -109
- package/claude/references/sd-simplysm14/apis/core-common/features.md +40 -35
- package/claude/references/sd-simplysm14/apis/core-common/types.md +80 -106
- package/claude/references/sd-simplysm14/apis/core-common/utils.md +142 -111
- package/claude/references/sd-simplysm14/apis/core-node/README.md +7 -16
- package/claude/references/sd-simplysm14/apis/core-node/consola.md +33 -38
- package/claude/references/sd-simplysm14/apis/core-node/cpx.md +25 -33
- package/claude/references/sd-simplysm14/apis/core-node/fs-watcher.md +27 -38
- package/claude/references/sd-simplysm14/apis/core-node/fsx.md +32 -60
- package/claude/references/sd-simplysm14/apis/core-node/pathx.md +14 -45
- package/claude/references/sd-simplysm14/apis/core-node/worker.md +35 -81
- package/claude/references/sd-simplysm14/apis/excel/README.md +178 -80
- package/claude/references/sd-simplysm14/apis/lint/README.md +5 -0
- package/claude/references/sd-simplysm14/apis/orm-node/README.md +1 -1
- package/claude/references/sd-simplysm14/apis/sd-claude/README.md +28 -5
- package/claude/references/sd-simplysm14/apis/sd-cli/README.md +1 -1
- package/claude/references/sd-simplysm14/apis/service-client/README.md +57 -50
- package/claude/references/sd-simplysm14/apis/service-server/README.md +8 -15
- package/claude/references/sd-simplysm14/apis/service-server/auth.md +24 -16
- package/claude/references/sd-simplysm14/apis/service-server/builtin-services.md +55 -31
- package/claude/references/sd-simplysm14/apis/service-server/define-service.md +28 -44
- package/claude/references/sd-simplysm14/apis/service-server/internals.md +59 -18
- package/claude/references/sd-simplysm14/apis/service-server/server.md +37 -46
- package/claude/references/sd-simplysm14/manuals/client-component.md +3 -1
- package/claude/references/sd-simplysm14/manuals/logging.md +9 -8
- package/claude/rules/sd-base-rules.md +377 -219
- package/claude/settings.json +1 -0
- package/claude/skills/sd-commit/SKILL.md +31 -8
- package/claude/skills/sd-docs/SKILL.md +15 -10
- package/claude/skills/sd-docs/references/subagent-prompt.md +26 -8
- package/claude/skills/sd-impl/SKILL.md +1 -1
- package/claude/skills/sd-skill/references/skill-authoring.md +1 -1
- package/claude/skills/sd-spec/SKILL.md +22 -13
- package/claude/skills/sd-spec/references/spec-authoring.md +1 -1
- package/claude/skills/sd-unpack/SKILL.md +150 -26
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/_common.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/eml_handler.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/office_com.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/__pycache__/pdf_handler.cpython-314.pyc +0 -0
- package/claude/skills/sd-unpack/scripts/handlers/_common.py +17 -2
- package/claude/skills/sd-unpack/scripts/handlers/eml_handler.py +100 -24
- package/claude/skills/sd-unpack/scripts/handlers/msg_handler.py +140 -27
- package/claude/skills/sd-unpack/scripts/handlers/office_com.py +698 -107
- package/claude/skills/sd-unpack/scripts/handlers/office_worker.py +34 -26
- package/claude/skills/sd-unpack/scripts/handlers/pdf_handler.py +130 -8
- package/package.json +1 -1
|
@@ -1,10 +1,21 @@
|
|
|
1
|
-
"""EML 핸들러. 표준 라이브러리 email 모듈 사용.
|
|
1
|
+
"""EML 핸들러. 표준 라이브러리 email 모듈 사용.
|
|
2
|
+
|
|
3
|
+
본문 일관화:
|
|
4
|
+
- text/plain 있으면 그걸 body.md
|
|
5
|
+
- text/plain 없고 text/html 있으면 HTML → 평문 변환(html2text) → body.md
|
|
6
|
+
- HTML 변환 시 inline image (cid:) → 첨부 파일명 placeholder 치환
|
|
7
|
+
|
|
8
|
+
CID 매핑:
|
|
9
|
+
- Content-ID 헤더 있는 첨부는 cid_map 에 등록
|
|
10
|
+
- images.rels.json 으로 CID↔파일명 양방향 추적
|
|
11
|
+
"""
|
|
2
12
|
from __future__ import annotations
|
|
3
13
|
|
|
4
14
|
import email
|
|
5
15
|
import hashlib
|
|
6
16
|
import json
|
|
7
17
|
import os
|
|
18
|
+
import re
|
|
8
19
|
from email.header import decode_header, make_header
|
|
9
20
|
from pathlib import Path
|
|
10
21
|
|
|
@@ -29,24 +40,36 @@ def run(input_path: Path, out_dir: Path) -> None:
|
|
|
29
40
|
raw = input_path.read_bytes()
|
|
30
41
|
msg = email.message_from_bytes(raw)
|
|
31
42
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
43
|
+
# 모든 헤더 보존 (envelope·X-* 등 원본 그대로). 동일 키 다수 등장 → list 누적.
|
|
44
|
+
headers: dict = {}
|
|
45
|
+
for key, raw in msg.items():
|
|
46
|
+
decoded = _decode_header(raw)
|
|
47
|
+
if key in headers:
|
|
48
|
+
existing = headers[key]
|
|
49
|
+
if isinstance(existing, list):
|
|
50
|
+
existing.append(decoded)
|
|
51
|
+
else:
|
|
52
|
+
headers[key] = [existing, decoded]
|
|
53
|
+
else:
|
|
54
|
+
headers[key] = decoded
|
|
40
55
|
_common.write_text(
|
|
41
56
|
out_dir / "headers.json",
|
|
42
57
|
json.dumps(headers, ensure_ascii=False, indent=2),
|
|
43
58
|
)
|
|
44
59
|
|
|
60
|
+
# README 의 헤더 섹션 표기용 envelope 키 (write_readme 의 dict 출력 한정)
|
|
61
|
+
envelope_keys = [
|
|
62
|
+
"From", "To", "Cc", "Bcc", "Subject", "Date", "Message-ID",
|
|
63
|
+
"Reply-To", "In-Reply-To", "References",
|
|
64
|
+
]
|
|
65
|
+
readme_headers = {k: headers.get(k, "") for k in envelope_keys}
|
|
66
|
+
|
|
45
67
|
body_text: str | None = None
|
|
46
68
|
body_html: str | None = None
|
|
47
69
|
attachments_dir = out_dir / "attachments"
|
|
48
70
|
saved_attachments: list[Path] = []
|
|
49
71
|
seen_hashes: set[str] = set()
|
|
72
|
+
cid_map: dict[str, str] = {} # cid (without <>) → 첨부 파일명 (basename)
|
|
50
73
|
|
|
51
74
|
for part in msg.walk():
|
|
52
75
|
if part.is_multipart():
|
|
@@ -56,8 +79,10 @@ def run(input_path: Path, out_dir: Path) -> None:
|
|
|
56
79
|
filename = part.get_filename()
|
|
57
80
|
if filename:
|
|
58
81
|
filename = _decode_header(filename)
|
|
82
|
+
cid_raw = (part.get("Content-ID") or "").strip()
|
|
83
|
+
cid = cid_raw.strip("<>") if cid_raw else ""
|
|
59
84
|
|
|
60
|
-
is_attachment = bool(filename) or "attachment" in disp
|
|
85
|
+
is_attachment = bool(filename) or "attachment" in disp or bool(cid)
|
|
61
86
|
|
|
62
87
|
if is_attachment:
|
|
63
88
|
payload = part.get_payload(decode=True) or b""
|
|
@@ -67,45 +92,96 @@ def run(input_path: Path, out_dir: Path) -> None:
|
|
|
67
92
|
continue
|
|
68
93
|
seen_hashes.add(digest)
|
|
69
94
|
_common.mkdir(attachments_dir)
|
|
70
|
-
|
|
95
|
+
dst_name = filename or (f"{cid}.bin" if cid else "attachment.bin")
|
|
96
|
+
dst = _common.unique_path(attachments_dir, dst_name)
|
|
71
97
|
_common.write_bytes(dst, payload)
|
|
72
98
|
saved_attachments.append(dst)
|
|
99
|
+
if cid:
|
|
100
|
+
cid_map[cid] = dst.name
|
|
73
101
|
elif ctype == "text/plain" and body_text is None:
|
|
74
102
|
body_text = _decode_payload(part)
|
|
75
103
|
elif ctype == "text/html" and body_html is None:
|
|
76
104
|
body_html = _decode_payload(part)
|
|
77
105
|
|
|
78
|
-
|
|
106
|
+
# body.md: text/plain 우선, 없으면 HTML→평문
|
|
107
|
+
# body.from_html.md: HTML 있으면 항상 별도 생성 (인라인 이미지 위치 placeholder 포함)
|
|
108
|
+
if body_text:
|
|
109
|
+
body_md = body_text
|
|
110
|
+
elif body_html:
|
|
111
|
+
body_md = _html_to_md(body_html, cid_map)
|
|
112
|
+
else:
|
|
113
|
+
body_md = ""
|
|
114
|
+
|
|
79
115
|
body_file_link = None
|
|
80
116
|
body_html_link = None
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
_common.write_text(out_dir / "body.md", body_text)
|
|
86
|
-
body_file_link = "body.md"
|
|
117
|
+
body_from_html_link = None
|
|
118
|
+
if body_md:
|
|
119
|
+
_common.write_text(out_dir / "body.md", body_md)
|
|
120
|
+
body_file_link = "body.md"
|
|
87
121
|
if body_html:
|
|
88
122
|
_common.write_text(out_dir / "body.html", body_html)
|
|
89
123
|
body_html_link = "body.html"
|
|
124
|
+
# text/plain 우선 정책으로 body.md 가 plain 인 경우, HTML→평문 변환본도 별도로
|
|
125
|
+
# 보존 (인라인 이미지 위치 단서). body.md 자체가 from_html 이면 중복 회피.
|
|
126
|
+
if body_text:
|
|
127
|
+
from_html_md = _html_to_md(body_html, cid_map)
|
|
128
|
+
_common.write_text(out_dir / "body.from_html.md", from_html_md)
|
|
129
|
+
body_from_html_link = "body.from_html.md"
|
|
130
|
+
|
|
131
|
+
# CID↔파일명 매핑 (인라인 이미지 있을 때만)
|
|
132
|
+
if cid_map:
|
|
133
|
+
rels = {cid: f"attachments/{fname}" for cid, fname in cid_map.items()}
|
|
134
|
+
_common.write_text(
|
|
135
|
+
out_dir / "images.rels.json",
|
|
136
|
+
json.dumps(rels, ensure_ascii=False, indent=2),
|
|
137
|
+
)
|
|
90
138
|
|
|
91
139
|
attachment_links: list[str] = []
|
|
92
140
|
for ap in saved_attachments:
|
|
141
|
+
size = ap.stat().st_size
|
|
93
142
|
recursed = maybe_recurse_attachment(ap, attachments_dir)
|
|
94
143
|
if recursed is not None:
|
|
95
144
|
os.unlink(_common.long_str(ap))
|
|
96
|
-
attachment_links.append(f"attachments/{recursed.name}/")
|
|
145
|
+
attachment_links.append(f"attachments/{recursed.name}/ ({_common.format_size(size)})")
|
|
97
146
|
else:
|
|
98
|
-
attachment_links.append(f"attachments/{ap.name}")
|
|
147
|
+
attachment_links.append(f"attachments/{ap.name} ({_common.format_size(size)})")
|
|
99
148
|
|
|
100
149
|
_common.write_readme(
|
|
101
150
|
out_dir,
|
|
102
151
|
source_name=input_path.name,
|
|
103
152
|
source_size=input_path.stat().st_size,
|
|
104
|
-
tool="email (표준 라이브러리)",
|
|
105
|
-
loss_notes=
|
|
106
|
-
|
|
153
|
+
tool="email (표준 라이브러리) + html2text",
|
|
154
|
+
loss_notes=(
|
|
155
|
+
"본문은 body.md (text/plain 우선, 없으면 HTML→평문). "
|
|
156
|
+
"text/plain·HTML 둘 다 있을 때 HTML→평문(인라인 이미지 위치 placeholder 포함)은 body.from_html.md 별도. "
|
|
157
|
+
"원본 HTML 은 body.html, CID↔첨부 매핑은 images.rels.json (인라인 이미지 있을 때)."
|
|
158
|
+
),
|
|
107
159
|
body_file_link=body_file_link,
|
|
108
160
|
body_html_link=body_html_link,
|
|
109
|
-
|
|
161
|
+
body_from_html_link=body_from_html_link,
|
|
162
|
+
headers=readme_headers,
|
|
110
163
|
attachments=attachment_links,
|
|
111
164
|
)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _html_to_md(html: str, cid_map: dict[str, str]) -> str:
    """Convert an HTML body to Markdown-style plain text.

    Inline images referenced through ``cid:`` URLs are rewritten to point at
    the saved attachment, so the position of each inline image stays visible
    in the converted body.

    Args:
        html: HTML source of the message body.
        cid_map: Content-ID (without angle brackets) -> attachment file name.

    Returns:
        The converted text. Image references whose Content-ID is not found in
        ``cid_map`` are left exactly as html2text produced them.
    """
    # NOTE(review): presumably makes html2text importable on demand; the
    # helper lives in _common and is not visible here.
    _common.ensure_pip("html2text")
    import html2text

    converter = html2text.HTML2Text()
    converter.body_width = 0
    converter.ignore_links = False
    converter.ignore_images = False
    md = converter.handle(html)

    # `![alt](cid:<content-id>)` -> `![alt](attachments/<file name>)`
    def replace_cid_img(m: re.Match) -> str:
        alt, cid_value = m.group(1), m.group(2).strip()
        # Try the full Content-ID first, then only the part before "@".
        fname = cid_map.get(cid_value) or cid_map.get(cid_value.split("@")[0])
        if fname:
            # Same relative path format as the entries of images.rels.json.
            return f"![{alt}](attachments/{fname})"
        # Unknown CID: keep the reference instead of silently dropping it.
        return m.group(0)

    return re.sub(r"!\[([^\]]*)\]\(cid:([^)]+)\)", replace_cid_img, md)
|
|
@@ -1,8 +1,13 @@
|
|
|
1
|
-
"""MSG (Outlook) 핸들러. extract-msg 라이브러리 사용.
|
|
1
|
+
"""MSG (Outlook) 핸들러. extract-msg 라이브러리 사용.
|
|
2
|
+
|
|
3
|
+
본문·CID·envelope 헤더 규약은 eml_handler 와 동일.
|
|
4
|
+
"""
|
|
2
5
|
from __future__ import annotations
|
|
3
6
|
|
|
7
|
+
import email as stdemail
|
|
4
8
|
import json
|
|
5
9
|
import os
|
|
10
|
+
import re
|
|
6
11
|
from pathlib import Path
|
|
7
12
|
|
|
8
13
|
from . import _common
|
|
@@ -15,18 +20,60 @@ def run(input_path: Path, out_dir: Path) -> None:
|
|
|
15
20
|
|
|
16
21
|
msg = extract_msg.Message(str(input_path))
|
|
17
22
|
try:
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
}
|
|
23
|
+
raw_header = getattr(msg, "header", None) or ""
|
|
24
|
+
# extract-msg 일부 버전 header 는 EmailMessage 객체 — str() 로 정규화
|
|
25
|
+
if not isinstance(raw_header, str):
|
|
26
|
+
raw_header = str(raw_header)
|
|
27
|
+
|
|
28
|
+
# 모든 헤더 보존: msg.header 의 raw RFC822 파싱 + extract-msg 의 정형 필드 보강
|
|
29
|
+
headers: dict = {}
|
|
30
|
+
if raw_header:
|
|
31
|
+
parsed = stdemail.message_from_string(raw_header)
|
|
32
|
+
for key, val in parsed.items():
|
|
33
|
+
if key in headers:
|
|
34
|
+
existing = headers[key]
|
|
35
|
+
if isinstance(existing, list):
|
|
36
|
+
existing.append(val)
|
|
37
|
+
else:
|
|
38
|
+
headers[key] = [existing, val]
|
|
39
|
+
else:
|
|
40
|
+
headers[key] = val
|
|
41
|
+
# extract-msg 정형 필드 (raw header 없을 때 fallback)
|
|
42
|
+
if not headers:
|
|
43
|
+
headers = {
|
|
44
|
+
"From": msg.sender or "",
|
|
45
|
+
"To": msg.to or "",
|
|
46
|
+
"Cc": msg.cc or "",
|
|
47
|
+
"Bcc": getattr(msg, "bcc", "") or "",
|
|
48
|
+
"Subject": msg.subject or "",
|
|
49
|
+
"Date": str(msg.date) if msg.date else "",
|
|
50
|
+
}
|
|
25
51
|
_common.write_text(
|
|
26
52
|
out_dir / "headers.json",
|
|
27
53
|
json.dumps(headers, ensure_ascii=False, indent=2),
|
|
28
54
|
)
|
|
29
55
|
|
|
56
|
+
envelope_keys = [
|
|
57
|
+
"From", "To", "Cc", "Bcc", "Subject", "Date", "Message-ID",
|
|
58
|
+
"Reply-To", "In-Reply-To", "References",
|
|
59
|
+
]
|
|
60
|
+
readme_headers: dict = {}
|
|
61
|
+
for k in envelope_keys:
|
|
62
|
+
v = headers.get(k)
|
|
63
|
+
if v:
|
|
64
|
+
readme_headers[k] = v
|
|
65
|
+
# extract-msg 정형 필드로 envelope 보강 (raw header 에 없을 때)
|
|
66
|
+
if not readme_headers.get("From"):
|
|
67
|
+
readme_headers["From"] = msg.sender or ""
|
|
68
|
+
if not readme_headers.get("To"):
|
|
69
|
+
readme_headers["To"] = msg.to or ""
|
|
70
|
+
if not readme_headers.get("Cc"):
|
|
71
|
+
readme_headers["Cc"] = msg.cc or ""
|
|
72
|
+
if not readme_headers.get("Subject"):
|
|
73
|
+
readme_headers["Subject"] = msg.subject or ""
|
|
74
|
+
if not readme_headers.get("Date"):
|
|
75
|
+
readme_headers["Date"] = str(msg.date) if msg.date else ""
|
|
76
|
+
|
|
30
77
|
body_text = msg.body or ""
|
|
31
78
|
body_html_raw = getattr(msg, "htmlBody", None)
|
|
32
79
|
body_html: str | None = None
|
|
@@ -36,21 +83,10 @@ def run(input_path: Path, out_dir: Path) -> None:
|
|
|
36
83
|
else:
|
|
37
84
|
body_html = body_html_raw
|
|
38
85
|
|
|
39
|
-
body_inline = None
|
|
40
|
-
body_file_link = None
|
|
41
|
-
body_html_link = None
|
|
42
|
-
if body_text:
|
|
43
|
-
if len(body_text) < 1000:
|
|
44
|
-
body_inline = body_text
|
|
45
|
-
else:
|
|
46
|
-
_common.write_text(out_dir / "body.md", body_text)
|
|
47
|
-
body_file_link = "body.md"
|
|
48
|
-
if body_html:
|
|
49
|
-
_common.write_text(out_dir / "body.html", body_html)
|
|
50
|
-
body_html_link = "body.html"
|
|
51
|
-
|
|
52
86
|
attachments_dir = out_dir / "attachments"
|
|
53
87
|
attachment_links: list[str] = []
|
|
88
|
+
cid_map: dict[str, str] = {}
|
|
89
|
+
|
|
54
90
|
for att in msg.attachments:
|
|
55
91
|
_common.mkdir(attachments_dir)
|
|
56
92
|
filename = (
|
|
@@ -58,6 +94,13 @@ def run(input_path: Path, out_dir: Path) -> None:
|
|
|
58
94
|
or getattr(att, "shortFilename", None)
|
|
59
95
|
or "attachment.bin"
|
|
60
96
|
)
|
|
97
|
+
cid = (
|
|
98
|
+
getattr(att, "cid", None)
|
|
99
|
+
or getattr(att, "contentId", None)
|
|
100
|
+
or ""
|
|
101
|
+
)
|
|
102
|
+
if cid:
|
|
103
|
+
cid = str(cid).strip("<>")
|
|
61
104
|
data = att.data
|
|
62
105
|
if isinstance(data, str):
|
|
63
106
|
data = data.encode("utf-8")
|
|
@@ -65,24 +108,94 @@ def run(input_path: Path, out_dir: Path) -> None:
|
|
|
65
108
|
data = b""
|
|
66
109
|
dst = _common.unique_path(attachments_dir, filename)
|
|
67
110
|
_common.write_bytes(dst, data)
|
|
111
|
+
size = dst.stat().st_size
|
|
112
|
+
if cid:
|
|
113
|
+
cid_map[cid] = dst.name
|
|
68
114
|
recursed = maybe_recurse_attachment(dst, attachments_dir)
|
|
69
115
|
if recursed is not None:
|
|
70
116
|
os.unlink(_common.long_str(dst))
|
|
71
|
-
attachment_links.append(f"attachments/{recursed.name}/")
|
|
117
|
+
attachment_links.append(f"attachments/{recursed.name}/ ({_common.format_size(size)})")
|
|
72
118
|
else:
|
|
73
|
-
attachment_links.append(f"attachments/{dst.name}")
|
|
119
|
+
attachment_links.append(f"attachments/{dst.name} ({_common.format_size(size)})")
|
|
120
|
+
|
|
121
|
+
# body.md: text 우선, 없으면 HTML→평문
|
|
122
|
+
# body.from_html.md: text·HTML 둘 다 있을 때 HTML→평문 변환본 별도 (이미지 위치 placeholder)
|
|
123
|
+
if body_text:
|
|
124
|
+
body_md = body_text
|
|
125
|
+
elif body_html:
|
|
126
|
+
body_md = _html_to_md(body_html, cid_map)
|
|
127
|
+
else:
|
|
128
|
+
body_md = ""
|
|
129
|
+
|
|
130
|
+
body_file_link = None
|
|
131
|
+
body_html_link = None
|
|
132
|
+
body_from_html_link = None
|
|
133
|
+
if body_md:
|
|
134
|
+
_common.write_text(out_dir / "body.md", body_md)
|
|
135
|
+
body_file_link = "body.md"
|
|
136
|
+
if body_html:
|
|
137
|
+
_common.write_text(out_dir / "body.html", body_html)
|
|
138
|
+
body_html_link = "body.html"
|
|
139
|
+
if body_text:
|
|
140
|
+
from_html_md = _html_to_md(body_html, cid_map)
|
|
141
|
+
_common.write_text(out_dir / "body.from_html.md", from_html_md)
|
|
142
|
+
body_from_html_link = "body.from_html.md"
|
|
143
|
+
|
|
144
|
+
if cid_map:
|
|
145
|
+
rels = {cid: f"attachments/{fname}" for cid, fname in cid_map.items()}
|
|
146
|
+
_common.write_text(
|
|
147
|
+
out_dir / "images.rels.json",
|
|
148
|
+
json.dumps(rels, ensure_ascii=False, indent=2),
|
|
149
|
+
)
|
|
74
150
|
|
|
75
151
|
_common.write_readme(
|
|
76
152
|
out_dir,
|
|
77
153
|
source_name=input_path.name,
|
|
78
154
|
source_size=input_path.stat().st_size,
|
|
79
|
-
tool="extract-msg",
|
|
80
|
-
loss_notes=
|
|
81
|
-
|
|
155
|
+
tool="extract-msg + html2text",
|
|
156
|
+
loss_notes=(
|
|
157
|
+
"본문은 body.md (text 우선, 없으면 HTML→평문). "
|
|
158
|
+
"text·HTML 둘 다 있을 때 HTML→평문(인라인 이미지 위치 placeholder 포함)은 body.from_html.md 별도. "
|
|
159
|
+
"원본 HTML 은 body.html, CID↔첨부 매핑은 images.rels.json (인라인 이미지 있을 때)."
|
|
160
|
+
),
|
|
82
161
|
body_file_link=body_file_link,
|
|
83
162
|
body_html_link=body_html_link,
|
|
84
|
-
|
|
163
|
+
body_from_html_link=body_from_html_link,
|
|
164
|
+
headers=readme_headers,
|
|
85
165
|
attachments=attachment_links,
|
|
86
166
|
)
|
|
87
167
|
finally:
|
|
88
168
|
msg.close()
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _header_field(raw_header: str, key: str) -> str:
|
|
172
|
+
"""raw rfc822 header 문자열에서 특정 키 추출. 없으면 빈 문자열."""
|
|
173
|
+
if not raw_header:
|
|
174
|
+
return ""
|
|
175
|
+
try:
|
|
176
|
+
parsed = stdemail.message_from_string(raw_header)
|
|
177
|
+
return parsed.get(key, "") or ""
|
|
178
|
+
except Exception:
|
|
179
|
+
return ""
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _html_to_md(html: str, cid_map: dict[str, str]) -> str:
    """Convert an HTML body to Markdown-style plain text.

    Inline images referenced through ``cid:`` URLs are rewritten to point at
    the saved attachment, so the position of each inline image stays visible
    in the converted body.

    Args:
        html: HTML source of the message body.
        cid_map: Content-ID (without angle brackets) -> attachment file name.

    Returns:
        The converted text. Image references whose Content-ID is not found in
        ``cid_map`` are left exactly as html2text produced them.
    """
    # NOTE(review): presumably makes html2text importable on demand; the
    # helper lives in _common and is not visible here.
    _common.ensure_pip("html2text")
    import html2text

    converter = html2text.HTML2Text()
    converter.body_width = 0
    converter.ignore_links = False
    converter.ignore_images = False
    md = converter.handle(html)

    # `![alt](cid:<content-id>)` -> `![alt](attachments/<file name>)`
    def replace_cid_img(m: re.Match) -> str:
        alt, cid_value = m.group(1), m.group(2).strip()
        # Try the full Content-ID first, then only the part before "@".
        fname = cid_map.get(cid_value) or cid_map.get(cid_value.split("@")[0])
        if fname:
            # Same relative path format as the entries of images.rels.json.
            return f"![{alt}](attachments/{fname})"
        # Unknown CID: keep the reference instead of silently dropping it.
        return m.group(0)

    return re.sub(r"!\[([^\]]*)\]\(cid:([^)]+)\)", replace_cid_img, md)
|