@simplysm/sd-claude 13.0.77 → 13.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/claude/rules/sd-claude-rules.md +4 -63
- package/claude/rules/sd-simplysm-usage.md +7 -0
- package/claude/sd-session-start.sh +10 -0
- package/claude/skills/sd-api-review/SKILL.md +89 -0
- package/claude/skills/sd-check/SKILL.md +55 -57
- package/claude/skills/sd-commit/SKILL.md +37 -42
- package/claude/skills/sd-debug/SKILL.md +75 -265
- package/claude/skills/sd-document/SKILL.md +63 -53
- package/claude/skills/sd-document/_common.py +94 -0
- package/claude/skills/sd-document/extract_docx.py +19 -48
- package/claude/skills/sd-document/extract_pdf.py +22 -50
- package/claude/skills/sd-document/extract_pptx.py +17 -40
- package/claude/skills/sd-document/extract_xlsx.py +19 -40
- package/claude/skills/sd-email-analyze/SKILL.md +23 -31
- package/claude/skills/sd-email-analyze/email-analyzer.py +79 -65
- package/claude/skills/sd-init/SKILL.md +133 -0
- package/claude/skills/sd-plan/SKILL.md +69 -120
- package/claude/skills/sd-readme/SKILL.md +106 -131
- package/claude/skills/sd-review/SKILL.md +38 -155
- package/claude/skills/sd-simplify/SKILL.md +59 -0
- package/package.json +3 -2
- package/README.md +0 -297
- package/claude/refs/sd-angular.md +0 -127
- package/claude/refs/sd-code-conventions.md +0 -155
- package/claude/refs/sd-directories.md +0 -7
- package/claude/refs/sd-library-issue.md +0 -7
- package/claude/refs/sd-migration.md +0 -7
- package/claude/refs/sd-orm-v12.md +0 -81
- package/claude/refs/sd-orm.md +0 -23
- package/claude/refs/sd-service.md +0 -5
- package/claude/refs/sd-simplysm-docs.md +0 -52
- package/claude/refs/sd-solid.md +0 -68
- package/claude/refs/sd-workflow.md +0 -25
- package/claude/rules/sd-refs-linker.md +0 -52
- package/claude/sd-statusline.js +0 -296
- package/claude/skills/sd-api-name-review/SKILL.md +0 -154
- package/claude/skills/sd-brainstorm/SKILL.md +0 -215
- package/claude/skills/sd-debug/condition-based-waiting-example.ts +0 -158
- package/claude/skills/sd-debug/condition-based-waiting.md +0 -114
- package/claude/skills/sd-debug/defense-in-depth.md +0 -128
- package/claude/skills/sd-debug/find-polluter.sh +0 -64
- package/claude/skills/sd-debug/root-cause-tracing.md +0 -168
- package/claude/skills/sd-discuss/SKILL.md +0 -91
- package/claude/skills/sd-explore/SKILL.md +0 -118
- package/claude/skills/sd-plan-dev/SKILL.md +0 -294
- package/claude/skills/sd-plan-dev/code-quality-reviewer-prompt.md +0 -49
- package/claude/skills/sd-plan-dev/final-review-prompt.md +0 -50
- package/claude/skills/sd-plan-dev/implementer-prompt.md +0 -60
- package/claude/skills/sd-plan-dev/spec-reviewer-prompt.md +0 -45
- package/claude/skills/sd-review/api-reviewer-prompt.md +0 -75
- package/claude/skills/sd-review/code-reviewer-prompt.md +0 -82
- package/claude/skills/sd-review/convention-checker-prompt.md +0 -61
- package/claude/skills/sd-review/refactoring-analyzer-prompt.md +0 -92
- package/claude/skills/sd-skill/SKILL.md +0 -417
- package/claude/skills/sd-skill/anthropic-best-practices.md +0 -156
- package/claude/skills/sd-skill/cso-guide.md +0 -161
- package/claude/skills/sd-skill/examples/CLAUDE_MD_TESTING.md +0 -200
- package/claude/skills/sd-skill/persuasion-principles.md +0 -220
- package/claude/skills/sd-skill/testing-skills-with-subagents.md +0 -408
- package/claude/skills/sd-skill/writing-guide.md +0 -159
- package/claude/skills/sd-tdd/SKILL.md +0 -385
- package/claude/skills/sd-tdd/testing-anti-patterns.md +0 -317
- package/claude/skills/sd-use/SKILL.md +0 -67
- package/claude/skills/sd-worktree/SKILL.md +0 -78
|
@@ -2,24 +2,13 @@
|
|
|
2
2
|
"""Extract text and images from DOCX files in paragraph flow order."""
|
|
3
3
|
|
|
4
4
|
import sys
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
from _common import (
|
|
6
|
+
setup_encoding, make_output_paths, print_header, save_image,
|
|
7
|
+
ext_from_content_type, print_image_summary, run_cli,
|
|
8
|
+
normalize_cell, parse_heading_level,
|
|
9
|
+
)
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def ensure_packages():
|
|
15
|
-
packages = {"python-docx": "docx"}
|
|
16
|
-
for pip_name, import_name in packages.items():
|
|
17
|
-
try:
|
|
18
|
-
__import__(import_name)
|
|
19
|
-
except ImportError:
|
|
20
|
-
print(f"Installing package: {pip_name}...", file=sys.stderr)
|
|
21
|
-
subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name],
|
|
22
|
-
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
11
|
+
setup_encoding()
|
|
23
12
|
|
|
24
13
|
|
|
25
14
|
def extract(file_path):
|
|
@@ -27,14 +16,12 @@ def extract(file_path):
|
|
|
27
16
|
from docx.oxml.ns import qn
|
|
28
17
|
|
|
29
18
|
doc = Document(file_path)
|
|
30
|
-
|
|
31
|
-
out_dir = Path(file_path).parent / f"{stem}_files"
|
|
19
|
+
fp, out_dir = make_output_paths(file_path)
|
|
32
20
|
img_idx = 0
|
|
33
21
|
|
|
34
|
-
|
|
22
|
+
print_header(fp)
|
|
35
23
|
|
|
36
|
-
for
|
|
37
|
-
has_image = False
|
|
24
|
+
for para in doc.paragraphs:
|
|
38
25
|
for run in para.runs:
|
|
39
26
|
drawings = run._element.findall(f".//{qn('wp:inline')}") + run._element.findall(f".//{qn('wp:anchor')}")
|
|
40
27
|
for drawing in drawings:
|
|
@@ -45,48 +32,32 @@ def extract(file_path):
|
|
|
45
32
|
rel = doc.part.rels.get(embed)
|
|
46
33
|
if rel:
|
|
47
34
|
img_idx += 1
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
ext = content_type.split("/")[-1].replace("jpeg", "jpg")
|
|
51
|
-
out_dir.mkdir(parents=True, exist_ok=True)
|
|
52
|
-
img_path = out_dir / f"img_{img_idx:03d}.{ext}"
|
|
53
|
-
img_path.write_bytes(blob)
|
|
35
|
+
ext = ext_from_content_type(rel.target_part.content_type)
|
|
36
|
+
img_path = save_image(out_dir, img_idx, rel.target_part.blob, ext)
|
|
54
37
|
print(f"[IMG] {img_path}")
|
|
55
|
-
has_image = True
|
|
56
38
|
|
|
57
39
|
text = para.text.strip()
|
|
58
40
|
if text:
|
|
59
41
|
style = para.style.name if para.style else ""
|
|
60
42
|
prefix = ""
|
|
61
43
|
if "Heading" in style:
|
|
62
|
-
level = style
|
|
63
|
-
|
|
64
|
-
prefix = "#" *
|
|
65
|
-
|
|
44
|
+
level = parse_heading_level(style)
|
|
45
|
+
if level is not None:
|
|
46
|
+
prefix = "#" * level + " "
|
|
47
|
+
else:
|
|
66
48
|
prefix = "## "
|
|
67
49
|
print(f"{prefix}{text}")
|
|
68
50
|
|
|
69
|
-
if has_image or text:
|
|
70
|
-
pass # already printed
|
|
71
|
-
elif not has_image and not text:
|
|
72
|
-
continue
|
|
73
|
-
|
|
74
51
|
# Table extraction
|
|
75
52
|
for t_idx, table in enumerate(doc.tables):
|
|
76
53
|
print(f"\n### Table {t_idx + 1}\n")
|
|
77
54
|
for row in table.rows:
|
|
78
|
-
cells = [cell.text
|
|
55
|
+
cells = [normalize_cell(cell.text) for cell in row.cells]
|
|
79
56
|
print("| " + " | ".join(cells) + " |")
|
|
80
57
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
else:
|
|
84
|
-
print("\n---\nNo images")
|
|
58
|
+
print()
|
|
59
|
+
print_image_summary(img_idx, out_dir)
|
|
85
60
|
|
|
86
61
|
|
|
87
62
|
if __name__ == "__main__":
|
|
88
|
-
|
|
89
|
-
print("Usage: python extract_docx.py <file.docx>", file=sys.stderr)
|
|
90
|
-
sys.exit(1)
|
|
91
|
-
ensure_packages()
|
|
92
|
-
extract(sys.argv[1])
|
|
63
|
+
run_cli(extract, "extract_docx.py", {"python-docx": "docx"})
|
|
@@ -2,35 +2,22 @@
|
|
|
2
2
|
"""Extract text, tables, and images from PDF files page by page."""
|
|
3
3
|
|
|
4
4
|
import sys
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
from _common import (
|
|
6
|
+
setup_encoding, make_output_paths, print_header, save_image,
|
|
7
|
+
print_image_summary, run_cli, normalize_cell,
|
|
8
|
+
)
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def ensure_packages():
|
|
14
|
-
packages = {"pdfplumber": "pdfplumber", "pypdf": "pypdf"}
|
|
15
|
-
for pip_name, import_name in packages.items():
|
|
16
|
-
try:
|
|
17
|
-
__import__(import_name)
|
|
18
|
-
except ImportError:
|
|
19
|
-
print(f"Installing package: {pip_name}...", file=sys.stderr)
|
|
20
|
-
subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name],
|
|
21
|
-
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
10
|
+
setup_encoding()
|
|
22
11
|
|
|
23
12
|
|
|
24
13
|
def extract(file_path):
|
|
25
14
|
import pdfplumber
|
|
26
15
|
from pypdf import PdfReader
|
|
27
16
|
|
|
28
|
-
|
|
29
|
-
out_dir = Path(file_path).parent / f"{stem}_files"
|
|
17
|
+
fp, out_dir = make_output_paths(file_path)
|
|
30
18
|
img_idx = 0
|
|
31
|
-
total_text_len = 0
|
|
32
19
|
|
|
33
|
-
|
|
20
|
+
print_header(fp)
|
|
34
21
|
|
|
35
22
|
# Text + table extraction (pdfplumber)
|
|
36
23
|
with pdfplumber.open(file_path) as pdf:
|
|
@@ -38,17 +25,18 @@ def extract(file_path):
|
|
|
38
25
|
print(f"## Page {page_num}\n")
|
|
39
26
|
|
|
40
27
|
text = page.extract_text()
|
|
41
|
-
if text
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
28
|
+
if text:
|
|
29
|
+
stripped = text.strip()
|
|
30
|
+
if stripped:
|
|
31
|
+
print(stripped)
|
|
32
|
+
print()
|
|
45
33
|
|
|
46
34
|
tables = page.extract_tables()
|
|
47
35
|
for t_idx, table in enumerate(tables):
|
|
48
36
|
if table:
|
|
49
37
|
print(f"### Table {t_idx + 1}\n")
|
|
50
38
|
for row in table:
|
|
51
|
-
cells = [(c
|
|
39
|
+
cells = [normalize_cell(c) for c in row]
|
|
52
40
|
print("| " + " | ".join(cells) + " |")
|
|
53
41
|
print()
|
|
54
42
|
|
|
@@ -65,38 +53,22 @@ def extract(file_path):
|
|
|
65
53
|
filters = obj.get("/Filter", "")
|
|
66
54
|
if isinstance(filters, list):
|
|
67
55
|
filters = filters[0] if filters else ""
|
|
56
|
+
filter_str = str(filters)
|
|
68
57
|
ext = "png"
|
|
69
|
-
if "/DCTDecode" in
|
|
58
|
+
if "/DCTDecode" in filter_str:
|
|
70
59
|
ext = "jpg"
|
|
71
|
-
elif "/JPXDecode" in
|
|
60
|
+
elif "/JPXDecode" in filter_str:
|
|
72
61
|
ext = "jp2"
|
|
73
|
-
out_dir.mkdir(parents=True, exist_ok=True)
|
|
74
|
-
img_path = out_dir / f"img_{img_idx:03d}.{ext}"
|
|
75
62
|
try:
|
|
76
|
-
img_path
|
|
77
|
-
except Exception:
|
|
78
|
-
|
|
79
|
-
img_path
|
|
63
|
+
img_path = save_image(out_dir, img_idx, obj.get_data(), ext)
|
|
64
|
+
except Exception as exc:
|
|
65
|
+
print(f"Warning: failed to decode image {img_idx}: {exc}", file=sys.stderr)
|
|
66
|
+
img_path = save_image(out_dir, img_idx, obj._data if hasattr(obj, "_data") else b"", "bin")
|
|
80
67
|
print(f"[IMG] (page={page_num}) {img_path}")
|
|
81
68
|
|
|
82
|
-
# OCR notice
|
|
83
|
-
if total_text_len == 0:
|
|
84
|
-
print("\n⚠ No text was extracted (may be a scanned PDF).")
|
|
85
|
-
print("OCR is required:")
|
|
86
|
-
print(" 1. Install Tesseract OCR: https://github.com/tesseract-ocr/tesseract")
|
|
87
|
-
print(" 2. pip install pytesseract pdf2image")
|
|
88
|
-
print(" 3. Extract with pytesseract.image_to_string()")
|
|
89
|
-
|
|
90
69
|
print()
|
|
91
|
-
|
|
92
|
-
print(f"---\n{img_idx} image(s) saved: {out_dir}")
|
|
93
|
-
else:
|
|
94
|
-
print("---\nNo images")
|
|
70
|
+
print_image_summary(img_idx, out_dir)
|
|
95
71
|
|
|
96
72
|
|
|
97
73
|
if __name__ == "__main__":
|
|
98
|
-
|
|
99
|
-
print("Usage: python extract_pdf.py <file.pdf>", file=sys.stderr)
|
|
100
|
-
sys.exit(1)
|
|
101
|
-
ensure_packages()
|
|
102
|
-
extract(sys.argv[1])
|
|
74
|
+
run_cli(extract, "extract_pdf.py", {"pdfplumber": "pdfplumber", "pypdf": "pypdf"})
|
|
@@ -2,23 +2,12 @@
|
|
|
2
2
|
"""Extract text and images from PPTX files with per-slide coordinates."""
|
|
3
3
|
|
|
4
4
|
import sys
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
|
|
5
|
+
from _common import (
|
|
6
|
+
setup_encoding, make_output_paths, print_header, save_image,
|
|
7
|
+
ext_from_content_type, print_image_summary, run_cli,
|
|
8
|
+
)
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
def ensure_packages():
|
|
14
|
-
packages = {"python-pptx": "pptx"}
|
|
15
|
-
for pip_name, import_name in packages.items():
|
|
16
|
-
try:
|
|
17
|
-
__import__(import_name)
|
|
18
|
-
except ImportError:
|
|
19
|
-
print(f"Installing package: {pip_name}...", file=sys.stderr)
|
|
20
|
-
subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name],
|
|
21
|
-
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
10
|
+
setup_encoding()
|
|
22
11
|
|
|
23
12
|
|
|
24
13
|
def emu_to_inches(emu):
|
|
@@ -28,50 +17,38 @@ def emu_to_inches(emu):
|
|
|
28
17
|
return f"{emu / 914400:.1f}"
|
|
29
18
|
|
|
30
19
|
|
|
20
|
+
def _pos(shape):
|
|
21
|
+
return f"(left={emu_to_inches(shape.left)}\", top={emu_to_inches(shape.top)}\")"
|
|
22
|
+
|
|
23
|
+
|
|
31
24
|
def extract(file_path):
|
|
32
25
|
from pptx import Presentation
|
|
33
26
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
|
34
27
|
|
|
35
28
|
prs = Presentation(file_path)
|
|
36
|
-
|
|
37
|
-
out_dir = Path(file_path).parent / f"{stem}_files"
|
|
29
|
+
fp, out_dir = make_output_paths(file_path)
|
|
38
30
|
img_idx = 0
|
|
39
31
|
|
|
40
|
-
|
|
32
|
+
print_header(fp)
|
|
41
33
|
|
|
42
34
|
for slide_num, slide in enumerate(prs.slides, 1):
|
|
43
35
|
print(f"## Slide {slide_num}\n")
|
|
44
36
|
|
|
45
37
|
for shape in slide.shapes:
|
|
46
|
-
left = emu_to_inches(shape.left)
|
|
47
|
-
top = emu_to_inches(shape.top)
|
|
48
|
-
pos = f"(left={left}\", top={top}\")"
|
|
49
|
-
|
|
50
38
|
if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
|
|
51
39
|
img_idx += 1
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
out_dir.mkdir(parents=True, exist_ok=True)
|
|
56
|
-
img_path = out_dir / f"img_{img_idx:03d}.{ext}"
|
|
57
|
-
img_path.write_bytes(blob)
|
|
58
|
-
print(f"[IMG] {pos} {img_path}")
|
|
40
|
+
ext = ext_from_content_type(shape.image.content_type)
|
|
41
|
+
img_path = save_image(out_dir, img_idx, shape.image.blob, ext)
|
|
42
|
+
print(f"[IMG] {_pos(shape)} {img_path}")
|
|
59
43
|
|
|
60
44
|
elif hasattr(shape, "text") and shape.text.strip():
|
|
61
45
|
text = shape.text.strip().replace("\n", "\n ")
|
|
62
|
-
print(f"[TXT] {
|
|
46
|
+
print(f"[TXT] {_pos(shape)} {text}")
|
|
63
47
|
|
|
64
48
|
print()
|
|
65
49
|
|
|
66
|
-
|
|
67
|
-
print(f"---\n{img_idx} image(s) saved: {out_dir}")
|
|
68
|
-
else:
|
|
69
|
-
print("---\nNo images")
|
|
50
|
+
print_image_summary(img_idx, out_dir)
|
|
70
51
|
|
|
71
52
|
|
|
72
53
|
if __name__ == "__main__":
|
|
73
|
-
|
|
74
|
-
print("Usage: python extract_pptx.py <file.pptx>", file=sys.stderr)
|
|
75
|
-
sys.exit(1)
|
|
76
|
-
ensure_packages()
|
|
77
|
-
extract(sys.argv[1])
|
|
54
|
+
run_cli(extract, "extract_pptx.py", {"python-pptx": "pptx"})
|
|
@@ -2,36 +2,22 @@
|
|
|
2
2
|
"""Extract data and images from XLSX files with cell positions."""
|
|
3
3
|
|
|
4
4
|
import sys
|
|
5
|
-
import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
from _common import (
|
|
6
|
+
setup_encoding, make_output_paths, print_header, save_image,
|
|
7
|
+
print_image_summary, run_cli,
|
|
8
|
+
)
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace")
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def ensure_packages():
|
|
15
|
-
packages = {"openpyxl": "openpyxl"}
|
|
16
|
-
for pip_name, import_name in packages.items():
|
|
17
|
-
try:
|
|
18
|
-
__import__(import_name)
|
|
19
|
-
except ImportError:
|
|
20
|
-
print(f"Installing package: {pip_name}...", file=sys.stderr)
|
|
21
|
-
subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name],
|
|
22
|
-
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
|
10
|
+
setup_encoding()
|
|
23
11
|
|
|
24
12
|
|
|
25
13
|
def extract(file_path):
|
|
26
14
|
from openpyxl import load_workbook
|
|
27
|
-
from openpyxl.drawing.image import Image as XlImage
|
|
28
15
|
|
|
29
16
|
wb = load_workbook(file_path, data_only=True)
|
|
30
|
-
|
|
31
|
-
out_dir = Path(file_path).parent / f"{stem}_files"
|
|
17
|
+
fp, out_dir = make_output_paths(file_path)
|
|
32
18
|
img_idx = 0
|
|
33
19
|
|
|
34
|
-
|
|
20
|
+
print_header(fp)
|
|
35
21
|
|
|
36
22
|
for sheet_name in wb.sheetnames:
|
|
37
23
|
ws = wb[sheet_name]
|
|
@@ -51,33 +37,26 @@ def extract(file_path):
|
|
|
51
37
|
cells.append("")
|
|
52
38
|
else:
|
|
53
39
|
cells.append(str(val).strip())
|
|
54
|
-
print(f"[{row[0].
|
|
40
|
+
print(f"[{row[0].column_letter}{row[0].row}] " + " | ".join(cells))
|
|
55
41
|
|
|
56
42
|
# Image extraction
|
|
57
|
-
|
|
58
|
-
|
|
43
|
+
images = getattr(ws, '_images', [])
|
|
44
|
+
if images:
|
|
45
|
+
for img in images:
|
|
59
46
|
img_idx += 1
|
|
60
47
|
anchor = ""
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
f.write(img._data())
|
|
48
|
+
anchor_from = getattr(getattr(img, 'anchor', None), '_from', None)
|
|
49
|
+
if anchor_from is not None:
|
|
50
|
+
anchor = f" (near {anchor_from.col},{anchor_from.row})"
|
|
51
|
+
data_fn = getattr(img, '_data', None)
|
|
52
|
+
blob = data_fn() if callable(data_fn) else b""
|
|
53
|
+
img_path = save_image(out_dir, img_idx, blob, "png")
|
|
68
54
|
print(f"[IMG]{anchor} {img_path}")
|
|
69
55
|
|
|
70
56
|
print()
|
|
71
57
|
|
|
72
|
-
|
|
73
|
-
print(f"---\n{img_idx} image(s) saved: {out_dir}")
|
|
74
|
-
else:
|
|
75
|
-
print("---\nNo images")
|
|
58
|
+
print_image_summary(img_idx, out_dir)
|
|
76
59
|
|
|
77
60
|
|
|
78
61
|
if __name__ == "__main__":
|
|
79
|
-
|
|
80
|
-
print("Usage: python extract_xlsx.py <file.xlsx>", file=sys.stderr)
|
|
81
|
-
sys.exit(1)
|
|
82
|
-
ensure_packages()
|
|
83
|
-
extract(sys.argv[1])
|
|
62
|
+
run_cli(extract, "extract_xlsx.py", {"openpyxl": "openpyxl"})
|
|
@@ -1,52 +1,44 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: sd-email-analyze
|
|
3
|
-
description:
|
|
4
|
-
model: haiku
|
|
3
|
+
description: .eml 또는 .msg 파일과 관련하여 "이메일 파일 분석", "이메일 내용 추출", "첨부파일 추출", "이메일 요약"을 요청할 때 사용.
|
|
5
4
|
---
|
|
6
5
|
|
|
7
|
-
# Email
|
|
6
|
+
# SD Email Analyze — 이메일 파일 분석 및 내용 추출
|
|
8
7
|
|
|
9
|
-
|
|
8
|
+
`.eml` 및 `.msg`(Outlook) 이메일 파일을 파싱하여 메일 헤더, 본문 텍스트, 인라인 이미지, 첨부파일을 추출하고 분석한다.
|
|
10
9
|
|
|
11
|
-
|
|
10
|
+
ARGUMENTS: 이메일 파일 경로 (필수). `.eml` 또는 `.msg` 파일 경로를 지정한다.
|
|
12
11
|
|
|
13
|
-
|
|
12
|
+
---
|
|
14
13
|
|
|
15
|
-
|
|
16
|
-
- Email content needs proper decoding
|
|
14
|
+
## Step 1: 이메일 파일 파싱
|
|
17
15
|
|
|
18
|
-
|
|
16
|
+
ARGUMENTS에서 이메일 파일 경로를 추출하여 아래 명령을 실행하라:
|
|
19
17
|
|
|
20
18
|
```bash
|
|
21
|
-
python .claude/skills/sd-email-analyze/email-analyzer.py
|
|
19
|
+
python .claude/skills/sd-email-analyze/email-analyzer.py <이메일_파일_경로>
|
|
22
20
|
```
|
|
23
21
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
### After Running
|
|
22
|
+
- 최초 실행 시 `extract-msg`를 자동 설치한다.
|
|
23
|
+
- 실행 결과로 마크다운 보고서가 표준 출력되며, `<이메일_파일명>_files/` 디렉토리에 추출된 파일이 저장된다.
|
|
27
24
|
|
|
28
|
-
|
|
29
|
-
2. **Inline images**: Use **Read** tool on each saved path to view
|
|
30
|
-
3. **Attachments**: Use **Read** tool (images) or **sd-document** skill scripts (DOCX, XLSX, PPTX, PDF)
|
|
25
|
+
### 출력 구조
|
|
31
26
|
|
|
32
|
-
|
|
27
|
+
1. **메일 정보 테이블**: 제목, 보낸 사람, 받는 사람, 참조, 날짜, 개수
|
|
28
|
+
2. **본문 텍스트**: 일반 텍스트 (일반 텍스트가 없으면 HTML에서 태그 제거)
|
|
29
|
+
3. **인라인 이미지**: 저장된 파일 경로 테이블
|
|
30
|
+
4. **첨부파일**: 저장된 파일 경로 테이블
|
|
33
31
|
|
|
34
|
-
|
|
35
|
-
- Markdown report to stdout:
|
|
36
|
-
1. **Mail info table**: Subject, From, To, Cc, Date, counts
|
|
37
|
-
2. **Body text**: Plain text (HTML stripped if no plain text)
|
|
38
|
-
3. **Inline images**: Table with saved file paths
|
|
39
|
-
4. **Attachments**: Table with saved file paths
|
|
32
|
+
## Step 2: 추출된 파일 분석
|
|
40
33
|
|
|
41
|
-
|
|
34
|
+
Step 1의 출력에서 추출된 파일 경로를 확인하고 아래를 수행하라:
|
|
42
35
|
|
|
43
|
-
|
|
36
|
+
1. **인라인 이미지**: 저장된 각 경로에 **Read** 도구를 사용하여 확인
|
|
37
|
+
2. **첨부파일**: **Read** 도구(이미지) 또는 **sd-document** 스킬 스크립트(DOCX, XLSX, PPTX, PDF) 사용
|
|
44
38
|
|
|
45
|
-
|
|
46
|
-
2. **Data URI images**: Base64-encoded images in HTML (`data:image/...;base64,...`)
|
|
39
|
+
### 인라인 이미지 처리
|
|
47
40
|
|
|
48
|
-
|
|
41
|
+
두 가지 소스에서 추출된다:
|
|
49
42
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
- **Forgetting inline images**: Always check the "Inline images" section and read each path
|
|
43
|
+
1. **CID 이미지**: Content-ID가 있는 MIME 파트 (HTML의 `cid:` 참조)
|
|
44
|
+
2. **Data URI 이미지**: HTML 내 Base64 인코딩 이미지 (`data:image/...;base64,...`)
|