devlyn-cli 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/bin/devlyn.js +1 -0
  2. package/optional-skills/dokkit/ANALYSIS.md +198 -0
  3. package/optional-skills/dokkit/COMMANDS.md +365 -0
  4. package/optional-skills/dokkit/DOCX-XML.md +76 -0
  5. package/optional-skills/dokkit/EXPORT.md +102 -0
  6. package/optional-skills/dokkit/FILLING.md +377 -0
  7. package/optional-skills/dokkit/HWPX-XML.md +73 -0
  8. package/optional-skills/dokkit/IMAGE-SOURCING.md +127 -0
  9. package/optional-skills/dokkit/INGESTION.md +65 -0
  10. package/optional-skills/dokkit/SKILL.md +153 -0
  11. package/optional-skills/dokkit/STATE.md +60 -0
  12. package/optional-skills/dokkit/references/docx-field-patterns.md +151 -0
  13. package/optional-skills/dokkit/references/docx-structure.md +58 -0
  14. package/optional-skills/dokkit/references/field-detection-patterns.md +130 -0
  15. package/optional-skills/dokkit/references/hwpx-field-patterns.md +461 -0
  16. package/optional-skills/dokkit/references/hwpx-structure.md +159 -0
  17. package/optional-skills/dokkit/references/image-opportunity-heuristics.md +121 -0
  18. package/optional-skills/dokkit/references/image-xml-patterns.md +338 -0
  19. package/optional-skills/dokkit/references/section-image-interleaving.md +346 -0
  20. package/optional-skills/dokkit/references/section-range-detection.md +118 -0
  21. package/optional-skills/dokkit/references/state-schema.md +143 -0
  22. package/optional-skills/dokkit/references/supported-formats.md +67 -0
  23. package/optional-skills/dokkit/scripts/compile_hwpx.py +134 -0
  24. package/optional-skills/dokkit/scripts/detect_fields.py +301 -0
  25. package/optional-skills/dokkit/scripts/detect_fields_hwpx.py +286 -0
  26. package/optional-skills/dokkit/scripts/export_pdf.py +99 -0
  27. package/optional-skills/dokkit/scripts/parse_hwpx.py +185 -0
  28. package/optional-skills/dokkit/scripts/parse_image_with_gemini.py +159 -0
  29. package/optional-skills/dokkit/scripts/parse_xlsx.py +98 -0
  30. package/optional-skills/dokkit/scripts/source_images.py +365 -0
  31. package/optional-skills/dokkit/scripts/validate_docx.py +142 -0
  32. package/optional-skills/dokkit/scripts/validate_hwpx.py +281 -0
  33. package/optional-skills/dokkit/scripts/validate_state.py +132 -0
  34. package/package.json +1 -1
@@ -0,0 +1,286 @@
1
+ #!/usr/bin/env python3
2
+ """Detect fillable fields in an HWPX section XML file.
3
+
4
+ Usage:
5
+ python detect_fields_hwpx.py <path-to-section.xml>
6
+
7
+ Output:
8
+ JSON array of detected fields to stdout.
9
+ """
10
+
11
+ import json
12
+ import re
13
+ import sys
14
+ import xml.etree.ElementTree as ET
15
+ from pathlib import Path
16
+
17
+
18
+ NS = {
19
+ "hp": "http://www.hancom.co.kr/hwpml/2011/paragraph",
20
+ "hs": "http://www.hancom.co.kr/hwpml/2011/section",
21
+ "hc": "http://www.hancom.co.kr/hwpml/2011/common",
22
+ }
23
+
24
+ # Keywords that indicate image fields (Korean and English)
25
+ IMAGE_KEYWORDS_KO = ["사진", "증명사진", "여권사진", "로고", "서명", "날인", "도장", "직인"]
26
+ IMAGE_KEYWORDS_EN = ["photo", "picture", "logo", "signature", "stamp", "seal", "image", "portrait"]
27
+ IMAGE_KEYWORDS = IMAGE_KEYWORDS_KO + IMAGE_KEYWORDS_EN
28
+
29
+ # Map keywords to image_type classifier
30
+ IMAGE_TYPE_MAP = {
31
+ "사진": "photo", "증명사진": "photo", "여권사진": "photo",
32
+ "photo": "photo", "picture": "photo", "portrait": "photo", "image": "photo",
33
+ "로고": "logo", "logo": "logo",
34
+ "서명": "signature", "날인": "signature", "stamp": "signature", "seal": "signature",
35
+ "도장": "signature", "직인": "signature",
36
+ }
37
+
38
+
39
def get_text(elem) -> str:
    """Concatenate the text of every <hp:t> descendant of *elem*."""
    text_tag = "{%s}t" % NS["hp"]
    return "".join(node.text for node in elem.iter(text_tag) if node.text)
46
+
47
+
48
def _classify_image_type(text: str) -> str:
    """Classify image type from text. Returns photo/logo/signature/figure.

    Matching is by case-insensitive substring against IMAGE_TYPE_MAP; the
    first entry (insertion order) that occurs in *text* wins. "figure" is
    the fallback when no keyword matches.
    """
    needle = text.lower().strip()
    return next(
        (img_type for keyword, img_type in IMAGE_TYPE_MAP.items() if keyword in needle),
        "figure",
    )
55
+
56
+
57
def _is_image_keyword(text: str) -> bool:
    """Return True when *text* contains any image-related keyword (case-insensitive)."""
    haystack = text.lower().strip()
    for keyword in IMAGE_KEYWORDS:
        if keyword in haystack:
            return True
    return False
61
+
62
+
63
def detect_empty_table_cells(root) -> list[dict]:
    """Find empty table cells adjacent to label cells in HWPX tables (excluding image keywords)."""
    tbl_tag = "{%s}tbl" % NS["hp"]
    tr_tag = "{%s}tr" % NS["hp"]
    tc_tag = "{%s}tc" % NS["hp"]

    found: list[dict] = []
    for ti, tbl in enumerate(root.iter(tbl_tag)):
        for ri, tr in enumerate(tbl.iter(tr_tag)):
            cells = list(tr.iter(tc_tag))
            for ci, (cur, nxt) in enumerate(zip(cells, cells[1:])):
                label = get_text(cur).strip()
                value = get_text(nxt).strip()
                # A fillable slot is a short, non-empty label followed by an empty cell.
                if not label or value or len(label) >= 50:
                    continue
                # Image-keyword labels are reported by detect_image_fields instead.
                if _is_image_keyword(label):
                    continue
                found.append({
                    "label": label,
                    "field_type": "empty_cell",
                    "pattern": "(empty cell)",
                    "xml_path": f"tbl[{ti}]/tr[{ri}]/tc[{ci + 1}]",
                })
    return found
85
+
86
+
87
def detect_instruction_text(root) -> list[dict]:
    """Find parenthesised instruction text, e.g. Korean 입력/기재/작성 or enter/type/fill."""
    rx = re.compile(
        r"\(.*?(?:입력|기재|작성|enter|type|fill).*?\)",
        re.IGNORECASE
    )

    hits: list[dict] = []
    for idx, para in enumerate(root.iter("{%s}p" % NS["hp"])):
        for m in rx.finditer(get_text(para)):
            hits.append({
                "label": m.group(0).strip("()"),
                "field_type": "instruction_text",
                "pattern": m.group(0),
                "xml_path": f"p[{idx}]",
            })
    return hits
105
+
106
+
107
def detect_placeholder_text(root) -> list[dict]:
    """Find {{name}} and <<name>> placeholder patterns (image keywords excluded)."""
    rx = re.compile(r"\{\{([^}]+)\}\}|<<([^>]+)>>")

    hits: list[dict] = []
    for idx, para in enumerate(root.iter("{%s}p" % NS["hp"])):
        for m in rx.finditer(get_text(para)):
            # Exactly one of the two alternatives matched.
            label = m.group(1) or m.group(2)
            if _is_image_keyword(label):
                # Image placeholders are reported by detect_image_fields.
                continue
            hits.append({
                "label": label.strip(),
                "field_type": "placeholder_text",
                "pattern": m.group(0),
                "xml_path": f"p[{idx}]",
            })
    return hits
126
+
127
+
128
def detect_image_fields(root) -> list[dict]:
    """Detect image placeholders in an HWPX section XML.

    Detects:
    - Existing <hp:pic> elements in table cells (pre-positioned image slots)
    - Image placeholder text: {{photo}}, {{사진}}, <<signature>>, etc.
    - Empty cells adjacent to image-keyword labels

    Returns a list of field dicts with label, field_type="image", an
    image_type classification (photo/logo/signature/figure), the matched
    pattern, and an xml_path locating the element.
    """
    fields = []
    placeholder_pattern = re.compile(r"\{\{([^}]+)\}\}|<<([^>]+)>>")

    # 1. Detect image placeholder text
    for i, p in enumerate(root.iter("{%s}p" % NS["hp"])):
        text = get_text(p)
        for match in placeholder_pattern.finditer(text):
            # Exactly one of the two alternatives ({{...}} or <<...>>) matched.
            label = match.group(1) or match.group(2)
            if _is_image_keyword(label):
                fields.append({
                    "label": label.strip(),
                    "field_type": "image",
                    "image_type": _classify_image_type(label),
                    "pattern": match.group(0),
                    "xml_path": f"p[{i}]",
                })

    # 2. Detect existing <hp:pic> elements in table cells
    for ti, tbl in enumerate(root.iter("{%s}tbl" % NS["hp"])):
        for ri, tr in enumerate(tbl.iter("{%s}tr" % NS["hp"])):
            cells = list(tr.iter("{%s}tc" % NS["hp"]))
            for ci, cell in enumerate(cells):
                pics = list(cell.iter("{%s}pic" % NS["hp"]))
                if pics:
                    # Look for a descriptive label: first the cell to the
                    # left, then the cell to the right; fall back to a
                    # generic placeholder name when neither looks image-related.
                    label_text = ""
                    if ci > 0:
                        label_text = get_text(cells[ci - 1]).strip()
                    if not _is_image_keyword(label_text) and ci + 1 < len(cells):
                        label_text = get_text(cells[ci + 1]).strip()
                    if not _is_image_keyword(label_text):
                        label_text = "image_placeholder"

                    fields.append({
                        "label": label_text,
                        "field_type": "image",
                        "image_type": _classify_image_type(label_text),
                        "pattern": "(existing pic)",
                        "xml_path": f"tbl[{ti}]/tr[{ri}]/tc[{ci}]",
                    })

    # 3. Detect empty cells adjacent to image-keyword labels
    for ti, tbl in enumerate(root.iter("{%s}tbl" % NS["hp"])):
        for ri, tr in enumerate(tbl.iter("{%s}tr" % NS["hp"])):
            cells = list(tr.iter("{%s}tc" % NS["hp"]))
            for ci in range(len(cells) - 1):
                label_text = get_text(cells[ci]).strip()
                next_text = get_text(cells[ci + 1]).strip()

                if _is_image_keyword(label_text) and not next_text:
                    # Skip cells already containing a pic — pass 2 reported those.
                    has_pic = bool(list(cells[ci + 1].iter("{%s}pic" % NS["hp"])))
                    if not has_pic:
                        fields.append({
                            "label": label_text,
                            "field_type": "image",
                            "image_type": _classify_image_type(label_text),
                            "pattern": "(empty cell, image label)",
                            "xml_path": f"tbl[{ti}]/tr[{ri}]/tc[{ci + 1}]",
                        })

    return fields
196
+
197
+
198
def _build_nested_tip_set(root) -> set:
    """Collect id()s of <hp:tbl> elements nested inside a subList of another table."""
    tbl_tag = "{%s}tbl" % NS["hp"]
    sub_tag = "{%s}subList" % NS["hp"]
    return {
        id(inner)
        for outer in root.iter(tbl_tag)
        for sub in outer.iter(sub_tag)
        for inner in sub.iter(tbl_tag)
    }
206
+
207
+
208
def detect_tip_boxes(root) -> list[dict]:
    """Detect writing tip boxes (작성 팁) — 1×1 tables with ※ guidance text."""
    tip_rx = re.compile(r"^※|작성\s?팁|작성\s?요령")
    nested_ids = _build_nested_tip_set(root)

    boxes: list[dict] = []
    for ti, tbl in enumerate(root.iter("{%s}tbl" % NS["hp"])):
        # Tip boxes are always single-cell (1 row × 1 column) tables.
        if (tbl.get("rowCnt", ""), tbl.get("colCnt", "")) != ("1", "1"):
            continue

        text = get_text(tbl).strip()
        if not (text and tip_rx.search(text)):
            continue

        truncated = text[:60] + ("..." if len(text) > 60 else "")
        boxes.append({
            "label": truncated,
            "field_type": "tip_box",
            "action": "delete",
            "container": "nested" if id(tbl) in nested_ids else "standalone",
            "pattern": "(tip box: 1×1 table with ※ text)",
            "xml_path": f"tbl[{ti}]",
        })

    return boxes
233
+
234
+
235
def detect_date_fields(root) -> list[dict]:
    """Find empty cells immediately preceding 년/월/일 (year/month/day) markers."""
    part_names = {"년": "year", "월": "month", "일": "day"}

    results: list[dict] = []
    for ti, tbl in enumerate(root.iter("{%s}tbl" % NS["hp"])):
        for ri, tr in enumerate(tbl.iter("{%s}tr" % NS["hp"])):
            cells = list(tr.iter("{%s}tc" % NS["hp"]))
            for ci, cell in enumerate(cells):
                marker = get_text(cell).strip()
                if ci == 0 or marker not in part_names:
                    continue
                if get_text(cells[ci - 1]).strip():
                    # The slot before the marker already holds a value.
                    continue
                part = part_names[marker]
                results.append({
                    "label": part,
                    "field_type": "empty_cell",
                    "pattern": f"(date: {part})",
                    "xml_path": f"tbl[{ti}]/tr[{ri}]/tc[{ci - 1}]",
                })
    return results
255
+
256
+
257
def main():
    """CLI entry point: run all detectors on a section XML and print JSON.

    Exits 1 on bad usage, a missing file, or malformed XML; the latter two
    are reported as JSON error objects on stdout.
    """
    if len(sys.argv) != 2:
        print("Usage: python detect_fields_hwpx.py <section.xml>", file=sys.stderr)
        sys.exit(1)

    path = Path(sys.argv[1])
    if not path.exists():
        print(json.dumps({"error": f"File not found: {path}"}))
        sys.exit(1)

    # Fix: malformed XML previously crashed with an uncaught ParseError
    # traceback; report it as a JSON error like the file-not-found case.
    try:
        tree = ET.parse(path)
    except ET.ParseError as exc:
        print(json.dumps({"error": f"Invalid XML: {exc}"}))
        sys.exit(1)
    root = tree.getroot()

    all_fields = []
    all_fields.extend(detect_empty_table_cells(root))
    all_fields.extend(detect_instruction_text(root))
    all_fields.extend(detect_placeholder_text(root))
    all_fields.extend(detect_date_fields(root))
    all_fields.extend(detect_image_fields(root))
    all_fields.extend(detect_tip_boxes(root))

    # Assign stable sequential IDs: field_001, field_002, ...
    for i, field in enumerate(all_fields, start=1):
        field["id"] = f"field_{i:03d}"

    print(json.dumps(all_fields, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env python3
2
+ """Export a document to PDF using LibreOffice.
3
+
4
+ Usage:
5
+ python export_pdf.py <input_file> <output.pdf>
6
+
7
+ Requires:
8
+ LibreOffice installed and 'soffice' in PATH.
9
+ """
10
+
11
+ import shutil
12
+ import subprocess
13
+ import sys
14
+ import tempfile
15
+ from pathlib import Path
16
+
17
+
18
def find_soffice() -> str:
    """Locate the LibreOffice executable.

    Returns the path to the binary, or an empty string when LibreOffice
    cannot be found.
    """
    # Prefer whatever is on PATH. Fix: some Linux distributions install the
    # binary as 'libreoffice' rather than 'soffice', so check both names.
    for name in ("soffice", "libreoffice"):
        exe = shutil.which(name)
        if exe:
            return exe

    # Fall back to well-known install locations (Windows, Linux, macOS).
    candidates = [
        r"C:\Program Files\LibreOffice\program\soffice.exe",
        r"C:\Program Files (x86)\LibreOffice\program\soffice.exe",
        "/usr/bin/soffice",
        "/usr/local/bin/soffice",
        "/Applications/LibreOffice.app/Contents/MacOS/soffice",
    ]
    for path in candidates:
        if Path(path).exists():
            return path

    return ""
38
+
39
+
40
def export_pdf(input_file: str, output_file: str) -> str:
    """Convert *input_file* to PDF via headless LibreOffice.

    Returns the absolute path of the written PDF; exits with status 1 on
    any failure (missing LibreOffice, missing input, timeout, conversion
    error, or no PDF produced).
    """
    soffice = find_soffice()
    if not soffice:
        print("Error: LibreOffice not found. Install it or add 'soffice' to PATH.",
              file=sys.stderr)
        sys.exit(1)

    src = Path(input_file).resolve()
    dst = Path(output_file).resolve()

    if not src.exists():
        print(f"Error: Input file not found: {src}", file=sys.stderr)
        sys.exit(1)

    # LibreOffice names its output after the input stem, so convert into a
    # scratch directory first and move the result into place afterwards.
    with tempfile.TemporaryDirectory() as tmpdir:
        cmd = [
            soffice,
            "--headless",
            "--convert-to", "pdf",
            "--outdir", tmpdir,
            str(src),
        ]

        try:
            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        except subprocess.TimeoutExpired:
            print("Error: LibreOffice conversion timed out", file=sys.stderr)
            sys.exit(1)

        if proc.returncode != 0:
            print(f"Error: LibreOffice conversion failed: {proc.stderr}", file=sys.stderr)
            sys.exit(1)

        produced = list(Path(tmpdir).glob("*.pdf"))
        if not produced:
            print("Error: No PDF file generated", file=sys.stderr)
            sys.exit(1)

        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(produced[0]), str(dst))

    size = dst.stat().st_size
    print(f"Exported: {dst} ({size:,} bytes)", file=sys.stderr)
    return str(dst)
88
+
89
+
90
def main():
    """CLI wrapper: validate argv and run the conversion."""
    if len(sys.argv) != 3:
        print("Usage: python export_pdf.py <input_file> <output.pdf>", file=sys.stderr)
        sys.exit(1)
    export_pdf(sys.argv[1], sys.argv[2])


if __name__ == "__main__":
    main()
@@ -0,0 +1,185 @@
1
+ #!/usr/bin/env python3
2
+ """Parse HWPX files into Dokkit's dual-file format (Markdown + JSON sidecar).
3
+
4
+ HWPX is a ZIP archive containing XML files following the OWPML standard.
5
+ Structure: Contents/section0.xml, Contents/section1.xml, etc.
6
+
7
+ Usage:
8
+ python parse_hwpx.py <input.hwpx>
9
+
10
+ Output:
11
+ JSON to stdout with 'content_md' and 'metadata' fields.
12
+ """
13
+
14
+ import json
15
+ import sys
16
+ import zipfile
17
+ import xml.etree.ElementTree as ET
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+
21
+
22
+ # HWPX XML namespaces
23
+ NS = {
24
+ "hp": "http://www.hancom.co.kr/hwpml/2011/paragraph",
25
+ "hs": "http://www.hancom.co.kr/hwpml/2011/section",
26
+ "hc": "http://www.hancom.co.kr/hwpml/2011/core",
27
+ "hh": "http://www.hancom.co.kr/hwpml/2011/head",
28
+ }
29
+
30
+
31
def extract_text_from_element(elem) -> str:
    """Recursively extract all text from an XML element and its children.

    Direct text of <t>/<text> children is taken as-is; every other child is
    walked recursively. Tail text after each child is preserved.
    """
    parts = []
    if elem.text:
        parts.append(elem.text)

    for child in elem:
        # Strip any namespace prefix from the tag.
        local = child.tag.rsplit("}", 1)[-1]
        if local in ("t", "text"):
            if child.text:
                parts.append(child.text)
        else:
            # run, lineseg, and anything else: descend.
            parts.append(extract_text_from_element(child))

        if child.tail:
            parts.append(child.tail)

    return "".join(parts)
55
+
56
+
57
def parse_table(table_elem) -> list[list[str]]:
    """Flatten a table element into a row-major 2D list of stripped cell strings.

    Only direct <tr> children and their direct <tc> children are considered;
    rows with no cells are dropped.
    """
    rows: list[list[str]] = []
    for row_elem in table_elem:
        if row_elem.tag.split("}")[-1] != "tr":
            continue
        cells = [
            extract_text_from_element(cell).strip()
            for cell in row_elem
            if cell.tag.split("}")[-1] == "tc"
        ]
        if cells:
            rows.append(cells)
    return rows
74
+
75
+
76
def parse_section(xml_content: str, section_name: str) -> tuple[str, dict]:
    """Parse a single section XML into (markdown content, key-value pairs).

    Paragraphs become plain lines; tables become markdown tables. Rows with
    at least two non-empty cells and a short (<50 char) label also populate
    the returned key-value dict.
    """
    root = ET.fromstring(xml_content)
    content_parts = [f"## {section_name}\n"]
    key_value_pairs = {}

    def _local(e):
        # Tag name with any namespace prefix stripped.
        return e.tag.split("}")[-1] if "}" in e.tag else e.tag

    # Fix: root.iter() walks ALL descendants, so paragraphs (and tables)
    # nested inside a table were emitted twice — once inside the rendered
    # markdown table and again as standalone elements. Collect the ids of
    # everything nested under a table so those can be skipped below.
    nested_ids = set()
    for elem in root.iter():
        if _local(elem) == "tbl":
            for sub in elem.iter():
                if sub is not elem and _local(sub) in ("p", "tbl"):
                    nested_ids.add(id(sub))

    for elem in root.iter():
        if id(elem) in nested_ids:
            continue  # already rendered as part of an enclosing table
        tag = _local(elem)

        if tag == "p":
            text = extract_text_from_element(elem).strip()
            if text:
                content_parts.append(text)

        elif tag == "tbl":
            rows = parse_table(elem)
            if rows:
                # Pad every row to the widest row, then render as markdown.
                max_cols = max(len(r) for r in rows)
                for r in rows:
                    while len(r) < max_cols:
                        r.append("")

                content_parts.append("")
                content_parts.append("| " + " | ".join(rows[0]) + " |")
                content_parts.append("| " + " | ".join(["---"] * max_cols) + " |")
                for row in rows[1:]:
                    content_parts.append("| " + " | ".join(row) + " |")
                content_parts.append("")

                # Extract key-value pairs from label/value row prefixes.
                for row in rows:
                    if len(row) >= 2 and row[0] and row[1]:
                        label = row[0].strip()
                        value = row[1].strip()
                        if len(label) < 50:
                            key_value_pairs[label] = value

    return "\n".join(content_parts), key_value_pairs
119
+
120
+
121
def parse_hwpx(file_path: str) -> dict:
    """Parse an HWPX file and return its content and metadata.

    Returns a dict with 'content_md' and 'metadata' on success, or a dict
    with an 'error' key when the input is not a valid ZIP archive.
    """
    path = Path(file_path)

    if not zipfile.is_zipfile(path):
        return {"error": f"Not a valid HWPX (ZIP) file: {path}"}

    content_blocks: list[str] = []
    merged_kvp: dict = {}
    section_names: list[str] = []

    with zipfile.ZipFile(path, "r") as zf:
        members = zf.namelist()

        # Standard layout: Contents/section0.xml, Contents/section1.xml, ...
        section_files = sorted(
            m for m in members
            if m.startswith("Contents/section") and m.endswith(".xml")
        )
        if not section_files:
            # Fallback for producers with a non-standard folder layout.
            section_files = sorted(
                m for m in members
                if "section" in m.lower() and m.endswith(".xml")
            )

        for idx, member in enumerate(section_files, start=1):
            title = f"Section {idx}"
            section_names.append(title)

            xml_text = zf.read(member).decode("utf-8")
            md, kvp = parse_section(xml_text, title)
            content_blocks.append(md)
            merged_kvp.update(kvp)

    return {
        "content_md": f"# {path.stem}\n\n" + "\n\n".join(content_blocks),
        "metadata": {
            "file_name": path.name,
            "file_type": "hwpx",
            "parse_date": datetime.now().isoformat(),
            "key_value_pairs": merged_kvp,
            "sections": section_names,
            "section_count": len(section_names),
        }
    }
168
+
169
+
170
def main():
    """CLI entry point: parse the given HWPX file and emit JSON on stdout."""
    if len(sys.argv) != 2:
        print("Usage: python parse_hwpx.py <input.hwpx>", file=sys.stderr)
        sys.exit(1)

    file_path = sys.argv[1]
    if not Path(file_path).exists():
        print(json.dumps({"error": f"File not found: {file_path}"}))
        sys.exit(1)

    print(json.dumps(parse_hwpx(file_path), ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()