devlyn-cli 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/bin/devlyn.js +1 -0
  2. package/config/commands/devlyn.team-resolve.md +31 -2
  3. package/optional-skills/dokkit/ANALYSIS.md +198 -0
  4. package/optional-skills/dokkit/COMMANDS.md +365 -0
  5. package/optional-skills/dokkit/DOCX-XML.md +76 -0
  6. package/optional-skills/dokkit/EXPORT.md +102 -0
  7. package/optional-skills/dokkit/FILLING.md +377 -0
  8. package/optional-skills/dokkit/HWPX-XML.md +73 -0
  9. package/optional-skills/dokkit/IMAGE-SOURCING.md +127 -0
  10. package/optional-skills/dokkit/INGESTION.md +65 -0
  11. package/optional-skills/dokkit/SKILL.md +153 -0
  12. package/optional-skills/dokkit/STATE.md +60 -0
  13. package/optional-skills/dokkit/references/docx-field-patterns.md +151 -0
  14. package/optional-skills/dokkit/references/docx-structure.md +58 -0
  15. package/optional-skills/dokkit/references/field-detection-patterns.md +130 -0
  16. package/optional-skills/dokkit/references/hwpx-field-patterns.md +461 -0
  17. package/optional-skills/dokkit/references/hwpx-structure.md +159 -0
  18. package/optional-skills/dokkit/references/image-opportunity-heuristics.md +121 -0
  19. package/optional-skills/dokkit/references/image-xml-patterns.md +338 -0
  20. package/optional-skills/dokkit/references/section-image-interleaving.md +346 -0
  21. package/optional-skills/dokkit/references/section-range-detection.md +118 -0
  22. package/optional-skills/dokkit/references/state-schema.md +143 -0
  23. package/optional-skills/dokkit/references/supported-formats.md +67 -0
  24. package/optional-skills/dokkit/scripts/compile_hwpx.py +134 -0
  25. package/optional-skills/dokkit/scripts/detect_fields.py +301 -0
  26. package/optional-skills/dokkit/scripts/detect_fields_hwpx.py +286 -0
  27. package/optional-skills/dokkit/scripts/export_pdf.py +99 -0
  28. package/optional-skills/dokkit/scripts/parse_hwpx.py +185 -0
  29. package/optional-skills/dokkit/scripts/parse_image_with_gemini.py +159 -0
  30. package/optional-skills/dokkit/scripts/parse_xlsx.py +98 -0
  31. package/optional-skills/dokkit/scripts/source_images.py +365 -0
  32. package/optional-skills/dokkit/scripts/validate_docx.py +142 -0
  33. package/optional-skills/dokkit/scripts/validate_hwpx.py +281 -0
  34. package/optional-skills/dokkit/scripts/validate_state.py +132 -0
  35. package/package.json +1 -1
@@ -0,0 +1,159 @@
1
+ # HWPX XML Structure Reference
2
+
3
+ ## Unpacking an HWPX
4
+
5
+ ```bash
6
+ mkdir -p .dokkit/template_work
7
+ cd .dokkit/template_work
8
+ unzip -o /path/to/template.hwpx
9
+ ```
10
+
11
+ ## Reading Section XML
12
+
13
+ ```python
14
+ import xml.etree.ElementTree as ET
15
+
16
# Parse the main section file of the unpacked HWPX (path is relative to
# the template_work directory created in the unpacking step).
tree = ET.parse("Contents/section0.xml")
root = tree.getroot()

# HWPX namespaces (prefix -> URI).
# The OPF URI ends with a trailing slash, the same spelling as the
# xmlns:opf root declaration listed in the namespace-preservation section
# of this document; without the slash, lookups against content.hpf
# elements would not match.
ns = {
    "hp": "http://www.hancom.co.kr/hwpml/2011/paragraph",
    "hs": "http://www.hancom.co.kr/hwpml/2011/section",
    "hc": "http://www.hancom.co.kr/hwpml/2011/core",
    "hh": "http://www.hancom.co.kr/hwpml/2011/head",
    "opf": "http://www.idpf.org/2007/opf/",
}

# Fully-qualified (Clark notation) tag names, built from the map above so
# the URI is spelled in exactly one place.
HP_P = "{%s}p" % ns["hp"]
HP_T = "{%s}t" % ns["hp"]

# Print the text of every paragraph that has any. A paragraph's text can
# be split across several <hp:t> elements, so the pieces are joined.
for p in root.iter(HP_P):
    texts = [t.text for t in p.iter(HP_T) if t.text]
    if texts:
        print("".join(texts))
37
+ ```
38
+
39
+ ## CRITICAL: Preserving Namespace Declarations
40
+
41
+ Python's `xml.etree.ElementTree` **strips unused namespace declarations** when re-serializing XML. This breaks Hancom/Polaris Office, which requires ALL original namespace declarations on EVERY XML root element, even if no elements use those prefixes.
42
+
43
+ **This applies to ALL HWPX XML files**, not just `section0.xml`:
44
+ - `Contents/section0.xml` — root `<hs:sec>` needs 14+ xmlns
45
+ - `Contents/content.hpf` — root `<opf:package>` needs 14+ xmlns
46
+ - `Contents/header.xml` — root `<hh:head>` needs 14+ xmlns
47
+
48
+ **After any ET-based XML modification**, you MUST restore the original namespace declarations:
49
+
50
+ ```python
51
+ # After tree.write(), fix the root element:
52
+ import re
53
+
54
with open(section_xml_path, 'r', encoding='utf-8') as f:
    content = f.read()

# The complete set of namespace declarations carried by the ORIGINAL root
# element. These must come from the untouched file (i.e. be captured
# before ET ever parses/rewrites it), because ET drops the unused ones.
ORIGINAL_ROOT_NS = (
    'xmlns:ha="http://www.hancom.co.kr/hwpml/2011/app" '
    'xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph" '
    'xmlns:hp10="http://www.hancom.co.kr/hwpml/2016/paragraph" '
    'xmlns:hs="http://www.hancom.co.kr/hwpml/2011/section" '
    'xmlns:hc="http://www.hancom.co.kr/hwpml/2011/core" '
    'xmlns:hh="http://www.hancom.co.kr/hwpml/2011/head" '
    'xmlns:hhs="http://www.hancom.co.kr/hwpml/2011/history" '
    'xmlns:hm="http://www.hancom.co.kr/hwpml/2011/master-page" '
    'xmlns:hpf="http://www.hancom.co.kr/schema/2011/hpf" '
    'xmlns:dc="http://purl.org/dc/elements/1.1/" '
    'xmlns:ooxmlchart="http://www.hancom.co.kr/hwpml/2016/ooxmlchart" '
    'xmlns:epub="http://www.idpf.org/2007/ops" '
    'xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" '
    'xmlns:opf="http://www.idpf.org/2007/opf/"'
)

# Replace the stripped root opening tag with the full original declarations.
# count=1 so only the root element (first match) is touched.
content = re.sub(
    r'<hs:sec\s+xmlns:[^>]+>',
    f'<hs:sec {ORIGINAL_ROOT_NS}>',
    content, count=1
)

# Also restore the XML declaration to the original format.
# ET echoes the encoding name exactly as it was passed to tree.write()
# (e.g. 'utf-8' or 'UTF-8'), so match any letter case and either quote
# style. A declaration that already carries standalone="yes" does not
# match, which keeps this step idempotent.
content = re.sub(
    r"""<\?xml\s+version=['"]1\.0['"]\s+encoding=['"]utf-8['"]\s*\?>""",
    '<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>',
    content, count=1, flags=re.IGNORECASE
)

with open(section_xml_path, 'w', encoding='utf-8') as f:
    f.write(content)
91
+ ```
92
+
93
+ **Also remove newlines** that ET inserts between the XML declaration and root element:
94
+ ```python
95
+ content = content.replace('?>\n<', '?><')
96
+ ```
97
+
98
+ **Best practice**: Before calling `ET.parse()`, save the original root opening tag. After `tree.write()`, replace the new root tag with the saved original. Apply this to EVERY HWPX XML file you modify (section0.xml, content.hpf, header.xml).
99
+
100
+ ## Repackaging an HWPX
101
+
102
+ CRITICAL: The `mimetype` file must be first and uncompressed.
103
+
104
+ ```python
105
+ import zipfile
106
+ import os
107
+
108
def repackage_hwpx(work_dir, output_path):
    """Repackage an unpacked HWPX working directory into a .hwpx archive.

    Args:
        work_dir: Directory holding the unpacked (and possibly modified)
            HWPX contents.
        output_path: Path of the archive to create; an existing file is
            overwritten.

    The root-level ``mimetype`` entry is written first and stored without
    compression; every other file is deflated. If ``work_dir`` has no
    ``mimetype`` file the archive is still written, just without that entry.
    """
    mimetype_path = os.path.join(work_dir, "mimetype")
    # Resolved once so the archive being written can be recognised if it
    # lives inside work_dir (it must never be packed into itself).
    output_abs = os.path.abspath(output_path)

    with zipfile.ZipFile(output_path, 'w') as zf:
        # mimetype MUST be first and uncompressed
        if os.path.exists(mimetype_path):
            zf.write(mimetype_path, "mimetype", compress_type=zipfile.ZIP_STORED)

        # Add all other files with compression
        for root, dirs, files in os.walk(work_dir):
            dirs.sort()  # deterministic traversal -> reproducible archives
            for file in sorted(files):
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, work_dir)
                # Only the ROOT mimetype was already written above; a file
                # that merely shares the name in a subdirectory is ordinary
                # content and must be kept.
                if arcname == "mimetype":
                    continue
                if os.path.abspath(file_path) == output_abs:
                    continue
                zf.write(file_path, arcname, compress_type=zipfile.ZIP_DEFLATED)
124
+ ```
125
+
126
+ ## BinData and Image Handling
127
+
128
+ ### BinData Directory
129
+ The `BinData/` directory (at the archive root) stores embedded binary resources — primarily images. Files are named sequentially: `image1.png`, `image2.jpg`, etc.
130
+
131
+ ### Image Registration — Manifest Only
132
+ Images are registered ONLY in `Contents/content.hpf` via `<opf:item>` elements:
133
+ ```xml
134
+ <opf:item id="image1" href="BinData/image1.png" media-type="image/png" isEmbeded="1"/>
135
+ ```
136
+
137
+ **Critical**: Do NOT add `<hh:binDataItems>` entries to `header.xml` for images. The `content.hpf` manifest is the sole registration point. No entries are needed in `META-INF/manifest.xml` either.
138
+
139
+ ### Image Elements Use `hc:` Namespace
140
+ The `<img>` element inside `<hp:pic>` uses the **core** namespace (`hc:`), not the paragraph namespace (`hp:`):
141
+ ```xml
142
+ <!-- CORRECT -->
143
+ <hc:img binaryItemIDRef="image1" bright="0" contrast="0" effect="REAL_PIC" alpha="0"/>
144
+
145
+ <!-- WRONG — will not render -->
146
+ <hp:img binaryItemIDRef="image1" .../>
147
+ ```
148
+
149
+ See the `dokkit-image-sourcing` skill for the complete `<hp:pic>` element structure with all required children.
150
+
151
+ ## Critical Rules for HWPX Surgery
152
+
153
+ 1. **`mimetype` must be first in ZIP** — stored uncompressed
154
+ 2. **Preserve `hp:rPr` elements** — character formatting
155
+ 3. **Don't modify `hp:cellSpan`** — cell merging must remain intact
156
+ 4. **Keep `hp:cellAddr` — and ensure `rowAddr` = row index** — Each `<hp:tc>` has `<hp:cellAddr colAddr="C" rowAddr="R"/>` where `R` MUST equal the 0-based index of the parent `<hp:tr>` within the `<hp:tbl>`. If two rows share the same `rowAddr`, Polaris Office **silently hides** the duplicate — the table renders with missing data and no error. After any row insertion, deletion, or reordering, re-index ALL `rowAddr` values and update `<hp:tbl rowCnt="N">`.
157
+ 5. **Preserve paragraph properties** — `hp:pPr` controls alignment, spacing
158
+ 6. **Korean font references** — don't change `hangulFont`, `latinFont` attributes
159
+ 7. **Section boundaries** — each section file is independent
@@ -0,0 +1,121 @@
1
+ # Image Opportunity Heuristics for Section Content Fields
2
+
3
+ Guide for detecting image insertion opportunities within `section_content` fields. These heuristics help the analyzer identify where AI-generated images can be interleaved with text to create visually rich proposals.
4
+
5
+ ## Content Signal Keywords
6
+
7
+ Scan the `mapped_value` text for these signal keywords that indicate an image would add value.
8
+
9
+ ### By `content_type`
10
+
11
+ | content_type | Korean keywords | English keywords |
12
+ |---|---|---|
13
+ | flowchart | 프로세스, 절차, 단계, 흐름, 순서, 워크플로우 | process, procedure, step, flow, workflow, pipeline |
14
+ | diagram | 구조, 아키텍처, 시스템, 모듈, 구성도, 체계 | architecture, structure, system, module, framework, topology |
15
+ | data | 시장규모, 성장률, 통계, 수치, 비율, 점유율, 매출 | market size, growth rate, statistics, data, ratio, share, revenue |
16
+ | concept | 개념, 비전, 전략, 핵심, 모델, 방법론, 기술 | concept, vision, strategy, core, model, methodology, technology |
17
+ | infographic | 비교, 장점, 특징, 차별점, 효과, 기대효과 | comparison, advantage, feature, differentiation, effect, benefit |
18
+
19
+ ### Signal strength
20
+
21
+ - **Strong signal** (2+ keywords in same paragraph): high-priority opportunity
22
+ - **Moderate signal** (1 keyword): include if surrounding context supports it
23
+ - **Weak signal** (keyword in passing mention): skip unless the paragraph is >300 chars
24
+
25
+ ## Placement Rules
26
+
27
+ ### Where to insert
28
+
29
+ - **After the paragraph** that introduces the concept — not before, not mid-paragraph
30
+ - `insertion_point.strategy` = `"after_paragraph"`
31
+ - `insertion_point.anchor_text` = a distinctive phrase (5-15 words) from the paragraph that signals the concept. Choose a phrase that is unique within the field's mapped_value.
32
+
33
+ ### Where NOT to insert
34
+
35
+ - Never as the first element (before any text)
36
+ - Never as the last element (after all text)
37
+ - Never between two consecutive images (min 150 chars of text between image opportunities)
38
+ - Never inside bulleted/numbered list sequences
39
+ - Never right after a heading (insert after the explanatory paragraph instead)
40
+
41
+ ## Prompt Composition Strategy
42
+
43
+ For each detected opportunity:
44
+
45
+ 1. **Read the anchor paragraph** in Korean
46
+ 2. **Identify the core concept** being described
47
+ 3. **Compose an English generation prompt** that:
48
+ - Describes the visual content (what to show)
49
+ - Specifies the style (technical diagram, data chart, concept illustration)
50
+ - Includes domain context (e.g., "for a government R&D proposal")
51
+ - Avoids text/labels in the image (these are hard to control)
52
+ 4. **Map to a preset**: `technical_illustration` (default for diagram, flowchart, concept), `infographic` (for data, infographic)
53
+
54
+ ### Prompt template
55
+
56
+ ```
57
+ [content_type] showing [core concept from paragraph].
58
+ Context: [section name] of a Korean government R&D project proposal.
59
+ Style: Clean, professional, minimal text labels.
60
+ Color scheme: Modern, corporate blue/teal tones.
61
+ ```
62
+
63
+ ## Skip Conditions
64
+
65
+ Do NOT create image opportunities when:
66
+
67
+ 1. **Short content**: `mapped_value` < 400 characters total
68
+ 2. **Team/personnel lists**: Content is primarily names, roles, and qualifications (look for patterns like `이름:`, `직책:`, `담당:`, `학력:`, `경력:`)
69
+ 3. **Budget/financial tables**: Content is primarily numbers, amounts, costs (look for `원`, `만원`, `억원`, `비용`, `예산`)
70
+ 4. **Already has explicit image fields**: If the section already contains `field_type: "image"` fields in analysis.json, do not skip the field entirely — instead cap it at 1 opportunity
71
+ 5. **Simple form data**: Content is short key-value pairs without narrative text
72
+ 6. **Repeating field**: If the same section has multiple `section_content` fields that overlap in content
73
+
74
+ ## Limits and Spacing
75
+
76
+ | Constraint | Value |
77
+ |---|---|
78
+ | Max opportunities per `section_content` field | 3 |
79
+ | Min chars before first opportunity | 200 |
80
+ | Min chars between opportunities | 150 |
81
+ | Min total mapped_value length | 400 chars |
82
+ | Max total opportunities across all sections | 12 |
83
+
84
+ ## Output Schema
85
+
86
+ Each opportunity is added to the field's `image_opportunities` array:
87
+
88
+ ```json
89
+ {
90
+ "opportunity_id": "imgop_{field_id}_{seq}",
91
+ "insertion_point": {
92
+ "strategy": "after_paragraph",
93
+ "anchor_text": "AI 유사도 탐색 알고리즘을 통해 기존 특허와의 유사성을 분석"
94
+ },
95
+ "generation_prompt": "Technical architecture diagram of an AI-powered IP similarity search system showing document ingestion, vector embedding, and similarity matching pipeline. Context: Korean government R&D proposal. Style: Clean, professional, minimal text. Color: Modern blue/teal.",
96
+ "preset": "technical_illustration",
97
+ "content_type": "diagram",
98
+ "rationale": "Text describes the AI algorithm workflow; a diagram clarifies the system architecture",
99
+ "dimensions": {
100
+ "width_hwpml": 36000,
101
+ "height_hwpml": 24000,
102
+ "width_emu": 4572000,
103
+ "height_emu": 3048000
104
+ },
105
+ "image_file": null,
106
+ "status": "pending"
107
+ }
108
+ ```
109
+
110
+ ### Field meanings
111
+
112
+ - `opportunity_id`: Unique ID, format `imgop_{field_id}_{sequence_number}`
113
+ - `insertion_point.strategy`: Always `"after_paragraph"` for section content
114
+ - `insertion_point.anchor_text`: Distinctive Korean phrase from the paragraph (used by filler to locate insertion point)
115
+ - `generation_prompt`: English prompt for AI image generation
116
+ - `preset`: Maps to `scripts/source_images.py` preset parameter
117
+ - `content_type`: One of `flowchart`, `diagram`, `data`, `concept`, `infographic`
118
+ - `rationale`: Brief explanation of why an image helps here
119
+ - `dimensions`: Default size — filler may adjust based on content_type
120
+ - `image_file`: `null` until sourced (set by fill-doc orchestrator)
121
+ - `status`: `"pending"` → `"sourced"` → `"inserted"` (or `"skipped"`)
@@ -0,0 +1,338 @@
1
+ # Image XML Patterns
2
+
3
+ ## DOCX Image Pattern
4
+
5
+ ### Required Namespace Declarations
6
+ These namespaces must be present on the root `<w:document>` element:
7
+ ```xml
8
+ xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
9
+ xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
10
+ xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
11
+ xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"
12
+ xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
13
+ ```
14
+
15
+ ### Relationship Entry (word/_rels/document.xml.rels)
16
+ ```xml
17
+ <Relationship
18
+ Id="rId8"
19
+ Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
20
+ Target="media/image1.png"/>
21
+ ```
22
+
23
+ ### Content_Types Entry ([Content_Types].xml)
24
+ Add if the image extension is not already registered:
25
+ ```xml
26
+ <!-- For PNG images -->
27
+ <Default Extension="png" ContentType="image/png"/>
28
+
29
+ <!-- For JPEG images -->
30
+ <Default Extension="jpeg" ContentType="image/jpeg"/>
31
+ <Default Extension="jpg" ContentType="image/jpeg"/>
32
+ ```
33
+
34
+ ### Drawing Element (in document.xml)
35
+ Wrap in a `<w:r>` element within a paragraph in the target cell:
36
+ ```xml
37
+ <w:r>
38
+ <w:drawing>
39
+ <wp:inline distT="0" distB="0" distL="0" distR="0">
40
+ <wp:extent cx="914400" cy="1219200"/>
41
+ <wp:docPr id="1" name="Picture 1"/>
42
+ <a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
43
+ <a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">
44
+ <pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">
45
+ <pic:nvPicPr>
46
+ <pic:cNvPr id="1" name="image1.png"/>
47
+ <pic:cNvPicPr/>
48
+ </pic:nvPicPr>
49
+ <pic:blipFill>
50
+ <a:blip r:embed="rId8"/>
51
+ <a:stretch><a:fillRect/></a:stretch>
52
+ </pic:blipFill>
53
+ <pic:spPr>
54
+ <a:xfrm>
55
+ <a:off x="0" y="0"/>
56
+ <a:ext cx="914400" cy="1219200"/>
57
+ </a:xfrm>
58
+ <a:prstGeom prst="rect"><a:avLst/></a:prstGeom>
59
+ </pic:spPr>
60
+ </pic:pic>
61
+ </a:graphicData>
62
+ </a:graphic>
63
+ </wp:inline>
64
+ </w:drawing>
65
+ </w:r>
66
+ ```
67
+
68
+ ## HWPX Image Pattern
69
+
70
+ ### A. Registration — Manifest Only
71
+
72
+ Images are registered ONLY in `Contents/content.hpf` manifest. Do NOT add `<hh:binDataItems>` to `header.xml`. No entries needed in `META-INF/manifest.xml`.
73
+
74
+ ```xml
75
+ <!-- Contents/content.hpf — add <opf:item> for each image -->
76
+ <opf:item id="image1" href="BinData/image1.png" media-type="image/png" isEmbeded="1"/>
77
+ ```
78
+
79
+ The `id` attribute becomes the `binaryItemIDRef` in the `<hc:img>` element below.
80
+
81
+ ### B. Paragraph Structure (CRITICAL)
82
+
83
+ Images MUST be placed **inside** the `<hp:run>` element, with `<hp:t/>` **after** the `<hp:pic>`. This matches real Hancom Office output.
84
+
85
+ ```xml
86
+ <!-- CORRECT: pic inside run, t after pic -->
87
+ <hp:p id="..." paraPrIDRef="..." styleIDRef="0" pageBreak="0" columnBreak="0" merged="0">
88
+ <hp:linesegarray>
89
+ <hp:lineseg textpos="0" vertpos="0" vertsize="{H}" textheight="{H}"
90
+ baseline="{H*0.85}" spacing="500" horzpos="0" horzsize="..." flags="393216"/>
91
+ </hp:linesegarray>
92
+ <hp:run charPrIDRef="0">
93
+ <hp:pic ...>...</hp:pic>
94
+ <hp:t/>
95
+ </hp:run>
96
+ </hp:p>
97
+
98
+ <!-- WRONG: pic as sibling of run — images will NOT render -->
99
+ <hp:run charPrIDRef="0"><hp:t/></hp:run>
100
+ <hp:pic ...>...</hp:pic>
101
+ ```
102
+
103
+ ### C. Complete `<hp:pic>` Structure (Hancom Canonical Order)
104
+
105
+ Element order matches real Hancom Office output. Every child element listed is **required**. Do NOT include `<hp:lineShape>` (not present in real Hancom files).
106
+
107
+ ```xml
108
+ <hp:pic id="{seq_id}" zOrder="{z}" numberingType="PICTURE" textWrap="TOP_AND_BOTTOM"
109
+ textFlow="BOTH_SIDES" lock="0" dropcapstyle="None"
110
+ href="" groupLevel="0" instid="{seq_id}" reverse="0">
111
+ <!-- Group 1: Geometry (Hancom canonical order) -->
112
+ <hp:offset x="0" y="0"/>
113
+ <hp:orgSz width="{W}" height="{H}"/>
114
+ <hp:curSz width="{W}" height="{H}"/>
115
+ <hp:flip horizontal="0" vertical="0"/>
116
+ <hp:rotationInfo angle="0" centerX="{W_half}" centerY="{H_half}" rotateimage="1"/>
117
+ <hp:renderingInfo>
118
+ <hc:transMatrix e1="1" e2="0" e3="0" e4="0" e5="1" e6="0"/>
119
+ <hc:scaMatrix e1="1" e2="0" e3="0" e4="0" e5="1" e6="0"/>
120
+ <hc:rotMatrix e1="1" e2="-0" e3="0" e4="0" e5="1" e6="0"/>
121
+ </hp:renderingInfo>
122
+ <!-- Group 2: Image data -->
123
+ <hp:imgRect>
124
+ <hc:pt0 x="0" y="0"/>
125
+ <hc:pt1 x="{W}" y="0"/>
126
+ <hc:pt2 x="{W}" y="{H}"/>
127
+ <hc:pt3 x="0" y="{H}"/>
128
+ </hp:imgRect>
129
+ <hp:imgClip left="0" right="{pixW}" top="0" bottom="{pixH}"/>
130
+ <hp:inMargin left="0" right="0" top="0" bottom="0"/>
131
+ <hp:imgDim dimwidth="{pixW}" dimheight="{pixH}"/>
132
+ <hc:img binaryItemIDRef="{manifest_id}" bright="0" contrast="0" effect="REAL_PIC" alpha="0"/>
133
+ <!-- Group 3: Layout (AFTER hc:img in Hancom canonical order) -->
134
+ <hp:sz width="{W}" widthRelTo="ABSOLUTE" height="{H}" heightRelTo="ABSOLUTE" protect="0"/>
135
+ <hp:pos treatAsChar="1" affectLSpacing="0" flowWithText="0" allowOverlap="0"
136
+ holdAnchorAndSO="0" vertRelTo="PARA" horzRelTo="COLUMN"
137
+ vertAlign="TOP" horzAlign="LEFT" vertOffset="0" horzOffset="0"/>
138
+ <hp:outMargin left="0" right="0" top="0" bottom="0"/>
139
+ </hp:pic>
140
+ ```
141
+
142
+ **Variable definitions**:
143
+ - `{W}` / `{H}` — Display size in HWPML units (1/7200 inch). Use defaults from Size Calculations table or analysis.json dimensions.
144
+ - `{W_half}` / `{H_half}` — Half of W/H for rotation center.
145
+ - `{pixW}` / `{pixH}` — Actual pixel dimensions from PIL/Pillow `Image.open(path).size`.
146
+ - `{manifest_id}` — The `id` attribute from the `<opf:item>` in `content.hpf`.
147
+ - `{seq_id}` — Sequential ID: find max existing `id` in section XML + 1.
148
+ - `{z}` — zOrder: find max existing `zOrder` in section XML + 1.
149
+
150
+ ### D. Python `build_hwpx_pic_element()` Function
151
+
152
+ ```python
153
+ import xml.etree.ElementTree as ET
154
+ from PIL import Image
155
+
156
# Clark-notation namespace URIs for the paragraph (hp:) and core (hc:) vocabularies.
HP = "http://www.hancom.co.kr/hwpml/2011/paragraph"
HC = "http://www.hancom.co.kr/hwpml/2011/core"


def build_hwpx_pic_element(
    manifest_id: str,
    image_path: str,
    width_hwpml: int,
    height_hwpml: int,
    seq_id: int,
    z_order: int,
    *,
    pixel_size: "tuple[int, int] | None" = None,
) -> ET.Element:
    """Build a complete <hp:pic> element for HWPX image insertion.

    Children are emitted in the order this document calls the Hancom
    canonical order: offset, orgSz, curSz, flip, rotationInfo,
    renderingInfo, imgRect, imgClip, inMargin, imgDim, hc:img, sz, pos,
    outMargin.

    Args:
        manifest_id: The id from content.hpf <opf:item> (e.g. "image1");
            written as ``binaryItemIDRef`` on <hc:img>.
        image_path: Path to the image file. Only opened (to read the pixel
            dimensions) when ``pixel_size`` is not supplied.
        width_hwpml: Display width in HWPML units (1/7200 inch).
        height_hwpml: Display height in HWPML units (1/7200 inch).
        seq_id: Sequential element ID (max existing + 1); used for both
            ``id`` and ``instid``.
        z_order: Z-order value (max existing + 1).
        pixel_size: Optional ``(width, height)`` in pixels. Pass it when the
            dimensions are already known to avoid re-opening the image.

    Returns:
        The detached <hp:pic> element; the caller places it inside an
        <hp:run>.
    """
    if pixel_size is None:
        with Image.open(image_path) as img:
            pix_w, pix_h = img.size
    else:
        pix_w, pix_h = pixel_size

    # XML attribute values must be strings; convert once up front.
    W = str(width_hwpml)
    H = str(height_hwpml)
    W_half = str(width_hwpml // 2)  # rotation centre = middle of the frame
    H_half = str(height_hwpml // 2)
    pixW = str(pix_w)
    pixH = str(pix_h)
    sid = str(seq_id)

    pic = ET.Element(f"{{{HP}}}pic", {
        "id": sid, "zOrder": str(z_order), "numberingType": "PICTURE",
        "textWrap": "TOP_AND_BOTTOM", "textFlow": "BOTH_SIDES", "lock": "0",
        "dropcapstyle": "None", "href": "", "groupLevel": "0",
        "instid": sid, "reverse": "0",
    })

    # --- Group 1: geometry -------------------------------------------------
    ET.SubElement(pic, f"{{{HP}}}offset", x="0", y="0")
    ET.SubElement(pic, f"{{{HP}}}orgSz", width=W, height=H)
    ET.SubElement(pic, f"{{{HP}}}curSz", width=W, height=H)
    ET.SubElement(pic, f"{{{HP}}}flip", horizontal="0", vertical="0")
    ET.SubElement(pic, f"{{{HP}}}rotationInfo", {
        "angle": "0", "centerX": W_half, "centerY": H_half, "rotateimage": "1",
    })

    # Identity translate/scale/rotate matrices (no transformation applied).
    ri = ET.SubElement(pic, f"{{{HP}}}renderingInfo")
    ET.SubElement(ri, f"{{{HC}}}transMatrix",
                  e1="1", e2="0", e3="0", e4="0", e5="1", e6="0")
    ET.SubElement(ri, f"{{{HC}}}scaMatrix",
                  e1="1", e2="0", e3="0", e4="0", e5="1", e6="0")
    ET.SubElement(ri, f"{{{HC}}}rotMatrix",
                  e1="1", e2="-0", e3="0", e4="0", e5="1", e6="0")

    # --- Group 2: image data -----------------------------------------------
    # imgRect: the four corners of the display rectangle, clockwise from
    # the top-left.
    imgRect = ET.SubElement(pic, f"{{{HP}}}imgRect")
    ET.SubElement(imgRect, f"{{{HC}}}pt0", x="0", y="0")
    ET.SubElement(imgRect, f"{{{HC}}}pt1", x=W, y="0")
    ET.SubElement(imgRect, f"{{{HC}}}pt2", x=W, y=H)
    ET.SubElement(imgRect, f"{{{HC}}}pt3", x="0", y=H)

    # imgClip / imgDim carry the source pixel dimensions, not display size.
    ET.SubElement(pic, f"{{{HP}}}imgClip",
                  left="0", right=pixW, top="0", bottom=pixH)
    ET.SubElement(pic, f"{{{HP}}}inMargin",
                  left="0", right="0", top="0", bottom="0")
    ET.SubElement(pic, f"{{{HP}}}imgDim", dimwidth=pixW, dimheight=pixH)
    # NOTE: <img> lives in the core (hc:) namespace, unlike its siblings.
    ET.SubElement(pic, f"{{{HC}}}img", {
        "binaryItemIDRef": manifest_id, "bright": "0", "contrast": "0",
        "effect": "REAL_PIC", "alpha": "0",
    })

    # --- Group 3: layout (sz, pos, outMargin come AFTER hc:img) -------------
    ET.SubElement(pic, f"{{{HP}}}sz", {
        "width": W, "widthRelTo": "ABSOLUTE",
        "height": H, "heightRelTo": "ABSOLUTE", "protect": "0",
    })
    ET.SubElement(pic, f"{{{HP}}}pos", {
        "treatAsChar": "1", "affectLSpacing": "0", "flowWithText": "0",
        "allowOverlap": "0", "holdAnchorAndSO": "0",
        "vertRelTo": "PARA", "horzRelTo": "COLUMN",
        "vertAlign": "TOP", "horzAlign": "LEFT",
        "vertOffset": "0", "horzOffset": "0",
    })
    ET.SubElement(pic, f"{{{HP}}}outMargin",
                  left="0", right="0", top="0", bottom="0")

    return pic
246
+ ```
247
+
248
+ ### E. Critical Rules for HWPX `<hp:pic>`
249
+
250
+ > **All 9 rules must be followed. Violating any one causes broken image rendering.**
251
+
252
+ | # | Rule | Correct | Wrong |
253
+ |---|------|---------|-------|
254
+ | 1 | **`<img>` uses `hc:` namespace** | `<hc:img binaryItemIDRef="..."/>` | `<hp:img .../>` |
255
+ | 2 | **`<imgRect>` has 4 `<hc:pt>` children** | `<hc:pt0 x="0" y="0"/>` ... `<hc:pt3>` | Inline `x1/y1/x2/y2` attributes |
256
+ | 3 | **All required children present** | `offset`, `orgSz`, `curSz`, `flip`, `rotationInfo`, `renderingInfo`, `inMargin` | Missing any of these |
257
+ | 4 | **No spurious elements** | Do NOT include `hp:lineShape` | `hp:caption`, `hp:shapeComment`, `hp:lineShape` |
258
+ | 5 | **`imgClip` right/bottom = pixel dims** | `right="{pixW}" bottom="{pixH}"` | All zeros |
259
+ | 6 | **Hancom canonical element order** | offset, orgSz, ..., hc:img, **then** sz, pos, outMargin | sz/pos first (pre-2026 incorrect order) |
260
+ | 7 | **Register in `content.hpf` only** | `<opf:item>` in manifest | `<hh:binDataItems>` in header.xml |
261
+ | 8 | **`hp:pos` attributes** | `flowWithText="0"` `horzRelTo="COLUMN"` | `flowWithText="1"` `horzRelTo="PARA"` |
262
+ | 9 | **pic INSIDE run, t AFTER pic** | `<hp:run><hp:pic>...</hp:pic><hp:t/></hp:run>` | `<hp:run><hp:t/></hp:run><hp:pic>` |
263
+
264
+ ## Size Calculations
265
+
266
+ ### Dimension Conversion
267
+ - **DOCX**: Uses EMUs (English Metric Units)
268
+ - 1 inch = 914,400 EMU
269
+ - 1 mm = 36,000 EMU
270
+ - Formula: `emu = mm * 36000`
271
+ - **HWPX**: Uses HWPML units (1/7200 inch)
272
+ - 1 inch = 7,200 units
273
+ - 1 mm ≈ 283.46 units
274
+ - Formula: `hwpml = mm * 283.46` (or `inches * 7200`)
275
+ - Typical A4 text width: ~46,648 units (~165mm)
276
+
277
+ ### Default Dimensions by Image Type (Cell Images)
278
+ | image_type | Width (mm) | Height (mm) | DOCX cx (EMU) | DOCX cy (EMU) | HWPX width | HWPX height |
279
+ |-----------|-----------|------------|--------------|--------------|-----------|------------|
280
+ | photo | 35 | 45 | 1,260,000 | 1,620,000 | 9,922 | 12,757 |
281
+ | logo | 50 | 50 | 1,800,000 | 1,800,000 | 14,173 | 14,173 |
282
+ | signature | 40 | 15 | 1,440,000 | 540,000 | 11,339 | 4,252 |
283
+ | figure (cell) | — | — | — | — | Fit to cell | Fit to cell |
284
+
285
+ **Cell images**: Use `cellSz` width/height minus margins for aspect-ratio-preserving fit.
286
+
287
+ ### Default Dimensions for Section Content Images
288
+ | content_type | HWPX width | HWPX height | Approx mm | Note |
289
+ |---|---|---|---|---|
290
+ | diagram | 36,000 | 24,000 | 127x85 | ~77% of page width |
291
+ | flowchart | 36,000 | 24,000 | 127x85 | ~77% of page width |
292
+ | data (charts) | 36,000 | 20,000 | 127x71 | Wide format |
293
+ | concept | 28,000 | 28,000 | 99x99 | Square |
294
+ | infographic | 36,000 | 24,000 | 127x85 | ~77% of page width |
295
+
296
+ ### Python Code Snippet for DOCX Drawing Element Construction
297
+ ```python
298
+ import xml.etree.ElementTree as ET
299
+
300
def build_drawing_element(rel_id: str, width_emu: int, height_emu: int, pic_id: int, filename: str) -> ET.Element:
    """Build a w:drawing element for inline image insertion in a DOCX.

    Args:
        rel_id: Relationship Id of the image part in
            word/_rels/document.xml.rels (e.g. "rId8"); written to
            ``a:blip/@r:embed``.
        width_emu: Display width in EMU (914,400 per inch).
        height_emu: Display height in EMU.
        pic_id: Numeric id used for both ``wp:docPr/@id`` and
            ``pic:cNvPr/@id``.
        filename: Image file name stored in ``pic:cNvPr/@name``.

    Returns:
        The detached ``<w:drawing>`` element; the caller wraps it in a
        ``<w:r>`` inside the target paragraph.

    Raises:
        ValueError: If either extent is negative. A negative size is not a
            valid image extent, so it is rejected here rather than written
            into the document.
    """
    if width_emu < 0 or height_emu < 0:
        raise ValueError(
            f"image extents must be non-negative EMU values, got {width_emu}x{height_emu}"
        )

    WP = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
    A = "http://schemas.openxmlformats.org/drawingml/2006/main"
    PIC = "http://schemas.openxmlformats.org/drawingml/2006/picture"
    R = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    W = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"

    # The same size is written twice (wp:extent and a:ext); stringify once.
    cx = str(width_emu)
    cy = str(height_emu)

    # <w:drawing><wp:inline> — an inline (in-text-flow) picture, no wrap distance.
    drawing = ET.Element(f"{{{W}}}drawing")
    inline = ET.SubElement(drawing, f"{{{WP}}}inline",
                           distT="0", distB="0", distL="0", distR="0")
    ET.SubElement(inline, f"{{{WP}}}extent", cx=cx, cy=cy)
    ET.SubElement(inline, f"{{{WP}}}docPr",
                  id=str(pic_id), name=f"Picture {pic_id}")

    # <a:graphic><a:graphicData uri=picture> — container announcing a picture payload.
    graphic = ET.SubElement(inline, f"{{{A}}}graphic")
    graphicData = ET.SubElement(graphic, f"{{{A}}}graphicData",
                                uri="http://schemas.openxmlformats.org/drawingml/2006/picture")

    # <pic:pic> non-visual properties.
    pic = ET.SubElement(graphicData, f"{{{PIC}}}pic")
    nvPicPr = ET.SubElement(pic, f"{{{PIC}}}nvPicPr")
    ET.SubElement(nvPicPr, f"{{{PIC}}}cNvPr", id=str(pic_id), name=filename)
    ET.SubElement(nvPicPr, f"{{{PIC}}}cNvPicPr")

    # Fill: reference the image part by relationship id, stretched to the frame.
    # r:embed is a namespaced attribute, so it has to go through attrib=.
    blipFill = ET.SubElement(pic, f"{{{PIC}}}blipFill")
    ET.SubElement(blipFill, f"{{{A}}}blip", attrib={f"{{{R}}}embed": rel_id})
    stretch = ET.SubElement(blipFill, f"{{{A}}}stretch")
    ET.SubElement(stretch, f"{{{A}}}fillRect")

    # Shape properties: origin, size, and a plain rectangular outline.
    spPr = ET.SubElement(pic, f"{{{PIC}}}spPr")
    xfrm = ET.SubElement(spPr, f"{{{A}}}xfrm")
    ET.SubElement(xfrm, f"{{{A}}}off", x="0", y="0")
    ET.SubElement(xfrm, f"{{{A}}}ext", cx=cx, cy=cy)
    prstGeom = ET.SubElement(spPr, f"{{{A}}}prstGeom", prst="rect")
    ET.SubElement(prstGeom, f"{{{A}}}avLst")

    return drawing
338
+ ```