devlyn-cli 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/devlyn.js +1 -0
- package/optional-skills/dokkit/ANALYSIS.md +198 -0
- package/optional-skills/dokkit/COMMANDS.md +365 -0
- package/optional-skills/dokkit/DOCX-XML.md +76 -0
- package/optional-skills/dokkit/EXPORT.md +102 -0
- package/optional-skills/dokkit/FILLING.md +377 -0
- package/optional-skills/dokkit/HWPX-XML.md +73 -0
- package/optional-skills/dokkit/IMAGE-SOURCING.md +127 -0
- package/optional-skills/dokkit/INGESTION.md +65 -0
- package/optional-skills/dokkit/SKILL.md +153 -0
- package/optional-skills/dokkit/STATE.md +60 -0
- package/optional-skills/dokkit/references/docx-field-patterns.md +151 -0
- package/optional-skills/dokkit/references/docx-structure.md +58 -0
- package/optional-skills/dokkit/references/field-detection-patterns.md +130 -0
- package/optional-skills/dokkit/references/hwpx-field-patterns.md +461 -0
- package/optional-skills/dokkit/references/hwpx-structure.md +159 -0
- package/optional-skills/dokkit/references/image-opportunity-heuristics.md +121 -0
- package/optional-skills/dokkit/references/image-xml-patterns.md +338 -0
- package/optional-skills/dokkit/references/section-image-interleaving.md +346 -0
- package/optional-skills/dokkit/references/section-range-detection.md +118 -0
- package/optional-skills/dokkit/references/state-schema.md +143 -0
- package/optional-skills/dokkit/references/supported-formats.md +67 -0
- package/optional-skills/dokkit/scripts/compile_hwpx.py +134 -0
- package/optional-skills/dokkit/scripts/detect_fields.py +301 -0
- package/optional-skills/dokkit/scripts/detect_fields_hwpx.py +286 -0
- package/optional-skills/dokkit/scripts/export_pdf.py +99 -0
- package/optional-skills/dokkit/scripts/parse_hwpx.py +185 -0
- package/optional-skills/dokkit/scripts/parse_image_with_gemini.py +159 -0
- package/optional-skills/dokkit/scripts/parse_xlsx.py +98 -0
- package/optional-skills/dokkit/scripts/source_images.py +365 -0
- package/optional-skills/dokkit/scripts/validate_docx.py +142 -0
- package/optional-skills/dokkit/scripts/validate_hwpx.py +281 -0
- package/optional-skills/dokkit/scripts/validate_state.py +132 -0
- package/package.json +1 -1
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Image Opportunity Heuristics for Section Content Fields
|
|
2
|
+
|
|
3
|
+
Guide for detecting image insertion opportunities within `section_content` fields. These heuristics help the analyzer identify where AI-generated images can be interleaved with text to create visually rich proposals.
|
|
4
|
+
|
|
5
|
+
## Content Signal Keywords
|
|
6
|
+
|
|
7
|
+
Scan the `mapped_value` text for these signal keywords that indicate an image would add value.
|
|
8
|
+
|
|
9
|
+
### By `content_type`
|
|
10
|
+
|
|
11
|
+
| content_type | Korean keywords | English keywords |
|
|
12
|
+
|---|---|---|
|
|
13
|
+
| flowchart | 프로세스, 절차, 단계, 흐름, 순서, 워크플로우 | process, procedure, step, flow, workflow, pipeline |
|
|
14
|
+
| diagram | 구조, 아키텍처, 시스템, 모듈, 구성도, 체계 | architecture, structure, system, module, framework, topology |
|
|
15
|
+
| data | 시장규모, 성장률, 통계, 수치, 비율, 점유율, 매출 | market size, growth rate, statistics, data, ratio, share, revenue |
|
|
16
|
+
| concept | 개념, 비전, 전략, 핵심, 모델, 방법론, 기술 | concept, vision, strategy, core, model, methodology, technology |
|
|
17
|
+
| infographic | 비교, 장점, 특징, 차별점, 효과, 기대효과 | comparison, advantage, feature, differentiation, effect, benefit |
|
|
18
|
+
|
|
19
|
+
### Signal strength
|
|
20
|
+
|
|
21
|
+
- **Strong signal** (2+ keywords in same paragraph): high-priority opportunity
|
|
22
|
+
- **Moderate signal** (1 keyword): include if surrounding context supports it
|
|
23
|
+
- **Weak signal** (keyword in passing mention): skip unless the paragraph is >300 chars
|
|
24
|
+
|
|
25
|
+
## Placement Rules
|
|
26
|
+
|
|
27
|
+
### Where to insert
|
|
28
|
+
|
|
29
|
+
- **After the paragraph** that introduces the concept — not before, not mid-paragraph
|
|
30
|
+
- `insertion_point.strategy` = `"after_paragraph"`
|
|
31
|
+
- `insertion_point.anchor_text` = a distinctive phrase (5-15 words) from the paragraph that signals the concept. Choose a phrase that is unique within the field's mapped_value.
|
|
32
|
+
|
|
33
|
+
### Where NOT to insert
|
|
34
|
+
|
|
35
|
+
- Never as the first element (before any text)
|
|
36
|
+
- Never as the last element (after all text)
|
|
37
|
+
- Never between two consecutive images (min 150 chars of text between image opportunities)
|
|
38
|
+
- Never inside bulleted/numbered list sequences
|
|
39
|
+
- Never right after a heading (insert after the explanatory paragraph instead)
|
|
40
|
+
|
|
41
|
+
## Prompt Composition Strategy
|
|
42
|
+
|
|
43
|
+
For each detected opportunity:
|
|
44
|
+
|
|
45
|
+
1. **Read the anchor paragraph** in Korean
|
|
46
|
+
2. **Identify the core concept** being described
|
|
47
|
+
3. **Compose an English generation prompt** that:
|
|
48
|
+
- Describes the visual content (what to show)
|
|
49
|
+
- Specifies the style (technical diagram, data chart, concept illustration)
|
|
50
|
+
- Includes domain context (e.g., "for a government R&D proposal")
|
|
51
|
+
- Avoids text/labels in the image (these are hard to control)
|
|
52
|
+
4. **Map to a preset**: `technical_illustration` (default for diagram, flowchart, concept), `infographic` (for data, infographic)
|
|
53
|
+
|
|
54
|
+
### Prompt template
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
[content_type] showing [core concept from paragraph].
|
|
58
|
+
Context: [section name] of a Korean government R&D project proposal.
|
|
59
|
+
Style: Clean, professional, minimal text labels.
|
|
60
|
+
Color scheme: Modern, corporate blue/teal tones.
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Skip Conditions
|
|
64
|
+
|
|
65
|
+
Do NOT create image opportunities when:
|
|
66
|
+
|
|
67
|
+
1. **Short content**: `mapped_value` < 400 characters total
|
|
68
|
+
2. **Team/personnel lists**: Content is primarily names, roles, and qualifications (look for patterns like `이름:`, `직책:`, `담당:`, `학력:`, `경력:`)
|
|
69
|
+
3. **Budget/financial tables**: Content is primarily numbers, amounts, costs (look for `원`, `만원`, `억원`, `비용`, `예산`)
|
|
70
|
+
4. **Already has explicit image fields** (a cap rather than a full skip): If the section already contains `field_type: "image"` fields in analysis.json, reduce max opportunities to 1
|
|
71
|
+
5. **Simple form data**: Content is short key-value pairs without narrative text
|
|
72
|
+
6. **Repeating field**: The same section has multiple `section_content` fields that overlap in content
|
|
73
|
+
|
|
74
|
+
## Limits and Spacing
|
|
75
|
+
|
|
76
|
+
| Constraint | Value |
|
|
77
|
+
|---|---|
|
|
78
|
+
| Max opportunities per `section_content` field | 3 |
|
|
79
|
+
| Min chars before first opportunity | 200 |
|
|
80
|
+
| Min chars between opportunities | 150 |
|
|
81
|
+
| Min total mapped_value length | 400 chars |
|
|
82
|
+
| Max total opportunities across all sections | 12 |
|
|
83
|
+
|
|
84
|
+
## Output Schema
|
|
85
|
+
|
|
86
|
+
Each opportunity is added to the field's `image_opportunities` array:
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"opportunity_id": "imgop_{field_id}_{seq}",
|
|
91
|
+
"insertion_point": {
|
|
92
|
+
"strategy": "after_paragraph",
|
|
93
|
+
"anchor_text": "AI 유사도 탐색 알고리즘을 통해 기존 특허와의 유사성을 분석"
|
|
94
|
+
},
|
|
95
|
+
"generation_prompt": "Technical architecture diagram of an AI-powered IP similarity search system showing document ingestion, vector embedding, and similarity matching pipeline. Context: Korean government R&D proposal. Style: Clean, professional, minimal text. Color: Modern blue/teal.",
|
|
96
|
+
"preset": "technical_illustration",
|
|
97
|
+
"content_type": "diagram",
|
|
98
|
+
"rationale": "Text describes the AI algorithm workflow; a diagram clarifies the system architecture",
|
|
99
|
+
"dimensions": {
|
|
100
|
+
"width_hwpml": 36000,
|
|
101
|
+
"height_hwpml": 24000,
|
|
102
|
+
"width_emu": 4572000,
|
|
103
|
+
"height_emu": 3048000
|
|
104
|
+
},
|
|
105
|
+
"image_file": null,
|
|
106
|
+
"status": "pending"
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Field meanings
|
|
111
|
+
|
|
112
|
+
- `opportunity_id`: Unique ID, format `imgop_{field_id}_{sequence_number}`
|
|
113
|
+
- `insertion_point.strategy`: Always `"after_paragraph"` for section content
|
|
114
|
+
- `insertion_point.anchor_text`: Distinctive Korean phrase from the paragraph (used by filler to locate insertion point)
|
|
115
|
+
- `generation_prompt`: English prompt for AI image generation
|
|
116
|
+
- `preset`: Maps to `scripts/source_images.py` preset parameter
|
|
117
|
+
- `content_type`: One of `flowchart`, `diagram`, `data`, `concept`, `infographic`
|
|
118
|
+
- `rationale`: Brief explanation of why an image helps here
|
|
119
|
+
- `dimensions`: Default size — filler may adjust based on content_type
|
|
120
|
+
- `image_file`: `null` until sourced (set by fill-doc orchestrator)
|
|
121
|
+
- `status`: `"pending"` → `"sourced"` → `"inserted"` (or `"skipped"`)
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# Image XML Patterns
|
|
2
|
+
|
|
3
|
+
## DOCX Image Pattern
|
|
4
|
+
|
|
5
|
+
### Required Namespace Declarations
|
|
6
|
+
These namespaces must be present on the root `<w:document>` element:
|
|
7
|
+
```xml
|
|
8
|
+
xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
|
9
|
+
xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
|
|
10
|
+
xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
|
|
11
|
+
xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture"
|
|
12
|
+
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### Relationship Entry (word/_rels/document.xml.rels)
|
|
16
|
+
```xml
|
|
17
|
+
<Relationship
|
|
18
|
+
Id="rId8"
|
|
19
|
+
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
|
|
20
|
+
Target="media/image1.png"/>
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Content_Types Entry ([Content_Types].xml)
|
|
24
|
+
Add if the image extension is not already registered:
|
|
25
|
+
```xml
|
|
26
|
+
<!-- For PNG images -->
|
|
27
|
+
<Default Extension="png" ContentType="image/png"/>
|
|
28
|
+
|
|
29
|
+
<!-- For JPEG images -->
|
|
30
|
+
<Default Extension="jpeg" ContentType="image/jpeg"/>
|
|
31
|
+
<Default Extension="jpg" ContentType="image/jpeg"/>
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Drawing Element (in document.xml)
|
|
35
|
+
Wrap in a `<w:r>` element within a paragraph in the target cell:
|
|
36
|
+
```xml
|
|
37
|
+
<w:r>
|
|
38
|
+
<w:drawing>
|
|
39
|
+
<wp:inline distT="0" distB="0" distL="0" distR="0">
|
|
40
|
+
<wp:extent cx="914400" cy="1219200"/>
|
|
41
|
+
<wp:docPr id="1" name="Picture 1"/>
|
|
42
|
+
<a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
|
|
43
|
+
<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">
|
|
44
|
+
<pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">
|
|
45
|
+
<pic:nvPicPr>
|
|
46
|
+
<pic:cNvPr id="1" name="image1.png"/>
|
|
47
|
+
<pic:cNvPicPr/>
|
|
48
|
+
</pic:nvPicPr>
|
|
49
|
+
<pic:blipFill>
|
|
50
|
+
<a:blip r:embed="rId8"/>
|
|
51
|
+
<a:stretch><a:fillRect/></a:stretch>
|
|
52
|
+
</pic:blipFill>
|
|
53
|
+
<pic:spPr>
|
|
54
|
+
<a:xfrm>
|
|
55
|
+
<a:off x="0" y="0"/>
|
|
56
|
+
<a:ext cx="914400" cy="1219200"/>
|
|
57
|
+
</a:xfrm>
|
|
58
|
+
<a:prstGeom prst="rect"><a:avLst/></a:prstGeom>
|
|
59
|
+
</pic:spPr>
|
|
60
|
+
</pic:pic>
|
|
61
|
+
</a:graphicData>
|
|
62
|
+
</a:graphic>
|
|
63
|
+
</wp:inline>
|
|
64
|
+
</w:drawing>
|
|
65
|
+
</w:r>
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## HWPX Image Pattern
|
|
69
|
+
|
|
70
|
+
### A. Registration — Manifest Only
|
|
71
|
+
|
|
72
|
+
Images are registered ONLY in `Contents/content.hpf` manifest. Do NOT add `<hh:binDataItems>` to `header.xml`. No entries needed in `META-INF/manifest.xml`.
|
|
73
|
+
|
|
74
|
+
```xml
|
|
75
|
+
<!-- Contents/content.hpf — add <opf:item> for each image -->
|
|
76
|
+
<opf:item id="image1" href="BinData/image1.png" media-type="image/png" isEmbeded="1"/>
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
The `id` attribute becomes the `binaryItemIDRef` in the `<hc:img>` element below.
|
|
80
|
+
|
|
81
|
+
### B. Paragraph Structure (CRITICAL)
|
|
82
|
+
|
|
83
|
+
Images MUST be placed **inside** the `<hp:run>` element, with `<hp:t/>` **after** the `<hp:pic>`. This matches real Hancom Office output.
|
|
84
|
+
|
|
85
|
+
```xml
|
|
86
|
+
<!-- CORRECT: pic inside run, t after pic -->
|
|
87
|
+
<hp:p id="..." paraPrIDRef="..." styleIDRef="0" pageBreak="0" columnBreak="0" merged="0">
|
|
88
|
+
<hp:linesegarray>
|
|
89
|
+
<hp:lineseg textpos="0" vertpos="0" vertsize="{H}" textheight="{H}"
|
|
90
|
+
baseline="{H*0.85}" spacing="500" horzpos="0" horzsize="..." flags="393216"/>
|
|
91
|
+
</hp:linesegarray>
|
|
92
|
+
<hp:run charPrIDRef="0">
|
|
93
|
+
<hp:pic ...>...</hp:pic>
|
|
94
|
+
<hp:t/>
|
|
95
|
+
</hp:run>
|
|
96
|
+
</hp:p>
|
|
97
|
+
|
|
98
|
+
<!-- WRONG: pic as sibling of run — images will NOT render -->
|
|
99
|
+
<hp:run charPrIDRef="0"><hp:t/></hp:run>
|
|
100
|
+
<hp:pic ...>...</hp:pic>
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### C. Complete `<hp:pic>` Structure (Hancom Canonical Order)
|
|
104
|
+
|
|
105
|
+
Element order matches real Hancom Office output. Every child element listed is **required**. Do NOT include `<hp:lineShape>` (not present in real Hancom files).
|
|
106
|
+
|
|
107
|
+
```xml
|
|
108
|
+
<hp:pic id="{seq_id}" zOrder="{z}" numberingType="PICTURE" textWrap="TOP_AND_BOTTOM"
|
|
109
|
+
textFlow="BOTH_SIDES" lock="0" dropcapstyle="None"
|
|
110
|
+
href="" groupLevel="0" instid="{seq_id}" reverse="0">
|
|
111
|
+
<!-- Group 1: Geometry (Hancom canonical order) -->
|
|
112
|
+
<hp:offset x="0" y="0"/>
|
|
113
|
+
<hp:orgSz width="{W}" height="{H}"/>
|
|
114
|
+
<hp:curSz width="{W}" height="{H}"/>
|
|
115
|
+
<hp:flip horizontal="0" vertical="0"/>
|
|
116
|
+
<hp:rotationInfo angle="0" centerX="{W_half}" centerY="{H_half}" rotateimage="1"/>
|
|
117
|
+
<hp:renderingInfo>
|
|
118
|
+
<hc:transMatrix e1="1" e2="0" e3="0" e4="0" e5="1" e6="0"/>
|
|
119
|
+
<hc:scaMatrix e1="1" e2="0" e3="0" e4="0" e5="1" e6="0"/>
|
|
120
|
+
<hc:rotMatrix e1="1" e2="-0" e3="0" e4="0" e5="1" e6="0"/>
|
|
121
|
+
</hp:renderingInfo>
|
|
122
|
+
<!-- Group 2: Image data -->
|
|
123
|
+
<hp:imgRect>
|
|
124
|
+
<hc:pt0 x="0" y="0"/>
|
|
125
|
+
<hc:pt1 x="{W}" y="0"/>
|
|
126
|
+
<hc:pt2 x="{W}" y="{H}"/>
|
|
127
|
+
<hc:pt3 x="0" y="{H}"/>
|
|
128
|
+
</hp:imgRect>
|
|
129
|
+
<hp:imgClip left="0" right="{pixW}" top="0" bottom="{pixH}"/>
|
|
130
|
+
<hp:inMargin left="0" right="0" top="0" bottom="0"/>
|
|
131
|
+
<hp:imgDim dimwidth="{pixW}" dimheight="{pixH}"/>
|
|
132
|
+
<hc:img binaryItemIDRef="{manifest_id}" bright="0" contrast="0" effect="REAL_PIC" alpha="0"/>
|
|
133
|
+
<!-- Group 3: Layout (AFTER hc:img in Hancom canonical order) -->
|
|
134
|
+
<hp:sz width="{W}" widthRelTo="ABSOLUTE" height="{H}" heightRelTo="ABSOLUTE" protect="0"/>
|
|
135
|
+
<hp:pos treatAsChar="1" affectLSpacing="0" flowWithText="0" allowOverlap="0"
|
|
136
|
+
holdAnchorAndSO="0" vertRelTo="PARA" horzRelTo="COLUMN"
|
|
137
|
+
vertAlign="TOP" horzAlign="LEFT" vertOffset="0" horzOffset="0"/>
|
|
138
|
+
<hp:outMargin left="0" right="0" top="0" bottom="0"/>
|
|
139
|
+
</hp:pic>
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Variable definitions**:
|
|
143
|
+
- `{W}` / `{H}` — Display size in HWPML units (1/7200 inch). Use defaults from Size Calculations table or analysis.json dimensions.
|
|
144
|
+
- `{W_half}` / `{H_half}` — Half of W/H for rotation center.
|
|
145
|
+
- `{pixW}` / `{pixH}` — Actual pixel dimensions from PIL/Pillow `Image.open(path).size`.
|
|
146
|
+
- `{manifest_id}` — The `id` attribute from the `<opf:item>` in `content.hpf`.
|
|
147
|
+
- `{seq_id}` — Sequential ID: find max existing `id` in section XML + 1.
|
|
148
|
+
- `{z}` — zOrder: find max existing `zOrder` in section XML + 1.
|
|
149
|
+
|
|
150
|
+
### D. Python `build_hwpx_pic_element()` Function
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
import xml.etree.ElementTree as ET
|
|
154
|
+
from PIL import Image
|
|
155
|
+
|
|
156
|
+
HP = "http://www.hancom.co.kr/hwpml/2011/paragraph"
|
|
157
|
+
HC = "http://www.hancom.co.kr/hwpml/2011/core"
|
|
158
|
+
|
|
159
|
+
def build_hwpx_pic_element(
    manifest_id: str,
    image_path: str,
    width_hwpml: int,
    height_hwpml: int,
    seq_id: int,
    z_order: int,
) -> ET.Element:
    """Create the full <hp:pic> subtree used to embed an image in HWPX.

    Children are appended in this fixed order: offset, orgSz, curSz, flip,
    rotationInfo, renderingInfo, imgRect, imgClip, inMargin, imgDim, hc:img,
    sz, pos, outMargin.

    Args:
        manifest_id: Value written to ``binaryItemIDRef`` on ``<hc:img>``
            (the id of the matching ``<opf:item>`` in content.hpf, e.g. "image1").
        image_path: Image file opened only to read its pixel dimensions.
        width_hwpml: Display width in HWPML units (1/7200 inch).
        height_hwpml: Display height in HWPML units (1/7200 inch).
        seq_id: Element id, written to both ``id`` and ``instid``.
        z_order: Value written to ``zOrder``.

    Returns:
        The populated ``<hp:pic>`` element.
    """
    # Only the pixel size is needed; the file handle is released immediately.
    with Image.open(image_path) as handle:
        px_w, px_h = (str(v) for v in handle.size)

    disp_w, disp_h = str(width_hwpml), str(height_hwpml)
    element_id = str(seq_id)

    def hp(local: str) -> str:
        """Qualify a local name with the paragraph (hp) namespace."""
        return f"{{{HP}}}{local}"

    def hc(local: str) -> str:
        """Qualify a local name with the core (hc) namespace."""
        return f"{{{HC}}}{local}"

    root = ET.Element(hp("pic"), {
        "id": element_id,
        "zOrder": str(z_order),
        "numberingType": "PICTURE",
        "textWrap": "TOP_AND_BOTTOM",
        "textFlow": "BOTH_SIDES",
        "lock": "0",
        "dropcapstyle": "None",
        "href": "",
        "groupLevel": "0",
        "instid": element_id,
        "reverse": "0",
    })

    # --- Geometry -----------------------------------------------------------
    ET.SubElement(root, hp("offset"), {"x": "0", "y": "0"})
    for size_tag in ("orgSz", "curSz"):
        ET.SubElement(root, hp(size_tag), {"width": disp_w, "height": disp_h})
    ET.SubElement(root, hp("flip"), {"horizontal": "0", "vertical": "0"})
    ET.SubElement(root, hp("rotationInfo"), {
        "angle": "0",
        "centerX": str(width_hwpml // 2),
        "centerY": str(height_hwpml // 2),
        "rotateimage": "1",
    })

    rendering = ET.SubElement(root, hp("renderingInfo"))
    identity = {"e1": "1", "e2": "0", "e3": "0", "e4": "0", "e5": "1", "e6": "0"}
    for matrix_tag in ("transMatrix", "scaMatrix", "rotMatrix"):
        cells = dict(identity)
        if matrix_tag == "rotMatrix":
            # The rotation matrix carries a literal "-0" in e2.
            cells["e2"] = "-0"
        ET.SubElement(rendering, hc(matrix_tag), cells)

    # --- Image data ---------------------------------------------------------
    rect = ET.SubElement(root, hp("imgRect"))
    corners = (("0", "0"), (disp_w, "0"), (disp_w, disp_h), ("0", disp_h))
    for index, (corner_x, corner_y) in enumerate(corners):
        ET.SubElement(rect, hc(f"pt{index}"), {"x": corner_x, "y": corner_y})

    ET.SubElement(root, hp("imgClip"),
                  {"left": "0", "right": px_w, "top": "0", "bottom": px_h})
    ET.SubElement(root, hp("inMargin"),
                  {"left": "0", "right": "0", "top": "0", "bottom": "0"})
    ET.SubElement(root, hp("imgDim"), {"dimwidth": px_w, "dimheight": px_h})
    ET.SubElement(root, hc("img"), {
        "binaryItemIDRef": manifest_id,
        "bright": "0",
        "contrast": "0",
        "effect": "REAL_PIC",
        "alpha": "0",
    })

    # --- Layout (appended after hc:img) -------------------------------------
    ET.SubElement(root, hp("sz"), {
        "width": disp_w,
        "widthRelTo": "ABSOLUTE",
        "height": disp_h,
        "heightRelTo": "ABSOLUTE",
        "protect": "0",
    })
    ET.SubElement(root, hp("pos"), {
        "treatAsChar": "1",
        "affectLSpacing": "0",
        "flowWithText": "0",
        "allowOverlap": "0",
        "holdAnchorAndSO": "0",
        "vertRelTo": "PARA",
        "horzRelTo": "COLUMN",
        "vertAlign": "TOP",
        "horzAlign": "LEFT",
        "vertOffset": "0",
        "horzOffset": "0",
    })
    ET.SubElement(root, hp("outMargin"),
                  {"left": "0", "right": "0", "top": "0", "bottom": "0"})

    return root
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### E. Critical Rules for HWPX `<hp:pic>`
|
|
249
|
+
|
|
250
|
+
> **All 9 rules must be followed. Violating any one causes broken image rendering.**
|
|
251
|
+
|
|
252
|
+
| # | Rule | Correct | Wrong |
|
|
253
|
+
|---|------|---------|-------|
|
|
254
|
+
| 1 | **`<img>` uses `hc:` namespace** | `<hc:img binaryItemIDRef="..."/>` | `<hp:img .../>` |
|
|
255
|
+
| 2 | **`<imgRect>` has 4 `<hc:pt>` children** | `<hc:pt0 x="0" y="0"/>` ... `<hc:pt3>` | Inline `x1/y1/x2/y2` attributes |
|
|
256
|
+
| 3 | **All required children present** | `offset`, `orgSz`, `curSz`, `flip`, `rotationInfo`, `renderingInfo`, `imgRect`, `imgClip`, `inMargin`, `imgDim`, `hc:img`, `sz`, `pos`, `outMargin` | Missing any of these |
|
|
257
|
+
| 4 | **No spurious elements** | Do NOT include `hp:lineShape` | `hp:caption`, `hp:shapeComment`, `hp:lineShape` |
|
|
258
|
+
| 5 | **`imgClip` right/bottom = pixel dims** | `right="{pixW}" bottom="{pixH}"` | All zeros |
|
|
259
|
+
| 6 | **Hancom canonical element order** | offset, orgSz, ..., hc:img, **then** sz, pos, outMargin | sz/pos first (pre-2026 incorrect order) |
|
|
260
|
+
| 7 | **Register in `content.hpf` only** | `<opf:item>` in manifest | `<hh:binDataItems>` in header.xml |
|
|
261
|
+
| 8 | **`hp:pos` attributes** | `flowWithText="0"` `horzRelTo="COLUMN"` | `flowWithText="1"` `horzRelTo="PARA"` |
|
|
262
|
+
| 9 | **pic INSIDE run, t AFTER pic** | `<hp:run><hp:pic>...</hp:pic><hp:t/></hp:run>` | `<hp:run><hp:t/></hp:run><hp:pic>` |
|
|
263
|
+
|
|
264
|
+
## Size Calculations
|
|
265
|
+
|
|
266
|
+
### Dimension Conversion
|
|
267
|
+
- **DOCX**: Uses EMUs (English Metric Units)
|
|
268
|
+
- 1 inch = 914,400 EMU
|
|
269
|
+
- 1 mm = 36,000 EMU
|
|
270
|
+
- Formula: `emu = mm * 36000`
|
|
271
|
+
- **HWPX**: Uses HWPML units (1/7200 inch)
|
|
272
|
+
- 1 inch = 7,200 units
|
|
273
|
+
- 1 mm ≈ 283.46 units
|
|
274
|
+
- Formula: `hwpml = mm * 283.46` (or `inches * 7200`)
|
|
275
|
+
- Typical A4 text width: ~46,648 units (~165mm)
|
|
276
|
+
|
|
277
|
+
### Default Dimensions by Image Type (Cell Images)
|
|
278
|
+
| image_type | Width (mm) | Height (mm) | DOCX cx (EMU) | DOCX cy (EMU) | HWPX width | HWPX height |
|
|
279
|
+
|-----------|-----------|------------|--------------|--------------|-----------|------------|
|
|
280
|
+
| photo | 35 | 45 | 1,260,000 | 1,620,000 | 9,922 | 12,757 |
|
|
281
|
+
| logo | 50 | 50 | 1,800,000 | 1,800,000 | 14,173 | 14,173 |
|
|
282
|
+
| signature | 40 | 15 | 1,440,000 | 540,000 | 11,339 | 4,252 |
|
|
283
|
+
| figure (cell) | — | — | — | — | Fit to cell | Fit to cell |
|
|
284
|
+
|
|
285
|
+
**Cell images**: Use `cellSz` width/height minus margins for aspect-ratio-preserving fit.
|
|
286
|
+
|
|
287
|
+
### Default Dimensions for Section Content Images
|
|
288
|
+
| content_type | HWPX width | HWPX height | Approx mm | Note |
|
|
289
|
+
|---|---|---|---|---|
|
|
290
|
+
| diagram | 36,000 | 24,000 | 127x85 | ~77% of page width |
|
|
291
|
+
| flowchart | 36,000 | 24,000 | 127x85 | ~77% of page width |
|
|
292
|
+
| data (charts) | 36,000 | 20,000 | 127x71 | Wide format |
|
|
293
|
+
| concept | 28,000 | 28,000 | 99x99 | Square |
|
|
294
|
+
| infographic | 36,000 | 24,000 | 127x85 | ~77% of page width |
|
|
295
|
+
|
|
296
|
+
### Python Code Snippet for DOCX Drawing Element Construction
|
|
297
|
+
```python
|
|
298
|
+
import xml.etree.ElementTree as ET
|
|
299
|
+
|
|
300
|
+
def build_drawing_element(rel_id: str, width_emu: int, height_emu: int, pic_id: int, filename: str) -> ET.Element:
    """Assemble the ``w:drawing`` subtree for one inline picture.

    Args:
        rel_id: Relationship id written to ``r:embed`` on ``<a:blip>``.
        width_emu: Width, written as ``cx`` on both ``wp:extent`` and ``a:ext``.
        height_emu: Height, written as ``cy`` on both ``wp:extent`` and ``a:ext``.
        pic_id: Id shared by ``wp:docPr`` and ``pic:cNvPr``; also used in the
            ``"Picture {pic_id}"`` display name.
        filename: Value of the ``name`` attribute on ``pic:cNvPr``.

    Returns:
        The root ``w:drawing`` element.
    """
    namespaces = {
        "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
        "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
        "pic": "http://schemas.openxmlformats.org/drawingml/2006/picture",
        "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    }

    def qname(prefix: str, local: str) -> str:
        """Return the ``{namespace}local`` form ElementTree expects."""
        return f"{{{namespaces[prefix]}}}{local}"

    # The same size is written twice: on the inline extent and on the shape transform.
    size_attrs = {"cx": str(width_emu), "cy": str(height_emu)}
    shape_id = str(pic_id)

    root = ET.Element(qname("w", "drawing"))
    inline_box = ET.SubElement(
        root,
        qname("wp", "inline"),
        {"distT": "0", "distB": "0", "distL": "0", "distR": "0"},
    )
    ET.SubElement(inline_box, qname("wp", "extent"), dict(size_attrs))
    ET.SubElement(
        inline_box,
        qname("wp", "docPr"),
        {"id": shape_id, "name": f"Picture {pic_id}"},
    )

    graphic_node = ET.SubElement(inline_box, qname("a", "graphic"))
    graphic_data = ET.SubElement(
        graphic_node,
        qname("a", "graphicData"),
        {"uri": "http://schemas.openxmlformats.org/drawingml/2006/picture"},
    )
    picture = ET.SubElement(graphic_data, qname("pic", "pic"))

    # Non-visual properties
    non_visual = ET.SubElement(picture, qname("pic", "nvPicPr"))
    ET.SubElement(non_visual, qname("pic", "cNvPr"), {"id": shape_id, "name": filename})
    ET.SubElement(non_visual, qname("pic", "cNvPicPr"))

    # Image reference, stretched to fill the shape
    fill = ET.SubElement(picture, qname("pic", "blipFill"))
    ET.SubElement(fill, qname("a", "blip"), {qname("r", "embed"): rel_id})
    stretch_node = ET.SubElement(fill, qname("a", "stretch"))
    ET.SubElement(stretch_node, qname("a", "fillRect"))

    # Shape properties: transform plus rectangular preset geometry
    shape_props = ET.SubElement(picture, qname("pic", "spPr"))
    transform = ET.SubElement(shape_props, qname("a", "xfrm"))
    ET.SubElement(transform, qname("a", "off"), {"x": "0", "y": "0"})
    ET.SubElement(transform, qname("a", "ext"), dict(size_attrs))
    geometry = ET.SubElement(shape_props, qname("a", "prstGeom"), {"prst": "rect"})
    ET.SubElement(geometry, qname("a", "avLst"))

    return root
|
|
338
|
+
```
|