hwpkit-dev 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.npmignore +4 -1
  2. package/README.md +39 -2
  3. package/dist/index.d.mts +74 -16
  4. package/dist/index.d.ts +70 -16
  5. package/dist/index.js +4985 -698
  6. package/dist/index.js.map +1 -1
  7. package/dist/index.mjs +4981 -698
  8. package/dist/index.mjs.map +1 -1
  9. package/package.json +4 -1
  10. package/playground/index.html +346 -0
  11. package/playground/main.ts +302 -0
  12. package/playground/vite.config.ts +16 -0
  13. package/src/contract/decoder.ts +1 -0
  14. package/src/contract/encoder.ts +6 -1
  15. package/src/core/BaseDecoder.ts +118 -0
  16. package/src/core/BaseEncoder.ts +146 -0
  17. package/src/decoders/docx/DocxDecoder.ts +867 -150
  18. package/src/decoders/html/HtmlDecoder.ts +366 -0
  19. package/src/decoders/hwp/HwpScanner.ts +477 -88
  20. package/src/decoders/hwpx/HwpxDecoder.ts +789 -293
  21. package/src/decoders/md/MdDecoder.ts +4 -4
  22. package/src/encoders/docx/DocxEncoder.ts +600 -295
  23. package/src/encoders/html/HtmlEncoder.ts +203 -0
  24. package/src/encoders/hwp/HwpEncoder.ts +1647 -398
  25. package/src/encoders/hwpx/HwpxEncoder.ts +1512 -444
  26. package/src/encoders/hwpx/constants.ts +148 -0
  27. package/src/encoders/hwpx/utils.ts +198 -0
  28. package/src/encoders/md/MdEncoder.ts +117 -30
  29. package/src/index.ts +1 -0
  30. package/src/model/builders.ts +8 -6
  31. package/src/model/doc-props.ts +19 -5
  32. package/src/model/doc-tree.ts +13 -5
  33. package/src/pipeline/Pipeline.ts +21 -4
  34. package/src/pipeline/registry.ts +13 -2
  35. package/src/safety/StyleBridge.ts +52 -7
  36. package/src/toolkit/ArchiveKit.ts +56 -0
  37. package/src/toolkit/StyleMapper.ts +221 -0
  38. package/src/toolkit/UnitConverter.ts +138 -0
  39. package/src/toolkit/XmlKit.ts +0 -5
  40. package/test-styling.ts +210 -0
@@ -0,0 +1,203 @@
1
+ import type { DocRoot, ParaNode, SpanNode, GridNode, ContentNode, ImgNode, LinkNode } from '../../model/doc-tree';
2
+ import type { Outcome } from '../../contract/result';
3
+ import { succeed, fail } from '../../contract/result';
4
+ import { TextKit } from '../../toolkit/TextKit';
5
+ import { registry } from '../../pipeline/registry';
6
+ import { BaseEncoder } from '../../core/BaseEncoder';
7
+
8
/**
 * Encoder that renders a document tree as one self-contained HTML page.
 *
 * Each sheet contributes, in order: an optional header band, its content
 * nodes, and an optional footer band. Only the `default` header/footer
 * variant of a sheet is emitted.
 */
export class HtmlEncoder extends BaseEncoder {
  /** Returns the format key of this encoder (`'html'`). */
  protected getFormat(): string {
    return 'html';
  }

  /**
   * Serializes `doc` into a complete HTML document.
   *
   * @param doc - Root of the document tree to render.
   * @returns A success outcome carrying the encoded page and the warnings
   *   collected while rendering, or a failure outcome when rendering threw.
   */
  async encode(doc: DocRoot): Promise<Outcome<Uint8Array>> {
    try {
      const warns: string[] = [];
      const bodyParts: string[] = [];

      // Headers and footers are emitted as visible <div> bands (not as HTML
      // comments). A missing or empty band produces no markup at all.
      const pushBand = (
        cssClass: string,
        paras: readonly ParaNode[] | null | undefined,
      ): void => {
        if (!paras || paras.length === 0) return;
        const inner = paras.map((p) => encodePara(p, warns)).join('');
        bodyParts.push(`<div class="${cssClass}">${inner}</div>`);
      };

      for (const sheet of doc.kids) {
        pushBand('hwp-header', sheet.headers?.default);
        for (const kid of sheet.kids) {
          bodyParts.push(encodeContent(kid, warns));
        }
        pushBand('hwp-footer', sheet.footers?.default);
      }

      const title = this.escapeXml(doc.meta?.title ?? '');
      const html = [
        '<!DOCTYPE html>',
        '<html lang="ko">',
        '<head>',
        '<meta charset="UTF-8">',
        '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
        `<title>${title}</title>`,
        '<style>',
        BASE_CSS,
        '</style>',
        '</head>',
        '<body>',
        '<div class="hwp-doc">',
        bodyParts.join('\n'),
        '</div>',
        '</body>',
        '</html>',
      ].join('\n');

      return succeed(this.stringToBytes(html), warns);
    } catch (e: unknown) {
      // Narrow instead of trusting the thrown value: anything may be thrown.
      const reason = e instanceof Error ? e.message : String(e);
      return fail(`HTML encode error: ${reason}`);
    }
  }
}
42
+
43
/**
 * Baseline stylesheet for generated pages: page chrome, header/footer bands,
 * paragraph reset, table borders and responsive images. One rule per entry,
 * joined with newlines and carrying no leading or trailing whitespace.
 */
const BASE_CSS = [
  'body { margin: 0; padding: 0; background: #f0f0f0; }',
  '.hwp-doc { max-width: 800px; margin: 0 auto; background: #fff; padding: 40px 60px; box-shadow: 0 0 8px rgba(0,0,0,0.15); }',
  '.hwp-header, .hwp-footer { color: #666; font-size: 0.9em; border-bottom: 1px solid #ddd; margin-bottom: 8px; padding-bottom: 4px; }',
  '.hwp-footer { border-top: 1px solid #ddd; border-bottom: none; margin-top: 8px; padding-top: 4px; }',
  'p { margin: 0; padding: 0; line-height: 1; }',
  'table { border-collapse: collapse; width: 100%; margin: 8px 0; }',
  'td, th { border: 1px solid #ccc; padding: 4px 8px; vertical-align: top; }',
  'img { max-width: 100%; height: auto; }',
].join('\n');
53
+
54
/**
 * Renders one block-level content node.
 *
 * @param node - A grid (table) or paragraph node.
 * @param warns - Collector that the renderers append warnings to.
 * @returns Table markup for a node tagged `'grid'`, paragraph markup otherwise.
 */
function encodeContent(node: ContentNode, warns: string[]): string {
  if (node.tag === 'grid') {
    return encodeGrid(node, warns);
  }
  return encodePara(node, warns);
}
57
+
58
/** URL schemes that must never reach an `href` produced from document content. */
const UNSAFE_LINK_SCHEME = /^(?:javascript|vbscript|data):/i;

/**
 * Reports whether `href` uses a script-capable scheme.
 * ASCII whitespace and control characters are stripped first because browsers
 * ignore them while parsing the scheme (e.g. `java\tscript:`).
 */
function isUnsafeHref(href: string): boolean {
  return UNSAFE_LINK_SCHEME.test(String(href).replace(/[\u0000-\u0020]/g, ''));
}

/** Maps a heading level onto a valid HTML heading tag name (`h1`..`h6`). */
function headingTagName(level: number): string {
  const clamped = Math.min(6, Math.max(1, Math.trunc(Number(level)) || 1));
  return `h${clamped}`;
}

/**
 * Renders a paragraph node as HTML.
 *
 * Output shape, in priority order:
 * - heading paragraphs become `<h1>`..`<h6>` (level clamped to that range);
 * - list paragraphs become a `<p>` with a literal marker span, indented 20px
 *   per list level; ordered items always get the static marker `1. `;
 * - everything else becomes a `<p>` carrying alignment, indent, spacing and
 *   line-height as inline style, with `&nbsp;` as content when empty.
 *
 * Heading and list paragraphs return early, so alignment/spacing props are not
 * applied to them.
 *
 * @param para - Paragraph to render.
 * @param warns - Collector for warnings raised while rendering.
 * @returns The paragraph markup, terminated by a newline.
 */
function encodePara(para: ParaNode, warns: string[]): string {
  const kids = para.kids
    .map((k): string => {
      if (k.tag === 'span') return encodeSpan(k, warns);
      if (k.tag === 'img') return encodeImage(k);
      if (k.tag === 'link') {
        const link = k as LinkNode;
        const inner = link.kids.map((s) => encodeSpan(s, warns)).join('');
        // Document content is untrusted: a script-capable href would run in the
        // viewer's browser, so keep the link text but drop the anchor.
        if (isUnsafeHref(link.href)) {
          warns.push('[SHIELD] HTML: 안전하지 않은 링크 — 텍스트만 유지됨');
          return inner;
        }
        return `<a href="${TextKit.escapeXml(link.href)}">${inner}</a>`;
      }
      return '';
    })
    .join('');

  const props = para.props;

  // Heading
  if (props.heading) {
    const tag = headingTagName(props.heading);
    return `<${tag}>${kids}</${tag}>\n`;
  }

  // List
  if (props.listOrd !== undefined) {
    const indent = (props.listLv ?? 0) * 20;
    const style = indent > 0 ? ` style="margin-left:${indent}px"` : '';
    // NOTE(review): ordered items are not numbered; every one renders "1. ".
    const marker = props.listOrd
      ? `<span class="list-marker">1. </span>`
      : `<span class="list-marker">• </span>`;
    return `<p${style}>${marker}${kids}</p>\n`;
  }

  // Plain paragraph: collect inline style declarations.
  const styleAttrs: string[] = [];
  const align = props.align;
  if (align && align !== 'left') styleAttrs.push(`text-align:${align}`);
  if (props.indentPt) styleAttrs.push(`margin-left:${props.indentPt.toFixed(1)}pt`);
  if (props.spaceBefore) styleAttrs.push(`margin-top:${props.spaceBefore.toFixed(1)}pt`);
  if (props.spaceAfter) styleAttrs.push(`margin-bottom:${props.spaceAfter.toFixed(1)}pt`);
  if (props.lineHeight) styleAttrs.push(`line-height:${props.lineHeight}`);

  const styleAttr = styleAttrs.length > 0 ? ` style="${styleAttrs.join(';')}"` : '';
  // An empty <p> would collapse to zero height; &nbsp; keeps the blank line visible.
  return `<p${styleAttr}>${kids || '&nbsp;'}</p>\n`;
}
96
+
97
/** Accepts 3/4/6/8-digit hex colour values written without the leading `#`. */
const SPAN_HEX_COLOR = /^(?:[0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$/i;

/**
 * Renders a text run (span) and its character formatting as HTML.
 *
 * Children are rendered in order: text is XML-escaped after `__EXT_…__`
 * placeholders are removed, `br` becomes `<br>`, `pb` becomes a page-break
 * `<div>`, and `pagenum` becomes a static placeholder (with a warning, since
 * HTML has no live page number). Children with any other tag are ignored.
 *
 * Formatting is emitted as inline CSS. `sup`/`sub` pick the wrapping element
 * (`sup` wins when both are set); otherwise a `<span>` is used only when there
 * is at least one declaration.
 *
 * @param span - The run to render.
 * @param warns - Collector for warnings raised while rendering.
 * @returns The run's markup; the bare content when no formatting applies.
 */
function encodeSpan(span: SpanNode, warns: string[]): string {
  const parts: string[] = [];

  for (const kid of span.kids) {
    switch (kid.tag) {
      case 'txt': {
        // Strip __EXT_N__ or __EXT_N_W<w>_H<h>__ placeholders.
        const content = kid.content.replace(/__EXT_\d+(?:_W\d+_H\d+)?__/g, '');
        if (content) parts.push(TextKit.escapeXml(content));
        break;
      }
      case 'br':
        parts.push('<br>');
        break;
      case 'pb':
        // NOTE(review): a <div> inside <p>/<span> is not valid nesting; left
        // unchanged in case other code depends on this exact markup.
        parts.push('<div style="page-break-after:always"></div>');
        break;
      case 'pagenum':
        warns.push('[SHIELD] HTML: 페이지 번호 — 정적 값으로 대체됨');
        parts.push('<span class="page-num">[페이지]</span>');
        break;
      default:
        break;
    }
  }

  const text = parts.join('');

  const p = span.props;
  const css: string[] = [];
  if (p.font) css.push(`font-family:${TextKit.escapeXml(p.font)}`);
  if (p.pt) css.push(`font-size:${p.pt}pt`);
  // Colours go into a quoted attribute unescaped, so only pure hex digits are
  // let through. Anything else would have been invalid CSS after '#' anyway.
  if (p.color && SPAN_HEX_COLOR.test(p.color)) css.push(`color:#${p.color}`);
  if (p.bg && SPAN_HEX_COLOR.test(p.bg)) css.push(`background-color:#${p.bg}`);
  if (p.b) css.push('font-weight:bold');
  if (p.i) css.push('font-style:italic');

  const decorations: string[] = [];
  if (p.u) decorations.push('underline');
  if (p.s) decorations.push('line-through');
  if (decorations.length > 0) css.push(`text-decoration:${decorations.join(' ')}`);

  const styleAttr = css.length > 0 ? ` style="${css.join(';')}"` : '';
  if (p.sup) return `<sup${styleAttr}>${text}</sup>`;
  if (p.sub) return `<sub${styleAttr}>${text}</sub>`;
  return styleAttr ? `<span${styleAttr}>${text}</span>` : text;
}
141
+
142
/**
 * Renders an image node as an `<img>` element with an inline `data:` URI.
 *
 * `w` and `h` are scaled by 96/72 and rounded before being written as
 * `width`/`height` (presumably points → CSS pixels; confirm against ImgNode).
 * A dimension that is missing or zero is omitted.
 *
 * @param img - Image node carrying MIME type, base64 payload and optional
 *   alt text and dimensions.
 * @returns The `<img>` markup.
 */
function encodeImage(img: ImgNode): string {
  const scale = (value: number): number => Math.round(value / 72 * 96);
  const wStyle = img.w ? ` width="${scale(img.w)}px"` : '';
  const hStyle = img.h ? ` height="${scale(img.h)}px"` : '';
  const alt = TextKit.escapeXml(img.alt ?? '');
  // MIME type and payload come from the decoded document and land inside a
  // quoted attribute, so they are escaped like `alt`. Presumably a no-op for a
  // well-formed MIME type and base64 payload; depends on TextKit.escapeXml.
  const mime = TextKit.escapeXml(img.mime);
  const payload = TextKit.escapeXml(img.b64);
  return `<img src="data:${mime};base64,${payload}" alt="${alt}"${wStyle}${hStyle}>`;
}
148
+
149
/** Accepts 3/4/6/8-digit hex colour values written without the leading `#`. */
const CELL_HEX_COLOR = /^(?:[0-9a-f]{3,4}|[0-9a-f]{6}|[0-9a-f]{8})$/i;

/**
 * Renders a grid node as an HTML `<table>`.
 *
 * Cells are emitted in source order with `colspan`/`rowspan` attributes when
 * the span exceeds 1. No column-occupancy bookkeeping is needed here: the
 * output depends only on each cell's own spans, props and children.
 *
 * A cell is a `<th>` when it is flagged as a header or when the grid declares
 * a header row and the cell sits in the first row; otherwise it is a `<td>`.
 * Cell children may be paragraphs or nested grids.
 *
 * @param grid - The table to render.
 * @param warns - Collector for warnings raised while rendering cell content.
 * @returns Table markup terminated by a newline, or `''` for a grid without rows.
 */
function encodeGrid(grid: GridNode, warns: string[]): string {
  if (grid.kids.length === 0) return '';

  const rows = grid.kids
    .map((row, ri) => {
      const cells = row.kids
        .map((cell) => {
          const isHeader = cell.props.isHeader || (grid.props.headerRow && ri === 0);
          const tag = isHeader ? 'th' : 'td';

          const cs = cell.cs > 1 ? ` colspan="${cell.cs}"` : '';
          const rs = cell.rs > 1 ? ` rowspan="${cell.rs}"` : '';

          const styleAttrs: string[] = [];
          // Only pure hex digits are let through, since the value is placed in
          // a quoted attribute unescaped. Other values were invalid CSS anyway.
          const bg = cell.props.bg;
          if (bg && CELL_HEX_COLOR.test(bg)) styleAttrs.push(`background-color:#${bg}`);
          // 'top' is the stylesheet default for td/th, so only mid/bot are emitted.
          const va = cell.props.va;
          if (va === 'mid') styleAttrs.push('vertical-align:middle');
          else if (va === 'bot') styleAttrs.push('vertical-align:bottom');
          const styleAttr = styleAttrs.length > 0 ? ` style="${styleAttrs.join(';')}"` : '';

          const content = cell.kids
            .map((p) => (p.tag === 'para' ? encodePara(p, warns) : encodeGrid(p, warns)))
            .join('');
          return `<${tag}${cs}${rs}${styleAttr}>${content}</${tag}>`;
        })
        .join('');
      return `<tr>${cells}</tr>\n`;
    })
    .join('');

  return `<table>\n<tbody>\n${rows}</tbody>\n</table>\n`;
}
202
+
203
// Module side effect: importing this file registers an HtmlEncoder instance
// with the shared encoder registry.
const htmlEncoderInstance = new HtmlEncoder();
registry.registerEncoder(htmlEncoderInstance);