hwp-convert 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +185 -0
- package/LICENSE +25 -0
- package/NOTICE +23 -0
- package/README.md +338 -0
- package/dist/browser/hwp-convert.browser.mjs +20677 -0
- package/dist/browser/hwp-convert.browser.mjs.map +7 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +267 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/dist/lib/errors.d.ts +9 -0
- package/dist/lib/errors.js +18 -0
- package/dist/lib/hwp/binData.d.ts +15 -0
- package/dist/lib/hwp/binData.js +64 -0
- package/dist/lib/hwp/bodyText.d.ts +31 -0
- package/dist/lib/hwp/bodyText.js +208 -0
- package/dist/lib/hwp/byteReader.d.ts +40 -0
- package/dist/lib/hwp/byteReader.js +116 -0
- package/dist/lib/hwp/cfbReader.d.ts +44 -0
- package/dist/lib/hwp/cfbReader.js +134 -0
- package/dist/lib/hwp/control.d.ts +17 -0
- package/dist/lib/hwp/control.js +290 -0
- package/dist/lib/hwp/converter.d.ts +22 -0
- package/dist/lib/hwp/converter.js +41 -0
- package/dist/lib/hwp/docInfo.d.ts +26 -0
- package/dist/lib/hwp/docInfo.js +396 -0
- package/dist/lib/hwp/fileHeader.d.ts +42 -0
- package/dist/lib/hwp/fileHeader.js +66 -0
- package/dist/lib/hwp/htmlReader.d.ts +17 -0
- package/dist/lib/hwp/htmlReader.js +602 -0
- package/dist/lib/hwp/hwpxBuilder.d.ts +19 -0
- package/dist/lib/hwp/hwpxBuilder.js +633 -0
- package/dist/lib/hwp/index.d.ts +68 -0
- package/dist/lib/hwp/index.js +149 -0
- package/dist/lib/hwp/mdReader.d.ts +16 -0
- package/dist/lib/hwp/mdReader.js +485 -0
- package/dist/lib/hwp/mdWriter.d.ts +23 -0
- package/dist/lib/hwp/mdWriter.js +182 -0
- package/dist/lib/hwp/owpml.d.ts +33 -0
- package/dist/lib/hwp/owpml.js +86 -0
- package/dist/lib/hwp/record.d.ts +24 -0
- package/dist/lib/hwp/record.js +59 -0
- package/dist/lib/hwp/tags.d.ts +115 -0
- package/dist/lib/hwp/tags.js +217 -0
- package/dist/lib/hwp/types.d.ts +214 -0
- package/dist/lib/hwp/types.js +5 -0
- package/dist/lib/hwpxReader.d.ts +60 -0
- package/dist/lib/hwpxReader.js +1104 -0
- package/dist/lib/types.d.ts +47 -0
- package/dist/lib/types.js +1 -0
- package/dist/lib/writer.d.ts +19 -0
- package/dist/lib/writer.js +149 -0
- package/package.json +94 -0
|
@@ -0,0 +1,602 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HTML → HwpDocument IR.
|
|
3
|
+
*
|
|
4
|
+
* `htmlparser2` 로 SAX 파싱 → 트리 구축 → IR 변환.
|
|
5
|
+
* - p, div, h1~h6 → paragraph (heading 은 굵게 + 큰 사이즈)
|
|
6
|
+
* - strong/b, em/i → 굵게/기울임 run
|
|
7
|
+
* - br → 줄바꿈
|
|
8
|
+
* - ul/ol/li → "- " / "1. " prefix paragraph
|
|
9
|
+
* - table/thead/tbody/tr/th/td → HwpTableControl
|
|
10
|
+
* - img → HwpPictureControl (data: URI sources are decoded inline; any other src is used only when options.imageResolver resolves it)
|
|
11
|
+
* - blockquote → "> " prefix paragraph
|
|
12
|
+
* - code/pre → 모노스페이스
|
|
13
|
+
* - a → 텍스트만 (URL 미보존)
|
|
14
|
+
* - 기타 (style/script/head 등) → 무시
|
|
15
|
+
*/
|
|
16
|
+
import { Parser } from "htmlparser2";
|
|
17
|
+
/**
 * Parses an HTML string into a lightweight element tree.
 *
 * Element nodes have the shape `{ tag, attrs, children, parent }`; text is stored
 * as plain strings inside `children`. The returned node is a synthetic root tagged
 * `#root`. Everything nested in script/style/head/noscript/template is discarded,
 * and void elements are attached to the tree but never become the insertion point.
 *
 * @param html - HTML source text
 * @returns the synthetic root node
 */
function parseToTree(html) {
    const voidElements = new Set([
        "br", "img", "hr", "input", "meta", "link", "source", "track", "wbr", "col", "area", "base", "embed",
    ]);
    const ignoredElements = new Set(["script", "style", "head", "noscript", "template"]);
    const root = { tag: "#root", attrs: {}, children: [], parent: null };
    // Node that currently receives new children.
    let cursor = root;
    // Number of ignored elements we are currently nested in.
    let ignoredDepth = 0;
    const handlers = {
        onopentag(name, attrs) {
            const opensIgnored = ignoredElements.has(name);
            if (opensIgnored) {
                ignoredDepth += 1;
            }
            if (opensIgnored || ignoredDepth > 0) {
                return;
            }
            const element = { tag: name, attrs, children: [], parent: cursor };
            cursor.children.push(element);
            // Void elements cannot hold children, so the cursor stays where it is.
            if (!voidElements.has(name)) {
                cursor = element;
            }
        },
        ontext(text) {
            if (ignoredDepth === 0) {
                cursor.children.push(text);
            }
        },
        onclosetag(name) {
            if (ignoredElements.has(name)) {
                ignoredDepth = Math.max(0, ignoredDepth - 1);
                return;
            }
            if (ignoredDepth > 0 || voidElements.has(name)) {
                return;
            }
            // Only step back up when the close tag matches the open element.
            if (cursor.tag === name && cursor.parent) {
                cursor = cursor.parent;
            }
        },
    };
    const parser = new Parser(handlers, { decodeEntities: true, lowerCaseTags: true });
    parser.write(html);
    parser.end();
    return root;
}
|
|
61
|
+
/**
 * Converts an HTML string into the HwpDocument IR.
 *
 * The HTML is parsed into a tree, a fixed set of character shapes (default, bold,
 * italic, bold+italic, monospace and four heading sizes) is registered, and the
 * tree is rendered into paragraphs. Paragraphs with neither visible text nor
 * controls are dropped from the result.
 *
 * @param html - HTML source text (full document or fragment)
 * @param options - optional settings; `options.imageResolver(src)` is consulted for
 *   `<img>` sources that are not base64 data URIs
 * @returns the document IR with one section holding every rendered paragraph
 */
export function htmlToHwpDocument(html, options) {
    const tree = parseToTree(html);
    const ctx = {
        charShapeIds: new Map(),
        binData: new Map(),
        nextBinDataId: 1,
        imageResolver: options?.imageResolver,
    };
    const charShapes = [defaultCharShape()];
    ctx.charShapeIds.set("default", 0);
    const ids = {
        idDefault: 0,
        idBold: registerCharShape(charShapes, ctx, { ...defaultCharShape(), bold: true }),
        idItalic: registerCharShape(charShapes, ctx, { ...defaultCharShape(), italic: true }),
        idBoldItalic: registerCharShape(charShapes, ctx, { ...defaultCharShape(), bold: true, italic: true }),
        idMono: registerCharShape(charShapes, ctx, {
            ...defaultCharShape(),
            faceNameIds: { hangul: 2, latin: 2, hanja: 2, japanese: 2, other: 2, symbol: 2, user: 2 },
        }),
        idH1: registerCharShape(charShapes, ctx, { ...defaultCharShape(), bold: true, baseSize: 1800 }),
        idH2: registerCharShape(charShapes, ctx, { ...defaultCharShape(), bold: true, baseSize: 1600 }),
        idH3: registerCharShape(charShapes, ctx, { ...defaultCharShape(), bold: true, baseSize: 1400 }),
        idHmin: registerCharShape(charShapes, ctx, { ...defaultCharShape(), bold: true, baseSize: 1200 }),
    };
    const initialState = { bold: false, italic: false, mono: false };
    // Render the synthetic root like any other container. When the input is a
    // fragment made only of inline content (e.g. `Hello <b>world</b>`), this yields
    // one paragraph with several runs instead of one paragraph per top-level node.
    // Inputs with block-level children are still rendered child by child.
    const paragraphs = renderNodeChildren(tree, ids, ctx, initialState, "");
    // Drop paragraphs that carry neither visible text nor controls.
    const filtered = paragraphs.filter((p) => p.text.trim().length > 0 || p.controls.length > 0);
    return {
        header: defaultFileHeader(),
        docInfo: {
            // NOTE(review): the char shapes above reference face index 1 for latin and
            // index 2 (monospace) for every language, but only the first group below has
            // three entries — confirm how the builder resolves faceNameIds against these groups.
            fontFaces: [
                [{ name: "함초롬바탕" }, { name: "맑은 고딕" }, { name: "Courier New" }],
                [{ name: "Times New Roman" }],
                [],
                [],
                [],
                [],
                [],
            ],
            charShapes,
            paraShapes: [defaultParaShape()],
            styles: [{ name: "바탕글", engName: "Normal", paraShapeId: 0, charShapeId: 0 }],
            binData: [],
            borderFills: [],
            numberings: [],
            bullets: [],
            tabDefs: [],
        },
        sections: [{ paragraphs: filtered }],
        binData: ctx.binData,
    };
}
|
|
117
|
+
/** Tags that produce their own paragraph(s); used to spot containers holding block content. */
const RENDER_NODE_BLOCK_TAGS = new Set([
    "address", "article", "aside", "blockquote", "dd", "details", "div", "dl", "dt", "fieldset",
    "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr",
    "li", "main", "nav", "ol", "p", "pre", "section", "table", "ul",
]);

/**
 * Renders one tree node (text string or element) into zero or more paragraphs.
 *
 * @param node - text string or element node `{ tag, attrs, children }`
 * @param ids - character shape ids (idDefault, idBold, idH1, ...)
 * @param ctx - conversion context (binary data registry, image resolver)
 * @param state - inherited inline state `{ bold, italic, mono }`
 * @param prefix - text placed in front of the first paragraph (list markers); "" for none
 * @returns the paragraphs produced for this node
 */
function renderNode(node, ids, ctx, state, prefix) {
    const paragraph = (text, runs, controls) => ({ paraShapeId: 0, styleId: 0, text, runs, controls });
    const withPrefix = (runs) => prefix.length > 0 ? [{ charShapeId: ids.idDefault, text: prefix }, ...runs] : runs;
    if (typeof node === "string") {
        // Whitespace between block elements is formatting, not content. Emitting it
        // would consume a pending list prefix and create stray paragraphs.
        if (node.trim().length === 0)
            return [];
        const text = collapseWhitespace(node);
        return [paragraph(prefix + text, withPrefix([{ charShapeId: pickInlineId(ids, state), text }]), [])];
    }
    const tag = node.tag.toLowerCase();
    switch (tag) {
        case "p":
        case "div":
        case "section":
        case "article": {
            // A container that holds block-level children must not be flattened into a
            // single paragraph; that would glue headings, lists and tables together as text.
            const holdsBlocks = node.children.some((c) => typeof c !== "string" && RENDER_NODE_BLOCK_TAGS.has(c.tag));
            if (holdsBlocks)
                return renderNodeChildren(node, ids, ctx, state, prefix);
            const runs = collectInlineRuns(node, ids, ctx, state);
            const text = runsToText(runs);
            const controls = collectInlineControls(node, ctx);
            if (text.length === 0 && controls.length === 0)
                return [];
            return [paragraph(prefix + text, withPrefix(runs), controls)];
        }
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6": {
            // h1-h3 have dedicated shapes; h4-h6 share the smallest heading shape.
            const depth = Number(tag[1]);
            const baseShapeId = depth === 1 ? ids.idH1 : depth === 2 ? ids.idH2 : depth === 3 ? ids.idH3 : ids.idHmin;
            const runs = collectInlineRuns(node, ids, ctx, state, baseShapeId);
            const text = runsToText(runs);
            if (!text)
                return [];
            return [paragraph(text, runs, [])];
        }
        case "ul":
        case "ol": {
            const out = [];
            let idx = 1;
            for (const child of node.children) {
                if (typeof child === "string")
                    continue;
                if (child.tag !== "li")
                    continue;
                const liPrefix = tag === "ul" ? "- " : `${idx}. `;
                const inner = renderNodeChildren(child, ids, ctx, state, liPrefix);
                if (inner.length === 0) {
                    // Keep the marker for an empty list item.
                    out.push(paragraph(liPrefix, [{ charShapeId: ids.idDefault, text: liPrefix }], []));
                }
                else {
                    out.push(...inner);
                }
                idx++;
            }
            return out;
        }
        case "blockquote": {
            const inner = renderNodeChildren(node, ids, ctx, state, "");
            return inner.map((p) => {
                const text = `> ${p.text}`;
                const runs = [
                    // The quote marker borrows the shape of the first run, if any.
                    { charShapeId: p.runs[0]?.charShapeId ?? ids.idDefault, text: "> " },
                    ...p.runs,
                ];
                return { ...p, text, runs };
            });
        }
        case "table":
            return [collectTableParagraph(node, ids, ctx)];
        case "br":
            return [paragraph("", [], [])];
        case "hr":
            return [paragraph("─────", [{ charShapeId: ids.idDefault, text: "─────" }], [])];
        case "pre": {
            // Text inside <pre> keeps its line breaks: one monospace paragraph per line.
            // CRLF and lone CR are treated as line breaks too, so no "\r" ends up in the text.
            const monoState = { ...state, mono: true };
            const lines = extractPreText(node).split(/\r\n|\r|\n/);
            return lines.map((line) => paragraph(line, line.length > 0 ? [{ charShapeId: pickInlineId(ids, monoState), text: line }] : [], []));
        }
        case "img": {
            const ctrl = imageNodeToControl(node, ctx);
            if (!ctrl)
                return [];
            return [paragraph("", [], [ctrl])];
        }
        case "html":
        case "body":
        case "main":
        case "header":
        case "footer":
        case "nav":
        case "aside":
        case "figure":
        case "figcaption":
            return renderNodeChildren(node, ids, ctx, state, prefix);
        default:
            // Anything else (span/strong/em/code/a, unknown tags) is treated as an
            // inline container; lists, quotes and tables were handled above.
            return renderNodeChildren(node, ids, ctx, state, prefix);
    }
}
|
|
287
|
+
/**
 * Renders the children of a container node.
 *
 * When every child is text or an inline element, the whole content becomes a single
 * paragraph. Otherwise each child is rendered on its own and the results are
 * concatenated.
 *
 * @param node - element whose children are rendered
 * @param ids - character shape ids
 * @param ctx - conversion context
 * @param state - inherited inline state `{ bold, italic, mono }`
 * @param prefix - text placed in front of the first paragraph produced ("" for none)
 * @returns the paragraphs produced by the children
 */
function renderNodeChildren(node, ids, ctx, state, prefix) {
    const allInline = node.children.every((c) => typeof c === "string" || isInlineTag(c.tag));
    if (allInline) {
        const runs = collectInlineRuns(node, ids, ctx, state);
        const text = runsToText(runs);
        const controls = collectInlineControls(node, ctx);
        if (!text && controls.length === 0)
            return [];
        return [
            {
                paraShapeId: 0,
                styleId: 0,
                text: prefix + text,
                runs: prefix.length > 0
                    ? [{ charShapeId: ids.idDefault, text: prefix }, ...runs]
                    : runs,
                controls,
            },
        ];
    }
    // Block children are present: render each child separately.
    const out = [];
    let pendingPrefix = prefix;
    for (const child of node.children) {
        // Whitespace between block elements (e.g. the newline after `<li>`) is not
        // content; rendering it would attach the prefix to an empty-looking paragraph.
        if (typeof child === "string" && child.trim().length === 0)
            continue;
        const rendered = renderNode(child, ids, ctx, state, pendingPrefix);
        if (rendered.length === 0)
            continue;
        out.push(...rendered);
        // The prefix belongs to the first paragraph only, so it is dropped once a
        // child has actually produced output.
        pendingPrefix = "";
    }
    return out;
}
|
|
317
|
+
/** Lower-case names of the elements treated as inline content. */
const INLINE_TAG_NAMES = new Set([
    "a", "abbr", "b", "bdi", "bdo", "br", "cite", "code", "data", "dfn", "em", "i", "kbd",
    "mark", "q", "s", "samp", "small", "span", "strong", "sub", "sup", "time", "u", "var",
    "wbr", "del", "ins", "img",
]);

/**
 * Tells whether a tag name is one of the known inline elements.
 *
 * @param tag - tag name, compared exactly (no case folding)
 * @returns true when the tag is in the inline list
 */
function isInlineTag(tag) {
    return INLINE_TAG_NAMES.has(tag);
}
|
|
324
|
+
/**
 * Gathers the text runs of a node's inline content and merges adjacent runs that
 * share a character shape.
 *
 * @param node - element (or text string) to walk
 * @param ids - character shape ids
 * @param ctx - conversion context
 * @param state - inline state `{ bold, italic, mono }` at the starting point
 * @param baseId - optional shape id applied to text instead of the state-derived one
 * @returns the merged runs
 */
function collectInlineRuns(node, ids, ctx, state, baseId) {
    // A missing baseId is passed on as null.
    const forcedShapeId = baseId ?? null;
    const collected = [];
    walkInline(node, ids, ctx, state, collected, forcedShapeId);
    return mergeRuns(collected);
}
|
|
329
|
+
/**
 * Walks inline content depth-first and appends one run per text piece to `runs`.
 *
 * strong/b switch on bold, em/i switch on italic and code/samp/kbd switch on
 * monospace for their descendants. `<br>` contributes a "\n" run and `<img>`
 * contributes its alt text, if any.
 *
 * @param node - text string or element node
 * @param ids - character shape ids
 * @param ctx - conversion context (passed through to recursive calls)
 * @param state - inline state `{ bold, italic, mono }` inherited from ancestors
 * @param runs - output array, appended to in document order
 * @param baseId - when not null/undefined, the shape id used for every run
 */
function walkInline(node, ids, ctx, state, runs, baseId) {
    // A forced base shape (headings, table cells) applies to every run, including line
    // breaks and alt text. Otherwise a heading's "\n" or alt text would take the body
    // shape and split the heading into differently styled runs.
    const shapeIdFor = (inlineState) => (baseId != null ? baseId : pickInlineId(ids, inlineState));
    if (typeof node === "string") {
        const text = collapseWhitespace(node);
        if (text.length === 0)
            return;
        runs.push({ charShapeId: shapeIdFor(state), text });
        return;
    }
    const tag = node.tag.toLowerCase();
    if (tag === "img") {
        // The picture itself is emitted as a separate control; inline only shows alt text.
        const alt = node.attrs.alt;
        if (alt) {
            runs.push({ charShapeId: shapeIdFor(state), text: alt });
        }
        return;
    }
    if (tag === "br") {
        runs.push({ charShapeId: shapeIdFor(state), text: "\n" });
        return;
    }
    let nextState = state;
    if (tag === "strong" || tag === "b")
        nextState = { ...nextState, bold: true };
    if (tag === "em" || tag === "i")
        nextState = { ...nextState, italic: true };
    if (tag === "code" || tag === "samp" || tag === "kbd")
        nextState = { ...nextState, mono: true };
    for (const child of node.children) {
        walkInline(child, ids, ctx, nextState, runs, baseId);
    }
}
|
|
364
|
+
/**
 * Collects a picture control for every `<img>` found below `node`, in document order.
 * Images for which `imageNodeToControl` yields nothing are skipped.
 *
 * @param node - element whose descendants are searched
 * @param ctx - conversion context handed to `imageNodeToControl`
 * @returns the picture controls found
 */
function collectInlineControls(node, ctx) {
    const controls = [];
    // Explicit stack; children are pushed in reverse so they pop in document order.
    const pending = [...node.children].reverse();
    while (pending.length > 0) {
        const item = pending.pop();
        if (typeof item === "string") {
            continue;
        }
        if (item.tag === "img") {
            const picture = imageNodeToControl(item, ctx);
            if (picture) {
                controls.push(picture);
            }
            continue;
        }
        for (let i = item.children.length - 1; i >= 0; i -= 1) {
            pending.push(item.children[i]);
        }
    }
    return controls;
}
|
|
382
|
+
/** Upper bound for `colspan`, matching the limit the HTML standard applies. */
const MAX_TABLE_COLSPAN = 1000;

/**
 * Parses a colspan/rowspan attribute into an integer between 1 and `max`.
 * Missing, non-numeric, zero or negative values yield 1.
 */
function clampTableSpan(raw, max) {
    const parsed = Number.parseInt(String(raw ?? "1"), 10);
    if (!Number.isFinite(parsed) || parsed < 1)
        return 1;
    return Math.min(parsed, Math.max(1, max));
}

/**
 * Converts a `<table>` element into one paragraph carrying a table control.
 *
 * Rows are gathered from the table and from its thead/tbody/tfoot children. Cell
 * coordinates come from an occupancy grid, so cells covered by an earlier
 * rowspan/colspan are skipped rather than counted. Each cell holds one paragraph
 * built from its inline content; header cells use the bold shape.
 *
 * @param table - the `<table>` element node
 * @param ids - character shape ids
 * @param ctx - conversion context
 * @returns a paragraph whose only control is `{ kind: "table", rowCount, colCount, cells }`
 */
function collectTableParagraph(table, ids, ctx) {
    // Gather <tr> elements, flattening thead/tbody/tfoot.
    const trs = [];
    const collectTrs = (n) => {
        for (const c of n.children) {
            if (typeof c === "string")
                continue;
            if (c.tag === "tr")
                trs.push(c);
            else if (c.tag === "thead" || c.tag === "tbody" || c.tag === "tfoot")
                collectTrs(c);
        }
    };
    collectTrs(table);
    // Advancing the column by one per cell would ignore slots taken by merged cells
    // and misplace everything after them, hence the occupancy grid.
    let maxCols = 0;
    const occupied = new Set();
    const tcs = [];
    for (let r = 0; r < trs.length; r++) {
        let col = 0;
        for (const c of trs[r].children) {
            if (typeof c === "string")
                continue;
            if (c.tag !== "td" && c.tag !== "th")
                continue;
            // Skip slots taken by a rowspan from above or a colspan in this row.
            while (occupied.has(`${r},${col}`))
                col++;
            // Spans are forced to integers. colspan is capped so hostile markup cannot
            // blow up the grid; rowspan is clipped so a cell never reaches past the last row.
            const colSpan = clampTableSpan(c.attrs.colspan, MAX_TABLE_COLSPAN);
            const rowSpan = clampTableSpan(c.attrs.rowspan, trs.length - r);
            tcs.push({ row: r, col, isHeader: c.tag === "th", node: c, colSpan, rowSpan });
            for (let dr = 0; dr < rowSpan; dr++) {
                for (let dc = 0; dc < colSpan; dc++) {
                    occupied.add(`${r + dr},${col + dc}`);
                }
            }
            col += colSpan;
            if (col > maxCols)
                maxCols = col;
        }
    }
    const cells = tcs.map(({ row, col, isHeader, node, colSpan, rowSpan }) => {
        const baseId = isHeader ? ids.idBold : ids.idDefault;
        const runs = collectInlineRuns(node, ids, ctx, { bold: isHeader, italic: false, mono: false }, baseId);
        return {
            col,
            row,
            colSpan,
            rowSpan,
            paragraphs: [
                {
                    paraShapeId: 0,
                    styleId: 0,
                    text: runsToText(runs),
                    runs,
                    controls: [],
                },
            ],
        };
    });
    return {
        paraShapeId: 0,
        styleId: 0,
        text: "",
        runs: [],
        controls: [{ kind: "table", rowCount: trs.length, colCount: maxCols, cells }],
    };
}
|
|
451
|
+
/**
 * Turns an `<img>` node into a picture control and registers its bytes in `ctx.binData`.
 *
 * A base64 `data:` URI is decoded directly. Any other non-empty `src` is handed to
 * `ctx.imageResolver` when one was supplied. An image that yields no bytes is skipped.
 *
 * @param node - the `<img>` element node (reads `node.attrs.src`)
 * @param ctx - conversion context; `binData` and `nextBinDataId` are updated on success
 * @returns `{ kind: "picture", binDataId }`, or null when the image cannot be used
 */
function imageNodeToControl(node, ctx) {
    const src = node.attrs.src ?? "";
    // Accepts extra media-type parameters (e.g. ";charset=utf-8") before ";base64" and
    // a payload wrapped over several lines; `.` alone would stop at the first newline.
    const match = /^data:([^;,]+)(?:;[^;,]*)*;base64,([\s\S]*)$/i.exec(src);
    if (!match) {
        // Not a base64 data URI: let the injected resolver handle file:// or local paths.
        const resolved = src ? ctx.imageResolver?.(src) : null;
        if (resolved && resolved.data.length > 0) {
            const id = ctx.nextBinDataId++;
            ctx.binData.set(id, { data: resolved.data, extension: resolved.extension.toLowerCase() });
            return { kind: "picture", binDataId: id };
        }
        return null;
    }
    const mime = match[1].toLowerCase();
    let ext = "bin";
    if (mime === "image/png")
        ext = "png";
    else if (mime === "image/jpeg")
        ext = "jpg";
    else if (mime === "image/gif")
        ext = "gif";
    else if (mime === "image/bmp")
        ext = "bmp";
    // Line breaks and indentation inside the attribute value are not part of the data.
    const payload = match[2].replace(/\s+/g, "");
    let bytes;
    try {
        if (typeof Buffer !== "undefined") {
            bytes = new Uint8Array(Buffer.from(payload, "base64"));
        }
        else {
            const bin = globalThis.atob?.(payload) ?? "";
            bytes = new Uint8Array(bin.length);
            for (let i = 0; i < bin.length; i++)
                bytes[i] = bin.charCodeAt(i);
        }
    }
    catch {
        // atob throws on malformed base64; treat the image as unusable.
        return null;
    }
    // Same rule as the resolver branch: never register an image without bytes
    // (empty payload, or no base64 decoder available).
    if (bytes.length === 0)
        return null;
    const id = ctx.nextBinDataId++;
    ctx.binData.set(id, { data: bytes, extension: ext });
    return { kind: "picture", binDataId: id };
}
|
|
493
|
+
/**
 * Concatenates the raw text below a node, keeping whitespace untouched and turning
 * every `<br>` into a newline.
 *
 * @param node - element whose descendants are read
 * @returns the text content
 */
function extractPreText(node) {
    const pieces = [];
    const walk = (item) => {
        if (typeof item === "string") {
            pieces.push(item);
        }
        else if (item.tag === "br") {
            pieces.push("\n");
        }
        else {
            for (const nested of item.children) {
                walk(nested);
            }
        }
    };
    for (const top of node.children) {
        walk(top);
    }
    return pieces.join("");
}
|
|
511
|
+
/**
 * Chooses the character shape id for an inline state.
 * Monospace wins over everything else, then bold+italic, bold, italic and finally
 * the default shape.
 *
 * @param ids - character shape ids
 * @param state - inline state `{ bold, italic, mono }`
 * @returns the matching shape id
 */
function pickInlineId(ids, state) {
    const { mono, bold, italic } = state;
    if (mono) {
        return ids.idMono;
    }
    if (bold) {
        return italic ? ids.idBoldItalic : ids.idBold;
    }
    return italic ? ids.idItalic : ids.idDefault;
}
|
|
522
|
+
/**
 * Drops empty runs and joins neighbouring runs that share a character shape.
 * The input run objects are not modified; the result holds copies.
 *
 * @param runs - runs in document order
 * @returns a new, compacted list of runs
 */
function mergeRuns(runs) {
    const merged = [];
    for (let i = 0; i < runs.length; i += 1) {
        const run = runs[i];
        if (run.text.length === 0) {
            continue;
        }
        const tail = merged[merged.length - 1];
        const sameShape = Boolean(tail) && tail.charShapeId === run.charShapeId;
        if (sameShape) {
            tail.text += run.text;
        }
        else {
            merged.push({ ...run });
        }
    }
    return merged;
}
|
|
537
|
+
/**
 * Joins the text of all runs into one string.
 *
 * @param runs - runs in document order
 * @returns the concatenated text
 */
function runsToText(runs) {
    const parts = [];
    for (const run of runs) {
        parts.push(run.text);
    }
    return parts.join("");
}
|
|
540
|
+
/**
 * Replaces every run of whitespace in an HTML text node with a single space.
 * Leading and trailing whitespace is reduced to one space, not removed.
 *
 * @param s - raw text
 * @returns the text with whitespace runs collapsed
 */
function collapseWhitespace(s) {
    const whitespaceRun = /[\s ]+/g;
    const collapsed = s.replace(whitespaceRun, " ");
    return collapsed;
}
|
|
544
|
+
// ============================================================
|
|
545
|
+
// 기본 IR
|
|
546
|
+
// ============================================================
|
|
547
|
+
/**
 * Creates the baseline character shape: size 1000, black text on a white shade,
 * no bold/italic/underline/strikeout. A fresh object is returned on every call.
 *
 * @returns a new default character shape
 */
function defaultCharShape() {
    // Face index per language group; latin is the only group not using index 0.
    const faceNameIds = { hangul: 0, latin: 1, hanja: 0, japanese: 0, other: 0, symbol: 0, user: 0 };
    const shape = {
        faceNameIds,
        baseSize: 1000,
        property: 0,
        textColor: 0,
        shadeColor: 0xffffff,
        underlineColor: 0,
        shadowColor: 0,
        bold: false,
        italic: false,
        underline: false,
        strikeout: false,
    };
    return shape;
}
|
|
562
|
+
/**
 * Creates the baseline paragraph shape: justified, no margins, indent or extra
 * spacing, line spacing 160. A fresh object is returned on every call.
 *
 * @returns a new default paragraph shape
 */
function defaultParaShape() {
    const shape = {
        alignment: "justify",
        property: 0,
        leftMargin: 0,
        rightMargin: 0,
        indent: 0,
        prevSpacing: 0,
        nextSpacing: 0,
        lineSpacing: 160,
    };
    return shape;
}
|
|
574
|
+
/**
 * Creates the file header used for generated documents: version 5.0.6.0 with
 * every feature flag switched off.
 *
 * @returns a new file header object
 */
function defaultFileHeader() {
    const version = { major: 5, minor: 0, build: 6, revision: 0 };
    const flags = {
        raw: 0,
        compressed: false,
        encrypted: false,
        distribution: false,
        script: false,
        drm: false,
        xmlTemplate: false,
        documentHistory: false,
        digitalSignature: false,
        publicKeyEncrypted: false,
        modifiedCertificate: false,
        prepareDistribution: false,
    };
    return { version, flags };
}
|
|
593
|
+
/**
 * Returns the id of a character shape, appending it to `shapes` when an identical
 * shape (same JSON serialization) has not been registered through `ctx` before.
 *
 * @param shapes - list of character shapes; the id is the index in this list
 * @param ctx - conversion context holding the `charShapeIds` lookup map
 * @param cs - character shape to register
 * @returns the id of the existing or newly added shape
 */
function registerCharShape(shapes, ctx, cs) {
    const signature = JSON.stringify(cs);
    const known = ctx.charShapeIds.get(signature);
    if (known === undefined) {
        // push() returns the new length, so the new entry sits at length - 1.
        const assigned = shapes.push(cs) - 1;
        ctx.charShapeIds.set(signature, assigned);
        return assigned;
    }
    return known;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HwpDocument IR → HWPX(OWPML) 패키지 빌더 (스타일 보존 포함).
|
|
3
|
+
*
|
|
4
|
+
* - DocInfo 의 fontFaces / charShapes / paraShapes / styles 를 header.xml refList 로 매핑
|
|
5
|
+
* - paragraph: paraPrIDRef = paraShapeId, styleIDRef = styleId
|
|
6
|
+
* - run: charPrIDRef = charShapeId
|
|
7
|
+
* - 표/이미지 + BinData 패키징 + manifest 등록
|
|
8
|
+
*
|
|
9
|
+
* 1차 포팅 한계: BorderFill/Numbering/TabDef 는 paraShape 의 참조 ID 만 보존하고
|
|
10
|
+
* 실제 정의는 default(0) 로 둠. 추후 단계에서 정의 자체도 옮길 예정.
|
|
11
|
+
*/
|
|
12
|
+
import type { HwpDocument } from "./types.js";
|
|
13
|
+
/**
 * Optional settings accepted by `buildHwpxFromDocument`. Both fields may be omitted.
 * NOTE(review): `title` and `creator` look like document metadata written into the
 * generated package — confirm against the implementation in hwpxBuilder.js.
 */
export interface BuildOptions {
|
|
14
|
+
title?: string;
|
|
15
|
+
creator?: string;
|
|
16
|
+
}
|
|
17
|
+
/**
 * Builds an HWPX (OWPML) package from a `HwpDocument` IR.
 *
 * @param doc - document IR to serialize
 * @param options - optional build settings (see `BuildOptions`)
 * @returns a promise resolving to a `Uint8Array` — presumably the bytes of the
 *   finished package; confirm against the implementation
 */
export declare function buildHwpxFromDocument(doc: HwpDocument, options?: BuildOptions): Promise<Uint8Array>;
|
|
18
|
+
/** Converts an HWP ColorRef (a little-endian u32 laid out as 0xAABBGGRR) to a "#RRGGBB" string. */
|
|
19
|
+
export declare function colorBgrToHex(color: number): string;
|