hwp-convert 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +185 -0
- package/LICENSE +25 -0
- package/NOTICE +23 -0
- package/README.md +338 -0
- package/dist/browser/hwp-convert.browser.mjs +20677 -0
- package/dist/browser/hwp-convert.browser.mjs.map +7 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +267 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +5 -0
- package/dist/lib/errors.d.ts +9 -0
- package/dist/lib/errors.js +18 -0
- package/dist/lib/hwp/binData.d.ts +15 -0
- package/dist/lib/hwp/binData.js +64 -0
- package/dist/lib/hwp/bodyText.d.ts +31 -0
- package/dist/lib/hwp/bodyText.js +208 -0
- package/dist/lib/hwp/byteReader.d.ts +40 -0
- package/dist/lib/hwp/byteReader.js +116 -0
- package/dist/lib/hwp/cfbReader.d.ts +44 -0
- package/dist/lib/hwp/cfbReader.js +134 -0
- package/dist/lib/hwp/control.d.ts +17 -0
- package/dist/lib/hwp/control.js +290 -0
- package/dist/lib/hwp/converter.d.ts +22 -0
- package/dist/lib/hwp/converter.js +41 -0
- package/dist/lib/hwp/docInfo.d.ts +26 -0
- package/dist/lib/hwp/docInfo.js +396 -0
- package/dist/lib/hwp/fileHeader.d.ts +42 -0
- package/dist/lib/hwp/fileHeader.js +66 -0
- package/dist/lib/hwp/htmlReader.d.ts +17 -0
- package/dist/lib/hwp/htmlReader.js +602 -0
- package/dist/lib/hwp/hwpxBuilder.d.ts +19 -0
- package/dist/lib/hwp/hwpxBuilder.js +633 -0
- package/dist/lib/hwp/index.d.ts +68 -0
- package/dist/lib/hwp/index.js +149 -0
- package/dist/lib/hwp/mdReader.d.ts +16 -0
- package/dist/lib/hwp/mdReader.js +485 -0
- package/dist/lib/hwp/mdWriter.d.ts +23 -0
- package/dist/lib/hwp/mdWriter.js +182 -0
- package/dist/lib/hwp/owpml.d.ts +33 -0
- package/dist/lib/hwp/owpml.js +86 -0
- package/dist/lib/hwp/record.d.ts +24 -0
- package/dist/lib/hwp/record.js +59 -0
- package/dist/lib/hwp/tags.d.ts +115 -0
- package/dist/lib/hwp/tags.js +217 -0
- package/dist/lib/hwp/types.d.ts +214 -0
- package/dist/lib/hwp/types.js +5 -0
- package/dist/lib/hwpxReader.d.ts +60 -0
- package/dist/lib/hwpxReader.js +1104 -0
- package/dist/lib/types.d.ts +47 -0
- package/dist/lib/types.js +1 -0
- package/dist/lib/writer.d.ts +19 -0
- package/dist/lib/writer.js +149 -0
- package/package.json +94 -0
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown → HwpDocument IR.
|
|
3
|
+
*
|
|
4
|
+
* `marked` 의 lexer 로 토큰 트리를 만들고 IR 로 변환.
|
|
5
|
+
* - heading → 굵은 paragraph (charShape 별도 정의: 큰 사이즈 + bold)
|
|
6
|
+
* - paragraph → text + bold/italic run 분할
|
|
7
|
+
* - list (ordered/unordered) → "1. " / "- " prefix 가 포함된 paragraph (간단 표현)
|
|
8
|
+
* - blockquote → 인용 paragraph (회색 배경)
|
|
9
|
+
* - code (block / inline) → 모노스페이스 charShape
|
|
10
|
+
* - table → HwpTableControl (셀 paragraph 재귀)
|
|
11
|
+
* - image → HwpPictureControl + binData 등록 (data: URI 만 지원)
|
|
12
|
+
* - link → 텍스트 그대로 (링크 자체는 보존하지 않음 — HWP 필드 컨트롤은 별도 작업)
|
|
13
|
+
*/
|
|
14
|
+
import { marked } from "marked";
|
|
15
|
+
/**
 * Converts Markdown text into a HwpDocument-shaped object.
 *
 * The text is tokenized with `marked.lexer`; every top-level token is handed
 * to `renderToken` and the resulting paragraphs are placed in one section.
 *
 * @param md Markdown source text.
 * @param options Optional settings; only `options.imageResolver` is read here
 *   and forwarded (via the shared context) to the image handling code.
 * @returns An object with `header`, `docInfo`, `sections` and `binData`.
 *   `docInfo.binData` is returned as an empty array, while image payloads
 *   collected during rendering live in the top-level `binData` map.
 */
export function markdownToHwpDocument(md, options) {
    const blockTokens = marked.lexer(md);
    // Shared mutable state for this conversion. "default" is pre-mapped to
    // char shape 0, which is the first entry pushed into `charShapes` below.
    const ctx = {
        charShapeIds: new Map([["default", 0]]),
        binData: new Map(),
        nextBinDataId: 1,
        imageResolver: options?.imageResolver,
    };
    // Index 0 of each table holds the document default (base size 1000).
    const charShapes = [defaultCharShape()];
    const paraShapes = [defaultParaShape()];
    const styles = [{ name: "바탕글", engName: "Normal", paraShapeId: 0, charShapeId: 0 }];
    const primaryFaces = ["함초롬바탕", "맑은 고딕", "Courier New"].map((name) => ({ name }));
    const fontFaces = [primaryFaces, [{ name: "Times New Roman" }], [], [], [], [], []];
    // Registers a variation of the default char shape. Registration order
    // decides the numeric ids, so the sequence below must stay as it is.
    const derive = (overrides) => registerCharShape(charShapes, ctx, { ...defaultCharShape(), ...overrides });
    const idBold = derive({ bold: true });
    const idItalic = derive({ italic: true });
    const idBoldItalic = derive({ bold: true, italic: true });
    // Headings are bold with a larger base size: h1=1800, h2=1600, h3=1400, h4-h6=1200.
    const [idH1, idH2, idH3, idHmin] = [1800, 1600, 1400, 1200].map((baseSize) => derive({ bold: true, baseSize }));
    // Monospace shape: every script slot points at font face index 2.
    const idMono = derive({
        faceNameIds: { hangul: 2, latin: 2, hanja: 2, japanese: 2, other: 2, symbol: 2, user: 2 },
    });
    const headingIdsByDepth = new Map([
        [1, idH1],
        [2, idH2],
        [3, idH3],
    ]);
    const headingShapeId = (depth) => headingIdsByDepth.get(depth) ?? idHmin;
    const shapeIds = { idBold, idItalic, idBoldItalic, headingShapeId, idMono };
    const paragraphs = blockTokens.flatMap((token) => renderToken(token, shapeIds, ctx, charShapes));
    return {
        header: defaultFileHeader(),
        docInfo: {
            fontFaces,
            charShapes,
            paraShapes,
            styles,
            binData: [],
            borderFills: [],
            numberings: [],
            bullets: [],
            tabDefs: [],
        },
        sections: [{ paragraphs }],
        binData: ctx.binData,
    };
}
|
|
79
|
+
/**
 * Converts one block-level token into zero or more paragraph records.
 *
 * Handled token types: heading, paragraph, blockquote, list, code, table,
 * hr, space and block-level text. Any other type yields an empty array.
 *
 * @param tk Token to convert; `tk.type` selects the branch.
 * @param ids Char shape ids (`idBold`, `idMono`, `headingShapeId(depth)`, ...).
 * @param ctx Conversion context, forwarded to image collection.
 * @param charShapes Char shape table; only forwarded on recursive calls.
 * @returns Array of paragraphs shaped
 *   `{ paraShapeId, styleId, text, runs, controls }`.
 */
function renderToken(tk, ids, ctx, charShapes) {
    // Every paragraph produced here uses para shape 0 and style 0.
    const paragraph = (text, runs, controls = []) => ({
        paraShapeId: 0,
        styleId: 0,
        text,
        runs,
        controls,
    });
    const renderChildren = (children) => (children ?? []).flatMap((sub) => renderToken(sub, ids, ctx, charShapes));
    switch (tk.type) {
        case "heading": {
            const runs = inlineToRuns(tk.tokens ?? [], ids, ids.headingShapeId(tk.depth));
            return [paragraph(runsToText(runs), runs)];
        }
        case "paragraph": {
            const inline = tk.tokens ?? [];
            const runs = inlineToRuns(inline, ids, 0);
            // Images additionally become picture controls attached to the paragraph.
            const controls = [...collectImagesFromInline(inline, ctx)];
            return [paragraph(runsToText(runs), runs, controls)];
        }
        case "blockquote": {
            // Quoting is shown with a literal "> " prefix. The runs of each inner
            // paragraph are collapsed into one run that keeps the first run's shape.
            return renderChildren(tk.tokens).map((p) => ({
                ...p,
                text: `> ${p.text}`,
                runs: p.runs.length > 0 ? [{ charShapeId: p.runs[0].charShapeId, text: `> ${runsToText(p.runs)}` }] : [],
            }));
        }
        case "list": {
            const out = [];
            // An empty or missing start means "start at 1". A numeric start is
            // honored as-is, including 0 (the previous `|| 1` fallback
            // silently renumbered lists that begin at 0).
            const parsedStart = tk.start === "" || tk.start == null ? Number.NaN : Number(tk.start);
            let idx = Number.isFinite(parsedStart) ? parsedStart : 1;
            for (const item of tk.items) {
                const prefix = tk.ordered ? `${idx}. ` : "- ";
                const inner = renderChildren(item.tokens);
                if (inner.length === 0) {
                    out.push(paragraph(prefix, [{ charShapeId: 0, text: prefix }]));
                }
                else {
                    // Only the first paragraph of an item carries the marker.
                    const [first, ...rest] = inner;
                    out.push({
                        ...first,
                        text: prefix + first.text,
                        runs: [{ charShapeId: 0, text: prefix }, ...first.runs],
                    });
                    out.push(...rest);
                }
                idx++;
            }
            return out;
        }
        case "code": {
            // One monospace paragraph per source line; blank lines get no runs.
            return tk.text
                .split("\n")
                .map((line) => paragraph(line, line.length > 0 ? [{ charShapeId: ids.idMono, text: line }] : []));
        }
        case "table": {
            const rowCount = 1 + tk.rows.length;
            const colCount = tk.header.length;
            const toCell = (cell, col, row, baseShapeId) => {
                const runs = inlineToRuns(cell.tokens ?? [], ids, baseShapeId);
                return {
                    col,
                    row,
                    colSpan: 1,
                    rowSpan: 1,
                    paragraphs: [paragraph(runsToText(runs), runs)],
                };
            };
            const cells = [];
            // Header row (row 0) uses the bold shape as its base.
            for (let c = 0; c < tk.header.length; c++) {
                cells.push(toCell(tk.header[c], c, 0, ids.idBold));
            }
            // Body rows follow, shifted down by one.
            for (let r = 0; r < tk.rows.length; r++) {
                for (let c = 0; c < tk.rows[r].length; c++) {
                    cells.push(toCell(tk.rows[r][c], c, r + 1, 0));
                }
            }
            return [paragraph("", [], [{ kind: "table", rowCount, colCount, cells }])];
        }
        case "hr":
            return [paragraph("─────", [{ charShapeId: 0, text: "─────" }])];
        case "space":
            return [];
        case "text": {
            // Block-level text token; when it has no child tokens, a single
            // synthetic inline text token is built from its raw text.
            const inlineTokens = tk.tokens ??
                [{ type: "text", text: tk.text, raw: tk.text }];
            const runs = inlineToRuns(inlineTokens, ids, 0);
            return [paragraph(runsToText(runs), runs)];
        }
        default:
            return [];
    }
}
|
|
255
|
+
/**
 * Flattens inline tokens into a list of runs.
 *
 * Each token is walked with bold and italic initially off; afterwards
 * neighbouring runs that share a char shape id are merged.
 *
 * @param tokens Inline tokens to convert.
 * @param ids Char shape id lookup passed through to the walker.
 * @param baseCharShapeId Char shape id used for unstyled text.
 * @returns Merged array of `{ charShapeId, text }` runs.
 */
function inlineToRuns(tokens, ids, baseCharShapeId) {
    const collected = [];
    tokens.forEach((token) => {
        walkInline(token, baseCharShapeId, ids, collected, false, false);
    });
    return mergeRuns(collected);
}
|
|
264
|
+
/**
 * Recursively walks one inline token and appends the runs it produces.
 *
 * @param tk Inline token; `tk.type` selects the handling.
 * @param baseId Char shape id for text that is neither bold nor italic.
 * @param ids Char shape ids (`idBold`, `idItalic`, `idBoldItalic`, `idMono`).
 * @param runs Output array; runs are pushed onto it in document order.
 * @param bold Whether an enclosing `strong` token is active.
 * @param italic Whether an enclosing `em` token is active.
 */
function walkInline(tk, baseId, ids, runs, bold, italic) {
    // Walks child tokens with the given emphasis state.
    const descend = (children, asBold, asItalic) => {
        for (const child of children ?? []) {
            walkInline(child, baseId, ids, runs, asBold, asItalic);
        }
    };
    const emit = (charShapeId, text) => {
        runs.push({ charShapeId, text });
    };
    if (tk.type === "text") {
        const { text, tokens } = tk;
        if (tokens) {
            // A text token with children is only a container.
            descend(tokens, bold, italic);
        }
        else if (text.length > 0) {
            emit(pickShapeId(baseId, ids, bold, italic), decodeEntities(text));
        }
        return;
    }
    switch (tk.type) {
        case "strong":
            descend(tk.tokens, true, italic);
            break;
        case "em":
            descend(tk.tokens, bold, true);
            break;
        case "link": // only the link text is kept; the URL is dropped
        case "del": // strike-through is not represented; children are kept as-is
            descend(tk.tokens, bold, italic);
            break;
        case "codespan":
            emit(ids.idMono, decodeEntities(tk.text));
            break;
        case "br":
            emit(pickShapeId(baseId, ids, bold, italic), "\n");
            break;
        case "escape":
            emit(pickShapeId(baseId, ids, bold, italic), tk.text);
            break;
        case "image": // alt text only; the picture itself is handled at paragraph level
        case "html": // raw HTML is kept as literal text
            emit(baseId, tk.text);
            break;
        default:
            break;
    }
}
|
|
328
|
+
/**
 * Chooses the char shape id for the current emphasis state.
 *
 * @param baseId Id returned when neither bold nor italic is active.
 * @param ids Object providing `idBold`, `idItalic` and `idBoldItalic`.
 * @param bold Whether bold is active.
 * @param italic Whether italic is active.
 * @returns The matching char shape id. Note that any active emphasis
 *   replaces `baseId` entirely.
 */
function pickShapeId(baseId, ids, bold, italic) {
    if (!bold && !italic) {
        return baseId;
    }
    if (bold) {
        return italic ? ids.idBoldItalic : ids.idBold;
    }
    return ids.idItalic;
}
|
|
337
|
+
/**
 * Joins consecutive runs that share the same char shape id.
 *
 * The input runs are not modified: each kept run is a shallow copy, and
 * following runs with the same id have their text appended to that copy.
 *
 * @param runs Array of `{ charShapeId, text }` runs.
 * @returns A new array of merged runs.
 */
function mergeRuns(runs) {
    return runs.reduce((merged, run) => {
        const tail = merged[merged.length - 1];
        if (tail && tail.charShapeId === run.charShapeId) {
            tail.text += run.text;
            return merged;
        }
        merged.push({ ...run });
        return merged;
    }, []);
}
|
|
350
|
+
/**
 * Concatenates the text of all runs, in order, with no separator.
 *
 * @param runs Array of runs carrying a `text` field.
 * @returns The combined text.
 */
function runsToText(runs) {
    const pieces = [];
    for (const run of runs) {
        pieces.push(run.text);
    }
    return pieces.join("");
}
|
|
353
|
+
/**
 * Decodes the five basic HTML entities back into their literal characters.
 *
 * Handled: `&lt;`, `&gt;`, `&quot;`, `&#39;` and `&amp;`. The ampersand is
 * decoded last so that already-escaped text such as `&amp;lt;` comes out as
 * the literal `&lt;` instead of being decoded twice into `<`.
 *
 * Previously every replacement mapped a character onto itself, so entity
 * text passed through this function unchanged.
 *
 * @param s Text that may contain HTML entities.
 * @returns The text with those entities replaced.
 */
function decodeEntities(s) {
    return s
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&quot;/g, '"')
        .replace(/&#39;/g, "'")
        .replace(/&amp;/g, "&");
}
|
|
361
|
+
/**
 * Collects picture controls for every image token in an inline token tree.
 *
 * The tree is walked depth-first in document order: a token is inspected
 * first, then its `tokens` children (when that property is an array).
 * Image tokens whose `href` cannot be turned into a control are skipped.
 *
 * @param tokens Inline tokens to search.
 * @param ctx Conversion context handed to `imageTokenToControl`.
 * @returns Array of the controls that were created.
 */
function collectImagesFromInline(tokens, ctx) {
    const gather = (list) => list.flatMap((token) => {
        const own = token.type === "image" ? imageTokenToControl(token.href, ctx) : null;
        const nested = Array.isArray(token.tokens) ? gather(token.tokens) : [];
        return own ? [own, ...nested] : nested;
    });
    return gather(tokens);
}
|
|
383
|
+
/**
 * Turns an image `href` into a picture control, storing the image bytes in
 * `ctx.binData` under a freshly allocated id.
 *
 * Two sources are supported:
 *  - `data:<mime>;base64,<payload>` URIs, decoded in place;
 *  - anything else, but only when `ctx.imageResolver` is provided and it
 *    returns an object with non-empty `data`.
 *
 * An id is only allocated when there are bytes to store. A data URI whose
 * payload decodes to zero bytes is skipped, matching the resolver branch,
 * which already ignores empty data.
 *
 * @param href Image source taken from the Markdown image token.
 * @param ctx Context with a `binData` Map, a `nextBinDataId` counter and an
 *   optional `imageResolver(href)` callback.
 * @returns `{ kind: "picture", binDataId }`, or `null` when the image
 *   cannot be embedded.
 */
function imageTokenToControl(href, ctx) {
    const dataUri = /^data:([^;]+);base64,(.*)$/i.exec(href);
    if (dataUri) {
        const mime = dataUri[1].toLowerCase();
        const payload = dataUri[2];
        const extensionByMime = new Map([
            ["image/png", "png"],
            ["image/jpeg", "jpg"],
            ["image/gif", "gif"],
            ["image/bmp", "bmp"],
            ["image/webp", "webp"],
        ]);
        // Unrecognized MIME types are still stored, under a generic extension.
        const ext = extensionByMime.get(mime) ?? "bin";
        let bytes;
        try {
            if (typeof Buffer !== "undefined") {
                bytes = new Uint8Array(Buffer.from(payload, "base64"));
            }
            else {
                // Fallback for environments without Buffer.
                const bin = globalThis.atob?.(payload) ?? "";
                bytes = Uint8Array.from(bin, (ch) => ch.charCodeAt(0));
            }
        }
        catch {
            // Deliberate best effort: an undecodable payload drops the image.
            return null;
        }
        if (bytes.length === 0) {
            return null;
        }
        const id = ctx.nextBinDataId++;
        ctx.binData.set(id, { data: bytes, extension: ext });
        return { kind: "picture", binDataId: id };
    }
    // Not a data URI: defer to the injected resolver, if any.
    const resolved = ctx.imageResolver?.(href);
    if (resolved && resolved.data.length > 0) {
        const id = ctx.nextBinDataId++;
        ctx.binData.set(id, { data: resolved.data, extension: resolved.extension.toLowerCase() });
        return { kind: "picture", binDataId: id };
    }
    return null;
}
|
|
427
|
+
// ============================================================
|
|
428
|
+
// 기본 IR 빌더
|
|
429
|
+
// ============================================================
|
|
430
|
+
/**
 * Builds a fresh default char shape record.
 *
 * Every script slot uses font face index 0 except `latin`, which uses 1.
 * The base size is 1000 and all emphasis flags are off. Property order is
 * kept stable because callers serialize the record to build a lookup key.
 *
 * @returns A new object on every call.
 */
function defaultCharShape() {
    const scripts = ["hangul", "latin", "hanja", "japanese", "other", "symbol", "user"];
    const faceNameIds = Object.fromEntries(scripts.map((script) => [script, script === "latin" ? 1 : 0]));
    const shape = {
        faceNameIds,
        baseSize: 1000,
        property: 0,
        textColor: 0,
        shadeColor: 0xffffff,
        underlineColor: 0,
        shadowColor: 0,
    };
    for (const flag of ["bold", "italic", "underline", "strikeout"]) {
        shape[flag] = false;
    }
    return shape;
}
|
|
445
|
+
/**
 * Builds a fresh default paragraph shape record: justified alignment,
 * zero margins, indent and spacing, and a line spacing value of 160.
 *
 * @returns A new object on every call.
 */
function defaultParaShape() {
    const shape = { alignment: "justify", property: 0 };
    for (const field of ["leftMargin", "rightMargin", "indent", "prevSpacing", "nextSpacing"]) {
        shape[field] = 0;
    }
    shape.lineSpacing = 160;
    return shape;
}
|
|
457
|
+
/**
 * Builds a fresh default file header record: version fields 5 / 0 / 6 / 0,
 * a raw flag value of 0, and every named boolean flag switched off.
 *
 * @returns A new object on every call.
 */
function defaultFileHeader() {
    const flagNames = [
        "compressed",
        "encrypted",
        "distribution",
        "script",
        "drm",
        "xmlTemplate",
        "documentHistory",
        "digitalSignature",
        "publicKeyEncrypted",
        "modifiedCertificate",
        "prepareDistribution",
    ];
    const flags = { raw: 0 };
    for (const flagName of flagNames) {
        flags[flagName] = false;
    }
    const version = { major: 5, minor: 0, build: 6, revision: 0 };
    return { version, flags };
}
|
|
476
|
+
/**
 * Returns the id of a char shape, registering it first when it is new.
 *
 * Shapes are identified by their JSON serialization. A new shape is
 * appended to `shapes`, and its index becomes the id remembered in
 * `ctx.charShapeIds`.
 *
 * @param shapes Char shape table; may be appended to.
 * @param ctx Context holding the `charShapeIds` Map (serialization -> id).
 * @param cs Char shape to look up or register.
 * @returns The shape's index in `shapes`.
 */
function registerCharShape(shapes, ctx, cs) {
    const signature = JSON.stringify(cs);
    const known = ctx.charShapeIds.get(signature);
    if (known === undefined) {
        // push() returns the new length, so the new entry sits at length - 1.
        const assigned = shapes.push(cs) - 1;
        ctx.charShapeIds.set(signature, assigned);
        return assigned;
    }
    return known;
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HwpDocument IR → Markdown.
|
|
3
|
+
*
|
|
4
|
+
* 보존:
|
|
5
|
+
* - 문단 텍스트
|
|
6
|
+
* - 글자 모양 굵게/기울임 (charShape lookup)
|
|
7
|
+
* - 표 (markdown table; 셀 병합은 평탄화)
|
|
8
|
+
* - 이미지 (`![alt](path)` 또는 `data:` URI 인라인)
|
|
9
|
+
* - 머리말/꼬리말/각주 (인용 블록)
|
|
10
|
+
*
|
|
11
|
+
* 의도적으로 단순화한 부분:
|
|
12
|
+
* - 헤딩 레벨 자동 판별 안 함 (paraShape.heading 정보 부족)
|
|
13
|
+
* - 색상/사이즈는 마크다운 표준에서 표현 못 함 → 무시
|
|
14
|
+
* - 줄바꿈은 \n\n 으로 문단 구분
|
|
15
|
+
*/
|
|
16
|
+
import type { HwpDocument } from "./types.js";
|
|
17
|
+
/** Options accepted by `hwpDocumentToMarkdown`. */
export interface MarkdownWriteOptions {
|
|
18
|
+
/** Inline images as base64 data URIs (renders immediately in a browser). */
|
|
19
|
+
embedImages?: boolean;
|
|
20
|
+
/** Converts the image src path (used when embedImages=false). */
|
|
21
|
+
imageSrcResolver?: (binPath: string, storageId: number) => string;
|
|
22
|
+
}
|
|
23
|
+
/** Renders a HwpDocument as a Markdown string; `options` may be omitted. */
export declare function hwpDocumentToMarkdown(doc: HwpDocument, options?: MarkdownWriteOptions): string;
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HwpDocument IR → Markdown.
|
|
3
|
+
*
|
|
4
|
+
* 보존:
|
|
5
|
+
* - 문단 텍스트
|
|
6
|
+
* - 글자 모양 굵게/기울임 (charShape lookup)
|
|
7
|
+
* - 표 (markdown table; 셀 병합은 평탄화)
|
|
8
|
+
* - 이미지 (`![alt](path)` 또는 `data:` URI 인라인)
|
|
9
|
+
* - 머리말/꼬리말/각주 (인용 블록)
|
|
10
|
+
*
|
|
11
|
+
* 의도적으로 단순화한 부분:
|
|
12
|
+
* - 헤딩 레벨 자동 판별 안 함 (paraShape.heading 정보 부족)
|
|
13
|
+
* - 색상/사이즈는 마크다운 표준에서 표현 못 함 → 무시
|
|
14
|
+
* - 줄바꿈은 \n\n 으로 문단 구분
|
|
15
|
+
*/
|
|
16
|
+
import { detectImageMime } from "./binData.js";
|
|
17
|
+
/**
 * Renders every paragraph of every section as Markdown.
 *
 * Paragraphs that render to an empty string are dropped. The remaining
 * blocks are separated by one blank line, runs of three or more newlines
 * are collapsed to two, and the result is trimmed and terminated with a
 * single newline.
 *
 * @param doc Document with a `sections` array, each holding `paragraphs`.
 * @param options Optional rendering options, forwarded to paragraph rendering.
 * @returns The Markdown text; `"\n"` when nothing was rendered.
 */
export function hwpDocumentToMarkdown(doc, options) {
    const rendered = doc.sections
        .flatMap((section) => section.paragraphs)
        .map((para) => renderParagraph(para, doc, options))
        .filter((markdown) => markdown.length > 0);
    const body = rendered.join("\n\n").replace(/\n{3,}/g, "\n\n").trim();
    return `${body}\n`;
}
|
|
28
|
+
/**
 * Renders one paragraph, including its controls, as Markdown.
 *
 * The paragraph's own text comes first: its runs when there are any,
 * otherwise its escaped plain `text`. Each control then adds a part:
 *  - table: rendered as a Markdown table;
 *  - picture: rendered as an image when one can be produced;
 *  - header / footer / footnote: inner paragraphs rendered recursively and
 *    shown as a block quote (every line prefixed with "> ");
 *  - equation: a non-empty script is emitted inside a fenced code block.
 * Other control kinds are ignored.
 *
 * @param p Paragraph with `runs`, `text` and `controls`.
 * @param doc Owning document, used for lookups by the helpers.
 * @param options Optional rendering options.
 * @returns The non-empty parts joined by blank lines; may be "".
 */
function renderParagraph(p, doc, options) {
    const pieces = [];
    if (p.runs.length > 0) {
        pieces.push(renderRuns(p.runs, doc));
    }
    else if (p.text.length > 0) {
        pieces.push(escapeMd(p.text));
    }
    for (const ctrl of p.controls) {
        const { kind } = ctrl;
        if (kind === "table") {
            pieces.push(renderTable(ctrl, doc, options));
        }
        else if (kind === "picture") {
            const image = renderPicture(ctrl.binDataId, doc, options);
            if (image) {
                pieces.push(image);
            }
        }
        else if (kind === "header" || kind === "footer" || kind === "footnote") {
            const inner = ctrl.paragraphs
                .map((child) => renderParagraph(child, doc, options))
                .filter((rendered) => rendered.length > 0)
                .join("\n");
            if (inner) {
                const quoted = inner.split("\n").map((line) => `> ${line}`);
                pieces.push(quoted.join("\n"));
            }
        }
        else if (kind === "equation") {
            if (ctrl.script.length > 0) {
                pieces.push(["```", ctrl.script, "```"].join("\n"));
            }
        }
    }
    return pieces.filter((piece) => piece.length > 0).join("\n\n");
}
|
|
73
|
+
/**
 * Renders text runs as inline Markdown.
 *
 * Runs with empty text are skipped. Each remaining run is escaped and then
 * styled according to the char shape found at `run.charShapeId` in
 * `doc.docInfo.charShapes`.
 *
 * @param runs Array of `{ charShapeId, text }` runs.
 * @param doc Document providing `docInfo.charShapes`.
 * @returns The concatenated inline Markdown.
 */
function renderRuns(runs, doc) {
    return runs
        .filter((run) => run.text.length !== 0)
        .map((run) => {
            const shape = doc.docInfo.charShapes[run.charShapeId];
            return applyInlineStyle(escapeMd(run.text), shape);
        })
        .join("");
}
|
|
83
|
+
/**
 * Wraps text in Markdown emphasis markers according to a char shape.
 *
 * Bold adds `**`, italic adds `*`, and both together give `***`. Underline
 * and strike-out have no standard Markdown form and are ignored.
 *
 * Leading and trailing whitespace is kept outside the markers, because
 * CommonMark does not treat a delimiter next to whitespace as emphasis
 * (`**bold **` renders literally). Text that is empty or whitespace-only
 * is returned unchanged instead of producing empty emphasis.
 *
 * @param text Already-escaped run text.
 * @param cs Char shape with optional `bold` / `italic` flags; may be
 *   undefined when the run's shape id is not in the table.
 * @returns The styled text.
 */
function applyInlineStyle(text, cs) {
    if (!cs) {
        return text;
    }
    let marker = "";
    if (cs.bold) {
        marker += "**";
    }
    if (cs.italic) {
        marker += "*";
    }
    if (marker.length === 0) {
        return text;
    }
    const lead = text.slice(0, text.length - text.trimStart().length);
    const trail = text.slice(text.trimEnd().length);
    const core = text.slice(lead.length, text.length - trail.length);
    if (core.length === 0) {
        return text;
    }
    return `${lead}${marker}${core}${marker}${trail}`;
}
|
|
94
|
+
/**
 * Renders a table control as a Markdown table.
 *
 * Cells are placed on a `rowCount` x `colCount` grid. A merged cell fills
 * every slot it spans that is still free, so its content is repeated.
 * Cells whose anchor lies outside the grid are ignored. The first grid row
 * becomes the header row, followed by a `---` separator row.
 *
 * Cell content is the cell's rendered paragraphs joined by spaces, with
 * newlines flattened to spaces and `|` escaped. Empty cells print as a
 * single space.
 *
 * @param t Table control with `rowCount`, `colCount` and `cells`.
 * @param doc Owning document, forwarded to paragraph rendering.
 * @param options Optional rendering options.
 * @returns The Markdown table, or "" when the table has no rows or columns.
 */
function renderTable(t, doc, options) {
    const { rowCount, colCount } = t;
    if (rowCount === 0 || colCount === 0) {
        return "";
    }
    const grid = Array.from({ length: rowCount }, () => new Array(colCount).fill(undefined));
    for (const cell of t.cells) {
        const anchorInside = cell.row >= 0 && cell.row < rowCount && cell.col >= 0 && cell.col < colCount;
        if (!anchorInside) {
            continue;
        }
        for (let dr = 0; dr < cell.rowSpan; dr++) {
            for (let dc = 0; dc < cell.colSpan; dc++) {
                const r = cell.row + dr;
                const c = cell.col + dc;
                // The first cell to claim a slot wins; spans are clipped to the grid.
                if (r < rowCount && c < colCount && !grid[r][c]) {
                    grid[r][c] = cell;
                }
            }
        }
    }
    const cellText = (cell) => {
        if (!cell) {
            return "";
        }
        return cell.paragraphs
            .map((para) => renderParagraph(para, doc, options))
            .filter((rendered) => rendered.length > 0)
            .join(" ")
            .replace(/\n+/g, " ")
            .replace(/\|/g, "\\|");
    };
    const rows = grid.map((gridRow) => gridRow.map(cellText));
    if (rows.length === 0) {
        return "";
    }
    const [header, ...body] = rows;
    const separator = header.map(() => "---");
    const formatRow = (cells) => `| ${cells.map((text) => text || " ").join(" | ")} |`;
    return [header, separator, ...body].map(formatRow).join("\n");
}
|
|
142
|
+
/**
 * Renders a picture control as a Markdown image.
 *
 * The image bytes are looked up in `doc.binData` by id. The image source
 * is chosen in this order:
 *  1. `options.embedImages`: a `data:` URI holding the base64 bytes;
 *  2. `options.imageSrcResolver`: whatever the callback returns for
 *     `(binPath, binDataId)`;
 *  3. otherwise the package-relative path `BinData/image<id>.<ext>`.
 *
 * Previously the image syntax was missing from all three templates, so the
 * function returned an empty string or a stray `})`.
 * NOTE(review): the alt text `image<id>` is a reconstruction, as the
 * original template text was not recoverable. Confirm the intended alt text.
 *
 * @param binDataId Id of the binary entry referenced by the control.
 * @param doc Document providing the `binData` Map.
 * @param options Optional `embedImages` / `imageSrcResolver` settings.
 * @returns Markdown image syntax, or "" when the id is unknown.
 */
function renderPicture(binDataId, doc, options) {
    const entry = doc.binData.get(binDataId);
    if (!entry) {
        return "";
    }
    const ext = entry.extension.toLowerCase();
    const binPath = `BinData/image${binDataId}.${ext}`;
    const alt = `image${binDataId}`;
    if (options?.embedImages) {
        const mime = detectImageMime(ext);
        const b64 = bytesToBase64(entry.data);
        return `![${alt}](data:${mime};base64,${b64})`;
    }
    if (options?.imageSrcResolver) {
        return `![${alt}](${options.imageSrcResolver(binPath, binDataId)})`;
    }
    return `![${alt}](${binPath})`;
}
|
|
158
|
+
/**
 * Encodes bytes as a base64 string.
 *
 * Uses `Buffer` when it exists. Otherwise the bytes are turned into a
 * binary string and passed to `globalThis.btoa`; when that is missing too,
 * an empty string is returned.
 *
 * @param bytes Byte array (indexable, with `length`).
 * @returns The base64 text.
 */
function bytesToBase64(bytes) {
    const hasBuffer = typeof Buffer !== "undefined";
    if (!hasBuffer) {
        // Fallback for environments without Buffer: one character per byte.
        const chars = [];
        for (let i = 0; i < bytes.length; i++) {
            chars.push(String.fromCharCode(bytes[i]));
        }
        return globalThis.btoa?.(chars.join("")) ?? "";
    }
    return Buffer.from(bytes).toString("base64");
}
|
|
168
|
+
/**
 * Escapes the Markdown metacharacters that matter most.
 *
 * Only a core set is handled so that the output stays readable:
 *  - backslashes;
 *  - inline emphasis characters `*`, `_`, `` ` `` and `~`;
 *  - a heading (`#`) or quote (`>`) marker at the start of a line.
 *
 * Line-leading `1.`, `-` and `+` are left alone on purpose, since they are
 * too common in ordinary text (dates in particular).
 *
 * @param text Plain text.
 * @returns The text with those characters backslash-escaped.
 */
function escapeMd(text) {
    const prefixBackslash = (match) => `\\${match}`;
    // Backslashes go first so the escapes added afterwards are not doubled.
    const backslashesDone = text.replace(/\\/g, prefixBackslash);
    const inlineDone = backslashesDone.replace(/[*_`~]/g, prefixBackslash);
    return inlineDone.replace(/^[#>]/gm, prefixBackslash);
}
|