hwpkit-dev 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ .npmignore +11 -0
- package/README.md +223 -0
- package/dist/index.d.mts +313 -0
- package/dist/index.d.ts +317 -0
- package/dist/index.js +3546 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +3479 -0
- package/dist/index.mjs.map +1 -0
- package/license.md +136 -0
- package/package.json +45 -0
- package/src/contract/decoder.ts +7 -0
- package/src/contract/encoder.ts +7 -0
- package/src/contract/result.ts +21 -0
- package/src/decoders/docx/DocxDecoder.ts +986 -0
- package/src/decoders/hwp/HwpScanner.ts +809 -0
- package/src/decoders/hwpx/HwpxDecoder.ts +759 -0
- package/src/decoders/md/MdDecoder.ts +180 -0
- package/src/encoders/docx/DocxEncoder.ts +710 -0
- package/src/encoders/hwp/HwpEncoder.ts +711 -0
- package/src/encoders/hwpx/HwpxEncoder.ts +770 -0
- package/src/encoders/md/MdEncoder.ts +108 -0
- package/src/index.ts +47 -0
- package/src/model/builders.ts +66 -0
- package/src/model/doc-props.ts +138 -0
- package/src/model/doc-tree.ts +90 -0
- package/src/pipeline/Pipeline.ts +71 -0
- package/src/pipeline/registry.ts +18 -0
- package/src/safety/ShieldedParser.ts +91 -0
- package/src/safety/StyleBridge.ts +106 -0
- package/src/toolkit/ArchiveKit.ts +150 -0
- package/src/toolkit/BinaryKit.ts +187 -0
- package/src/toolkit/TextKit.ts +57 -0
- package/src/toolkit/XmlKit.ts +91 -0
- package/src/walk/TreeWalker.ts +42 -0
- package/src/walk/tree-ops.ts +26 -0
- package/tsconfig.json +23 -0
- package/tsup.config.ts +12 -0
|
@@ -0,0 +1,809 @@
|
|
|
1
|
+
import type { Decoder } from '../../contract/decoder';
|
|
2
|
+
import type { DocRoot, ContentNode, ParaNode, SpanNode } from '../../model/doc-tree';
|
|
3
|
+
import type { Outcome } from '../../contract/result';
|
|
4
|
+
import type { Align, Stroke, StrokeKind, PageDims, TextProps, ParaProps, CellProps, GridProps } from '../../model/doc-props';
|
|
5
|
+
import { succeed, fail } from '../../contract/result';
|
|
6
|
+
import { buildRoot, buildSheet, buildPara, buildSpan, buildGrid, buildRow, buildCell } from '../../model/builders';
|
|
7
|
+
import { ShieldedParser } from '../../safety/ShieldedParser';
|
|
8
|
+
import { BinaryKit } from '../../toolkit/BinaryKit';
|
|
9
|
+
import { Metric, safeHex, safeFont } from '../../safety/StyleBridge';
|
|
10
|
+
import { registry } from '../../pipeline/registry';
|
|
11
|
+
import { A4 } from '../../model/doc-props';
|
|
12
|
+
import pako from 'pako';
|
|
13
|
+
|
|
14
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
15
|
+
HWP 5.0 Tag Constants
|
|
16
|
+
═══════════════════════════════════════════════════════════════ */
|
|
17
|
+
|
|
18
|
+
/** Base value that every HWP 5.0 record tag id in this file is expressed relative to. */
const HWPTAG_BEGIN = 16;

// Tags handled while reading the DocInfo stream.
const TAG_FACE_NAME       = HWPTAG_BEGIN + 3;   // 19
const TAG_BORDER_FILL     = HWPTAG_BEGIN + 4;   // 20
const TAG_CHAR_SHAPE      = HWPTAG_BEGIN + 5;   // 21
const TAG_PARA_SHAPE      = HWPTAG_BEGIN + 9;   // 25

// Tags handled while reading a body section stream.
const TAG_PARA_HEADER     = HWPTAG_BEGIN + 50;  // 66
const TAG_PARA_TEXT       = HWPTAG_BEGIN + 51;  // 67
const TAG_PARA_CHAR_SHAPE = HWPTAG_BEGIN + 52;  // 68
const TAG_CTRL_HEADER     = HWPTAG_BEGIN + 55;  // 71
const TAG_PAGE_DEF        = HWPTAG_BEGIN + 57;  // 73

// Table / cell tags: two alternative ids are accepted for each, plus LIST_HEADER as a cell.
const TAG_LIST_HEADER     = HWPTAG_BEGIN + 56;  // 72
const TAG_TABLE_A         = HWPTAG_BEGIN + 61;  // 77
const TAG_CELL_A          = HWPTAG_BEGIN + 62;  // 78
const TAG_TABLE_B         = HWPTAG_BEGIN + 64;  // 80
const TAG_CELL_B          = HWPTAG_BEGIN + 65;  // 81

const TABLE_TAGS: ReadonlySet<number> = new Set([TAG_TABLE_A, TAG_TABLE_B]);
const CELL_TAGS: ReadonlySet<number> = new Set([TAG_CELL_A, TAG_CELL_B, TAG_LIST_HEADER]);

/** True when `t` is one of the accepted table record tags. */
function isTableTag(t: number) {
  return TABLE_TAGS.has(t);
}

/** True when `t` is one of the accepted cell record tags (LIST_HEADER included). */
function isCellTag(t: number) {
  return CELL_TAGS.has(t);
}

// Control id of a table CTRL_HEADER: the bytes ' lbt' read as a little-endian UINT32.
const CTRL_TABLE = 0x74626C20;
|
|
42
|
+
|
|
43
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
44
|
+
Types
|
|
45
|
+
═══════════════════════════════════════════════════════════════ */
|
|
46
|
+
|
|
47
|
+
/** One record of an HWP stream: its tag id, nesting level and payload bytes. */
interface HwpRecord {
  tag: number;
  level: number;
  data: Uint8Array;
}

/** Decoded CHAR_SHAPE record. */
interface HwpCharShape {
  /** Seven face ids, one UINT16 per slot of the record. */
  faceIds: number[];
  /** Font height in HWP units (100 = 1pt). */
  height: number;
  bold: boolean;
  italic: boolean;
  underline: boolean;
  strikeout: boolean;
  superscript: boolean;
  subscript: boolean;
  /** Text colour as an upper-case RRGGBB hex string. */
  textColor: string;
}

/** Decoded PARA_SHAPE record. */
interface HwpParaShape {
  align: Align;
  spaceBefore: number;
  spaceAfter: number;
  lineSpacing: number;
  indent: number;
}

/** One border side of a BORDER_FILL record. */
interface HwpBorderSide {
  type: number;
  widthPt: number;
  color: string;
}

/** Decoded BORDER_FILL record: the border sides plus an optional solid background colour. */
interface HwpBorderFill {
  borders: HwpBorderSide[];
  bgColor?: string;
}

/** Lookup tables collected from the DocInfo stream; body records refer to them by index. */
interface DocInfo {
  faceNames: string[];
  charShapes: HwpCharShape[];
  paraShapes: HwpParaShape[];
  borderFills: HwpBorderFill[];
}

/** A decoded character together with its WCHAR position inside the PARA_TEXT record. */
interface ParsedChar {
  pos: number;
  ch: string;
}

/** Result of decoding a PARA_TEXT record. */
interface ParaTextResult {
  chars: ParsedChar[];
  controlPositions: number[];
}
|
|
87
|
+
|
|
88
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
89
|
+
Low-level record parsing
|
|
90
|
+
═══════════════════════════════════════════════════════════════ */
|
|
91
|
+
|
|
92
|
+
/**
 * Splits a raw (already decompressed) stream into its flat list of records.
 *
 * Each record starts with a 32-bit little-endian header word:
 * bits 0-9 hold the tag, bits 10-19 the level and bits 20-31 the payload size.
 * A size field of 0xFFF means the real size follows as an extra UINT32.
 * Parsing stops silently at the first header or payload that would run past
 * the end of `data`; the records collected so far are returned.
 *
 * @param data stream bytes
 * @returns records in stream order; `data` payloads are views into the input
 */
function parseRecords(data: Uint8Array): HwpRecord[] {
  const records: HwpRecord[] = [];
  const total = data.length;
  let cursor = 0;

  for (;;) {
    if (cursor + 4 > total) break;
    const header = BinaryKit.readU32LE(data, cursor);
    cursor += 4;

    let size = (header >> 20) & 0xFFF;
    if (size === 0xFFF) {
      // Extended form: the 12-bit field is saturated, the real size comes next.
      if (cursor + 4 > total) break;
      size = BinaryKit.readU32LE(data, cursor);
      cursor += 4;
    }

    const end = cursor + size;
    if (end > total) break; // truncated payload

    records.push({
      tag: header & 0x3FF,
      level: (header >> 10) & 0x3FF,
      data: data.subarray(cursor, end),
    });
    cursor = end;
  }

  return records;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Best-effort raw-DEFLATE decompression.
 *
 * @param data possibly compressed bytes
 * @returns the output of `pako.inflateRaw`, or `data` itself when inflating throws
 */
function tryInflate(data: Uint8Array): Uint8Array {
  let inflated: Uint8Array;
  try {
    inflated = pako.inflateRaw(data);
  } catch {
    // Not a valid deflate stream: hand the bytes back untouched.
    return data;
  }
  return inflated;
}
|
|
116
|
+
|
|
117
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
118
|
+
FileHeader
|
|
119
|
+
═══════════════════════════════════════════════════════════════ */
|
|
120
|
+
|
|
121
|
+
/**
 * Reads the property flags of the FileHeader stream.
 *
 * The flags are a UINT32 at byte offset 36: bit 0 = compressed, bit 1 = encrypted.
 * When the buffer is too short to contain them, the document is assumed to be
 * compressed and not encrypted.
 *
 * @param buf FileHeader stream bytes
 */
function parseFileHeader(buf: Uint8Array) {
  const PROPS_OFFSET = 36;
  if (buf.length < PROPS_OFFSET + 4) {
    return { compressed: true, encrypted: false };
  }
  const flags = BinaryKit.readU32LE(buf, PROPS_OFFSET);
  const compressed = (flags & 0x1) !== 0;
  const encrypted = (flags & 0x2) !== 0;
  return { compressed, encrypted };
}
|
|
126
|
+
|
|
127
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
128
|
+
DocInfo parsing
|
|
129
|
+
═══════════════════════════════════════════════════════════════ */
|
|
130
|
+
|
|
131
|
+
/**
 * Builds the DocInfo lookup tables (face names, character shapes, paragraph
 * shapes, border fills) from the DocInfo stream.
 *
 * Entries are appended in stream order. A record whose parser throws is
 * skipped, so nothing is appended for it.
 *
 * @param data DocInfo stream bytes
 * @param compressed whether `data` has to be inflated first
 */
function parseDocInfo(data: Uint8Array, compressed: boolean): DocInfo {
  const stream = compressed ? tryInflate(data) : data;
  const records = parseRecords(stream);
  const info: DocInfo = { faceNames: [], charShapes: [], paraShapes: [], borderFills: [] };

  for (const rec of records) {
    try {
      switch (rec.tag) {
        case TAG_FACE_NAME:
          info.faceNames.push(parseFaceName(rec.data));
          break;
        case TAG_CHAR_SHAPE:
          info.charShapes.push(parseCharShape(rec.data));
          break;
        case TAG_PARA_SHAPE:
          info.paraShapes.push(parseParaShape(rec.data));
          break;
        case TAG_BORDER_FILL:
          info.borderFills.push(parseBorderFill(rec.data));
          break;
        default:
          break; // tag not needed by this decoder
      }
    } catch {
      /* skip malformed record */
    }
  }
  return info;
}
|
|
146
|
+
|
|
147
|
+
/* ── FACE_NAME ──────────────────────────────────────────────── */
|
|
148
|
+
|
|
149
|
+
/**
 * Extracts the font name from a FACE_NAME record.
 *
 * Byte 0 is skipped, bytes 1-2 hold the name length in UTF-16 code units and
 * the UTF-16LE name follows from byte 3.
 *
 * @returns the decoded name, or '' when the record is shorter than it claims
 */
function parseFaceName(d: Uint8Array): string {
  const NAME_START = 3;
  if (d.length < NAME_START) return '';

  const unitCount = BinaryKit.readU16LE(d, 1);
  const nameEnd = NAME_START + unitCount * 2;
  if (d.length < nameEnd) return '';

  const utf16 = new TextDecoder('utf-16le');
  return utf16.decode(d.subarray(NAME_START, nameEnd));
}
|
|
155
|
+
|
|
156
|
+
/* ── CHAR_SHAPE ─────────────────────────────────────────────── */
|
|
157
|
+
/* offset size field
|
|
158
|
+
0 14 faceId[7] (UINT16 × 7)
|
|
159
|
+
14 7 ratio[7]
|
|
160
|
+
21 7 spacing[7]
|
|
161
|
+
28 7 relSize[7]
|
|
162
|
+
35 7 offset[7]
|
|
163
|
+
42 4 height (UINT32, HWP-units 100 = 1pt)
|
|
164
|
+
46 4 attr (UINT32, bit flags)
|
|
165
|
+
50 1 shadowX
|
|
166
|
+
51 1 shadowY
|
|
167
|
+
52 4 textColor (COLORREF R,G,B,0) */
|
|
168
|
+
|
|
169
|
+
/**
 * Decodes a CHAR_SHAPE record.
 *
 * Fields that lie beyond the end of a short record fall back to defaults:
 * face id 0, height 1000 (10pt), no attributes, black text.
 *
 * @param d record payload
 * @returns face ids, height in HWP units (100 = 1pt), style flags and text colour
 */
function parseCharShape(d: Uint8Array): HwpCharShape {
  const faceIds: number[] = [];
  for (let i = 0; i < 7; i++) faceIds.push(d.length >= (i + 1) * 2 ? BinaryKit.readU16LE(d, i * 2) : 0);

  const height = d.length >= 46 ? BinaryKit.readU32LE(d, 42) : 1000;
  const attr   = d.length >= 50 ? BinaryKit.readU32LE(d, 46) : 0;

  // attr bit layout (HWP 5.0 character-shape attribute table):
  //   0: italic, 1: bold, 2-3: underline type (0 = none), 4-7: underline shape,
  //   8-10: outline, 11-12: shadow, 13: emboss, 14: engrave,
  //   15: superscript, 16: subscript, 17: reserved, 18-20: strikeout,
  //   21-24: emphasis mark, 25: font-fitted space, 26-29: strikeout shape, 30: kerning
  const ulType = (attr >> 2) & 0x3;   // 2 bits at 2-3; bit 4 already belongs to the underline shape
  const skType = (attr >> 18) & 0x7;  // 3 bits at 18-20
  const isSuper = ((attr >> 15) & 1) !== 0;
  const isSub   = ((attr >> 16) & 1) !== 0;

  return {
    faceIds,
    // Reject zero or absurd sizes and fall back to 10pt.
    height: (height > 0 && height < 100000) ? height : 1000,
    italic: (attr & 1) !== 0,
    bold: ((attr >> 1) & 1) !== 0,
    underline: ulType !== 0,
    strikeout: skType !== 0,
    superscript: isSuper,
    // Keep the two flags mutually exclusive, superscript wins if both bits are set.
    subscript: isSub && !isSuper,
    textColor: d.length >= 56 ? colorRef(d, 52) : '000000',
  };
}
|
|
198
|
+
|
|
199
|
+
/* ── PARA_SHAPE ─────────────────────────────────────────────── */
|
|
200
|
+
/* offset  size  field
   0       4     attr1 (bits 0-1 = line-spacing type,
                        bits 2-4 = alignment: 0=justify,1=left,2=right,3=center,4=distribute,5=divide)
   4       4     leftMargin  (HWPUNIT)
   8       4     rightMargin
   12      4     indent
   16      4     spaceBefore
   20      4     spaceAfter
   24      4     lineSpacing */

// Alignment code -> model alignment. The two spread-out modes (4, 5) are mapped to 'justify'.
const ALIGN_TBL: Record<number, Align> = { 0: 'justify', 1: 'left', 2: 'right', 3: 'center', 4: 'justify', 5: 'justify' };

/**
 * Decodes a PARA_SHAPE record.
 *
 * Fields beyond the end of a short record fall back to defaults
 * (left aligned, no indent or spacing, line spacing 160).
 *
 * @param d record payload
 */
function parseParaShape(d: Uint8Array): HwpParaShape {
  if (d.length < 4) return { align: 'left', spaceBefore: 0, spaceAfter: 0, lineSpacing: 160, indent: 0 };
  const attr = BinaryKit.readU32LE(d, 0);
  // Alignment sits in bits 2-4; the two lowest bits are the line-spacing type.
  const alignCode = (attr >> 2) & 0x7;
  return {
    align:       ALIGN_TBL[alignCode] ?? 'left',
    indent:      d.length >= 16 ? i32(d, 12) : 0,
    spaceBefore: d.length >= 20 ? i32(d, 16) : 0,
    spaceAfter:  d.length >= 24 ? i32(d, 20) : 0,
    lineSpacing: d.length >= 28 ? i32(d, 24) : 160,
  };
}
|
|
222
|
+
|
|
223
|
+
/* ── BORDER_FILL ────────────────────────────────────────────── */
|
|
224
|
+
/* [0:2] attr
|
|
225
|
+
For each of 5 borders (left,right,top,bottom,diagonal): 6 bytes
|
|
226
|
+
+0 type(BYTE) +1 widthIdx(BYTE) +2 color(COLORREF)
|
|
227
|
+
[32:4] fillType
|
|
228
|
+
[36:4] faceColor (bgColor for solid fill) */
|
|
229
|
+
|
|
230
|
+
// Border width index -> stroke width in points.
const BORDER_W_PT = [
  0.28, 0.34, 0.43, 0.57, 0.71, 0.85, 1.13, 1.42,
  1.70, 1.98, 2.84, 4.25, 5.67, 8.50, 11.34, 14.17,
];

// Border type code -> model stroke kind.
const BORDER_KIND: Record<number, StrokeKind> = {
  0: 'none',
  1: 'solid',
  2: 'dash',
  3: 'dot',
  4: 'dash',
  5: 'dash',
  6: 'dash',
  7: 'dot',
  8: 'double',
  9: 'double',
  10: 'double',
};

/**
 * Decodes a BORDER_FILL record.
 *
 * After the 2-byte attribute come 6-byte border entries (type, width index,
 * COLORREF); the first four are read. A side that does not fit in the record
 * becomes a type-0, 0.5pt, black placeholder. The fill block starts at byte 32;
 * when bit 0 of its type word is set, the colour at byte 36 is the background.
 *
 * @param d record payload
 */
function parseBorderFill(d: Uint8Array): HwpBorderFill {
  const SIDE_BYTES = 6;
  const borders: HwpBorderFill['borders'] = Array.from({ length: 4 }, (_unused, side) => {
    const base = 2 + side * SIDE_BYTES;
    if (base + SIDE_BYTES > d.length) {
      return { type: 0, widthPt: 0.5, color: '000000' };
    }
    return {
      type: d[base],
      widthPt: BORDER_W_PT[d[base + 1]] ?? 0.5,
      color: colorRef(d, base + 2),
    };
  });

  const fillOffset = 32; // attr(2) + 5 borders(30)
  const hasSolidFill =
    d.length >= fillOffset + 8 && (BinaryKit.readU32LE(d, fillOffset) & 1) !== 0;
  const bgColor = hasSolidFill ? colorRef(d, fillOffset + 4) : undefined;

  return { borders, bgColor };
}
|
|
251
|
+
|
|
252
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
253
|
+
Body section parsing
|
|
254
|
+
═══════════════════════════════════════════════════════════════ */
|
|
255
|
+
|
|
256
|
+
/**
 * Parses one body section stream into content nodes.
 *
 * Records are scanned in order: a PAGE_DEF record sets the page dimensions
 * (falling back to A4 through the shield), a PARA_HEADER record starts a
 * paragraph group whose nodes are appended to the content, and every other
 * record is skipped.
 *
 * @param raw section stream bytes
 * @param compressed whether `raw` has to be inflated first
 * @param di DocInfo lookup tables
 * @param shield guard used to contain failures of individual elements
 * @returns the content nodes plus the page dimensions of the last PAGE_DEF seen, if any
 */
function parseBody(
  raw: Uint8Array, compressed: boolean, di: DocInfo, shield: ShieldedParser,
): { content: ContentNode[]; pageDims?: PageDims } {
  const stream = compressed ? tryInflate(raw) : raw;
  const recs = parseRecords(stream);
  const content: ContentNode[] = [];
  let pageDims: PageDims | undefined;

  for (let i = 0; i < recs.length; ) {
    const rec = recs[i];

    if (rec.tag === TAG_PARA_HEADER) {
      const at = i;
      const group = shield.guard(
        () => parseParagraphGroup(recs, at, di, shield),
        { nodes: [] as ContentNode[], next: at + 1 },
        `hwp:para@${at}`,
      );
      content.push(...group.nodes);
      i = group.next; // the group reports where the next sibling starts
      continue;
    }

    if (rec.tag === TAG_PAGE_DEF) {
      pageDims = shield.guard(() => parsePageDef(rec.data), A4, 'hwp:pageDef');
    }
    i++;
  }

  return { content, pageDims };
}
|
|
282
|
+
|
|
283
|
+
/* ── Paragraph group ────────────────────────────────────────── */
|
|
284
|
+
|
|
285
|
+
/**
 * Parses the PARA_HEADER at `start` together with all deeper-level records
 * that follow it.
 *
 * Only direct children (level + 1) are interpreted: PARA_TEXT, PARA_CHAR_SHAPE
 * and CTRL_HEADER. A CTRL_HEADER carrying the table control id is parsed as a
 * table; any other control is skipped with its whole subtree.
 *
 * @returns the paragraph node (only when its text is not blank) followed by
 *          the tables found, and the index of the first record after the group
 */
function parseParagraphGroup(
  recs: HwpRecord[], start: number, di: DocInfo, shield: ShieldedParser,
): { nodes: ContentNode[]; next: number } {
  const header = recs[start];
  const baseLevel = header.level;
  const childLevel = baseLevel + 1;

  // paraShapeId: UINT16 at offset 8 of the header payload
  const shapeId = header.data.length >= 10 ? BinaryKit.readU16LE(header.data, 8) : 0;
  const paraShape = di.paraShapes[shapeId];

  let text: ParaTextResult | null = null;
  let csPairs: [number, number][] = [];
  const grids: ContentNode[] = [];
  let i = start + 1;

  while (i < recs.length && recs[i].level > baseLevel) {
    const rec = recs[i];

    // Deeper descendants are not interpreted here.
    if (rec.level !== childLevel) { i++; continue; }

    switch (rec.tag) {
      case TAG_PARA_TEXT:
        text = decodeParaText(rec.data);
        i++;
        break;

      case TAG_PARA_CHAR_SHAPE:
        csPairs = parseCharShapePairs(rec.data);
        i++;
        break;

      case TAG_CTRL_HEADER: {
        const isTable = rec.data.length >= 4 && BinaryKit.readU32LE(rec.data, 0) === CTRL_TABLE;
        if (!isTable) {
          i = skipKids(recs, i);
          break;
        }
        const at = i;
        // If table parsing fails, drop the table and resume after its subtree.
        const fallback: { grid: ContentNode | null; next: number } = { grid: null, next: skipKids(recs, at) };
        const table = shield.guard(() => parseTableCtrl(recs, at, di, shield), fallback, `hwp:tbl@${at}`);
        if (table.grid) grids.push(table.grid);
        i = table.next;
        break;
      }

      default:
        i++;
    }
  }

  const nodes: ContentNode[] = [];

  // A paragraph is emitted only when its text contains something besides whitespace.
  if (text && text.chars.length > 0 && text.chars.map(c => c.ch).join('').trim()) {
    const spans = resolveCharShapes(text.chars, csPairs, di);
    nodes.push(buildPara(spans, buildParaProps(paraShape)));
  }

  nodes.push(...grids);
  return { nodes, next: i };
}
|
|
340
|
+
|
|
341
|
+
/**
 * Returns the index of the first record after `idx` whose level is not deeper
 * than that of `recs[idx]`, i.e. the position just past its subtree
 * (`recs.length` when the subtree runs to the end).
 */
function skipKids(recs: HwpRecord[], idx: number): number {
  const ownLevel = recs[idx].level;
  for (let next = idx + 1; next < recs.length; next++) {
    if (recs[next].level <= ownLevel) return next;
  }
  return recs.length;
}
|
|
347
|
+
|
|
348
|
+
/* ── PARA_TEXT ───────────────────────────────────────────────── */
|
|
349
|
+
|
|
350
|
+
// Extended controls: occupy 8 WCHARs and are paired with a CTRL_HEADER record.
// (1 reserved, 2 section/column def, 3 field start, 11 drawing object/table, 12, 14 reserved,
//  15 hidden comment, 16 header/footer, 17 foot/endnote, 18 auto number,
//  21 page control, 22 bookmark/index mark, 23 ruby text/overlapping letters)
const EXT_CTRL = new Set([1, 2, 3, 11, 12, 14, 15, 16, 17, 18, 21, 22, 23]);
// Inline controls: occupy 8 WCHARs, no CTRL_HEADER. Tab (9) is inline too but
// handled separately because it produces text.
const INL_CTRL = new Set([4, 5, 6, 7, 8, 19, 20]);
// Single-WCHAR control codes that stand for visible text
// (10 line break, 24 hyphen, 30 non-breaking space, 31 fixed-width space).
const CHAR_CTRL_TEXT: Record<number, string> = { 10: '\n', 24: '-', 30: '\u00A0', 31: ' ' };

/**
 * Decodes a PARA_TEXT record (a sequence of little-endian 16-bit code units).
 *
 * Positions are counted in WCHARs, so an 8-WCHAR control advances the position
 * by 8; this keeps `pos` comparable with the positions stored in
 * PARA_CHAR_SHAPE. Decoding stops at the paragraph-end code (13). A trailing
 * odd byte is ignored.
 *
 * @param d record payload
 * @returns the text characters with their positions, and the positions of all
 *          extended controls
 */
function decodeParaText(d: Uint8Array): ParaTextResult {
  const chars: ParsedChar[] = [];
  const controlPositions: number[] = [];
  let i = 0, pos = 0;

  while (i + 1 < d.length) {
    const c = d[i] | (d[i + 1] << 8);

    if (c === 13) break; // paragraph end

    // 8-WCHAR controls: skip the code unit together with its 7-unit payload so
    // that the payload is never mistaken for text.
    if (c === 9 || EXT_CTRL.has(c) || INL_CTRL.has(c)) {
      if (c === 9) chars.push({ pos, ch: '\t' });
      else if (EXT_CTRL.has(c)) controlPositions.push(pos);
      i += 16; pos += 8;
      continue;
    }

    // Remaining codes below 32 are single-WCHAR controls; only some map to text.
    if (c < 32) {
      const ch = CHAR_CTRL_TEXT[c];
      if (ch !== undefined) chars.push({ pos, ch });
      i += 2; pos++;
      continue;
    }

    chars.push({ pos, ch: String.fromCharCode(c) });
    i += 2; pos++;
  }
  return { chars, controlPositions };
}
|
|
384
|
+
|
|
385
|
+
/* ── PARA_CHAR_SHAPE ────────────────────────────────────────── */
|
|
386
|
+
|
|
387
|
+
/**
 * Decodes a PARA_CHAR_SHAPE record into `[startPosition, charShapeId]` pairs.
 * Each pair is two little-endian UINT32s; trailing bytes that do not form a
 * complete 8-byte pair are ignored.
 */
function parseCharShapePairs(d: Uint8Array): [number, number][] {
  const PAIR_BYTES = 8;
  const pairCount = Math.floor(d.length / PAIR_BYTES);
  return Array.from({ length: pairCount }, (_unused, n): [number, number] => {
    const base = n * PAIR_BYTES;
    return [BinaryKit.readU32LE(d, base), BinaryKit.readU32LE(d, base + 4)];
  });
}
|
|
393
|
+
|
|
394
|
+
/* ── Char-shape → SpanNode resolution ───────────────────────── */
|
|
395
|
+
|
|
396
|
+
/**
 * Groups consecutive characters that share a character shape into styled spans.
 *
 * The shape of a character is the id of the last pair, in list order, whose
 * start position is <= the character position; scanning stops at the first
 * pair that starts later. When no pair applies, the id of the first pair is
 * used (0 if there are no pairs).
 *
 * @param chars decoded characters with their positions
 * @param pairs `[startPosition, charShapeId]` pairs
 * @param di DocInfo lookup tables
 * @returns one span per run; a single empty span when `chars` is empty
 */
function resolveCharShapes(chars: ParsedChar[], pairs: [number, number][], di: DocInfo): SpanNode[] {
  if (chars.length === 0) return [buildSpan('')];

  const fallbackId = pairs.length > 0 ? pairs[0][1] : 0;

  const shapeIdAt = (pos: number): number => {
    let found = fallbackId;
    for (let n = 0; n < pairs.length && pairs[n][0] <= pos; n++) found = pairs[n][1];
    return found;
  };

  const spans: SpanNode[] = [];
  let runId = shapeIdAt(chars[0].pos);
  let runText = '';

  chars.forEach((c, k) => {
    if (k > 0) {
      const id = shapeIdAt(c.pos);
      if (id !== runId) {
        // Shape changed: close the current run and start a new one.
        spans.push(styledSpan(runText, runId, di));
        runText = '';
        runId = id;
      }
    }
    runText += c.ch;
  });

  if (runText) spans.push(styledSpan(runText, runId, di));
  return spans;
}
|
|
419
|
+
|
|
420
|
+
/**
 * Builds a span for `text` using the character shape `shapeId`.
 *
 * An unknown shape id yields an unstyled span. Otherwise the font (first face
 * id, when it resolves to a non-empty name), size and the boolean style flags
 * are copied; the colour is set only when it is valid and not black.
 */
function styledSpan(text: string, shapeId: number, di: DocInfo): SpanNode {
  const shape = di.charShapes[shapeId];
  if (!shape) return buildSpan(text);

  const props: TextProps = {};

  const faceId = shape.faceIds[0] ?? 0;
  const faceName = faceId < di.faceNames.length ? di.faceNames[faceId] : undefined;
  if (faceName) props.font = safeFont(faceName);

  if (shape.height > 0) props.pt = Metric.hwpToPt(shape.height);

  // Only flags that are on are written, so absent keys mean "off".
  const flags: [boolean, 'b' | 'i' | 'u' | 's' | 'sup' | 'sub'][] = [
    [shape.bold, 'b'],
    [shape.italic, 'i'],
    [shape.underline, 'u'],
    [shape.strikeout, 's'],
    [shape.superscript, 'sup'],
    [shape.subscript, 'sub'],
  ];
  for (const [enabled, key] of flags) {
    if (enabled) props[key] = true;
  }

  const colour = safeHex(shape.textColor);
  if (colour && colour !== '000000') props.color = colour;

  return buildSpan(text, props);
}
|
|
440
|
+
|
|
441
|
+
/* ── Table control parsing ──────────────────────────────────── */
|
|
442
|
+
|
|
443
|
+
/**
 * Parses a table control: the CTRL_HEADER at `ctrlIdx` plus every deeper-level
 * record that follows it.
 *
 * Steps:
 *  1. collect the TABLE record and the cell records found directly below the
 *     control, remembering the record range that holds each cell's paragraphs;
 *  2. parse every cell (a failing cell becomes an empty cell at its sequential
 *     position);
 *  3. if any cell reports a column outside the table, lay all cells out
 *     sequentially, row by row;
 *  4. derive column widths from the cell widths;
 *  5. build the rows (ascending row index, cells ordered by column).
 *
 * @returns the grid node, or `grid: null` when there is no TABLE record or no
 *          cell, and the index of the first record after the control
 */
function parseTableCtrl(
  recs: HwpRecord[], ctrlIdx: number, di: DocInfo, shield: ShieldedParser,
): { grid: ContentNode | null; next: number } {
  const ctrlLv = recs[ctrlIdx].level;
  const tblLevel = ctrlLv + 1;
  let i = ctrlIdx + 1;

  let tblData: Uint8Array | null = null;
  const cells: { data: Uint8Array; tag: number; cStart: number; cEnd: number }[] = [];

  // Collect TABLE and cell records within this control's scope.
  while (i < recs.length && recs[i].level > ctrlLv) {
    const r = recs[i];

    if (r.level !== tblLevel) {
      i++;
    } else if (isTableTag(r.tag)) {
      tblData = r.data;
      i++;
    } else if (r.tag === TAG_LIST_HEADER) {
      // LIST_HEADER as cell: paraCount tells how many paragraphs follow.
      const cellData = r.data;
      const paraCount = cellData.length >= 2 ? BinaryKit.readU16LE(cellData, 0) : 0;
      i++;
      const cStart = i;
      // Consume exactly paraCount paragraphs (each with its child records).
      let consumed = 0;
      while (i < recs.length && consumed < paraCount) {
        if (recs[i].tag === TAG_PARA_HEADER && recs[i].level === tblLevel) {
          consumed++;
          i++;
          while (i < recs.length && recs[i].level > tblLevel) i++;
        } else if (recs[i].level > tblLevel) {
          i++;
        } else {
          break; // hit the next sibling at the same level
        }
      }
      cells.push({ data: cellData, tag: TAG_LIST_HEADER, cStart, cEnd: i });
    } else if (isCellTag(r.tag)) {
      // Full CELL record: everything deeper than the cell belongs to it.
      const cellData = r.data;
      const cellTag = r.tag;
      i++;
      const cStart = i;
      while (i < recs.length && recs[i].level > tblLevel) i++;
      cells.push({ data: cellData, tag: cellTag, cStart, cEnd: i });
    } else {
      i++;
    }
  }

  if (!tblData || cells.length === 0) return { grid: null, next: i };

  const rowCnt = tblData.length >= 6 ? BinaryKit.readU16LE(tblData, 4) : 1;
  // A declared column count of 0 would make the sequential layout divide by
  // zero and lose the whole table, so treat it as a single column.
  const colCnt = Math.max(1, tblData.length >= 8 ? BinaryKit.readU16LE(tblData, 6) : 1);

  interface PC { row: number; col: number; cs: number; rs: number; widthHwp: number; props: CellProps; paras: ParaNode[] }
  const parsed: PC[] = [];

  for (let ci = 0; ci < cells.length; ci++) {
    const c = cells[ci];
    const seqIdx = ci;
    const pc = shield.guard(
      () => parseCellRec(c.data, c.tag, recs, c.cStart, c.cEnd, di, shield, seqIdx, colCnt),
      { row: Math.floor(ci / colCnt), col: ci % colCnt, cs: 1, rs: 1, widthHwp: 0, props: {}, paras: [buildPara([buildSpan('')])] },
      `hwp:cell@${c.cStart}`,
    );
    parsed.push(pc);
  }

  // Validate cell positions; fall back to a sequential layout if any is invalid.
  const posValid = parsed.every(c => c.row >= 0 && c.col >= 0 && c.col < colCnt);
  if (!posValid) {
    parsed.forEach((c, idx) => {
      c.row = Math.floor(idx / colCnt);
      c.col = idx % colCnt;
    });
  }

  // Compute column widths in points from the cell widths.
  const colWidthsPt: number[] = new Array(colCnt).fill(0);
  // Pass 1: cells spanning a single column give that column's width directly.
  for (const c of parsed) {
    if (c.cs === 1 && c.widthHwp > 0) {
      const wPt = Metric.hwpToPt(c.widthHwp);
      if (wPt > colWidthsPt[c.col]) colWidthsPt[c.col] = wPt;
    }
  }
  // Pass 2: for columns still unknown, split what is left of a multi-column
  // cell's width evenly over the unknown columns it covers.
  if (colWidthsPt.some(w => w === 0)) {
    for (const c of parsed) {
      if (c.cs <= 1 || c.widthHwp <= 0) continue;
      const lastCol = Math.min(c.col + c.cs, colCnt);
      let known = 0;
      let unknownCols = 0;
      for (let ci = c.col; ci < lastCol; ci++) {
        if (colWidthsPt[ci] > 0) known += colWidthsPt[ci];
        else unknownCols++;
      }
      if (unknownCols === 0) continue;
      const remaining = Metric.hwpToPt(c.widthHwp) - known;
      if (remaining <= 0) continue;
      const each = remaining / unknownCols;
      for (let ci = c.col; ci < lastCol; ci++) {
        if (colWidthsPt[ci] === 0) colWidthsPt[ci] = each;
      }
    }
  }

  // Group the cells by their final row index. Grouping (instead of scanning a
  // row range computed before the fallback layout) keeps every cell and does
  // not iterate over row indexes that hold no cell.
  const byRow = new Map<number, PC[]>();
  for (const c of parsed) {
    const bucket = byRow.get(c.row);
    if (bucket) bucket.push(c);
    else byRow.set(c.row, [c]);
  }
  const rows = [...byRow.entries()]
    .sort((a, b) => a[0] - b[0])
    .map(([, rowCells]) => buildRow(
      rowCells
        .sort((a, b) => a.col - b.col)
        .map(c => buildCell(
          c.paras.length ? c.paras : [buildPara([buildSpan('')])],
          { cs: c.cs, rs: c.rs, props: c.props },
        )),
    ));
  if (rows.length === 0) return { grid: null, next: i };

  // Table-level default stroke: the border-fill id follows the fixed 18-byte
  // header and the per-row UINT16 array.
  let defStroke: Stroke | undefined;
  const bfOff = 18 + rowCnt * 2;
  if (tblData.length >= bfOff + 2) {
    defStroke = strokeFromBF(BinaryKit.readU16LE(tblData, bfOff), di);
  }

  const gp: GridProps = {};
  if (defStroke) gp.defaultStroke = defStroke;
  if (colWidthsPt.some(w => w > 0)) gp.colWidths = colWidthsPt;
  return { grid: buildGrid(rows, gp), next: i };
}
|
|
581
|
+
|
|
582
|
+
/* ── Cell record ────────────────────────────────────────────── */
|
|
583
|
+
/* LIST_HEADER for cells (HWP 5.0/5.1), as read below:
   [0:2]  paraCount     [2:4]  attr (bits 6-7 = vertAlign)
   [8:2]  colAddr       [10:2] rowAddr
   [12:2] colSpan       [14:2] rowSpan
   [16:4] width(HWPUNIT) [20:4] height(HWPUNIT)
   [24:8] padding[4]    [32:2] borderFillId */

/** Copies the four side strokes and the non-white background of the 1-based border fill `bfId` into `props`; ids outside the table are ignored. */
function applyCellBorderFill(props: CellProps, bfId: number, di: DocInfo): void {
  if (bfId <= 0 || bfId > di.borderFills.length) return;
  const bf = di.borderFills[bfId - 1];
  if (bf.borders.length >= 4) {
    props.left  = toStroke(bf.borders[0]);
    props.right = toStroke(bf.borders[1]);
    props.top   = toStroke(bf.borders[2]);
    props.bot   = toStroke(bf.borders[3]);
  }
  if (bf.bgColor && bf.bgColor !== 'FFFFFF') props.bg = bf.bgColor;
}

/**
 * Parses one table cell: its position, spans, width and border/background
 * properties from the cell record, and its paragraphs from `recs[cStart..cEnd)`.
 *
 * @param d cell record payload
 * @param tag tag of the cell record (LIST_HEADER or a CELL tag)
 * @param recs all records of the section
 * @param cStart index of the first record belonging to the cell content
 * @param cEnd index one past the last record belonging to the cell content
 * @param di DocInfo lookup tables
 * @param shield guard used to contain failures of individual paragraphs
 * @param seqIdx sequential index of the cell inside the table (used when the
 *               record carries no usable position)
 * @param colCnt column count of the table (0 is treated as 1)
 * @returns position, spans, width and properties of the cell plus its
 *          paragraphs (always at least one, possibly empty, paragraph)
 */
function parseCellRec(
  d: Uint8Array, tag: number, recs: HwpRecord[], cStart: number, cEnd: number,
  di: DocInfo, shield: ShieldedParser, seqIdx: number, colCnt: number,
) {
  let col: number, row: number, cs = 1, rs = 1;
  let widthHwp = 0;
  const props: CellProps = {};
  const cols = colCnt || 1;

  // NOTE(review): the attribute word is read at offset 2 although the cell
  // fields below start at offset 8, which suggests an 8-byte list header whose
  // attribute may sit elsewhere - confirm the vertical-alignment bits against real files.
  const attr = d.length >= 6 ? BinaryKit.readU32LE(d, 2) : 0;
  const va = (attr >> 6) & 0x3;
  if (va === 1) props.va = 'mid';
  else if (va === 2) props.va = 'bot';

  if (tag === TAG_LIST_HEADER && d.length >= 22) {
    // LIST_HEADER with cell-specific fields: column address, row address,
    // column span, row span (same order as the CELL branch below).
    col = BinaryKit.readU16LE(d, 8);
    row = BinaryKit.readU16LE(d, 10);
    cs = Math.max(1, BinaryKit.readU16LE(d, 12));
    rs = Math.max(1, BinaryKit.readU16LE(d, 14));
    widthHwp = BinaryKit.readU32LE(d, 16);
    applyCellBorderFill(props, d.length >= 34 ? BinaryKit.readU16LE(d, 32) : 0, di);
  } else if (tag !== TAG_LIST_HEADER) {
    // Full CELL record with position/span/borderFill; missing fields fall back
    // to the sequential position and a 1x1 span.
    col = d.length >= 8 ? BinaryKit.readU16LE(d, 6) : seqIdx % cols;
    row = d.length >= 10 ? BinaryKit.readU16LE(d, 8) : Math.floor(seqIdx / cols);
    cs = d.length >= 12 ? Math.max(1, BinaryKit.readU16LE(d, 10)) : 1;
    rs = d.length >= 14 ? Math.max(1, BinaryKit.readU16LE(d, 12)) : 1;
    widthHwp = d.length >= 18 ? BinaryKit.readU32LE(d, 14) : 0;
    applyCellBorderFill(props, d.length >= 32 ? BinaryKit.readU16LE(d, 30) : 0, di);
  } else {
    // LIST_HEADER too short to carry a position: place the cell sequentially.
    row = Math.floor(seqIdx / cols);
    col = seqIdx % cols;
  }

  // Parse the cell content paragraphs.
  const paras: ParaNode[] = [];
  let k = cStart;
  while (k < cEnd) {
    if (recs[k].tag !== TAG_PARA_HEADER) { k++; continue; }

    const at = k;
    const r = shield.guard(
      () => {
        const hdr = recs[at];
        const lv = hdr.level;
        const psId = hdr.data.length >= 10 ? BinaryKit.readU16LE(hdr.data, 8) : 0;
        const ps = di.paraShapes[psId];
        let txt: ParaTextResult | null = null;
        let csp: [number, number][] = [];
        // Scan every deeper record of this paragraph (at any nesting level)
        // for its text and character-shape runs.
        let j = at + 1;
        for (; j < cEnd && recs[j].level > lv; j++) {
          if (recs[j].tag === TAG_PARA_TEXT) txt = decodeParaText(recs[j].data);
          else if (recs[j].tag === TAG_PARA_CHAR_SHAPE) csp = parseCharShapePairs(recs[j].data);
        }
        const spans = txt && txt.chars.length > 0 ? resolveCharShapes(txt.chars, csp, di) : [buildSpan('')];
        return { para: buildPara(spans, buildParaProps(ps)), next: j };
      },
      { para: buildPara([buildSpan('')]), next: at + 1 },
      `hwp:cellP@${at}`,
    );
    paras.push(r.para);
    k = r.next;
  }

  return { row, col, cs, rs, props, widthHwp, paras: paras.length ? paras : [buildPara([buildSpan('')])] };
}
|
|
681
|
+
|
|
682
|
+
/* ── PAGE_DEF ───────────────────────────────────────────────── */
|
|
683
|
+
/* [0:4] width [4:4] height [8:4] ml [12:4] mr
|
|
684
|
+
[16:4] mt [20:4] mb [36:4] attr (bit0=landscape) */
|
|
685
|
+
|
|
686
|
+
/**
 * Decode a PAGE_DEF record payload into page dimensions.
 *
 * Six little-endian 32-bit fields are read from offsets 0, 4, 8, 12, 16 and 20
 * (width, height, left/right/top/bottom margin) and each is converted with
 * `Metric.hwpToPt`. The attribute word at offset 36 is only consulted when the
 * payload holds at least 40 bytes; its lowest bit selects landscape.
 *
 * @param d Raw record payload.
 * @returns The decoded dimensions, or the `A4` default when the payload is
 *          shorter than 24 bytes.
 */
function parsePageDef(d: Uint8Array): PageDims {
  if (d.length < 24) return A4;

  // Read one 32-bit field and convert it to points in a single step.
  const ptAt = (offset: number) => Metric.hwpToPt(BinaryKit.readU32LE(d, offset));

  // Short payloads carry no attribute word; treat that as "no flags set".
  const attr = d.length >= 40 ? BinaryKit.readU32LE(d, 36) : 0;
  const isLandscape = (attr & 1) !== 0;

  return {
    wPt: ptAt(0),
    hPt: ptAt(4),
    ml: ptAt(8),
    mr: ptAt(12),
    mt: ptAt(16),
    mb: ptAt(20),
    orient: isLandscape ? 'landscape' : 'portrait',
  };
}
|
|
702
|
+
|
|
703
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
704
|
+
Helpers
|
|
705
|
+
═══════════════════════════════════════════════════════════════ */
|
|
706
|
+
|
|
707
|
+
/**
 * Read the 32-bit little-endian value at offset `o` and reinterpret it as a
 * signed integer: anything above 0x7FFFFFFF has 2^32 subtracted from it.
 *
 * @param d Source bytes.
 * @param o Byte offset of the value.
 * @returns The value as a signed number.
 */
function i32(d: Uint8Array, o: number): number {
  const raw = BinaryKit.readU32LE(d, o);
  if (raw > 0x7fffffff) {
    return raw - 0x100000000;
  }
  return raw;
}
|
|
711
|
+
|
|
712
|
+
/**
 * Format three consecutive bytes as a six-digit upper-case hex string.
 *
 * The byte at `o` becomes the two leading hex digits, `o + 1` the middle two
 * and `o + 2` the trailing two (presumably red, green, blue — confirm against
 * the record layout).
 *
 * @param d Source bytes.
 * @param o Offset of the first byte.
 * @returns The hex string, or `'000000'` when fewer than three bytes remain
 *          at `o`.
 */
function colorRef(d: Uint8Array, o: number): string {
  const end = o + 3;
  if (end > d.length) return '000000';

  const packed = (d[o] << 16) | (d[o + 1] << 8) | d[o + 2];
  const hex = packed.toString(16).padStart(6, '0');
  return hex.toUpperCase();
}
|
|
716
|
+
|
|
717
|
+
/**
 * Convert a parsed border description into a `Stroke`.
 *
 * @param b Border with a numeric type code, a width in points and a color.
 * @returns A stroke whose kind is looked up in `BORDER_KIND`, defaulting to
 *          `'solid'` when the type code has no entry.
 */
function toStroke(b: { type: number; widthPt: number; color: string }): Stroke {
  const { type, widthPt, color } = b;
  const kind = BORDER_KIND[type] ?? 'solid';
  return { kind, pt: widthPt, color };
}
|
|
720
|
+
|
|
721
|
+
/**
 * Resolve a border-fill id to a `Stroke` built from that fill's first border.
 *
 * Ids are 1-based: id `n` maps to `di.borderFills[n - 1]`.
 *
 * @param bfId Border-fill id.
 * @param di   Parsed document info holding the border-fill table.
 * @returns The stroke, or `undefined` when the id is non-positive, beyond the
 *          end of the table, or the fill has no borders.
 */
function strokeFromBF(bfId: number, di: DocInfo): Stroke | undefined {
  const fills = di.borderFills;
  if (bfId <= 0 || bfId > fills.length) return undefined;

  const { borders } = fills[bfId - 1];
  if (borders.length === 0) return undefined;

  const first = borders[0];
  const kind = BORDER_KIND[first.type] ?? 'solid';
  return { kind, pt: first.widthPt, color: first.color };
}
|
|
728
|
+
|
|
729
|
+
/**
 * Translate a parsed paragraph shape into `ParaProps`, emitting only the
 * properties that differ from the values this function treats as defaults.
 *
 * - `align` is copied unless it is empty or `'left'`.
 * - `spaceBefore`, `spaceAfter` and `indent` are converted with
 *   `Metric.hwpToPt`, and only when strictly positive.
 * - `lineSpacing` is divided by 100 and stored as `lineHeight` unless it is
 *   non-positive or exactly 160.
 *
 * @param ps Paragraph shape; may be omitted.
 * @returns The resulting properties; an empty object when `ps` is missing.
 */
function buildParaProps(ps?: HwpParaShape): ParaProps {
  const out: ParaProps = {};
  if (!ps) return out;

  const { align, spaceBefore, spaceAfter, lineSpacing, indent } = ps;

  if (align && align !== 'left') {
    out.align = align;
  }
  if (spaceBefore > 0) {
    out.spaceBefore = Metric.hwpToPt(spaceBefore);
  }
  if (spaceAfter > 0) {
    out.spaceAfter = Metric.hwpToPt(spaceAfter);
  }
  if (lineSpacing > 0 && lineSpacing !== 160) {
    out.lineHeight = lineSpacing / 100;
  }
  if (indent > 0) {
    out.indentPt = Metric.hwpToPt(indent);
  }

  return out;
}
|
|
739
|
+
|
|
740
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
741
|
+
Decoder class
|
|
742
|
+
═══════════════════════════════════════════════════════════════ */
|
|
743
|
+
|
|
744
|
+
/**
 * Decoder for the binary HWP format.
 *
 * The input is validated as an OLE2 container, split into named streams, and
 * then the `FileHeader`, `DocInfo` and body-section streams are parsed in
 * turn. Recoverable parse failures are routed through a `ShieldedParser` and
 * surface as warnings on the returned outcome.
 */
export class HwpScanner implements Decoder {
  readonly format = 'hwp';

  /**
   * Decode an HWP file into a document tree.
   *
   * @param data Raw file bytes.
   * @returns A successful outcome carrying a root with one sheet (plus any
   *          collected warnings), or a failed outcome when the OLE2 signature
   *          is missing, the file is flagged as encrypted, or an unguarded
   *          step throws.
   */
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
    const shield = new ShieldedParser();
    const warns: string[] = [];

    try {
      if (!BinaryKit.isOle2(data)) return fail('HWP: Invalid OLE2 signature');
      const streams = BinaryKit.parseCfb(data);

      // FileHeader: when the stream is absent, assume compressed and unencrypted.
      const fh = streams.get('FileHeader');
      const { compressed, encrypted } = fh ? parseFileHeader(fh) : { compressed: true, encrypted: false };
      // Message text: "encrypted files are not supported".
      if (encrypted) return fail('HWP: 암호화된 파일은 지원하지 않습니다');

      // DocInfo: fall back to empty tables when missing or unparseable.
      const diRaw = streams.get('DocInfo');
      let di: DocInfo = { faceNames: [], charShapes: [], paraShapes: [], borderFills: [] };
      if (diRaw) {
        di = shield.guard(() => parseDocInfo(diRaw, compressed), di, 'hwp:docInfo');
      }

      // Body sections: Section0, Section1, ... until the first gap (at most 100).
      const allContent: ContentNode[] = [];
      let pageDims: PageDims = A4;

      for (let s = 0; s < 100; s++) {
        let sec = streams.get(`BodyText/Section${s}`) ?? streams.get(`Section${s}`);
        let viaFallback = false;
        if (!sec && s === 0) {
          // No conventionally named first section: take any stream that looks like one.
          sec = findBodySection(streams);
          viaFallback = true;
        }
        if (!sec) break;

        // Guard every section parse, including the fallback one, so a single
        // malformed section becomes a warning instead of failing the decode.
        const body = sec;
        const r = shield.guard(
          () => parseBody(body, compressed, di, shield),
          { content: [], pageDims: undefined },
          `hwp:sec${s}`,
        );
        // Append element by element: spreading a very large array into push()
        // can exceed the engine's argument-count limit.
        for (const node of r.content) allContent.push(node);
        if (r.pageDims) pageDims = r.pageDims;

        // The fallback stream is the only section processed in that case.
        if (viaFallback) break;
      }

      for (const w of shield.flush()) warns.push(w);
      const content = allContent.length > 0 ? allContent : [buildPara([buildSpan('')])];
      return succeed(buildRoot({}, [buildSheet(content, pageDims)]), warns);
    } catch (e: unknown) {
      for (const w of shield.flush()) warns.push(w);
      const detail = e instanceof Error ? e.message : String(e);
      return fail(`HWP decode error: ${detail}`, warns);
    }
  }
}
|
|
802
|
+
|
|
803
|
+
/**
 * Find the first stream, in map iteration order, whose name contains
 * `Section` but contains neither `Header` nor `Info`.
 *
 * @param streams Stream name to stream bytes.
 * @returns The bytes of the first matching stream, or `undefined` if none match.
 */
function findBodySection(streams: Map<string, Uint8Array>): Uint8Array | undefined {
  for (const [name, bytes] of streams.entries()) {
    if (!name.includes('Section')) continue;
    if (name.includes('Header') || name.includes('Info')) continue;
    return bytes;
  }
  return undefined;
}
|
|
808
|
+
|
|
809
|
+
// Module-load side effect: hand a HwpScanner instance to the registry.
registry.registerDecoder(new HwpScanner());
|