hwpkit-dev 0.0.2 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.npmignore +4 -1
- package/README.md +44 -7
- package/dist/index.d.mts +46 -16
- package/dist/index.d.ts +46 -16
- package/dist/index.js +3964 -1227
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +3964 -1227
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -1
- package/playground/index.html +346 -0
- package/playground/main.ts +302 -0
- package/playground/vite.config.ts +16 -0
- package/src/contract/decoder.ts +1 -0
- package/src/contract/encoder.ts +6 -1
- package/src/core/BaseDecoder.ts +118 -0
- package/src/core/BaseEncoder.ts +146 -0
- package/src/decoders/docx/DocxDecoder.ts +743 -151
- package/src/decoders/html/HtmlDecoder.ts +366 -0
- package/src/decoders/hwp/HwpScanner.ts +478 -193
- package/src/decoders/hwpx/HwpxDecoder.ts +796 -297
- package/src/decoders/md/MdDecoder.ts +4 -4
- package/src/encoders/docx/DocxEncoder.ts +549 -240
- package/src/encoders/html/HtmlEncoder.ts +17 -19
- package/src/encoders/hwp/HwpEncoder.ts +1643 -890
- package/src/encoders/hwpx/HwpxEncoder.ts +1626 -472
- package/src/encoders/hwpx/constants.ts +148 -0
- package/src/encoders/hwpx/utils.ts +198 -0
- package/src/encoders/md/MdEncoder.ts +20 -15
- package/src/model/builders.ts +4 -4
- package/src/model/doc-props.ts +24 -10
- package/src/model/doc-tree.ts +13 -5
- package/src/pipeline/Pipeline.ts +7 -3
- package/src/pipeline/registry.ts +13 -2
- package/src/safety/StyleBridge.ts +51 -6
- package/src/toolkit/ArchiveKit.ts +56 -0
- package/src/toolkit/StyleMapper.ts +221 -0
- package/src/toolkit/UnitConverter.ts +138 -0
- package/src/toolkit/XmlKit.ts +0 -5
- package/hwp-analyze.ts +0 -90
- package/inspect-doc.ts +0 -57
- package/output_test.hwp +0 -0
- package/test-docx-to-hwp.ts +0 -45
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import type { Decoder } from '../../contract/decoder';
|
|
2
|
-
import type { DocRoot, ContentNode, ParaNode, SpanNode, ImgNode } from '../../model/doc-tree';
|
|
2
|
+
import type { DocRoot, ContentNode, ParaNode, SpanNode, ImgNode, GridNode, PageNumNode } from '../../model/doc-tree';
|
|
3
3
|
import type { Outcome } from '../../contract/result';
|
|
4
4
|
import type { Align, Stroke, StrokeKind, PageDims, TextProps, ParaProps, CellProps, GridProps } from '../../model/doc-props';
|
|
5
5
|
import { succeed, fail } from '../../contract/result';
|
|
6
|
-
import { buildRoot, buildSheet, buildPara, buildSpan, buildGrid, buildRow, buildCell, buildImg } from '../../model/builders';
|
|
6
|
+
import { buildRoot, buildSheet, buildPara, buildSpan, buildGrid, buildRow, buildCell, buildImg, buildPb, buildPageNum } from '../../model/builders';
|
|
7
7
|
import { ShieldedParser } from '../../safety/ShieldedParser';
|
|
8
8
|
import { BinaryKit } from '../../toolkit/BinaryKit';
|
|
9
|
+
import { TextKit } from '../../toolkit/TextKit';
|
|
9
10
|
import { Metric, safeHex, safeFont } from '../../safety/StyleBridge';
|
|
10
11
|
import { registry } from '../../pipeline/registry';
|
|
11
12
|
import { A4 } from '../../model/doc-props';
|
|
@@ -38,10 +39,14 @@ function isTableTag(t: number) { return t === TAG_TABLE_A || t === TAG_TABLE_B;
|
|
|
38
39
|
function isCellTag(t: number) { return t === TAG_CELL_A || t === TAG_CELL_B || t === TAG_LIST_HEADER; }
|
|
39
40
|
|
|
40
41
|
// CTRL_HEADER ctrlId values (UINT32-LE as ASCII)
|
|
41
|
-
const CTRL_TABLE = 0x74626C20; // '
|
|
42
|
+
const CTRL_TABLE = 0x74626C20; // 'tbl ' = 표(table)
|
|
42
43
|
const CTRL_IMAGE = 0x696D6720; // 'img '
|
|
43
44
|
const CTRL_OBJ = 0x6F626A20; // 'obj '
|
|
44
45
|
const CTRL_FIG = 0x66696720; // 'fig '
|
|
46
|
+
const CTRL_GSO = 0x67736F20; // 'gso ' = 그리기 객체 (drawing object, contains embedded images)
|
|
47
|
+
const CTRL_HEAD = 0x68656164; // 'head' = 머리말
|
|
48
|
+
const CTRL_FOOT = 0x666F6F74; // 'foot' = 꼬리말
|
|
49
|
+
const CTRL_ATNO = 0x61746E6F; // 'atno' = 자동 번호 (쪽번호 등)
|
|
45
50
|
|
|
46
51
|
/* ═══════════════════════════════════════════════════════════════
|
|
47
52
|
Types
|
|
@@ -64,15 +69,18 @@ interface HwpCharShape {
|
|
|
64
69
|
subscript: boolean;
|
|
65
70
|
textColor: string;
|
|
66
71
|
}
|
|
67
|
-
|
|
68
72
|
interface HwpParaShape {
|
|
69
73
|
align: Align;
|
|
70
74
|
spaceBefore: number;
|
|
71
75
|
spaceAfter: number;
|
|
72
76
|
lineSpacing: number;
|
|
77
|
+
lineSpacingType: 0 | 1 | 2 | 3; // 0=PERCENT, 1=FIXED, 2=BETWEEN_LINES, 3=AT_LEAST
|
|
78
|
+
leftMargin: number;
|
|
79
|
+
rightMargin: number;
|
|
73
80
|
indent: number;
|
|
81
|
+
verAlign?: 'baseline' | 'top' | 'center' | 'bottom';
|
|
82
|
+
lineWrap?: 'break' | 'squeeze' | 'keep';
|
|
74
83
|
}
|
|
75
|
-
|
|
76
84
|
interface HwpBorderFill {
|
|
77
85
|
borders: { type: number; widthPt: number; color: string }[];
|
|
78
86
|
bgColor?: string;
|
|
@@ -210,7 +218,7 @@ function parseCharShape(d: Uint8Array): HwpCharShape {
|
|
|
210
218
|
|
|
211
219
|
/* ── PARA_SHAPE ─────────────────────────────────────────────── */
|
|
212
220
|
/* offset size field
|
|
213
|
-
0 4 attr1 (bits 0-1 =
|
|
221
|
+
0 4 attr1 (bits 0-1 = line spacing type, bits 2-4 = alignment)
|
|
214
222
|
4 4 leftMargin (HWPUNIT)
|
|
215
223
|
8 4 rightMargin
|
|
216
224
|
12 4 indent
|
|
@@ -218,17 +226,36 @@ function parseCharShape(d: Uint8Array): HwpCharShape {
|
|
|
218
226
|
20 4 spaceAfter
|
|
219
227
|
24 4 lineSpacing */
|
|
220
228
|
|
|
221
|
-
const ALIGN_TBL: Record<number, Align> = { 0: 'justify', 1: 'left', 2: 'right', 3: 'center', 4: '
|
|
229
|
+
const ALIGN_TBL: Record<number, Align> = { 0: 'justify', 1: 'left', 2: 'right', 3: 'center', 4: 'distribute', 5: 'distribute_space' };
|
|
222
230
|
|
|
223
231
|
function parseParaShape(d: Uint8Array): HwpParaShape {
|
|
224
|
-
if (d.length < 4) return { align: 'left', spaceBefore: 0, spaceAfter: 0, lineSpacing: 160, indent: 0 };
|
|
232
|
+
if (d.length < 4) return { align: 'left', spaceBefore: 0, spaceAfter: 0, lineSpacing: 160, lineSpacingType: 0, leftMargin: 0, rightMargin: 0, indent: 0 };
|
|
225
233
|
const attr = BinaryKit.readU32LE(d, 0);
|
|
234
|
+
|
|
235
|
+
// bits 0-1: 줄 간격 종류 (0=PERCENT, 1=FIXED, 2=BETWEEN_LINES, 3=AT_LEAST)
|
|
236
|
+
const lineSpacingType = (attr & 0x3) as 0 | 1 | 2 | 3;
|
|
237
|
+
|
|
238
|
+
// bits 2-4: 정렬 방식 (0=justify,1=left,2=right,3=center,4=distribute,5=split)
|
|
239
|
+
const align = ALIGN_TBL[(attr >> 2) & 0x7] ?? 'left';
|
|
240
|
+
|
|
241
|
+
// 세로 정렬 (Bit 18 ~ Bit 19)
|
|
242
|
+
const vVal = (attr >> 18) & 0x3;
|
|
243
|
+
const verAlign = vVal === 1 ? 'top' : vVal === 2 ? 'center' : vVal === 3 ? 'bottom' : 'baseline';
|
|
244
|
+
|
|
245
|
+
// 줄 바꿈 기준: attr1 에는 별도 비트 없음, 기본값 'break'
|
|
246
|
+
const lineWrap: 'break' = 'break';
|
|
247
|
+
|
|
226
248
|
return {
|
|
227
|
-
align
|
|
228
|
-
|
|
249
|
+
align,
|
|
250
|
+
lineSpacingType,
|
|
251
|
+
leftMargin: d.length >= 8 ? i32(d, 4) : 0, // offset 4: 문단 몸체 왼쪽 여백 (HWPUNIT)
|
|
252
|
+
rightMargin: d.length >= 12 ? i32(d, 8) : 0, // offset 8: 문단 몸체 오른쪽 여백 (HWPUNIT)
|
|
253
|
+
indent: d.length >= 16 ? i32(d, 12) : 0, // offset 12: 첫 줄 들여쓰기 (HWPUNIT)
|
|
229
254
|
spaceBefore: d.length >= 20 ? i32(d, 16) : 0,
|
|
230
255
|
spaceAfter: d.length >= 24 ? i32(d, 20) : 0,
|
|
231
256
|
lineSpacing: d.length >= 28 ? i32(d, 24) : 160,
|
|
257
|
+
verAlign,
|
|
258
|
+
lineWrap,
|
|
232
259
|
};
|
|
233
260
|
}
|
|
234
261
|
|
|
@@ -275,8 +302,17 @@ function parseBorderFill(d: Uint8Array): HwpBorderFill {
|
|
|
275
302
|
Body section parsing
|
|
276
303
|
═══════════════════════════════════════════════════════════════ */
|
|
277
304
|
|
|
305
|
+
// gsoCtx: shared mutable counter for 'gso ' drawing objects.
|
|
306
|
+
// Each 'gso ' CTRL_HEADER encountered increments this counter.
|
|
307
|
+
// objectMap is keyed by 0-based gso order = sequential BinData insertion order.
|
|
308
|
+
interface GsoCtx {
|
|
309
|
+
count: number;
|
|
310
|
+
headers?: ParaNode[];
|
|
311
|
+
footers?: ParaNode[];
|
|
312
|
+
}
|
|
313
|
+
|
|
278
314
|
function parseBody(
|
|
279
|
-
raw: Uint8Array, compressed: boolean, di: DocInfo, shield: ShieldedParser,
|
|
315
|
+
raw: Uint8Array, compressed: boolean, di: DocInfo, shield: ShieldedParser, gsoCtx: GsoCtx,
|
|
280
316
|
): { content: ContentNode[]; pageDims?: PageDims } {
|
|
281
317
|
const recs = parseRecords(compressed ? tryInflate(raw) : raw);
|
|
282
318
|
const content: ContentNode[] = [];
|
|
@@ -296,7 +332,7 @@ function parseBody(
|
|
|
296
332
|
i++; // already handled above; skip at top level
|
|
297
333
|
} else if (recs[i].tag === TAG_PARA_HEADER) {
|
|
298
334
|
const r = shield.guard(
|
|
299
|
-
() => parseParagraphGroup(recs, i, di, shield),
|
|
335
|
+
() => parseParagraphGroup(recs, i, di, shield, gsoCtx),
|
|
300
336
|
{ nodes: [] as ContentNode[], next: i + 1 },
|
|
301
337
|
`hwp:para@${i}`,
|
|
302
338
|
);
|
|
@@ -312,19 +348,25 @@ function parseBody(
|
|
|
312
348
|
/* ── Paragraph group ────────────────────────────────────────── */
|
|
313
349
|
|
|
314
350
|
function parseParagraphGroup(
|
|
315
|
-
recs: HwpRecord[], start: number, di: DocInfo, shield: ShieldedParser,
|
|
351
|
+
recs: HwpRecord[], start: number, di: DocInfo, shield: ShieldedParser, gsoCtx: GsoCtx,
|
|
316
352
|
): { nodes: ContentNode[]; next: number } {
|
|
317
353
|
const hdr = recs[start];
|
|
318
354
|
const lv = hdr.level;
|
|
319
355
|
|
|
320
|
-
//
|
|
321
|
-
|
|
322
|
-
|
|
356
|
+
// P1: PARA_HEADER 레이아웃
|
|
357
|
+
// offset 8-9: paraShapeId (UINT16)
|
|
358
|
+
// offset 10: styleId (UINT8)
|
|
359
|
+
// offset 11: divideSort (UINT8) — 0x04=쪽나누기
|
|
360
|
+
const psId = hdr.data.length >= 10 ? BinaryKit.readU16LE(hdr.data, 8) : 0;
|
|
361
|
+
const hwpStyleId = hdr.data.length >= 11 ? hdr.data[10] : 0;
|
|
362
|
+
const divideSort = hdr.data.length >= 12 ? hdr.data[11] : 0;
|
|
363
|
+
const ps = di.paraShapes[psId];
|
|
323
364
|
|
|
324
365
|
let text: ParaTextResult | null = null;
|
|
325
366
|
let csPairs: [number, number][] = [];
|
|
326
367
|
const grids: ContentNode[] = [];
|
|
327
|
-
|
|
368
|
+
// imgId: for 'gso' uses sequential gsoCtx.count; for others uses flags-based objId
|
|
369
|
+
const ctrlHeaders: { ctrlId: number; imgId: number; wPt: number; hPt: number; atnoType?: number }[] = [];
|
|
328
370
|
let i = start + 1;
|
|
329
371
|
|
|
330
372
|
while (i < recs.length && recs[i].level > lv) {
|
|
@@ -339,20 +381,60 @@ function parseParagraphGroup(
|
|
|
339
381
|
} else if (r.tag === TAG_CTRL_HEADER && r.level === lv + 1) {
|
|
340
382
|
if (r.data.length >= 4) {
|
|
341
383
|
const ctrlId = BinaryKit.readU32LE(r.data, 0);
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
384
|
+
|
|
385
|
+
if (ctrlId === CTRL_HEAD || ctrlId === CTRL_FOOT) {
|
|
386
|
+
// P8: 머리말/꼬리말 컨트롤 — 자식 문단을 파싱해 gsoCtx에 저장
|
|
387
|
+
const ctrlLv = r.level;
|
|
388
|
+
const hfParas: ParaNode[] = [];
|
|
389
|
+
let j = i + 1;
|
|
390
|
+
while (j < recs.length && recs[j].level > ctrlLv) {
|
|
391
|
+
if (recs[j].tag === TAG_PARA_HEADER) {
|
|
392
|
+
const pr = shield.guard(
|
|
393
|
+
() => parseParagraphGroup(recs, j, di, shield, gsoCtx),
|
|
394
|
+
{ nodes: [] as ContentNode[], next: j + 1 },
|
|
395
|
+
`hwp:hf@${j}`,
|
|
396
|
+
);
|
|
397
|
+
hfParas.push(...pr.nodes.filter((n): n is ParaNode => n.tag === 'para'));
|
|
398
|
+
j = pr.next;
|
|
399
|
+
} else {
|
|
400
|
+
j++;
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
if (hfParas.length > 0) {
|
|
404
|
+
const key = ctrlId === CTRL_HEAD ? 'headers' : 'footers';
|
|
405
|
+
if (!gsoCtx[key]) gsoCtx[key] = hfParas;
|
|
406
|
+
}
|
|
407
|
+
i = j;
|
|
354
408
|
} else {
|
|
355
|
-
|
|
409
|
+
// HWP 5.0 general-object layout:
|
|
410
|
+
// [0:4] ctrlId [4:4] flags [8:4] xOff [12:4] yOff
|
|
411
|
+
// [16:4] width(HWPUNIT) [20:4] height(HWPUNIT)
|
|
412
|
+
const MAX_HWP = 1_000_000;
|
|
413
|
+
const rawW = r.data.length >= 24 ? BinaryKit.readU32LE(r.data, 16) : 0;
|
|
414
|
+
const rawH = r.data.length >= 28 ? BinaryKit.readU32LE(r.data, 20) : 0;
|
|
415
|
+
const wPt = rawW > 0 && rawW < MAX_HWP ? Metric.hwpToPt(rawW) : 0;
|
|
416
|
+
const hPt = rawH > 0 && rawH < MAX_HWP ? Metric.hwpToPt(rawH) : 0;
|
|
417
|
+
|
|
418
|
+
// P9: atno — offset 4 u32 하위 4bit = 번호 종류 (0=쪽번호, 6=전체쪽수)
|
|
419
|
+
const atnoType = ctrlId === CTRL_ATNO && r.data.length >= 8
|
|
420
|
+
? BinaryKit.readU32LE(r.data, 4) & 15
|
|
421
|
+
: undefined;
|
|
422
|
+
|
|
423
|
+
// 'gso ' (그리기 객체) uses sequential counter; others use flags-based id
|
|
424
|
+
const imgId = ctrlId === CTRL_GSO ? gsoCtx.count++ : (r.data.length >= 6 ? BinaryKit.readU16LE(r.data, 4) : 0);
|
|
425
|
+
ctrlHeaders.push({ ctrlId, imgId, wPt, hPt, atnoType });
|
|
426
|
+
|
|
427
|
+
if (ctrlId === CTRL_TABLE) {
|
|
428
|
+
const tr = shield.guard(
|
|
429
|
+
() => parseTableCtrl(recs, i, di, shield, gsoCtx),
|
|
430
|
+
{ grid: null, next: skipKids(recs, i) },
|
|
431
|
+
`hwp:tbl@${i}`,
|
|
432
|
+
);
|
|
433
|
+
if (tr.grid) grids.push(tr.grid);
|
|
434
|
+
i = tr.next;
|
|
435
|
+
} else {
|
|
436
|
+
i = skipKids(recs, i);
|
|
437
|
+
}
|
|
356
438
|
}
|
|
357
439
|
} else {
|
|
358
440
|
i = skipKids(recs, i);
|
|
@@ -362,43 +444,68 @@ function parseParagraphGroup(
|
|
|
362
444
|
}
|
|
363
445
|
}
|
|
364
446
|
|
|
365
|
-
// Match extended controls with CTRL_HEADER entries
|
|
366
|
-
if (text && ctrlHeaders.length > 0) {
|
|
367
|
-
for (let ci = 0; ci < text.controls.length; ci++) {
|
|
368
|
-
if (ci < ctrlHeaders.length) {
|
|
369
|
-
text.controls[ci].ctrlId = ctrlHeaders[ci].ctrlId;
|
|
370
|
-
text.controls[ci].matched = true;
|
|
371
|
-
}
|
|
372
|
-
}
|
|
373
|
-
}
|
|
374
|
-
|
|
375
447
|
const nodes: ContentNode[] = [];
|
|
376
448
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
const paraContent: (SpanNode | ContentNode)[] = [];
|
|
449
|
+
{
|
|
450
|
+
const paraContent: Array<SpanNode | GridNode | PageNumNode> = [];
|
|
380
451
|
|
|
381
|
-
//
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
452
|
+
// P9: atno 컨트롤 위치 수집 (pos 기준 정렬)
|
|
453
|
+
const atnoCtrls: { pos: number; type: number }[] = [];
|
|
454
|
+
if (text && text.controls.length > 0) {
|
|
455
|
+
for (let ci = 0; ci < text.controls.length; ci++) {
|
|
456
|
+
const ch = ctrlHeaders[ci];
|
|
457
|
+
if (ch && ch.ctrlId === CTRL_ATNO)
|
|
458
|
+
atnoCtrls.push({ pos: text.controls[ci].pos, type: ch.atnoType ?? 0 });
|
|
459
|
+
}
|
|
460
|
+
atnoCtrls.sort((a, b) => a.pos - b.pos);
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
// P9: 텍스트 chars를 atno 위치 기준으로 분할하여 PageNumNode 삽입
|
|
464
|
+
if (text && text.chars.length > 0) {
|
|
465
|
+
if (atnoCtrls.length > 0) {
|
|
466
|
+
let k = 0;
|
|
467
|
+
for (const ac of atnoCtrls) {
|
|
468
|
+
const seg: ParsedChar[] = [];
|
|
469
|
+
while (k < text.chars.length && text.chars[k].pos < ac.pos) seg.push(text.chars[k++]);
|
|
470
|
+
if (seg.length > 0) paraContent.push(...resolveCharShapes(seg, csPairs, di));
|
|
471
|
+
paraContent.push(buildPageNum(ac.type === 0 ? 'decimal' : 'total'));
|
|
472
|
+
}
|
|
473
|
+
const rest = text.chars.slice(k);
|
|
474
|
+
if (rest.length > 0) paraContent.push(...resolveCharShapes(rest, csPairs, di));
|
|
475
|
+
} else {
|
|
476
|
+
paraContent.push(...resolveCharShapes(text.chars, csPairs, di));
|
|
477
|
+
}
|
|
478
|
+
} else if (atnoCtrls.length > 0) {
|
|
479
|
+
for (const ac of atnoCtrls) paraContent.push(buildPageNum(ac.type === 0 ? 'decimal' : 'total'));
|
|
385
480
|
}
|
|
386
481
|
|
|
387
|
-
//
|
|
388
|
-
|
|
482
|
+
// Image placeholder spans: only for actual image controls.
|
|
483
|
+
// Non-image controls (footnotes, TOC entries, etc.) are silently skipped.
|
|
484
|
+
if (text && text.controls.length > 0) {
|
|
389
485
|
for (let ci = 0; ci < text.controls.length; ci++) {
|
|
390
|
-
|
|
391
|
-
//
|
|
392
|
-
|
|
486
|
+
const ch = ctrlHeaders[ci];
|
|
487
|
+
if (!ch) continue; // anchor-only ctrl (gso is sibling, not inline)
|
|
488
|
+
const isImg = ch.ctrlId === CTRL_IMAGE || ch.ctrlId === CTRL_FIG || ch.ctrlId === CTRL_OBJ || ch.ctrlId === CTRL_GSO;
|
|
489
|
+
if (!isImg) continue; // skip footnotes, TOC, page num, etc.
|
|
490
|
+
const dimStr = (ch.wPt > 0 && ch.hPt > 0)
|
|
491
|
+
? `_W${Math.round(ch.wPt)}_H${Math.round(ch.hPt)}`
|
|
492
|
+
: '';
|
|
493
|
+
paraContent.push(buildSpan(`__EXT_${ch.imgId}${dimStr}__`));
|
|
393
494
|
}
|
|
394
495
|
}
|
|
395
496
|
|
|
396
|
-
|
|
397
|
-
|
|
497
|
+
// P5: 쪽나누기(divideSort & 4) → page-break 문단 먼저 출력
|
|
498
|
+
if (divideSort & 4) {
|
|
499
|
+
nodes.push(buildPara([{ tag: 'span', props: {}, kids: [buildPb()] } as SpanNode]));
|
|
398
500
|
}
|
|
501
|
+
// P5: 표 → 앵커 문단 순서 (앵커 문단 드롭 금지)
|
|
502
|
+
nodes.push(...grids);
|
|
503
|
+
nodes.push(buildPara(
|
|
504
|
+
paraContent.length > 0 ? paraContent as any : [buildSpan('')],
|
|
505
|
+
buildParaProps(ps, hwpStyleId),
|
|
506
|
+
));
|
|
399
507
|
}
|
|
400
508
|
|
|
401
|
-
nodes.push(...grids);
|
|
402
509
|
return { nodes, next: i };
|
|
403
510
|
}
|
|
404
511
|
|
|
@@ -411,8 +518,8 @@ function skipKids(recs: HwpRecord[], idx: number): number {
|
|
|
411
518
|
|
|
412
519
|
/* ── PARA_TEXT ───────────────────────────────────────────────── */
|
|
413
520
|
|
|
414
|
-
// Extended controls: 8 WORDs, associated CTRL_HEADER
|
|
415
|
-
const EXT_CTRL = new Set([2, 3, 11, 12, 14, 15]);
|
|
521
|
+
// Extended controls: 8 WORDs, associated CTRL_HEADER (16-25 also skip 16 bytes)
|
|
522
|
+
const EXT_CTRL = new Set([2, 3, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]);
|
|
416
523
|
// Inline controls: 8 WORDs, no CTRL_HEADER
|
|
417
524
|
const INL_CTRL = new Set([4, 5, 6, 7, 8]);
|
|
418
525
|
|
|
@@ -511,7 +618,7 @@ function styledSpan(text: string, shapeId: number, di: DocInfo): SpanNode {
|
|
|
511
618
|
/* ── Table control parsing ──────────────────────────────────── */
|
|
512
619
|
|
|
513
620
|
function parseTableCtrl(
|
|
514
|
-
recs: HwpRecord[], ctrlIdx: number, di: DocInfo, shield: ShieldedParser,
|
|
621
|
+
recs: HwpRecord[], ctrlIdx: number, di: DocInfo, shield: ShieldedParser, gsoCtx: GsoCtx,
|
|
515
622
|
): { grid: ContentNode | null; next: number } {
|
|
516
623
|
const ctrlLv = recs[ctrlIdx].level;
|
|
517
624
|
let i = ctrlIdx + 1;
|
|
@@ -567,15 +674,15 @@ function parseTableCtrl(
|
|
|
567
674
|
const rowCnt = tblData.length >= 6 ? BinaryKit.readU16LE(tblData, 4) : 1;
|
|
568
675
|
const colCnt = tblData.length >= 8 ? BinaryKit.readU16LE(tblData, 6) : 1;
|
|
569
676
|
|
|
570
|
-
interface PC { row: number; col: number; cs: number; rs: number; widthHwp: number; props: CellProps;
|
|
677
|
+
interface PC { row: number; col: number; cs: number; rs: number; widthHwp: number; heightHwp?: number; props: CellProps; cellChildren: (ParaNode | GridNode)[] }
|
|
571
678
|
const parsed: PC[] = [];
|
|
572
679
|
|
|
573
680
|
for (let ci = 0; ci < cells.length; ci++) {
|
|
574
681
|
const c = cells[ci];
|
|
575
682
|
const seqIdx = ci;
|
|
576
683
|
const pc = shield.guard(
|
|
577
|
-
() => parseCellRec(c.data, c.tag, recs, c.cStart, c.cEnd, di, shield, seqIdx, colCnt),
|
|
578
|
-
{ row: Math.floor(ci / (colCnt || 1)), col: ci % (colCnt || 1), cs: 1, rs: 1, widthHwp: 0, props: {},
|
|
684
|
+
() => parseCellRec(c.data, c.tag, recs, c.cStart, c.cEnd, di, shield, seqIdx, colCnt, gsoCtx),
|
|
685
|
+
{ row: Math.floor(ci / (colCnt || 1)), col: ci % (colCnt || 1), cs: 1, rs: 1, widthHwp: 0, heightHwp: undefined, props: {}, cellChildren: [buildPara([buildSpan('')])] },
|
|
579
686
|
`hwp:cell@${c.cStart}`,
|
|
580
687
|
);
|
|
581
688
|
parsed.push(pc);
|
|
@@ -602,9 +709,11 @@ function parseTableCtrl(
|
|
|
602
709
|
}
|
|
603
710
|
}
|
|
604
711
|
// Pass 2: for columns still 0, try to derive from multi-span cells
|
|
712
|
+
// Sort by span size ascending so smaller, more precise spans fill widths before larger spans
|
|
605
713
|
const zeroColumns = colWidthsPt.filter(w => w === 0).length;
|
|
606
714
|
if (zeroColumns > 0) {
|
|
607
|
-
|
|
715
|
+
const spanCells = parsed.filter(c => c.cs > 1 && c.widthHwp > 0).sort((a, b) => a.cs - b.cs);
|
|
716
|
+
for (const c of spanCells) {
|
|
608
717
|
if (c.cs > 1 && c.widthHwp > 0) {
|
|
609
718
|
// Subtract known column widths from the span
|
|
610
719
|
let known = 0;
|
|
@@ -624,13 +733,37 @@ function parseTableCtrl(
|
|
|
624
733
|
}
|
|
625
734
|
}
|
|
626
735
|
|
|
736
|
+
// Post-process: clamp near-zero column widths (< 1pt = floating-point artifact) to minimum 1pt
|
|
737
|
+
for (let i = 0; i < colWidthsPt.length; i++) {
|
|
738
|
+
if (colWidthsPt[i] > 0 && colWidthsPt[i] < 1) colWidthsPt[i] = 1;
|
|
739
|
+
}
|
|
740
|
+
|
|
627
741
|
const rows = [];
|
|
628
742
|
for (let r = 0; r < actualRowCnt; r++) {
|
|
629
743
|
const rc = parsed.filter(c => c.row === r).sort((a, b) => a.col - b.col);
|
|
630
744
|
if (rc.length === 0) continue;
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
745
|
+
|
|
746
|
+
// Calculate row height — prefer rs=1 cells (exact per-row height)
|
|
747
|
+
let rowHeightPt: number | undefined = undefined;
|
|
748
|
+
for (const c of rc) {
|
|
749
|
+
if (c.heightHwp && c.heightHwp > 0 && c.rs === 1) {
|
|
750
|
+
const hPt = Metric.hwpToPt(c.heightHwp);
|
|
751
|
+
if (rowHeightPt == null || hPt > rowHeightPt) rowHeightPt = hPt;
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
// Fallback: all cells span multiple rows → approximate height per row
|
|
755
|
+
if (rowHeightPt == null) {
|
|
756
|
+
for (const c of rc) {
|
|
757
|
+
if (c.heightHwp && c.heightHwp > 0) {
|
|
758
|
+
const hPt = Metric.hwpToPt(c.heightHwp) / c.rs;
|
|
759
|
+
if (rowHeightPt == null || hPt > rowHeightPt) rowHeightPt = hPt;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
rows.push(buildRow(rc.map(c => {
|
|
765
|
+
return buildCell(c.cellChildren, { cs: c.cs, rs: c.rs, props: c.props });
|
|
766
|
+
}), rowHeightPt));
|
|
634
767
|
}
|
|
635
768
|
if (rows.length === 0) return { grid: null, next: i };
|
|
636
769
|
|
|
@@ -659,10 +792,11 @@ function parseTableCtrl(
|
|
|
659
792
|
|
|
660
793
|
function parseCellRec(
|
|
661
794
|
d: Uint8Array, tag: number, recs: HwpRecord[], cStart: number, cEnd: number,
|
|
662
|
-
di: DocInfo, shield: ShieldedParser, seqIdx: number, colCnt: number,
|
|
795
|
+
di: DocInfo, shield: ShieldedParser, seqIdx: number, colCnt: number, gsoCtx: GsoCtx,
|
|
663
796
|
) {
|
|
664
797
|
let col: number, row: number, cs = 1, rs = 1;
|
|
665
798
|
let widthHwp = 0;
|
|
799
|
+
let heightHwp = 0;
|
|
666
800
|
const props: CellProps = {};
|
|
667
801
|
|
|
668
802
|
const attr = d.length >= 6 ? BinaryKit.readU32LE(d, 2) : 0;
|
|
@@ -670,88 +804,158 @@ function parseCellRec(
|
|
|
670
804
|
if (va === 1) props.va = 'mid';
|
|
671
805
|
else if (va === 2) props.va = 'bot';
|
|
672
806
|
|
|
807
|
+
const HWP_PAD_LR_DEFAULT = 360;
|
|
808
|
+
const HWP_PAD_TB_DEFAULT = 141;
|
|
809
|
+
|
|
673
810
|
if (tag === TAG_LIST_HEADER && d.length >= 22) {
|
|
674
|
-
// LIST_HEADER with cell-specific fields
|
|
675
|
-
// offset 8: colAddr, offset 10: rowAddr (HWP 5.0 spec)
|
|
676
811
|
col = BinaryKit.readU16LE(d, 8);
|
|
677
812
|
row = BinaryKit.readU16LE(d, 10);
|
|
678
813
|
cs = Math.max(1, BinaryKit.readU16LE(d, 12));
|
|
679
814
|
rs = Math.max(1, BinaryKit.readU16LE(d, 14));
|
|
680
815
|
widthHwp = BinaryKit.readU32LE(d, 16);
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
const
|
|
685
|
-
if (
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
props.bot = toStroke(bf.borders[3]);
|
|
690
|
-
}
|
|
691
|
-
if (bf.bgColor && bf.bgColor !== 'FFFFFF') props.bg = bf.bgColor;
|
|
816
|
+
heightHwp = d.length >= 24 ? BinaryKit.readU32LE(d, 20) : 0;
|
|
817
|
+
if (d.length >= 32) {
|
|
818
|
+
const pL = BinaryKit.readU16LE(d, 24); const pR = BinaryKit.readU16LE(d, 26);
|
|
819
|
+
const pT = BinaryKit.readU16LE(d, 28); const pB = BinaryKit.readU16LE(d, 30);
|
|
820
|
+
if (pL !== HWP_PAD_LR_DEFAULT) props.padL = Metric.hwpToPt(pL);
|
|
821
|
+
if (pR !== HWP_PAD_LR_DEFAULT) props.padR = Metric.hwpToPt(pR);
|
|
822
|
+
if (pT !== HWP_PAD_TB_DEFAULT) props.padT = Metric.hwpToPt(pT);
|
|
823
|
+
if (pB !== HWP_PAD_TB_DEFAULT) props.padB = Metric.hwpToPt(pB);
|
|
692
824
|
}
|
|
825
|
+
const bfId = d.length >= 34 ? BinaryKit.readU16LE(d, 32) : 0;
|
|
826
|
+
if (bfId > 0 && bfId <= di.borderFills.length) applyCellBorderFill(di.borderFills[bfId - 1], props);
|
|
693
827
|
} else if (tag !== TAG_LIST_HEADER) {
|
|
694
|
-
// Full CELL record with position/span/borderFill
|
|
695
828
|
col = d.length >= 8 ? BinaryKit.readU16LE(d, 6) : seqIdx % (colCnt || 1);
|
|
696
829
|
row = d.length >= 10 ? BinaryKit.readU16LE(d, 8) : Math.floor(seqIdx / (colCnt || 1));
|
|
697
830
|
cs = d.length >= 12 ? Math.max(1, BinaryKit.readU16LE(d, 10)) : 1;
|
|
698
831
|
rs = d.length >= 14 ? Math.max(1, BinaryKit.readU16LE(d, 12)) : 1;
|
|
699
832
|
widthHwp = d.length >= 18 ? BinaryKit.readU32LE(d, 14) : 0;
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
const
|
|
704
|
-
if (
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
props.bot = toStroke(bf.borders[3]);
|
|
709
|
-
}
|
|
710
|
-
if (bf.bgColor && bf.bgColor !== 'FFFFFF') props.bg = bf.bgColor;
|
|
833
|
+
heightHwp = d.length >= 22 ? BinaryKit.readU32LE(d, 18) : 0;
|
|
834
|
+
if (d.length >= 30) {
|
|
835
|
+
const pL = BinaryKit.readU16LE(d, 22); const pR = BinaryKit.readU16LE(d, 24);
|
|
836
|
+
const pT = BinaryKit.readU16LE(d, 26); const pB = BinaryKit.readU16LE(d, 28);
|
|
837
|
+
if (pL !== HWP_PAD_LR_DEFAULT) props.padL = Metric.hwpToPt(pL);
|
|
838
|
+
if (pR !== HWP_PAD_LR_DEFAULT) props.padR = Metric.hwpToPt(pR);
|
|
839
|
+
if (pT !== HWP_PAD_TB_DEFAULT) props.padT = Metric.hwpToPt(pT);
|
|
840
|
+
if (pB !== HWP_PAD_TB_DEFAULT) props.padB = Metric.hwpToPt(pB);
|
|
711
841
|
}
|
|
842
|
+
const bfId = d.length >= 32 ? BinaryKit.readU16LE(d, 30) : 0;
|
|
843
|
+
if (bfId > 0 && bfId <= di.borderFills.length) applyCellBorderFill(di.borderFills[bfId - 1], props);
|
|
712
844
|
} else {
|
|
713
|
-
// Fallback: LIST_HEADER too short, compute sequentially
|
|
714
845
|
row = Math.floor(seqIdx / (colCnt || 1));
|
|
715
846
|
col = seqIdx % (colCnt || 1);
|
|
716
847
|
}
|
|
717
848
|
|
|
718
|
-
|
|
719
|
-
const
|
|
849
|
+
const cellChildren: (ParaNode | GridNode)[] = [];
|
|
850
|
+
const MAX_HWP = 1_000_000;
|
|
720
851
|
let k = cStart;
|
|
852
|
+
|
|
721
853
|
while (k < cEnd) {
|
|
722
854
|
if (recs[k].tag === TAG_PARA_HEADER) {
|
|
723
|
-
//
|
|
855
|
+
// Parse paragraph inside cell — also extracts nested tables within the paragraph
|
|
724
856
|
const r = shield.guard(
|
|
725
857
|
() => {
|
|
726
858
|
const hdr = recs[k];
|
|
727
859
|
const lv = hdr.level;
|
|
728
860
|
const psId = hdr.data.length >= 10 ? BinaryKit.readU16LE(hdr.data, 8) : 0;
|
|
861
|
+
// P6: 셀 내부 문단의 styleId / divideSort 읽기
|
|
862
|
+
const cellStyleId = hdr.data.length >= 11 ? hdr.data[10] : 0;
|
|
863
|
+
const cellDivide = hdr.data.length >= 12 ? hdr.data[11] : 0;
|
|
729
864
|
const ps = di.paraShapes[psId];
|
|
730
865
|
let txt: ParaTextResult | null = null;
|
|
731
866
|
let csp: [number, number][] = [];
|
|
867
|
+
const ctrlHdrs: { ctrlId: number; imgId: number; wPt: number; hPt: number }[] = [];
|
|
868
|
+
const innerGrids: GridNode[] = [];
|
|
732
869
|
let j = k + 1;
|
|
733
870
|
while (j < cEnd && recs[j].level > lv) {
|
|
734
871
|
if (recs[j].tag === TAG_PARA_TEXT) { txt = decodeParaText(recs[j].data); j++; }
|
|
735
872
|
else if (recs[j].tag === TAG_PARA_CHAR_SHAPE) { csp = parseCharShapePairs(recs[j].data); j++; }
|
|
873
|
+
else if (recs[j].tag === TAG_CTRL_HEADER && recs[j].level === lv + 1) {
|
|
874
|
+
if (recs[j].data.length >= 4) {
|
|
875
|
+
const ctrlId = BinaryKit.readU32LE(recs[j].data, 0);
|
|
876
|
+
if (ctrlId === CTRL_TABLE) {
|
|
877
|
+
// Nested table inside a cell paragraph — recurse into parseTableCtrl
|
|
878
|
+
const nestedTr = shield.guard(
|
|
879
|
+
() => parseTableCtrl(recs, j, di, shield, gsoCtx),
|
|
880
|
+
{ grid: null, next: skipKids(recs, j) },
|
|
881
|
+
`hwp:innerNestedTbl@${j}`,
|
|
882
|
+
);
|
|
883
|
+
if (nestedTr.grid) innerGrids.push(nestedTr.grid as GridNode);
|
|
884
|
+
j = nestedTr.next;
|
|
885
|
+
} else {
|
|
886
|
+
const rawW = recs[j].data.length >= 24 ? BinaryKit.readU32LE(recs[j].data, 16) : 0;
|
|
887
|
+
const rawH = recs[j].data.length >= 28 ? BinaryKit.readU32LE(recs[j].data, 20) : 0;
|
|
888
|
+
const wPt = rawW > 0 && rawW < MAX_HWP ? Metric.hwpToPt(rawW) : 0;
|
|
889
|
+
const hPt = rawH > 0 && rawH < MAX_HWP ? Metric.hwpToPt(rawH) : 0;
|
|
890
|
+
const imgId = ctrlId === CTRL_GSO ? gsoCtx.count++ : (recs[j].data.length >= 6 ? BinaryKit.readU16LE(recs[j].data, 4) : 0);
|
|
891
|
+
ctrlHdrs.push({ ctrlId, imgId, wPt, hPt });
|
|
892
|
+
j = skipKids(recs, j);
|
|
893
|
+
}
|
|
894
|
+
} else {
|
|
895
|
+
j = skipKids(recs, j);
|
|
896
|
+
}
|
|
897
|
+
}
|
|
736
898
|
else j++;
|
|
737
899
|
}
|
|
738
|
-
const
|
|
739
|
-
|
|
900
|
+
const paraContent: (SpanNode | ContentNode)[] = [];
|
|
901
|
+
if (txt && txt.chars.length > 0) paraContent.push(...resolveCharShapes(txt.chars, csp, di));
|
|
902
|
+
if (txt && txt.controls.length > 0) {
|
|
903
|
+
for (let ci = 0; ci < txt.controls.length; ci++) {
|
|
904
|
+
const ch = ctrlHdrs[ci];
|
|
905
|
+
if (!ch) continue;
|
|
906
|
+
const isImg = ch.ctrlId === CTRL_IMAGE || ch.ctrlId === CTRL_FIG || ch.ctrlId === CTRL_OBJ || ch.ctrlId === CTRL_GSO;
|
|
907
|
+
if (!isImg) continue;
|
|
908
|
+
const dimStr = (ch.wPt > 0 && ch.hPt > 0) ? `_W${Math.round(ch.wPt)}_H${Math.round(ch.hPt)}` : '';
|
|
909
|
+
paraContent.push(buildSpan(`__EXT_${ch.imgId}${dimStr}__`));
|
|
910
|
+
}
|
|
911
|
+
}
|
|
912
|
+
const kids = paraContent.length > 0 ? paraContent as any : [buildSpan('')];
|
|
913
|
+
// P6: innerGrids 먼저, 앵커 문단 나중 (P5와 동일한 순서)
|
|
914
|
+
const items: (ParaNode | GridNode)[] = [...innerGrids, buildPara(kids, buildParaProps(ps, cellStyleId))];
|
|
915
|
+
if (cellDivide & 4) items.unshift(buildPara([{ tag: 'span', props: {}, kids: [buildPb()] } as SpanNode]));
|
|
916
|
+
return { items, next: j };
|
|
740
917
|
},
|
|
741
|
-
{
|
|
918
|
+
{ items: [buildPara([buildSpan('')])] as (ParaNode | GridNode)[], next: k + 1 },
|
|
742
919
|
`hwp:cellP@${k}`,
|
|
743
920
|
);
|
|
744
|
-
|
|
921
|
+
cellChildren.push(...r.items);
|
|
745
922
|
k = r.next;
|
|
923
|
+
} else if (recs[k].tag === TAG_CTRL_HEADER && recs[k].data.length >= 4) {
|
|
924
|
+
// CTRL_HEADER at cell level (sibling of PARA_HEADER) — anchored 'gso' images and outer-level nested tables
|
|
925
|
+
const cellCtrlId = BinaryKit.readU32LE(recs[k].data, 0);
|
|
926
|
+
if (cellCtrlId === CTRL_GSO) {
|
|
927
|
+
const gsoId = gsoCtx.count++;
|
|
928
|
+
const rawW = recs[k].data.length >= 24 ? BinaryKit.readU32LE(recs[k].data, 16) : 0;
|
|
929
|
+
const rawH = recs[k].data.length >= 28 ? BinaryKit.readU32LE(recs[k].data, 20) : 0;
|
|
930
|
+
const wPt = rawW > 0 && rawW < MAX_HWP ? Metric.hwpToPt(rawW) : 0;
|
|
931
|
+
const hPt = rawH > 0 && rawH < MAX_HWP ? Metric.hwpToPt(rawH) : 0;
|
|
932
|
+
const dimStr = (wPt > 0 && hPt > 0) ? `_W${Math.round(wPt)}_H${Math.round(hPt)}` : '';
|
|
933
|
+
cellChildren.push(buildPara([buildSpan(`__EXT_${gsoId}${dimStr}__`)]));
|
|
934
|
+
k = skipKids(recs, k);
|
|
935
|
+
} else if (cellCtrlId === CTRL_TABLE) {
|
|
936
|
+
const tr = shield.guard(
|
|
937
|
+
() => parseTableCtrl(recs, k, di, shield, gsoCtx),
|
|
938
|
+
{ grid: null, next: skipKids(recs, k) },
|
|
939
|
+
`hwp:nestedTbl@${k}`,
|
|
940
|
+
);
|
|
941
|
+
if (tr.grid) cellChildren.push(tr.grid as GridNode);
|
|
942
|
+
k = tr.next;
|
|
943
|
+
} else {
|
|
944
|
+
k = skipKids(recs, k);
|
|
945
|
+
}
|
|
746
946
|
} else { k++; }
|
|
747
947
|
}
|
|
748
948
|
|
|
749
|
-
return {
|
|
949
|
+
return {
|
|
950
|
+
row, col, cs, rs, props, widthHwp,
|
|
951
|
+
heightHwp: heightHwp || undefined,
|
|
952
|
+
cellChildren: cellChildren.length ? cellChildren : [buildPara([buildSpan('')])],
|
|
953
|
+
};
|
|
750
954
|
}
|
|
751
955
|
|
|
752
956
|
/* ── PAGE_DEF ───────────────────────────────────────────────── */
|
|
753
957
|
/* [0:4] width [4:4] height [8:4] ml [12:4] mr
|
|
754
|
-
[16:4] mt [20:4] mb [36:4] attr (bit0=landscape)
|
|
958
|
+
[16:4] mt [20:4] mb [24:4] header [28:4] footer [36:4] attr (bit0=landscape) */
|
|
755
959
|
|
|
756
960
|
function parsePageDef(d: Uint8Array): PageDims {
|
|
757
961
|
if (d.length < 24) return A4;
|
|
@@ -761,11 +965,15 @@ function parsePageDef(d: Uint8Array): PageDims {
|
|
|
761
965
|
const mr = BinaryKit.readU32LE(d, 12);
|
|
762
966
|
const mt = BinaryKit.readU32LE(d, 16);
|
|
763
967
|
const mb = BinaryKit.readU32LE(d, 20);
|
|
968
|
+
const header = d.length >= 28 ? BinaryKit.readU32LE(d, 24) : 0;
|
|
969
|
+
const footer = d.length >= 32 ? BinaryKit.readU32LE(d, 28) : 0;
|
|
764
970
|
const at = d.length >= 40 ? BinaryKit.readU32LE(d, 36) : 0;
|
|
765
971
|
return {
|
|
766
972
|
wPt: Metric.hwpToPt(w), hPt: Metric.hwpToPt(h),
|
|
767
973
|
ml: Metric.hwpToPt(ml), mr: Metric.hwpToPt(mr),
|
|
768
974
|
mt: Metric.hwpToPt(mt), mb: Metric.hwpToPt(mb),
|
|
975
|
+
headerPt: header > 0 ? Metric.hwpToPt(header) : undefined,
|
|
976
|
+
footerPt: footer > 0 ? Metric.hwpToPt(footer) : undefined,
|
|
769
977
|
orient: (at & 1) ? 'landscape' : 'portrait',
|
|
770
978
|
};
|
|
771
979
|
}
|
|
@@ -788,6 +996,18 @@ function toStroke(b: { type: number; widthPt: number; color: string }): Stroke {
|
|
|
788
996
|
return { kind: BORDER_KIND[b.type] ?? 'solid', pt: b.widthPt, color: b.color };
|
|
789
997
|
}
|
|
790
998
|
|
|
999
|
+
/**
 * Copies the border strokes and background colour of a borderFill onto cell props.
 *
 * Borders whose type is an explicit NONE are deliberately kept rather than filtered
 * out: DOCX `tcBorders` must be able to override the table-level `tblBorders`, and
 * dropping NONE would let the table-level borders bleed through.
 *
 * @param bf    Source borderFill; `borders` is read in the order left, right, top, bottom.
 * @param props Cell props mutated in place.
 */
function applyCellBorderFill(bf: HwpBorderFill, props: CellProps): void {
  const { borders, bgColor } = bf;

  // Strokes are only applied when all four sides are present.
  if (borders.length >= 4) {
    const [leftSide, rightSide, topSide, bottomSide] = borders;
    props.left = toStroke(leftSide);
    props.right = toStroke(rightSide);
    props.top = toStroke(topSide);
    props.bot = toStroke(bottomSide);
  }

  // A missing colour or plain white ('FFFFFF') is not recorded as a background.
  if (bgColor && bgColor !== 'FFFFFF') {
    props.bg = bgColor;
  }
}
|
|
1010
|
+
|
|
791
1011
|
function strokeFromBF(bfId: number, di: DocInfo): Stroke | undefined {
|
|
792
1012
|
if (bfId <= 0 || bfId > di.borderFills.length) return undefined;
|
|
793
1013
|
const bf = di.borderFills[bfId - 1];
|
|
@@ -796,14 +1016,30 @@ function strokeFromBF(bfId: number, di: DocInfo): Stroke | undefined {
|
|
|
796
1016
|
return { kind: BORDER_KIND[b.type] ?? 'solid', pt: b.widthPt, color: b.color };
|
|
797
1017
|
}
|
|
798
1018
|
|
|
799
|
-
function buildParaProps(ps?: HwpParaShape): ParaProps {
|
|
800
|
-
|
|
801
|
-
const p: ParaProps = {};
|
|
1019
|
+
/**
 * Converts an HWP paragraph shape into model-level paragraph props.
 *
 * Only non-default values are written, so the result stays sparse.
 *
 * @param ps         Parsed paragraph shape; when omitted only `hwpStyleId` (if any) is returned.
 * @param hwpStyleId Optional HWP style id copied onto the result (P2).
 * @returns The populated paragraph props.
 */
function buildParaProps(ps?: HwpParaShape, hwpStyleId?: number): ParaProps {
  // P2: seed the result with hwpStyleId (an empty object when it is undefined).
  const out: ParaProps = {};
  if (hwpStyleId !== undefined) out.hwpStyleId = hwpStyleId;
  if (!ps) return out;

  if (ps.align && ps.align !== 'left') out.align = ps.align;
  if (ps.spaceBefore > 0) out.spaceBefore = Metric.hwpToPt(ps.spaceBefore);
  if (ps.spaceAfter > 0) out.spaceAfter = Metric.hwpToPt(ps.spaceAfter);

  // Line spacing: type 1 (FIXED) → lineHeightFixed in pt; any other type is treated
  // as PERCENT → lineHeight as a ratio.
  // P10: fixes the bug that dropped 160% (the HWP default) — lineHeight is always set.
  if (ps.lineSpacing > 0) {
    if (ps.lineSpacingType === 1) {
      out.lineHeightFixed = Metric.hwpToPt(ps.lineSpacing);
    } else {
      out.lineHeight = ps.lineSpacing / 100;
    }
  }

  // leftMargin (offset 4) = left margin of the paragraph body → leftMargin (pt);
  // zero and negative values are ignored.
  const leftPt = Metric.hwpToPt(ps.leftMargin);
  if (leftPt > 0) out.leftMargin = leftPt;

  // rightMargin (offset 8) = right margin of the paragraph body → indentRightPt (pt);
  // zero and negative values are ignored.
  const rightPt = Metric.hwpToPt(ps.rightMargin);
  if (rightPt > 0) out.indentRightPt = rightPt;

  // indent (offset 12) = first-line indent (positive) / hanging indent (negative).
  if (ps.indent !== 0) out.firstLineIndentPt = Metric.hwpToPt(ps.indent);

  if (ps.verAlign && ps.verAlign !== 'baseline') out.verAlign = ps.verAlign;
  if (ps.lineWrap && ps.lineWrap !== 'break') out.lineWrap = ps.lineWrap;

  return out;
}
|
|
809
1045
|
|
|
@@ -813,6 +1049,7 @@ function buildParaProps(ps?: HwpParaShape): ParaProps {
|
|
|
813
1049
|
|
|
814
1050
|
export class HwpScanner implements Decoder {
|
|
815
1051
|
readonly format = 'hwp';
|
|
1052
|
+
readonly aliases = ['application/vnd.hancom.hwp'];
|
|
816
1053
|
|
|
817
1054
|
async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
|
|
818
1055
|
const shield = new ShieldedParser();
|
|
@@ -834,52 +1071,36 @@ export class HwpScanner implements Decoder {
|
|
|
834
1071
|
di = shield.guard(() => parseDocInfo(diRaw, compressed), di, 'hwp:docInfo');
|
|
835
1072
|
}
|
|
836
1073
|
|
|
837
|
-
// Extract images from BinData streams
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
1074
|
+
// Extract images from BinData streams.
|
|
1075
|
+
// HWP duplicates each BinData entry: once as "BinData/BIN0001.jpg" and once as "BIN0001.jpg".
|
|
1076
|
+
// We keep only the "BinData/" prefixed versions, sort by BIN number, then assign 0-based keys
|
|
1077
|
+
// matching the order 'gso' CTRL_HEADER records are encountered during body parsing.
|
|
1078
|
+
const binEntries: { binNum: number; data: Uint8Array }[] = [];
|
|
1079
|
+
for (const [path, streamData] of streams) {
|
|
1080
|
+
// Match "BinData/BIN0001.jpg" style — the canonical form
|
|
1081
|
+
const m = path.match(/^BinData[/\\]BIN(\d+)\.\w+$/i);
|
|
1082
|
+
if (m) binEntries.push({ binNum: parseInt(m[1], 10), data: streamData });
|
|
845
1083
|
}
|
|
1084
|
+
// Sort by BIN number (ascending) so BIN0001→idx0, BIN0002→idx1, …
|
|
1085
|
+
binEntries.sort((a, b) => a.binNum - b.binNum);
|
|
846
1086
|
|
|
847
|
-
// Create image nodes for each image stream (deduplicated by hash)
|
|
848
1087
|
const objectMap = new Map<number, ImgNode>();
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
// Determine MIME type from
|
|
853
|
-
let mimeType = 'image/jpeg';
|
|
854
|
-
|
|
855
|
-
if (
|
|
856
|
-
else if (
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
else if (data[0] === 0x47 && data[1] === 0x49 && data[2] === 0x46 && data[3] === 0x3538) mimeType = 'image/gif';
|
|
862
|
-
else if (data[0] === 0x42 && data[1] === 0x4D) mimeType = 'image/bmp';
|
|
863
|
-
|
|
864
|
-
const imgData = Buffer.from(data);
|
|
865
|
-
const base64 = imgData.toString('base64');
|
|
866
|
-
const hash = base64.slice(0, 20); // Use first 20 chars as simple hash
|
|
867
|
-
if (!seenHashes.has(hash)) {
|
|
868
|
-
seenHashes.add(hash);
|
|
869
|
-
objectMap.set(imgIdx++, buildImg(
|
|
870
|
-
base64,
|
|
871
|
-
mimeType as any,
|
|
872
|
-
0, // w
|
|
873
|
-
0, // h
|
|
874
|
-
`Image from ${path}`,
|
|
875
|
-
));
|
|
876
|
-
console.log(`[HwpScanner] Added unique image: ${hash}... (${data.length} bytes)`);
|
|
877
|
-
} else {
|
|
878
|
-
console.log(`[HwpScanner] Duplicate image skipped: ${hash}...`);
|
|
879
|
-
}
|
|
1088
|
+
for (let idx = 0; idx < binEntries.length; idx++) {
|
|
1089
|
+
const { data: imgData } = binEntries[idx];
|
|
1090
|
+
|
|
1091
|
+
// Determine MIME type from the binary signature; anything unrecognised is treated as JPEG
|
|
1092
|
+
let mimeType: ImgNode['mime'] = 'image/jpeg';
|
|
1093
|
+
if (imgData[0] === 0x89 && imgData[1] === 0x50) mimeType = 'image/png';
|
|
1094
|
+
else if (imgData[0] === 0x47 && imgData[1] === 0x49) mimeType = 'image/gif';
|
|
1095
|
+
else if (imgData[0] === 0x42 && imgData[1] === 0x4D) mimeType = 'image/bmp';
|
|
1096
|
+
|
|
1097
|
+
const base64 = TextKit.base64Encode(imgData);
|
|
1098
|
+
const { wPt, hPt } = getImageDimsPt(imgData, mimeType);
|
|
1099
|
+
objectMap.set(idx, buildImg(base64, mimeType, wPt, hPt));
|
|
880
1100
|
}
|
|
881
1101
|
|
|
882
|
-
|
|
1102
|
+
// gsoCtx tracks sequential 'gso' encounter order — must be shared across all sections
|
|
1103
|
+
const gsoCtx: GsoCtx = { count: 0 };
|
|
883
1104
|
|
|
884
1105
|
// Body sections
|
|
885
1106
|
const allContent: ContentNode[] = [];
|
|
@@ -891,7 +1112,7 @@ export class HwpScanner implements Decoder {
|
|
|
891
1112
|
if (s === 0) {
|
|
892
1113
|
const fb = findBodySection(streams);
|
|
893
1114
|
if (fb) {
|
|
894
|
-
const r = parseBody(fb, compressed, di, shield);
|
|
1115
|
+
const r = parseBody(fb, compressed, di, shield, gsoCtx);
|
|
895
1116
|
allContent.push(...r.content);
|
|
896
1117
|
if (r.pageDims) pageDims = r.pageDims;
|
|
897
1118
|
}
|
|
@@ -899,7 +1120,7 @@ export class HwpScanner implements Decoder {
|
|
|
899
1120
|
break;
|
|
900
1121
|
}
|
|
901
1122
|
const r = shield.guard(
|
|
902
|
-
() => parseBody(sec, compressed, di, shield),
|
|
1123
|
+
() => parseBody(sec, compressed, di, shield, gsoCtx),
|
|
903
1124
|
{ content: [], pageDims: undefined },
|
|
904
1125
|
`hwp:sec${s}`,
|
|
905
1126
|
);
|
|
@@ -907,33 +1128,17 @@ export class HwpScanner implements Decoder {
|
|
|
907
1128
|
if (r.pageDims) pageDims = r.pageDims;
|
|
908
1129
|
}
|
|
909
1130
|
|
|
910
|
-
// Inject images into paragraphs (only if images are available)
|
|
911
|
-
console.log(`[HwpScanner] Before injection: ${allContent.length} nodes, ${objectMap.size} images available`);
|
|
912
1131
|
if (objectMap.size > 0) {
|
|
913
1132
|
injectImagesIntoContent(allContent, objectMap);
|
|
914
|
-
console.log(`[HwpScanner] After injection: ${allContent.length} nodes`);
|
|
915
1133
|
}
|
|
916
1134
|
|
|
917
|
-
// Count images (recursively)
|
|
918
|
-
const countImages = (nodes: ContentNode[]): number => {
|
|
919
|
-
let count = 0;
|
|
920
|
-
for (const node of nodes) {
|
|
921
|
-
if ((node as any).tag === 'img') count++;
|
|
922
|
-
if ((node as any).tag === 'para' && (node as any).kids) count += countImages((node as any).kids);
|
|
923
|
-
if ((node as any).tag === 'grid' && (node as any).kids) {
|
|
924
|
-
for (const row of (node as any).kids) {
|
|
925
|
-
if (row.kids) count += countImages(row.kids);
|
|
926
|
-
}
|
|
927
|
-
}
|
|
928
|
-
}
|
|
929
|
-
return count;
|
|
930
|
-
};
|
|
931
|
-
const imgCount = countImages(allContent);
|
|
932
|
-
console.log(`[HwpScanner] Images in content: ${imgCount}`);
|
|
933
|
-
|
|
934
1135
|
warns.push(...shield.flush());
|
|
935
1136
|
const content = allContent.length > 0 ? allContent : [buildPara([buildSpan('')])];
|
|
936
|
-
|
|
1137
|
+
// P8: 머리말/꼬리말을 gsoCtx에서 가져와 buildSheet에 전달
|
|
1138
|
+
return succeed(buildRoot({}, [buildSheet(content, pageDims, {
|
|
1139
|
+
headers: gsoCtx.headers ? { default: gsoCtx.headers } : undefined,
|
|
1140
|
+
footers: gsoCtx.footers ? { default: gsoCtx.footers } : undefined,
|
|
1141
|
+
})]), warns);
|
|
937
1142
|
} catch (e: any) {
|
|
938
1143
|
warns.push(...shield.flush());
|
|
939
1144
|
return fail(`HWP decode error: ${e?.message ?? String(e)}`, warns);
|
|
@@ -947,6 +1152,52 @@ function findBodySection(streams: Map<string, Uint8Array>): Uint8Array | undefin
|
|
|
947
1152
|
return undefined;
|
|
948
1153
|
}
|
|
949
1154
|
|
|
1155
|
+
/* ═══════════════════════════════════════════════════════════════
|
|
1156
|
+
Image dimension extraction from binary headers
|
|
1157
|
+
════════════════════════════════════════════════════════════ */
|
|
1158
|
+
|
|
1159
|
+
/**
 * Reads the pixel dimensions from an image's binary header and converts them to points.
 *
 * Pixels are converted assuming 96 DPI (1 px = 0.75 pt); density metadata stored in
 * the image itself is not consulted. PNG, JPEG, BMP and GIF headers are understood.
 *
 * @param data Raw image bytes.
 * @param mime MIME type selecting which header layout to parse.
 * @returns Size in points, or `{ wPt: 72, hPt: 72 }` (a 1-inch square) when the header
 *          is missing, truncated, or reports a non-positive dimension.
 */
function getImageDimsPt(data: Uint8Array, mime: string): { wPt: number; hPt: number } {
  const PT_PER_PX = 0.75; // 96 DPI → 72 pt per inch
  const fallback = { wPt: 72, hPt: 72 };
  const toPt = (w: number, h: number): { wPt: number; hPt: number } | undefined =>
    w > 0 && h > 0 ? { wPt: w * PT_PER_PX, hPt: h * PT_PER_PX } : undefined;

  try {
    // Respect byteOffset so sub-array views of a larger buffer are read correctly.
    const view = new DataView(data.buffer, data.byteOffset, data.byteLength);

    if (mime === 'image/png' && data.length >= 24) {
      // PNG IHDR: sig(8) + length(4) + type(4) + width(4) + height(4) — all big-endian
      const dims = toPt(view.getUint32(16, false), view.getUint32(20, false));
      if (dims) return dims;
    }

    if (mime === 'image/jpeg') {
      // Walk the marker segments that follow SOI until a frame header (SOFn) is found.
      let i = 2;
      while (i + 8 < data.length) {
        if (data[i] !== 0xFF) { i++; continue; }
        const marker = data[i + 1];

        // 0xFF may be repeated as a fill byte before the real marker code.
        if (marker === 0xFF) { i++; continue; }

        // Markers without a length field: stuffed 0x00, TEM, RST0-RST7, SOI.
        if (marker === 0x00 || marker === 0x01 || marker === 0xD8 ||
            (marker >= 0xD0 && marker <= 0xD7)) {
          i += 2;
          continue;
        }

        // The frame header always precedes the first scan, so stop at SOS / EOI
        // instead of wandering through entropy-coded data.
        if (marker === 0xDA || marker === 0xD9) break;

        // SOF0-SOF15, excluding DHT (C4), JPG (C8) and DAC (CC) which share the range.
        const isSof = marker >= 0xC0 && marker <= 0xCF &&
          marker !== 0xC4 && marker !== 0xC8 && marker !== 0xCC;
        if (isSof) {
          // SOF: 2-byte marker + 2-byte length + 1-byte precision + 2-byte height + 2-byte width
          const dims = toPt(view.getUint16(i + 7, false), view.getUint16(i + 5, false));
          if (dims) return dims;
        }

        // The segment length includes its own two bytes; anything smaller is invalid,
        // so step over just the length field to guarantee forward progress.
        const segLen = view.getUint16(i + 2, false);
        i += 2 + (segLen >= 2 ? segLen : 2);
      }
    }

    if (mime === 'image/bmp' && data.length >= 26) {
      // BMP DIB header: width at 18, height at 22 (signed int32 LE; negative height = top-down)
      const dims = toPt(view.getInt32(18, true), Math.abs(view.getInt32(22, true)));
      if (dims) return dims;
    }

    if (mime === 'image/gif' && data.length >= 10) {
      // GIF logical screen descriptor: width at 6, height at 8 (uint16 LE)
      const dims = toPt(view.getUint16(6, true), view.getUint16(8, true));
      if (dims) return dims;
    }
  } catch {
    // Best effort: an unreadable header must never abort decoding; use the fallback size.
  }
  return fallback;
}
|
|
1200
|
+
|
|
950
1201
|
/* ═══════════════════════════════════════════════════════════════
|
|
951
1202
|
OLE Object extraction (images)
|
|
952
1203
|
════════════════════════════════════════════════════════════ */
|
|
@@ -994,35 +1245,69 @@ function injectImagesIntoContent(
|
|
|
994
1245
|
content: ContentNode[],
|
|
995
1246
|
objectMap: Map<number, ImgNode>
|
|
996
1247
|
): void {
|
|
997
|
-
|
|
998
|
-
|
|
1248
|
+
if (objectMap.size === 0) return;
|
|
1249
|
+
|
|
1250
|
+
// Helper function to process a list of kids (spans, images, etc.)
|
|
1251
|
+
const processKids = (kids: any[]) => {
|
|
1252
|
+
for (let i = 0; i < kids.length; i++) {
|
|
1253
|
+
const kid = kids[i];
|
|
1254
|
+
// Span node structure: { tag: 'span', props, kids: [{ tag: 'txt', content }] }
|
|
1255
|
+
if (kid.tag === 'span' && kid.kids && kid.kids[0]?.tag === 'txt') {
|
|
1256
|
+
const text = kid.kids[0].content;
|
|
1257
|
+
// __EXT_N__ or __EXT_N_W<wPt>_H<hPt>__ (with encoded display size)
|
|
1258
|
+
// N is the objId that matches the index in objectMap
|
|
1259
|
+
const match = text.match?.(/^__(?:IMG|EXT)_(\d+)(?:_W(\d+)_H(\d+))?__$/);
|
|
1260
|
+
if (match) {
|
|
1261
|
+
const objId = parseInt(match[1], 10);
|
|
1262
|
+
const base = objectMap.get(objId);
|
|
1263
|
+
if (base) {
|
|
1264
|
+
const wPt = match[2] ? parseInt(match[2], 10) : 0;
|
|
1265
|
+
const hPt = match[3] ? parseInt(match[3], 10) : 0;
|
|
1266
|
+
// Use encoded display size when valid; otherwise keep pixel-based dims
|
|
1267
|
+
kids[i] = (wPt > 0 && hPt > 0) ? { ...base, w: wPt, h: hPt } : base;
|
|
1268
|
+
}
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
}
|
|
1272
|
+
};
|
|
999
1273
|
|
|
1000
|
-
//
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1274
|
+
// Recursively process a grid (table): resolves image placeholders in all cells,
|
|
1275
|
+
// including nested grids inside cells.
|
|
1276
|
+
const processGridKids = (grid: any) => {
|
|
1277
|
+
if (!grid.kids || !Array.isArray(grid.kids)) return;
|
|
1278
|
+
|
|
1279
|
+
for (const row of grid.kids) {
|
|
1280
|
+
if (!row.kids || !Array.isArray(row.kids)) continue;
|
|
1281
|
+
|
|
1282
|
+
for (const cell of row.kids) {
|
|
1283
|
+
if (!cell.kids || !Array.isArray(cell.kids)) continue;
|
|
1284
|
+
|
|
1285
|
+
for (const cellKid of cell.kids) {
|
|
1286
|
+
if (cellKid.tag === 'grid') {
|
|
1287
|
+
// Nested table inside cell — recurse
|
|
1288
|
+
processGridKids(cellKid);
|
|
1289
|
+
} else if (cellKid.tag === 'para' && cellKid.kids) {
|
|
1290
|
+
processKids(cellKid.kids);
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
}
|
|
1295
|
+
};
|
|
1005
1296
|
|
|
1006
|
-
let imgIdx = 0;
|
|
1007
1297
|
for (const node of content) {
|
|
1008
1298
|
if (node.tag === 'para' && node.kids) {
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
if (match) {
|
|
1017
|
-
// Replace placeholder with next available image (round-robin)
|
|
1018
|
-
const imgNode = uniqueImages[imgIdx % uniqueImages.length];
|
|
1019
|
-
if (imgNode) {
|
|
1020
|
-
node.kids[i] = imgNode;
|
|
1021
|
-
imgIdx++;
|
|
1022
|
-
}
|
|
1023
|
-
}
|
|
1299
|
+
// Process paragraph kids (spans, images, links, grids)
|
|
1300
|
+
processKids(node.kids);
|
|
1301
|
+
|
|
1302
|
+
// Also process any nested grids inside the paragraph
|
|
1303
|
+
for (const kid of node.kids) {
|
|
1304
|
+
if (kid.tag === 'grid') {
|
|
1305
|
+
processGridKids(kid);
|
|
1024
1306
|
}
|
|
1025
1307
|
}
|
|
1308
|
+
} else if (node.tag === 'grid') {
|
|
1309
|
+
// Process grid nodes (tables)
|
|
1310
|
+
processGridKids(node);
|
|
1026
1311
|
}
|
|
1027
1312
|
}
|
|
1028
1313
|
}
|