hwpkit-dev 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,759 @@
1
+ import type { Decoder } from '../../contract/decoder';
2
+ import type { DocRoot, ContentNode, ParaNode, SpanNode, GridNode, ImgNode, PageNumNode } from '../../model/doc-tree';
3
+ import type { Outcome } from '../../contract/result';
4
+ import type { DocMeta, PageDims, TextProps, ParaProps, CellProps, GridProps, Stroke, ImgLayout, ImgWrap, ImgHorzAlign, ImgVertAlign, ImgHorzRelTo, ImgVertRelTo } from '../../model/doc-props';
5
+ import { A4 } from '../../model/doc-props';
6
+ import { succeed, fail } from '../../contract/result';
7
+ import { buildRoot, buildSheet, buildPara, buildSpan, buildImg, buildGrid, buildRow, buildCell, buildPb } from '../../model/builders';
8
+ import { ShieldedParser } from '../../safety/ShieldedParser';
9
+ import { Metric, safeAlign, safeFont, safeHex, safeStrokeHwpx } from '../../safety/StyleBridge';
10
+ import { ArchiveKit } from '../../toolkit/ArchiveKit';
11
+ import { XmlKit } from '../../toolkit/XmlKit';
12
+ import { TextKit } from '../../toolkit/TextKit';
13
+ import { registry } from '../../pipeline/registry';
14
+
15
/** Border/fill definition collected from one header `borderFill` entry. */
interface BorderFillInfo {
  /** Background colour taken from the fill brush `faceColor` (absent when missing or `'none'`). */
  bgColor?: string;
  /** Stroke built from the entry's top border, which stands in for all four sides. */
  stroke?: Stroke;
}
19
+
20
/** Character formatting collected from one header `charPr` entry. */
interface CharPrInfo {
  /** Bold. */
  b?: boolean;
  /** Italic. */
  i?: boolean;
  /** Underline. */
  u?: boolean;
  /** Strikeout. */
  s?: boolean;
  /** Font size in points. */
  pt?: number;
  /** Text colour. */
  color?: string;
  /** Font name. */
  font?: string;
  /** Background colour. */
  bg?: string;
}
24
+
25
/** Paragraph formatting collected from one header `paraPr` entry. */
interface ParaPrInfo {
  /** Raw horizontal alignment value as found in the header (not yet normalised). */
  align?: string;
  /** Indent in points; only set when the source value is non-zero. */
  indentPt?: number;
  /** Space before the paragraph in points; only set when positive. */
  spaceBefore?: number;
  /** Space after the paragraph in points; only set when positive. */
  spaceAfter?: number;
  /** Line height as a multiplier (percent value divided by 100). */
  lineHeight?: number;
}
32
+
33
/** State shared by the decoding helpers while one archive is being processed. */
interface DecCtx {
  /** Every entry of the unzipped archive, keyed by entry name. */
  files: Map<string, Uint8Array>;
  /** Guard used to run per-item decoders with fallbacks. */
  shield: ShieldedParser;
  /** Header `borderFill` definitions keyed by id. */
  borderFills: Map<number, BorderFillInfo>;
  /** Header `charPr` definitions keyed by id. */
  charPrs: Map<number, CharPrInfo>;
  /** Header `paraPr` definitions keyed by id. */
  paraPrs: Map<number, ParaPrInfo>;
  /** Warning messages accumulated during decoding. */
  warns: string[];
}
41
+
42
/**
 * Decoder for HWPX archives (a zip container holding XML parts).
 *
 * Reads one section part plus, when present, `header.xml`, and produces a
 * document tree. Problems that do not prevent decoding are reported as
 * warnings on the returned outcome.
 */
export class HwpxDecoder implements Decoder {
  readonly format = 'hwpx';

  /**
   * Decodes an HWPX archive.
   *
   * @param data Raw bytes of the archive.
   * @returns A success outcome carrying the document root and any warnings, or
   *   a failure outcome when no section part is found or decoding throws.
   */
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
    const shield = new ShieldedParser();
    const warns: string[] = [];

    try {
      const files = await ArchiveKit.unzip(data);

      const bodyXml = files.get('Contents/section0.xml')
        ?? files.get('section0.xml')
        ?? findSectionFile(files);

      if (!bodyXml) return fail('HWPX: section0.xml not found in archive');

      const headXml = files.get('Contents/header.xml') ?? files.get('header.xml');

      // Defaults used when the header is missing or unreadable.
      let meta: DocMeta = {};
      let dims: PageDims = { ...A4 };
      let borderFills = new Map<number, BorderFillInfo>();
      let charPrs = new Map<number, CharPrInfo>();
      let paraPrs = new Map<number, ParaPrInfo>();

      if (headXml) {
        try {
          const headStr = TextKit.decode(headXml);
          const headObj: any = await XmlKit.parseStrict(headStr);
          if (headObj) {
            meta = extractMeta(headObj);
            dims = extractDims(headObj) ?? dims;
            borderFills = extractBorderFills(headObj);
            charPrs = extractCharPrs(headObj);
            paraPrs = extractParaPrs(headObj);
          }
        } catch (e: unknown) {
          // A broken header is non-fatal, but callers should be able to see
          // that styles and metadata fell back to defaults.
          warns.push(`HWPX: header.xml could not be parsed, using defaults: ${HwpxDecoder.describeError(e)}`);
        }
      }

      const ctx: DecCtx = { files, shield, borderFills, charPrs, paraPrs, warns };

      const bodyStr = TextKit.decode(bodyXml);
      const bodyObj: any = await XmlKit.parseStrict(bodyStr);

      const sections = normalizeSections(bodyObj);
      const kids = shield.guardAll(
        sections,
        (sec: any) => decodeSection(sec, dims, ctx),
        () => buildSheet([buildPara([buildSpan('[섹션 파싱 실패]')])], dims),
        'hwpx:section',
      );

      warns.push(...shield.flush());
      return succeed(buildRoot(meta, kids), warns);
    } catch (e: unknown) {
      warns.push(...shield.flush());
      return fail(`HWPX decode error: ${HwpxDecoder.describeError(e)}`, warns);
    }
  }

  /** Renders a thrown value as text: its `message` when it has one, else `String(value)`. */
  private static describeError(e: unknown): string {
    const message = (e as { message?: unknown } | null | undefined)?.message;
    return String(message ?? e);
  }
}
103
+
104
+ // ─── helpers ────────────────────────────────────────────────
105
+
106
/**
 * Fallback lookup for the section part when it is not at a well-known path.
 *
 * An entry qualifies when its name contains "section" and ends with ".xml",
 * both compared case-insensitively. Among qualifying entries the one named
 * `section<N>.xml` with the smallest `N` wins, so the result does not depend
 * on archive ordering; if none is numbered, the first qualifying entry is used.
 *
 * @param files Archive entries keyed by entry name.
 * @returns The bytes of the chosen entry, or `undefined` when nothing qualifies.
 */
function findSectionFile(files: Map<string, Uint8Array>): Uint8Array | undefined {
  let firstMatch: Uint8Array | undefined;
  let numbered: Uint8Array | undefined;
  let lowestIndex = Number.POSITIVE_INFINITY;

  for (const [key, val] of files) {
    const lower = key.toLowerCase();
    if (!lower.includes('section') || !lower.endsWith('.xml')) continue;

    if (firstMatch === undefined) firstMatch = val;

    const m = /section(\d+)\.xml$/.exec(lower);
    if (m) {
      const index = Number(m[1]);
      if (index < lowestIndex) {
        lowestIndex = index;
        numbered = val;
      }
    }
  }

  return numbered ?? firstMatch;
}
112
+
113
/**
 * Returns the list of section elements contained in a parsed section document.
 *
 * Recognised layouts, in order:
 *  1. a root-level `hs:sec` element,
 *  2. a root-level `hp:SEC` element,
 *  3. a `HWPML` wrapper containing `BODY` > `SECTION` (with or without `hp:` prefix).
 * When none applies, the parsed object itself is treated as the single section.
 *
 * @param bodyObj Parsed XML object of the section part.
 * @returns Section elements, always as an array.
 */
function normalizeSections(bodyObj: any): any[] {
  if (bodyObj?.['hs:sec']) return toArr(bodyObj['hs:sec']);
  if (bodyObj?.['hp:SEC']) return toArr(bodyObj['hp:SEC']);

  // The wrapper may be delivered either as an object or as a one-element
  // array; unwrap the array form so the BODY lookup below can succeed.
  const wrapper = bodyObj?.['hp:HWPML'] ?? bodyObj?.HWPML;
  const root = (Array.isArray(wrapper) ? wrapper[0] : wrapper) ?? bodyObj;

  const body = root?.['hp:BODY']?.[0] ?? root?.BODY?.[0] ?? root?.['hp:BODY'] ?? root?.BODY;
  if (!body) return [bodyObj];

  const sections = body?.['hp:SECTION'] ?? body?.SECTION ?? [];
  return Array.isArray(sections) ? sections : [sections];
}
124
+
125
/**
 * Looks up a child element under several alternative tag names
 * (namespace/case variants) and returns it as an array.
 *
 * @param obj Parsed element to read from; may be null/undefined.
 * @param names Candidate property names, tried in the given order.
 * @returns The value of the first candidate that is not null/undefined,
 *   wrapped into an array if needed; an empty array when none is present.
 */
function getTag(obj: any, ...names: string[]): any[] {
  const present = names.find((name) => obj?.[name] != null);
  return present === undefined ? [] : toArr(obj[present]);
}
133
+
134
/**
 * Reads title, author and subject from the header's `DOCSUMMARY` element.
 *
 * The head element is looked up as `hh:head`, `hh:HEAD` or `HEAD`; if none is
 * found the parsed object itself is used. Empty values are reported as
 * `undefined`. Any error while reading yields an empty object.
 *
 * @param headObj Parsed header XML object.
 * @returns Document metadata, or `{}` when no summary is available.
 */
function extractMeta(headObj: any): DocMeta {
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const summary = head?.['hh:DOCSUMMARY']?.[0] ?? head?.DOCSUMMARY?.[0];
    if (!summary) return {};

    // Prefixed tag first, then the bare tag; empty text collapses to undefined.
    const read = (field: string) => {
      const raw = summary?.[`hh:${field}`]?.[0]?._text ?? summary?.[field]?.[0]?._text ?? '';
      return raw || undefined;
    };

    return {
      title: read('TITLE'),
      author: read('AUTHOR'),
      subject: read('SUBJECT'),
    };
  } catch {
    return {};
  }
}
144
+
145
/**
 * Reads page dimensions from the header's section-property list
 * (`REFLIST` > `SECPRLST` > `SECPR` > `PAGEPROPERTY`), using the first `SECPR`.
 *
 * Missing attributes fall back to the numeric defaults below; all lengths are
 * converted with `Metric.hwpToPt`. The page is landscape when the `Landscape`
 * attribute is numerically 1.
 *
 * @param headObj Parsed header XML object.
 * @returns Page dimensions, or `null` when the structure is absent or reading fails.
 */
function extractDims(headObj: any): PageDims | null {
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refs = head?.['hh:refList']?.[0] ?? head?.['hh:REFLIST']?.[0] ?? head?.REFLIST?.[0];
    if (!refs) return null;

    const secPrs = refs?.['hh:SECPRLST']?.[0]?.['hh:SECPR'] ?? refs?.SECPRLST?.[0]?.SECPR;
    const firstSec = Array.isArray(secPrs) ? secPrs[0] : secPrs;
    if (!firstSec) return null;

    const page = firstSec?.['hh:PAGEPROPERTY']?.[0]?._attr ?? firstSec?.PAGEPROPERTY?.[0]?._attr;
    if (!page) return null;

    // Attribute (or its default) → number → points.
    const toPt = (raw: unknown, fallback: number) => Metric.hwpToPt(Number(raw ?? fallback));

    return {
      wPt: toPt(page.Width, 59528),
      hPt: toPt(page.Height, 84188),
      mt: toPt(page.TopMargin, 5670),
      mb: toPt(page.BottomMargin, 4252),
      ml: toPt(page.LeftMargin, 8504),
      mr: toPt(page.RightMargin, 8504),
      orient: Number(page.Landscape) === 1 ? 'landscape' : 'portrait',
    };
  } catch {
    return null;
  }
}
170
+
171
/**
 * Collects the header's border/fill definitions into a map keyed by id.
 *
 * For each entry the top border is used as the representative stroke and the
 * fill brush's `faceColor` (unless `'none'`) as the background colour.
 * Entries whose id is 0 or missing are skipped. An error while reading stops
 * the scan; whatever was collected up to that point is returned.
 *
 * @param headObj Parsed header XML object.
 * @returns Map from borderFill id to its extracted stroke/background.
 */
function extractBorderFills(headObj: any): Map<number, BorderFillInfo> {
  const result = new Map<number, BorderFillInfo>();
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refs = head?.['hh:refList']?.[0] ?? head?.['hh:REFLIST']?.[0] ?? head?.REFLIST?.[0];
    if (!refs) return result;

    const list = refs?.['hh:borderFills']?.[0] ?? refs?.['hh:BORDERFILLLIST']?.[0] ?? refs?.BORDERFILLLIST?.[0];
    if (!list) return result;

    for (const fillDef of getTag(list, 'hh:borderFill', 'hh:BORDERFILL')) {
      const id = Number((fillDef?._attr ?? {}).id ?? 0);
      if (id === 0) continue;

      const entry: BorderFillInfo = {};

      const topAttrs = fillDef?.['hh:topBorder']?.[0]?._attr
        ?? fillDef?.['hh:top']?.[0]?._attr
        ?? fillDef?.top?.[0]?._attr;
      if (topAttrs) {
        // Width is a millimetre string such as "0.18 mm": mm → pt (×2.835),
        // then ×100 to reach the unit safeStrokeHwpx expects. An unparsable
        // or zero width is passed on as undefined.
        const widthMm = parseFloat(topAttrs.width);
        const widthHwp = widthMm ? widthMm * 2.835 * 100 : undefined;
        entry.stroke = safeStrokeHwpx(topAttrs.type, widthHwp, topAttrs.color);
      }

      // The fill brush is looked up under hc:, then hh:, then un-prefixed names.
      const brush = fillDef?.['hc:fillBrush']?.[0]
        ?? fillDef?.['hh:fillBrush']?.[0]
        ?? fillDef?.['hh:fill']?.[0]
        ?? fillDef?.fill?.[0]
        ?? fillDef?.fillBrush?.[0];
      if (brush) {
        const winAttrs = brush?.['hc:winBrush']?.[0]?._attr
          ?? brush?.['hh:winBrush']?.[0]?._attr
          ?? brush?.winBrush?.[0]?._attr;
        const face = winAttrs?.faceColor;
        if (face && face !== 'none') entry.bgColor = safeHex(face);
      }

      result.set(id, entry);
    }
  } catch {
    /* non-fatal */
  }
  return result;
}
212
+
213
/**
 * Collects the header's character-property definitions into a map keyed by id.
 *
 * Extracted per entry: size (`height`), text colour, bold, italic, underline
 * and strikeout. Font name and background colour are not resolved here.
 * Entries with a negative or missing id are skipped. An error while reading
 * stops the scan; whatever was collected up to that point is returned.
 *
 * @param headObj Parsed header XML object.
 * @returns Map from charPr id to its extracted formatting.
 */
function extractCharPrs(headObj: any): Map<number, CharPrInfo> {
  const result = new Map<number, CharPrInfo>();
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refs = head?.['hh:refList']?.[0] ?? head?.['hh:REFLIST']?.[0] ?? head?.REFLIST?.[0];
    if (!refs) return result;

    const list = refs?.['hh:charProperties']?.[0] ?? refs?.['hh:CHARPROPERTIES']?.[0];
    if (!list) return result;

    for (const charPr of getTag(list, 'hh:charPr', 'hh:CHARPR')) {
      const attrs = charPr?._attr ?? {};
      const id = Number(attrs.id ?? -1);
      if (id < 0) continue;

      const entry: CharPrInfo = {};

      if (attrs.height) entry.pt = Metric.hHeightToPt(Number(attrs.height));
      if (attrs.textColor) entry.color = safeHex(attrs.textColor);

      // Bold/italic are flagged by the mere presence of the child element.
      if (charPr?.['hh:bold']?.[0] != null) entry.b = true;
      if (charPr?.['hh:italic']?.[0] != null) entry.i = true;

      const underline = charPr?.['hh:underline']?.[0]?._attr;
      if (underline?.type && underline.type !== 'NONE') entry.u = true;

      // shape="3D" is treated like "NONE": it does not count as an active strikeout.
      const strike = charPr?.['hh:strikeout']?.[0]?._attr;
      if (strike?.shape && strike.shape !== 'NONE' && strike.shape !== '3D') entry.s = true;

      result.set(id, entry);
    }
  } catch {
    /* non-fatal */
  }
  return result;
}
259
+
260
/**
 * Collects the header's paragraph-property definitions into a map keyed by id.
 *
 * Extracted per entry: horizontal alignment, indent, space before/after and
 * line height (only for `PERCENT` line spacing). `hh:margin` and
 * `hh:lineSpacing` are read from the direct children first; each one that is
 * missing there is looked up independently inside `hp:switch`
 * (`hp:default`, else `hp:case`). Entries with a negative or missing id are
 * skipped. An error while reading stops the scan; whatever was collected up
 * to that point is returned.
 *
 * @param headObj Parsed header XML object.
 * @returns Map from paraPr id to its extracted formatting.
 */
function extractParaPrs(headObj: any): Map<number, ParaPrInfo> {
  const map = new Map<number, ParaPrInfo>();
  try {
    const root = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refList = root?.['hh:refList']?.[0] ?? root?.['hh:REFLIST']?.[0] ?? root?.REFLIST?.[0];
    if (!refList) return map;

    const ppList = refList?.['hh:paraProperties']?.[0] ?? refList?.['hh:PARAPROPERTIES']?.[0];
    if (!ppList) return map;

    const pps = getTag(ppList, 'hh:paraPr', 'hh:PARAPR');
    for (const pp of pps) {
      const attr = pp?._attr ?? {};
      const id = Number(attr.id ?? -1);
      if (id < 0) continue;

      const alignNode = pp?.['hh:align']?.[0]?._attr ?? pp?.['hh:ALIGN']?.[0]?._attr;
      const align = alignNode?.horizontal ?? alignNode?.Horizontal;

      let marginEl = pp?.['hh:margin']?.[0] ?? null;
      let lineSpEl = pp?.['hh:lineSpacing']?.[0] ?? null;
      if (!marginEl || !lineSpEl) {
        // Either element may live only inside hp:switch; resolve each one
        // separately so a direct margin does not hide a switch-only lineSpacing.
        const sw = pp?.['hp:switch']?.[0];
        const container = sw?.['hp:default']?.[0] ?? sw?.['hp:case']?.[0];
        marginEl = marginEl || (container?.['hh:margin']?.[0] ?? null);
        lineSpEl = lineSpEl || (container?.['hh:lineSpacing']?.[0] ?? null);
      }

      let indentPt: number | undefined;
      let spaceBefore: number | undefined;
      let spaceAfter: number | undefined;
      let lineHeight: number | undefined;

      if (marginEl) {
        // Both spellings are accepted: hc:intent and hc:indent.
        const intentEl = marginEl?.['hc:intent']?.[0] ?? marginEl?.['hc:indent']?.[0];
        const prevEl = marginEl?.['hc:prev']?.[0];
        const nextEl = marginEl?.['hc:next']?.[0];
        const intentVal = Number(intentEl?._attr?.value ?? 0);
        const prevVal = Number(prevEl?._attr?.value ?? 0);
        const nextVal = Number(nextEl?._attr?.value ?? 0);
        // Indent may be negative; spacing is only kept when positive.
        if (intentVal !== 0) indentPt = Metric.hwpToPt(intentVal);
        if (prevVal > 0) spaceBefore = Metric.hwpToPt(prevVal);
        if (nextVal > 0) spaceAfter = Metric.hwpToPt(nextVal);
      }

      if (lineSpEl) {
        const lsAttr = lineSpEl._attr ?? {};
        const lsType = lsAttr.type ?? 'PERCENT';
        const lsVal = Number(lsAttr.value ?? 160);
        // Only percentage spacing maps onto a multiplier; other types are ignored.
        if (lsType === 'PERCENT' && lsVal > 0) lineHeight = lsVal / 100;
      }

      map.set(id, { align, indentPt, spaceBefore, spaceAfter, lineHeight });
    }
  } catch {
    /* non-fatal */
  }
  return map;
}
319
+
320
+ // ─── Section decoding ──────────────────────────────────────
321
+
322
/**
 * Appends the work items for one paragraph to `items`.
 *
 * Every table found inside the paragraph's runs is appended as a `'table'`
 * item. The paragraph itself is appended afterwards as a `'para'` item unless
 * it holds tables and no non-blank text (i.e. it is only a table container).
 *
 * @param p Parsed paragraph element.
 * @param items Output list, mutated in place.
 */
function addParaItems(p: any, items: { type: string; node: any }[]): void {
  const runs = getTag(p, 'hp:run', 'hp:RUN');

  let tableCount = 0;
  runs.forEach((run: any) => {
    getTag(run, 'hp:tbl', 'hp:TABLE').forEach((tbl: any) => {
      items.push({ type: 'table', node: tbl });
      tableCount += 1;
    });
  });

  // A text node counts as blank when it is empty or whitespace only.
  const isBlank = (t: any): boolean => {
    const text = typeof t === 'string' ? t : t?._text ?? '';
    return text.trim().length === 0;
  };
  const hasText = runs.some((run: any) => !getTag(run, 'hp:t', 'hp:T', 'hp:CHAR').every(isBlank));

  if (tableCount === 0 || hasText) {
    items.push({ type: 'para', node: p });
  }
}
345
+
346
/**
 * Decodes one section element into a sheet node.
 *
 * Page dimensions come from the first paragraph's section properties when
 * available, otherwise from `dims`. Paragraphs are processed in the order
 * returned by the parser; tables embedded in a paragraph are decoded through
 * the shield's fallback chain. Header and footer paragraphs are attached to
 * the sheet when present.
 *
 * @param sec Parsed section element.
 * @param dims Page dimensions to use when the section does not define its own.
 * @param ctx Shared decoding context.
 * @returns The sheet node built by `buildSheet`.
 */
function decodeSection(sec: any, dims: PageDims, ctx: DecCtx) {
  const paras = getTag(sec, 'hp:p', 'hp:P');
  const pageDims = extractSecPrDims(paras[0]) ?? dims;

  // Paragraphs are the only top-level children consumed here, so walking
  // them sequentially already preserves document order.
  const items: { type: string; node: any }[] = [];
  for (const p of paras) addParaItems(p, items);

  const kids: ContentNode[] = ctx.shield.guardAll(
    items,
    (item: any) => {
      if (item.type === 'table') {
        // Progressively simpler table decoders; the last one degrades the
        // table to a plain paragraph, hence the cast.
        const { value } = ctx.shield.guardGrid(
          item.node,
          (n) => decodeGrid(n, ctx),
          (n) => decodeGridSimple(n, ctx),
          (n) => decodeGridFlat(n),
          (n) => decodeGridText(n) as unknown as GridNode,
          'hwpx:table',
        );
        return value;
      }
      return decodePara(item.node, ctx);
    },
    () => buildPara([buildSpan('[파싱 실패]')]),
    'hwpx:content',
  );

  const headerParas = decodeHeaderFooter(sec, 'header', ctx);
  const footerParas = decodeHeaderFooter(sec, 'footer', ctx);

  return buildSheet(
    kids.filter(Boolean) as ContentNode[],
    pageDims,
    { header: headerParas, footer: footerParas },
  );
}
401
+
402
/**
 * Reads page dimensions from the section properties embedded in a paragraph.
 *
 * The first run that carries a `secPr` with a `pagePr` wins. Missing
 * attributes fall back to the numeric defaults below; all lengths are
 * converted with `Metric.hwpToPt`. Orientation is landscape only when
 * `landscape="NARROWLY"`.
 *
 * @param p Parsed paragraph element (usually the section's first paragraph).
 * @returns Page dimensions, or `null` when `p` is empty, no run carries page
 *   properties, or reading fails.
 */
function extractSecPrDims(p: any): PageDims | null {
  if (!p) return null;
  try {
    const toPt = (raw: unknown, fallback: number) => Metric.hwpToPt(Number(raw ?? fallback));

    for (const run of getTag(p, 'hp:run', 'hp:RUN')) {
      const secPr = run?.['hp:secPr']?.[0] ?? run?.['hp:SECPR']?.[0];
      if (!secPr) continue;

      const page = secPr?.['hp:pagePr']?.[0]?._attr ?? secPr?.['hp:PAGEPR']?.[0]?._attr;
      if (!page) continue;

      const margins = secPr?.['hp:pagePr']?.[0]?.['hp:margin']?.[0]?._attr
        ?? secPr?.['hp:PAGEPR']?.[0]?.['hp:MARGIN']?.[0]?._attr
        ?? {};

      return {
        wPt: toPt(page.width, 59528),
        hPt: toPt(page.height, 84188),
        mt: toPt(margins.top, 5670),
        mb: toPt(margins.bottom, 4252),
        ml: toPt(margins.left, 8504),
        mr: toPt(margins.right, 8504),
        orient: page.landscape === 'NARROWLY' ? 'landscape' : 'portrait',
      };
    }
  } catch {
    /* ignore */
  }
  return null;
}
426
+
427
/**
 * Decodes the header or footer paragraphs of a section.
 *
 * The container and the requested part are each looked up under prefixed and
 * un-prefixed names, in lower/camel and upper case.
 *
 * @param sec Parsed section element.
 * @param kind Which part to decode.
 * @param ctx Shared decoding context.
 * @returns Decoded paragraphs, or `undefined` when the part is missing, has
 *   no paragraphs, or decoding throws.
 */
function decodeHeaderFooter(sec: any, kind: 'header' | 'footer', ctx: DecCtx): ParaNode[] | undefined {
  // First element of the first candidate child that is present.
  const firstChild = (owner: any, keys: string[]): any => {
    for (const key of keys) {
      const hit = owner?.[key]?.[0];
      if (hit != null) return hit;
    }
    return undefined;
  };

  try {
    const container = firstChild(sec, ['hp:headerFooter', 'hp:HEADERFOOTER', 'headerFooter', 'HEADERFOOTER']);
    if (!container) return undefined;

    const upper = kind.toUpperCase();
    const part = firstChild(container, ['hp:' + kind, 'hp:' + upper, kind, upper]);
    if (!part) return undefined;

    const paras = getTag(part, 'hp:p', 'hp:P');
    return paras.length === 0 ? undefined : paras.map((p: any) => decodePara(p, ctx));
  } catch {
    return undefined;
  }
}
445
+
446
+ // ─── Paragraph & run decoding ──────────────────────────────
447
+
448
/**
 * Decodes one paragraph element into a paragraph node.
 *
 * Paragraph properties are taken from the header definition referenced by
 * `paraPrIDRef` and from an inline `PARAPR`/`paraPr` child; inline alignment
 * overrides the referenced one. Each run contributes its pictures, then
 * either a page-number span or a text span. A paragraph with
 * `pageBreak="1"` gets a leading span holding a page-break node.
 *
 * @param p Parsed paragraph element.
 * @param ctx Shared decoding context.
 * @returns The paragraph node built by `buildPara`.
 */
function decodePara(p: any, ctx: DecCtx): ParaNode {
  const paraAttrs = p?._attr ?? {};
  const sharedDef = ctx.paraPrs.get(Number(paraAttrs.paraPrIDRef ?? -1));
  const inlineDef = p?.['hp:PARAPR']?.[0] ?? p?.['hp:paraPr']?.[0] ?? p?.PARAPR?.[0];

  // Alignment precedence, lowest to highest: header definition,
  // inline `Type`, inline `horizontal`.
  let align: string | undefined;
  if (sharedDef?.align) align = sharedDef.align;
  if (inlineDef) {
    const alignAttrs = inlineDef?.['hp:ALIGN']?.[0]?._attr
      ?? inlineDef?.['hp:align']?.[0]?._attr
      ?? inlineDef?.ALIGN?.[0]?._attr;
    align = alignAttrs?.horizontal || alignAttrs?.Type || align;
  }

  const props: ParaProps = { align: safeAlign(align) };

  if (sharedDef) {
    if (sharedDef.indentPt !== undefined) props.indentPt = sharedDef.indentPt;
    if (sharedDef.spaceBefore !== undefined) props.spaceBefore = sharedDef.spaceBefore;
    if (sharedDef.spaceAfter !== undefined) props.spaceAfter = sharedDef.spaceAfter;
    if (sharedDef.lineHeight !== undefined) props.lineHeight = sharedDef.lineHeight;
  }

  // List information is only available on the inline property element.
  const inlineAttrs = inlineDef?._attr ?? {};
  if (inlineAttrs.listType) {
    props.listOrd = inlineAttrs.listType === 'DIGIT' || inlineAttrs.listType === 'DECIMAL';
    props.listLv = Number(inlineAttrs.listLevel ?? 0);
  }

  const kids: (SpanNode | ImgNode)[] = [];

  getTag(p, 'hp:run', 'hp:RUN').forEach((run: any) => {
    // Pictures come first, ahead of whatever else the run holds.
    const pics = getTag(run, 'hp:pic', 'hp:PIC');
    for (const pic of pics) {
      const img = decodePic(pic, ctx);
      if (img) kids.push(img);
    }

    // A page-number run yields a single span; its text (if any) is not read.
    const pageNums = getTag(run, 'hp:pageNum', 'hp:PAGENUM');
    if (pageNums.length > 0) {
      const numAttrs = pageNums[0]?._attr ?? {};
      let format: 'roman' | 'romanCaps' | 'decimal' = 'decimal';
      if (numAttrs.formatType === 'ROMAN_LOWER') format = 'roman';
      else if (numAttrs.formatType === 'ROMAN_UPPER') format = 'romanCaps';

      const pageNumNode: PageNumNode = { tag: 'pagenum', format };
      const numSpanProps = resolveCharPr(run, ctx);
      kids.push({ tag: 'span', props: numSpanProps, kids: [pageNumNode] });
      return;
    }

    const content = getTag(run, 'hp:t', 'hp:T', 'hp:CHAR')
      .map((t: any) => typeof t === 'string' ? t : t?._text ?? t?._ ?? '')
      .join('');

    // Runs that only carry section properties produce no span.
    const carriesSecPr = run?.['hp:secPr']?.[0] || run?.['hp:SECPR']?.[0];
    if (content === '' && carriesSecPr && pics.length === 0) return;

    const spanProps = resolveCharPr(run, ctx);
    kids.push(buildSpan(content, spanProps));
  });

  if (paraAttrs.pageBreak === '1') {
    kids.unshift({ tag: 'span', props: {}, kids: [buildPb()] });
  }

  return buildPara(kids.filter(Boolean) as ParaNode['kids'], props);
}
525
+
526
/**
 * Resolves the text properties of a run.
 *
 * When the run's `charPrIDRef` matches a header definition, that definition
 * is copied and any inline character properties are ignored. Otherwise the
 * attributes of an inline `CHARPR`/`charPr` child are used.
 *
 * @param run Parsed run element.
 * @param ctx Shared decoding context.
 * @returns Text properties for the run's span.
 */
function resolveCharPr(run: any, ctx: DecCtx): TextProps {
  const shared = ctx.charPrs.get(Number((run?._attr ?? {}).charPrIDRef ?? -1));
  if (shared) {
    const { b, i, u, s, pt, color, font, bg } = shared;
    return { b, i, u, s, pt, color, font, bg };
  }

  const inline = run?.['hp:CHARPR']?.[0]?._attr
    ?? run?.['hp:charPr']?.[0]?._attr
    ?? run?.CHARPR?.[0]?._attr
    ?? {};

  // '1' / 'true' → true, anything else → undefined (never false).
  const flag = (raw: unknown): true | undefined => (raw === '1' || raw === 'true' ? true : undefined);
  // Absent → undefined; present → true unless the value is 'NONE'.
  const styled = (raw: unknown): boolean | undefined => (raw ? raw !== 'NONE' : undefined);

  return {
    b: flag(inline.Bold),
    i: flag(inline.Italic),
    u: styled(inline.Underline),
    s: styled(inline.Strikeout),
    font: safeFont(inline.FontName ?? inline.FaceNameHangul),
    pt: inline.Height ? Metric.hHeightToPt(Number(inline.Height)) : undefined,
    color: safeHex(inline.TextColor),
    bg: safeHex(inline.BgColor),
  };
}
552
+
553
+ // ─── Image decoding ────────────────────────────────────────
554
+
555
/**
 * Decodes a picture element into an image node.
 *
 * The picture's `binaryItemIDRef` is resolved against the archive entries,
 * the bytes are base64-encoded, and size and layout are read from the
 * `sz` / `pos` children. The MIME type comes from the reference's extension
 * when it is a known one, otherwise from the matched entry's extension, and
 * defaults to `image/png`.
 *
 * @param pic Parsed picture element.
 * @param ctx Shared decoding context (provides the archive entries).
 * @returns The image node, or `null` when the reference is missing, no
 *   archive entry matches, or decoding throws.
 */
function decodePic(pic: any, ctx: DecCtx): ImgNode | null {
  try {
    const szAttr = pic?.['hp:sz']?.[0]?._attr ?? pic?.sz?.[0]?._attr ?? {};
    const w = Metric.hwpToPt(Number(szAttr.width ?? 0));
    const h = Metric.hwpToPt(Number(szAttr.height ?? 0));

    // The image reference may sit under several tag spellings.
    const imgAttr = pic?.['hp:img']?.[0]?._attr ?? pic?.['hc:img']?.[0]?._attr
      ?? pic?.img?.[0]?._attr ?? {};
    const binRef = imgAttr.binaryItemIDRef ?? imgAttr.BinaryItemIDRef;
    if (!binRef) return null;

    const entry = findBinaryItem(ctx.files, String(binRef));
    if (!entry) return null;
    const [entryName, imgData] = entry;

    const mime = mimeFromName(String(binRef)) ?? mimeFromName(entryName) ?? 'image/png';

    // Layout comes from the position element plus the picture's own attributes.
    const posAttr = pic?.['hp:pos']?.[0]?._attr ?? pic?.pos?.[0]?._attr ?? {};
    const layout = extractHwpxLayout(posAttr, pic);

    return buildImg(TextKit.base64Encode(imgData), mime, w, h, undefined, layout);
  } catch {
    return null;
  }
}

/**
 * Finds the archive entry a binary-item reference points to.
 *
 * An entry whose full name, file name, or file name without extension equals
 * the reference (case-insensitively) is preferred, so `image1` cannot pick up
 * `image10.png`. If no such entry exists, the first entry whose name merely
 * contains the reference is used.
 */
function findBinaryItem(files: Map<string, Uint8Array>, binRef: string): [string, Uint8Array] | undefined {
  const wanted = binRef.toLowerCase();
  let loose: [string, Uint8Array] | undefined;

  for (const [key, val] of files) {
    const lower = key.toLowerCase();
    const base = lower.slice(lower.lastIndexOf('/') + 1);
    const dot = base.lastIndexOf('.');
    const stem = dot > 0 ? base.slice(0, dot) : base;

    if (lower === wanted || base === wanted || stem === wanted) return [key, val];
    if (loose === undefined && (key.includes(binRef) || lower.includes(wanted))) loose = [key, val];
  }

  return loose;
}

/** Maps a file name's extension to a supported image MIME type, or `undefined` if unknown. */
function mimeFromName(name: string): ImgNode['mime'] | undefined {
  const mimeByExt: Record<string, ImgNode['mime']> = {
    png: 'image/png', jpg: 'image/jpeg', jpeg: 'image/jpeg',
    gif: 'image/gif', bmp: 'image/bmp',
  };
  const ext = name.split('.').pop()?.toLowerCase() ?? '';
  return Object.prototype.hasOwnProperty.call(mimeByExt, ext) ? mimeByExt[ext] : undefined;
}
592
+
593
/**
 * Builds the image layout from a picture's position attributes.
 *
 * A picture marked `treatAsChar` is inline and nothing else is read.
 * Otherwise the wrap mode, anchor references, alignments and offsets are
 * mapped from their attribute values; unknown wrap values become `'square'`
 * and unknown anchors become `'para'`.
 *
 * @param posAttr Attributes of the picture's position element.
 * @param pic Parsed picture element (source of the `textWrap` attribute).
 * @returns The layout description for the image.
 */
function extractHwpxLayout(posAttr: any, pic: any): ImgLayout {
  if (posAttr.treatAsChar === '1' || posAttr.treatAsChar === 'true') {
    return { wrap: 'inline' };
  }

  // textWrap → wrap mode
  const wrapByTextWrap: Record<string, ImgWrap> = {
    TOP_AND_BOTTOM: 'square',
    SQUARE: 'square',
    BOTH_SIDES: 'tight',
    LEFT: 'tight',
    RIGHT: 'tight',
    LARGER_ONLY: 'tight',
    SMALLER_ONLY: 'tight',
    LARGEST_ONLY: 'tight',
    BEHIND_TEXT: 'behind',
    FRONT_TEXT: 'none',
  };
  const textWrap: string = pic?._attr?.textWrap ?? pic?.['hp:pic']?.[0]?._attr?.textWrap ?? 'TOP_AND_BOTTOM';
  const wrap: ImgWrap = wrapByTextWrap[textWrap] ?? 'square';

  // Anchor (what the position is relative to)
  const horzAnchors: Record<string, ImgHorzRelTo> = {
    PARA: 'para', MARGIN: 'margin', PAGE: 'page', COLUMN: 'column',
  };
  const vertAnchors: Record<string, ImgVertRelTo> = {
    PARA: 'para', MARGIN: 'margin', PAGE: 'page', PAPER: 'page', LINE: 'line',
  };
  const horzRelTo = horzAnchors[posAttr.horzRelTo ?? ''] ?? 'para';
  const vertRelTo = vertAnchors[posAttr.vertRelTo ?? ''] ?? 'para';

  // Alignment (left undefined when the value is not recognised)
  const horzAligns: Record<string, ImgHorzAlign> = { LEFT: 'left', CENTER: 'center', RIGHT: 'right' };
  const vertAligns: Record<string, ImgVertAlign> = { TOP: 'top', CENTER: 'center', BOTTOM: 'bottom' };
  const horzAlign = horzAligns[posAttr.horzAlign ?? ''];
  const vertAlign = vertAligns[posAttr.vertAlign ?? ''];

  // Offsets: a zero offset is reported as undefined rather than 0.
  const dx = Number(posAttr.horzOffset ?? 0);
  const dy = Number(posAttr.vertOffset ?? 0);
  const xPt = dx === 0 ? undefined : Metric.hwpToPt(dx);
  const yPt = dy === 0 ? undefined : Metric.hwpToPt(dy);

  return { wrap, horzAlign, vertAlign, horzRelTo, vertRelTo, xPt, yPt };
}
637
+
638
+ // ─── Table decoding ────────────────────────────────────────
639
+
640
/**
 * Full-fidelity table decoder.
 *
 * Produces a grid with per-cell background, borders, vertical alignment,
 * spans and fully decoded paragraphs. Column widths are taken from the first
 * row in which no cell spans more than one column and at least one width is
 * positive.
 *
 * @param tbl Parsed table element.
 * @param ctx Shared decoding context.
 * @returns The grid node built by `buildGrid`.
 */
function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
  const tableAttrs = tbl?._attr ?? {};
  const tableFill = ctx.borderFills.get(Number(tableAttrs.borderFillIDRef ?? 0));

  const gridProps: GridProps = { headerRow: tableAttrs.repeatHeader === '1' || undefined };
  if (tableFill?.stroke) gridProps.defaultStroke = tableFill.stroke;

  const rows = getTag(tbl, 'hp:tr', 'hp:ROW');

  // Widths of a row's cells, or null as soon as a multi-column cell is met.
  const measureRow = (row: any): number[] | null => {
    const widths: number[] = [];
    for (const cell of getTag(row, 'hp:tc', 'hp:CELL')) {
      const span = cell?.['hp:cellSpan']?.[0]?._attr ?? {};
      if (Number(span.colSpan ?? cell?._attr?.ColSpan ?? 1) > 1) return null;
      const size = cell?.['hp:cellSz']?.[0]?._attr ?? {};
      widths.push(Metric.hwpToPt(Number(size.width ?? 0)));
    }
    return widths;
  };

  for (const row of rows) {
    const widths = measureRow(row);
    if (widths && widths.length > 0 && widths.some((w) => w > 0)) {
      gridProps.colWidths = widths;
      break;
    }
  }

  const decodeCell = (cell: any) => {
    const cellAttrs = cell?._attr ?? {};
    const cellFill = ctx.borderFills.get(Number(cellAttrs.borderFillIDRef ?? 0));

    // Background: referenced borderFill first, then the cell's own BgColor attribute.
    const cellProps: CellProps = {
      bg: cellFill?.bgColor ?? safeHex(cellAttrs.BgColor),
    };

    // The single extracted stroke is applied to all four sides.
    if (cellFill?.stroke) {
      cellProps.top = cellFill.stroke;
      cellProps.bot = cellFill.stroke;
      cellProps.left = cellFill.stroke;
      cellProps.right = cellFill.stroke;
    }

    // Vertical alignment lives on the cell's sub-list.
    const subList = cell?.['hp:subList']?.[0] ?? cell?.subList?.[0];
    const subAttrs = subList?._attr ?? {};
    if (subAttrs.vertAlign) {
      const vaByName: Record<string, 'top' | 'mid' | 'bot'> = {
        TOP: 'top', CENTER: 'mid', BOTTOM: 'bot',
      };
      cellProps.va = vaByName[subAttrs.vertAlign];
    }

    // Spans: cellSpan child first, then attributes on the cell itself.
    const span = cell?.['hp:cellSpan']?.[0]?._attr ?? {};
    const cs = Number(span.colSpan ?? cellAttrs.ColSpan ?? 1);
    const rs = Number(span.rowSpan ?? cellAttrs.RowSpan ?? 1);

    // Paragraphs come from the sub-list when there is one, else from the cell.
    const paraSource = subList ? subList : cell;
    const paras: ParaNode[] = getTag(paraSource, 'hp:p', 'hp:P').map((p: any) => decodePara(p, ctx));

    return buildCell(
      paras.length > 0 ? paras : [buildPara([buildSpan('')])],
      { cs, rs, props: cellProps },
    );
  };

  const rowNodes = rows.map((row: any) =>
    buildRow(getTag(row, 'hp:tc', 'hp:CELL').map((cell: any) => decodeCell(cell))),
  );

  return buildGrid(rowNodes, gridProps);
}
722
+
723
/**
 * Simplified table decoder: keeps the row/cell structure but reduces every
 * cell to a single paragraph holding its plain text. No styling or spans.
 *
 * @param tbl Parsed table element.
 * @param ctx Shared decoding context (not used by this decoder).
 * @returns The grid node built by `buildGrid`.
 */
function decodeGridSimple(tbl: any, ctx: DecCtx): GridNode {
  const rowNodes: ReturnType<typeof buildRow>[] = [];
  for (const row of getTag(tbl, 'hp:tr', 'hp:ROW')) {
    const cellNodes: ReturnType<typeof buildCell>[] = [];
    for (const cell of getTag(row, 'hp:tc', 'hp:CELL')) {
      const text = cellText(cell);
      cellNodes.push(buildCell([buildPara([buildSpan(text)])]));
    }
    rowNodes.push(buildRow(cellNodes));
  }
  return buildGrid(rowNodes);
}
731
+
732
/**
 * Flat table decoder: a 1×1 grid whose only cell holds the whole table's text.
 *
 * @param tbl Parsed table element.
 * @returns The grid node built by `buildGrid`.
 */
function decodeGridFlat(tbl: any): GridNode {
  const text = tableText(tbl);
  const onlyCell = buildCell([buildPara([buildSpan(text)])]);
  return buildGrid([buildRow([onlyCell])]);
}
735
+
736
/**
 * Text-only table decoder: turns the table into one paragraph of plain text.
 *
 * @param tbl Parsed table element.
 * @returns A paragraph node containing the table's text.
 */
function decodeGridText(tbl: any): ParaNode {
  const text = tableText(tbl);
  return buildPara([buildSpan(text)]);
}
739
+
740
/**
 * Returns the plain text of a table cell.
 *
 * Paragraphs are read from the cell's sub-list when present, otherwise from
 * the cell itself. Text of all runs in a paragraph is concatenated, and
 * paragraphs are joined with a single space. Text nodes are accepted under
 * `hp:t`, `hp:T` and `hp:CHAR`, the same set the paragraph decoder reads.
 *
 * @param cell Parsed cell element.
 * @returns The cell's text; empty when it has no text nodes.
 */
function cellText(cell: any): string {
  const subList = cell?.['hp:subList']?.[0] ?? cell?.subList?.[0];
  const source = subList ?? cell;
  return getTag(source, 'hp:p', 'hp:P').map((p: any) =>
    getTag(p, 'hp:run', 'hp:RUN').map((r: any) =>
      getTag(r, 'hp:t', 'hp:T', 'hp:CHAR')
        .map((t: any) => typeof t === 'string' ? t : t?._text ?? t?._ ?? '')
        .join(''),
    ).join(''),
  ).join(' ');
}
749
+
750
/**
 * Returns the plain text of a table: cells separated by tabs, rows by newlines.
 *
 * @param tbl Parsed table element.
 * @returns The table's text.
 */
function tableText(tbl: any): string {
  const lines: string[] = [];
  for (const row of getTag(tbl, 'hp:tr', 'hp:ROW')) {
    const columns: string[] = [];
    for (const cell of getTag(row, 'hp:tc', 'hp:CELL')) {
      columns.push(cellText(cell));
    }
    lines.push(columns.join('\t'));
  }
  return lines.join('\n');
}
755
+
756
/**
 * Normalises a value to an array: null/undefined become `[]`, arrays are
 * returned as-is (same instance), anything else is wrapped in a one-element array.
 */
function toArr(v: any): any[] {
  if (v == null) return [];
  if (Array.isArray(v)) return v;
  return [v];
}
757
+
758
// Self-registration: evaluating this module adds an HWPX decoder instance to the shared registry.
const hwpxDecoder = new HwpxDecoder();
registry.registerDecoder(hwpxDecoder);