hwpkit-dev 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/ .npmignore +4 -1
  2. package/README.md +39 -2
  3. package/dist/index.d.mts +74 -16
  4. package/dist/index.d.ts +70 -16
  5. package/dist/index.js +4985 -698
  6. package/dist/index.js.map +1 -1
  7. package/dist/index.mjs +4981 -698
  8. package/dist/index.mjs.map +1 -1
  9. package/package.json +4 -1
  10. package/playground/index.html +346 -0
  11. package/playground/main.ts +302 -0
  12. package/playground/vite.config.ts +16 -0
  13. package/src/contract/decoder.ts +1 -0
  14. package/src/contract/encoder.ts +6 -1
  15. package/src/core/BaseDecoder.ts +118 -0
  16. package/src/core/BaseEncoder.ts +146 -0
  17. package/src/decoders/docx/DocxDecoder.ts +867 -150
  18. package/src/decoders/html/HtmlDecoder.ts +366 -0
  19. package/src/decoders/hwp/HwpScanner.ts +477 -88
  20. package/src/decoders/hwpx/HwpxDecoder.ts +789 -293
  21. package/src/decoders/md/MdDecoder.ts +4 -4
  22. package/src/encoders/docx/DocxEncoder.ts +600 -295
  23. package/src/encoders/html/HtmlEncoder.ts +203 -0
  24. package/src/encoders/hwp/HwpEncoder.ts +1647 -398
  25. package/src/encoders/hwpx/HwpxEncoder.ts +1512 -444
  26. package/src/encoders/hwpx/constants.ts +148 -0
  27. package/src/encoders/hwpx/utils.ts +198 -0
  28. package/src/encoders/md/MdEncoder.ts +117 -30
  29. package/src/index.ts +1 -0
  30. package/src/model/builders.ts +8 -6
  31. package/src/model/doc-props.ts +19 -5
  32. package/src/model/doc-tree.ts +13 -5
  33. package/src/pipeline/Pipeline.ts +21 -4
  34. package/src/pipeline/registry.ts +13 -2
  35. package/src/safety/StyleBridge.ts +52 -7
  36. package/src/toolkit/ArchiveKit.ts +56 -0
  37. package/src/toolkit/StyleMapper.ts +221 -0
  38. package/src/toolkit/UnitConverter.ts +138 -0
  39. package/src/toolkit/XmlKit.ts +0 -5
  40. package/test-styling.ts +210 -0
@@ -1,33 +1,85 @@
1
- import type { Decoder } from '../../contract/decoder';
2
- import type { DocRoot, ContentNode, ParaNode, SpanNode, GridNode, ImgNode, PageNumNode } from '../../model/doc-tree';
3
- import type { Outcome } from '../../contract/result';
4
- import type { DocMeta, PageDims, TextProps, ParaProps, CellProps, GridProps, Stroke, ImgLayout, ImgWrap, ImgHorzAlign, ImgVertAlign, ImgHorzRelTo, ImgVertRelTo } from '../../model/doc-props';
5
- import { A4 } from '../../model/doc-props';
6
- import { succeed, fail } from '../../contract/result';
7
- import { buildRoot, buildSheet, buildPara, buildSpan, buildImg, buildGrid, buildRow, buildCell, buildPb } from '../../model/builders';
8
- import { ShieldedParser } from '../../safety/ShieldedParser';
9
- import { Metric, safeAlign, safeFont, safeHex, safeStrokeHwpx } from '../../safety/StyleBridge';
10
- import { ArchiveKit } from '../../toolkit/ArchiveKit';
11
- import { XmlKit } from '../../toolkit/XmlKit';
12
- import { TextKit } from '../../toolkit/TextKit';
13
- import { registry } from '../../pipeline/registry';
1
+ import type {
2
+ DocRoot,
3
+ ContentNode,
4
+ ParaNode,
5
+ SpanNode,
6
+ GridNode,
7
+ ImgNode,
8
+ PageNumNode,
9
+ } from "../../model/doc-tree";
10
+ import type { Outcome } from "../../contract/result";
11
+ import type {
12
+ DocMeta,
13
+ PageDims,
14
+ TextProps,
15
+ ParaProps,
16
+ CellProps,
17
+ GridProps,
18
+ Stroke,
19
+ ImgLayout,
20
+ ImgWrap,
21
+ ImgHorzAlign,
22
+ ImgVertAlign,
23
+ ImgHorzRelTo,
24
+ ImgVertRelTo,
25
+ } from "../../model/doc-props";
26
+ import { A4 } from "../../model/doc-props";
27
+ import { succeed, fail } from "../../contract/result";
28
+ import {
29
+ buildRoot,
30
+ buildSheet,
31
+ buildPara,
32
+ buildSpan,
33
+ buildImg,
34
+ buildGrid,
35
+ buildRow,
36
+ buildCell,
37
+ buildPb,
38
+ } from "../../model/builders";
39
+ import { ShieldedParser } from "../../safety/ShieldedParser";
40
+ import {
41
+ Metric,
42
+ safeAlign,
43
+ safeFont,
44
+ safeHex,
45
+ safeStrokeHwpx,
46
+ } from "../../safety/StyleBridge";
47
+ import { ArchiveKit } from "../../toolkit/ArchiveKit";
48
+ import { XmlKit } from "../../toolkit/XmlKit";
49
+ import { TextKit } from "../../toolkit/TextKit";
50
+ import { registry } from "../../pipeline/registry";
51
+ import { BaseDecoder } from "../../core/BaseDecoder";
52
+ import { HWPX_MIME_TYPE } from "../../encoders/hwpx/constants";
14
53
 
15
54
  interface BorderFillInfo {
16
- stroke?: Stroke;
55
+ stroke?: Stroke; // uniform fallback (used when all sides are the same)
56
+ top?: Stroke;
57
+ right?: Stroke;
58
+ bottom?: Stroke;
59
+ left?: Stroke;
17
60
  bgColor?: string;
18
61
  }
19
62
 
20
63
  interface CharPrInfo {
21
- b?: boolean; i?: boolean; u?: boolean; s?: boolean;
22
- pt?: number; color?: string; font?: string; bg?: string;
64
+ b?: boolean;
65
+ i?: boolean;
66
+ u?: boolean;
67
+ s?: boolean;
68
+ pt?: number;
69
+ color?: string;
70
+ font?: string;
71
+ bg?: string;
23
72
  }
24
73
 
25
74
  interface ParaPrInfo {
26
75
  align?: string;
27
- indentPt?: number;
76
+ indentPt?: number; // hc:left → 문단 전체 왼쪽 여백
77
+ indentRightPt?: number; // hc:right → 문단 전체 오른쪽 여백
78
+ firstLineIndentPt?: number; // hc:indent → 첫 줄 들여쓰기 (양수=들여쓰기, 음수=내어쓰기)
28
79
  spaceBefore?: number;
29
80
  spaceAfter?: number;
30
81
  lineHeight?: number;
82
+ lineHeightFixed?: number; // FIXED 행 높이 (pt)
31
83
  }
32
84
 
33
85
  interface DecCtx {
@@ -39,8 +91,13 @@ interface DecCtx {
39
91
  warns: string[];
40
92
  }
41
93
 
42
- export class HwpxDecoder implements Decoder {
43
- readonly format = 'hwpx';
94
+ export class HwpxDecoder extends BaseDecoder {
95
+ protected getFormat(): string {
96
+ return "hwpx";
97
+ }
98
+ protected getAliases(): string[] {
99
+ return [HWPX_MIME_TYPE, "application/hwp+zip"];
100
+ }
44
101
 
45
102
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
46
103
  const shield = new ShieldedParser();
@@ -49,13 +106,23 @@ export class HwpxDecoder implements Decoder {
49
106
  try {
50
107
  const files = await ArchiveKit.unzip(data);
51
108
 
52
- const bodyXml = files.get('Contents/section0.xml')
53
- ?? files.get('section0.xml')
54
- ?? findSectionFile(files);
109
+ const sectionFiles: Uint8Array[] = [];
110
+ for (let i = 0; ; i++) {
111
+ const sec =
112
+ files.get(`Contents/section${i}.xml`) ?? files.get(`section${i}.xml`);
113
+ if (!sec) break;
114
+ sectionFiles.push(sec);
115
+ }
116
+ if (sectionFiles.length === 0) {
117
+ const fallback = findSectionFile(files);
118
+ if (fallback) sectionFiles.push(fallback);
119
+ }
55
120
 
56
- if (!bodyXml) return fail('HWPX: section0.xml not found in archive');
121
+ if (sectionFiles.length === 0)
122
+ return fail("HWPX: No section files found");
57
123
 
58
- const headXml = files.get('Contents/header.xml') ?? files.get('header.xml');
124
+ const headXml =
125
+ files.get("Contents/header.xml") ?? files.get("header.xml");
59
126
 
60
127
  let meta: DocMeta = {};
61
128
  let dims: PageDims = { ...A4 };
@@ -79,17 +146,27 @@ export class HwpxDecoder implements Decoder {
79
146
  }
80
147
  }
81
148
 
82
- const ctx: DecCtx = { files, shield, borderFills, charPrs, paraPrs, warns };
149
+ const ctx: DecCtx = {
150
+ files,
151
+ shield,
152
+ borderFills,
153
+ charPrs,
154
+ paraPrs,
155
+ warns,
156
+ };
83
157
 
84
- const bodyStr = TextKit.decode(bodyXml);
85
- const bodyObj: any = await XmlKit.parseStrict(bodyStr);
158
+ const allSections: any[] = [];
159
+ for (const secFile of sectionFiles) {
160
+ const bodyStr = TextKit.decode(secFile);
161
+ const bodyObj: any = await XmlKit.parseStrict(bodyStr);
162
+ allSections.push(...normalizeSections(bodyObj));
163
+ }
86
164
 
87
- const sections = normalizeSections(bodyObj);
88
165
  const kids = shield.guardAll(
89
- sections,
166
+ allSections,
90
167
  (sec: any) => decodeSection(sec, dims, ctx),
91
- () => buildSheet([buildPara([buildSpan('[섹션 파싱 실패]')])], dims),
92
- 'hwpx:section',
168
+ () => buildSheet([buildPara([buildSpan("[섹션 파싱 실패]")])], dims),
169
+ "hwpx:section",
93
170
  );
94
171
 
95
172
  warns.push(...shield.flush());
@@ -103,22 +180,29 @@ export class HwpxDecoder implements Decoder {
103
180
 
104
181
  // ─── helpers ────────────────────────────────────────────────
105
182
 
106
- function findSectionFile(files: Map<string, Uint8Array>): Uint8Array | undefined {
183
+ function findSectionFile(
184
+ files: Map<string, Uint8Array>,
185
+ ): Uint8Array | undefined {
107
186
  for (const [key, val] of files) {
108
- if (key.toLowerCase().includes('section') && key.endsWith('.xml')) return val;
187
+ if (key.toLowerCase().includes("section") && key.endsWith(".xml"))
188
+ return val;
109
189
  }
110
190
  return undefined;
111
191
  }
112
192
 
113
193
  function normalizeSections(bodyObj: any): any[] {
114
194
  // <hs:sec> (real HWPX), <hp:SEC> (legacy)
115
- if (bodyObj?.['hs:sec']) return toArr(bodyObj['hs:sec']);
116
- if (bodyObj?.['hp:SEC']) return toArr(bodyObj['hp:SEC']);
117
-
118
- const root = bodyObj?.['hp:HWPML'] ?? bodyObj?.HWPML ?? bodyObj;
119
- const body = root?.['hp:BODY']?.[0] ?? root?.BODY?.[0] ?? root?.['hp:BODY'] ?? root?.BODY;
195
+ if (bodyObj?.["hs:sec"]) return toArr(bodyObj["hs:sec"]);
196
+ if (bodyObj?.["hp:SEC"]) return toArr(bodyObj["hp:SEC"]);
197
+
198
+ const root = bodyObj?.["hp:HWPML"] ?? bodyObj?.HWPML ?? bodyObj;
199
+ const body =
200
+ root?.["hp:BODY"]?.[0] ??
201
+ root?.BODY?.[0] ??
202
+ root?.["hp:BODY"] ??
203
+ root?.BODY;
120
204
  if (!body) return [bodyObj];
121
- const sections = body?.['hp:SECTION'] ?? body?.SECTION ?? [];
205
+ const sections = body?.["hp:SECTION"] ?? body?.SECTION ?? [];
122
206
  return Array.isArray(sections) ? sections : [sections];
123
207
  }
124
208
 
@@ -134,51 +218,85 @@ function getTag(obj: any, ...names: string[]): any[] {
134
218
  function extractMeta(headObj: any): DocMeta {
135
219
  try {
136
220
  // Support both <hh:HEAD> and <hh:head>
137
- const root = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
138
- const info = root?.['hh:DOCSUMMARY']?.[0] ?? root?.DOCSUMMARY?.[0];
221
+ const root =
222
+ headObj?.["hh:head"]?.[0] ??
223
+ headObj?.["hh:HEAD"]?.[0] ??
224
+ headObj?.HEAD?.[0] ??
225
+ headObj;
226
+ const info = root?.["hh:DOCSUMMARY"]?.[0] ?? root?.DOCSUMMARY?.[0];
139
227
  if (!info) return {};
140
- const a = (k: string) => info?.[`hh:${k}`]?.[0]?._text ?? info?.[k]?.[0]?._text ?? '';
141
- return { title: a('TITLE') || undefined, author: a('AUTHOR') || undefined, subject: a('SUBJECT') || undefined };
142
- } catch { return {}; }
228
+ const a = (k: string) =>
229
+ info?.[`hh:${k}`]?.[0]?._text ?? info?.[k]?.[0]?._text ?? "";
230
+ return {
231
+ title: a("TITLE") || undefined,
232
+ author: a("AUTHOR") || undefined,
233
+ subject: a("SUBJECT") || undefined,
234
+ };
235
+ } catch {
236
+ return {};
237
+ }
143
238
  }
144
239
 
145
240
  function extractDims(headObj: any): PageDims | null {
146
241
  try {
147
- const root = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
148
- const refList = root?.['hh:refList']?.[0] ?? root?.['hh:REFLIST']?.[0] ?? root?.REFLIST?.[0];
242
+ const root =
243
+ headObj?.["hh:head"]?.[0] ??
244
+ headObj?.["hh:HEAD"]?.[0] ??
245
+ headObj?.HEAD?.[0] ??
246
+ headObj;
247
+ const refList =
248
+ root?.["hh:refList"]?.[0] ??
249
+ root?.["hh:REFLIST"]?.[0] ??
250
+ root?.REFLIST?.[0];
149
251
  if (!refList) return null;
150
252
 
151
- const secPrList = refList?.['hh:SECPRLST']?.[0]?.['hh:SECPR']
152
- ?? refList?.SECPRLST?.[0]?.SECPR;
253
+ const secPrList =
254
+ refList?.["hh:SECPRLST"]?.[0]?.["hh:SECPR"] ??
255
+ refList?.SECPRLST?.[0]?.SECPR;
153
256
  const sec = Array.isArray(secPrList) ? secPrList[0] : secPrList;
154
257
  if (!sec) return null;
155
258
 
156
- const pa = sec?.['hh:PAGEPROPERTY']?.[0]?._attr ?? sec?.PAGEPROPERTY?.[0]?._attr;
259
+ const pa =
260
+ sec?.["hh:PAGEPROPERTY"]?.[0]?._attr ?? sec?.PAGEPROPERTY?.[0]?._attr;
157
261
  if (!pa) return null;
158
262
 
263
+ const ew = Number(pa.Width ?? 59528);
264
+ const eh = Number(pa.Height ?? 84188);
159
265
  return {
160
- wPt: Metric.hwpToPt(Number(pa.Width ?? 59528)),
161
- hPt: Metric.hwpToPt(Number(pa.Height ?? 84188)),
162
- mt: Metric.hwpToPt(Number(pa.TopMargin ?? 5670)),
163
- mb: Metric.hwpToPt(Number(pa.BottomMargin ?? 4252)),
164
- ml: Metric.hwpToPt(Number(pa.LeftMargin ?? 8504)),
165
- mr: Metric.hwpToPt(Number(pa.RightMargin ?? 8504)),
166
- orient: Number(pa.Landscape) === 1 ? 'landscape' : 'portrait',
266
+ wPt: Metric.hwpToPt(ew),
267
+ hPt: Metric.hwpToPt(eh),
268
+ mt: Metric.hwpToPt(Number(pa.TopMargin ?? 5670)),
269
+ mb: Metric.hwpToPt(Number(pa.BottomMargin ?? 4252)),
270
+ ml: Metric.hwpToPt(Number(pa.LeftMargin ?? 8504)),
271
+ mr: Metric.hwpToPt(Number(pa.RightMargin ?? 8504)),
272
+ orient: ew > eh ? "landscape" : "portrait",
167
273
  };
168
- } catch { return null; }
274
+ } catch {
275
+ return null;
276
+ }
169
277
  }
170
278
 
171
279
  function extractBorderFills(headObj: any): Map<number, BorderFillInfo> {
172
280
  const map = new Map<number, BorderFillInfo>();
173
281
  try {
174
- const root = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
175
- const refList = root?.['hh:refList']?.[0] ?? root?.['hh:REFLIST']?.[0] ?? root?.REFLIST?.[0];
282
+ const root =
283
+ headObj?.["hh:head"]?.[0] ??
284
+ headObj?.["hh:HEAD"]?.[0] ??
285
+ headObj?.HEAD?.[0] ??
286
+ headObj;
287
+ const refList =
288
+ root?.["hh:refList"]?.[0] ??
289
+ root?.["hh:REFLIST"]?.[0] ??
290
+ root?.REFLIST?.[0];
176
291
  if (!refList) return map;
177
292
 
178
- const bfList = refList?.['hh:borderFills']?.[0] ?? refList?.['hh:BORDERFILLLIST']?.[0] ?? refList?.BORDERFILLLIST?.[0];
293
+ const bfList =
294
+ refList?.["hh:borderFills"]?.[0] ??
295
+ refList?.["hh:BORDERFILLLIST"]?.[0] ??
296
+ refList?.BORDERFILLLIST?.[0];
179
297
  if (!bfList) return map;
180
298
 
181
- const bfs = getTag(bfList, 'hh:borderFill', 'hh:BORDERFILL');
299
+ const bfs = getTag(bfList, "hh:borderFill", "hh:BORDERFILL");
182
300
  for (const bf of bfs) {
183
301
  const attr = bf?._attr ?? {};
184
302
  const id = Number(attr.id ?? 0);
@@ -186,41 +304,119 @@ function extractBorderFills(headObj: any): Map<number, BorderFillInfo> {
186
304
 
187
305
  const info: BorderFillInfo = {};
188
306
 
189
- // Parse border (take top as representative)
190
- const top = bf?.['hh:topBorder']?.[0]?._attr ?? bf?.['hh:top']?.[0]?._attr ?? bf?.top?.[0]?._attr;
191
- if (top) {
192
- // width is in mm (e.g. "0.18 mm"), convert mm → pt (1mm ≈ 2.835pt), then pt → hwp (*100) for safeStrokeHwpx
193
- const mmVal = parseFloat(top.width) || undefined;
307
+ // Helper: parse a border element into a Stroke
308
+ const parseBorderEl = (el: any): Stroke | undefined => {
309
+ if (!el) return undefined;
310
+ const a = el?._attr ?? {};
311
+ const mmVal = parseFloat(a.width) || undefined;
194
312
  const hwpVal = mmVal != null ? mmVal * 2.835 * 100 : undefined;
195
- info.stroke = safeStrokeHwpx(top.type, hwpVal, top.color);
196
- }
313
+ return safeStrokeHwpx(a.type, hwpVal, a.color);
314
+ };
315
+
316
+ // Parse all four sides
317
+ const topEl =
318
+ bf?.["hh:topBorder"]?.[0] ?? bf?.["hh:top"]?.[0] ?? bf?.top?.[0];
319
+ const rightEl =
320
+ bf?.["hh:rightBorder"]?.[0] ?? bf?.["hh:right"]?.[0] ?? bf?.right?.[0];
321
+ const bottomEl =
322
+ bf?.["hh:bottomBorder"]?.[0] ??
323
+ bf?.["hh:bottom"]?.[0] ??
324
+ bf?.bottom?.[0];
325
+ const leftEl =
326
+ bf?.["hh:leftBorder"]?.[0] ?? bf?.["hh:left"]?.[0] ?? bf?.left?.[0];
327
+
328
+ info.top = parseBorderEl(topEl);
329
+ info.right = parseBorderEl(rightEl);
330
+ info.bottom = parseBorderEl(bottomEl);
331
+ info.left = parseBorderEl(leftEl);
332
+
333
+ // Set uniform stroke fallback = top border (for defaultStroke etc.)
334
+ info.stroke = info.top ?? info.left ?? info.right ?? info.bottom;
197
335
 
198
336
  // Parse fill (real HWPX uses hc:fillBrush, not hh:fillBrush)
199
- const fillBrush = bf?.['hc:fillBrush']?.[0] ?? bf?.['hh:fillBrush']?.[0] ?? bf?.['hh:fill']?.[0] ?? bf?.fill?.[0] ?? bf?.fillBrush?.[0];
337
+ const fillBrush =
338
+ bf?.["hc:fillBrush"]?.[0] ??
339
+ bf?.["hh:fillBrush"]?.[0] ??
340
+ bf?.["hh:fill"]?.[0] ??
341
+ bf?.fill?.[0] ??
342
+ bf?.fillBrush?.[0];
200
343
  if (fillBrush) {
201
- const winBrush = fillBrush?.['hc:winBrush']?.[0]?._attr ?? fillBrush?.['hh:winBrush']?.[0]?._attr ?? fillBrush?.winBrush?.[0]?._attr;
202
- if (winBrush?.faceColor && winBrush.faceColor !== 'none') {
344
+ const winBrush =
345
+ fillBrush?.["hc:winBrush"]?.[0]?._attr ??
346
+ fillBrush?.["hh:winBrush"]?.[0]?._attr ??
347
+ fillBrush?.winBrush?.[0]?._attr;
348
+ if (winBrush?.faceColor && winBrush.faceColor !== "none") {
203
349
  info.bgColor = safeHex(winBrush.faceColor);
204
350
  }
205
351
  }
206
352
 
207
353
  map.set(id, info);
208
354
  }
209
- } catch { /* non-fatal */ }
355
+ } catch {
356
+ /* non-fatal */
357
+ }
210
358
  return map;
211
359
  }
212
360
 
361
+ function buildFontIdMap(headObj: any): Map<number, string> {
362
+ const fontMap = new Map<number, string>();
363
+ try {
364
+ const root =
365
+ headObj?.["hh:head"]?.[0] ??
366
+ headObj?.["hh:HEAD"]?.[0] ??
367
+ headObj?.HEAD?.[0] ??
368
+ headObj;
369
+ const refList =
370
+ root?.["hh:refList"]?.[0] ??
371
+ root?.["hh:REFLIST"]?.[0] ??
372
+ root?.REFLIST?.[0];
373
+ if (!refList) return fontMap;
374
+
375
+ const fontfaces =
376
+ refList?.["hh:fontfaces"]?.[0] ?? refList?.["hh:FONTFACES"]?.[0];
377
+ if (!fontfaces) return fontMap;
378
+
379
+ // Try each fontface group (HANGUL, LATIN, etc.) — use the first group that has entries
380
+ const ffGroups = getTag(fontfaces, "hh:fontface", "hh:FONTFACE");
381
+ for (const ff of ffGroups) {
382
+ const fonts = getTag(ff, "hh:font", "hh:FONT");
383
+ for (const font of fonts) {
384
+ const fa = font?._attr ?? {};
385
+ const fid = Number(fa.id ?? -1);
386
+ const name = fa.face ?? fa.name ?? fa.Face ?? "";
387
+ if (fid >= 0 && name && !fontMap.has(fid)) fontMap.set(fid, name);
388
+ }
389
+ if (fontMap.size > 0) break; // use first group (usually HANGUL)
390
+ }
391
+ } catch {
392
+ /* non-fatal */
393
+ }
394
+ return fontMap;
395
+ }
396
+
213
397
  function extractCharPrs(headObj: any): Map<number, CharPrInfo> {
214
398
  const map = new Map<number, CharPrInfo>();
215
399
  try {
216
- const root = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
217
- const refList = root?.['hh:refList']?.[0] ?? root?.['hh:REFLIST']?.[0] ?? root?.REFLIST?.[0];
400
+ const root =
401
+ headObj?.["hh:head"]?.[0] ??
402
+ headObj?.["hh:HEAD"]?.[0] ??
403
+ headObj?.HEAD?.[0] ??
404
+ headObj;
405
+ const refList =
406
+ root?.["hh:refList"]?.[0] ??
407
+ root?.["hh:REFLIST"]?.[0] ??
408
+ root?.REFLIST?.[0];
218
409
  if (!refList) return map;
219
410
 
220
- const cpList = refList?.['hh:charProperties']?.[0] ?? refList?.['hh:CHARPROPERTIES']?.[0];
411
+ // Build font id name map from fontfaces
412
+ const fontIdMap = buildFontIdMap(headObj);
413
+
414
+ const cpList =
415
+ refList?.["hh:charProperties"]?.[0] ??
416
+ refList?.["hh:CHARPROPERTIES"]?.[0];
221
417
  if (!cpList) return map;
222
418
 
223
- const cps = getTag(cpList, 'hh:charPr', 'hh:CHARPR');
419
+ const cps = getTag(cpList, "hh:charPr", "hh:CHARPR");
224
420
  for (const cp of cps) {
225
421
  const attr = cp?._attr ?? {};
226
422
  const id = Number(attr.id ?? -1);
@@ -235,85 +431,137 @@ function extractCharPrs(headObj: any): Map<number, CharPrInfo> {
235
431
  if (attr.textColor) info.color = safeHex(attr.textColor);
236
432
 
237
433
  // bold
238
- if (cp?.['hh:bold']?.[0] != null) info.b = true;
434
+ if (cp?.["hh:bold"]?.[0] != null) info.b = true;
239
435
 
240
436
  // italic
241
- if (cp?.['hh:italic']?.[0] != null) info.i = true;
437
+ if (cp?.["hh:italic"]?.[0] != null) info.i = true;
242
438
 
243
439
  // underline
244
- const ulAttr = cp?.['hh:underline']?.[0]?._attr;
245
- if (ulAttr?.type && ulAttr.type !== 'NONE') info.u = true;
440
+ const ulAttr = cp?.["hh:underline"]?.[0]?._attr;
441
+ if (ulAttr?.type && ulAttr.type !== "NONE") info.u = true;
246
442
 
247
443
  // strikeout — shape="3D" is default "no strikeout" in real HWPX; only SOLID/etc means active
248
- const stAttr = cp?.['hh:strikeout']?.[0]?._attr;
249
- if (stAttr?.shape && stAttr.shape !== 'NONE' && stAttr.shape !== '3D') info.s = true;
250
-
251
- // font — from fontRef + fontface
252
- // (simplified: just store what we find)
444
+ const stAttr = cp?.["hh:strikeout"]?.[0]?._attr;
445
+ if (stAttr?.shape && stAttr.shape !== "NONE" && stAttr.shape !== "3D")
446
+ info.s = true;
447
+
448
+ // font name resolve from fontRef.hangul → fontfaces
449
+ const fontRefAttr =
450
+ cp?.["hh:fontRef"]?.[0]?._attr ?? cp?.["hh:FONTREF"]?.[0]?._attr;
451
+ if (fontRefAttr) {
452
+ const fid = Number(
453
+ fontRefAttr.hangul ?? fontRefAttr.latin ?? fontRefAttr.Hangul ?? 0,
454
+ );
455
+ const name = fontIdMap.get(fid);
456
+ if (name) info.font = safeFont(name);
457
+ }
253
458
 
254
459
  map.set(id, info);
255
460
  }
256
- } catch { /* non-fatal */ }
461
+ } catch {
462
+ /* non-fatal */
463
+ }
257
464
  return map;
258
465
  }
259
466
 
260
467
  function extractParaPrs(headObj: any): Map<number, ParaPrInfo> {
261
468
  const map = new Map<number, ParaPrInfo>();
262
469
  try {
263
- const root = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
264
- const refList = root?.['hh:refList']?.[0] ?? root?.['hh:REFLIST']?.[0] ?? root?.REFLIST?.[0];
470
+ const root =
471
+ headObj?.["hh:head"]?.[0] ??
472
+ headObj?.["hh:HEAD"]?.[0] ??
473
+ headObj?.HEAD?.[0] ??
474
+ headObj;
475
+ const refList =
476
+ root?.["hh:refList"]?.[0] ??
477
+ root?.["hh:REFLIST"]?.[0] ??
478
+ root?.REFLIST?.[0];
265
479
  if (!refList) return map;
266
480
 
267
- const ppList = refList?.['hh:paraProperties']?.[0] ?? refList?.['hh:PARAPROPERTIES']?.[0];
481
+ const ppList =
482
+ refList?.["hh:paraProperties"]?.[0] ??
483
+ refList?.["hh:PARAPROPERTIES"]?.[0];
268
484
  if (!ppList) return map;
269
485
 
270
- const pps = getTag(ppList, 'hh:paraPr', 'hh:PARAPR');
486
+ const pps = getTag(ppList, "hh:paraPr", "hh:PARAPR");
271
487
  for (const pp of pps) {
272
488
  const attr = pp?._attr ?? {};
273
489
  const id = Number(attr.id ?? -1);
274
490
  if (id < 0) continue;
275
491
 
276
- const alignNode = pp?.['hh:align']?.[0]?._attr ?? pp?.['hh:ALIGN']?.[0]?._attr;
492
+ const alignNode =
493
+ pp?.["hh:align"]?.[0]?._attr ?? pp?.["hh:ALIGN"]?.[0]?._attr;
277
494
  const align = alignNode?.horizontal ?? alignNode?.Horizontal;
278
495
 
279
496
  // Read margin and lineSpacing from direct child OR hp:switch > hp:default/hp:case
280
- let marginEl = pp?.['hh:margin']?.[0] ?? null;
281
- let lineSpEl = pp?.['hh:lineSpacing']?.[0] ?? null;
497
+ let marginEl = pp?.["hh:margin"]?.[0] ?? null;
498
+ let lineSpEl = pp?.["hh:lineSpacing"]?.[0] ?? null;
282
499
  if (!marginEl) {
283
- const sw = pp?.['hp:switch']?.[0];
284
- const container = sw?.['hp:default']?.[0] ?? sw?.['hp:case']?.[0];
285
- marginEl = container?.['hh:margin']?.[0] ?? null;
286
- lineSpEl = lineSpEl ?? container?.['hh:lineSpacing']?.[0] ?? null;
500
+ const sw = pp?.["hp:switch"]?.[0];
501
+ const container = sw?.["hp:default"]?.[0] ?? sw?.["hp:case"]?.[0];
502
+ marginEl = container?.["hh:margin"]?.[0] ?? null;
503
+ lineSpEl = lineSpEl ?? container?.["hh:lineSpacing"]?.[0] ?? null;
287
504
  }
288
505
 
289
506
  let indentPt: number | undefined;
507
+ let indentRightPt: number | undefined;
508
+ let firstLineIndentPt: number | undefined;
290
509
  let spaceBefore: number | undefined;
291
510
  let spaceAfter: number | undefined;
292
511
  let lineHeight: number | undefined;
512
+ let lineHeightFixed: number | undefined;
293
513
 
294
514
  if (marginEl) {
295
- // Handle both hc:intent (our encoder) and hc:indent (Hancom standard)
296
- const intentEl = marginEl?.['hc:intent']?.[0] ?? marginEl?.['hc:indent']?.[0];
297
- const prevEl = marginEl?.['hc:prev']?.[0];
298
- const nextEl = marginEl?.['hc:next']?.[0];
299
- const intentVal = Number(intentEl?._attr?.value ?? 0);
300
- const prevVal = Number(prevEl?._attr?.value ?? 0);
301
- const nextVal = Number(nextEl?._attr?.value ?? 0);
302
- if (intentVal !== 0) indentPt = Metric.hwpToPt(intentVal);
303
- if (prevVal > 0) spaceBefore = Metric.hwpToPt(prevVal);
304
- if (nextVal > 0) spaceAfter = Metric.hwpToPt(nextVal);
515
+ // OWPML §7.5.4.4: hc:left=전체왼쪽여백, hc:right=전체오른쪽여백,
516
+ // hc:indent=첫줄들여쓰기(양수)/내어쓰기(음수)
517
+ // hc:intent는 자사 인코더가 생성하는 오기 표기로, hc:indent와 동일하게 처리
518
+ const leftEl = marginEl?.["hc:left"]?.[0];
519
+ const rightEl = marginEl?.["hc:right"]?.[0];
520
+ const indentEl =
521
+ marginEl?.["hc:intent"]?.[0] ?? marginEl?.["hc:indent"]?.[0];
522
+ const prevEl = marginEl?.["hc:prev"]?.[0];
523
+ const nextEl = marginEl?.["hc:next"]?.[0];
524
+
525
+ const leftVal = Number(leftEl?._attr?.value ?? 0);
526
+ const rightVal = Number(rightEl?._attr?.value ?? 0);
527
+ const indentVal = Number(indentEl?._attr?.value ?? 0);
528
+ const prevVal = Number(prevEl?._attr?.value ?? 0);
529
+ const nextVal = Number(nextEl?._attr?.value ?? 0);
530
+
531
+ if (leftVal !== 0) indentPt = Metric.hwpToPt(leftVal);
532
+ if (rightVal !== 0) indentRightPt = Metric.hwpToPt(rightVal);
533
+ if (indentVal !== 0) firstLineIndentPt = Metric.hwpToPt(indentVal);
534
+ if (prevVal > 0) spaceBefore = Metric.hwpToPt(prevVal);
535
+ if (nextVal > 0) spaceAfter = Metric.hwpToPt(nextVal);
305
536
  }
306
537
 
307
538
  if (lineSpEl) {
308
539
  const lsAttr = lineSpEl._attr ?? {};
309
- const lsType = lsAttr.type ?? 'PERCENT';
310
- const lsVal = Number(lsAttr.value ?? 160);
311
- if (lsType === 'PERCENT' && lsVal > 0) lineHeight = lsVal / 100;
540
+ const lsType = lsAttr.type ?? "PERCENT";
541
+ const lsVal = Number(lsAttr.value ?? 160);
542
+ // OWPML §7.5.4.6: PERCENT(비율), FIXED(고정), BETWEEN_LINE(줄간격), AT_LEAST(최소)
543
+ if (lsType === "PERCENT" && lsVal > 0 && lsVal !== 160) {
544
+ lineHeight = lsVal / 100;
545
+ } else if (lsType === "FIXED" && lsVal > 0) {
546
+ // FIXED: 값이 HWPUNIT 단위의 고정 줄 높이
547
+ lineHeightFixed = Metric.hwpToPt(lsVal);
548
+ }
312
549
  }
313
550
 
314
- map.set(id, { align, indentPt, spaceBefore, spaceAfter, lineHeight });
551
+ map.set(id, {
552
+ align,
553
+ indentPt,
554
+ indentRightPt,
555
+ firstLineIndentPt,
556
+ spaceBefore,
557
+ spaceAfter,
558
+ lineHeight,
559
+ lineHeightFixed,
560
+ });
315
561
  }
316
- } catch { /* non-fatal */ }
562
+ } catch {
563
+ /* non-fatal */
564
+ }
317
565
  return map;
318
566
  }
319
567
 
@@ -321,120 +569,154 @@ function extractParaPrs(headObj: any): Map<number, ParaPrInfo> {
321
569
 
322
570
  function addParaItems(p: any, items: { type: string; node: any }[]): void {
323
571
  // Check if this paragraph contains a table in its runs
324
- const runs = getTag(p, 'hp:run', 'hp:RUN');
572
+ const runs = getTag(p, "hp:run", "hp:RUN");
325
573
  let hasTable = false;
326
574
  for (const run of runs) {
327
- const tbls = getTag(run, 'hp:tbl', 'hp:TABLE');
328
- for (const tbl of tbls) {
329
- items.push({ type: 'table', node: tbl });
575
+ const tbls = getTag(run, "hp:tbl", "hp:TABLE");
576
+ if (tbls.length > 0) {
577
+ for (const tbl of tbls) {
578
+ items.push({ type: "table", node: tbl });
579
+ }
330
580
  hasTable = true;
331
581
  }
332
582
  }
333
- // Also add as paragraph unless it's just a table container
334
- const hasText = runs.some((run: any) => {
335
- const ts = getTag(run, 'hp:t', 'hp:T', 'hp:CHAR');
336
- return ts.some((t: any) => {
337
- const text = typeof t === 'string' ? t : t?._text ?? '';
338
- return text.trim().length > 0;
339
- });
340
- });
341
- if (hasText || !hasTable) {
342
- items.push({ type: 'para', node: p });
583
+ // 테이블을 포함한 단락은 일반 단락으로 다시 추가하지 않음 (중복 방지)
584
+ if (!hasTable) {
585
+ items.push({ type: "para", node: p });
343
586
  }
344
587
  }
345
588
 
346
589
  function decodeSection(sec: any, dims: PageDims, ctx: DecCtx) {
347
590
  // Try to extract dims from first paragraph's secPr
348
- const firstParas = getTag(sec, 'hp:p', 'hp:P');
591
+ const firstParas = getTag(sec, "hp:p", "hp:P");
349
592
  const pageDims = extractSecPrDims(firstParas[0]) ?? dims;
350
593
 
351
594
  // Build items list preserving document order via _childOrder
352
595
  const items: { type: string; node: any }[] = [];
353
- const paras = getTag(sec, 'hp:p', 'hp:P');
354
- const childOrder = sec?.['_childOrder'] as string[] | undefined;
596
+ const paras = getTag(sec, "hp:p", "hp:P");
597
+ const tbls = getTag(sec, "hp:tbl", "hp:TABLE");
598
+
599
+ const childOrder = sec?.["_childOrder"] as string[] | undefined;
355
600
 
356
601
  if (Array.isArray(childOrder)) {
357
602
  let pi = 0;
603
+ let ti = 0;
358
604
  for (const tag of childOrder) {
359
- if ((tag === 'hp:p' || tag === 'hp:P') && pi < paras.length) {
360
- const p = paras[pi++];
361
- addParaItems(p, items);
605
+ if ((tag === "hp:p" || tag === "hp:P") && pi < paras.length) {
606
+ addParaItems(paras[pi++], items);
607
+ } else if ((tag === "hp:tbl" || tag === "hp:TABLE") && ti < tbls.length) {
608
+ items.push({ type: "table", node: tbls[ti++] });
362
609
  }
363
610
  }
364
- // Append any remaining
611
+ // Append any remaining (fallback)
365
612
  while (pi < paras.length) addParaItems(paras[pi++], items);
613
+ while (ti < tbls.length) items.push({ type: "table", node: tbls[ti++] });
366
614
  } else {
367
- // No order info — process paragraphs sequentially
615
+ // No order info — process paragraphs sequentially (fallback to previous logic)
368
616
  for (const p of paras) addParaItems(p, items);
617
+ // Note: direct tables are appended after paras in this fallback
618
+ for (const t of tbls) items.push({ type: "table", node: t });
369
619
  }
370
620
 
371
621
  const kids: ContentNode[] = ctx.shield.guardAll(
372
622
  items,
373
623
  (item: any) => {
374
- if (item.type === 'table') {
375
- const { value } = ctx.shield.guardGrid(
376
- item.node,
377
- (n) => decodeGrid(n, ctx),
378
- (n) => decodeGridSimple(n, ctx),
379
- (n) => decodeGridFlat(n),
380
- (n) => decodeGridText(n) as unknown as GridNode,
381
- 'hwpx:table',
382
- );
383
- return value;
624
+ if (item.type === "table") {
625
+ try {
626
+ const { value } = ctx.shield.guardGrid(
627
+ item.node,
628
+ (n) => decodeGrid(n, ctx),
629
+ (n) => decodeGridSimple(n, ctx),
630
+ (n) => decodeGridFlat(n),
631
+ (n) => decodeGridText(n) as unknown as GridNode,
632
+ "hwpx:table",
633
+ );
634
+ return value;
635
+ } catch {
636
+ return buildPara([buildSpan("[표 파싱 실패]")]);
637
+ }
384
638
  }
385
639
  return decodePara(item.node, ctx);
386
640
  },
387
- () => buildPara([buildSpan('[파싱 실패]')]),
388
- 'hwpx:content',
641
+ () => buildPara([buildSpan("[파싱 실패]")]),
642
+ "hwpx:content",
389
643
  );
390
644
 
391
645
  // Decode header/footer
392
- const headerParas = decodeHeaderFooter(sec, 'header', ctx);
393
- const footerParas = decodeHeaderFooter(sec, 'footer', ctx);
646
+ const headerParas = decodeHeaderFooter(sec, "header", ctx);
647
+ const footerParas = decodeHeaderFooter(sec, "footer", ctx);
394
648
 
395
- return buildSheet(
396
- kids.filter(Boolean) as ContentNode[],
397
- pageDims,
398
- { header: headerParas, footer: footerParas },
399
- );
649
+ return buildSheet(kids.filter(Boolean) as ContentNode[], pageDims, {
650
+ headers: { default: headerParas },
651
+ footers: { default: footerParas },
652
+ });
653
+ }
654
+
655
+ function parseSecPrDims(secPr: any): PageDims | null {
656
+ const pagePr =
657
+ secPr?.["hp:pagePr"]?.[0]?._attr ?? secPr?.["hp:PAGEPR"]?.[0]?._attr;
658
+ if (!pagePr) return null;
659
+ const margin =
660
+ secPr?.["hp:pagePr"]?.[0]?.["hp:margin"]?.[0]?._attr ??
661
+ secPr?.["hp:PAGEPR"]?.[0]?.["hp:MARGIN"]?.[0]?._attr ??
662
+ {};
663
+ const pw = Number(pagePr.width ?? 59528);
664
+ const ph = Number(pagePr.height ?? 84188);
665
+ return {
666
+ wPt: Metric.hwpToPt(pw),
667
+ hPt: Metric.hwpToPt(ph),
668
+ mt: Metric.hwpToPt(Number(margin.top ?? 5670)),
669
+ mb: Metric.hwpToPt(Number(margin.bottom ?? 4252)),
670
+ ml: Metric.hwpToPt(Number(margin.left ?? 8504)),
671
+ mr: Metric.hwpToPt(Number(margin.right ?? 8504)),
672
+ orient: pw > ph ? "landscape" : "portrait",
673
+ };
400
674
  }
401
675
 
402
676
  function extractSecPrDims(p: any): PageDims | null {
403
677
  if (!p) return null;
404
678
  try {
405
- const runs = getTag(p, 'hp:run', 'hp:RUN');
679
+ // Primary: hp:secPr is a DIRECT child of hp:p (as generated by HwpxEncoder)
680
+ const secPrDirect = p?.["hp:secPr"]?.[0] ?? p?.["hp:SECPR"]?.[0];
681
+ if (secPrDirect) {
682
+ const dims = parseSecPrDims(secPrDirect);
683
+ if (dims) return dims;
684
+ }
685
+ // Fallback: legacy format may nest hp:secPr inside hp:run
686
+ const runs = getTag(p, "hp:run", "hp:RUN");
406
687
  for (const run of runs) {
407
- const secPr = run?.['hp:secPr']?.[0] ?? run?.['hp:SECPR']?.[0];
688
+ const secPr = run?.["hp:secPr"]?.[0] ?? run?.["hp:SECPR"]?.[0];
408
689
  if (!secPr) continue;
409
- const pagePr = secPr?.['hp:pagePr']?.[0]?._attr ?? secPr?.['hp:PAGEPR']?.[0]?._attr;
410
- if (!pagePr) continue;
411
- const margin = secPr?.['hp:pagePr']?.[0]?.['hp:margin']?.[0]?._attr
412
- ?? secPr?.['hp:PAGEPR']?.[0]?.['hp:MARGIN']?.[0]?._attr ?? {};
413
- return {
414
- wPt: Metric.hwpToPt(Number(pagePr.width ?? 59528)),
415
- hPt: Metric.hwpToPt(Number(pagePr.height ?? 84188)),
416
- mt: Metric.hwpToPt(Number(margin.top ?? 5670)),
417
- mb: Metric.hwpToPt(Number(margin.bottom ?? 4252)),
418
- ml: Metric.hwpToPt(Number(margin.left ?? 8504)),
419
- mr: Metric.hwpToPt(Number(margin.right ?? 8504)),
420
- orient: pagePr.landscape === 'NARROWLY' ? 'landscape' : 'portrait',
421
- };
690
+ const dims = parseSecPrDims(secPr);
691
+ if (dims) return dims;
422
692
  }
423
- } catch { /* ignore */ }
693
+ } catch {
694
+ /* ignore */
695
+ }
424
696
  return null;
425
697
  }
426
698
 
427
- function decodeHeaderFooter(sec: any, kind: 'header' | 'footer', ctx: DecCtx): ParaNode[] | undefined {
699
+ function decodeHeaderFooter(
700
+ sec: any,
701
+ kind: "header" | "footer",
702
+ ctx: DecCtx,
703
+ ): ParaNode[] | undefined {
428
704
  try {
429
- const hf = sec?.['hp:headerFooter']?.[0] ?? sec?.['hp:HEADERFOOTER']?.[0]
430
- ?? sec?.headerFooter?.[0] ?? sec?.HEADERFOOTER?.[0];
705
+ const hf =
706
+ sec?.["hp:headerFooter"]?.[0] ??
707
+ sec?.["hp:HEADERFOOTER"]?.[0] ??
708
+ sec?.headerFooter?.[0] ??
709
+ sec?.HEADERFOOTER?.[0];
431
710
  if (!hf) return undefined;
432
711
 
433
- const part = hf?.['hp:' + kind]?.[0] ?? hf?.['hp:' + kind.toUpperCase()]?.[0]
434
- ?? hf?.[kind]?.[0] ?? hf?.[kind.toUpperCase()]?.[0];
712
+ const part =
713
+ hf?.["hp:" + kind]?.[0] ??
714
+ hf?.["hp:" + kind.toUpperCase()]?.[0] ??
715
+ hf?.[kind]?.[0] ??
716
+ hf?.[kind.toUpperCase()]?.[0];
435
717
  if (!part) return undefined;
436
718
 
437
- const paras = getTag(part, 'hp:p', 'hp:P');
719
+ const paras = getTag(part, "hp:p", "hp:P");
438
720
  if (paras.length === 0) return undefined;
439
721
 
440
722
  return paras.map((p: any) => decodePara(p, ctx));
@@ -455,10 +737,13 @@ function decodePara(p: any, ctx: DecCtx): ParaNode {
455
737
  if (paraPrDef?.align) align = paraPrDef.align;
456
738
 
457
739
  // Check inline PARAPR too
458
- const inlineParaPr = p?.['hp:PARAPR']?.[0] ?? p?.['hp:paraPr']?.[0] ?? p?.PARAPR?.[0];
740
+ const inlineParaPr =
741
+ p?.["hp:PARAPR"]?.[0] ?? p?.["hp:paraPr"]?.[0] ?? p?.PARAPR?.[0];
459
742
  if (inlineParaPr) {
460
- const alignNode = inlineParaPr?.['hp:ALIGN']?.[0]?._attr ?? inlineParaPr?.['hp:align']?.[0]?._attr
461
- ?? inlineParaPr?.ALIGN?.[0]?._attr;
743
+ const alignNode =
744
+ inlineParaPr?.["hp:ALIGN"]?.[0]?._attr ??
745
+ inlineParaPr?.["hp:align"]?.[0]?._attr ??
746
+ inlineParaPr?.ALIGN?.[0]?._attr;
462
747
  if (alignNode?.Type) align = alignNode.Type;
463
748
  if (alignNode?.horizontal) align = alignNode.horizontal;
464
749
  }
@@ -468,85 +753,147 @@ function decodePara(p: any, ctx: DecCtx): ParaNode {
468
753
 
469
754
  // Apply spacing/indent/lineHeight from paraPr definition
470
755
  if (paraPrDef) {
471
- if (paraPrDef.indentPt !== undefined) props.indentPt = paraPrDef.indentPt;
472
- if (paraPrDef.spaceBefore !== undefined) props.spaceBefore = paraPrDef.spaceBefore;
473
- if (paraPrDef.spaceAfter !== undefined) props.spaceAfter = paraPrDef.spaceAfter;
474
- if (paraPrDef.lineHeight !== undefined) props.lineHeight = paraPrDef.lineHeight;
756
+ if (paraPrDef.indentPt !== undefined) props.indentPt = paraPrDef.indentPt;
757
+ if (paraPrDef.indentRightPt !== undefined)
758
+ props.indentRightPt = paraPrDef.indentRightPt;
759
+ if (paraPrDef.firstLineIndentPt !== undefined)
760
+ props.firstLineIndentPt = paraPrDef.firstLineIndentPt;
761
+ if (paraPrDef.spaceBefore !== undefined)
762
+ props.spaceBefore = paraPrDef.spaceBefore;
763
+ if (paraPrDef.spaceAfter !== undefined)
764
+ props.spaceAfter = paraPrDef.spaceAfter;
765
+ if (paraPrDef.lineHeight !== undefined)
766
+ props.lineHeight = paraPrDef.lineHeight;
767
+ if (paraPrDef.lineHeightFixed !== undefined)
768
+ props.lineHeightFixed = paraPrDef.lineHeightFixed;
475
769
  }
476
770
 
477
771
  // List support (from inline attr)
478
772
  if (inlineAttr.listType) {
479
- props.listOrd = inlineAttr.listType === 'DIGIT' || inlineAttr.listType === 'DECIMAL';
773
+ props.listOrd =
774
+ inlineAttr.listType === "DIGIT" || inlineAttr.listType === "DECIMAL";
480
775
  props.listLv = Number(inlineAttr.listLevel ?? 0);
481
776
  }
482
777
 
483
- const runs = getTag(p, 'hp:run', 'hp:RUN');
778
+ const runs = getTag(p, "hp:run", "hp:RUN");
484
779
  const kids: (SpanNode | ImgNode)[] = [];
485
780
 
781
+ // Helper: collect hp:pic elements from a container (direct child OR inside hp:ctrl)
782
+ const collectPics = (container: any): any[] => {
783
+ const direct = getTag(container, "hp:pic", "hp:PIC");
784
+ const ctrls = getTag(container, "hp:ctrl", "hp:CTRL");
785
+ const nested = ctrls.flatMap((c: any) => getTag(c, "hp:pic", "hp:PIC"));
786
+ return [...direct, ...nested];
787
+ };
788
+
789
+ // Images that are direct children of <hp:p> (common in table cells and floats)
790
+ for (const pic of collectPics(p)) {
791
+ const img = decodePic(pic, ctx);
792
+ if (img) kids.push(img);
793
+ }
794
+
486
795
  for (const run of runs) {
487
- // Images inside run
488
- const pics = getTag(run, 'hp:pic', 'hp:PIC');
489
- for (const pic of pics) {
796
+ // Images: directly in run OR in run→ctrl (both patterns appear in practice)
797
+ for (const pic of collectPics(run)) {
490
798
  const img = decodePic(pic, ctx);
491
799
  if (img) kids.push(img);
492
800
  }
493
801
 
494
802
  // Page number
495
- const pageNums = getTag(run, 'hp:pageNum', 'hp:PAGENUM');
803
+ const pageNums = getTag(run, "hp:pageNum", "hp:PAGENUM");
496
804
  if (pageNums.length > 0) {
497
805
  const pn = pageNums[0]?._attr ?? {};
498
- const fmt = pn.formatType === 'ROMAN_LOWER' ? 'roman' as const
499
- : pn.formatType === 'ROMAN_UPPER' ? 'romanCaps' as const
500
- : 'decimal' as const;
501
- const pageNumNode: PageNumNode = { tag: 'pagenum', format: fmt };
806
+ const fmt =
807
+ pn.formatType === "ROMAN_LOWER"
808
+ ? ("roman" as const)
809
+ : pn.formatType === "ROMAN_UPPER"
810
+ ? ("romanCaps" as const)
811
+ : ("decimal" as const);
812
+ const pageNumNode: PageNumNode = { tag: "pagenum", format: fmt };
502
813
  const spanProps = resolveCharPr(run, ctx);
503
- kids.push({ tag: 'span', props: spanProps, kids: [pageNumNode] });
814
+ kids.push({ tag: "span", props: spanProps, kids: [pageNumNode] });
504
815
  continue;
505
816
  }
506
817
 
507
818
  // Text
508
- const textNodes = getTag(run, 'hp:t', 'hp:T', 'hp:CHAR');
509
- const content = textNodes.map((t: any) => typeof t === 'string' ? t : t?._text ?? t?._ ?? '').join('');
510
-
511
- // Skip empty secPr-only runs
512
- if (content === '' && (run?.['hp:secPr']?.[0] || run?.['hp:SECPR']?.[0]) && pics.length === 0) continue;
819
+ const runPics = collectPics(run);
820
+ const textNodes = getTag(run, "hp:t", "hp:T", "hp:CHAR");
821
+ const content = textNodes
822
+ .map((t: any) => {
823
+ const val =
824
+ typeof t === "string" ? t : (t?._text ?? t?._ ?? t?.["#text"] ?? "");
825
+ return val.replace(/__EXT_\d+(?:_W\d+_H\d+)?__/g, "");
826
+ })
827
+ .join("");
828
+
829
+ // Skip empty secPr-only runs that produced no images
830
+ if (
831
+ content === "" &&
832
+ (run?.["hp:secPr"]?.[0] || run?.["hp:SECPR"]?.[0]) &&
833
+ runPics.length === 0 &&
834
+ pageNums.length === 0
835
+ )
836
+ continue;
513
837
 
514
- const spanProps = resolveCharPr(run, ctx);
515
- kids.push(buildSpan(content, spanProps));
838
+ // Only push text span when there's actual content and no image already pushed for this run
839
+ if (content !== "" || (runPics.length === 0 && pageNums.length === 0)) {
840
+ const spanProps = resolveCharPr(run, ctx);
841
+ kids.push(buildSpan(content, spanProps));
842
+ }
516
843
  }
517
844
 
518
845
  // pageBreak="1" → prepend a pb node in its own span
519
- if (pAttr.pageBreak === '1') {
520
- kids.unshift({ tag: 'span', props: {}, kids: [buildPb()] });
846
+ if (pAttr.pageBreak === "1") {
847
+ kids.unshift({ tag: "span", props: {}, kids: [buildPb()] });
521
848
  }
522
849
 
523
- return buildPara(kids.filter(Boolean) as ParaNode['kids'], props);
850
+ return buildPara(kids.filter(Boolean) as ParaNode["kids"], props);
524
851
  }
525
852
 
526
853
  function resolveCharPr(run: any, ctx: DecCtx): TextProps {
527
854
  const runAttr = run?._attr ?? {};
528
- const charPrIdRef = Number(runAttr.charPrIDRef ?? -1);
855
+ const charPrIdRef = Number(runAttr.charPrIDRef ?? runAttr.CharPrIDRef ?? -1);
529
856
 
530
- // Try IDRef first
857
+ // IDRef 먼저 조회
531
858
  const def = ctx.charPrs.get(charPrIdRef);
532
859
  if (def) {
533
860
  return {
534
- b: def.b, i: def.i, u: def.u, s: def.s,
535
- pt: def.pt, color: def.color, font: def.font, bg: def.bg,
861
+ b: def.b,
862
+ i: def.i,
863
+ u: def.u,
864
+ s: def.s,
865
+ pt: def.pt,
866
+ color: def.color,
867
+ font: def.font,
868
+ bg: def.bg,
536
869
  };
537
870
  }
538
871
 
539
- // Fallback: inline CHARPR
540
- const ca = run?.['hp:CHARPR']?.[0]?._attr ?? run?.['hp:charPr']?.[0]?._attr ?? run?.CHARPR?.[0]?._attr ?? {};
872
+ // 인라인 CHARPR fallback — 대소문자 모두 시도
873
+ const inlinePr =
874
+ run?.["hp:CHARPR"]?.[0] ??
875
+ run?.["hp:charPr"]?.[0] ??
876
+ run?.CHARPR?.[0] ??
877
+ run?.charPr?.[0];
878
+ const ca = inlinePr?._attr ?? {};
879
+
880
+ const bVal = ca.Bold ?? ca.bold ?? ca.B ?? "";
881
+ const iVal = ca.Italic ?? ca.italic ?? ca.I ?? "";
882
+ const uVal = ca.Underline ?? ca.underline ?? "";
883
+ const sVal = ca.Strikeout ?? ca.strikeout ?? "";
884
+ const fontName =
885
+ ca.FontName ?? ca.fontName ?? ca.FaceNameHangul ?? ca.faceNameHangul ?? "";
886
+ const heightVal = ca.Height ?? ca.height ?? "";
887
+
541
888
  return {
542
- b: ca.Bold === '1' || ca.Bold === 'true' || undefined,
543
- i: ca.Italic === '1' || ca.Italic === 'true' || undefined,
544
- u: ca.Underline ? ca.Underline !== 'NONE' : undefined,
545
- s: ca.Strikeout ? ca.Strikeout !== 'NONE' : undefined,
546
- font: safeFont(ca.FontName ?? ca.FaceNameHangul),
547
- pt: ca.Height ? Metric.hHeightToPt(Number(ca.Height)) : undefined,
548
- color: safeHex(ca.TextColor),
549
- bg: safeHex(ca.BgColor),
889
+ b: bVal === "1" || bVal === "true" || bVal === "True" || undefined,
890
+ i: iVal === "1" || iVal === "true" || iVal === "True" || undefined,
891
+ u: uVal && uVal !== "NONE" ? true : undefined,
892
+ s: sVal && sVal !== "NONE" && sVal !== "3D" ? true : undefined,
893
+ font: fontName ? safeFont(fontName) : undefined,
894
+ pt: heightVal ? Metric.hHeightToPt(Number(heightVal)) : undefined,
895
+ color: safeHex(ca.TextColor ?? ca.textColor),
896
+ bg: safeHex(ca.BgColor ?? ca.bgColor),
550
897
  };
551
898
  }
552
899
 
@@ -554,77 +901,112 @@ function resolveCharPr(run: any, ctx: DecCtx): TextProps {
554
901
 
555
902
  function decodePic(pic: any, ctx: DecCtx): ImgNode | null {
556
903
  try {
557
- const szAttr = pic?.['hp:sz']?.[0]?._attr ?? pic?.sz?.[0]?._attr ?? {};
904
+ const szAttr = pic?.["hp:sz"]?.[0]?._attr ?? pic?.sz?.[0]?._attr ?? {};
558
905
  const w = Metric.hwpToPt(Number(szAttr.width ?? 0));
559
906
  const h = Metric.hwpToPt(Number(szAttr.height ?? 0));
560
907
 
561
908
  // Try multiple tag patterns for image reference
562
- const imgNode = pic?.['hp:img']?.[0]?._attr ?? pic?.['hc:img']?.[0]?._attr
563
- ?? pic?.img?.[0]?._attr ?? {};
909
+ const imgNode =
910
+ pic?.["hp:img"]?.[0]?._attr ??
911
+ pic?.["hc:img"]?.[0]?._attr ??
912
+ pic?.img?.[0]?._attr ??
913
+ {};
564
914
  const binRef = imgNode.binaryItemIDRef ?? imgNode.BinaryItemIDRef;
565
915
  if (!binRef) return null;
566
916
 
567
917
  // Find binary data
568
918
  let imgData: Uint8Array | undefined;
569
919
  for (const [key, val] of ctx.files) {
570
- if (key.includes(binRef) || key.toLowerCase().includes(binRef.toLowerCase())) {
920
+ if (
921
+ key.includes(binRef) ||
922
+ key.toLowerCase().includes(binRef.toLowerCase())
923
+ ) {
571
924
  imgData = val;
572
925
  break;
573
926
  }
574
927
  }
575
928
  if (!imgData) return null;
576
929
 
577
- const ext = binRef.split('.').pop()?.toLowerCase() ?? 'png';
578
- const mimeMap: Record<string, ImgNode['mime']> = {
579
- png: 'image/png', jpg: 'image/jpeg', jpeg: 'image/jpeg',
580
- gif: 'image/gif', bmp: 'image/bmp',
930
+ const ext = binRef.split(".").pop()?.toLowerCase() ?? "png";
931
+ const mimeMap: Record<string, ImgNode["mime"]> = {
932
+ png: "image/png",
933
+ jpg: "image/jpeg",
934
+ jpeg: "image/jpeg",
935
+ gif: "image/gif",
936
+ bmp: "image/bmp",
581
937
  };
582
938
 
583
939
  // ── hp:pos에서 layout 추출 ───────────────────────────────
584
- const posAttr = pic?.['hp:pos']?.[0]?._attr ?? pic?.pos?.[0]?._attr ?? {};
940
+ const posAttr = pic?.["hp:pos"]?.[0]?._attr ?? pic?.pos?.[0]?._attr ?? {};
585
941
  const layout = extractHwpxLayout(posAttr, pic);
586
942
 
587
- return buildImg(TextKit.base64Encode(imgData), mimeMap[ext] ?? 'image/png', w, h, undefined, layout);
943
+ return buildImg(
944
+ TextKit.base64Encode(imgData),
945
+ mimeMap[ext] ?? "image/png",
946
+ w,
947
+ h,
948
+ undefined,
949
+ layout,
950
+ );
588
951
  } catch {
589
952
  return null;
590
953
  }
591
954
  }
592
955
 
593
956
  function extractHwpxLayout(posAttr: any, pic: any): ImgLayout {
594
- const treatAsChar = posAttr.treatAsChar === '1' || posAttr.treatAsChar === 'true';
595
- if (treatAsChar) return { wrap: 'inline' };
596
-
597
- // textWrap → wrap
598
- const textWrap: string = (pic?._attr?.textWrap ?? pic?.['hp:pic']?.[0]?._attr?.textWrap ?? 'TOP_AND_BOTTOM');
957
+ const treatAsChar =
958
+ posAttr.treatAsChar === "1" || posAttr.treatAsChar === "true";
959
+ if (treatAsChar) return { wrap: "inline" };
960
+
961
+ // textWrap wrap (direct attribute of hp:pic element)
962
+ const textWrap: string =
963
+ pic?._attr?.textWrap ?? pic?.pic?.[0]?._attr?.textWrap ?? "TOP_AND_BOTTOM";
964
+ // OWPML §7.5.8.1 textWrap → ImgWrap 매핑
965
+ // TOP_AND_BOTTOM: 텍스트가 이미지 위아래로만 흐름 → DOCX wrapTopAndBottom (float anchor)
599
966
  const wrapMap: Record<string, ImgWrap> = {
600
- TOP_AND_BOTTOM: 'square',
601
- SQUARE: 'square',
602
- BOTH_SIDES: 'tight',
603
- LEFT: 'tight',
604
- RIGHT: 'tight',
605
- LARGER_ONLY: 'tight',
606
- SMALLER_ONLY: 'tight',
607
- LARGEST_ONLY: 'tight',
608
- BEHIND_TEXT: 'behind',
609
- FRONT_TEXT: 'none',
967
+ TOP_AND_BOTTOM: "topAndBottom", // float, 위아래 텍스트 흐름
968
+ SQUARE: "square",
969
+ BOTH_SIDES: "tight",
970
+ LEFT: "tight",
971
+ RIGHT: "tight",
972
+ LARGER_ONLY: "tight",
973
+ SMALLER_ONLY: "tight",
974
+ LARGEST_ONLY: "tight",
975
+ BEHIND_TEXT: "behind",
976
+ FRONT_TEXT: "front",
610
977
  };
611
- const wrap: ImgWrap = wrapMap[textWrap] ?? 'square';
978
+ const wrap: ImgWrap = wrapMap[textWrap] ?? "square";
612
979
 
613
980
  // 기준점
614
981
  const horzRelToMap: Record<string, ImgHorzRelTo> = {
615
- PARA: 'para', MARGIN: 'margin', PAGE: 'page', COLUMN: 'column',
982
+ PARA: "para",
983
+ MARGIN: "margin",
984
+ PAGE: "page",
985
+ COLUMN: "column",
616
986
  };
617
987
  const vertRelToMap: Record<string, ImgVertRelTo> = {
618
- PARA: 'para', MARGIN: 'margin', PAGE: 'page', PAPER: 'page', LINE: 'line',
988
+ PARA: "para",
989
+ MARGIN: "margin",
990
+ PAGE: "page",
991
+ PAPER: "page",
992
+ LINE: "line",
619
993
  };
620
- const horzRelTo = horzRelToMap[posAttr.horzRelTo ?? ''] ?? 'para';
621
- const vertRelTo = vertRelToMap[posAttr.vertRelTo ?? ''] ?? 'para';
994
+ const horzRelTo = horzRelToMap[posAttr.horzRelTo ?? ""] ?? "para";
995
+ const vertRelTo = vertRelToMap[posAttr.vertRelTo ?? ""] ?? "para";
622
996
 
623
997
  // 정렬
624
- const horzAlignMap: Record<string, ImgHorzAlign> = { LEFT: 'left', CENTER: 'center', RIGHT: 'right' };
625
- const vertAlignMap: Record<string, ImgVertAlign> = { TOP: 'top', CENTER: 'center', BOTTOM: 'bottom' };
626
- const horzAlign = horzAlignMap[posAttr.horzAlign ?? ''];
627
- const vertAlign = vertAlignMap[posAttr.vertAlign ?? ''];
998
+ const horzAlignMap: Record<string, ImgHorzAlign> = {
999
+ LEFT: "left",
1000
+ CENTER: "center",
1001
+ RIGHT: "right",
1002
+ };
1003
+ const vertAlignMap: Record<string, ImgVertAlign> = {
1004
+ TOP: "top",
1005
+ CENTER: "center",
1006
+ BOTTOM: "bottom",
1007
+ };
1008
+ const horzAlign = horzAlignMap[posAttr.horzAlign ?? ""];
1009
+ const vertAlign = vertAlignMap[posAttr.vertAlign ?? ""];
628
1010
 
629
1011
  // 오프셋
630
1012
  const horzOffset = Number(posAttr.horzOffset ?? 0);
@@ -641,33 +1023,75 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
641
1023
  const tblAttr = tbl?._attr ?? {};
642
1024
  const borderFillId = Number(tblAttr.borderFillIDRef ?? 0);
643
1025
  const borderFill = ctx.borderFills.get(borderFillId);
644
- const headerRow = tblAttr.repeatHeader === '1';
1026
+ const headerRow = tblAttr.repeatHeader === "1";
645
1027
 
646
1028
  const gridProps: GridProps = { headerRow: headerRow || undefined };
647
1029
  if (borderFill?.stroke) gridProps.defaultStroke = borderFill.stroke;
648
1030
 
649
- const rowArr = getTag(tbl, 'hp:tr', 'hp:ROW');
1031
+ const rowArr = getTag(tbl, "hp:tr", "hp:ROW");
650
1032
 
651
- // Read column widths from the first row that has all cs=1 cells
1033
+ // Read column widths: first try a row where ALL cells have cs=1
652
1034
  for (const row of rowArr) {
653
- const cells = getTag(row, 'hp:tc', 'hp:CELL');
1035
+ const cells = getTag(row, "hp:tc", "hp:CELL");
654
1036
  const rowWidths: number[] = [];
655
1037
  let allSingle = true;
656
1038
  for (const cell of cells) {
657
- const cellSpanAttr = cell?.['hp:cellSpan']?.[0]?._attr ?? {};
1039
+ const cellSpanAttr = cell?.["hp:cellSpan"]?.[0]?._attr ?? {};
658
1040
  const cs = Number(cellSpanAttr.colSpan ?? cell?._attr?.ColSpan ?? 1);
659
- if (cs > 1) { allSingle = false; break; }
660
- const szAttr = cell?.['hp:cellSz']?.[0]?._attr ?? {};
1041
+ if (cs > 1) {
1042
+ allSingle = false;
1043
+ break;
1044
+ }
1045
+ const szAttr = cell?.["hp:cellSz"]?.[0]?._attr ?? {};
661
1046
  const w = Number(szAttr.width ?? 0);
662
1047
  rowWidths.push(Metric.hwpToPt(w));
663
1048
  }
664
- if (allSingle && rowWidths.length > 0 && rowWidths.some(w => w > 0)) {
1049
+ if (allSingle && rowWidths.length > 0 && rowWidths.some((w) => w > 0)) {
665
1050
  gridProps.colWidths = rowWidths;
666
1051
  break;
667
1052
  }
668
1053
  }
1054
+
1055
+ // Fallback: proportional distribution when no all-single row exists
1056
+ if (!gridProps.colWidths) {
1057
+ // Determine colCount first: max column index reached across all rows
1058
+ let detectedCols = 0;
1059
+ for (const row of rowArr) {
1060
+ let ci = 0;
1061
+ for (const cell of getTag(row, "hp:tc", "hp:CELL")) {
1062
+ const csEl = cell?.["hp:cellSpan"]?.[0]?._attr ?? {};
1063
+ ci += Number(csEl.colSpan ?? cell?._attr?.ColSpan ?? 1);
1064
+ }
1065
+ if (ci > detectedCols) detectedCols = ci;
1066
+ }
1067
+ if (detectedCols > 0) {
1068
+ const sums = new Float64Array(detectedCols);
1069
+ const counts = new Int32Array(detectedCols);
1070
+ for (const row of rowArr) {
1071
+ let ci = 0;
1072
+ for (const cell of getTag(row, "hp:tc", "hp:CELL")) {
1073
+ const csEl = cell?.["hp:cellSpan"]?.[0]?._attr ?? {};
1074
+ const cs = Number(csEl.colSpan ?? cell?._attr?.ColSpan ?? 1);
1075
+ const szAttr = cell?.["hp:cellSz"]?.[0]?._attr ?? {};
1076
+ const w = Number(szAttr.width ?? 0);
1077
+ if (w > 0 && cs > 0) {
1078
+ const perCol = w / cs;
1079
+ for (let k = 0; k < cs && ci + k < detectedCols; k++) {
1080
+ sums[ci + k] += perCol;
1081
+ counts[ci + k]++;
1082
+ }
1083
+ }
1084
+ ci += cs;
1085
+ }
1086
+ }
1087
+ const estimated = Array.from(sums).map((s, i) =>
1088
+ counts[i] > 0 ? Metric.hwpToPt(s / counts[i]) : 0,
1089
+ );
1090
+ if (estimated.some((w) => w > 0)) gridProps.colWidths = estimated;
1091
+ }
1092
+ }
669
1093
  const rowNodes = rowArr.map((row: any) => {
670
- const cellArr = getTag(row, 'hp:tc', 'hp:CELL');
1094
+ const cellArr = getTag(row, "hp:tc", "hp:CELL");
671
1095
  const cellNodes = cellArr.map((cell: any) => {
672
1096
  const ca = cell?._attr ?? {};
673
1097
 
@@ -679,58 +1103,112 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
679
1103
  bg: cellBf?.bgColor ?? safeHex(ca.BgColor),
680
1104
  };
681
1105
 
682
- if (cellBf?.stroke) {
683
- cellProps.top = cellBf.stroke;
684
- cellProps.bot = cellBf.stroke;
685
- cellProps.left = cellBf.stroke;
686
- cellProps.right = cellBf.stroke;
1106
+ if (cellBf) {
1107
+ // Preserve explicit NONE so it overrides table-level defaultStroke in DOCX tcBorders.
1108
+ // Only skip when the side is truly undefined (not specified in borderFill).
1109
+ cellProps.top = cellBf.top ?? cellBf.stroke;
1110
+ cellProps.bot = cellBf.bottom ?? cellBf.stroke;
1111
+ cellProps.left = cellBf.left ?? cellBf.stroke;
1112
+ cellProps.right = cellBf.right ?? cellBf.stroke;
687
1113
  }
688
1114
 
689
- // Vertical alignment from subList
690
- const subList = cell?.['hp:subList']?.[0] ?? cell?.subList?.[0];
1115
+ // Vertical alignment and cell padding from subList
1116
+ const subList = cell?.["hp:subList"]?.[0] ?? cell?.subList?.[0];
691
1117
  const subAttr = subList?._attr ?? {};
692
1118
  if (subAttr.vertAlign) {
693
- const vaMap: Record<string, 'top' | 'mid' | 'bot'> = {
694
- TOP: 'top', CENTER: 'mid', BOTTOM: 'bot',
1119
+ const vaMap: Record<string, "top" | "mid" | "bot"> = {
1120
+ TOP: "top",
1121
+ CENTER: "mid",
1122
+ BOTTOM: "bot",
695
1123
  };
696
1124
  cellProps.va = vaMap[subAttr.vertAlign];
697
1125
  }
1126
+ // Cell margins (stored in HWPUNIT on subList attributes)
1127
+ const HWPX_DEFAULT_MARGIN_LR = 360; // typical default: 3.6pt
1128
+ const HWPX_DEFAULT_MARGIN_TB = 141; // typical default: ~1.4pt
1129
+ const mL = Number(subAttr.marginLeft ?? HWPX_DEFAULT_MARGIN_LR);
1130
+ const mR = Number(subAttr.marginRight ?? HWPX_DEFAULT_MARGIN_LR);
1131
+ const mT = Number(subAttr.marginTop ?? HWPX_DEFAULT_MARGIN_TB);
1132
+ const mB = Number(subAttr.marginBottom ?? HWPX_DEFAULT_MARGIN_TB);
1133
+ if (mL !== HWPX_DEFAULT_MARGIN_LR) cellProps.padL = Metric.hwpToPt(mL);
1134
+ if (mR !== HWPX_DEFAULT_MARGIN_LR) cellProps.padR = Metric.hwpToPt(mR);
1135
+ if (mT !== HWPX_DEFAULT_MARGIN_TB) cellProps.padT = Metric.hwpToPt(mT);
1136
+ if (mB !== HWPX_DEFAULT_MARGIN_TB) cellProps.padB = Metric.hwpToPt(mB);
698
1137
 
699
1138
  // Colspan/rowspan from cellSpan element or attributes
700
- const cellSpan = cell?.['hp:cellSpan']?.[0]?._attr ?? {};
1139
+ const cellSpan = cell?.["hp:cellSpan"]?.[0]?._attr ?? {};
701
1140
  const cs = Number(cellSpan.colSpan ?? ca.ColSpan ?? 1);
702
1141
  const rs = Number(cellSpan.rowSpan ?? ca.RowSpan ?? 1);
703
1142
 
704
- // Parse paragraphs
705
- let paras: ParaNode[];
706
- if (subList) {
707
- const subParas = getTag(subList, 'hp:p', 'hp:P');
708
- paras = subParas.map((p: any) => decodePara(p, ctx));
709
- } else {
710
- paras = getTag(cell, 'hp:p', 'hp:P').map((p: any) => decodePara(p, ctx));
1143
+ // Parse cell content — paragraphs and nested tables (중첩 표)
1144
+ const cellKids: (ParaNode | GridNode)[] = [];
1145
+ const source = subList ?? cell;
1146
+ const sourcePSource = getTag(source, "hp:p", "hp:P");
1147
+ for (const sp of sourcePSource) {
1148
+ try {
1149
+ // Check if this paragraph contains a nested table in its runs
1150
+ const runs = getTag(sp, "hp:run", "hp:RUN");
1151
+ let hasNestedTable = false;
1152
+ for (const run of runs) {
1153
+ const nestedTbls = getTag(run, "hp:tbl", "hp:TABLE");
1154
+ for (const nestedTbl of nestedTbls) {
1155
+ try {
1156
+ cellKids.push(decodeGrid(nestedTbl, ctx));
1157
+ } catch {
1158
+ /* skip malformed nested table */
1159
+ }
1160
+ hasNestedTable = true;
1161
+ }
1162
+ }
1163
+ if (!hasNestedTable) {
1164
+ cellKids.push(decodePara(sp, ctx));
1165
+ }
1166
+ } catch {
1167
+ /* skip corrupted para in cell */
1168
+ }
711
1169
  }
712
1170
 
713
1171
  return buildCell(
714
- paras.length > 0 ? paras : [buildPara([buildSpan('')])],
1172
+ cellKids.length > 0 ? cellKids : [buildPara([buildSpan("")])],
715
1173
  { cs, rs, props: cellProps },
716
1174
  );
717
1175
  });
718
- return buildRow(cellNodes);
1176
+ // Row height: prefer a non-merged cell (rs=1) for accuracy.
1177
+ // For merged cells, divide total height by rowSpan to get per-row height.
1178
+ let rowHeightPt: number | undefined;
1179
+ for (const cell of cellArr) {
1180
+ const ca = cell?._attr ?? {};
1181
+ const cellSpan = cell?.["hp:cellSpan"]?.[0]?._attr ?? {};
1182
+ const cellRs = Math.max(1, Number(cellSpan.rowSpan ?? ca.RowSpan ?? 1));
1183
+ const hSz = cell?.["hp:cellSz"]?.[0]?._attr ?? {};
1184
+ const hVal = Number(hSz.height ?? 0);
1185
+ if (hVal > 0) {
1186
+ rowHeightPt = Metric.hwpToPt(hVal) / cellRs;
1187
+ if (cellRs === 1) break; // exact match — stop searching
1188
+ }
1189
+ }
1190
+ return buildRow(cellNodes, rowHeightPt);
719
1191
  });
720
1192
  return buildGrid(rowNodes, gridProps);
721
1193
  }
722
1194
 
723
1195
  function decodeGridSimple(tbl: any, ctx: DecCtx): GridNode {
724
- const rowArr = getTag(tbl, 'hp:tr', 'hp:ROW');
1196
+ const rowArr = getTag(tbl, "hp:tr", "hp:ROW");
725
1197
  const rowNodes = rowArr.map((row: any) => {
726
- const cellArr = getTag(row, 'hp:tc', 'hp:CELL');
727
- return buildRow(cellArr.map((cell: any) => buildCell([buildPara([buildSpan(cellText(cell))])])));
1198
+ const cellArr = getTag(row, "hp:tc", "hp:CELL");
1199
+ return buildRow(
1200
+ cellArr.map((cell: any) =>
1201
+ buildCell([buildPara([buildSpan(cellText(cell))])]),
1202
+ ),
1203
+ );
728
1204
  });
729
1205
  return buildGrid(rowNodes);
730
1206
  }
731
1207
 
732
1208
  function decodeGridFlat(tbl: any): GridNode {
733
- return buildGrid([buildRow([buildCell([buildPara([buildSpan(tableText(tbl))])])])]);
1209
+ return buildGrid([
1210
+ buildRow([buildCell([buildPara([buildSpan(tableText(tbl))])])]),
1211
+ ]);
734
1212
  }
735
1213
 
736
1214
  function decodeGridText(tbl: any): ParaNode {
@@ -738,22 +1216,40 @@ function decodeGridText(tbl: any): ParaNode {
738
1216
  }
739
1217
 
740
1218
  function cellText(cell: any): string {
741
- const subList = cell?.['hp:subList']?.[0] ?? cell?.subList?.[0];
1219
+ const subList = cell?.["hp:subList"]?.[0] ?? cell?.subList?.[0];
742
1220
  const source = subList ?? cell;
743
- return getTag(source, 'hp:p', 'hp:P').map((p: any) =>
744
- getTag(p, 'hp:run', 'hp:RUN').map((r: any) =>
745
- getTag(r, 'hp:t', 'hp:T').map((t: any) => typeof t === 'string' ? t : t?._text ?? t?._ ?? '').join(''),
746
- ).join(''),
747
- ).join(' ');
1221
+ return getTag(source, "hp:p", "hp:P")
1222
+ .map((p: any) =>
1223
+ getTag(p, "hp:run", "hp:RUN")
1224
+ .map((r: any) =>
1225
+ getTag(r, "hp:t", "hp:T")
1226
+ .map((t: any) => {
1227
+ const val =
1228
+ typeof t === "string"
1229
+ ? t
1230
+ : (t?._text ?? t?._ ?? t?.["#text"] ?? "");
1231
+ return val.replace(/__EXT_\d+(?:_W\d+_H\d+)?__/g, "");
1232
+ })
1233
+ .join(""),
1234
+ )
1235
+ .join(""),
1236
+ )
1237
+ .join(" ");
748
1238
  }
749
1239
 
750
1240
  function tableText(tbl: any): string {
751
- return getTag(tbl, 'hp:tr', 'hp:ROW').map((row: any) =>
752
- getTag(row, 'hp:tc', 'hp:CELL').map((c: any) => cellText(c)).join('\t'),
753
- ).join('\n');
1241
+ return getTag(tbl, "hp:tr", "hp:ROW")
1242
+ .map((row: any) =>
1243
+ getTag(row, "hp:tc", "hp:CELL")
1244
+ .map((c: any) => cellText(c))
1245
+ .join("\t"),
1246
+ )
1247
+ .join("\n");
754
1248
  }
755
1249
 
756
- function toArr(v: any): any[] { return v == null ? [] : Array.isArray(v) ? v : [v]; }
1250
+ function toArr(v: any): any[] {
1251
+ return v == null ? [] : Array.isArray(v) ? v : [v];
1252
+ }
757
1253
 
758
1254
  // Auto-register
759
1255
  registry.registerDecoder(new HwpxDecoder());