hwpkit-dev 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/.npmignore +4 -2
  2. package/README.md +39 -2
  3. package/dist/index.d.mts +41 -14
  4. package/dist/index.d.ts +41 -14
  5. package/dist/index.js +3553 -1159
  6. package/dist/index.js.map +1 -1
  7. package/dist/index.mjs +3553 -1159
  8. package/dist/index.mjs.map +1 -1
  9. package/package.json +2 -1
  10. package/playground/index.html +346 -0
  11. package/playground/main.ts +302 -0
  12. package/playground/vite.config.ts +16 -0
  13. package/src/contract/decoder.ts +1 -0
  14. package/src/contract/encoder.ts +6 -1
  15. package/src/core/BaseDecoder.ts +118 -0
  16. package/src/core/BaseEncoder.ts +146 -0
  17. package/src/decoders/docx/DocxDecoder.ts +743 -151
  18. package/src/decoders/html/HtmlDecoder.ts +366 -0
  19. package/src/decoders/hwp/HwpScanner.ts +325 -157
  20. package/src/decoders/hwpx/HwpxDecoder.ts +785 -297
  21. package/src/decoders/md/MdDecoder.ts +4 -4
  22. package/src/encoders/docx/DocxEncoder.ts +504 -240
  23. package/src/encoders/html/HtmlEncoder.ts +17 -19
  24. package/src/encoders/hwp/HwpEncoder.ts +1466 -859
  25. package/src/encoders/hwpx/HwpxEncoder.ts +1477 -469
  26. package/src/encoders/hwpx/constants.ts +148 -0
  27. package/src/encoders/hwpx/utils.ts +198 -0
  28. package/src/encoders/md/MdEncoder.ts +20 -15
  29. package/src/model/builders.ts +4 -4
  30. package/src/model/doc-props.ts +19 -5
  31. package/src/model/doc-tree.ts +12 -4
  32. package/src/pipeline/Pipeline.ts +7 -3
  33. package/src/pipeline/registry.ts +13 -2
  34. package/src/safety/StyleBridge.ts +51 -6
  35. package/src/toolkit/ArchiveKit.ts +56 -0
  36. package/src/toolkit/StyleMapper.ts +221 -0
  37. package/src/toolkit/UnitConverter.ts +138 -0
  38. package/src/toolkit/XmlKit.ts +0 -5
  39. package/test-styling.ts +210 -0
  40. package/hwp-analyze.ts +0 -90
  41. package/inspect-doc.ts +0 -57
  42. package/output_test.hwp +0 -0
  43. package/test-docx-to-hwp.ts +0 -45
@@ -1,4 +1,3 @@
1
- import type { Decoder } from "../../contract/decoder";
2
1
  import type {
3
2
  DocRoot,
4
3
  ContentNode,
@@ -6,6 +5,7 @@ import type {
6
5
  SpanNode,
7
6
  GridNode,
8
7
  ImgNode,
8
+ LinkNode,
9
9
  PageNumNode,
10
10
  CellNode,
11
11
  } from "../../model/doc-tree";
@@ -47,13 +47,16 @@ import {
47
47
  safeHex,
48
48
  safeStrokeDocx,
49
49
  } from "../../safety/StyleBridge";
50
+ import { BaseDecoder } from "../../core/BaseDecoder";
50
51
  import { ArchiveKit } from "../../toolkit/ArchiveKit";
51
52
  import { XmlKit } from "../../toolkit/XmlKit";
52
53
  import { TextKit } from "../../toolkit/TextKit";
53
54
  import { registry } from "../../pipeline/registry";
54
55
 
55
- export class DocxDecoder implements Decoder {
56
- readonly format = "docx";
56
+ export class DocxDecoder extends BaseDecoder {
57
+ protected getFormat(): string {
58
+ return "docx";
59
+ }
57
60
 
58
61
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
59
62
  const shield = new ShieldedParser();
@@ -62,15 +65,23 @@ export class DocxDecoder implements Decoder {
62
65
  try {
63
66
  const files = await ArchiveKit.unzip(data);
64
67
 
65
- const docXml = files.get("word/document.xml");
68
+ const getFile = (path: string) => {
69
+ const lower = path.toLowerCase();
70
+ for (const [name, data] of files.entries()) {
71
+ if (name.toLowerCase() === lower) return data;
72
+ }
73
+ return undefined;
74
+ };
75
+
76
+ const docXml = getFile("word/document.xml");
66
77
  if (!docXml) return fail("DOCX: word/document.xml not found");
67
78
 
68
- const relsXml = files.get("word/_rels/document.xml.rels");
79
+ const relsXml = getFile("word/_rels/document.xml.rels");
69
80
  const relsMap = relsXml
70
81
  ? await parseRels(TextKit.decode(relsXml))
71
82
  : new Map<string, string>();
72
83
 
73
- const coreXml = files.get("docProps/core.xml");
84
+ const coreXml = getFile("docProps/core.xml");
74
85
  let meta: DocMeta = {};
75
86
  if (coreXml) {
76
87
  try {
@@ -81,7 +92,7 @@ export class DocxDecoder implements Decoder {
81
92
  }
82
93
 
83
94
  // Parse numbering.xml for list support
84
- const numXml = files.get("word/numbering.xml");
95
+ const numXml = getFile("word/numbering.xml");
85
96
  let numMap: NumMap = new Map();
86
97
  if (numXml) {
87
98
  try {
@@ -91,58 +102,85 @@ export class DocxDecoder implements Decoder {
91
102
  }
92
103
  }
93
104
 
94
- // Parse styles.xml for tblStyle defaults
105
+ // Parse styles.xml for table and paragraph/character style defaults
95
106
  let stylesMap: StylesMap = new Map();
96
- const stylesXml = files.get("word/styles.xml");
107
+ let paraStyleMap: ParaStyleMap = new Map();
108
+ const stylesXml = getFile("word/styles.xml");
97
109
  if (stylesXml) {
98
110
  try {
99
- stylesMap = await parseStylesMap(TextKit.decode(stylesXml));
111
+ const stylesStr = TextKit.decode(stylesXml);
112
+ stylesMap = await parseStylesMap(stylesStr);
113
+ paraStyleMap = await parseParaStyleMap(stylesStr);
100
114
  } catch {
101
115
  /* non-fatal */
102
116
  }
103
117
  }
104
118
 
105
- const docStr = TextKit.decode(docXml);
119
+ let docStr = TextKit.decode(docXml).trim();
120
+ if (!docStr) {
121
+ warns.push(
122
+ "DOCX: word/document.xml is empty, using fallback empty document",
123
+ );
124
+ docStr =
125
+ '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"><w:body/></w:document>';
126
+ }
106
127
  const docObj: any = await XmlKit.parseStrict(docStr);
107
128
 
108
129
  const body = getBody(docObj);
109
130
  const dims = extractDims(body) ?? { ...A4 };
110
131
  const elements = getBodyElements(body);
132
+ console.log(
133
+ `[DocxDecoder] 파싱된 전체 본문 요소 개수: ${elements.length}`,
134
+ );
111
135
 
112
- const decCtx: DecCtx = { relsMap, files, shield, numMap, warns, stylesMap };
136
+ const decCtx: DecCtx = {
137
+ relsMap,
138
+ files,
139
+ shield,
140
+ numMap,
141
+ warns,
142
+ stylesMap,
143
+ paraStyleMap,
144
+ };
113
145
 
114
146
  const kids: ContentNode[] = [];
115
147
  for (const el of elements) {
116
- const node = shield.guard(
148
+ const nodes = shield.guard(
117
149
  () => decodeElement(el, decCtx),
118
- buildPara([buildSpan("[요소 파싱 실패]")]),
150
+ [buildPara([buildSpan("[요소 파싱 실패]")])],
119
151
  "docx:bodyElement",
120
152
  );
121
- kids.push(node);
153
+ if (Array.isArray(nodes)) {
154
+ kids.push(...nodes);
155
+ } else {
156
+ kids.push(nodes);
157
+ }
122
158
 
123
159
  // Inline sectPr in pPr = section break → insert page-break paragraph after
124
- if (el.type === 'para') {
160
+ if (el.type === "para") {
125
161
  const pPr = el.node?.["w:pPr"]?.[0] ?? el.node?.pPr?.[0] ?? {};
126
162
  const inlineSectPr = pPr?.["w:sectPr"]?.[0] ?? pPr?.sectPr?.[0];
127
163
  if (inlineSectPr) {
128
164
  const typeAttr = inlineSectPr?.["w:type"]?.[0]?._attr;
129
- const sectType = typeAttr?.["w:val"] ?? typeAttr?.val ?? 'nextPage';
130
- if (sectType !== 'continuous') {
131
- kids.push(buildPara([{ tag: 'span', props: {}, kids: [buildPb()] }]));
165
+ const sectType = typeAttr?.["w:val"] ?? typeAttr?.val ?? "nextPage";
166
+ if (sectType !== "continuous") {
167
+ kids.push(
168
+ buildPara([{ tag: "span", props: {}, kids: [buildPb()] }]),
169
+ );
132
170
  }
133
171
  }
134
172
  }
135
173
  }
136
174
 
137
175
  // Decode header/footer
138
- const headerParas = await decodeHeaderFooter(
176
+ const headersMap = await decodeHeaderFooter(
139
177
  "header",
140
178
  body,
141
179
  relsMap,
142
180
  files,
143
181
  decCtx,
144
182
  );
145
- const footerParas = await decodeHeaderFooter(
183
+ const footersMap = await decodeHeaderFooter(
146
184
  "footer",
147
185
  body,
148
186
  relsMap,
@@ -152,8 +190,8 @@ export class DocxDecoder implements Decoder {
152
190
 
153
191
  warns.push(...shield.flush());
154
192
  const sheet = buildSheet(kids.filter(Boolean) as ContentNode[], dims, {
155
- header: headerParas,
156
- footer: footerParas,
193
+ headers: headersMap,
194
+ footers: footersMap,
157
195
  });
158
196
  return succeed(buildRoot(meta, [sheet]), warns);
159
197
  } catch (e: any) {
@@ -180,7 +218,31 @@ interface TblStyleDef {
180
218
  cellBg?: string; // default cell background
181
219
  }
182
220
 
183
- type StylesMap = Map<string, TblStyleDef>; // styleId → defaults
221
+ /** Parsed paragraph/character style defaults */
222
+ interface ParaStyleDef {
223
+ rPr?: {
224
+ b?: boolean;
225
+ i?: boolean;
226
+ u?: boolean;
227
+ s?: boolean;
228
+ pt?: number;
229
+ color?: string;
230
+ font?: string;
231
+ };
232
+ pPr?: {
233
+ align?: string;
234
+ spaceBefore?: number;
235
+ spaceAfter?: number;
236
+ lineHeight?: number;
237
+ indentPt?: number;
238
+ indentRightPt?: number;
239
+ firstLineIndentPt?: number;
240
+ };
241
+ basedOn?: string; // parent style id
242
+ }
243
+
244
+ type StylesMap = Map<string, TblStyleDef>; // styleId → table style defaults
245
+ type ParaStyleMap = Map<string, ParaStyleDef>; // styleId → para/char style defaults
184
246
 
185
247
  interface DecCtx {
186
248
  relsMap: Map<string, string>;
@@ -189,6 +251,7 @@ interface DecCtx {
189
251
  numMap: NumMap;
190
252
  warns: string[];
191
253
  stylesMap: StylesMap;
254
+ paraStyleMap: ParaStyleMap;
192
255
  }
193
256
 
194
257
  // numId → { abstractNumId, levels: Map<ilvl, { fmt, isOrdered }> }
@@ -220,8 +283,10 @@ function resolveDocxPath(baseDir: string, target: string): string {
220
283
 
221
284
  async function parseRels(xml: string): Promise<Map<string, string>> {
222
285
  const map = new Map<string, string>();
286
+ const trimmed = xml.trim();
287
+ if (!trimmed) return map;
223
288
  try {
224
- const obj: any = await XmlKit.parseStrict(xml);
289
+ const obj: any = await XmlKit.parseStrict(trimmed);
225
290
  for (const rel of toArr(obj?.Relationships?.[0]?.Relationship)) {
226
291
  const a = rel?._attr ?? {};
227
292
  if (a.Id && a.Target) map.set(a.Id, a.Target);
@@ -233,8 +298,10 @@ async function parseRels(xml: string): Promise<Map<string, string>> {
233
298
  }
234
299
 
235
300
  async function parseCoreProps(xml: string): Promise<DocMeta> {
301
+ const trimmed = xml.trim();
302
+ if (!trimmed) return {};
236
303
  try {
237
- const obj: any = await XmlKit.parseStrict(xml);
304
+ const obj: any = await XmlKit.parseStrict(trimmed);
238
305
  const c = obj?.["cp:coreProperties"]?.[0] ?? obj?.coreProperties?.[0] ?? {};
239
306
  return {
240
307
  title: c?.["dc:title"]?.[0]?._text ?? undefined,
@@ -250,8 +317,10 @@ async function parseCoreProps(xml: string): Promise<DocMeta> {
250
317
 
251
318
  async function parseNumbering(xml: string): Promise<NumMap> {
252
319
  const map: NumMap = new Map();
320
+ const trimmed = xml.trim();
321
+ if (!trimmed) return map;
253
322
  try {
254
- const obj: any = await XmlKit.parseStrict(xml);
323
+ const obj: any = await XmlKit.parseStrict(trimmed);
255
324
  const root = obj?.["w:numbering"]?.[0] ?? obj?.numbering?.[0] ?? obj;
256
325
 
257
326
  // Parse abstractNums
@@ -292,11 +361,14 @@ async function parseNumbering(xml: string): Promise<NumMap> {
292
361
  }
293
362
 
294
363
  function getBody(obj: any): any {
295
- return (
296
- obj?.["w:document"]?.[0]?.["w:body"]?.[0] ??
297
- obj?.document?.[0]?.body?.[0] ??
298
- obj
299
- );
364
+ // XML 파서에 따라 w:document 또는 document 형태일 수 있음
365
+ const doc = obj?.["w:document"]?.[0] ?? obj?.document?.[0] ?? obj;
366
+ const body = doc?.["w:body"]?.[0] ?? doc?.body?.[0] ?? doc;
367
+
368
+ if (!body) {
369
+ console.error("[DocxDecoder] 본문(body)을 찾을 수 없습니다.");
370
+ }
371
+ return body;
300
372
  }
301
373
 
302
374
  function extractDims(body: any): PageDims | null {
@@ -306,6 +378,8 @@ function extractDims(body: any): PageDims | null {
306
378
  const sz = sp?.["w:pgSz"]?.[0]?._attr ?? sp?.pgSz?.[0]?._attr;
307
379
  const mar = sp?.["w:pgMar"]?.[0]?._attr ?? sp?.pgMar?.[0]?._attr;
308
380
  if (!sz) return null;
381
+ const headerDxa = Number(mar?.["w:header"] ?? mar?.header ?? 0);
382
+ const footerDxa = Number(mar?.["w:footer"] ?? mar?.footer ?? 0);
309
383
  return {
310
384
  wPt: Metric.dxaToPt(Number(sz["w:w"] ?? sz.w ?? 11906)),
311
385
  hPt: Metric.dxaToPt(Number(sz["w:h"] ?? sz.h ?? 16838)),
@@ -317,6 +391,8 @@ function extractDims(body: any): PageDims | null {
317
391
  (sz["w:orient"] ?? sz.orient) === "landscape"
318
392
  ? "landscape"
319
393
  : "portrait",
394
+ headerPt: headerDxa > 0 ? Metric.dxaToPt(headerDxa) : undefined,
395
+ footerPt: footerDxa > 0 ? Metric.dxaToPt(footerDxa) : undefined,
320
396
  };
321
397
  } catch {
322
398
  return null;
@@ -326,35 +402,36 @@ function extractDims(body: any): PageDims | null {
326
402
  function getBodyElements(body: any): { type: string; node: any }[] {
327
403
  const paras = toArr(body?.["w:p"] ?? body?.p);
328
404
  const tables = toArr(body?.["w:tbl"] ?? body?.tbl);
405
+ const sdts = toArr(body?.["w:sdt"] ?? body?.sdt);
329
406
 
330
- if (tables.length === 0)
331
- return paras.map((n: any) => ({ type: "para", node: n }));
332
- if (paras.length === 0)
333
- return tables.map((n: any) => ({ type: "table", node: n }));
334
-
335
- // Use _childOrder from XmlKit to preserve document order
336
407
  const childOrder = body?.["_childOrder"] as string[] | undefined;
337
408
  if (Array.isArray(childOrder)) {
338
409
  const items: { type: string; node: any }[] = [];
339
410
  let pi = 0,
340
- ti = 0;
411
+ ti = 0,
412
+ si = 0;
341
413
  for (const tag of childOrder) {
342
414
  if ((tag === "w:p" || tag === "p") && pi < paras.length) {
343
415
  items.push({ type: "para", node: paras[pi++] });
344
416
  } else if ((tag === "w:tbl" || tag === "tbl") && ti < tables.length) {
345
417
  items.push({ type: "table", node: tables[ti++] });
418
+ } else if ((tag === "w:sdt" || tag === "sdt") && si < sdts.length) {
419
+ items.push({ type: "sdt", node: sdts[si++] });
346
420
  }
347
421
  }
422
+ // Append any remainders
348
423
  while (pi < paras.length) items.push({ type: "para", node: paras[pi++] });
349
424
  while (ti < tables.length)
350
425
  items.push({ type: "table", node: tables[ti++] });
426
+ while (si < sdts.length) items.push({ type: "sdt", node: sdts[si++] });
351
427
  return items;
352
428
  }
353
429
 
354
- // Fallback: paragraphs first, then tables
430
+ // Fallback: paragraphs, then tables, then sdts
355
431
  return [
356
432
  ...paras.map((n: any) => ({ type: "para", node: n })),
357
433
  ...tables.map((n: any) => ({ type: "table", node: n })),
434
+ ...sdts.map((n: any) => ({ type: "sdt", node: n })),
358
435
  ];
359
436
  }
360
437
 
@@ -363,10 +440,10 @@ function getBodyElements(body: any): { type: string; node: any }[] {
363
440
  async function decodeHeaderFooter(
364
441
  kind: "header" | "footer",
365
442
  body: any,
366
- relsMap: Map<string, string>,
443
+ relsMap: Map<string, string>, // document.xml.rels (기존)
367
444
  files: Map<string, Uint8Array>,
368
445
  ctx: DecCtx,
369
- ): Promise<ParaNode[] | undefined> {
446
+ ): Promise<Record<string, ParaNode[]> | undefined> {
370
447
  try {
371
448
  const sp = body?.["w:sectPr"]?.[0] ?? body?.sectPr?.[0];
372
449
  if (!sp) return undefined;
@@ -376,35 +453,79 @@ async function decodeHeaderFooter(
376
453
  const refs = toArr(sp?.[refTag] ?? sp?.[refTag.replace("w:", "")]);
377
454
  if (refs.length === 0) return undefined;
378
455
 
379
- const rId =
380
- refs[0]?._attr?.["r:id"] ??
381
- refs[0]?._attr?.["r:Id"] ??
382
- refs[0]?._attr?.id;
383
- if (!rId) return undefined;
384
-
385
- const target = relsMap.get(rId);
386
- if (!target) return undefined;
387
-
388
- const filePath = resolveDocxPath("word", target);
389
- const fileData = files.get(filePath);
390
- if (!fileData) return undefined;
456
+ const result: Record<string, ParaNode[]> = {};
457
+
458
+ for (const ref of refs) {
459
+ const type = ref._attr?.["w:type"] ?? ref._attr?.type ?? "default";
460
+ const rId = ref._attr?.["r:id"] ?? ref._attr?.["r:Id"] ?? ref._attr?.id;
461
+ if (!rId) continue;
462
+
463
+ const target = relsMap.get(rId);
464
+ if (!target) continue;
465
+
466
+ const filePath = resolveDocxPath("word", target);
467
+ const fileData = files.get(filePath);
468
+ if (!fileData) continue;
469
+
470
+ // ★ 핵심 수정: 헤더/풋터 전용 rels 파일 로드
471
+ const hfFileName = filePath.split("/").pop() ?? "";
472
+ const hfRelsPath = `word/_rels/${hfFileName}.rels`;
473
+ const hfRelsData = files.get(hfRelsPath);
474
+ // 헤더/풋터 rels를 document rels와 병합
475
+ let hfRelsMap = relsMap;
476
+ if (hfRelsData) {
477
+ const hfRelsStr = TextKit.decode(hfRelsData).trim();
478
+ const parsed = hfRelsStr
479
+ ? await parseRels(hfRelsStr)
480
+ : new Map<string, string>();
481
+ // 병합 (헤더/풋터 rels 우선)
482
+ hfRelsMap = new Map([...relsMap, ...parsed]);
483
+ }
391
484
 
392
- const xmlStr = TextKit.decode(fileData);
393
- const obj: any = await XmlKit.parseStrict(xmlStr);
485
+ const xmlStr = TextKit.decode(fileData).trim();
486
+ if (!xmlStr) continue;
394
487
 
395
- const rootTag = kind === "header" ? "w:hdr" : "w:ftr";
396
- const root =
397
- obj?.[rootTag]?.[0] ?? obj?.[rootTag.replace("w:", "")]?.[0] ?? obj;
488
+ const watermark = extractWatermark(xmlStr);
489
+ if (watermark) {
490
+ result[type] = [
491
+ buildPara([
492
+ buildSpan(watermark, { pt: 80, color: "CCCCCC", b: true }),
493
+ ]),
494
+ ];
495
+ continue;
496
+ }
398
497
 
399
- const paras = toArr(root?.["w:p"] ?? root?.p);
400
- if (paras.length === 0) return undefined;
498
+ try {
499
+ const obj: any = await XmlKit.parseStrict(xmlStr);
500
+ const rootTag = kind === "header" ? "w:hdr" : "w:ftr";
501
+ const root =
502
+ obj?.[rootTag]?.[0] ?? obj?.[rootTag.replace("w:", "")]?.[0] ?? obj;
503
+
504
+ // ctx에 hfRelsMap 임시 적용
505
+ const origRelsMap = ctx.relsMap;
506
+ (ctx as any).relsMap = hfRelsMap;
507
+ const paras = toArr(root?.["w:p"] ?? root?.p);
508
+ result[type] = paras.map((p: any) => decodePara(p, ctx));
509
+ (ctx as any).relsMap = origRelsMap;
510
+ } catch (err) {
511
+ console.warn(`[DocxDecoder] ${kind} (${type}) XML 파싱 실패:`, err);
512
+ continue;
513
+ }
514
+ }
401
515
 
402
- return paras.map((p: any) => decodePara(p, ctx));
516
+ return Object.keys(result).length > 0 ? result : undefined;
403
517
  } catch {
404
518
  return undefined;
405
519
  }
406
520
  }
407
521
 
522
+ /** 워터마크 텍스트 추출 (VML v:textpath 기반) */
523
+ function extractWatermark(xml: string): string | null {
524
+ if (!xml.includes("v:textpath")) return null;
525
+ const m = xml.match(/string="([^"]+)"/);
526
+ return m ? m[1] : null;
527
+ }
528
+
408
529
  // ─── Element decoding ──────────────────────────────────────
409
530
 
410
531
  //만약에 drawing 태그가 안에 있으면 true 반환
@@ -422,7 +543,7 @@ function hasDrawingDeep(node: any): boolean {
422
543
  function decodeElement(
423
544
  el: { type: string; node: any },
424
545
  ctx: DecCtx,
425
- ): ContentNode {
546
+ ): ContentNode | ContentNode[] {
426
547
  if (el.type === "table") {
427
548
  const { value } = ctx.shield.guardGrid(
428
549
  el.node,
@@ -433,10 +554,25 @@ function decodeElement(
433
554
  "docx:table",
434
555
  );
435
556
  return value;
557
+ } else if (el.type === "sdt") {
558
+ return decodeSdt(el.node, ctx);
436
559
  }
437
560
  return decodePara(el.node, ctx);
438
561
  }
439
562
 
563
+ function decodeSdt(sdt: any, ctx: DecCtx): ContentNode[] {
564
+ const content = sdt?.["w:sdtContent"]?.[0] ?? sdt?.sdtContent?.[0];
565
+ if (!content) return [];
566
+ const elements = getBodyElements(content);
567
+ const kids: ContentNode[] = [];
568
+ for (const el of elements) {
569
+ const res = decodeElement(el, ctx);
570
+ if (Array.isArray(res)) kids.push(...res);
571
+ else kids.push(res);
572
+ }
573
+ return kids;
574
+ }
575
+
440
576
  function decodePara(p: any, ctx: DecCtx): ParaNode {
441
577
  const pPr = p?.["w:pPr"]?.[0] ?? {};
442
578
  const alignVal =
@@ -446,12 +582,19 @@ function decodePara(p: any, ctx: DecCtx): ParaNode {
446
582
  pPr?.["w:pStyle"]?.[0]?._attr?.val ??
447
583
  "";
448
584
 
585
+ // Resolve paragraph style inheritance chain
586
+ const styleInherited = resolveParaStyle(
587
+ headStyle || undefined,
588
+ ctx.paraStyleMap,
589
+ );
590
+
449
591
  const props: ParaProps = {
450
592
  align: safeAlign(alignVal),
451
593
  heading: parseHeading(headStyle),
594
+ styleId: headStyle || undefined,
452
595
  };
453
596
 
454
- // Spacing (before/after/line height)
597
+ // Spacing (before/after/line height) — inline pPr wins over style
455
598
  const spacingAttr =
456
599
  pPr?.["w:spacing"]?.[0]?._attr ?? pPr?.spacing?.[0]?._attr ?? {};
457
600
  const beforeVal = Number(
@@ -462,13 +605,38 @@ function decodePara(p: any, ctx: DecCtx): ParaNode {
462
605
  const lineRule =
463
606
  spacingAttr?.["w:lineRule"] ?? spacingAttr?.lineRule ?? "auto";
464
607
  if (beforeVal > 0) props.spaceBefore = Metric.dxaToPt(beforeVal);
608
+ else if (styleInherited.pPr?.spaceBefore)
609
+ props.spaceBefore = styleInherited.pPr.spaceBefore;
465
610
  if (afterVal > 0) props.spaceAfter = Metric.dxaToPt(afterVal);
611
+ else if (styleInherited.pPr?.spaceAfter)
612
+ props.spaceAfter = styleInherited.pPr.spaceAfter;
466
613
  if (lineVal > 0 && lineRule === "auto") props.lineHeight = lineVal / 240;
614
+ else if (styleInherited.pPr?.lineHeight)
615
+ props.lineHeight = styleInherited.pPr.lineHeight;
467
616
 
468
617
  // Indentation
469
618
  const indAttr = pPr?.["w:ind"]?.[0]?._attr ?? pPr?.ind?.[0]?._attr ?? {};
470
619
  const leftVal = Number(indAttr?.["w:left"] ?? indAttr?.left ?? 0);
620
+ const rightVal = Number(indAttr?.["w:right"] ?? indAttr?.right ?? 0);
621
+ const firstLineVal = Number(
622
+ indAttr?.["w:firstLine"] ?? indAttr?.firstLine ?? 0,
623
+ );
624
+ const hangingVal = Number(indAttr?.["w:hanging"] ?? indAttr?.hanging ?? 0);
471
625
  if (leftVal > 0) props.indentPt = Metric.dxaToPt(leftVal);
626
+ else if (styleInherited.pPr?.indentPt)
627
+ props.indentPt = styleInherited.pPr.indentPt;
628
+ if (rightVal > 0) props.indentRightPt = Metric.dxaToPt(rightVal);
629
+ else if (styleInherited.pPr?.indentRightPt)
630
+ props.indentRightPt = styleInherited.pPr.indentRightPt;
631
+ if (firstLineVal > 0) props.firstLineIndentPt = Metric.dxaToPt(firstLineVal);
632
+ else if (hangingVal > 0)
633
+ props.firstLineIndentPt = -Metric.dxaToPt(hangingVal);
634
+ else if (styleInherited.pPr?.firstLineIndentPt)
635
+ props.firstLineIndentPt = styleInherited.pPr.firstLineIndentPt;
636
+
637
+ // Alignment from style if not set inline
638
+ if (!alignVal && styleInherited.pPr?.align)
639
+ props.align = safeAlign(styleInherited.pPr.align);
472
640
 
473
641
  // List/numbering
474
642
  const numPr = pPr?.["w:numPr"]?.[0] ?? pPr?.numPr?.[0];
@@ -492,26 +660,100 @@ function decodePara(p: any, ctx: DecCtx): ParaNode {
492
660
  }
493
661
 
494
662
  // pageBreakBefore: paragraph always starts on a new page
495
- const pbBeforeNode = pPr?.["w:pageBreakBefore"]?.[0] ?? pPr?.pageBreakBefore?.[0];
496
- const hasPageBreakBefore = pbBeforeNode != null &&
663
+ const pbBeforeNode =
664
+ pPr?.["w:pageBreakBefore"]?.[0] ?? pPr?.pageBreakBefore?.[0];
665
+ const hasPageBreakBefore =
666
+ pbBeforeNode != null &&
497
667
  (pbBeforeNode?._attr?.["w:val"] ?? pbBeforeNode?._attr?.val ?? "1") !== "0";
498
668
 
499
- const runs = toArr(p?.["w:r"] ?? p?.r);
500
-
501
- // 3/28 이미지 태크를 찾을수 있기 때문에 별도 함수 구현
502
- const kids: (SpanNode | ImgNode)[] = ctx.shield.guardAll(
503
- runs,
504
- (run: any) =>
505
- hasDrawingDeep(run) ? decodeRunOrImage(run, ctx) : decodeRun(run, ctx),
506
- () => buildSpan(""),
507
- "docx:run",
508
- );
669
+ // Resolve all children (runs AND hyperlinks) in document order
670
+ const children = p?.["_childOrder"] as string[] | undefined;
671
+ const kids: (SpanNode | ImgNode | LinkNode)[] = [];
672
+
673
+ if (Array.isArray(children)) {
674
+ const runsArr = toArr(p?.["w:r"] ?? p?.r);
675
+ const hlArr = toArr(p?.["w:hyperlink"] ?? p?.hyperlink);
676
+ const sdtArr = toArr(p?.["w:sdt"] ?? p?.sdt);
677
+ let ri = 0;
678
+ let hi = 0;
679
+ let si = 0;
680
+
681
+ for (const tag of children) {
682
+ if (tag === "w:r" || tag === "r") {
683
+ const run = runsArr[ri++];
684
+ if (run) {
685
+ kids.push(
686
+ ctx.shield.guard(
687
+ () =>
688
+ hasDrawingDeep(run)
689
+ ? decodeRunOrImage(run, ctx)
690
+ : decodeRun(run, ctx, styleInherited.rPr),
691
+ buildSpan(""),
692
+ "docx:run",
693
+ ),
694
+ );
695
+ }
696
+ } else if (tag === "w:hyperlink" || tag === "hyperlink") {
697
+ const hl = hlArr[hi++];
698
+ if (hl) {
699
+ const rId = hl?._attr?.["r:id"] ?? hl?._attr?.id;
700
+ const url = rId ? ctx.relsMap.get(rId) : "";
701
+ const hlRuns = toArr(hl?.["w:r"] ?? hl?.r);
702
+ const hlKids = hlRuns.map((r: any) =>
703
+ decodeRun(r, ctx, {
704
+ ...styleInherited.rPr,
705
+ u: true,
706
+ color: "0000FF",
707
+ }),
708
+ );
709
+ kids.push({
710
+ tag: "link",
711
+ href: url || "",
712
+ kids: hlKids,
713
+ });
714
+ }
715
+ } else if (tag === "w:sdt" || tag === "sdt") {
716
+ const sdt = sdtArr[si++];
717
+ if (sdt) {
718
+ const sdtContent = sdt?.["w:sdtContent"]?.[0] ?? sdt?.sdtContent?.[0];
719
+ if (sdtContent) {
720
+ const innerRuns = toArr(sdtContent?.["w:r"] ?? sdtContent?.r);
721
+ for (const ir of innerRuns) {
722
+ kids.push(
723
+ ctx.shield.guard(
724
+ () =>
725
+ hasDrawingDeep(ir)
726
+ ? decodeRunOrImage(ir, ctx)
727
+ : decodeRun(ir, ctx, styleInherited.rPr),
728
+ buildSpan(""),
729
+ "docx:run",
730
+ ),
731
+ );
732
+ }
733
+ }
734
+ }
735
+ }
736
+ }
737
+ } else {
738
+ // Fallback if _childOrder is missing
739
+ const runs = toArr(p?.["w:r"] ?? p?.r);
740
+ const legacyKids: (SpanNode | ImgNode)[] = ctx.shield.guardAll(
741
+ runs,
742
+ (run: any) =>
743
+ hasDrawingDeep(run)
744
+ ? decodeRunOrImage(run, ctx)
745
+ : decodeRun(run, ctx, styleInherited.rPr),
746
+ () => buildSpan(""),
747
+ "docx:run",
748
+ );
749
+ kids.push(...legacyKids);
750
+ }
509
751
 
510
752
  const filteredKids = kids.filter(Boolean) as ParaNode["kids"];
511
753
 
512
754
  // Prepend pb span when pageBreakBefore is set
513
755
  if (hasPageBreakBefore) {
514
- filteredKids.unshift({ tag: 'span', props: {}, kids: [buildPb()] });
756
+ filteredKids.unshift({ tag: "span", props: {}, kids: [buildPb()] });
515
757
  }
516
758
 
517
759
  return buildPara(filteredKids, props);
@@ -549,6 +791,33 @@ function decodeRunOrImage(run: any, ctx: DecCtx): SpanNode | ImgNode {
549
791
 
550
792
  return decodeRun(run, ctx);
551
793
  }
794
+ /** Decode image layout from anchor element */
795
+ function decodeImageLayout(anchor: any): ImgLayout {
796
+ const wrap = anchor?.["wp:wrapTop"]?.[0] ?? anchor?.wrapTop?.[0];
797
+ const anchorPos =
798
+ anchor?.["wp:anchorPos"]?.[0]?._attr ?? anchor?.anchorPos?.[0]?._attr ?? {};
799
+
800
+ const layout: ImgLayout = {
801
+ wrap: "square",
802
+ horzAlign: "left",
803
+ vertAlign: "top",
804
+ horzRelTo: "page",
805
+ vertRelTo: "page",
806
+ xPt: Number(anchorPos?.x ?? 0) / 12700, // emu to pt
807
+ yPt: Number(anchorPos?.y ?? 0) / 12700, // emu to pt
808
+ };
809
+
810
+ // Parse wrap type
811
+ if (wrap?.["wp:none"]) layout.wrap = "none";
812
+ else if (wrap?.["wp:square"]) layout.wrap = "square";
813
+ else if (wrap?.["wp:tight"]) layout.wrap = "tight";
814
+ else if (wrap?.["wp:through"]) layout.wrap = "through";
815
+ else if (wrap?.["wp:behind"]) layout.wrap = "behind";
816
+ else if (wrap?.["wp:inFront"]) layout.wrap = "front";
817
+
818
+ return layout;
819
+ }
820
+
552
821
  function decodeDrawing(drawing: any, ctx: DecCtx): ImgNode | null {
553
822
  try {
554
823
  const inline = drawing?.["wp:inline"]?.[0] ?? drawing?.inline?.[0];
@@ -575,6 +844,20 @@ function decodeDrawing(drawing: any, ctx: DecCtx): ImgNode | null {
575
844
  const graphic = container?.["a:graphic"]?.[0] ?? container?.graphic?.[0];
576
845
  const graphicData =
577
846
  graphic?.["a:graphicData"]?.[0] ?? graphic?.graphicData?.[0];
847
+
848
+ // 1. 차트 감지
849
+ if (graphicData?.["c:chart"] || graphicData?.chart) {
850
+ return {
851
+ tag: "img",
852
+ b64: "", // 플레이스홀더
853
+ mime: "image/png",
854
+ w: wPt,
855
+ h: hPt,
856
+ alt: `[차트: ${alt || "차트"}]`,
857
+ layout: decodeImageLayout(anchor),
858
+ };
859
+ }
860
+
578
861
  const pic = graphicData?.["pic:pic"]?.[0] ?? graphicData?.pic?.[0];
579
862
  const blipFill = pic?.["pic:blipFill"]?.[0] ?? pic?.blipFill?.[0];
580
863
  const blip =
@@ -586,12 +869,27 @@ function decodeDrawing(drawing: any, ctx: DecCtx): ImgNode | null {
586
869
  const target = ctx.relsMap.get(rId);
587
870
  if (!target) return null;
588
871
 
589
- const filePath = resolveDocxPath("word", target);
590
- const fileData = ctx.files.get(filePath);
872
+ let filePath = resolveDocxPath("word", target);
873
+ let fileData = ctx.files.get(filePath);
874
+
591
875
  if (!fileData) {
592
- console.warn(
593
- `[DocxDecoder] image not found in ZIP: "${filePath}" (rId=${rId}, target=${target})`,
594
- );
876
+ filePath = resolveDocxPath("word/_rels", target);
877
+ fileData = ctx.files.get(filePath);
878
+ }
879
+
880
+ if (!fileData) {
881
+ const fileName = target.split("/").pop() ?? "";
882
+ for (const [k, v] of ctx.files) {
883
+ if (fileName && (k.endsWith("/" + fileName) || k === fileName)) {
884
+ fileData = v;
885
+ filePath = k;
886
+ break;
887
+ }
888
+ }
889
+ }
890
+
891
+ if (!fileData) {
892
+ console.warn(`[DocxDecoder] image not found: "${target}"`);
595
893
  return null;
596
894
  }
597
895
 
@@ -610,20 +908,63 @@ function decodeDrawing(drawing: any, ctx: DecCtx): ImgNode | null {
610
908
 
611
909
  // ── layout 추출 ──────────────────────────────────────────
612
910
  const layout: ImgLayout = inline
613
- ? { wrap: 'inline' }
911
+ ? { wrap: "inline" }
614
912
  : extractAnchorLayout(anchor);
615
913
 
616
- return buildImg(TextKit.base64Encode(fileData), mime, wPt, hPt, alt || undefined, layout);
914
+ return buildImg(
915
+ TextKit.base64Encode(fileData),
916
+ mime,
917
+ wPt,
918
+ hPt,
919
+ alt || undefined,
920
+ layout,
921
+ );
617
922
  } catch {
618
923
  return null;
619
924
  }
620
925
  }
621
926
 
622
- function decodeRun(run: any, ctx: DecCtx): SpanNode {
927
+ /** w:highlight val hex 색상 매핑 (OOXML 명세) */
928
+ const HIGHLIGHT_COLOR_MAP: Record<string, string> = {
929
+ yellow: "FFFF00",
930
+ green: "00FF00",
931
+ cyan: "00FFFF",
932
+ magenta: "FF00FF",
933
+ blue: "0000FF",
934
+ red: "FF0000",
935
+ darkBlue: "00008B",
936
+ darkCyan: "008B8B",
937
+ darkGreen: "006400",
938
+ darkMagenta: "8B008B",
939
+ darkRed: "8B0000",
940
+ darkYellow: "808000",
941
+ darkGray: "A9A9A9",
942
+ lightGray: "D3D3D3",
943
+ black: "000000",
944
+ white: "FFFFFF",
945
+ };
946
+
947
+ function decodeRun(
948
+ run: any,
949
+ ctx: DecCtx,
950
+ styleRpr?: ParaStyleDef["rPr"],
951
+ ): SpanNode {
623
952
  const rPr = run?.["w:rPr"]?.[0] ?? run?.rPr?.[0] ?? {};
624
953
 
954
+ // w:vanish — 숨긴 텍스트: run 전체 건너뜀 (빈 span 반환)
955
+ const vanishNode = rPr?.["w:vanish"]?.[0] ?? rPr?.vanish?.[0];
956
+ if (vanishNode != null) {
957
+ const vanishVal =
958
+ vanishNode?._attr?.["w:val"] ?? vanishNode?._attr?.val ?? "1";
959
+ if (vanishVal !== "0") return buildSpan("");
960
+ }
961
+
962
+ // w:sz → 없으면 w:szCs 로 fallback (한글 글꼴 크기)
625
963
  const szAttr = rPr?.["w:sz"]?.[0]?._attr ?? rPr?.sz?.[0]?._attr ?? {};
626
964
  const szVal = szAttr?.["w:val"] ?? szAttr?.val;
965
+ const szCsAttr = rPr?.["w:szCs"]?.[0]?._attr ?? rPr?.szCs?.[0]?._attr ?? {};
966
+ const szCsVal = szCsAttr?.["w:val"] ?? szCsAttr?.val;
967
+ const effectiveSzVal = szVal ?? szCsVal;
627
968
 
628
969
  const colorAttr =
629
970
  rPr?.["w:color"]?.[0]?._attr ?? rPr?.color?.[0]?._attr ?? {};
@@ -642,15 +983,33 @@ function decodeRun(run: any, ctx: DecCtx): SpanNode {
642
983
  const underVal =
643
984
  rPr?.["w:u"]?.[0]?._attr?.["w:val"] ?? rPr?.["w:u"]?.[0]?._attr?.val;
644
985
 
645
- // Background/highlight
986
+ // w:shd — 배경색 (낮은 우선순위)
646
987
  const shdAttr = rPr?.["w:shd"]?.[0]?._attr ?? rPr?.shd?.[0]?._attr ?? {};
647
- const bgVal = safeHex(shdAttr?.["w:fill"] ?? shdAttr?.fill);
988
+ const shdBg = safeHex(shdAttr?.["w:fill"] ?? shdAttr?.fill);
989
+
990
+ // w:highlight — 형광펜 색상 (w:shd보다 우선)
991
+ const hlAttr =
992
+ rPr?.["w:highlight"]?.[0]?._attr ?? rPr?.highlight?.[0]?._attr ?? {};
993
+ const hlVal = hlAttr?.["w:val"] ?? hlAttr?.val;
994
+ const bgVal = (hlVal ? HIGHLIGHT_COLOR_MAP[hlVal] : undefined) ?? shdBg;
648
995
 
649
- // Superscript/subscript
996
+ // w:vertAlign — superscript / subscript
650
997
  const vertAlignVal =
651
998
  rPr?.["w:vertAlign"]?.[0]?._attr?.["w:val"] ??
652
999
  rPr?.["w:vertAlign"]?.[0]?._attr?.val;
653
1000
 
1001
+ // w:position — 글자 상하 이동 (half-point, 양수=위, 음수=아래)
1002
+ // vertAlign이 없을 때 보조 판단: ±4 half-pt(≈2pt) 이상이면 sup/sub
1003
+ const posAttr =
1004
+ rPr?.["w:position"]?.[0]?._attr ?? rPr?.position?.[0]?._attr ?? {};
1005
+ const posVal = Number(posAttr?.["w:val"] ?? posAttr?.val ?? 0);
1006
+ let isSup = vertAlignVal === "superscript";
1007
+ let isSub = vertAlignVal === "subscript";
1008
+ if (!isSup && !isSub && posVal !== 0) {
1009
+ if (posVal >= 4) isSup = true;
1010
+ else if (posVal <= -4) isSub = true;
1011
+ }
1012
+
654
1013
  // Check bold/italic/strike — val="0" means explicitly OFF
655
1014
  const bNode = rPr?.["w:b"]?.[0] ?? rPr?.b?.[0];
656
1015
  const isBold =
@@ -665,16 +1024,19 @@ function decodeRun(run: any, ctx: DecCtx): SpanNode {
665
1024
  sNode != null &&
666
1025
  (sNode?._attr?.["w:val"] ?? sNode?._attr?.val ?? "1") !== "0";
667
1026
 
1027
+ // Run-level properties: run wins, then fall back to paragraph style inheritance
668
1028
  const props: TextProps = {
669
- b: isBold || undefined,
670
- i: isItalic || undefined,
671
- u: underVal && underVal !== "none" ? true : undefined,
672
- s: isStrike || undefined,
673
- sup: vertAlignVal === "superscript" || undefined,
674
- sub: vertAlignVal === "subscript" || undefined,
675
- pt: szVal ? Metric.halfPtToPt(Number(szVal)) : undefined,
676
- color: safeHex(colorVal),
677
- font: fontName ? safeFont(fontName) : undefined,
1029
+ b: (bNode != null ? isBold : styleRpr?.b) || undefined,
1030
+ i: (iNode != null ? isItalic : styleRpr?.i) || undefined,
1031
+ u: (underVal ? underVal !== "none" : styleRpr?.u) || undefined,
1032
+ s: (sNode != null ? isStrike : styleRpr?.s) || undefined,
1033
+ sup: isSup || undefined,
1034
+ sub: isSub || undefined,
1035
+ pt: effectiveSzVal
1036
+ ? Metric.halfPtToPt(Number(effectiveSzVal))
1037
+ : styleRpr?.pt,
1038
+ color: safeHex(colorVal) ?? styleRpr?.color,
1039
+ font: fontName ? safeFont(fontName) : styleRpr?.font,
678
1040
  bg: bgVal,
679
1041
  };
680
1042
 
@@ -712,8 +1074,12 @@ function decodeRun(run: any, ctx: DecCtx): SpanNode {
712
1074
  /** Parse all 6 border sides from a w:tblBorders or w:tcBorders node */
713
1075
  function parseBorderDef(bdrNode: any): TblBorderDef {
714
1076
  const sides: [string, keyof TblBorderDef][] = [
715
- ["top", "top"], ["bottom", "bottom"], ["left", "left"], ["right", "right"],
716
- ["insideH", "insideH"], ["insideV", "insideV"],
1077
+ ["top", "top"],
1078
+ ["bottom", "bottom"],
1079
+ ["left", "left"],
1080
+ ["right", "right"],
1081
+ ["insideH", "insideH"],
1082
+ ["insideV", "insideV"],
717
1083
  ];
718
1084
  const result: TblBorderDef = {};
719
1085
  for (const [xml, prop] of sides) {
@@ -733,8 +1099,10 @@ function parseBorderDef(bdrNode: any): TblBorderDef {
733
1099
  /** Parse styles.xml and build a map of tblStyle defaults */
734
1100
  async function parseStylesMap(xml: string): Promise<StylesMap> {
735
1101
  const map: StylesMap = new Map();
1102
+ const trimmed = xml.trim();
1103
+ if (!trimmed) return map;
736
1104
  try {
737
- const obj: any = await XmlKit.parseStrict(xml);
1105
+ const obj: any = await XmlKit.parseStrict(trimmed);
738
1106
  const stylesRoot = obj?.["w:styles"]?.[0] ?? obj?.styles?.[0] ?? obj;
739
1107
  const styleArr = toArr(stylesRoot?.["w:style"] ?? stylesRoot?.style);
740
1108
  for (const style of styleArr) {
@@ -767,24 +1135,168 @@ async function parseStylesMap(xml: string): Promise<StylesMap> {
767
1135
  return map;
768
1136
  }
769
1137
 
1138
+ /** Parse styles.xml and build a map of paragraph/character style defaults */
1139
+ async function parseParaStyleMap(xml: string): Promise<ParaStyleMap> {
1140
+ const map: ParaStyleMap = new Map();
1141
+ const trimmed = xml.trim();
1142
+ if (!trimmed) return map;
1143
+ try {
1144
+ const obj: any = await XmlKit.parseStrict(trimmed);
1145
+ const stylesRoot = obj?.["w:styles"]?.[0] ?? obj?.styles?.[0] ?? obj;
1146
+ const styleArr = toArr(stylesRoot?.["w:style"] ?? stylesRoot?.style);
1147
+ for (const style of styleArr) {
1148
+ const attr = style?._attr ?? {};
1149
+ const type = attr?.["w:type"] ?? attr?.type;
1150
+ if (type !== "paragraph" && type !== "character") continue;
1151
+ const id = attr?.["w:styleId"] ?? attr?.styleId;
1152
+ if (!id) continue;
1153
+ const basedOn = (style?.["w:basedOn"]?.[0]?._attr ??
1154
+ style?.basedOn?.[0]?._attr)?.["w:val"];
1155
+
1156
+ const def: ParaStyleDef = { basedOn };
1157
+
1158
+ // rPr from run properties
1159
+ const rPr = style?.["w:rPr"]?.[0] ?? style?.rPr?.[0];
1160
+ if (rPr) {
1161
+ const szAttr = rPr?.["w:sz"]?.[0]?._attr ?? rPr?.sz?.[0]?._attr ?? {};
1162
+ const szVal = szAttr?.["w:val"] ?? szAttr?.val;
1163
+ const colorAttr =
1164
+ rPr?.["w:color"]?.[0]?._attr ?? rPr?.color?.[0]?._attr ?? {};
1165
+ const colorVal = colorAttr?.["w:val"] ?? colorAttr?.val;
1166
+ const fontAttr =
1167
+ rPr?.["w:rFonts"]?.[0]?._attr ?? rPr?.rFonts?.[0]?._attr ?? {};
1168
+ const fontName =
1169
+ fontAttr?.["w:ascii"] ??
1170
+ fontAttr?.ascii ??
1171
+ fontAttr?.["w:eastAsia"] ??
1172
+ fontAttr?.eastAsia;
1173
+ const bNode = rPr?.["w:b"]?.[0] ?? rPr?.b?.[0];
1174
+ const isBold =
1175
+ bNode != null &&
1176
+ (bNode?._attr?.["w:val"] ?? bNode?._attr?.val ?? "1") !== "0";
1177
+ const iNode = rPr?.["w:i"]?.[0] ?? rPr?.i?.[0];
1178
+ const isItalic =
1179
+ iNode != null &&
1180
+ (iNode?._attr?.["w:val"] ?? iNode?._attr?.val ?? "1") !== "0";
1181
+ const underVal =
1182
+ rPr?.["w:u"]?.[0]?._attr?.["w:val"] ?? rPr?.["w:u"]?.[0]?._attr?.val;
1183
+ const sNode = rPr?.["w:strike"]?.[0] ?? rPr?.strike?.[0];
1184
+ const isStrike =
1185
+ sNode != null &&
1186
+ (sNode?._attr?.["w:val"] ?? sNode?._attr?.val ?? "1") !== "0";
1187
+ def.rPr = {
1188
+ b: isBold || undefined,
1189
+ i: isItalic || undefined,
1190
+ u: underVal && underVal !== "none" ? true : undefined,
1191
+ s: isStrike || undefined,
1192
+ pt: szVal ? Metric.halfPtToPt(Number(szVal)) : undefined,
1193
+ color: safeHex(colorVal),
1194
+ font: fontName ? safeFont(fontName) : undefined,
1195
+ };
1196
+ }
1197
+
1198
+ // pPr from paragraph properties
1199
+ const pPr = style?.["w:pPr"]?.[0] ?? style?.pPr?.[0];
1200
+ if (pPr) {
1201
+ const spacingAttr =
1202
+ pPr?.["w:spacing"]?.[0]?._attr ?? pPr?.spacing?.[0]?._attr ?? {};
1203
+ const beforeVal = Number(
1204
+ spacingAttr?.["w:before"] ?? spacingAttr?.before ?? 0,
1205
+ );
1206
+ const afterVal = Number(
1207
+ spacingAttr?.["w:after"] ?? spacingAttr?.after ?? 0,
1208
+ );
1209
+ const lineVal = Number(
1210
+ spacingAttr?.["w:line"] ?? spacingAttr?.line ?? 0,
1211
+ );
1212
+ const lineRule =
1213
+ spacingAttr?.["w:lineRule"] ?? spacingAttr?.lineRule ?? "auto";
1214
+ const indAttr =
1215
+ pPr?.["w:ind"]?.[0]?._attr ?? pPr?.ind?.[0]?._attr ?? {};
1216
+ const leftVal = Number(indAttr?.["w:left"] ?? indAttr?.left ?? 0);
1217
+ const rightVal = Number(indAttr?.["w:right"] ?? indAttr?.right ?? 0);
1218
+ const firstLineVal = Number(
1219
+ indAttr?.["w:firstLine"] ?? indAttr?.firstLine ?? 0,
1220
+ );
1221
+ const hangingVal = Number(
1222
+ indAttr?.["w:hanging"] ?? indAttr?.hanging ?? 0,
1223
+ );
1224
+ const alignVal =
1225
+ pPr?.["w:jc"]?.[0]?._attr?.["w:val"] ??
1226
+ pPr?.["w:jc"]?.[0]?._attr?.val;
1227
+ def.pPr = {
1228
+ align: alignVal,
1229
+ spaceBefore: beforeVal > 0 ? Metric.dxaToPt(beforeVal) : undefined,
1230
+ spaceAfter: afterVal > 0 ? Metric.dxaToPt(afterVal) : undefined,
1231
+ lineHeight:
1232
+ lineVal > 0 && lineRule === "auto" ? lineVal / 240 : undefined,
1233
+ indentPt: leftVal > 0 ? Metric.dxaToPt(leftVal) : undefined,
1234
+ indentRightPt: rightVal > 0 ? Metric.dxaToPt(rightVal) : undefined,
1235
+ firstLineIndentPt:
1236
+ firstLineVal > 0
1237
+ ? Metric.dxaToPt(firstLineVal)
1238
+ : hangingVal > 0
1239
+ ? -Metric.dxaToPt(hangingVal)
1240
+ : undefined,
1241
+ };
1242
+ }
1243
+
1244
+ map.set(id, def);
1245
+ }
1246
+ } catch {
1247
+ /* non-fatal */
1248
+ }
1249
+ return map;
1250
+ }
1251
+
1252
+ /** Resolve paragraph style inheritance chain (max depth 8) */
1253
+ function resolveParaStyle(
1254
+ styleId: string | undefined,
1255
+ map: ParaStyleMap,
1256
+ ): ParaStyleDef {
1257
+ let merged: ParaStyleDef = {};
1258
+ const visited = new Set<string>();
1259
+ let cur = styleId;
1260
+ while (cur && !visited.has(cur)) {
1261
+ visited.add(cur);
1262
+ const def = map.get(cur);
1263
+ if (!def) break;
1264
+ // Merge: child values win over parent
1265
+ if (def.rPr) {
1266
+ merged.rPr = { ...def.rPr, ...merged.rPr };
1267
+ }
1268
+ if (def.pPr) {
1269
+ merged.pPr = { ...def.pPr, ...merged.pPr };
1270
+ }
1271
+ cur = def.basedOn;
1272
+ }
1273
+ return merged;
1274
+ }
1275
+
770
1276
  /** Resolve final CellProps borders using 3-level priority chain */
771
1277
  function resolveCellBorders(
772
1278
  cp: CellProps,
773
- ri: number, ci: number, rs: number, cs: number,
774
- rowCount: number, colCount: number,
1279
+ ri: number,
1280
+ ci: number,
1281
+ rs: number,
1282
+ cs: number,
1283
+ rowCount: number,
1284
+ colCount: number,
775
1285
  tblBdr: TblBorderDef,
776
1286
  ): CellProps {
777
- const isTopEdge = ri === 0;
1287
+ const isTopEdge = ri === 0;
778
1288
  const isBottomEdge = ri + rs >= rowCount;
779
- const isLeftEdge = ci === 0;
780
- const isRightEdge = ci + cs >= colCount;
1289
+ const isLeftEdge = ci === 0;
1290
+ const isRightEdge = ci + cs >= colCount;
781
1291
 
782
1292
  // Apply tblBorders only where no explicit tcBorder was set
783
1293
  const resolved: CellProps = { ...cp };
784
- if (!resolved.top) resolved.top = isTopEdge ? tblBdr.top : tblBdr.insideH;
785
- if (!resolved.bot) resolved.bot = isBottomEdge ? tblBdr.bottom : tblBdr.insideH;
786
- if (!resolved.left) resolved.left = isLeftEdge ? tblBdr.left : tblBdr.insideV;
787
- if (!resolved.right) resolved.right = isRightEdge ? tblBdr.right : tblBdr.insideV;
1294
+ if (!resolved.top) resolved.top = isTopEdge ? tblBdr.top : tblBdr.insideH;
1295
+ if (!resolved.bot)
1296
+ resolved.bot = isBottomEdge ? tblBdr.bottom : tblBdr.insideH;
1297
+ if (!resolved.left) resolved.left = isLeftEdge ? tblBdr.left : tblBdr.insideV;
1298
+ if (!resolved.right)
1299
+ resolved.right = isRightEdge ? tblBdr.right : tblBdr.insideV;
788
1300
  return resolved;
789
1301
  }
790
1302
 
@@ -810,7 +1322,8 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
810
1322
  };
811
1323
 
812
1324
  // ① tblStyle 기본값 로드
813
- const tblStyleId = (tblPr?.["w:tblStyle"]?.[0]?._attr ?? tblPr?.tblStyle?.[0]?._attr)?.["w:val"];
1325
+ const tblStyleId = (tblPr?.["w:tblStyle"]?.[0]?._attr ??
1326
+ tblPr?.tblStyle?.[0]?._attr)?.["w:val"];
814
1327
  const styleDef = tblStyleId ? ctx.stylesMap.get(tblStyleId) : undefined;
815
1328
  let tblBdr: TblBorderDef = styleDef?.tblBorders ?? {};
816
1329
 
@@ -900,7 +1413,8 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
900
1413
 
901
1414
  // Row height from w:trHeight
902
1415
  let rowHeightPt: number | undefined;
903
- const trHAttr = trPr?.["w:trHeight"]?.[0]?._attr ?? trPr?.trHeight?.[0]?._attr;
1416
+ const trHAttr =
1417
+ trPr?.["w:trHeight"]?.[0]?._attr ?? trPr?.trHeight?.[0]?._attr;
904
1418
  if (trHAttr) {
905
1419
  const hDxa = Number(trHAttr?.["w:val"] ?? trHAttr?.val ?? 0);
906
1420
  if (hDxa > 0) rowHeightPt = Metric.dxaToPt(hDxa);
@@ -925,7 +1439,10 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
925
1439
 
926
1440
  if (tcBordersNode) {
927
1441
  const dirs: Array<[string, "top" | "bot" | "left" | "right"]> = [
928
- ["top", "top"], ["bottom", "bot"], ["left", "left"], ["right", "right"],
1442
+ ["top", "top"],
1443
+ ["bottom", "bot"],
1444
+ ["left", "left"],
1445
+ ["right", "right"],
929
1446
  ];
930
1447
  for (const [xmlTag, propKey] of dirs) {
931
1448
  const bdr =
@@ -951,22 +1468,55 @@ function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
951
1468
  const vaVal = vaAttr?.["w:val"] ?? vaAttr?.val;
952
1469
  if (vaVal) {
953
1470
  const vaMap: Record<string, "top" | "mid" | "bot"> = {
954
- top: "top", center: "mid", bottom: "bot",
1471
+ top: "top",
1472
+ center: "mid",
1473
+ bottom: "bot",
955
1474
  };
956
1475
  cp.va = vaMap[vaVal];
957
1476
  }
958
1477
 
1478
+ // Cell margins (padding)
1479
+ const tcMar = tcPr?.["w:tcMar"]?.[0] ?? tcPr?.tcMar?.[0];
1480
+ if (tcMar) {
1481
+ const top = tcMar?.["w:top"]?.[0]?._attr ?? tcMar?.top?.[0]?._attr;
1482
+ const bot =
1483
+ tcMar?.["w:bottom"]?.[0]?._attr ?? tcMar?.bottom?.[0]?._attr;
1484
+ const left = tcMar?.["w:left"]?.[0]?._attr ?? tcMar?.left?.[0]?._attr;
1485
+ const right =
1486
+ tcMar?.["w:right"]?.[0]?._attr ?? tcMar?.right?.[0]?._attr;
1487
+
1488
+ if (top) cp.padT = Metric.dxaToPt(Number(top?.["w:w"] ?? top?.w ?? 0));
1489
+ if (bot) cp.padB = Metric.dxaToPt(Number(bot?.["w:w"] ?? bot?.w ?? 0));
1490
+ if (left)
1491
+ cp.padL = Metric.dxaToPt(Number(left?.["w:w"] ?? left?.w ?? 0));
1492
+ if (right)
1493
+ cp.padR = Metric.dxaToPt(Number(right?.["w:w"] ?? right?.w ?? 0));
1494
+ }
1495
+
959
1496
  const rs = rsMap.get(`${ri},${ci}`) ?? 1;
960
1497
 
961
1498
  // Compute logical column index for this cell
962
1499
  let gridColIdx = 0;
963
1500
  for (let prevCi = 0; prevCi < ci; prevCi++) {
964
- if (!rawRow[prevCi].vMergeContinue) gridColIdx += rawRow[prevCi].gridSpan;
1501
+ if (!rawRow[prevCi].vMergeContinue)
1502
+ gridColIdx += rawRow[prevCi].gridSpan;
965
1503
  }
966
1504
 
967
1505
  // Apply 3-level border resolution (tblStyle → tblBorders → tcBorders already in cp)
968
- const colCount = gridProps.colWidths?.length ?? rawGrid[0]?.reduce((s, c) => s + c.gridSpan, 0) ?? 1;
969
- const resolvedCp = resolveCellBorders(cp, ri, gridColIdx, rs, rc.gridSpan, rawGrid.length, colCount, tblBdr);
1506
+ const colCount =
1507
+ gridProps.colWidths?.length ??
1508
+ rawGrid[0]?.reduce((s, c) => s + c.gridSpan, 0) ??
1509
+ 1;
1510
+ const resolvedCp = resolveCellBorders(
1511
+ cp,
1512
+ ri,
1513
+ gridColIdx,
1514
+ rs,
1515
+ rc.gridSpan,
1516
+ rawGrid.length,
1517
+ colCount,
1518
+ tblBdr,
1519
+ );
970
1520
 
971
1521
  const paras = toArr(cell?.["w:p"] ?? cell?.p).map((p: any) =>
972
1522
  decodePara(p, ctx),
@@ -1045,36 +1595,39 @@ registry.registerDecoder(new DocxDecoder());
1045
1595
 
1046
1596
  function extractAnchorLayout(anchor: any): ImgLayout {
1047
1597
  const attr = anchor?._attr ?? {};
1048
- const behindDoc = attr.behindDoc === '1';
1598
+ const behindDoc = attr.behindDoc === "1";
1049
1599
 
1050
1600
  // 텍스트 감싸기 타입
1051
- let wrap: ImgWrap = 'square';
1052
- if (anchor?.['wp:wrapNone']?.[0] != null) wrap = behindDoc ? 'behind' : 'none';
1053
- else if (anchor?.['wp:wrapTight']?.[0] != null) wrap = 'tight';
1054
- else if (anchor?.['wp:wrapThrough']?.[0] != null) wrap = 'through';
1055
- else if (anchor?.['wp:wrapSquare']?.[0] != null) wrap = 'square';
1056
- else if (anchor?.['wp:wrapTopAndBottom']?.[0] != null) wrap = 'square';
1057
- else if (anchor?.['wp:wrapBehind']?.[0] != null || behindDoc) wrap = 'behind';
1601
+ let wrap: ImgWrap = "square";
1602
+ if (anchor?.["wp:wrapNone"]?.[0] != null)
1603
+ wrap = behindDoc ? "behind" : "none";
1604
+ else if (anchor?.["wp:wrapTight"]?.[0] != null) wrap = "tight";
1605
+ else if (anchor?.["wp:wrapThrough"]?.[0] != null) wrap = "through";
1606
+ else if (anchor?.["wp:wrapSquare"]?.[0] != null) wrap = "square";
1607
+ else if (anchor?.["wp:wrapTopAndBottom"]?.[0] != null) wrap = "square";
1608
+ else if (anchor?.["wp:wrapBehind"]?.[0] != null || behindDoc) wrap = "behind";
1058
1609
 
1059
1610
  // 가로 위치
1060
- const posH = anchor?.['wp:positionH']?.[0];
1611
+ const posH = anchor?.["wp:positionH"]?.[0];
1061
1612
  const horzRelTo = parseHorzRelTo(posH?._attr?.relativeFrom);
1062
- const horzAlignTxt = posH?.['wp:align']?.[0]?._text;
1063
- const horzOffsetTxt = posH?.['wp:posOffset']?.[0]?._text;
1613
+ const horzAlignTxt = posH?.["wp:align"]?.[0]?._text;
1614
+ const horzOffsetTxt = posH?.["wp:posOffset"]?.[0]?._text;
1064
1615
  const horzAlign = horzAlignTxt ? parseHorzAlign(horzAlignTxt) : undefined;
1065
- const xPt = horzOffsetTxt && !horzAlignTxt
1066
- ? Metric.emuToPt(Number(horzOffsetTxt))
1067
- : undefined;
1616
+ const xPt =
1617
+ horzOffsetTxt && !horzAlignTxt
1618
+ ? Metric.emuToPt(Number(horzOffsetTxt))
1619
+ : undefined;
1068
1620
 
1069
1621
  // 세로 위치
1070
- const posV = anchor?.['wp:positionV']?.[0];
1622
+ const posV = anchor?.["wp:positionV"]?.[0];
1071
1623
  const vertRelTo = parseVertRelTo(posV?._attr?.relativeFrom);
1072
- const vertAlignTxt = posV?.['wp:align']?.[0]?._text;
1073
- const vertOffsetTxt = posV?.['wp:posOffset']?.[0]?._text;
1624
+ const vertAlignTxt = posV?.["wp:align"]?.[0]?._text;
1625
+ const vertOffsetTxt = posV?.["wp:posOffset"]?.[0]?._text;
1074
1626
  const vertAlign = vertAlignTxt ? parseVertAlign(vertAlignTxt) : undefined;
1075
- const yPt = vertOffsetTxt && !vertAlignTxt
1076
- ? Metric.emuToPt(Number(vertOffsetTxt))
1077
- : undefined;
1627
+ const yPt =
1628
+ vertOffsetTxt && !vertAlignTxt
1629
+ ? Metric.emuToPt(Number(vertOffsetTxt))
1630
+ : undefined;
1078
1631
 
1079
1632
  // 텍스트와의 거리
1080
1633
  const distT = attr.distT ? Metric.emuToPt(Number(attr.distT)) : undefined;
@@ -1083,29 +1636,68 @@ function extractAnchorLayout(anchor: any): ImgLayout {
1083
1636
  const distR = attr.distR ? Metric.emuToPt(Number(attr.distR)) : undefined;
1084
1637
  const zOrder = attr.relativeHeight ? Number(attr.relativeHeight) : undefined;
1085
1638
 
1086
- return { wrap, horzAlign, vertAlign, horzRelTo, vertRelTo, xPt, yPt, distT, distB, distL, distR, behindDoc, zOrder };
1639
+ return {
1640
+ wrap,
1641
+ horzAlign,
1642
+ vertAlign,
1643
+ horzRelTo,
1644
+ vertRelTo,
1645
+ xPt,
1646
+ yPt,
1647
+ distT,
1648
+ distB,
1649
+ distL,
1650
+ distR,
1651
+ behindDoc,
1652
+ zOrder,
1653
+ };
1087
1654
  }
1088
1655
 
1089
1656
  const HORZ_RELTO_MAP: Record<string, ImgHorzRelTo> = {
1090
- margin: 'margin', leftMargin: 'margin', rightMargin: 'margin',
1091
- insideMargin: 'margin', outsideMargin: 'margin',
1092
- column: 'column', page: 'page', character: 'para', paragraph: 'para',
1657
+ margin: "margin",
1658
+ leftMargin: "margin",
1659
+ rightMargin: "margin",
1660
+ insideMargin: "margin",
1661
+ outsideMargin: "margin",
1662
+ column: "column",
1663
+ page: "page",
1664
+ character: "para",
1665
+ paragraph: "para",
1093
1666
  };
1094
1667
  const VERT_RELTO_MAP: Record<string, ImgVertRelTo> = {
1095
- margin: 'margin', topMargin: 'margin', bottomMargin: 'margin',
1096
- insideMargin: 'margin', outsideMargin: 'margin',
1097
- line: 'line', page: 'page', paragraph: 'para',
1668
+ margin: "margin",
1669
+ topMargin: "margin",
1670
+ bottomMargin: "margin",
1671
+ insideMargin: "margin",
1672
+ outsideMargin: "margin",
1673
+ line: "line",
1674
+ page: "page",
1675
+ paragraph: "para",
1098
1676
  };
1099
1677
  const HORZ_ALIGN_MAP: Record<string, ImgHorzAlign> = {
1100
- left: 'left', center: 'center', right: 'right',
1101
- inside: 'left', outside: 'right',
1678
+ left: "left",
1679
+ center: "center",
1680
+ right: "right",
1681
+ inside: "left",
1682
+ outside: "right",
1102
1683
  };
1103
1684
  const VERT_ALIGN_MAP: Record<string, ImgVertAlign> = {
1104
- top: 'top', center: 'center', bottom: 'bottom',
1105
- inside: 'top', outside: 'bottom',
1685
+ top: "top",
1686
+ center: "center",
1687
+ bottom: "bottom",
1688
+ inside: "top",
1689
+ outside: "bottom",
1106
1690
  };
1107
1691
 
1108
- function parseHorzRelTo(v?: string): ImgHorzRelTo { return HORZ_RELTO_MAP[v ?? ''] ?? 'column'; }
1109
- function parseVertRelTo(v?: string): ImgVertRelTo { return VERT_RELTO_MAP[v ?? ''] ?? 'para'; }
1110
- function parseHorzAlign(v?: string): ImgHorzAlign | undefined { return HORZ_ALIGN_MAP[v ?? '']; }
1111
- function parseVertAlign(v?: string): ImgVertAlign | undefined { return VERT_ALIGN_MAP[v ?? '']; }
1692
+ function parseHorzRelTo(v?: string): ImgHorzRelTo {
1693
+ return HORZ_RELTO_MAP[v ?? ""] ?? "column";
1694
+ }
1695
+ function parseVertRelTo(v?: string): ImgVertRelTo {
1696
+ return VERT_RELTO_MAP[v ?? ""] ?? "para";
1697
+ }
1698
+ function parseHorzAlign(v?: string): ImgHorzAlign | undefined {
1699
+ return HORZ_ALIGN_MAP[v ?? ""];
1700
+ }
1701
+ function parseVertAlign(v?: string): ImgVertAlign | undefined {
1702
+ return VERT_ALIGN_MAP[v ?? ""];
1703
+ }