hwpkit-dev 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,986 @@
1
+ import type { Decoder } from "../../contract/decoder";
2
+ import type {
3
+ DocRoot,
4
+ ContentNode,
5
+ ParaNode,
6
+ SpanNode,
7
+ GridNode,
8
+ ImgNode,
9
+ PageNumNode,
10
+ CellNode,
11
+ } from "../../model/doc-tree";
12
+ import type { Outcome } from "../../contract/result";
13
+ import type {
14
+ DocMeta,
15
+ PageDims,
16
+ TextProps,
17
+ ParaProps,
18
+ CellProps,
19
+ GridProps,
20
+ TableLook,
21
+ ImgLayout,
22
+ ImgHorzAlign,
23
+ ImgVertAlign,
24
+ ImgHorzRelTo,
25
+ ImgVertRelTo,
26
+ ImgWrap,
27
+ } from "../../model/doc-props";
28
+ import { A4 } from "../../model/doc-props";
29
+ import { succeed, fail } from "../../contract/result";
30
+ import {
31
+ buildRoot,
32
+ buildSheet,
33
+ buildPara,
34
+ buildSpan,
35
+ buildImg,
36
+ buildGrid,
37
+ buildRow,
38
+ buildCell,
39
+ buildPb,
40
+ } from "../../model/builders";
41
+ import { ShieldedParser } from "../../safety/ShieldedParser";
42
+ import {
43
+ Metric,
44
+ safeAlign,
45
+ safeFont,
46
+ safeHex,
47
+ safeStrokeDocx,
48
+ } from "../../safety/StyleBridge";
49
+ import { ArchiveKit } from "../../toolkit/ArchiveKit";
50
+ import { XmlKit } from "../../toolkit/XmlKit";
51
+ import { TextKit } from "../../toolkit/TextKit";
52
+ import { registry } from "../../pipeline/registry";
53
+
54
/**
 * Decoder for DOCX packages.
 *
 * Unzips the archive, reads the main document part together with the optional
 * relationship, core-properties and numbering parts, and converts the body into
 * a single-sheet `DocRoot`.
 */
export class DocxDecoder implements Decoder {
  readonly format = "docx";

  /**
   * Decode the raw bytes of a DOCX file.
   *
   * @param data Bytes of the zipped package.
   * @returns A success outcome holding the document tree and the collected
   *   warnings, or a failure outcome when `word/document.xml` is missing or an
   *   unguarded step throws.
   */
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
    const warns: string[] = [];
    const shield = new ShieldedParser();

    try {
      const files = await ArchiveKit.unzip(data);

      const docXml = files.get("word/document.xml");
      if (!docXml) {
        return fail("DOCX: word/document.xml not found");
      }

      // Relationships of the main part (rId → target); empty when the part is absent.
      let relsMap = new Map<string, string>();
      const relsXml = files.get("word/_rels/document.xml.rels");
      if (relsXml) {
        relsMap = await parseRels(TextKit.decode(relsXml));
      }

      // Core properties are optional: a failure simply leaves `meta` empty.
      let meta: DocMeta = {};
      const coreXml = files.get("docProps/core.xml");
      if (coreXml) {
        try {
          meta = await parseCoreProps(TextKit.decode(coreXml));
        } catch {
          // ignore — meta is optional
        }
      }

      // Numbering definitions drive list detection; a failure is non-fatal.
      let numMap: NumMap = new Map();
      const numXml = files.get("word/numbering.xml");
      if (numXml) {
        try {
          numMap = await parseNumbering(TextKit.decode(numXml));
        } catch {
          /* non-fatal */
        }
      }

      const docObj: any = await XmlKit.parseStrict(TextKit.decode(docXml));
      const body = getBody(docObj);
      const dims = extractDims(body) ?? { ...A4 };
      const elements = getBodyElements(body);
      const decCtx: DecCtx = { relsMap, files, shield, numMap, warns };

      const kids: ContentNode[] = [];
      for (const el of elements) {
        // A failing element is replaced by a placeholder paragraph instead of aborting.
        kids.push(
          shield.guard(
            () => decodeElement(el, decCtx),
            buildPara([buildSpan("[요소 파싱 실패]")]),
            "docx:bodyElement",
          ),
        );

        // A sectPr inside pPr marks a section break: unless the section is
        // "continuous", follow the paragraph with a page-break paragraph.
        if (el.type !== "para") continue;
        const pPr = el.node?.["w:pPr"]?.[0] ?? el.node?.pPr?.[0] ?? {};
        const inlineSectPr = pPr?.["w:sectPr"]?.[0] ?? pPr?.sectPr?.[0];
        if (!inlineSectPr) continue;

        const typeAttr = inlineSectPr?.["w:type"]?.[0]?._attr;
        const sectType = typeAttr?.["w:val"] ?? typeAttr?.val ?? "nextPage";
        if (sectType !== "continuous") {
          kids.push(buildPara([{ tag: "span", props: {}, kids: [buildPb()] }]));
        }
      }

      // Header first, then footer — both resolved from the body-level sectPr.
      const headerParas = await decodeHeaderFooter(
        "header",
        body,
        relsMap,
        files,
        decCtx,
      );
      const footerParas = await decodeHeaderFooter(
        "footer",
        body,
        relsMap,
        files,
        decCtx,
      );

      warns.push(...shield.flush());
      const sheet = buildSheet(kids.filter(Boolean) as ContentNode[], dims, {
        header: headerParas,
        footer: footerParas,
      });
      return succeed(buildRoot(meta, [sheet]), warns);
    } catch (e: any) {
      warns.push(...shield.flush());
      return fail(`DOCX decode error: ${e?.message ?? String(e)}`, warns);
    }
  }
}
153
+
154
+ // ─── types ─────────────────────────────────────────────────
155
+
156
/** State shared by every decode helper while one document is being decoded. */
interface DecCtx {
  /** Relationship id → target, as read from the main document's `.rels` part. */
  relsMap: Map<string, string>;
  /** Every entry of the unzipped package, keyed by its path inside the archive. */
  files: Map<string, Uint8Array>;
  /** Guard used to run fallible decode steps with a fallback value. */
  shield: ShieldedParser;
  /** Numbering definitions parsed from `word/numbering.xml` (may be empty). */
  numMap: NumMap;
  /** Warning list that is returned to the caller with the decode outcome. */
  warns: string[];
}
163
+
164
/** Format of a single list level: the raw `w:numFmt` value and whether it counts as ordered. */
interface NumLevelInfo {
  fmt: string;
  isOrdered: boolean;
}

/**
 * numId → { levels: Map<ilvl, { fmt, isOrdered }> }.
 * The abstract numbering definition is already resolved into `levels`;
 * the abstractNumId itself is not kept.
 */
type NumMap = Map<number, { levels: Map<number, NumLevelInfo> }>;
169
+
170
+ // ─── helpers ────────────────────────────────────────────────
171
+
172
/**
 * Normalise a parsed XML value to an array.
 *
 * @param v Any value: `null`/`undefined`, a single value, or an array.
 * @returns `[]` for nullish input, the same array for array input, otherwise a
 *   one-element array wrapping `v`.
 */
function toArr(v: any): any[] {
  if (v == null) {
    return [];
  }
  if (Array.isArray(v)) {
    return v;
  }
  return [v];
}
175
+
176
/**
 * Resolve a relationship target against the directory of the part that owns it.
 *
 * Examples:
 *   ("word", "media/image1.png")        → "word/media/image1.png"
 *   ("word", "../customXml/item1.xml")  → "customXml/item1.xml"
 *   ("word", "/word/header1.xml")       → "word/header1.xml"
 *
 * @param baseDir Directory of the owning part inside the package (no trailing slash needed).
 * @param target  Relationship target; a leading "/" means package-absolute.
 * @returns The normalised path, without a leading slash, suitable as a key into the unzipped file map.
 */
function resolveDocxPath(baseDir: string, target: string): string {
  // Package-absolute targets ignore the base directory entirely.
  const joined = target.startsWith("/") ? target : `${baseDir}/${target}`;

  const stack: string[] = [];
  for (const segment of joined.split("/")) {
    // Empty segments come from a leading slash, an empty base or "//";
    // keeping them would produce keys that never match an archive entry.
    if (segment === "" || segment === ".") continue;
    if (segment === "..") {
      stack.pop(); // popping an empty stack is a no-op, so ".." cannot escape the root
    } else {
      stack.push(segment);
    }
  }
  return stack.join("/");
}
190
+
191
/**
 * Parse a `.rels` part into a relationship-id → target map.
 *
 * Parsing is best-effort: if the XML cannot be parsed, the entries collected
 * so far (possibly none) are returned.
 *
 * @param xml Text of the relationships part.
 * @returns Map from `Id` to `Target` for every relationship that has both.
 */
async function parseRels(xml: string): Promise<Map<string, string>> {
  const targets = new Map<string, string>();
  try {
    const parsed: any = await XmlKit.parseStrict(xml);
    const relationships = toArr(parsed?.Relationships?.[0]?.Relationship);
    for (const rel of relationships) {
      const attrs = rel?._attr ?? {};
      if (!attrs.Id || !attrs.Target) continue;
      targets.set(attrs.Id, attrs.Target);
    }
  } catch {
    /* ignore */
  }
  return targets;
}
204
+
205
/**
 * Read document metadata from `docProps/core.xml`.
 *
 * @param xml Text of the core-properties part.
 * @returns Title, author, subject, created and modified values (each
 *   `undefined` when absent); an empty object when the XML cannot be parsed.
 */
async function parseCoreProps(xml: string): Promise<DocMeta> {
  try {
    const parsed: any = await XmlKit.parseStrict(xml);
    const core =
      parsed?.["cp:coreProperties"]?.[0] ?? parsed?.coreProperties?.[0] ?? {};

    // Text of the first child with the given tag; null is normalised to undefined.
    const textOf = (tag: string): any => core?.[tag]?.[0]?._text ?? undefined;

    return {
      title: textOf("dc:title"),
      author: textOf("dc:creator"),
      subject: textOf("dc:subject"),
      created: textOf("dcterms:created"),
      modified: textOf("dcterms:modified"),
    };
  } catch {
    return {};
  }
}
220
+
221
/**
 * Parse `word/numbering.xml` into a numId → level-format table.
 *
 * Abstract numbering definitions are read first; each `w:num` is then linked to
 * the levels of the abstract definition it references. Every format other than
 * "bullet" is flagged as ordered. Parsing is best-effort: on error the entries
 * collected so far are returned.
 *
 * @param xml Text of the numbering part.
 * @returns Map from numId to its per-level format information.
 */
async function parseNumbering(xml: string): Promise<NumMap> {
  type LevelTable = Map<number, { fmt: string; isOrdered: boolean }>;

  const result: NumMap = new Map();
  try {
    const parsed: any = await XmlKit.parseStrict(xml);
    const root = parsed?.["w:numbering"]?.[0] ?? parsed?.numbering?.[0] ?? parsed;

    // Step 1: abstractNumId → levels
    const abstractLevels = new Map<number, LevelTable>();
    const abstractNums = toArr(root?.["w:abstractNum"] ?? root?.abstractNum);
    for (const abs of abstractNums) {
      const absId = Number(
        abs?._attr?.["w:abstractNumId"] ?? abs?._attr?.abstractNumId ?? 0,
      );
      const levels: LevelTable = new Map();
      for (const lvl of toArr(abs?.["w:lvl"] ?? abs?.lvl)) {
        const ilvl = Number(lvl?._attr?.["w:ilvl"] ?? lvl?._attr?.ilvl ?? 0);
        const fmtAttr =
          lvl?.["w:numFmt"]?.[0]?._attr ?? lvl?.numFmt?.[0]?._attr ?? {};
        const fmt = fmtAttr?.["w:val"] ?? fmtAttr?.val ?? "decimal";
        levels.set(ilvl, { fmt, isOrdered: fmt !== "bullet" });
      }
      abstractLevels.set(absId, levels);
    }

    // Step 2: numId → levels of the referenced abstract definition
    const nums = toArr(root?.["w:num"] ?? root?.num);
    for (const num of nums) {
      const numId = Number(num?._attr?.["w:numId"] ?? num?._attr?.numId ?? 0);
      const refAttr =
        num?.["w:abstractNumId"]?.[0]?._attr ??
        num?.abstractNumId?.[0]?._attr ??
        {};
      const absId = Number(refAttr?.["w:val"] ?? refAttr?.val ?? 0);
      result.set(numId, { levels: abstractLevels.get(absId) ?? new Map() });
    }
  } catch {
    /* non-fatal */
  }
  return result;
}
263
+
264
/**
 * Locate the body element in a parsed document object.
 *
 * @param obj Parsed `word/document.xml`.
 * @returns The prefixed `w:document/w:body` if present, else the unprefixed
 *   `document/body`, else `obj` itself.
 */
function getBody(obj: any): any {
  const prefixed = obj?.["w:document"]?.[0]?.["w:body"]?.[0];
  if (prefixed != null) {
    return prefixed;
  }
  const bare = obj?.document?.[0]?.body?.[0];
  return bare ?? obj;
}
271
+
272
/**
 * Read page size, margins and orientation from the body-level `sectPr`.
 *
 * Missing size attributes fall back to 11906 × 16838 dxa; missing margins fall
 * back to 1440 dxa (top/bottom) and 1800 dxa (left/right).
 *
 * @param body Parsed body element.
 * @returns Page dimensions in points, or `null` when there is no `sectPr`, no
 *   `pgSz`, or reading them throws.
 */
function extractDims(body: any): PageDims | null {
  try {
    const sectPr = body?.["w:sectPr"]?.[0] ?? body?.sectPr?.[0];
    if (!sectPr) return null;

    const size = sectPr?.["w:pgSz"]?.[0]?._attr ?? sectPr?.pgSz?.[0]?._attr;
    const margin = sectPr?.["w:pgMar"]?.[0]?._attr ?? sectPr?.pgMar?.[0]?._attr;
    if (!size) return null;

    // Attribute (prefixed or not) as dxa, converted to points.
    const pt = (attrs: any, name: string, fallbackDxa: number): number =>
      Metric.dxaToPt(Number(attrs?.["w:" + name] ?? attrs?.[name] ?? fallbackDxa));

    const wPt = pt(size, "w", 11906);
    const hPt = pt(size, "h", 16838);
    const mt = pt(margin, "top", 1440);
    const mb = pt(margin, "bottom", 1440);
    const ml = pt(margin, "left", 1800);
    const mr = pt(margin, "right", 1800);
    const isLandscape = (size["w:orient"] ?? size.orient) === "landscape";

    return {
      wPt,
      hPt,
      mt,
      mb,
      ml,
      mr,
      orient: isLandscape ? "landscape" : "portrait",
    };
  } catch {
    return null;
  }
}
295
+
296
/**
 * List the paragraphs and tables of the body in document order.
 *
 * When both kinds are present, the `_childOrder` array on the body (if any) is
 * replayed to interleave them; entries left over after the replay are appended,
 * paragraphs first. Without `_childOrder` the result is all paragraphs followed
 * by all tables.
 *
 * @param body Parsed body element.
 * @returns Elements tagged with `type` "para" or "table".
 */
function getBodyElements(body: any): { type: string; node: any }[] {
  const asPara = (node: any) => ({ type: "para", node });
  const asTable = (node: any) => ({ type: "table", node });

  const paras = toArr(body?.["w:p"] ?? body?.p);
  const tables = toArr(body?.["w:tbl"] ?? body?.tbl);

  // With only one kind present, ordering is trivial.
  if (tables.length === 0) return paras.map((n: any) => asPara(n));
  if (paras.length === 0) return tables.map((n: any) => asTable(n));

  const order = body?.["_childOrder"] as string[] | undefined;
  if (!Array.isArray(order)) {
    // Fallback: paragraphs first, then tables
    return [
      ...paras.map((n: any) => asPara(n)),
      ...tables.map((n: any) => asTable(n)),
    ];
  }

  const ordered: { type: string; node: any }[] = [];
  let nextPara = 0;
  let nextTable = 0;
  for (const tag of order) {
    const isParaTag = tag === "w:p" || tag === "p";
    const isTableTag = tag === "w:tbl" || tag === "tbl";
    if (isParaTag && nextPara < paras.length) {
      ordered.push(asPara(paras[nextPara++]));
    } else if (isTableTag && nextTable < tables.length) {
      ordered.push(asTable(tables[nextTable++]));
    }
  }

  // Anything the order list did not account for is appended at the end.
  for (; nextPara < paras.length; nextPara++) {
    ordered.push(asPara(paras[nextPara]));
  }
  for (; nextTable < tables.length; nextTable++) {
    ordered.push(asTable(tables[nextTable]));
  }
  return ordered;
}
330
+
331
+ // ─── Header/Footer decoding ────────────────────────────────
332
+
333
/**
 * Decode the header or footer referenced by the body-level `sectPr`.
 *
 * A section may carry several references distinguished by `w:type`
 * ("default", "first", "even"). The "default" reference is preferred; when
 * none is marked default, the first reference listed is used.
 *
 * @param kind    Which part to decode.
 * @param body    Parsed body element.
 * @param relsMap Relationship id → target of the main document part.
 * @param files   Unzipped package entries keyed by path.
 * @param ctx     Shared decode context passed on to `decodePara`.
 * @returns The decoded paragraphs, or `undefined` when there is no reference,
 *   the target cannot be resolved, the part has no paragraphs, or decoding throws.
 */
async function decodeHeaderFooter(
  kind: "header" | "footer",
  body: any,
  relsMap: Map<string, string>,
  files: Map<string, Uint8Array>,
  ctx: DecCtx,
): Promise<ParaNode[] | undefined> {
  try {
    const sp = body?.["w:sectPr"]?.[0] ?? body?.sectPr?.[0];
    if (!sp) return undefined;

    const refTag =
      kind === "header" ? "w:headerReference" : "w:footerReference";
    const refs = toArr(sp?.[refTag] ?? sp?.[refTag.replace("w:", "")]);
    if (refs.length === 0) return undefined;

    // Prefer the reference that applies to ordinary pages; "first"/"even"
    // variants may be listed before it.
    const typeOf = (r: any): any => r?._attr?.["w:type"] ?? r?._attr?.type;
    const ref = refs.find((r: any) => typeOf(r) === "default") ?? refs[0];

    const rId =
      ref?._attr?.["r:id"] ?? ref?._attr?.["r:Id"] ?? ref?._attr?.id;
    if (!rId) return undefined;

    const target = relsMap.get(rId);
    if (!target) return undefined;

    const filePath = resolveDocxPath("word", target);
    const fileData = files.get(filePath);
    if (!fileData) return undefined;

    const xmlStr = TextKit.decode(fileData);
    const obj: any = await XmlKit.parseStrict(xmlStr);

    const rootTag = kind === "header" ? "w:hdr" : "w:ftr";
    const root =
      obj?.[rootTag]?.[0] ?? obj?.[rootTag.replace("w:", "")]?.[0] ?? obj;

    const paras = toArr(root?.["w:p"] ?? root?.p);
    if (paras.length === 0) return undefined;

    // NOTE(review): runs are decoded with `ctx.relsMap`, i.e. the main
    // document's relationships. Images inside a header/footer presumably need
    // that part's own `.rels` file — confirm.
    return paras.map((p: any) => decodePara(p, ctx));
  } catch {
    return undefined;
  }
}
377
+
378
+ // ─── Element decoding ──────────────────────────────────────
379
+
380
/**
 * Report whether a `w:drawing` or `w:pict` key occurs anywhere inside `node`.
 *
 * @param node Parsed XML value (object, array or primitive).
 * @returns `true` when the node itself or any nested value has a truthy
 *   `w:drawing` or `w:pict` property.
 */
function hasDrawingDeep(node: any): boolean {
  if (!node || typeof node !== "object") {
    return false;
  }
  if (node["w:drawing"] || node["w:pict"]) {
    return true;
  }

  for (const child of Object.values(node)) {
    const nested = Array.isArray(child) ? child : [child];
    for (const item of nested) {
      if (hasDrawingDeep(item)) return true;
    }
  }
  return false;
}
391
+
392
/**
 * Decode one body-level element.
 *
 * Tables go through the shield's grid guard, which is handed four decoders of
 * decreasing fidelity (full grid, simple grid, single-cell grid, plain text).
 * Everything else is decoded as a paragraph.
 *
 * @param el  Element tagged by `getBodyElements`.
 * @param ctx Shared decode context.
 * @returns The decoded content node.
 */
function decodeElement(
  el: { type: string; node: any },
  ctx: DecCtx,
): ContentNode {
  if (el.type !== "table") {
    return decodePara(el.node, ctx);
  }

  const guarded = ctx.shield.guardGrid(
    el.node,
    (n) => decodeGrid(n as any, ctx),
    (n) => decodeGridSimple(n as any),
    (n) => decodeGridFlat(n as any),
    (n) => decodeGridText(n as any) as unknown as GridNode,
    "docx:table",
  );
  return guarded.value;
}
409
+
410
/**
 * Convert one `w:p` element into a `ParaNode`.
 *
 * Reads alignment, heading style, spacing, left indent, list membership and
 * `pageBreakBefore` from the paragraph properties, then decodes every run
 * through the shield so that a failing run degrades to an empty span.
 *
 * @param p   Parsed paragraph element.
 * @param ctx Shared decode context (numbering table, shield, …).
 * @returns The paragraph node with its decoded children.
 */
function decodePara(p: any, ctx: DecCtx): ParaNode {
  // `w:val` attribute of an element, with or without the namespace prefix.
  const valOf = (node: any): any => node?._attr?.["w:val"] ?? node?._attr?.val;

  const pPr = p?.["w:pPr"]?.[0] ?? p?.pPr?.[0] ?? {};
  const alignVal = valOf(pPr?.["w:jc"]?.[0] ?? pPr?.jc?.[0]);
  const headStyle = valOf(pPr?.["w:pStyle"]?.[0] ?? pPr?.pStyle?.[0]) ?? "";

  const props: ParaProps = {
    align: safeAlign(alignVal),
    heading: parseHeading(headStyle),
  };

  // Spacing (before/after/line height)
  const spacingAttr =
    pPr?.["w:spacing"]?.[0]?._attr ?? pPr?.spacing?.[0]?._attr ?? {};
  const beforeVal = Number(
    spacingAttr?.["w:before"] ?? spacingAttr?.before ?? 0,
  );
  const afterVal = Number(spacingAttr?.["w:after"] ?? spacingAttr?.after ?? 0);
  const lineVal = Number(spacingAttr?.["w:line"] ?? spacingAttr?.line ?? 0);
  const lineRule =
    spacingAttr?.["w:lineRule"] ?? spacingAttr?.lineRule ?? "auto";
  if (beforeVal > 0) props.spaceBefore = Metric.dxaToPt(beforeVal);
  if (afterVal > 0) props.spaceAfter = Metric.dxaToPt(afterVal);
  // Only the "auto" rule is mapped: the value is divided by 240 to get a multiplier.
  if (lineVal > 0 && lineRule === "auto") props.lineHeight = lineVal / 240;

  // Indentation
  const indAttr = pPr?.["w:ind"]?.[0]?._attr ?? pPr?.ind?.[0]?._attr ?? {};
  const leftVal = Number(indAttr?.["w:left"] ?? indAttr?.left ?? 0);
  if (leftVal > 0) props.indentPt = Metric.dxaToPt(leftVal);

  // List/numbering
  const numPr = pPr?.["w:numPr"]?.[0] ?? pPr?.numPr?.[0];
  if (numPr) {
    const ilvl = Number(valOf(numPr?.["w:ilvl"]?.[0] ?? numPr?.ilvl?.[0]) ?? 0);
    const numId = Number(
      valOf(numPr?.["w:numId"]?.[0] ?? numPr?.numId?.[0]) ?? 0,
    );

    props.listLv = ilvl;
    const numEntry = ctx.numMap.get(numId);
    if (numEntry) {
      const lvlInfo = numEntry.levels.get(ilvl) ?? numEntry.levels.get(0);
      props.listOrd = lvlInfo?.isOrdered ?? false;
    } else {
      // Fallback: numId=1 is typically bullet, numId=2 is numbered
      props.listOrd = numId >= 2;
    }
  }

  // pageBreakBefore is an on/off property: the element alone means "on", and
  // the values "0", "false" and "off" switch it off.
  const pbBeforeNode =
    pPr?.["w:pageBreakBefore"]?.[0] ?? pPr?.pageBreakBefore?.[0];
  const pbBeforeVal = String(valOf(pbBeforeNode) ?? "1").toLowerCase();
  const hasPageBreakBefore =
    pbBeforeNode != null && !["0", "false", "off"].includes(pbBeforeVal);

  const runs = toArr(p?.["w:r"] ?? p?.r);

  // Runs that contain a drawing anywhere inside take the image-aware path.
  const kids: (SpanNode | ImgNode)[] = ctx.shield.guardAll(
    runs,
    (run: any) =>
      hasDrawingDeep(run) ? decodeRunOrImage(run, ctx) : decodeRun(run, ctx),
    () => buildSpan(""),
    "docx:run",
  );

  const filteredKids = kids.filter(Boolean) as ParaNode["kids"];

  // Prepend pb span when pageBreakBefore is set
  if (hasPageBreakBefore) {
    filteredKids.unshift({ tag: "span", props: {}, kids: [buildPb()] });
  }

  return buildPara(filteredKids, props);
}
489
+
490
/**
 * Decode a run that contains a drawing somewhere inside it.
 *
 * The first `w:drawing` (or `w:pict`) found by a depth-first search is handed
 * to `decodeDrawing`; when no image can be produced, the run is decoded as
 * ordinary text.
 *
 * @param run Parsed `w:r` element.
 * @param ctx Shared decode context.
 * @returns An image node when one could be decoded, otherwise a span.
 */
function decodeRunOrImage(run: any, ctx: DecCtx): SpanNode | ImgNode {
  // Depth-first search for the first drawing element; a node's own keys win
  // over anything nested deeper.
  const locateDrawing = (node: any): any | null => {
    if (!node || typeof node !== "object") return null;

    if (node["w:drawing"]) return node["w:drawing"][0];
    if (node["w:pict"]) return node["w:pict"][0];

    for (const value of Object.values(node)) {
      const candidates = Array.isArray(value) ? value : [value];
      for (const candidate of candidates) {
        const hit = locateDrawing(candidate);
        if (hit) return hit;
      }
    }
    return null;
  };

  const drawing = locateDrawing(run);
  const img = drawing ? decodeDrawing(drawing, ctx) : null;
  if (img) {
    return img;
  }
  return decodeRun(run, ctx);
}
522
/**
 * Decode a drawing element into an image node.
 *
 * Follows `inline`/`anchor` → `graphic` → `graphicData` → `pic` → `blipFill` →
 * `blip` to find the embedded relationship id, resolves it against the
 * relationship map, and base64-encodes the bytes found in the package.
 * Problems that make the image unusable are reported through `ctx.warns`
 * rather than written to the console.
 *
 * @param drawing Parsed drawing element.
 * @param ctx     Shared decode context (relationships, package files, warnings).
 * @returns The image node, or `null` when the drawing has no container, no
 *   embedded picture, an unresolved target, missing bytes, or decoding throws.
 */
function decodeDrawing(drawing: any, ctx: DecCtx): ImgNode | null {
  try {
    const inline = drawing?.["wp:inline"]?.[0] ?? drawing?.inline?.[0];
    const anchor = drawing?.["wp:anchor"]?.[0] ?? drawing?.anchor?.[0];
    const container = inline ?? anchor;
    if (!container) return null;

    // Get dimensions
    const extent =
      container?.["wp:extent"]?.[0]?._attr ??
      container?.extent?.[0]?._attr ??
      {};
    const wPt = Metric.emuToPt(Number(extent?.cx ?? 0));
    const hPt = Metric.emuToPt(Number(extent?.cy ?? 0));

    // Get alt text: description first, then the object name
    const docPr =
      container?.["wp:docPr"]?.[0]?._attr ?? container?.docPr?.[0]?._attr ?? {};
    const alt = docPr?.descr ?? docPr?.name ?? "";

    // Navigate to blip
    const graphic = container?.["a:graphic"]?.[0] ?? container?.graphic?.[0];
    const graphicData =
      graphic?.["a:graphicData"]?.[0] ?? graphic?.graphicData?.[0];
    const pic = graphicData?.["pic:pic"]?.[0] ?? graphicData?.pic?.[0];
    const blipFill = pic?.["pic:blipFill"]?.[0] ?? pic?.blipFill?.[0];
    const blip =
      blipFill?.["a:blip"]?.[0]?._attr ?? blipFill?.blip?.[0]?._attr ?? {};
    const rId = blip?.["r:embed"] ?? blip?.embed;
    if (!rId) return null;

    const target = ctx.relsMap.get(rId);
    if (!target) return null;

    const filePath = resolveDocxPath("word", target);
    const fileData = ctx.files.get(filePath);
    if (!fileData) {
      ctx.warns.push(
        `[DocxDecoder] image not found in ZIP: "${filePath}" (rId=${rId}, target=${target})`,
      );
      return null;
    }

    const mimeMap: Record<string, ImgNode["mime"]> = {
      png: "image/png",
      jpg: "image/jpeg",
      jpeg: "image/jpeg",
      gif: "image/gif",
      bmp: "image/bmp",
    };
    const ext = target.split(".").pop()?.toLowerCase() ?? "";
    // Own-property check: a plain lookup would return inherited members for
    // extensions such as "constructor".
    const known = Object.prototype.hasOwnProperty.call(mimeMap, ext);
    const mime: ImgNode["mime"] = known ? mimeMap[ext] : "image/png";
    if (!known) {
      // The PNG fallback is kept for compatibility, but the mislabelling is surfaced.
      ctx.warns.push(
        `[DocxDecoder] unrecognised image extension "${ext}" for "${filePath}"; treated as image/png`,
      );
    }

    // Inline pictures carry no positioning; anchored ones get a full layout.
    const layout: ImgLayout = inline
      ? { wrap: "inline" }
      : extractAnchorLayout(anchor);

    return buildImg(
      TextKit.base64Encode(fileData),
      mime,
      wPt,
      hPt,
      alt || undefined,
      layout,
    );
  } catch {
    return null;
  }
}
591
+
592
/**
 * Convert one `w:r` element into a span.
 *
 * Reads the character formatting from the run properties and then produces, in
 * this order of precedence: a page-break span when the run holds
 * `<w:br w:type="page"/>`, a page-number span when its field instruction is
 * `PAGE`, or a text span built from the run's `w:t` children.
 *
 * @param run Parsed run element.
 * @param ctx Shared decode context (currently unused by this function).
 * @returns The decoded span.
 */
function decodeRun(run: any, ctx: DecCtx): SpanNode {
  // `w:val` attribute of an element, with or without the namespace prefix.
  const valOf = (node: any): any => node?._attr?.["w:val"] ?? node?._attr?.val;

  // On/off property: a present element without a value means "on";
  // "0", "false" and "off" mean explicitly OFF.
  const isOn = (node: any): boolean =>
    node != null &&
    !["0", "false", "off"].includes(String(valOf(node) ?? "1").toLowerCase());

  // Text of a parsed text node, whichever key the parser stored it under.
  const textOf = (node: any): string =>
    typeof node === "string" ? node : String(node?._ ?? node?._text ?? "");

  const rPr = run?.["w:rPr"]?.[0] ?? run?.rPr?.[0] ?? {};

  const szVal = valOf(rPr?.["w:sz"]?.[0] ?? rPr?.sz?.[0]);
  const colorVal = valOf(rPr?.["w:color"]?.[0] ?? rPr?.color?.[0]);

  const fontAttr =
    rPr?.["w:rFonts"]?.[0]?._attr ?? rPr?.rFonts?.[0]?._attr ?? {};
  const fontName =
    fontAttr?.["w:ascii"] ??
    fontAttr?.ascii ??
    fontAttr?.["w:hAnsi"] ??
    fontAttr?.hAnsi ??
    fontAttr?.["w:eastAsia"] ??
    fontAttr?.eastAsia;

  const underVal = valOf(rPr?.["w:u"]?.[0] ?? rPr?.u?.[0]);

  // Background/highlight
  const shdAttr = rPr?.["w:shd"]?.[0]?._attr ?? rPr?.shd?.[0]?._attr ?? {};
  const bgVal = safeHex(shdAttr?.["w:fill"] ?? shdAttr?.fill);

  // Superscript/subscript
  const vertAlignVal = valOf(
    rPr?.["w:vertAlign"]?.[0] ?? rPr?.vertAlign?.[0],
  );

  const props: TextProps = {
    b: isOn(rPr?.["w:b"]?.[0] ?? rPr?.b?.[0]) || undefined,
    i: isOn(rPr?.["w:i"]?.[0] ?? rPr?.i?.[0]) || undefined,
    u: underVal && underVal !== "none" ? true : undefined,
    s: isOn(rPr?.["w:strike"]?.[0] ?? rPr?.strike?.[0]) || undefined,
    sup: vertAlignVal === "superscript" || undefined,
    sub: vertAlignVal === "subscript" || undefined,
    pt: szVal ? Metric.halfPtToPt(Number(szVal)) : undefined,
    color: safeHex(colorVal),
    font: fontName ? safeFont(fontName) : undefined,
    bg: bgVal,
  };

  // Page break: <w:br w:type="page"/>. Any text in the same run is dropped.
  for (const br of toArr(run?.["w:br"] ?? run?.br ?? [])) {
    const brType = br?._attr?.["w:type"] ?? br?._attr?.type;
    if (brType === "page") {
      return { tag: "span", props, kids: [buildPb()] };
    }
  }

  // Page-number field: the instruction text is exactly "PAGE" (case-insensitive).
  const instrText = run?.["w:instrText"]?.[0] ?? run?.instrText?.[0];
  if (instrText && textOf(instrText).trim().toUpperCase() === "PAGE") {
    const pageNum: PageNumNode = { tag: "pagenum", format: "decimal" };
    return { tag: "span", props, kids: [pageNum] };
  }

  const content = toArr(run?.["w:t"] ?? run?.t)
    .map((t: any) => textOf(t))
    .join("");
  return buildSpan(content, props);
}
681
+
682
/**
 * Decode a `w:tbl` element into a grid node with full fidelity.
 *
 * Works in three passes over the rows:
 *   1. collect every cell together with its `gridSpan` and vertical-merge state;
 *   2. for each cell that restarts a vertical merge, count the continuation
 *      cells directly below it at the same grid column to get its row span;
 *   3. build the cell nodes, leaving out continuation cells.
 *
 * @param tbl Parsed table element.
 * @param ctx Shared decode context, passed on to `decodePara` for cell content.
 * @returns The grid node with look flags, default stroke, column widths and rows.
 */
function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
  /** One `w:tc` plus the merge facts needed to compute spans. */
  interface RawCell {
    cell: any;
    gridSpan: number;
    vMergeRestart: boolean;
    vMergeContinue: boolean;
  }

  // Border attributes → stroke; the width defaults to 4 when `sz` is absent.
  const strokeOf = (attrs: any) =>
    safeStrokeDocx(
      attrs?.["w:val"] ?? attrs?.val,
      Number(attrs?.["w:sz"] ?? attrs?.sz ?? 4),
      attrs?.["w:color"] ?? attrs?.color,
    );

  // ── Table-level properties ──
  const tblPr = tbl?.["w:tblPr"]?.[0] ?? tbl?.tblPr?.[0] ?? {};
  const lookAttr =
    tblPr?.["w:tblLook"]?.[0]?._attr ?? tblPr?.tblLook?.[0]?._attr ?? {};
  const lookIs = (name: string, expected: string): boolean =>
    lookAttr?.[name] === expected;

  const look: TableLook = {
    firstRow: lookIs("w:firstRow", "1") || undefined,
    lastRow: lookIs("w:lastRow", "1") || undefined,
    firstCol:
      lookIs("w:firstColumn", "1") || lookIs("w:firstCol", "1") || undefined,
    lastCol:
      lookIs("w:lastColumn", "1") || lookIs("w:lastCol", "1") || undefined,
    // Banding is expressed negatively: "no band" = "0" means banding is on.
    bandedRows: lookIs("w:noHBand", "0") || undefined,
    bandedCols: lookIs("w:noVBand", "0") || undefined,
  };

  // The table's top border serves as the default stroke.
  const tblBorders = tblPr?.["w:tblBorders"]?.[0] ?? tblPr?.tblBorders?.[0];
  const topBorder = tblBorders
    ? (tblBorders?.["w:top"]?.[0]?._attr ?? tblBorders?.top?.[0]?._attr)
    : undefined;
  const defaultStroke = topBorder ? strokeOf(topBorder) : undefined;

  const gridProps: GridProps = { look, defaultStroke };

  // Column widths from w:tblGrid; zero-width columns are dropped.
  const tblGrid = tbl?.["w:tblGrid"]?.[0] ?? tbl?.tblGrid?.[0];
  if (tblGrid) {
    const widthsPt = toArr(tblGrid?.["w:gridCol"] ?? tblGrid?.gridCol ?? [])
      .map((gc: any) =>
        Metric.dxaToPt(Number(gc?._attr?.["w:w"] ?? gc?._attr?.w ?? 0)),
      )
      .filter((w: number) => w > 0);
    if (widthsPt.length > 0) gridProps.colWidths = widthsPt;
  }

  const rowArr = toArr(tbl?.["w:tr"] ?? tbl?.tr);

  // ── Pass 1: raw cells with merge information ──
  const rawGrid: RawCell[][] = rowArr.map((row: any) =>
    toArr(row?.["w:tc"] ?? row?.tc).map((cell: any): RawCell => {
      const tcPr = cell?.["w:tcPr"]?.[0] ?? {};
      const vMergeNode = tcPr?.["w:vMerge"]?.[0];
      const vMergeVal = vMergeNode?._attr?.["w:val"] ?? vMergeNode?._attr?.val;
      const vMergeRestart = vMergeVal === "restart";
      return {
        cell,
        gridSpan: Number(tcPr?.["w:gridSpan"]?.[0]?._attr?.["w:val"] ?? 1),
        vMergeRestart,
        // vMerge present but not "restart" → continuation cell
        vMergeContinue: vMergeNode != null && !vMergeRestart,
      };
    }),
  );

  // ── Pass 2: row spans of restart cells, keyed by "row,cellIndex" ──
  // True when `row` has a continuation cell starting exactly at `targetCol`.
  const continuesAt = (row: RawCell[], targetCol: number): boolean => {
    let col = 0;
    for (const candidate of row) {
      if (col === targetCol && candidate.vMergeContinue) return true;
      col += candidate.gridSpan;
    }
    return false;
  };

  const rsMap: Map<string, number> = new Map();
  rawGrid.forEach((rawRow, ri) => {
    let gridCol = 0;
    rawRow.forEach((rc, ci) => {
      if (rc.vMergeRestart) {
        let span = 1;
        // Extend downwards while the row below continues the merge.
        while (
          ri + span < rawGrid.length &&
          continuesAt(rawGrid[ri + span], gridCol)
        ) {
          span++;
        }
        rsMap.set(`${ri},${ci}`, span);
      }
      gridCol += rc.gridSpan;
    });
  });

  // ── Pass 3: cell nodes (continuation cells are skipped) ──
  const borderSides: Array<[string, "top" | "bot" | "left" | "right"]> = [
    ["top", "top"],
    ["bottom", "bot"],
    ["left", "left"],
    ["right", "right"],
  ];
  const vaMap: Record<string, "top" | "mid" | "bot"> = {
    top: "top",
    center: "mid",
    bottom: "bot",
  };

  const rowNodes = rawGrid.map((rawRow, ri) => {
    const row = rowArr[ri];
    const trPr = row?.["w:trPr"]?.[0] ?? row?.trPr?.[0] ?? {};
    const isHeaderRow =
      trPr?.["w:tblHeader"]?.[0] != null || trPr?.tblHeader?.[0] != null;
    // Only a header flag on the very first row marks the grid as having a header row.
    if (ri === 0 && isHeaderRow) gridProps.headerRow = true;

    const cellNodes: CellNode[] = [];
    rawRow.forEach((rc, ci) => {
      if (rc.vMergeContinue) return;

      const cell = rc.cell;
      const tcPr = cell?.["w:tcPr"]?.[0] ?? {};

      // Cell background
      const shdAttr = tcPr?.["w:shd"]?.[0]?._attr ?? {};
      const cp: CellProps = {
        bg: safeHex(shdAttr?.["w:fill"] ?? shdAttr?.fill),
        isHeader: isHeaderRow || undefined,
      };

      // Cell borders
      const tcBorders = tcPr?.["w:tcBorders"]?.[0] ?? tcPr?.tcBorders?.[0];
      if (tcBorders) {
        for (const [xmlTag, propKey] of borderSides) {
          const bdr =
            tcBorders?.["w:" + xmlTag]?.[0]?._attr ??
            tcBorders?.[xmlTag]?.[0]?._attr;
          if (bdr) cp[propKey] = strokeOf(bdr);
        }
      }

      // Vertical alignment
      const vaAttr =
        tcPr?.["w:vAlign"]?.[0]?._attr ?? tcPr?.vAlign?.[0]?._attr ?? {};
      const vaVal = vaAttr?.["w:val"] ?? vaAttr?.val;
      if (vaVal) cp.va = vaMap[vaVal];

      const rs = rsMap.get(`${ri},${ci}`) ?? 1;

      const paras = toArr(cell?.["w:p"] ?? cell?.p).map((p: any) =>
        decodePara(p, ctx),
      );
      // A cell without paragraphs still gets one empty paragraph.
      const content = paras.length > 0 ? paras : [buildPara([buildSpan("")])];
      cellNodes.push(buildCell(content, { cs: rc.gridSpan, rs, props: cp }));
    });
    return buildRow(cellNodes);
  });

  return buildGrid(rowNodes, gridProps);
}
861
+
862
/**
 * Reduced-fidelity table decoder: keeps the row/cell structure but flattens
 * each cell to a single paragraph of plain text, without spans or styling.
 *
 * @param tbl Parsed table element.
 * @returns A grid whose cells each hold one text paragraph.
 */
function decodeGridSimple(tbl: any): GridNode {
  const plainCell = (c: any) => {
    const span = buildSpan(cellText(c));
    return buildCell([buildPara([span])]);
  };

  const rows = toArr(tbl?.["w:tr"] ?? tbl?.tr).map((row: any) => {
    const cells = toArr(row?.["w:tc"] ?? row?.tc);
    return buildRow(cells.map((c: any) => plainCell(c)));
  });
  return buildGrid(rows);
}
872
+
873
/**
 * Minimal table decoder: puts the whole table's text into a one-row,
 * one-cell grid.
 *
 * @param tbl Parsed table element.
 * @returns A 1×1 grid holding the table text.
 */
function decodeGridFlat(tbl: any): GridNode {
  const para = buildPara([buildSpan(tableText(tbl))]);
  const onlyRow = buildRow([buildCell([para])]);
  return buildGrid([onlyRow]);
}
878
+
879
/**
 * Last-resort table decoder: returns the table's text as a plain paragraph.
 *
 * @param tbl Parsed table element.
 * @returns A paragraph with a single span holding the table text.
 */
function decodeGridText(tbl: any): ParaNode {
  const text = tableText(tbl);
  const span = buildSpan(text);
  return buildPara([span]);
}
882
+
883
/**
 * Extract the plain text of a table cell.
 *
 * Text nodes of every run are concatenated per paragraph, and paragraphs are
 * joined with a single space.
 *
 * @param cell Parsed `w:tc` element.
 * @returns The cell's text; empty when the cell has no text.
 */
function cellText(cell: any): string {
  // Accept both keys the rest of this file reads text from (`_` and `_text`);
  // reading only `_` leaves object text nodes empty.
  const textOf = (t: any): string =>
    typeof t === "string" ? t : (t?._ ?? t?._text ?? "");

  return toArr(cell?.["w:p"] ?? cell?.p)
    .map((p: any) =>
      toArr(p?.["w:r"] ?? p?.r)
        .map((r: any) =>
          toArr(r?.["w:t"] ?? r?.t)
            .map((t: any) => textOf(t))
            .join(""),
        )
        .join(""),
    )
    .join(" ");
}
896
+
897
/**
 * Extract the plain text of a table: cells of a row are separated by tabs,
 * rows by newlines.
 *
 * @param tbl Parsed table element.
 * @returns The table's text.
 */
function tableText(tbl: any): string {
  const lines: string[] = [];
  for (const row of toArr(tbl?.["w:tr"] ?? tbl?.tr)) {
    const cells: string[] = [];
    for (const c of toArr(row?.["w:tc"] ?? row?.tc)) {
      cells.push(cellText(c));
    }
    lines.push(cells.join("\t"));
  }
  return lines.join("\n");
}
906
+
907
/**
 * Derive a heading level from a paragraph style id.
 *
 * The first occurrence of "Heading" or "heading" followed by a digit decides
 * the level; only digits 1–6 are accepted.
 *
 * @param style Paragraph style id, if any.
 * @returns The heading level, or `undefined` when the style is empty, does not
 *   match, or the digit is outside 1–6.
 */
function parseHeading(style?: string): 1 | 2 | 3 | 4 | 5 | 6 | undefined {
  const digit = style ? /[Hh]eading(\d)/.exec(style)?.[1] : undefined;
  if (digit === undefined) {
    return undefined;
  }
  const level = Number(digit);
  return level >= 1 && level <= 6 ? (level as any) : undefined;
}
916
+
917
// Module-load side effect: importing this file registers a DOCX decoder
// instance with the pipeline registry.
registry.registerDecoder(new DocxDecoder());
918
+
919
+ // ─── Anchor layout extraction ───────────────────────────────
920
+
921
/**
 * Derive the layout of a floating (anchored) picture.
 *
 * Reads the wrap mode, horizontal/vertical positioning (either an alignment or
 * an offset — an alignment takes precedence), distances from the surrounding
 * text and the z-order. Child elements are looked up with and without the
 * `wp:` prefix, matching how the anchor itself is located by the caller.
 *
 * @param anchor Parsed anchor element.
 * @returns The image layout; fields that are not present stay `undefined`.
 */
function extractAnchorLayout(anchor: any): ImgLayout {
  // First child with the given local name, prefixed or not.
  const childOf = (node: any, name: string): any =>
    node?.["wp:" + name]?.[0] ?? node?.[name]?.[0];
  const has = (name: string): boolean => childOf(anchor, name) != null;
  // Text of a parsed element, whichever form the parser produced.
  const textOf = (node: any): any =>
    typeof node === "string" ? node : (node?._text ?? node?._);
  // EMU attribute → points; absent or empty attributes stay undefined.
  const emuAttr = (value: any): number | undefined =>
    value ? Metric.emuToPt(Number(value)) : undefined;

  const attr = anchor?._attr ?? {};
  // Boolean attribute: accept both the "1" and the "true" spelling.
  const behindDoc = attr.behindDoc === "1" || attr.behindDoc === "true";

  // Text wrapping type ("top and bottom" has no own mode and maps to square)
  let wrap: ImgWrap = "square";
  if (has("wrapNone")) wrap = behindDoc ? "behind" : "none";
  else if (has("wrapTight")) wrap = "tight";
  else if (has("wrapThrough")) wrap = "through";
  else if (has("wrapSquare")) wrap = "square";
  else if (has("wrapTopAndBottom")) wrap = "square";
  else if (has("wrapBehind") || behindDoc) wrap = "behind";

  // Horizontal position
  const posH = childOf(anchor, "positionH");
  const horzRelTo = parseHorzRelTo(posH?._attr?.relativeFrom);
  const horzAlignTxt = textOf(childOf(posH, "align"));
  const horzOffsetTxt = textOf(childOf(posH, "posOffset"));
  const horzAlign = horzAlignTxt ? parseHorzAlign(horzAlignTxt) : undefined;
  const xPt =
    horzOffsetTxt && !horzAlignTxt
      ? Metric.emuToPt(Number(horzOffsetTxt))
      : undefined;

  // Vertical position
  const posV = childOf(anchor, "positionV");
  const vertRelTo = parseVertRelTo(posV?._attr?.relativeFrom);
  const vertAlignTxt = textOf(childOf(posV, "align"));
  const vertOffsetTxt = textOf(childOf(posV, "posOffset"));
  const vertAlign = vertAlignTxt ? parseVertAlign(vertAlignTxt) : undefined;
  const yPt =
    vertOffsetTxt && !vertAlignTxt
      ? Metric.emuToPt(Number(vertOffsetTxt))
      : undefined;

  // Distance from the surrounding text
  const distT = emuAttr(attr.distT);
  const distB = emuAttr(attr.distB);
  const distL = emuAttr(attr.distL);
  const distR = emuAttr(attr.distR);
  const zOrder = attr.relativeHeight ? Number(attr.relativeHeight) : undefined;

  return {
    wrap,
    horzAlign,
    vertAlign,
    horzRelTo,
    vertRelTo,
    xPt,
    yPt,
    distT,
    distB,
    distL,
    distR,
    behindDoc,
    zOrder,
  };
}
963
+
964
/** `relativeFrom` values of a horizontal position → layout reference frame. */
const HORZ_RELTO_MAP: Record<string, ImgHorzRelTo> = {
  margin: 'margin', leftMargin: 'margin', rightMargin: 'margin',
  insideMargin: 'margin', outsideMargin: 'margin',
  column: 'column', page: 'page', character: 'para', paragraph: 'para',
};
/** `relativeFrom` values of a vertical position → layout reference frame. */
const VERT_RELTO_MAP: Record<string, ImgVertRelTo> = {
  margin: 'margin', topMargin: 'margin', bottomMargin: 'margin',
  insideMargin: 'margin', outsideMargin: 'margin',
  line: 'line', page: 'page', paragraph: 'para',
};
/** Horizontal alignment keywords; inside/outside collapse to left/right. */
const HORZ_ALIGN_MAP: Record<string, ImgHorzAlign> = {
  left: 'left', center: 'center', right: 'right',
  inside: 'left', outside: 'right',
};
/** Vertical alignment keywords; inside/outside collapse to top/bottom. */
const VERT_ALIGN_MAP: Record<string, ImgVertAlign> = {
  top: 'top', center: 'center', bottom: 'bottom',
  inside: 'top', outside: 'bottom',
};

/**
 * Look up `key` among the table's own entries only. A plain `table[key]` would
 * also return inherited members (e.g. for "constructor" or "toString"), and
 * the keys here come straight from the document.
 */
function lookupOwn<T>(table: Record<string, T>, key: string | undefined): T | undefined {
  return key !== undefined && Object.prototype.hasOwnProperty.call(table, key)
    ? table[key]
    : undefined;
}

/** Map a horizontal `relativeFrom` value; unknown or missing values give 'column'. */
function parseHorzRelTo(v?: string): ImgHorzRelTo { return lookupOwn(HORZ_RELTO_MAP, v) ?? 'column'; }
/** Map a vertical `relativeFrom` value; unknown or missing values give 'para'. */
function parseVertRelTo(v?: string): ImgVertRelTo { return lookupOwn(VERT_RELTO_MAP, v) ?? 'para'; }
/** Map a horizontal alignment keyword; unknown or missing values give `undefined`. */
function parseHorzAlign(v?: string): ImgHorzAlign | undefined { return lookupOwn(HORZ_ALIGN_MAP, v); }
/** Map a vertical alignment keyword; unknown or missing values give `undefined`. */
function parseVertAlign(v?: string): ImgVertAlign | undefined { return lookupOwn(VERT_ALIGN_MAP, v); }