hwpkit-dev 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.npmignore +11 -0
- package/README.md +223 -0
- package/dist/index.d.mts +313 -0
- package/dist/index.d.ts +317 -0
- package/dist/index.js +3546 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +3479 -0
- package/dist/index.mjs.map +1 -0
- package/license.md +136 -0
- package/package.json +45 -0
- package/src/contract/decoder.ts +7 -0
- package/src/contract/encoder.ts +7 -0
- package/src/contract/result.ts +21 -0
- package/src/decoders/docx/DocxDecoder.ts +986 -0
- package/src/decoders/hwp/HwpScanner.ts +809 -0
- package/src/decoders/hwpx/HwpxDecoder.ts +759 -0
- package/src/decoders/md/MdDecoder.ts +180 -0
- package/src/encoders/docx/DocxEncoder.ts +710 -0
- package/src/encoders/hwp/HwpEncoder.ts +711 -0
- package/src/encoders/hwpx/HwpxEncoder.ts +770 -0
- package/src/encoders/md/MdEncoder.ts +108 -0
- package/src/index.ts +47 -0
- package/src/model/builders.ts +66 -0
- package/src/model/doc-props.ts +138 -0
- package/src/model/doc-tree.ts +90 -0
- package/src/pipeline/Pipeline.ts +71 -0
- package/src/pipeline/registry.ts +18 -0
- package/src/safety/ShieldedParser.ts +91 -0
- package/src/safety/StyleBridge.ts +106 -0
- package/src/toolkit/ArchiveKit.ts +150 -0
- package/src/toolkit/BinaryKit.ts +187 -0
- package/src/toolkit/TextKit.ts +57 -0
- package/src/toolkit/XmlKit.ts +91 -0
- package/src/walk/TreeWalker.ts +42 -0
- package/src/walk/tree-ops.ts +26 -0
- package/tsconfig.json +23 -0
- package/tsup.config.ts +12 -0
|
@@ -0,0 +1,986 @@
|
|
|
1
|
+
import type { Decoder } from "../../contract/decoder";
|
|
2
|
+
import type {
|
|
3
|
+
DocRoot,
|
|
4
|
+
ContentNode,
|
|
5
|
+
ParaNode,
|
|
6
|
+
SpanNode,
|
|
7
|
+
GridNode,
|
|
8
|
+
ImgNode,
|
|
9
|
+
PageNumNode,
|
|
10
|
+
CellNode,
|
|
11
|
+
} from "../../model/doc-tree";
|
|
12
|
+
import type { Outcome } from "../../contract/result";
|
|
13
|
+
import type {
|
|
14
|
+
DocMeta,
|
|
15
|
+
PageDims,
|
|
16
|
+
TextProps,
|
|
17
|
+
ParaProps,
|
|
18
|
+
CellProps,
|
|
19
|
+
GridProps,
|
|
20
|
+
TableLook,
|
|
21
|
+
ImgLayout,
|
|
22
|
+
ImgHorzAlign,
|
|
23
|
+
ImgVertAlign,
|
|
24
|
+
ImgHorzRelTo,
|
|
25
|
+
ImgVertRelTo,
|
|
26
|
+
ImgWrap,
|
|
27
|
+
} from "../../model/doc-props";
|
|
28
|
+
import { A4 } from "../../model/doc-props";
|
|
29
|
+
import { succeed, fail } from "../../contract/result";
|
|
30
|
+
import {
|
|
31
|
+
buildRoot,
|
|
32
|
+
buildSheet,
|
|
33
|
+
buildPara,
|
|
34
|
+
buildSpan,
|
|
35
|
+
buildImg,
|
|
36
|
+
buildGrid,
|
|
37
|
+
buildRow,
|
|
38
|
+
buildCell,
|
|
39
|
+
buildPb,
|
|
40
|
+
} from "../../model/builders";
|
|
41
|
+
import { ShieldedParser } from "../../safety/ShieldedParser";
|
|
42
|
+
import {
|
|
43
|
+
Metric,
|
|
44
|
+
safeAlign,
|
|
45
|
+
safeFont,
|
|
46
|
+
safeHex,
|
|
47
|
+
safeStrokeDocx,
|
|
48
|
+
} from "../../safety/StyleBridge";
|
|
49
|
+
import { ArchiveKit } from "../../toolkit/ArchiveKit";
|
|
50
|
+
import { XmlKit } from "../../toolkit/XmlKit";
|
|
51
|
+
import { TextKit } from "../../toolkit/TextKit";
|
|
52
|
+
import { registry } from "../../pipeline/registry";
|
|
53
|
+
|
|
54
|
+
/**
 * Decoder that turns the bytes of a DOCX package into a `DocRoot`.
 */
export class DocxDecoder implements Decoder {
  readonly format = "docx";

  /**
   * Unzips `data` and decodes `word/document.xml` into a single-sheet document.
   *
   * Relationships, core properties and numbering are read when their parts are
   * present; a failure in core properties or numbering is tolerated. Body
   * elements are decoded one by one behind the shield, with a placeholder
   * paragraph as the fallback value. Header and footer are decoded after the body.
   *
   * @param data Raw bytes of the DOCX archive.
   * @returns `succeed(root, warns)` on success; `fail(...)` when
   *   `word/document.xml` is missing or when any unguarded step throws.
   */
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
    const shield = new ShieldedParser();
    const warns: string[] = [];

    // True when a body paragraph carries an inline sectPr whose type is not
    // "continuous" (a missing type is read as "nextPage").
    const endsSectionWithPageBreak = (el: { type: string; node: any }): boolean => {
      if (el.type !== "para") return false;
      const pPr = el.node?.["w:pPr"]?.[0] ?? el.node?.pPr?.[0] ?? {};
      const sectPr = pPr?.["w:sectPr"]?.[0] ?? pPr?.sectPr?.[0];
      if (!sectPr) return false;
      const typeAttr = sectPr?.["w:type"]?.[0]?._attr;
      const sectType = typeAttr?.["w:val"] ?? typeAttr?.val ?? "nextPage";
      return sectType !== "continuous";
    };

    try {
      const files = await ArchiveKit.unzip(data);

      const docXml = files.get("word/document.xml");
      if (!docXml) return fail("DOCX: word/document.xml not found");

      // Relationships (rId → target); an absent part yields an empty map.
      const relsXml = files.get("word/_rels/document.xml.rels");
      let relsMap: Map<string, string>;
      if (relsXml) {
        relsMap = await parseRels(TextKit.decode(relsXml));
      } else {
        relsMap = new Map<string, string>();
      }

      // Core properties are optional metadata.
      let meta: DocMeta = {};
      const coreXml = files.get("docProps/core.xml");
      if (coreXml) {
        try {
          meta = await parseCoreProps(TextKit.decode(coreXml));
        } catch {
          // meta is optional, keep the empty default
        }
      }

      // Numbering definitions drive list detection in paragraphs.
      let numMap: NumMap = new Map();
      const numXml = files.get("word/numbering.xml");
      if (numXml) {
        try {
          numMap = await parseNumbering(TextKit.decode(numXml));
        } catch {
          // non-fatal, lists fall back to the numId heuristic
        }
      }

      const docObj: any = await XmlKit.parseStrict(TextKit.decode(docXml));
      const body = getBody(docObj);
      const dims = extractDims(body) ?? { ...A4 };
      const elements = getBodyElements(body);

      const decCtx: DecCtx = { relsMap, files, shield, numMap, warns };

      const kids: ContentNode[] = [];
      for (const el of elements) {
        const fallback = buildPara([buildSpan("[요소 파싱 실패]")]);
        kids.push(
          shield.guard(() => decodeElement(el, decCtx), fallback, "docx:bodyElement"),
        );

        // A section break inside pPr is represented by a page-break paragraph
        // inserted right after the paragraph that carries it.
        if (endsSectionWithPageBreak(el)) {
          kids.push(buildPara([{ tag: "span", props: {}, kids: [buildPb()] }]));
        }
      }

      const header = await decodeHeaderFooter("header", body, relsMap, files, decCtx);
      const footer = await decodeHeaderFooter("footer", body, relsMap, files, decCtx);

      warns.push(...shield.flush());
      const content = kids.filter(Boolean) as ContentNode[];
      const sheet = buildSheet(content, dims, { header, footer });
      return succeed(buildRoot(meta, [sheet]), warns);
    } catch (e: any) {
      warns.push(...shield.flush());
      return fail(`DOCX decode error: ${e?.message ?? String(e)}`, warns);
    }
  }
}
|
|
153
|
+
|
|
154
|
+
// ─── types ─────────────────────────────────────────────────
|
|
155
|
+
|
|
156
|
+
/**
 * Shared state passed to the decode helpers while one DOCX package is processed.
 */
interface DecCtx {
  /** Entries of the unzipped package, keyed by archive path. */
  files: Map<string, Uint8Array>;
  /** Relationship `Id` → `Target`, read from `word/_rels/document.xml.rels`. */
  relsMap: Map<string, string>;
  /** Numbering definitions parsed from `word/numbering.xml`, keyed by `numId`. */
  numMap: NumMap;
  /** Guard used to decode runs and tables with fallbacks. */
  shield: ShieldedParser;
  /** Warning list that `decode` hands back together with its outcome. */
  warns: string[];
}
|
|
163
|
+
|
|
164
|
+
/**
 * Numbering lookup: `numId` → `{ levels }`, where `levels` maps an indent level
 * (`ilvl`) to its number format and whether that format counts as ordered.
 */
type NumMap = Map<
  number,
  {
    levels: Map<number, { fmt: string; isOrdered: boolean }>;
  }
>;
|
|
169
|
+
|
|
170
|
+
// ─── helpers ────────────────────────────────────────────────
|
|
171
|
+
|
|
172
|
+
/**
 * Normalises a value to an array: `null`/`undefined` become `[]`, an array is
 * returned as-is (same reference), anything else is wrapped in a one-element array.
 */
function toArr(v: any): any[] {
  if (v == null) return [];
  if (Array.isArray(v)) return v;
  return [v];
}
|
|
175
|
+
|
|
176
|
+
/**
 * Resolves a relationship target against a base directory inside the package
 * and returns a normalised archive path (no leading slash, no `.`, `..` or
 * empty segments).
 *
 * Examples:
 * - `("word", "media/image1.png")` → `"word/media/image1.png"`
 * - `("word", "../customXml/item1.xml")` → `"customXml/item1.xml"`
 * - `("word", "/word/header1.xml")` → `"word/header1.xml"`
 *
 * @param baseDir Directory of the part that owns the relationship (e.g. `"word"`).
 * @param target  Relationship target; a leading `/` means package-absolute.
 * @returns The normalised path used as the key into the unzipped file map.
 */
function resolveDocxPath(baseDir: string, target: string): string {
  // Absolute targets ignore the base directory but are still normalised.
  const joined = target.startsWith("/") ? target : `${baseDir}/${target}`;

  const stack: string[] = [];
  for (const segment of joined.split("/")) {
    // Empty segments come from doubled, leading or trailing slashes; keeping
    // them would yield keys such as "word//media/x.png" that never match.
    if (segment === "" || segment === ".") continue;
    if (segment === "..") {
      stack.pop();
    } else {
      stack.push(segment);
    }
  }
  return stack.join("/");
}
|
|
190
|
+
|
|
191
|
+
/**
 * Parses a `.rels` part into a map of relationship `Id` → `Target`.
 * Entries lacking either attribute are skipped. Any parse error is swallowed
 * and whatever was collected so far (possibly nothing) is returned.
 */
async function parseRels(xml: string): Promise<Map<string, string>> {
  const targetsById = new Map<string, string>();
  try {
    const parsed: any = await XmlKit.parseStrict(xml);
    const relationships = toArr(parsed?.Relationships?.[0]?.Relationship);
    relationships.forEach((rel: any) => {
      const attrs = rel?._attr ?? {};
      if (!attrs.Id || !attrs.Target) return;
      targetsById.set(attrs.Id, attrs.Target);
    });
  } catch {
    // best effort: an unreadable rels part just means no relationships
  }
  return targetsById;
}
|
|
204
|
+
|
|
205
|
+
/**
 * Reads title, author, subject and the created/modified stamps from
 * `docProps/core.xml`. Missing fields come back as `undefined`; any error
 * results in an empty metadata object.
 */
async function parseCoreProps(xml: string): Promise<DocMeta> {
  try {
    const parsed: any = await XmlKit.parseStrict(xml);
    const core =
      parsed?.["cp:coreProperties"]?.[0] ?? parsed?.coreProperties?.[0] ?? {};

    // Text of the first child element with the given tag, or undefined.
    const textOf = (tag: string) => core?.[tag]?.[0]?._text ?? undefined;

    return {
      title: textOf("dc:title"),
      author: textOf("dc:creator"),
      subject: textOf("dc:subject"),
      created: textOf("dcterms:created"),
      modified: textOf("dcterms:modified"),
    };
  } catch {
    return {};
  }
}
|
|
220
|
+
|
|
221
|
+
/**
 * Parses `word/numbering.xml` into a `NumMap`.
 *
 * First every abstract numbering is turned into a level table
 * (`ilvl` → `{ fmt, isOrdered }`, where any format other than `"bullet"` is
 * ordered and a missing format defaults to `"decimal"`). Then each `num` is
 * linked to the table of the abstract numbering it references; an unknown
 * reference gets an empty table. Errors are swallowed and the entries
 * collected so far are returned.
 */
async function parseNumbering(xml: string): Promise<NumMap> {
  type LevelTable = Map<number, { fmt: string; isOrdered: boolean }>;

  const byNumId: NumMap = new Map();
  try {
    const parsed: any = await XmlKit.parseStrict(xml);
    const root = parsed?.["w:numbering"]?.[0] ?? parsed?.numbering?.[0] ?? parsed;

    // Step 1: abstractNumId → level table
    const tablesByAbstractId = new Map<number, LevelTable>();
    const abstractNums = toArr(root?.["w:abstractNum"] ?? root?.abstractNum);
    for (const abstractNum of abstractNums) {
      const abstractAttr = abstractNum?._attr;
      const abstractId = Number(
        abstractAttr?.["w:abstractNumId"] ?? abstractAttr?.abstractNumId ?? 0,
      );

      const table: LevelTable = new Map();
      for (const level of toArr(abstractNum?.["w:lvl"] ?? abstractNum?.lvl)) {
        const levelAttr = level?._attr;
        const ilvl = Number(levelAttr?.["w:ilvl"] ?? levelAttr?.ilvl ?? 0);
        const fmtAttr =
          level?.["w:numFmt"]?.[0]?._attr ?? level?.numFmt?.[0]?._attr ?? {};
        const fmt = fmtAttr?.["w:val"] ?? fmtAttr?.val ?? "decimal";
        table.set(ilvl, { fmt, isOrdered: fmt !== "bullet" });
      }
      tablesByAbstractId.set(abstractId, table);
    }

    // Step 2: numId → level table of the referenced abstract numbering
    for (const num of toArr(root?.["w:num"] ?? root?.num)) {
      const numAttr = num?._attr;
      const numId = Number(numAttr?.["w:numId"] ?? numAttr?.numId ?? 0);
      const refAttr =
        num?.["w:abstractNumId"]?.[0]?._attr ??
        num?.abstractNumId?.[0]?._attr ??
        {};
      const refId = Number(refAttr?.["w:val"] ?? refAttr?.val ?? 0);
      byNumId.set(numId, { levels: tablesByAbstractId.get(refId) ?? new Map() });
    }
  } catch {
    // non-fatal: callers fall back to a heuristic when a numId is unknown
  }
  return byNumId;
}
|
|
263
|
+
|
|
264
|
+
/**
 * Returns the document body from a parsed `document.xml` object: the prefixed
 * path (`w:document` → `w:body`) first, then the unprefixed one, and finally
 * the input itself when neither exists.
 */
function getBody(obj: any): any {
  const prefixed = obj?.["w:document"]?.[0]?.["w:body"]?.[0];
  if (prefixed != null) return prefixed;

  const unprefixed = obj?.document?.[0]?.body?.[0];
  if (unprefixed != null) return unprefixed;

  return obj;
}
|
|
271
|
+
|
|
272
|
+
/**
 * Reads page size, margins and orientation from the body-level `sectPr`.
 *
 * @returns The page dimensions in points, or `null` when there is no
 *   `sectPr`, no `pgSz`, or reading them throws. Missing attributes fall back
 *   to 11906×16838 for the size and 1440/1800 for the margins (dxa values).
 */
function extractDims(body: any): PageDims | null {
  try {
    const sectPr = body?.["w:sectPr"]?.[0] ?? body?.sectPr?.[0];
    if (!sectPr) return null;

    const size = sectPr?.["w:pgSz"]?.[0]?._attr ?? sectPr?.pgSz?.[0]?._attr;
    const margins = sectPr?.["w:pgMar"]?.[0]?._attr ?? sectPr?.pgMar?.[0]?._attr;
    if (!size) return null;

    // Attribute lookup (prefixed, then bare) with a dxa default, converted to pt.
    const toPt = (attrs: any, name: string, fallbackDxa: number) =>
      Metric.dxaToPt(Number(attrs?.[`w:${name}`] ?? attrs?.[name] ?? fallbackDxa));

    return {
      wPt: toPt(size, "w", 11906),
      hPt: toPt(size, "h", 16838),
      mt: toPt(margins, "top", 1440),
      mb: toPt(margins, "bottom", 1440),
      ml: toPt(margins, "left", 1800),
      mr: toPt(margins, "right", 1800),
      orient:
        (size["w:orient"] ?? size.orient) === "landscape" ? "landscape" : "portrait",
    };
  } catch {
    return null;
  }
}
|
|
295
|
+
|
|
296
|
+
/**
 * Lists the body's paragraphs and tables as `{ type, node }` entries.
 *
 * When both kinds are present, the `_childOrder` tag list on the body (if it
 * is an array) is used to interleave them in document order; anything not
 * consumed by that list is appended afterwards, paragraphs first. Without
 * `_childOrder` the result is all paragraphs followed by all tables.
 */
function getBodyElements(body: any): { type: string; node: any }[] {
  const asPara = (node: any) => ({ type: "para", node });
  const asTable = (node: any) => ({ type: "table", node });

  const paras = toArr(body?.["w:p"] ?? body?.p);
  const tables = toArr(body?.["w:tbl"] ?? body?.tbl);

  // Only one kind present: ordering is trivial.
  if (tables.length === 0) return paras.map((n: any) => asPara(n));
  if (paras.length === 0) return tables.map((n: any) => asTable(n));

  const order = body?.["_childOrder"] as string[] | undefined;
  if (!Array.isArray(order)) {
    // No ordering information: paragraphs first, then tables.
    return [
      ...paras.map((n: any) => asPara(n)),
      ...tables.map((n: any) => asTable(n)),
    ];
  }

  const ordered: { type: string; node: any }[] = [];
  let nextPara = 0;
  let nextTable = 0;
  for (const tag of order) {
    const isParaTag = tag === "w:p" || tag === "p";
    const isTableTag = tag === "w:tbl" || tag === "tbl";
    if (isParaTag && nextPara < paras.length) {
      ordered.push(asPara(paras[nextPara++]));
    } else if (isTableTag && nextTable < tables.length) {
      ordered.push(asTable(tables[nextTable++]));
    }
  }

  // Append whatever the order list did not account for.
  for (; nextPara < paras.length; nextPara++) ordered.push(asPara(paras[nextPara]));
  for (; nextTable < tables.length; nextTable++) ordered.push(asTable(tables[nextTable]));
  return ordered;
}
|
|
330
|
+
|
|
331
|
+
// ─── Header/Footer decoding ────────────────────────────────
|
|
332
|
+
|
|
333
|
+
/**
 * Decodes the header or footer referenced by the body-level `sectPr`.
 *
 * A section may reference several headers/footers (`first`, `even`,
 * `default`) in any order. The `default` one is preferred; the first listed
 * reference is used only when no default exists.
 *
 * @param kind    Which part to decode.
 * @param body    Parsed document body that holds the `sectPr`.
 * @param relsMap Relationship `Id` → `Target` map of the main document.
 * @param files   Unzipped package entries keyed by archive path.
 * @param ctx     Decode context handed on to `decodePara`.
 * @returns The decoded paragraphs, or `undefined` when there is no reference,
 *   the target part is missing, the part has no paragraphs, or decoding fails
 *   (in which case a warning is recorded on `ctx.warns`).
 */
async function decodeHeaderFooter(
  kind: "header" | "footer",
  body: any,
  relsMap: Map<string, string>,
  files: Map<string, Uint8Array>,
  ctx: DecCtx,
): Promise<ParaNode[] | undefined> {
  try {
    const sp = body?.["w:sectPr"]?.[0] ?? body?.sectPr?.[0];
    if (!sp) return undefined;

    const refTag =
      kind === "header" ? "w:headerReference" : "w:footerReference";
    const refs = toArr(sp?.[refTag] ?? sp?.[refTag.replace("w:", "")]);
    if (refs.length === 0) return undefined;

    // Prefer the reference typed "default"; a reference without a type
    // attribute is treated as default as well.
    const ref =
      refs.find(
        (r: any) =>
          (r?._attr?.["w:type"] ?? r?._attr?.type ?? "default") === "default",
      ) ?? refs[0];

    const rId =
      ref?._attr?.["r:id"] ?? ref?._attr?.["r:Id"] ?? ref?._attr?.id;
    if (!rId) return undefined;

    const target = relsMap.get(rId);
    if (!target) return undefined;

    const filePath = resolveDocxPath("word", target);
    const fileData = files.get(filePath);
    if (!fileData) return undefined;

    const xmlStr = TextKit.decode(fileData);
    const obj: any = await XmlKit.parseStrict(xmlStr);

    const rootTag = kind === "header" ? "w:hdr" : "w:ftr";
    const root =
      obj?.[rootTag]?.[0] ?? obj?.[rootTag.replace("w:", "")]?.[0] ?? obj;

    const paras = toArr(root?.["w:p"] ?? root?.p);
    if (paras.length === 0) return undefined;

    return paras.map((p: any) => decodePara(p, ctx));
  } catch (e: unknown) {
    // Header/footer are optional: report the problem instead of failing the
    // whole document, but do not drop it silently either.
    const reason = e instanceof Error ? e.message : String(e);
    ctx.warns.push(`DOCX: ${kind} could not be decoded: ${reason}`);
    return undefined;
  }
}
|
|
377
|
+
|
|
378
|
+
// ─── Element decoding ──────────────────────────────────────
|
|
379
|
+
|
|
380
|
+
// Returns true when a drawing tag (`w:drawing` or `w:pict`) is present
// anywhere inside `node`, at any depth.
function hasDrawingDeep(node: any): boolean {
  if (!node || typeof node !== "object") return false;

  if (node["w:drawing"] || node["w:pict"]) return true;

  for (const child of Object.values(node)) {
    if (Array.isArray(child)) {
      for (const item of child) {
        if (hasDrawingDeep(item)) return true;
      }
    } else if (hasDrawingDeep(child)) {
      return true;
    }
  }
  return false;
}
|
|
391
|
+
|
|
392
|
+
/**
 * Decodes one body element. Tables go through `shield.guardGrid` with the
 * full, simple, flat and text-only grid decoders supplied in that order;
 * everything else is decoded as a paragraph.
 */
function decodeElement(
  el: { type: string; node: any },
  ctx: DecCtx,
): ContentNode {
  if (el.type !== "table") {
    return decodePara(el.node, ctx);
  }

  const guarded = ctx.shield.guardGrid(
    el.node,
    (n) => decodeGrid(n as any, ctx),
    (n) => decodeGridSimple(n as any),
    (n) => decodeGridFlat(n as any),
    (n) => decodeGridText(n as any) as unknown as GridNode,
    "docx:table",
  );
  return guarded.value;
}
|
|
409
|
+
|
|
410
|
+
/**
 * Decodes a `w:p` element into a `ParaNode`.
 *
 * Reads alignment, heading style, spacing, left indentation, list membership
 * and `pageBreakBefore` from the paragraph properties, then decodes each run
 * behind the shield (runs that contain a drawing go through
 * `decodeRunOrImage`). When `pageBreakBefore` is on, a page-break span is
 * prepended to the paragraph's children.
 *
 * @param p   Parsed paragraph element.
 * @param ctx Decode context (numbering map, shield, ...).
 */
function decodePara(p: any, ctx: DecCtx): ParaNode {
  // OOXML on/off element: present means ON unless its value is one of the
  // explicit "off" spellings ("0", "false", "off").
  const toggleOn = (node: any): boolean => {
    if (node == null) return false;
    const val = String(node?._attr?.["w:val"] ?? node?._attr?.val ?? "1");
    return val !== "0" && val !== "false" && val !== "off";
  };

  // Prefixed tag first, unprefixed as a fallback, as in the rest of the file.
  const pPr = p?.["w:pPr"]?.[0] ?? p?.pPr?.[0] ?? {};
  const jcAttr = pPr?.["w:jc"]?.[0]?._attr ?? pPr?.jc?.[0]?._attr;
  const alignVal = jcAttr?.["w:val"] ?? jcAttr?.val;
  const styleAttr = pPr?.["w:pStyle"]?.[0]?._attr ?? pPr?.pStyle?.[0]?._attr;
  const headStyle = styleAttr?.["w:val"] ?? styleAttr?.val ?? "";

  const props: ParaProps = {
    align: safeAlign(alignVal),
    heading: parseHeading(headStyle),
  };

  // Spacing (before/after/line height)
  const spacingAttr =
    pPr?.["w:spacing"]?.[0]?._attr ?? pPr?.spacing?.[0]?._attr ?? {};
  const beforeVal = Number(
    spacingAttr?.["w:before"] ?? spacingAttr?.before ?? 0,
  );
  const afterVal = Number(spacingAttr?.["w:after"] ?? spacingAttr?.after ?? 0);
  const lineVal = Number(spacingAttr?.["w:line"] ?? spacingAttr?.line ?? 0);
  const lineRule =
    spacingAttr?.["w:lineRule"] ?? spacingAttr?.lineRule ?? "auto";
  if (beforeVal > 0) props.spaceBefore = Metric.dxaToPt(beforeVal);
  if (afterVal > 0) props.spaceAfter = Metric.dxaToPt(afterVal);
  // Only the "auto" rule is translated; the value is divided by 240 to get a multiplier.
  if (lineVal > 0 && lineRule === "auto") props.lineHeight = lineVal / 240;

  // Indentation
  const indAttr = pPr?.["w:ind"]?.[0]?._attr ?? pPr?.ind?.[0]?._attr ?? {};
  const leftVal = Number(indAttr?.["w:left"] ?? indAttr?.left ?? 0);
  if (leftVal > 0) props.indentPt = Metric.dxaToPt(leftVal);

  // List/numbering
  const numPr = pPr?.["w:numPr"]?.[0] ?? pPr?.numPr?.[0];
  if (numPr) {
    const ilvlNode =
      numPr?.["w:ilvl"]?.[0]?._attr ?? numPr?.ilvl?.[0]?._attr ?? {};
    const numIdNode =
      numPr?.["w:numId"]?.[0]?._attr ?? numPr?.numId?.[0]?._attr ?? {};
    const ilvl = Number(ilvlNode?.["w:val"] ?? ilvlNode?.val ?? 0);
    const numId = Number(numIdNode?.["w:val"] ?? numIdNode?.val ?? 0);

    props.listLv = ilvl;
    const numEntry = ctx.numMap.get(numId);
    if (numEntry) {
      // Unknown level: fall back to the definition of level 0.
      const lvlInfo = numEntry.levels.get(ilvl) ?? numEntry.levels.get(0);
      props.listOrd = lvlInfo?.isOrdered ?? false;
    } else {
      // Fallback: numId=1 is typically bullet, numId=2 is numbered
      props.listOrd = numId >= 2;
    }
  }

  // pageBreakBefore: paragraph always starts on a new page
  const pbBeforeNode =
    pPr?.["w:pageBreakBefore"]?.[0] ?? pPr?.pageBreakBefore?.[0];
  const hasPageBreakBefore = toggleOn(pbBeforeNode);

  const runs = toArr(p?.["w:r"] ?? p?.r);

  // 3/28: image tags may be found inside a run, so those runs are handled by
  // a separate function.
  const kids: (SpanNode | ImgNode)[] = ctx.shield.guardAll(
    runs,
    (run: any) =>
      hasDrawingDeep(run) ? decodeRunOrImage(run, ctx) : decodeRun(run, ctx),
    () => buildSpan(""),
    "docx:run",
  );

  const filteredKids = kids.filter(Boolean) as ParaNode["kids"];

  // Prepend pb span when pageBreakBefore is set
  if (hasPageBreakBefore) {
    filteredKids.unshift({ tag: 'span', props: {}, kids: [buildPb()] });
  }

  return buildPara(filteredKids, props);
}
|
|
489
|
+
|
|
490
|
+
// 3/28 code change
/**
 * Decodes a run that contains a drawing. The first `w:drawing` / `w:pict`
 * found (depth-first) is handed to `decodeDrawing`; when no drawing is found
 * or it does not yield an image, the run is decoded as plain text instead.
 */
function decodeRunOrImage(run: any, ctx: DecCtx): SpanNode | ImgNode {
  // Depth-first search for the first drawing element below `node`.
  const locateDrawing = (node: any): any | null => {
    if (!node || typeof node !== "object") return null;

    for (const tag of ["w:drawing", "w:pict"]) {
      if (node[tag]) return node[tag][0];
    }

    for (const child of Object.values(node)) {
      const candidates = Array.isArray(child) ? child : [child];
      for (const candidate of candidates) {
        const hit = locateDrawing(candidate);
        if (hit) return hit;
      }
    }
    return null;
  };

  const drawing = locateDrawing(run);
  const image = drawing ? decodeDrawing(drawing, ctx) : null;
  if (image) return image;

  return decodeRun(run, ctx);
}
|
|
522
|
+
/**
 * Decodes a drawing element into an `ImgNode`.
 *
 * Reads the extent (EMU → pt), the alt text (`descr`, then `name`), follows
 * the blip's embed relationship to the image bytes in the package, and
 * returns them base64-encoded together with the layout (`inline`, or the
 * result of `extractAnchorLayout` for anchored drawings).
 *
 * Problems are reported through `ctx.warns` rather than the console, so they
 * reach the caller with the decode outcome and the library stays silent on
 * stdout/stderr.
 *
 * @returns The image node, or `null` when the drawing has no inline/anchor
 *   container, no embed relationship, an unknown relationship id, a missing
 *   image file, or when decoding throws.
 */
function decodeDrawing(drawing: any, ctx: DecCtx): ImgNode | null {
  try {
    const inline = drawing?.["wp:inline"]?.[0] ?? drawing?.inline?.[0];
    const anchor = drawing?.["wp:anchor"]?.[0] ?? drawing?.anchor?.[0];
    const container = inline ?? anchor;
    if (!container) return null;

    // Get dimensions
    const extent =
      container?.["wp:extent"]?.[0]?._attr ??
      container?.extent?.[0]?._attr ??
      {};
    const cx = Number(extent?.cx ?? 0);
    const cy = Number(extent?.cy ?? 0);
    const wPt = Metric.emuToPt(cx);
    const hPt = Metric.emuToPt(cy);

    // Get alt text
    const docPr =
      container?.["wp:docPr"]?.[0]?._attr ?? container?.docPr?.[0]?._attr ?? {};
    const alt = docPr?.descr ?? docPr?.name ?? "";

    // Navigate to blip
    const graphic = container?.["a:graphic"]?.[0] ?? container?.graphic?.[0];
    const graphicData =
      graphic?.["a:graphicData"]?.[0] ?? graphic?.graphicData?.[0];
    const pic = graphicData?.["pic:pic"]?.[0] ?? graphicData?.pic?.[0];
    const blipFill = pic?.["pic:blipFill"]?.[0] ?? pic?.blipFill?.[0];
    const blip =
      blipFill?.["a:blip"]?.[0]?._attr ?? blipFill?.blip?.[0]?._attr ?? {};
    const rId = blip?.["r:embed"] ?? blip?.embed;

    if (!rId) return null;

    const target = ctx.relsMap.get(rId);
    if (!target) return null;

    const filePath = resolveDocxPath("word", target);
    const fileData = ctx.files.get(filePath);
    if (!fileData) {
      ctx.warns.push(
        `[DocxDecoder] image not found in ZIP: "${filePath}" (rId=${rId}, target=${target})`,
      );
      return null;
    }

    // MIME type is derived from the target's extension.
    // NOTE(review): extensions outside this table are labelled image/png;
    // confirm whether that is intended for other formats.
    const ext = target.split(".").pop()?.toLowerCase() ?? "png";
    const mimeMap: Record<string, ImgNode["mime"]> = {
      png: "image/png",
      jpg: "image/jpeg",
      jpeg: "image/jpeg",
      gif: "image/gif",
      bmp: "image/bmp",
    };
    const mime = mimeMap[ext] ?? "image/png";

    // ── layout extraction ────────────────────────────────────
    const layout: ImgLayout = inline
      ? { wrap: 'inline' }
      : extractAnchorLayout(anchor);

    return buildImg(TextKit.base64Encode(fileData), mime, wPt, hPt, alt || undefined, layout);
  } catch (e: unknown) {
    // Best effort: a broken drawing must not abort the run, but the reason
    // is recorded so that the dropped image is visible to the caller.
    const reason = e instanceof Error ? e.message : String(e);
    ctx.warns.push(`[DocxDecoder] drawing could not be decoded: ${reason}`);
    return null;
  }
}
|
|
591
|
+
|
|
592
|
+
/**
 * Decodes a `w:r` element into a `SpanNode`.
 *
 * Character properties (bold, italic, underline, strike, super/subscript,
 * size, colour, font, background) are read from `w:rPr`. A run containing a
 * page-type `w:br` becomes a page-break span, a run whose field instruction
 * is `PAGE` becomes a page-number span, and every other run becomes a text
 * span made of its `w:t` contents.
 *
 * @param run Parsed run element.
 * @param ctx Decode context (not used by this function itself).
 */
function decodeRun(run: any, ctx: DecCtx): SpanNode {
  // OOXML on/off element: present means ON unless its value is one of the
  // explicit "off" spellings ("0", "false", "off").
  const toggleOn = (node: any): boolean => {
    if (node == null) return false;
    const val = String(node?._attr?.["w:val"] ?? node?._attr?.val ?? "1");
    return val !== "0" && val !== "false" && val !== "off";
  };

  const rPr = run?.["w:rPr"]?.[0] ?? run?.rPr?.[0] ?? {};

  const szAttr = rPr?.["w:sz"]?.[0]?._attr ?? rPr?.sz?.[0]?._attr ?? {};
  const szVal = szAttr?.["w:val"] ?? szAttr?.val;

  const colorAttr =
    rPr?.["w:color"]?.[0]?._attr ?? rPr?.color?.[0]?._attr ?? {};
  const colorVal = colorAttr?.["w:val"] ?? colorAttr?.val;

  // First font attribute that is present wins: ascii, hAnsi, then eastAsia.
  const fontAttr =
    rPr?.["w:rFonts"]?.[0]?._attr ?? rPr?.rFonts?.[0]?._attr ?? {};
  const fontName =
    fontAttr?.["w:ascii"] ??
    fontAttr?.ascii ??
    fontAttr?.["w:hAnsi"] ??
    fontAttr?.hAnsi ??
    fontAttr?.["w:eastAsia"] ??
    fontAttr?.eastAsia;

  const underAttr = rPr?.["w:u"]?.[0]?._attr ?? rPr?.u?.[0]?._attr;
  const underVal = underAttr?.["w:val"] ?? underAttr?.val;

  // Background/highlight
  const shdAttr = rPr?.["w:shd"]?.[0]?._attr ?? rPr?.shd?.[0]?._attr ?? {};
  const bgVal = safeHex(shdAttr?.["w:fill"] ?? shdAttr?.fill);

  // Superscript/subscript
  const vertAttr =
    rPr?.["w:vertAlign"]?.[0]?._attr ?? rPr?.vertAlign?.[0]?._attr;
  const vertAlignVal = vertAttr?.["w:val"] ?? vertAttr?.val;

  // Bold/italic/strike are on/off toggles
  const isBold = toggleOn(rPr?.["w:b"]?.[0] ?? rPr?.b?.[0]);
  const isItalic = toggleOn(rPr?.["w:i"]?.[0] ?? rPr?.i?.[0]);
  const isStrike = toggleOn(rPr?.["w:strike"]?.[0] ?? rPr?.strike?.[0]);

  const props: TextProps = {
    b: isBold || undefined,
    i: isItalic || undefined,
    u: underVal && underVal !== "none" ? true : undefined,
    s: isStrike || undefined,
    sup: vertAlignVal === "superscript" || undefined,
    sub: vertAlignVal === "subscript" || undefined,
    pt: szVal ? Metric.halfPtToPt(Number(szVal)) : undefined,
    color: safeHex(colorVal),
    font: fontName ? safeFont(fontName) : undefined,
    bg: bgVal,
  };

  // Page break: <w:br w:type="page"/>
  // NOTE(review): text in the same run as a page break is not emitted —
  // confirm whether such runs need to keep their text.
  const brNodes = toArr(run?.["w:br"] ?? run?.br ?? []);
  for (const br of brNodes) {
    const brType = br?._attr?.["w:type"] ?? br?._attr?.type;
    if (brType === "page") {
      return { tag: "span", props, kids: [buildPb()] };
    }
  }

  const textNodes = toArr(run?.["w:t"] ?? run?.t);
  const content = textNodes
    .map((t: any) => (typeof t === "string" ? t : (t?._ ?? t?._text ?? "")))
    .join("");

  // Handle page number field in instrText
  const instrText = run?.["w:instrText"]?.[0] ?? run?.instrText?.[0];
  if (instrText) {
    const instrStr =
      typeof instrText === "string"
        ? instrText
        : (instrText?._ ?? instrText?._text ?? "");
    if (instrStr.trim().toUpperCase() === "PAGE") {
      const pageNum: PageNumNode = { tag: "pagenum", format: "decimal" };
      return { tag: "span", props, kids: [pageNum] };
    }
  }

  return buildSpan(content, props);
}
|
|
681
|
+
|
|
682
|
+
function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
|
|
683
|
+
// Parse tblPr for table styles
|
|
684
|
+
const tblPr = tbl?.["w:tblPr"]?.[0] ?? tbl?.tblPr?.[0] ?? {};
|
|
685
|
+
const tblLookAttr =
|
|
686
|
+
tblPr?.["w:tblLook"]?.[0]?._attr ?? tblPr?.tblLook?.[0]?._attr ?? {};
|
|
687
|
+
|
|
688
|
+
const look: TableLook = {
|
|
689
|
+
firstRow: tblLookAttr?.["w:firstRow"] === "1" || undefined,
|
|
690
|
+
lastRow: tblLookAttr?.["w:lastRow"] === "1" || undefined,
|
|
691
|
+
firstCol:
|
|
692
|
+
tblLookAttr?.["w:firstColumn"] === "1" ||
|
|
693
|
+
tblLookAttr?.["w:firstCol"] === "1" ||
|
|
694
|
+
undefined,
|
|
695
|
+
lastCol:
|
|
696
|
+
tblLookAttr?.["w:lastColumn"] === "1" ||
|
|
697
|
+
tblLookAttr?.["w:lastCol"] === "1" ||
|
|
698
|
+
undefined,
|
|
699
|
+
bandedRows: tblLookAttr?.["w:noHBand"] === "0" || undefined,
|
|
700
|
+
bandedCols: tblLookAttr?.["w:noVBand"] === "0" || undefined,
|
|
701
|
+
};
|
|
702
|
+
|
|
703
|
+
// Parse table borders for defaultStroke
|
|
704
|
+
const tblBorders = tblPr?.["w:tblBorders"]?.[0] ?? tblPr?.tblBorders?.[0];
|
|
705
|
+
let defaultStroke = undefined;
|
|
706
|
+
if (tblBorders) {
|
|
707
|
+
const top =
|
|
708
|
+
tblBorders?.["w:top"]?.[0]?._attr ?? tblBorders?.top?.[0]?._attr;
|
|
709
|
+
if (top) {
|
|
710
|
+
defaultStroke = safeStrokeDocx(
|
|
711
|
+
top?.["w:val"] ?? top?.val,
|
|
712
|
+
Number(top?.["w:sz"] ?? top?.sz ?? 4),
|
|
713
|
+
top?.["w:color"] ?? top?.color,
|
|
714
|
+
);
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
|
|
718
|
+
const gridProps: GridProps = { look, defaultStroke };
|
|
719
|
+
|
|
720
|
+
// Read column widths from w:tblGrid
|
|
721
|
+
const tblGrid = tbl?.["w:tblGrid"]?.[0] ?? tbl?.tblGrid?.[0];
|
|
722
|
+
if (tblGrid) {
|
|
723
|
+
const gridCols = toArr(tblGrid?.["w:gridCol"] ?? tblGrid?.gridCol ?? []);
|
|
724
|
+
const colWidthsPt = gridCols
|
|
725
|
+
.map((gc: any) =>
|
|
726
|
+
Metric.dxaToPt(Number(gc?._attr?.["w:w"] ?? gc?._attr?.w ?? 0)),
|
|
727
|
+
)
|
|
728
|
+
.filter((w: number) => w > 0);
|
|
729
|
+
if (colWidthsPt.length > 0) gridProps.colWidths = colWidthsPt;
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
const rowArr = toArr(tbl?.["w:tr"] ?? tbl?.tr);
|
|
733
|
+
|
|
734
|
+
// ── Pass 1: parse raw cells with vMerge info ──
|
|
735
|
+
interface RawCell {
|
|
736
|
+
cell: any;
|
|
737
|
+
gridSpan: number;
|
|
738
|
+
vMergeRestart: boolean;
|
|
739
|
+
vMergeContinue: boolean;
|
|
740
|
+
}
|
|
741
|
+
const rawGrid: RawCell[][] = rowArr.map((row: any) => {
|
|
742
|
+
const cellArr = toArr(row?.["w:tc"] ?? row?.tc);
|
|
743
|
+
return cellArr.map((cell: any): RawCell => {
|
|
744
|
+
const tcPr = cell?.["w:tcPr"]?.[0] ?? {};
|
|
745
|
+
const gridSpan = Number(tcPr?.["w:gridSpan"]?.[0]?._attr?.["w:val"] ?? 1);
|
|
746
|
+
const vMergeNode = tcPr?.["w:vMerge"]?.[0];
|
|
747
|
+
const vMergeVal = vMergeNode?._attr?.["w:val"] ?? vMergeNode?._attr?.val;
|
|
748
|
+
const vMergeRestart = vMergeVal === "restart";
|
|
749
|
+
// vMerge present but val is not "restart" → continuation cell
|
|
750
|
+
const vMergeContinue = vMergeNode != null && !vMergeRestart;
|
|
751
|
+
return { cell, gridSpan, vMergeRestart, vMergeContinue };
|
|
752
|
+
});
|
|
753
|
+
});
|
|
754
|
+
|
|
755
|
+
// ── Pass 2: compute rowSpan for restart cells ──
|
|
756
|
+
// rsMap[ri][ci] = computed rowSpan (only set for restart cells)
|
|
757
|
+
const rsMap: Map<string, number> = new Map();
|
|
758
|
+
for (let ri = 0; ri < rawGrid.length; ri++) {
|
|
759
|
+
let gridCol = 0;
|
|
760
|
+
for (let ci = 0; ci < rawGrid[ri].length; ci++) {
|
|
761
|
+
const rc = rawGrid[ri][ci];
|
|
762
|
+
if (rc.vMergeRestart) {
|
|
763
|
+
let span = 1;
|
|
764
|
+
for (let nr = ri + 1; nr < rawGrid.length; nr++) {
|
|
765
|
+
// Find the cell at the same grid column in the next row
|
|
766
|
+
let col = 0;
|
|
767
|
+
let found = false;
|
|
768
|
+
for (const nc of rawGrid[nr]) {
|
|
769
|
+
if (col === gridCol && nc.vMergeContinue) {
|
|
770
|
+
span++;
|
|
771
|
+
found = true;
|
|
772
|
+
break;
|
|
773
|
+
}
|
|
774
|
+
col += nc.gridSpan;
|
|
775
|
+
}
|
|
776
|
+
if (!found) break;
|
|
777
|
+
}
|
|
778
|
+
rsMap.set(`${ri},${ci}`, span);
|
|
779
|
+
}
|
|
780
|
+
gridCol += rc.gridSpan;
|
|
781
|
+
}
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
// ── Pass 3: build CellNodes, skip continuation cells ──
|
|
785
|
+
const rowNodes = rawGrid.map((rawRow, ri) => {
|
|
786
|
+
// Check for header row
|
|
787
|
+
const row = rowArr[ri];
|
|
788
|
+
const trPr = row?.["w:trPr"]?.[0] ?? row?.trPr?.[0] ?? {};
|
|
789
|
+
const isHeaderRow =
|
|
790
|
+
trPr?.["w:tblHeader"]?.[0] != null || trPr?.tblHeader?.[0] != null;
|
|
791
|
+
if (ri === 0 && isHeaderRow) gridProps.headerRow = true;
|
|
792
|
+
|
|
793
|
+
const cellNodes: CellNode[] = [];
|
|
794
|
+
for (let ci = 0; ci < rawRow.length; ci++) {
|
|
795
|
+
const rc = rawRow[ci];
|
|
796
|
+
// Skip continuation cells — they are part of a vertical merge
|
|
797
|
+
if (rc.vMergeContinue) continue;
|
|
798
|
+
|
|
799
|
+
const cell = rc.cell;
|
|
800
|
+
const tcPr = cell?.["w:tcPr"]?.[0] ?? {};
|
|
801
|
+
|
|
802
|
+
// Cell background
|
|
803
|
+
const bgAttr = tcPr?.["w:shd"]?.[0]?._attr ?? {};
|
|
804
|
+
const bg = safeHex(bgAttr?.["w:fill"] ?? bgAttr?.fill);
|
|
805
|
+
|
|
806
|
+
// Cell borders
|
|
807
|
+
const tcBorders = tcPr?.["w:tcBorders"]?.[0] ?? tcPr?.tcBorders?.[0];
|
|
808
|
+
const cp: CellProps = { bg, isHeader: isHeaderRow || undefined };
|
|
809
|
+
|
|
810
|
+
if (tcBorders) {
|
|
811
|
+
const dirs: Array<[string, "top" | "bot" | "left" | "right"]> = [
|
|
812
|
+
["top", "top"],
|
|
813
|
+
["bottom", "bot"],
|
|
814
|
+
["left", "left"],
|
|
815
|
+
["right", "right"],
|
|
816
|
+
];
|
|
817
|
+
for (const [xmlTag, propKey] of dirs) {
|
|
818
|
+
const bdr =
|
|
819
|
+
tcBorders?.["w:" + xmlTag]?.[0]?._attr ??
|
|
820
|
+
tcBorders?.[xmlTag]?.[0]?._attr;
|
|
821
|
+
if (bdr) {
|
|
822
|
+
cp[propKey] = safeStrokeDocx(
|
|
823
|
+
bdr?.["w:val"] ?? bdr?.val,
|
|
824
|
+
Number(bdr?.["w:sz"] ?? bdr?.sz ?? 4),
|
|
825
|
+
bdr?.["w:color"] ?? bdr?.color,
|
|
826
|
+
);
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
// Vertical alignment
|
|
832
|
+
const vaAttr =
|
|
833
|
+
tcPr?.["w:vAlign"]?.[0]?._attr ?? tcPr?.vAlign?.[0]?._attr ?? {};
|
|
834
|
+
const vaVal = vaAttr?.["w:val"] ?? vaAttr?.val;
|
|
835
|
+
if (vaVal) {
|
|
836
|
+
const vaMap: Record<string, "top" | "mid" | "bot"> = {
|
|
837
|
+
top: "top",
|
|
838
|
+
center: "mid",
|
|
839
|
+
bottom: "bot",
|
|
840
|
+
};
|
|
841
|
+
cp.va = vaMap[vaVal];
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
const rs = rsMap.get(`${ri},${ci}`) ?? 1;
|
|
845
|
+
|
|
846
|
+
const paras = toArr(cell?.["w:p"] ?? cell?.p).map((p: any) =>
|
|
847
|
+
decodePara(p, ctx),
|
|
848
|
+
);
|
|
849
|
+
cellNodes.push(
|
|
850
|
+
buildCell(paras.length > 0 ? paras : [buildPara([buildSpan("")])], {
|
|
851
|
+
cs: rc.gridSpan,
|
|
852
|
+
rs,
|
|
853
|
+
props: cp,
|
|
854
|
+
}),
|
|
855
|
+
);
|
|
856
|
+
}
|
|
857
|
+
return buildRow(cellNodes);
|
|
858
|
+
});
|
|
859
|
+
return buildGrid(rowNodes, gridProps);
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
/**
 * Decodes a table into a text-only grid.
 *
 * Each `w:tc` becomes a cell holding one paragraph with one span whose text is
 * `cellText(cell)`. Only row/cell structure and text are read here; no cell
 * properties are passed to the builders.
 *
 * @param tbl Parsed table node; rows are looked up under `w:tr` or `tr`.
 * @returns The grid produced by `buildGrid` from the decoded rows.
 */
function decodeGridSimple(tbl: any): GridNode {
  // One cell -> one paragraph -> one span of flattened text.
  const textOnlyCell = (tc: any) =>
    buildCell([buildPara([buildSpan(cellText(tc))])]);

  // One row -> its cells, looked up under `w:tc` or `tc`.
  const textOnlyRow = (tr: any) =>
    buildRow(toArr(tr?.["w:tc"] ?? tr?.tc).map((tc: any) => textOnlyCell(tc)));

  const decodedRows = toArr(tbl?.["w:tr"] ?? tbl?.tr).map((tr: any) =>
    textOnlyRow(tr),
  );
  return buildGrid(decodedRows);
}
|
|
872
|
+
|
|
873
|
+
/**
 * Collapses a whole table into a grid with a single row and a single cell.
 *
 * The cell contains one paragraph with one span holding `tableText(tbl)`.
 *
 * @param tbl Parsed table node.
 * @returns A 1x1 grid carrying the table's flattened text.
 */
function decodeGridFlat(tbl: any): GridNode {
  const flattened = buildSpan(tableText(tbl));
  const onlyCell = buildCell([buildPara([flattened])]);
  const onlyRow = buildRow([onlyCell]);
  return buildGrid([onlyRow]);
}
|
|
878
|
+
|
|
879
|
+
/**
 * Replaces a table with a single paragraph containing its flattened text.
 *
 * @param tbl Parsed table node.
 * @returns A paragraph with one span holding `tableText(tbl)`.
 */
function decodeGridText(tbl: any): ParaNode {
  const flattened = buildSpan(tableText(tbl));
  return buildPara([flattened]);
}
|
|
882
|
+
|
|
883
|
+
/**
 * Extracts the plain text of a table cell.
 *
 * Text nodes within a run and runs within a paragraph are concatenated with
 * no separator; paragraphs are joined with a single space.
 *
 * @param cell Parsed cell node; paragraphs are looked up under `w:p` or `p`.
 * @returns The concatenated text, or an empty string when nothing is found.
 */
function cellText(cell: any): string {
  // A text node is either a bare string or an object whose text sits under `_`.
  // NOTE(review): extractAnchorLayout in this file reads element text via
  // `_text`; confirm `_` is the key the XML parser actually uses here.
  const textOf = (t: any) => (typeof t === "string" ? t : (t?._ ?? ""));

  const runText = (r: any) =>
    toArr(r?.["w:t"] ?? r?.t)
      .map((t: any) => textOf(t))
      .join("");

  const paraText = (p: any) =>
    toArr(p?.["w:r"] ?? p?.r)
      .map((r: any) => runText(r))
      .join("");

  const paragraphs = toArr(cell?.["w:p"] ?? cell?.p);
  return paragraphs.map((p: any) => paraText(p)).join(" ");
}
|
|
896
|
+
|
|
897
|
+
/**
 * Flattens a table to plain text.
 *
 * Cells within a row are separated by a tab and rows by a newline; each
 * cell's text comes from `cellText`.
 *
 * @param tbl Parsed table node; rows are looked up under `w:tr` or `tr`.
 * @returns The tab/newline-delimited text of the table.
 */
function tableText(tbl: any): string {
  const rowLine = (tr: any) =>
    toArr(tr?.["w:tc"] ?? tr?.tc)
      .map((tc: any) => cellText(tc))
      .join("\t");

  const rows = toArr(tbl?.["w:tr"] ?? tbl?.tr);
  return rows.map((tr: any) => rowLine(tr)).join("\n");
}
|
|
906
|
+
|
|
907
|
+
/**
 * Derives a heading level from a paragraph style identifier.
 *
 * Looks for `Heading<n>` / `heading<n>` anywhere in the style string and
 * returns `n` when it is a single digit from 1 to 6.
 *
 * @param style Style identifier, e.g. "Heading2"; may be absent.
 * @returns The heading level 1-6, or `undefined` when the style is missing,
 *          is not a heading style, or names a level outside 1-6.
 */
function parseHeading(style?: string): 1 | 2 | 3 | 4 | 5 | 6 | undefined {
  if (!style) return undefined;

  // `(?!\d)` rejects multi-digit levels: without it "Heading10" would be
  // read as level 1 because only the first digit was captured.
  const m = /[Hh]eading([1-6])(?!\d)/.exec(style);
  if (!m) return undefined;

  // Index into a literal tuple so the result is typed without a cast.
  const levels = [1, 2, 3, 4, 5, 6] as const;
  return levels[Number(m[1]) - 1];
}
|
|
916
|
+
|
|
917
|
+
// Module-level side effect: register a DocxDecoder instance with `registry`
// as soon as this file is evaluated.
registry.registerDecoder(new DocxDecoder());
|
|
918
|
+
|
|
919
|
+
// ─── Anchor layout extraction ───────────────────────────────
|
|
920
|
+
|
|
921
|
+
/**
 * Builds an `ImgLayout` from a parsed `wp:anchor` element.
 *
 * Reads the anchor's attributes (`behindDoc`, `distT`/`distB`/`distL`/`distR`,
 * `relativeHeight`), its wrap child element, and `wp:positionH` /
 * `wp:positionV`. Child elements are accessed as arrays, attributes under
 * `_attr`, and element text under `_text`.
 *
 * @param anchor Parsed anchor node; may be null/undefined, in which case the
 *               defaults are returned (wrap 'square', no offsets or distances).
 * @returns The layout: wrap mode, alignment or explicit offset per axis, the
 *          reference frame per axis, text distances, `behindDoc` and z-order.
 *          Lengths are converted with `Metric.emuToPt`.
 */
function extractAnchorLayout(anchor: any): ImgLayout {
  const attr = anchor?._attr ?? {};
  // `behindDoc` is an xsd:boolean, so "true" is as valid as "1".
  const behindDoc = attr.behindDoc === '1' || attr.behindDoc === 'true';

  const hasChild = (tag: string): boolean => anchor?.[tag]?.[0] != null;
  // Converts an EMU attribute to points; absent/empty values stay undefined.
  const emuAttrToPt = (raw: any) =>
    raw ? Metric.emuToPt(Number(raw)) : undefined;

  // Text wrapping type. The first matching wrap element wins; 'square' is the
  // fallback when none is present and the image is not behind the text.
  let wrap: ImgWrap = 'square';
  if (hasChild('wp:wrapNone')) wrap = behindDoc ? 'behind' : 'none';
  else if (hasChild('wp:wrapTight')) wrap = 'tight';
  else if (hasChild('wp:wrapThrough')) wrap = 'through';
  else if (hasChild('wp:wrapSquare')) wrap = 'square';
  else if (hasChild('wp:wrapTopAndBottom')) wrap = 'square';
  else if (hasChild('wp:wrapBehind') || behindDoc) wrap = 'behind';

  // Horizontal position: an explicit alignment takes precedence over an offset.
  const posH = anchor?.['wp:positionH']?.[0];
  const horzRelTo = parseHorzRelTo(posH?._attr?.relativeFrom);
  const horzAlignTxt = posH?.['wp:align']?.[0]?._text;
  const horzOffsetTxt = posH?.['wp:posOffset']?.[0]?._text;
  const horzAlign = horzAlignTxt ? parseHorzAlign(horzAlignTxt) : undefined;
  const xPt = horzOffsetTxt && !horzAlignTxt
    ? Metric.emuToPt(Number(horzOffsetTxt))
    : undefined;

  // Vertical position: same precedence as the horizontal axis.
  const posV = anchor?.['wp:positionV']?.[0];
  const vertRelTo = parseVertRelTo(posV?._attr?.relativeFrom);
  const vertAlignTxt = posV?.['wp:align']?.[0]?._text;
  const vertOffsetTxt = posV?.['wp:posOffset']?.[0]?._text;
  const vertAlign = vertAlignTxt ? parseVertAlign(vertAlignTxt) : undefined;
  const yPt = vertOffsetTxt && !vertAlignTxt
    ? Metric.emuToPt(Number(vertOffsetTxt))
    : undefined;

  // Distance from surrounding text on each side.
  const distT = emuAttrToPt(attr.distT);
  const distB = emuAttrToPt(attr.distB);
  const distL = emuAttrToPt(attr.distL);
  const distR = emuAttrToPt(attr.distR);
  const zOrder = attr.relativeHeight ? Number(attr.relativeHeight) : undefined;

  return { wrap, horzAlign, vertAlign, horzRelTo, vertRelTo, xPt, yPt, distT, distB, distL, distR, behindDoc, zOrder };
}
|
|
963
|
+
|
|
964
|
+
/** Maps a horizontal `relativeFrom` value to the model's horizontal reference frame. */
const HORZ_RELTO_MAP: Record<string, ImgHorzRelTo> = {
  margin: 'margin', leftMargin: 'margin', rightMargin: 'margin',
  insideMargin: 'margin', outsideMargin: 'margin',
  column: 'column', page: 'page', character: 'para', paragraph: 'para',
};

/** Maps a vertical `relativeFrom` value to the model's vertical reference frame. */
const VERT_RELTO_MAP: Record<string, ImgVertRelTo> = {
  margin: 'margin', topMargin: 'margin', bottomMargin: 'margin',
  insideMargin: 'margin', outsideMargin: 'margin',
  line: 'line', page: 'page', paragraph: 'para',
};

/** Maps a horizontal alignment keyword to the model's value (inside/outside fold to left/right). */
const HORZ_ALIGN_MAP: Record<string, ImgHorzAlign> = {
  left: 'left', center: 'center', right: 'right',
  inside: 'left', outside: 'right',
};

/** Maps a vertical alignment keyword to the model's value (inside/outside fold to top/bottom). */
const VERT_ALIGN_MAP: Record<string, ImgVertAlign> = {
  top: 'top', center: 'center', bottom: 'bottom',
  inside: 'top', outside: 'bottom',
};

/**
 * Looks `key` up among the table's own properties only.
 *
 * The tables are plain objects, so bare indexing with a document-supplied key
 * such as "constructor" or "toString" would return a member inherited from
 * Object.prototype instead of `undefined`.
 */
function lookupOwnKey<T>(table: Record<string, T>, key: string | undefined): T | undefined {
  if (key == null) return undefined;
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/** Resolves a horizontal `relativeFrom` value; unknown or missing values fall back to 'column'. */
function parseHorzRelTo(v?: string): ImgHorzRelTo { return lookupOwnKey(HORZ_RELTO_MAP, v) ?? 'column'; }

/** Resolves a vertical `relativeFrom` value; unknown or missing values fall back to 'para'. */
function parseVertRelTo(v?: string): ImgVertRelTo { return lookupOwnKey(VERT_RELTO_MAP, v) ?? 'para'; }

/** Resolves a horizontal alignment keyword; returns `undefined` when it is unknown or missing. */
function parseHorzAlign(v?: string): ImgHorzAlign | undefined { return lookupOwnKey(HORZ_ALIGN_MAP, v); }

/** Resolves a vertical alignment keyword; returns `undefined` when it is unknown or missing. */
function parseVertAlign(v?: string): ImgVertAlign | undefined { return lookupOwnKey(VERT_ALIGN_MAP, v); }
|