hwpkit-dev 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ .npmignore +11 -0
- package/README.md +223 -0
- package/dist/index.d.mts +313 -0
- package/dist/index.d.ts +317 -0
- package/dist/index.js +3546 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +3479 -0
- package/dist/index.mjs.map +1 -0
- package/license.md +136 -0
- package/package.json +45 -0
- package/src/contract/decoder.ts +7 -0
- package/src/contract/encoder.ts +7 -0
- package/src/contract/result.ts +21 -0
- package/src/decoders/docx/DocxDecoder.ts +986 -0
- package/src/decoders/hwp/HwpScanner.ts +809 -0
- package/src/decoders/hwpx/HwpxDecoder.ts +759 -0
- package/src/decoders/md/MdDecoder.ts +180 -0
- package/src/encoders/docx/DocxEncoder.ts +710 -0
- package/src/encoders/hwp/HwpEncoder.ts +711 -0
- package/src/encoders/hwpx/HwpxEncoder.ts +770 -0
- package/src/encoders/md/MdEncoder.ts +108 -0
- package/src/index.ts +47 -0
- package/src/model/builders.ts +66 -0
- package/src/model/doc-props.ts +138 -0
- package/src/model/doc-tree.ts +90 -0
- package/src/pipeline/Pipeline.ts +71 -0
- package/src/pipeline/registry.ts +18 -0
- package/src/safety/ShieldedParser.ts +91 -0
- package/src/safety/StyleBridge.ts +106 -0
- package/src/toolkit/ArchiveKit.ts +150 -0
- package/src/toolkit/BinaryKit.ts +187 -0
- package/src/toolkit/TextKit.ts +57 -0
- package/src/toolkit/XmlKit.ts +91 -0
- package/src/walk/TreeWalker.ts +42 -0
- package/src/walk/tree-ops.ts +26 -0
- package/tsconfig.json +23 -0
- package/tsup.config.ts +12 -0
|
@@ -0,0 +1,759 @@
|
|
|
1
|
+
import type { Decoder } from '../../contract/decoder';
|
|
2
|
+
import type { DocRoot, ContentNode, ParaNode, SpanNode, GridNode, ImgNode, PageNumNode } from '../../model/doc-tree';
|
|
3
|
+
import type { Outcome } from '../../contract/result';
|
|
4
|
+
import type { DocMeta, PageDims, TextProps, ParaProps, CellProps, GridProps, Stroke, ImgLayout, ImgWrap, ImgHorzAlign, ImgVertAlign, ImgHorzRelTo, ImgVertRelTo } from '../../model/doc-props';
|
|
5
|
+
import { A4 } from '../../model/doc-props';
|
|
6
|
+
import { succeed, fail } from '../../contract/result';
|
|
7
|
+
import { buildRoot, buildSheet, buildPara, buildSpan, buildImg, buildGrid, buildRow, buildCell, buildPb } from '../../model/builders';
|
|
8
|
+
import { ShieldedParser } from '../../safety/ShieldedParser';
|
|
9
|
+
import { Metric, safeAlign, safeFont, safeHex, safeStrokeHwpx } from '../../safety/StyleBridge';
|
|
10
|
+
import { ArchiveKit } from '../../toolkit/ArchiveKit';
|
|
11
|
+
import { XmlKit } from '../../toolkit/XmlKit';
|
|
12
|
+
import { TextKit } from '../../toolkit/TextKit';
|
|
13
|
+
import { registry } from '../../pipeline/registry';
|
|
14
|
+
|
|
15
|
+
/**
 * Appearance resolved from one `borderFill` entry of the HWPX header.
 * Both fields are optional because an entry may define only a border,
 * only a fill, or neither.
 */
interface BorderFillInfo {
  /** Stroke built from the entry's top border, which is used as the representative edge. */
  stroke?: Stroke;

  /** Background colour derived from the fill brush's `faceColor` (skipped when it is `none`). */
  bgColor?: string;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Character formatting resolved from one `charPr` entry of the HWPX header.
 * Every field is optional; an absent field means "not specified".
 */
interface CharPrInfo {
  /** Bold. */
  b?: boolean;
  /** Italic. */
  i?: boolean;
  /** Underline. */
  u?: boolean;
  /** Strikeout. */
  s?: boolean;
  /** Font size in points. */
  pt?: number;
  /** Text colour. */
  color?: string;
  /** Font face name. */
  font?: string;
  /** Text background colour. */
  bg?: string;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Paragraph formatting resolved from one `paraPr` entry of the HWPX header.
 * Every field is optional; an absent field means "not specified".
 */
interface ParaPrInfo {
  /** Raw horizontal alignment keyword as written in the header (normalised later by the consumer). */
  align?: string;

  /** First-line indent in points. */
  indentPt?: number;

  /** Space before the paragraph in points. */
  spaceBefore?: number;

  /** Space after the paragraph in points. */
  spaceAfter?: number;

  /** Line height as a multiplier (a percentage value divided by 100). */
  lineHeight?: number;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Shared state handed to every helper while one HWPX archive is decoded.
 */
interface DecCtx {
  /** Archive entries keyed by their path inside the zip. */
  files: Map<string, Uint8Array>;

  /** Guard used to decode sections, content items and tables with fallbacks. */
  shield: ShieldedParser;

  /** Header `borderFill` entries keyed by numeric id. */
  borderFills: Map<number, BorderFillInfo>;

  /** Header `charPr` entries keyed by numeric id. */
  charPrs: Map<number, CharPrInfo>;

  /** Header `paraPr` entries keyed by numeric id. */
  paraPrs: Map<number, ParaPrInfo>;

  /** Warnings collected during decoding. */
  warns: string[];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Decoder that turns a zipped HWPX archive into a document tree.
 */
export class HwpxDecoder implements Decoder {
  readonly format = 'hwpx';

  /**
   * Decode an HWPX archive.
   *
   * The body is read from `Contents/section0.xml`, then `section0.xml`, then
   * the first archive entry that looks like a section file. `header.xml` is
   * optional: when it is absent or cannot be parsed, decoding continues with
   * A4 page dimensions and empty style tables, and (for a parse failure) a
   * warning is recorded so the loss of styling is visible to the caller.
   *
   * NOTE(review): only a single section file is decoded here; archives that
   * carry `section1.xml`, `section2.xml`, ... are not merged — confirm whether
   * that is intended.
   *
   * @param data Raw bytes of the `.hwpx` zip archive.
   * @returns A success outcome with the document root and collected warnings,
   *   or a failure outcome when no section file exists or decoding throws.
   */
  async decode(data: Uint8Array): Promise<Outcome<DocRoot>> {
    const shield = new ShieldedParser();
    const warns: string[] = [];

    try {
      const files = await ArchiveKit.unzip(data);

      const bodyXml = files.get('Contents/section0.xml')
        ?? files.get('section0.xml')
        ?? findSectionFile(files);

      if (!bodyXml) return fail('HWPX: section0.xml not found in archive');

      const headXml = files.get('Contents/header.xml') ?? files.get('header.xml');

      let meta: DocMeta = {};
      let dims: PageDims = { ...A4 };
      let borderFills = new Map<number, BorderFillInfo>();
      let charPrs = new Map<number, CharPrInfo>();
      let paraPrs = new Map<number, ParaPrInfo>();

      if (headXml) {
        try {
          const headStr = TextKit.decode(headXml);
          const headObj: any = await XmlKit.parseStrict(headStr);
          if (headObj) {
            meta = extractMeta(headObj);
            dims = extractDims(headObj) ?? dims;
            borderFills = extractBorderFills(headObj);
            charPrs = extractCharPrs(headObj);
            paraPrs = extractParaPrs(headObj);
          }
        } catch (e: unknown) {
          // Header parse failure is non-fatal, but it silently strips metadata
          // and every shared style, so surface it as a warning.
          warns.push(`HWPX: header.xml could not be parsed; metadata and styles ignored (${HwpxDecoder.describeError(e)})`);
        }
      }

      const ctx: DecCtx = { files, shield, borderFills, charPrs, paraPrs, warns };

      const bodyStr = TextKit.decode(bodyXml);
      const bodyObj: any = await XmlKit.parseStrict(bodyStr);

      const sections = normalizeSections(bodyObj);
      const kids = shield.guardAll(
        sections,
        (sec: any) => decodeSection(sec, dims, ctx),
        () => buildSheet([buildPara([buildSpan('[섹션 파싱 실패]')])], dims),
        'hwpx:section',
      );

      warns.push(...shield.flush());
      return succeed(buildRoot(meta, kids), warns);
    } catch (e: unknown) {
      warns.push(...shield.flush());
      return fail(`HWPX decode error: ${HwpxDecoder.describeError(e)}`, warns);
    }
  }

  /** Best-effort text for a thrown value: its `message` when it has one, else its string form. */
  private static describeError(e: unknown): string {
    const message = (e as { message?: unknown } | null | undefined)?.message;
    return String(message ?? e);
  }
}
|
|
103
|
+
|
|
104
|
+
// ─── helpers ────────────────────────────────────────────────
|
|
105
|
+
|
|
106
|
+
/**
 * Fallback lookup for the body XML when the conventional `section0.xml`
 * paths are absent.
 *
 * Returns the bytes of the first archive entry (in map iteration order)
 * whose path contains `section` and ends with `.xml`. Both checks are
 * case-insensitive, so entries such as `SECTION0.XML` are found too.
 *
 * @param files Archive entries keyed by path.
 * @returns The matching entry's bytes, or `undefined` when nothing matches.
 */
function findSectionFile(files: Map<string, Uint8Array>): Uint8Array | undefined {
  for (const [path, bytes] of files) {
    const lowered = path.toLowerCase();
    if (lowered.includes('section') && lowered.endsWith('.xml')) return bytes;
  }
  return undefined;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Reduce the parsed body XML to a flat list of section objects.
 *
 * Recognised layouts, in order:
 *  1. a top-level `hs:sec` (real HWPX) or `hp:SEC` (legacy) element;
 *  2. an `HWPML` root (optionally `hp:`-prefixed) containing `BODY` → `SECTION`;
 *  3. anything else — the parsed object itself is treated as the only section.
 *
 * @param bodyObj Parsed section XML.
 * @returns Section objects in document order.
 */
function normalizeSections(bodyObj: any): any[] {
  for (const rootTag of ['hs:sec', 'hp:SEC']) {
    const direct = bodyObj?.[rootTag];
    if (direct) return toArr(direct);
  }

  const doc = bodyObj?.['hp:HWPML'] ?? bodyObj?.HWPML ?? bodyObj;
  const bodyEl = doc?.['hp:BODY']?.[0] ?? doc?.BODY?.[0] ?? doc?.['hp:BODY'] ?? doc?.BODY;
  if (!bodyEl) return [bodyObj];

  const found = bodyEl['hp:SECTION'] ?? bodyEl.SECTION ?? [];
  if (Array.isArray(found)) return found;
  return [found];
}
|
|
124
|
+
|
|
125
|
+
/**
 * Look up a child element under any of several spellings (namespace or case
 * variants) and return it as an array.
 *
 * @param obj Parsed XML element; may be null/undefined.
 * @param names Candidate property names, tried in order.
 * @returns The first non-nullish match wrapped via `toArr`, or an empty array.
 */
function getTag(obj: any, ...names: string[]): any[] {
  const hit = names.find((name) => obj?.[name] != null);
  return hit === undefined ? [] : toArr(obj[hit]);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Read title, author and subject from the header's `DOCSUMMARY` element.
 *
 * The head root is accepted as `hh:head`, `hh:HEAD` or `HEAD`; when none is
 * present the parsed object itself is used. Field elements are accepted with
 * or without the `hh:` prefix. Empty values are reported as `undefined`.
 *
 * @param headObj Parsed `header.xml`.
 * @returns The metadata found, or `{}` when there is no summary or reading throws.
 */
function extractMeta(headObj: any): DocMeta {
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const summary = head?.['hh:DOCSUMMARY']?.[0] ?? head?.DOCSUMMARY?.[0];
    if (!summary) return {};

    // Prefixed spelling wins; falsy text collapses to undefined.
    const readField = (name: string) => {
      const text = summary?.[`hh:${name}`]?.[0]?._text ?? summary?.[name]?.[0]?._text ?? '';
      return text || undefined;
    };

    const title = readField('TITLE');
    const author = readField('AUTHOR');
    const subject = readField('SUBJECT');
    return { title, author, subject };
  } catch {
    return {};
  }
}
|
|
144
|
+
|
|
145
|
+
/**
 * Read page dimensions from the header's legacy section-property list
 * (`REFLIST` → `SECPRLST` → `SECPR` → `PAGEPROPERTY`).
 *
 * Missing attributes fall back to built-in defaults before conversion with
 * `Metric.hwpToPt`. Orientation is landscape only when `Landscape` is
 * numerically 1.
 *
 * @param headObj Parsed `header.xml`.
 * @returns The page dimensions, or `null` when the header has no page
 *   properties or reading throws.
 */
function extractDims(headObj: any): PageDims | null {
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refs = head?.['hh:refList']?.[0] ?? head?.['hh:REFLIST']?.[0] ?? head?.REFLIST?.[0];
    if (!refs) return null;

    const secPrs = refs?.['hh:SECPRLST']?.[0]?.['hh:SECPR'] ?? refs?.SECPRLST?.[0]?.SECPR;
    const firstSec = Array.isArray(secPrs) ? secPrs[0] : secPrs;
    if (!firstSec) return null;

    const page = firstSec?.['hh:PAGEPROPERTY']?.[0]?._attr ?? firstSec?.PAGEPROPERTY?.[0]?._attr;
    if (!page) return null;

    // Convert an attribute (or its default when the attribute is absent) to points.
    const toPt = (raw: unknown, fallback: number) => Metric.hwpToPt(Number(raw ?? fallback));

    const wPt = toPt(page.Width, 59528);
    const hPt = toPt(page.Height, 84188);
    const mt = toPt(page.TopMargin, 5670);
    const mb = toPt(page.BottomMargin, 4252);
    const ml = toPt(page.LeftMargin, 8504);
    const mr = toPt(page.RightMargin, 8504);
    const isLandscape = Number(page.Landscape) === 1;

    return { wPt, hPt, mt, mb, ml, mr, orient: isLandscape ? 'landscape' : 'portrait' };
  } catch {
    return null;
  }
}
|
|
170
|
+
|
|
171
|
+
/**
 * Build the id → border/fill table from the header's `borderFills` list.
 *
 * For each entry the top border is taken as the representative stroke and
 * the fill brush's `faceColor` (when not `none`) as the background colour.
 * Entries whose id is 0 or missing are skipped. Any error while reading is
 * swallowed and whatever was collected so far is returned.
 *
 * @param headObj Parsed `header.xml`.
 * @returns Map keyed by numeric `borderFill` id.
 */
function extractBorderFills(headObj: any): Map<number, BorderFillInfo> {
  const table = new Map<number, BorderFillInfo>();
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refs = head?.['hh:refList']?.[0] ?? head?.['hh:REFLIST']?.[0] ?? head?.REFLIST?.[0];
    if (!refs) return table;

    const list = refs?.['hh:borderFills']?.[0] ?? refs?.['hh:BORDERFILLLIST']?.[0] ?? refs?.BORDERFILLLIST?.[0];
    if (!list) return table;

    for (const entry of getTag(list, 'hh:borderFill', 'hh:BORDERFILL')) {
      const entryAttr = entry?._attr ?? {};
      const id = Number(entryAttr.id ?? 0);
      if (id === 0) continue;

      const info: BorderFillInfo = {};

      // Border: the top edge stands in for all four sides.
      const topAttr = entry?.['hh:topBorder']?.[0]?._attr ?? entry?.['hh:top']?.[0]?._attr ?? entry?.top?.[0]?._attr;
      if (topAttr) {
        // Width is written in millimetres (e.g. "0.18 mm"): mm → pt (×2.835),
        // then pt → hwp units (×100), the unit safeStrokeHwpx is given here.
        const widthMm = parseFloat(topAttr.width) || undefined;
        const widthHwp = widthMm === undefined ? undefined : widthMm * 2.835 * 100;
        info.stroke = safeStrokeHwpx(topAttr.type, widthHwp, topAttr.color);
      }

      // Fill: real HWPX uses the `hc:` prefix; other spellings are accepted too.
      const brush = entry?.['hc:fillBrush']?.[0] ?? entry?.['hh:fillBrush']?.[0] ?? entry?.['hh:fill']?.[0] ?? entry?.fill?.[0] ?? entry?.fillBrush?.[0];
      if (brush) {
        const winAttr = brush?.['hc:winBrush']?.[0]?._attr ?? brush?.['hh:winBrush']?.[0]?._attr ?? brush?.winBrush?.[0]?._attr;
        const face = winAttr?.faceColor;
        if (face && face !== 'none') {
          info.bgColor = safeHex(face);
        }
      }

      table.set(id, info);
    }
  } catch {
    /* non-fatal */
  }
  return table;
}
|
|
212
|
+
|
|
213
|
+
/**
 * Build the id → character-formatting table from the header's
 * `charProperties` list.
 *
 * Size and colour come from the `charPr` attributes. Bold and italic are set
 * when the corresponding child element exists. Underline is set when its
 * `type` is anything but `NONE`. Strikeout is set when its `shape` is neither
 * `NONE` nor `3D` (real HWPX writes `3D` for "no strikeout"). Font face is
 * not resolved here. Entries with a missing or negative id are skipped;
 * errors are swallowed and the partial table returned.
 *
 * @param headObj Parsed `header.xml`.
 * @returns Map keyed by numeric `charPr` id.
 */
function extractCharPrs(headObj: any): Map<number, CharPrInfo> {
  const table = new Map<number, CharPrInfo>();
  try {
    const head = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refs = head?.['hh:refList']?.[0] ?? head?.['hh:REFLIST']?.[0] ?? head?.REFLIST?.[0];
    if (!refs) return table;

    const list = refs?.['hh:charProperties']?.[0] ?? refs?.['hh:CHARPROPERTIES']?.[0];
    if (!list) return table;

    for (const entry of getTag(list, 'hh:charPr', 'hh:CHARPR')) {
      const entryAttr = entry?._attr ?? {};
      const id = Number(entryAttr.id ?? -1);
      if (id < 0) continue;

      const info: CharPrInfo = {};

      if (entryAttr.height) {
        info.pt = Metric.hHeightToPt(Number(entryAttr.height));
      }
      if (entryAttr.textColor) {
        info.color = safeHex(entryAttr.textColor);
      }

      // Presence of the element is what switches bold/italic on.
      const boldEl = entry?.['hh:bold']?.[0];
      if (boldEl != null) info.b = true;

      const italicEl = entry?.['hh:italic']?.[0];
      if (italicEl != null) info.i = true;

      const underline = entry?.['hh:underline']?.[0]?._attr;
      if (underline?.type && underline.type !== 'NONE') info.u = true;

      // shape="3D" is the default "no strikeout" value in real HWPX files.
      const strike = entry?.['hh:strikeout']?.[0]?._attr;
      const strikeInactive = strike?.shape === 'NONE' || strike?.shape === '3D';
      if (strike?.shape && !strikeInactive) info.s = true;

      table.set(id, info);
    }
  } catch {
    /* non-fatal */
  }
  return table;
}
|
|
259
|
+
|
|
260
|
+
/**
 * Build the id → paragraph-formatting table from the header's
 * `paraProperties` list.
 *
 * Margin and line spacing are each read from the direct child element when
 * present, otherwise from the `hp:switch` container (`hp:default` preferred
 * over `hp:case`). The two are resolved independently, so a `lineSpacing`
 * that only exists inside the switch is still picked up when `margin` is a
 * direct child.
 *
 * Entries with a missing or negative id are skipped; errors are swallowed
 * and the partial table returned.
 *
 * @param headObj Parsed `header.xml`.
 * @returns Map keyed by numeric `paraPr` id.
 */
function extractParaPrs(headObj: any): Map<number, ParaPrInfo> {
  const map = new Map<number, ParaPrInfo>();
  try {
    const root = headObj?.['hh:head']?.[0] ?? headObj?.['hh:HEAD']?.[0] ?? headObj?.HEAD?.[0] ?? headObj;
    const refList = root?.['hh:refList']?.[0] ?? root?.['hh:REFLIST']?.[0] ?? root?.REFLIST?.[0];
    if (!refList) return map;

    const ppList = refList?.['hh:paraProperties']?.[0] ?? refList?.['hh:PARAPROPERTIES']?.[0];
    if (!ppList) return map;

    for (const pp of getTag(ppList, 'hh:paraPr', 'hh:PARAPR')) {
      const attr = pp?._attr ?? {};
      const id = Number(attr.id ?? -1);
      if (id < 0) continue;

      const alignNode = pp?.['hh:align']?.[0]?._attr ?? pp?.['hh:ALIGN']?.[0]?._attr;
      const align = alignNode?.horizontal ?? alignNode?.Horizontal;

      // Direct children win; whichever of the two is missing is looked up
      // inside hp:switch > hp:default / hp:case.
      let marginEl = pp?.['hh:margin']?.[0] ?? null;
      let lineSpEl = pp?.['hh:lineSpacing']?.[0] ?? null;
      if (!marginEl || !lineSpEl) {
        const sw = pp?.['hp:switch']?.[0];
        const container = sw?.['hp:default']?.[0] ?? sw?.['hp:case']?.[0];
        marginEl = marginEl || (container?.['hh:margin']?.[0] ?? null);
        lineSpEl = lineSpEl || (container?.['hh:lineSpacing']?.[0] ?? null);
      }

      let indentPt: number | undefined;
      let spaceBefore: number | undefined;
      let spaceAfter: number | undefined;
      let lineHeight: number | undefined;

      if (marginEl) {
        // Accept both hc:intent (written by this package's encoder) and
        // hc:indent (Hancom standard).
        const intentEl = marginEl?.['hc:intent']?.[0] ?? marginEl?.['hc:indent']?.[0];
        const prevEl = marginEl?.['hc:prev']?.[0];
        const nextEl = marginEl?.['hc:next']?.[0];
        const intentVal = Number(intentEl?._attr?.value ?? 0);
        const prevVal = Number(prevEl?._attr?.value ?? 0);
        const nextVal = Number(nextEl?._attr?.value ?? 0);
        // Indent may be negative (hanging); spacing only counts when positive.
        if (intentVal !== 0) indentPt = Metric.hwpToPt(intentVal);
        if (prevVal > 0) spaceBefore = Metric.hwpToPt(prevVal);
        if (nextVal > 0) spaceAfter = Metric.hwpToPt(nextVal);
      }

      if (lineSpEl) {
        const lsAttr = lineSpEl._attr ?? {};
        const lsType = lsAttr.type ?? 'PERCENT';
        const lsVal = Number(lsAttr.value ?? 160);
        // Only percentage spacing is translated; it becomes a multiplier.
        if (lsType === 'PERCENT' && lsVal > 0) lineHeight = lsVal / 100;
      }

      map.set(id, { align, indentPt, spaceBefore, spaceAfter, lineHeight });
    }
  } catch {
    /* non-fatal */
  }
  return map;
}
|
|
319
|
+
|
|
320
|
+
// ─── Section decoding ──────────────────────────────────────
|
|
321
|
+
|
|
322
|
+
/**
 * Append the content items contributed by one paragraph element.
 *
 * Every table found inside the paragraph's runs is pushed as a `table` item.
 * The paragraph itself is pushed as a `para` item (after its tables) unless
 * it is a pure table container, i.e. it holds at least one table and no
 * non-blank text.
 *
 * Text is read the same way the paragraph decoder reads it (string node,
 * `_text`, or `_`) and is coerced to a string before trimming, so a
 * non-string text value cannot abort the whole section.
 *
 * @param p Parsed paragraph element.
 * @param items Output list, mutated in place.
 */
function addParaItems(p: any, items: { type: string; node: any }[]): void {
  const runs = getTag(p, 'hp:run', 'hp:RUN');

  let hasTable = false;
  for (const run of runs) {
    for (const tbl of getTag(run, 'hp:tbl', 'hp:TABLE')) {
      items.push({ type: 'table', node: tbl });
      hasTable = true;
    }
  }

  const hasText = runs.some((run: any) =>
    getTag(run, 'hp:t', 'hp:T', 'hp:CHAR').some((t: any) => {
      const raw = typeof t === 'string' ? t : t?._text ?? t?._ ?? '';
      return String(raw).trim().length > 0;
    }),
  );

  if (hasText || !hasTable) {
    items.push({ type: 'para', node: p });
  }
}
|
|
345
|
+
|
|
346
|
+
/**
 * Decode one section element into a sheet.
 *
 * Page dimensions come from the first paragraph's `secPr` when it has one,
 * otherwise from `dims`. Paragraphs are the only section children handled,
 * so content order is simply paragraph order; each paragraph is expanded
 * into table and/or paragraph items and decoded through the shield with a
 * placeholder paragraph as the fallback. Header and footer paragraphs are
 * attached when the section defines them.
 *
 * @param sec Parsed section element.
 * @param dims Page dimensions to use when the section does not define its own.
 * @param ctx Shared decoding context.
 * @returns The sheet built by `buildSheet`.
 */
function decodeSection(sec: any, dims: PageDims, ctx: DecCtx) {
  const paras = getTag(sec, 'hp:p', 'hp:P');
  const pageDims = extractSecPrDims(paras[0]) ?? dims;

  const items: { type: string; node: any }[] = [];
  for (const p of paras) addParaItems(p, items);

  const kids: ContentNode[] = ctx.shield.guardAll(
    items,
    (item: any) => {
      if (item.type === 'table') {
        // Tables get a chain of progressively simpler decoders.
        const { value } = ctx.shield.guardGrid(
          item.node,
          (n) => decodeGrid(n, ctx),
          (n) => decodeGridSimple(n, ctx),
          (n) => decodeGridFlat(n),
          (n) => decodeGridText(n) as unknown as GridNode,
          'hwpx:table',
        );
        return value;
      }
      return decodePara(item.node, ctx);
    },
    () => buildPara([buildSpan('[파싱 실패]')]),
    'hwpx:content',
  );

  const headerParas = decodeHeaderFooter(sec, 'header', ctx);
  const footerParas = decodeHeaderFooter(sec, 'footer', ctx);

  return buildSheet(
    kids.filter(Boolean) as ContentNode[],
    pageDims,
    { header: headerParas, footer: footerParas },
  );
}
|
|
401
|
+
|
|
402
|
+
/**
 * Read page dimensions from the `secPr` carried by a paragraph's runs.
 *
 * The first run that has both a `secPr` and a `pagePr` decides the result.
 * Missing size or margin attributes fall back to built-in defaults before
 * conversion with `Metric.hwpToPt`. Orientation is reported as landscape
 * when the `landscape` attribute equals `NARROWLY`.
 *
 * @param p Parsed paragraph element (typically the section's first); may be nullish.
 * @returns The page dimensions, or `null` when none are found or reading throws.
 */
function extractSecPrDims(p: any): PageDims | null {
  if (!p) return null;
  try {
    for (const run of getTag(p, 'hp:run', 'hp:RUN')) {
      const secPr = run?.['hp:secPr']?.[0] ?? run?.['hp:SECPR']?.[0];
      if (!secPr) continue;

      const lowerPage = secPr?.['hp:pagePr']?.[0];
      const upperPage = secPr?.['hp:PAGEPR']?.[0];
      const pageAttr = lowerPage?._attr ?? upperPage?._attr;
      if (!pageAttr) continue;

      const marginAttr = lowerPage?.['hp:margin']?.[0]?._attr ?? upperPage?.['hp:MARGIN']?.[0]?._attr ?? {};

      // Convert an attribute (or its default when the attribute is absent) to points.
      const toPt = (raw: unknown, fallback: number) => Metric.hwpToPt(Number(raw ?? fallback));

      const wPt = toPt(pageAttr.width, 59528);
      const hPt = toPt(pageAttr.height, 84188);
      const mt = toPt(marginAttr.top, 5670);
      const mb = toPt(marginAttr.bottom, 4252);
      const ml = toPt(marginAttr.left, 8504);
      const mr = toPt(marginAttr.right, 8504);
      const orient = pageAttr.landscape === 'NARROWLY' ? 'landscape' : 'portrait';

      return { wPt, hPt, mt, mb, ml, mr, orient };
    }
  } catch {
    /* ignore */
  }
  return null;
}
|
|
426
|
+
|
|
427
|
+
/**
 * Decode the header or footer paragraphs of a section.
 *
 * Looks for a `headerFooter` element on the section, then for the requested
 * part inside it. Each lookup accepts the `hp:`-prefixed and bare names in
 * lower and upper case.
 *
 * @param sec Parsed section element.
 * @param kind Which part to decode.
 * @param ctx Shared decoding context.
 * @returns The decoded paragraphs, or `undefined` when the part is absent,
 *   has no paragraphs, or decoding throws.
 */
function decodeHeaderFooter(sec: any, kind: 'header' | 'footer', ctx: DecCtx): ParaNode[] | undefined {
  try {
    // First non-nullish `[0]` entry among the candidate property names.
    const firstOf = (owner: any, names: string[]): any => {
      for (const name of names) {
        const found = owner?.[name]?.[0];
        if (found != null) return found;
      }
      return undefined;
    };

    const container = firstOf(sec, ['hp:headerFooter', 'hp:HEADERFOOTER', 'headerFooter', 'HEADERFOOTER']);
    if (!container) return undefined;

    const upperKind = kind.toUpperCase();
    const part = firstOf(container, ['hp:' + kind, 'hp:' + upperKind, kind, upperKind]);
    if (!part) return undefined;

    const paraEls = getTag(part, 'hp:p', 'hp:P');
    if (paraEls.length === 0) return undefined;

    return paraEls.map((el: any) => decodePara(el, ctx));
  } catch {
    return undefined;
  }
}
|
|
445
|
+
|
|
446
|
+
// ─── Paragraph & run decoding ──────────────────────────────
|
|
447
|
+
|
|
448
|
+
/**
 * Decode one paragraph element into a paragraph node.
 *
 * Paragraph properties: alignment comes from the shared `paraPr` referenced
 * by `paraPrIDRef` and is overridden by an inline `PARAPR` (`Type`, then
 * `horizontal`). Indent, spacing and line height come from the shared
 * definition only; list type and level come from the inline attributes.
 *
 * Runs: each run contributes its pictures first. A run holding a page-number
 * field then yields a span wrapping a page-number node and nothing else.
 * Otherwise its text nodes are concatenated into a single span, except for
 * empty runs that only carry a `secPr` and no picture. A paragraph flagged
 * `pageBreak="1"` gets a page-break span prepended.
 *
 * @param p Parsed paragraph element.
 * @param ctx Shared decoding context.
 * @returns The paragraph node built by `buildPara`.
 */
function decodePara(p: any, ctx: DecCtx): ParaNode {
  const paraAttr = p?._attr ?? {};
  const sharedPr = ctx.paraPrs.get(Number(paraAttr.paraPrIDRef ?? -1));

  // Shared definition first, inline PARAPR overrides it.
  let align: string | undefined;
  if (sharedPr?.align) align = sharedPr.align;

  const inlinePr = p?.['hp:PARAPR']?.[0] ?? p?.['hp:paraPr']?.[0] ?? p?.PARAPR?.[0];
  if (inlinePr) {
    const inlineAlign = inlinePr?.['hp:ALIGN']?.[0]?._attr
      ?? inlinePr?.['hp:align']?.[0]?._attr
      ?? inlinePr?.ALIGN?.[0]?._attr;
    if (inlineAlign?.Type) align = inlineAlign.Type;
    if (inlineAlign?.horizontal) align = inlineAlign.horizontal;
  }
  const inlineAttr = inlinePr?._attr ?? {};

  const props: ParaProps = { align: safeAlign(align) };

  if (sharedPr) {
    const { indentPt, spaceBefore, spaceAfter, lineHeight } = sharedPr;
    if (indentPt !== undefined) props.indentPt = indentPt;
    if (spaceBefore !== undefined) props.spaceBefore = spaceBefore;
    if (spaceAfter !== undefined) props.spaceAfter = spaceAfter;
    if (lineHeight !== undefined) props.lineHeight = lineHeight;
  }

  if (inlineAttr.listType) {
    const listType = inlineAttr.listType;
    props.listOrd = listType === 'DIGIT' || listType === 'DECIMAL';
    props.listLv = Number(inlineAttr.listLevel ?? 0);
  }

  const kids: (SpanNode | ImgNode)[] = [];

  for (const run of getTag(p, 'hp:run', 'hp:RUN')) {
    // Pictures come first and are kept whatever else the run contains.
    const pics = getTag(run, 'hp:pic', 'hp:PIC');
    for (const pic of pics) {
      const img = decodePic(pic, ctx);
      if (img) kids.push(img);
    }

    // A page-number field replaces the run's text.
    const pageNums = getTag(run, 'hp:pageNum', 'hp:PAGENUM');
    if (pageNums.length > 0) {
      const pnAttr = pageNums[0]?._attr ?? {};
      let format: PageNumNode['format'] = 'decimal';
      if (pnAttr.formatType === 'ROMAN_LOWER') {
        format = 'roman';
      } else if (pnAttr.formatType === 'ROMAN_UPPER') {
        format = 'romanCaps';
      }
      const pageNumNode: PageNumNode = { tag: 'pagenum', format };
      kids.push({ tag: 'span', props: resolveCharPr(run, ctx), kids: [pageNumNode] });
      continue;
    }

    const pieces: any[] = [];
    for (const t of getTag(run, 'hp:t', 'hp:T', 'hp:CHAR')) {
      pieces.push(typeof t === 'string' ? t : t?._text ?? t?._ ?? '');
    }
    const content = pieces.join('');

    // Runs that exist only to carry section properties produce no span.
    const carriesSecPr = run?.['hp:secPr']?.[0] || run?.['hp:SECPR']?.[0];
    if (content === '' && carriesSecPr && pics.length === 0) continue;

    kids.push(buildSpan(content, resolveCharPr(run, ctx)));
  }

  if (paraAttr.pageBreak === '1') {
    // The page break travels in its own leading span.
    kids.unshift({ tag: 'span', props: {}, kids: [buildPb()] });
  }

  return buildPara(kids.filter(Boolean) as ParaNode['kids'], props);
}
|
|
525
|
+
|
|
526
|
+
/**
 * Resolve the text properties of a run.
 *
 * When the run's `charPrIDRef` names an entry in the shared character
 * property table, that entry is copied and any inline `CHARPR` is ignored.
 * Otherwise the inline `CHARPR` attributes (legacy capitalised names) are
 * translated; an absent inline element yields all-undefined properties
 * apart from whatever the `safe*` helpers return for `undefined`.
 *
 * @param run Parsed run element.
 * @param ctx Shared decoding context.
 * @returns Text properties for the run's span.
 */
function resolveCharPr(run: any, ctx: DecCtx): TextProps {
  const refId = Number((run?._attr ?? {}).charPrIDRef ?? -1);

  const shared = ctx.charPrs.get(refId);
  if (shared) {
    const { b, i, u, s, pt, color, font, bg } = shared;
    return { b, i, u, s, pt, color, font, bg };
  }

  const inline = run?.['hp:CHARPR']?.[0]?._attr
    ?? run?.['hp:charPr']?.[0]?._attr
    ?? run?.CHARPR?.[0]?._attr
    ?? {};

  // "1"/"true" switch a flag on; anything else leaves it unspecified.
  const flag = (raw: unknown) => raw === '1' || raw === 'true' || undefined;
  // A present line-style attribute is active unless it says NONE.
  const lineStyle = (raw: unknown) => (raw ? raw !== 'NONE' : undefined);

  return {
    b: flag(inline.Bold),
    i: flag(inline.Italic),
    u: lineStyle(inline.Underline),
    s: lineStyle(inline.Strikeout),
    font: safeFont(inline.FontName ?? inline.FaceNameHangul),
    pt: inline.Height ? Metric.hHeightToPt(Number(inline.Height)) : undefined,
    color: safeHex(inline.TextColor),
    bg: safeHex(inline.BgColor),
  };
}
|
|
552
|
+
|
|
553
|
+
// ─── Image decoding ────────────────────────────────────────
|
|
554
|
+
|
|
555
|
+
/**
 * Decode a picture element into an image node.
 *
 * The picture's `binaryItemIDRef` is resolved against the archive entries.
 * An entry whose file name (with or without extension) equals the reference
 * is preferred, so `image1` no longer resolves to `image10.png`; only when
 * no such entry exists does the lookup fall back to the first entry whose
 * path contains the reference. Matching is case-insensitive.
 *
 * The MIME type is taken from the matched entry's extension, then from the
 * reference's own extension, and finally defaults to `image/png`.
 *
 * @param pic Parsed picture element.
 * @param ctx Shared decoding context (archive entries are read from it).
 * @returns The image node, or `null` when the picture has no reference, the
 *   binary entry cannot be found, or decoding throws.
 */
function decodePic(pic: any, ctx: DecCtx): ImgNode | null {
  try {
    const szAttr = pic?.['hp:sz']?.[0]?._attr ?? pic?.sz?.[0]?._attr ?? {};
    const w = Metric.hwpToPt(Number(szAttr.width ?? 0));
    const h = Metric.hwpToPt(Number(szAttr.height ?? 0));

    // The image reference may sit under several tag spellings.
    const imgAttr = pic?.['hp:img']?.[0]?._attr ?? pic?.['hc:img']?.[0]?._attr
      ?? pic?.img?.[0]?._attr ?? {};
    const binRef = imgAttr.binaryItemIDRef ?? imgAttr.BinaryItemIDRef;
    if (!binRef) return null;

    const wanted = String(binRef).toLowerCase();

    // Exact file-name match wins; the first substring match is the fallback.
    let exactKey: string | undefined;
    let looseKey: string | undefined;
    for (const key of ctx.files.keys()) {
      const lowered = key.toLowerCase();
      const fileName = lowered.slice(lowered.lastIndexOf('/') + 1);
      const dot = fileName.lastIndexOf('.');
      const stem = dot > 0 ? fileName.slice(0, dot) : fileName;
      if (fileName === wanted || stem === wanted) {
        exactKey = key;
        break;
      }
      if (looseKey === undefined && lowered.includes(wanted)) looseKey = key;
    }
    const matchedKey = exactKey ?? looseKey;
    const imgData = matchedKey === undefined ? undefined : ctx.files.get(matchedKey);
    if (!imgData || matchedKey === undefined) return null;

    const mimeByExt = new Map<string, ImgNode['mime']>([
      ['png', 'image/png'],
      ['jpg', 'image/jpeg'],
      ['jpeg', 'image/jpeg'],
      ['gif', 'image/gif'],
      ['bmp', 'image/bmp'],
    ]);
    const extOf = (name: string): string => {
      const dot = name.lastIndexOf('.');
      return dot >= 0 ? name.slice(dot + 1).toLowerCase() : '';
    };
    const mime = mimeByExt.get(extOf(matchedKey)) ?? mimeByExt.get(extOf(wanted)) ?? 'image/png';

    // Layout is derived from the picture's hp:pos attributes.
    const posAttr = pic?.['hp:pos']?.[0]?._attr ?? pic?.pos?.[0]?._attr ?? {};
    const layout = extractHwpxLayout(posAttr, pic);

    return buildImg(TextKit.base64Encode(imgData), mime, w, h, undefined, layout);
  } catch {
    return null;
  }
}
|
|
592
|
+
|
|
593
|
+
/**
 * Translate a picture's position attributes into an image layout.
 *
 * A picture flagged `treatAsChar` is inline and nothing else is read.
 * Otherwise the wrap mode comes from the picture's `textWrap` attribute
 * (default `TOP_AND_BOTTOM`, unknown values → `square`), the anchors from
 * `horzRelTo` / `vertRelTo` (default `para`), the alignments from
 * `horzAlign` / `vertAlign` (undefined when unrecognised), and the offsets
 * are converted with `Metric.hwpToPt` only when non-zero.
 *
 * @param posAttr Attributes of the picture's position element.
 * @param pic Parsed picture element.
 * @returns The image layout.
 */
function extractHwpxLayout(posAttr: any, pic: any): ImgLayout {
  if (posAttr.treatAsChar === '1' || posAttr.treatAsChar === 'true') {
    return { wrap: 'inline' };
  }

  // textWrap → wrap
  const wrapByTextWrap: Record<string, ImgWrap> = {
    TOP_AND_BOTTOM: 'square',
    SQUARE: 'square',
    BOTH_SIDES: 'tight',
    LEFT: 'tight',
    RIGHT: 'tight',
    LARGER_ONLY: 'tight',
    SMALLER_ONLY: 'tight',
    LARGEST_ONLY: 'tight',
    BEHIND_TEXT: 'behind',
    FRONT_TEXT: 'none',
  };
  const textWrap: string = pic?._attr?.textWrap ?? pic?.['hp:pic']?.[0]?._attr?.textWrap ?? 'TOP_AND_BOTTOM';
  const wrap: ImgWrap = wrapByTextWrap[textWrap] ?? 'square';

  // Reference points
  const horzAnchors: Record<string, ImgHorzRelTo> = {
    PARA: 'para',
    MARGIN: 'margin',
    PAGE: 'page',
    COLUMN: 'column',
  };
  const vertAnchors: Record<string, ImgVertRelTo> = {
    PARA: 'para',
    MARGIN: 'margin',
    PAGE: 'page',
    PAPER: 'page',
    LINE: 'line',
  };
  const horzRelTo = horzAnchors[posAttr.horzRelTo ?? ''] ?? 'para';
  const vertRelTo = vertAnchors[posAttr.vertRelTo ?? ''] ?? 'para';

  // Alignment
  const horzAligns: Record<string, ImgHorzAlign> = { LEFT: 'left', CENTER: 'center', RIGHT: 'right' };
  const vertAligns: Record<string, ImgVertAlign> = { TOP: 'top', CENTER: 'center', BOTTOM: 'bottom' };
  const horzAlign = horzAligns[posAttr.horzAlign ?? ''];
  const vertAlign = vertAligns[posAttr.vertAlign ?? ''];

  // Offsets: zero means "no offset" and is reported as undefined.
  const rawX = Number(posAttr.horzOffset ?? 0);
  const rawY = Number(posAttr.vertOffset ?? 0);
  const xPt = rawX === 0 ? undefined : Metric.hwpToPt(rawX);
  const yPt = rawY === 0 ? undefined : Metric.hwpToPt(rawY);

  return { wrap, horzAlign, vertAlign, horzRelTo, vertRelTo, xPt, yPt };
}
|
|
637
|
+
|
|
638
|
+
// ─── Table decoding ────────────────────────────────────────
|
|
639
|
+
|
|
640
|
+
/**
 * Decodes a table element into a GridNode, keeping borders, column widths,
 * cell spans, cell background and vertical alignment.
 *
 * - `headerRow` is set only when the table's `repeatHeader` attribute is '1'.
 * - `defaultStroke` comes from the border fill referenced by the table's
 *   `borderFillIDRef`, when that fill carries a stroke.
 * - `colWidths` is taken from the first row in which no cell has a column
 *   span above 1 and at least one cell width is positive; widths are
 *   converted with `Metric.hwpToPt`.
 * - A cell without any paragraph receives a single paragraph holding one
 *   empty span.
 *
 * @param tbl Parsed table element (attributes are read from `_attr`).
 * @param ctx Decoding context; `ctx.borderFills` is looked up by numeric id.
 * @returns The grid built from the decoded rows and the collected grid props.
 */
function decodeGrid(tbl: any, ctx: DecCtx): GridNode {
  const tableAttrs = tbl?._attr ?? {};
  const tableFill = ctx.borderFills.get(Number(tableAttrs.borderFillIDRef ?? 0));

  const gridProps: GridProps = {
    headerRow: tableAttrs.repeatHeader === '1' ? true : undefined,
  };
  if (tableFill?.stroke) gridProps.defaultStroke = tableFill.stroke;

  const rows = getTag(tbl, 'hp:tr', 'hp:ROW');

  // Column widths: use the first row made only of unmerged cells that
  // reports at least one positive width.
  for (const row of rows) {
    const widths: number[] = [];
    let sawMergedCell = false;
    for (const cell of getTag(row, 'hp:tc', 'hp:CELL')) {
      const spanAttrs = cell?.['hp:cellSpan']?.[0]?._attr ?? {};
      if (Number(spanAttrs.colSpan ?? cell?._attr?.ColSpan ?? 1) > 1) {
        sawMergedCell = true;
        break;
      }
      const sizeAttrs = cell?.['hp:cellSz']?.[0]?._attr ?? {};
      widths.push(Metric.hwpToPt(Number(sizeAttrs.width ?? 0)));
    }
    // An empty width list never satisfies `some`, so it is rejected here too.
    if (sawMergedCell || !widths.some((w) => w > 0)) continue;
    gridProps.colWidths = widths;
    break;
  }

  // Source vertAlign keyword -> model keyword.
  const verticalAlignByName: Record<string, 'top' | 'mid' | 'bot'> = {
    TOP: 'top',
    CENTER: 'mid',
    BOTTOM: 'bot',
  };

  // Decodes a single cell element into a cell node.
  const decodeCell = (cell: any) => {
    const attrs = cell?._attr ?? {};

    // Background and borders come from the cell's own border fill; the
    // BgColor attribute is only a fallback for the background.
    const fill = ctx.borderFills.get(Number(attrs.borderFillIDRef ?? 0));
    const cellProps: CellProps = { bg: fill?.bgColor ?? safeHex(attrs.BgColor) };
    const stroke = fill?.stroke;
    if (stroke) {
      // The same stroke is applied to all four edges.
      cellProps.top = stroke;
      cellProps.bot = stroke;
      cellProps.left = stroke;
      cellProps.right = stroke;
    }

    // Vertical alignment is carried by the subList element's attributes.
    const subList = cell?.['hp:subList']?.[0] ?? cell?.subList?.[0];
    const alignName = subList?._attr?.vertAlign;
    if (alignName) cellProps.va = verticalAlignByName[alignName];

    // Spans: the cellSpan child wins over the ColSpan/RowSpan attributes.
    const spanAttrs = cell?.['hp:cellSpan']?.[0]?._attr ?? {};
    const cs = Number(spanAttrs.colSpan ?? attrs.ColSpan ?? 1);
    const rs = Number(spanAttrs.rowSpan ?? attrs.RowSpan ?? 1);

    // Paragraphs live under subList when present, otherwise directly in the cell.
    const paraSource = subList ? subList : cell;
    const paras: ParaNode[] = getTag(paraSource, 'hp:p', 'hp:P').map((p: any) => decodePara(p, ctx));
    const content = paras.length > 0 ? paras : [buildPara([buildSpan('')])];

    return buildCell(content, { cs, rs, props: cellProps });
  };

  const rowNodes = rows.map((row: any) =>
    buildRow(getTag(row, 'hp:tc', 'hp:CELL').map((cell: any) => decodeCell(cell))),
  );
  return buildGrid(rowNodes, gridProps);
}
|
|
722
|
+
|
|
723
|
+
/**
 * Decodes a table into a GridNode that keeps the row/cell layout but reduces
 * every cell to one paragraph with one span holding the cell's plain text
 * (as produced by `cellText`). No grid or cell properties are set.
 *
 * @param tbl Parsed table element.
 * @param ctx Decoding context; not read by this function.
 * @returns A grid with one text-only cell per source cell.
 */
function decodeGridSimple(tbl: any, ctx: DecCtx): GridNode {
  // Wraps a cell's flattened text as span -> paragraph -> cell.
  const toTextCell = (cell: any) => {
    const span = buildSpan(cellText(cell));
    return buildCell([buildPara([span])]);
  };

  const rowNodes = getTag(tbl, 'hp:tr', 'hp:ROW').map((row: any) => {
    const cells = getTag(row, 'hp:tc', 'hp:CELL');
    return buildRow(cells.map((cell: any) => toTextCell(cell)));
  });

  return buildGrid(rowNodes);
}
|
|
731
|
+
|
|
732
|
+
/**
 * Decodes a table into a 1x1 GridNode whose only cell contains the whole
 * table's plain text (as produced by `tableText`) in a single span.
 *
 * @param tbl Parsed table element.
 * @returns A grid with exactly one row and one cell.
 */
function decodeGridFlat(tbl: any): GridNode {
  const span = buildSpan(tableText(tbl));
  const onlyCell = buildCell([buildPara([span])]);
  return buildGrid([buildRow([onlyCell])]);
}
|
|
735
|
+
|
|
736
|
+
/**
 * Replaces a table with a single paragraph holding the table's plain text
 * (as produced by `tableText`) in one span.
 *
 * @param tbl Parsed table element.
 * @returns A paragraph node, not a grid.
 */
function decodeGridText(tbl: any): ParaNode {
  const flattened = tableText(tbl);
  return buildPara([buildSpan(flattened)]);
}
|
|
739
|
+
|
|
740
|
+
/**
 * Flattens a table cell into plain text.
 *
 * Paragraphs are read from the cell's subList when one exists, otherwise from
 * the cell itself. Text nodes of all runs in a paragraph are concatenated
 * without a separator; paragraphs are joined with a single space.
 *
 * @param cell Parsed cell element.
 * @returns The cell's text; an empty string when no paragraph is found.
 */
function cellText(cell: any): string {
  const container = cell?.['hp:subList']?.[0] ?? cell?.subList?.[0] ?? cell;

  const paragraphTexts: string[] = [];
  for (const para of getTag(container, 'hp:p', 'hp:P')) {
    const pieces: unknown[] = [];
    for (const run of getTag(para, 'hp:run', 'hp:RUN')) {
      for (const t of getTag(run, 'hp:t', 'hp:T')) {
        // A text node is either a bare string or an object exposing its
        // text as `_text` or `_`.
        pieces.push(typeof t === 'string' ? t : t?._text ?? t?._ ?? '');
      }
    }
    paragraphTexts.push(pieces.join(''));
  }

  return paragraphTexts.join(' ');
}
|
|
749
|
+
|
|
750
|
+
/**
 * Flattens a table into plain text: cells of a row are separated by a tab,
 * rows by a newline. Each cell's text comes from `cellText`.
 *
 * @param tbl Parsed table element.
 * @returns The tab/newline-delimited text of the table.
 */
function tableText(tbl: any): string {
  const lines: string[] = [];
  for (const row of getTag(tbl, 'hp:tr', 'hp:ROW')) {
    const fields: string[] = [];
    for (const cell of getTag(row, 'hp:tc', 'hp:CELL')) {
      fields.push(cellText(cell));
    }
    lines.push(fields.join('\t'));
  }
  return lines.join('\n');
}
|
|
755
|
+
|
|
756
|
+
/**
 * Normalizes a value to an array.
 *
 * @param v Any value.
 * @returns `[]` for null/undefined, `v` itself (same reference) when it is
 *          already an array, otherwise a new one-element array `[v]`.
 */
function toArr(v: any): any[] {
  if (v == null) {
    return [];
  }
  if (Array.isArray(v)) {
    return v;
  }
  return [v];
}
|
|
757
|
+
|
|
758
|
+
// Auto-register
|
|
759
|
+
// Module-level side effect: constructs an HwpxDecoder and hands it to
// registry.registerDecoder when this module is evaluated.
registry.registerDecoder(new HwpxDecoder());
|